diff --git a/cli.py b/cli.py index 28ae0a371d4..d4053c8cb9c 100644 --- a/cli.py +++ b/cli.py @@ -576,6 +576,7 @@ def load_cli_config() -> Dict[str, Any]: "docker_env": "TERMINAL_DOCKER_ENV", "docker_mount_cwd_to_workspace": "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE", "docker_run_as_host_user": "TERMINAL_DOCKER_RUN_AS_HOST_USER", + "docker_persist_across_processes": "TERMINAL_DOCKER_PERSIST_ACROSS_PROCESSES", "sandbox_dir": "TERMINAL_SANDBOX_DIR", # Persistent shell (non-local backends) "persistent_shell": "TERMINAL_PERSISTENT_SHELL", diff --git a/gateway/run.py b/gateway/run.py index bbfaad85f89..d1b1c744d38 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -831,6 +831,7 @@ if _config_path.exists(): "docker_env": "TERMINAL_DOCKER_ENV", "docker_mount_cwd_to_workspace": "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE", "docker_run_as_host_user": "TERMINAL_DOCKER_RUN_AS_HOST_USER", + "docker_persist_across_processes": "TERMINAL_DOCKER_PERSIST_ACROSS_PROCESSES", "sandbox_dir": "TERMINAL_SANDBOX_DIR", "persistent_shell": "TERMINAL_PERSISTENT_SHELL", } diff --git a/hermes_cli/config.py b/hermes_cli/config.py index ff1f988f69d..f6eb69d3b80 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -5551,6 +5551,7 @@ def set_config_value(key: str, value: str): "terminal.daytona_image": "TERMINAL_DAYTONA_IMAGE", "terminal.docker_mount_cwd_to_workspace": "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE", "terminal.docker_run_as_host_user": "TERMINAL_DOCKER_RUN_AS_HOST_USER", + "terminal.docker_persist_across_processes": "TERMINAL_DOCKER_PERSIST_ACROSS_PROCESSES", "terminal.docker_env": "TERMINAL_DOCKER_ENV", # terminal.cwd intentionally excluded — CLI resolves at runtime, # gateway bridges it in gateway/run.py. Persisting to .env causes diff --git a/tests/conftest.py b/tests/conftest.py index 81067be6f3e..fd91b558f5f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -227,6 +227,7 @@ _HERMES_BEHAVIORAL_VARS = frozenset({ "TERMINAL_CONTAINER_DISK", "TERMINAL_CONTAINER_MEMORY", "TERMINAL_CONTAINER_PERSISTENT", + "TERMINAL_DOCKER_PERSIST_ACROSS_PROCESSES", "TERMINAL_DOCKER_RUN_AS_HOST_USER", "BROWSER_CDP_URL", "CAMOFOX_URL", diff --git a/tests/tools/test_docker_environment.py b/tests/tools/test_docker_environment.py index 89ebe9c7e82..abdce91ba5f 100644 --- a/tests/tools/test_docker_environment.py +++ b/tests/tools/test_docker_environment.py @@ -203,25 +203,43 @@ def test_auto_mount_replaces_persistent_workspace_bind(monkeypatch, tmp_path): def test_non_persistent_cleanup_removes_container(monkeypatch): - """When persistent=false, cleanup() must schedule docker stop + rm.""" + """When persist_across_processes=false, cleanup() must docker stop AND + docker rm so containers don't leak across hermes processes. + + Updated for issue #20561: the previous implementation used fire-and-forget + ``subprocess.Popen("... &", shell=True)`` which raced with parent exit; + the new implementation uses ``subprocess.run`` on a daemon thread with + bounded timeouts. See test_cleanup_with_persist_disabled_stops_and_rms + for the full behavior contract. + """ monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker") - calls = _mock_subprocess_run(monkeypatch) + monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default") + _mock_subprocess_run(monkeypatch) + # Run the worker thread synchronously so assertions can observe its work. + import threading + monkeypatch.setattr(threading, "Thread", _FakeThread) - popen_cmds = [] - monkeypatch.setattr( - docker_env.subprocess, "Popen", - lambda cmd, **kw: (popen_cmds.append(cmd), type("P", (), {"poll": lambda s: 0, "wait": lambda s, **k: None, "returncode": 0, "stdout": iter([]), "stdin": None})())[1], + env = docker_env.DockerEnvironment( + image="python:3.11", cwd="/root", timeout=60, + task_id="ephemeral-task", persistent_filesystem=False, + persist_across_processes=False, ) - - env = _make_dummy_env(persistent_filesystem=False, task_id="ephemeral-task") - assert env._container_id container_id = env._container_id + assert container_id + # Capture cleanup-time docker calls (everything before this was init). + cleanup_calls = [] + real_run = docker_env.subprocess.run + + def _capture(cmd, **kw): + cleanup_calls.append((list(cmd) if isinstance(cmd, list) else cmd, kw)) + return real_run(cmd, **kw) + + monkeypatch.setattr(docker_env.subprocess, "run", _capture) env.cleanup() - # Should have stop and rm calls via Popen - stop_cmds = [c for c in popen_cmds if container_id in str(c) and "stop" in str(c)] - assert len(stop_cmds) >= 1, f"cleanup() should schedule docker stop for {container_id}" + stops = [c for c in cleanup_calls if isinstance(c[0], list) and c[0][1:2] == ["stop"]] + assert stops, f"cleanup() should docker stop {container_id}; got {cleanup_calls}" class _FakePopen: @@ -622,3 +640,338 @@ def test_labels_attribute_populated_after_init(monkeypatch): "hermes-task-id": "abc", "hermes-profile": "default", } + + +# ── Cross-process container reuse (issue #20561) ────────────────── + + +def _mock_subprocess_run_with_reuse(monkeypatch, ps_state: str | None, + start_succeeds: bool = True): + """Reuse-aware subprocess.run mock. + + ``ps_state`` controls what ``docker ps -a --filter ...`` returns: + * ``None`` → no match (empty stdout). Forces a fresh ``docker run``. + * ``"running"`` / ``"exited"`` / ... → emit ``CID\\tSTATE`` so the reuse + path picks it up. ``"running"`` skips ``docker start``; other states + trigger ``docker start`` (which can be forced to fail via + ``start_succeeds=False``). + + Returns the captured call list so the test can verify which docker + commands actually ran. + """ + calls = [] + + def _run(cmd, **kwargs): + calls.append((list(cmd) if isinstance(cmd, list) else cmd, kwargs)) + if isinstance(cmd, list) and len(cmd) >= 2: + sub = cmd[1] + if sub == "version": + return subprocess.CompletedProcess(cmd, 0, stdout="Docker version", stderr="") + if sub == "ps": + if ps_state is None: + return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="") + return subprocess.CompletedProcess( + cmd, 0, stdout=f"reused-cid\t{ps_state}\n", stderr="", + ) + if sub == "start": + if not start_succeeds: + # Real subprocess.run with check=True raises on non-zero exit; + # mirror that so the production code's except clause fires. + raise subprocess.CalledProcessError(1, cmd, output="", stderr="no such container") + return subprocess.CompletedProcess(cmd, 0, stdout="reused-cid\n", stderr="") + if sub == "run": + return subprocess.CompletedProcess(cmd, 0, stdout="fresh-cid\n", stderr="") + return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="") + + monkeypatch.setattr(docker_env.subprocess, "run", _run) + return calls + + +def test_reuse_attaches_to_running_container_without_docker_run(monkeypatch): + """When a labeled container is already ``running``, the reuse probe + must pick it up and skip ``docker run`` entirely. Regression for the + issue #20561 root cause: every Hermes process spawning a new container + despite docs claiming "ONE long-lived container shared across sessions".""" + monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker") + monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default") + calls = _mock_subprocess_run_with_reuse(monkeypatch, ps_state="running") + + env = _make_dummy_env(task_id="reuse-test") + + # The reuse path must populate _container_id from the ps probe output. + assert env._container_id == "reused-cid", ( + f"expected reused container id, got {env._container_id!r}" + ) + # And it must NOT have run `docker run`. + run_invocations = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"] + assert not run_invocations, ( + f"docker run should be skipped on reuse, got: {run_invocations}" + ) + # And it must have NOT issued a `docker start` for an already-running container. + start_invocations = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "start"] + assert not start_invocations, ( + f"docker start should be skipped when container already running, got: {start_invocations}" + ) + + +def test_reuse_starts_stopped_container_before_attaching(monkeypatch): + """A labeled container in ``exited`` state must be restarted via + ``docker start`` before the new Hermes process uses it. Without this + step, ``docker exec`` against a stopped container errors out and the + first agent command fails opaquely.""" + monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker") + monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default") + calls = _mock_subprocess_run_with_reuse(monkeypatch, ps_state="exited") + + env = _make_dummy_env(task_id="reuse-stopped") + + assert env._container_id == "reused-cid" + start_invocations = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "start"] + assert start_invocations, "expected docker start for exited container" + run_invocations = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"] + assert not run_invocations, "should not docker run when reusing an exited container" + + +def test_reuse_falls_back_to_fresh_run_when_start_fails(monkeypatch): + """If ``docker start`` on the matched container fails (container was + removed between probe and start, daemon paused, etc.), the code must + silently fall through to a fresh ``docker run`` rather than leaving the + user with a broken environment. Defensive recovery — the probe is best- + effort, not authoritative.""" + monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker") + monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default") + calls = _mock_subprocess_run_with_reuse( + monkeypatch, ps_state="exited", start_succeeds=False, + ) + + env = _make_dummy_env(task_id="reuse-broken-start") + + # docker start should be attempted then fail; code falls through to run. + assert env._container_id == "fresh-cid", ( + f"expected fresh container id after fallback, got {env._container_id!r}" + ) + run_invocations = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"] + assert run_invocations, "fallback to fresh docker run must happen on start failure" + + +def test_no_reuse_when_persist_across_processes_disabled(monkeypatch): + """Opt-out path: ``persist_across_processes=False`` skips the ps probe + entirely and always starts a fresh container, matching the pre-fix + behavior for users who want hard per-process isolation.""" + monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker") + monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default") + # ps_state=running would trigger reuse if the probe ran — assert it doesn't. + calls = _mock_subprocess_run_with_reuse(monkeypatch, ps_state="running") + + env = docker_env.DockerEnvironment( + image="python:3.11", cwd="/root", timeout=60, + task_id="no-reuse", persist_across_processes=False, + ) + + # Must NOT have issued docker ps (the probe is gated by the flag). + ps_invocations = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "ps"] + assert not ps_invocations, ( + f"docker ps probe should be skipped when persist_across_processes=False, got: {ps_invocations}" + ) + # Should have started a fresh container. + assert env._container_id == "fresh-cid" + + +def test_find_reusable_container_prefers_running_over_stopped(monkeypatch): + """When the probe returns multiple matches (shouldn't normally happen, + but can after a crash leaves stale duplicates), a ``running`` container + is preferred over any stopped one. The duplicate gets reaped later by + the orphan reaper; we don't try to be heroic about it here.""" + monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker") + monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default") + + def _run(cmd, **kwargs): + if isinstance(cmd, list) and len(cmd) >= 2: + if cmd[1] == "version": + return subprocess.CompletedProcess(cmd, 0, stdout="ok", stderr="") + if cmd[1] == "ps": + # Two matches: stopped first, running second. + return subprocess.CompletedProcess( + cmd, 0, + stdout="stopped-cid\texited\nrunning-cid\trunning\n", + stderr="", + ) + return subprocess.CompletedProcess(cmd, 0, stdout="fresh-cid\n", stderr="") + + monkeypatch.setattr(docker_env.subprocess, "run", _run) + + env = _make_dummy_env(task_id="dup-match") + assert env._container_id == "running-cid", ( + f"running container should win over stopped duplicate, got {env._container_id!r}" + ) + + +# ── Cleanup correctness (issue #20561) ──────────────────────────── + + +class _FakeThread: + """Stand-in for threading.Thread that captures target/args and calls + target() synchronously when .start() runs, so cleanup behavior is + observable without actually backgrounding subprocess calls.""" + + def __init__(self, target=None, daemon=None, name=None): + self._target = target + self.daemon = daemon + self.name = name + self._done = False + + def start(self): + if self._target is not None: + self._target() + self._done = True + + def is_alive(self): + return not self._done + + def join(self, timeout=None): + self._done = True + + +def _install_fake_thread(monkeypatch): + import threading + monkeypatch.setattr(threading, "Thread", _FakeThread) + + +def test_cleanup_with_persist_only_stops_no_rm(monkeypatch): + """``persist_across_processes=True`` (default) cleanup must docker stop + the container but NEVER docker rm — the container has to survive so the + next Hermes process can reuse it. Issue #20561 — the previous code + matched this on the `_persistent` flag instead of a dedicated + cross-process flag, which made reuse impossible.""" + monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker") + monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default") + _mock_subprocess_run(monkeypatch) + _install_fake_thread(monkeypatch) + + env = _make_dummy_env(task_id="cleanup-persist", persistent_filesystem=False) + # Default persist_across_processes=True. + container_id = env._container_id + assert container_id + + cleanup_calls = [] + real_run = docker_env.subprocess.run + + def _capturing_run(cmd, **kwargs): + cleanup_calls.append((list(cmd) if isinstance(cmd, list) else cmd, kwargs)) + return real_run(cmd, **kwargs) + + monkeypatch.setattr(docker_env.subprocess, "run", _capturing_run) + + env.cleanup() + + stops = [c for c in cleanup_calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "stop"] + rms = [c for c in cleanup_calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "rm"] + assert stops, f"expected docker stop call, got cleanup_calls: {cleanup_calls}" + assert not rms, ( + f"docker rm must NOT be called when persist_across_processes=True; " + f"reuse would be impossible. Got: {rms}" + ) + + +def test_cleanup_with_persist_disabled_stops_and_rms(monkeypatch): + """``persist_across_processes=False`` cleanup must docker stop AND docker + rm so containers don't leak. Crucially, this runs regardless of the + ``persistent_filesystem`` setting — the original code only rm'd when + ``not self._persistent``, which meant the default-on ``container_persistent: + true`` users (the documented happy path) leaked Exited containers forever. + Issue #20561 root-cause fix.""" + monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker") + monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default") + _mock_subprocess_run(monkeypatch) + _install_fake_thread(monkeypatch) + + # Note: persistent_filesystem=True (the prior-leak scenario) + the new + # cross-process toggle OFF must still result in a clean rm. + env = docker_env.DockerEnvironment( + image="python:3.11", cwd="/root", timeout=60, + task_id="cleanup-no-persist", persistent_filesystem=True, + persist_across_processes=False, + ) + + cleanup_calls = [] + real_run = docker_env.subprocess.run + + def _capturing_run(cmd, **kwargs): + cleanup_calls.append((list(cmd) if isinstance(cmd, list) else cmd, kwargs)) + return real_run(cmd, **kwargs) + + monkeypatch.setattr(docker_env.subprocess, "run", _capturing_run) + + env.cleanup() + + stops = [c for c in cleanup_calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "stop"] + rms = [c for c in cleanup_calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "rm"] + assert stops, "expected docker stop" + assert rms, ( + "docker rm MUST run when persist_across_processes=False, even with " + "persistent_filesystem=True — that gating was the leak source in #20561." + ) + + +def test_cleanup_uses_subprocess_run_not_detached_shell(monkeypatch): + """The pre-fix code used ``subprocess.Popen(\"... &\", shell=True)`` which + raced with parent-process exit and silently dropped cleanup work. The + new code must use ``subprocess.run`` with bounded ``timeout=`` so the + work actually completes within the process lifetime. + + Asserts cleanup never reaches into shell-mode Popen. + """ + monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker") + monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default") + _mock_subprocess_run(monkeypatch) + _install_fake_thread(monkeypatch) + + def _forbidden_popen(*args, **kwargs): + raise AssertionError( + f"cleanup must not use subprocess.Popen anymore (issue #20561); " + f"got args={args} kwargs={kwargs}" + ) + + monkeypatch.setattr(docker_env.subprocess, "Popen", _forbidden_popen) + + env = _make_dummy_env(task_id="no-popen-cleanup") + env.cleanup() # must not raise + + +def test_wait_for_cleanup_returns_true_when_no_thread_started(): + """``wait_for_cleanup`` must be a no-op when ``cleanup`` was never called + (or the env has no live cleanup thread) — atexit calls it unconditionally + across all active envs, so a False return would falsely flag healthy + shutdowns.""" + env = docker_env.DockerEnvironment.__new__(docker_env.DockerEnvironment) + # No _cleanup_thread set — simulates an env that was never cleanup()'d. + assert env.wait_for_cleanup(timeout=1.0) is True + + +def test_wait_for_cleanup_after_cleanup_returns_true(monkeypatch): + """End-to-end: cleanup() starts a thread, wait_for_cleanup() joins it + and reports completion. Atexit relies on this contract to ensure docker + stop/rm actually finishes before the Python interpreter exits.""" + monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker") + monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default") + _mock_subprocess_run(monkeypatch) + _install_fake_thread(monkeypatch) + + env = _make_dummy_env(task_id="wait-test") + env.cleanup() + assert env.wait_for_cleanup(timeout=5.0) is True + + +def test_cleanup_on_env_with_no_container_id_does_not_raise(monkeypatch): + """A DockerEnvironment whose ``__init__`` failed before the container_id + was set (image-pull error, docker daemon down) should still be safe to + cleanup() — the post-creation failure path in callers always tries. + Without this guard the daemon-down case used to NameError on the cleanup + branch.""" + env = docker_env.DockerEnvironment.__new__(docker_env.DockerEnvironment) + env._container_id = None + env._persistent = False + env._workspace_dir = None + env._home_dir = None + # No exception expected. + env.cleanup() diff --git a/tests/tools/test_terminal_config_env_sync.py b/tests/tools/test_terminal_config_env_sync.py index 1aecea0cd7c..ba15cc2670e 100644 --- a/tests/tools/test_terminal_config_env_sync.py +++ b/tests/tools/test_terminal_config_env_sync.py @@ -224,3 +224,23 @@ def test_docker_env_is_bridged_everywhere(): assert "docker_env" in _gateway_env_map_keys() assert "docker_env" in _save_config_env_sync_keys() assert "TERMINAL_DOCKER_ENV" in _terminal_tool_env_var_names() + + +def test_docker_persist_across_processes_is_bridged_everywhere(): + """Regression pin for the cross-process container reuse toggle. + + ``terminal.docker_persist_across_processes`` (issue #20561) controls + whether ``DockerEnvironment.__init__`` probes for and reuses an existing + labeled container at startup, and whether ``cleanup()`` removes the + container on Hermes exit or just stops it (keeping it for the next + process). Same four-bridge invariant as docker_run_as_host_user / + docker_env / docker_mount_cwd_to_workspace — drift between any of the + four sites means ``terminal.docker_persist_across_processes: false`` in + config.yaml silently does nothing for that entry point, leaving the + user unable to opt out of the documented "ONE long-lived container + shared across sessions" behavior. + """ + assert "docker_persist_across_processes" in _cli_env_map_keys() + assert "docker_persist_across_processes" in _gateway_env_map_keys() + assert "docker_persist_across_processes" in _save_config_env_sync_keys() + assert "TERMINAL_DOCKER_PERSIST_ACROSS_PROCESSES" in _terminal_tool_env_var_names() diff --git a/tools/environments/docker.py b/tools/environments/docker.py index 32f62aa2cc2..30eb2230957 100644 --- a/tools/environments/docker.py +++ b/tools/environments/docker.py @@ -339,11 +339,13 @@ class DockerEnvironment(BaseEnvironment): auto_mount_cwd: bool = False, run_as_host_user: bool = False, extra_args: list = None, + persist_across_processes: bool = True, ): if cwd == "~": cwd = "/root" super().__init__(cwd=cwd, timeout=timeout) self._persistent = persistent_filesystem + self._persist_across_processes = persist_across_processes self._task_id = task_id self._forward_env = _normalize_forward_env_names(forward_env) self._env = _normalize_env_dict(env) @@ -561,26 +563,69 @@ class DockerEnvironment(BaseEnvironment): "hermes-task-id": task_label, "hermes-profile": profile_name, } - run_cmd = [ - self._docker_exe, "run", "-d", - "--init", # tini/catatonit as PID 1 — reaps zombie children - "--name", container_name, - *label_args, - "-w", cwd, - *all_run_args, - image, - "sleep", "infinity", # no fixed lifetime — idle reaper handles cleanup - ] - logger.debug(f"Starting container: {' '.join(run_cmd)}") - result = subprocess.run( - run_cmd, - capture_output=True, - text=True, - timeout=120, # image pull may take a while - check=True, - ) - self._container_id = result.stdout.strip() - logger.info(f"Started container {container_name} ({self._container_id[:12]})") + + # Cross-process reuse (issue #20561 — docs claim "ONE long-lived + # container shared across sessions"). If a prior Hermes process + # already started a container for this (task_id, profile) and it + # still exists, attach to it instead of starting a fresh one. This + # restores the documented contract; opt out via + # ``terminal.docker_persist_across_processes: false``. + # + # Reuse matches on labels only — we deliberately do NOT compare image + # / mounts / resources. Operators who need a fresh container after + # changing those settings should set ``docker_persist_across_processes: + # false`` (or run ``docker rm -f`` against the labeled container) to + # force a clean start. + reused = False + if persist_across_processes: + existing = self._find_reusable_container(task_label, profile_name) + if existing is not None: + container_id, state = existing + self._container_id = container_id + if state != "running": + try: + subprocess.run( + [self._docker_exe, "start", container_id], + capture_output=True, + text=True, + timeout=30, + check=True, + ) + except (subprocess.CalledProcessError, subprocess.TimeoutExpired) as e: + logger.warning( + "Failed to start existing container %s (state=%s): " + "%s — falling back to a fresh container.", + container_id[:12], state, e, + ) + self._container_id = None + if self._container_id: + logger.info( + "Reusing container %s (task=%s, profile=%s, prior state=%s)", + container_id[:12], task_label, profile_name, state, + ) + reused = True + + if not reused: + run_cmd = [ + self._docker_exe, "run", "-d", + "--init", # tini/catatonit as PID 1 — reaps zombie children + "--name", container_name, + *label_args, + "-w", cwd, + *all_run_args, + image, + "sleep", "infinity", # no fixed lifetime — idle reaper handles cleanup + ] + logger.debug(f"Starting container: {' '.join(run_cmd)}") + result = subprocess.run( + run_cmd, + capture_output=True, + text=True, + timeout=120, # image pull may take a while + check=True, + ) + self._container_id = result.stdout.strip() + logger.info(f"Started container {container_name} ({self._container_id[:12]})") # Build the init-time env forwarding args (used only by init_session # to inject host env vars into the snapshot; subsequent commands get @@ -685,31 +730,143 @@ class DockerEnvironment(BaseEnvironment): logger.debug("Docker --storage-opt support: %s", _storage_opt_ok) return _storage_opt_ok - def cleanup(self): - """Stop and remove the container. Bind-mount dirs persist if persistent=True.""" - if self._container_id: - try: - # Stop in background so cleanup doesn't block - stop_cmd = ( - f"(timeout 60 {self._docker_exe} stop {self._container_id} || " - f"{self._docker_exe} rm -f {self._container_id}) >/dev/null 2>&1 &" - ) - subprocess.Popen(stop_cmd, shell=True) - except Exception as e: - logger.warning("Failed to stop container %s: %s", self._container_id, e) + def _find_reusable_container(self, task_label: str, profile_label: str) -> Optional[tuple[str, str]]: + """Look for an existing container labeled for this (task, profile). + Returns ``(container_id, state)`` on hit, ``None`` on miss / on any + failure (including ``docker ps`` itself failing). State is one of the + values Docker reports via ``{{.State}}`` — e.g. ``running``, ``exited``, + ``created``, ``paused``, ``restarting``, ``dead``. The caller decides + whether the state warrants ``docker start`` before reuse. + + Restricted to the docker-stored label set this class creates; never + matches containers that happened to be named ``hermes-*`` but were + started by some other tool. + """ + try: + result = subprocess.run( + [ + self._docker_exe, "ps", "-a", + "--filter", "label=hermes-agent=1", + "--filter", f"label=hermes-task-id={task_label}", + "--filter", f"label=hermes-profile={profile_label}", + "--format", "{{.ID}}\t{{.State}}", + ], + capture_output=True, + text=True, + timeout=10, + check=False, + ) + except (subprocess.TimeoutExpired, OSError) as e: + logger.debug("docker ps probe failed: %s — will start a fresh container", e) + return None + if result.returncode != 0: + logger.debug( + "docker ps probe returned %d: %s — will start a fresh container", + result.returncode, result.stderr.strip(), + ) + return None + lines = [ln.strip() for ln in result.stdout.splitlines() if ln.strip()] + if not lines: + return None + # Multiple matches are unusual (one (task, profile) should produce one + # container) but can happen if a previous Hermes process crashed + # mid-cleanup. Prefer a running one if present; otherwise pick the + # first listed. Stale duplicates get reaped by the orphan-reaper in a + # follow-up commit; we don't try to be heroic about them here. + running = None + first = None + for ln in lines: + parts = ln.split("\t", 1) + if len(parts) != 2: + continue + cid, state = parts[0], parts[1].lower() + if first is None: + first = (cid, state) + if state == "running" and running is None: + running = (cid, state) + return running or first + + def cleanup(self): + """Stop (and optionally remove) the container. + + Behavior depends on ``persist_across_processes`` (init kwarg): + + * **True** (default) — only ``docker stop`` so the container is + available for reuse by the next Hermes process. The orphan-reaper + eventually removes it if no subsequent process picks it up. + * **False** — ``docker stop`` followed by ``docker rm -f``, regardless + of ``persistent_filesystem``. The previous ``rm`` path was gated on + ``not self._persistent`` which meant ``container_persistent: true`` + users (the default) leaked Exited containers forever (issue #20561). + + Cleanup runs on a daemon thread with bounded ``subprocess.run`` calls, + not the previous fire-and-forget ``Popen(... &)`` shell construct. + That pattern raced with parent-process exit and silently dropped + cleanup work when the parent didn't outlive the detached shell — the + primary mechanism behind Exited-container accumulation under SIGTERM + / ``hermes /quit`` / dead terminals. + """ + container_id = self._container_id + if not container_id: + # Still drop the bind-mount dirs if any were allocated. if not self._persistent: - # Also schedule removal (stop only leaves it as stopped) + for d in (self._workspace_dir, self._home_dir): + if d: + shutil.rmtree(d, ignore_errors=True) + return + + # Capture state needed by the worker before we null out the attrs — + # the worker thread can outlive ``self``. + docker_exe = self._docker_exe + should_remove = not self._persist_across_processes + log_id = container_id[:12] + + def _do_cleanup() -> None: + try: + subprocess.run( + [docker_exe, "stop", "-t", "10", container_id], + capture_output=True, timeout=30, + ) + except (subprocess.TimeoutExpired, OSError) as e: + logger.warning("docker stop %s timed out / failed: %s", log_id, e) + if should_remove: try: - subprocess.Popen( - f"sleep 3 && {self._docker_exe} rm -f {self._container_id} >/dev/null 2>&1 &", - shell=True, + subprocess.run( + [docker_exe, "rm", "-f", container_id], + capture_output=True, timeout=30, ) - except Exception: - pass - self._container_id = None + except (subprocess.TimeoutExpired, OSError) as e: + logger.warning("docker rm -f %s failed: %s", log_id, e) + + # Daemon thread: doesn't block interpreter exit (atexit returns + # promptly), but unlike the old ``Popen(... &)`` shell trick the + # Python-level join semantics let the thread actually run to + # completion if the interpreter is still alive. atexit registers + # ``_atexit_cleanup`` in terminal_tool.py which waits up to ~60s for + # outstanding cleanups, so most exits complete the work cleanly. + import threading + t = threading.Thread(target=_do_cleanup, daemon=True, name=f"hermes-cleanup-{log_id}") + t.start() + self._cleanup_thread = t + self._container_id = None if not self._persistent: for d in (self._workspace_dir, self._home_dir): if d: shutil.rmtree(d, ignore_errors=True) + + def wait_for_cleanup(self, timeout: float = 30.0) -> bool: + """Block up to *timeout* seconds for the cleanup worker thread. + + Returns ``True`` if the thread finished (or no thread was started), + ``False`` on timeout. The atexit hook in terminal_tool.py calls this + on every active environment so docker stop/rm actually completes + before the Python process exits — without this, ``hermes /quit`` + races the interpreter shutdown and leaves stopped containers behind. + """ + thread = getattr(self, "_cleanup_thread", None) + if thread is None or not thread.is_alive(): + return True + thread.join(timeout=timeout) + return not thread.is_alive() diff --git a/tools/terminal_tool.py b/tools/terminal_tool.py index 3cb13f5af50..513fa31b9f9 100644 --- a/tools/terminal_tool.py +++ b/tools/terminal_tool.py @@ -1024,6 +1024,15 @@ def _get_env_config() -> Dict[str, Any]: "docker_env": _parse_env_var("TERMINAL_DOCKER_ENV", "{}", json.loads, "valid JSON"), "docker_run_as_host_user": os.getenv("TERMINAL_DOCKER_RUN_AS_HOST_USER", "false").lower() in {"true", "1", "yes"}, "docker_extra_args": _parse_env_var("TERMINAL_DOCKER_EXTRA_ARGS", "[]", json.loads, "valid JSON"), + # Cross-process container reuse (issue #20561). The docs claim + # "ONE long-lived container shared across sessions" — this toggle + # makes that real by probing for a labeled container at startup and + # attaching to it instead of always starting a fresh one. Set to + # ``false`` for hard per-process isolation (no reuse, container is + # removed on exit). + "docker_persist_across_processes": os.getenv( + "TERMINAL_DOCKER_PERSIST_ACROSS_PROCESSES", "true" + ).lower() in {"true", "1", "yes"}, } @@ -1083,6 +1092,7 @@ def _create_environment(env_type: str, image: str, cwd: str, timeout: int, env=docker_env, run_as_host_user=cc.get("docker_run_as_host_user", False), extra_args=docker_extra_args, + persist_across_processes=cc.get("docker_persist_across_processes", True), ) elif env_type == "singularity": @@ -1378,7 +1388,23 @@ def _atexit_cleanup(): if _active_environments: count = len(_active_environments) logger.info("Shutting down %d remaining sandbox(es)...", count) + # Snapshot the env objects BEFORE cleanup_all_environments empties + # the dict; we need them to wait on docker cleanup threads after the + # registry has been cleared. + envs_to_wait = list(_active_environments.values()) cleanup_all_environments() + # Block briefly so docker stop/rm actually completes before the + # interpreter exits. Issue #20561 — without this join, the daemon + # cleanup threads were getting torn down mid-`docker stop`, leaving + # Exited containers piled up on the host. + for env in envs_to_wait: + wait_fn = getattr(env, "wait_for_cleanup", None) + if wait_fn is None: + continue + try: + wait_fn(timeout=15.0) + except Exception as e: # never block shutdown on a bad backend + logger.debug("wait_for_cleanup raised on exit: %s", e) atexit.register(_atexit_cleanup) @@ -1746,6 +1772,7 @@ def terminal_tool( "docker_env": config.get("docker_env", {}), "docker_run_as_host_user": config.get("docker_run_as_host_user", False), "docker_extra_args": config.get("docker_extra_args", []), + "docker_persist_across_processes": config.get("docker_persist_across_processes", True), } local_config = None