fix(docker): reuse containers across processes + fix cleanup leaks

The Docker backend docs claim "Single persistent container — ONE long- lived container shared across sessions, /new, /reset, and delegate_task subagents. Stopped/removed on shutdown." In practice the code only honored that contract within a single Python process via the in-memory \`_active_environments[task_id]\` cache. Every \`hermes chat\` invocation spawned a fresh \`hermes-<hex>\` container; older containers piled up in \`Exited\` state and accumulated until manual \`docker rm\` (issue #20561). Three root causes, all addressed by this commit: 1. No cross-process container discovery. 2. \`cleanup()\` used fire-and-forget \`subprocess.Popen("... &", shell=True)\` which raced with parent-process exit — when Python exited promptly the detached shell child got killed mid-\`docker stop\`, leaving stopped containers behind. 3. The \`docker rm\` step in cleanup was gated on \`not self._persistent\` (the bind-mount-persistence flag). Default config sets \`container_persistent: true\`, so the default happy path skipped \`rm\` entirely — even when the user explicitly didn't want cross-process reuse, containers leaked. Fix: * Add \`DockerEnvironment.__init__(persist_across_processes=True)\`. When true, init probes \`docker ps -a --filter label=hermes-agent=1 --filter label=hermes-task-id=<task> --filter label=hermes-profile=<profile>\` and reuses a matching container (running → attach; stopped → \`docker start\` → attach; \`docker start\` failure → fall through to a fresh \`docker run\`). Multiple matches prefer the running one, with the stragglers left for the orphan reaper (next commit) to clean up. * Rewrite \`cleanup()\`. Uses \`subprocess.run(..., timeout=30)\` on a daemon \`threading.Thread\`, not the racy \`Popen(... &)\`. The \`_persistent\` guard is dropped on the \`rm\` step — \`rm\` now runs whenever \`persist_across_processes\` is false, regardless of the bind-mount-persistence setting. The leak class is gone in all combinations. * Add \`wait_for_cleanup(timeout)\`. \`tools/terminal_tool.py\`'s atexit hook calls this on every active env, blocking up to 15s for the cleanup thread before interpreter exit. Without this, \`hermes /quit\` raced the daemon-thread teardown and dropped the stop/rm work. * New config \`terminal.docker_persist_across_processes\` (default \`true\` — restores the documented contract). Set \`false\` for hard per-process isolation. Wired through all four config-bridge sites (cli.py env_mappings, gateway/run.py _terminal_env_map, hermes_cli/config.py _config_to_env_sync, tests/conftest.py env-strip list); regression-pinned by \`test_docker_persist_across_processes_is_bridged_everywhere\` matching the existing pattern for docker_run_as_host_user / docker_env. Reuse intentionally does NOT compare image / mounts / resources — only the labels. Operators changing those settings should set \`docker_persist_across_processes: false\` (or \`docker rm -f\` the labeled container) to force a fresh start. This keeps the probe cheap and the failure mode obvious. Coverage: 12 new unit tests in tests/tools/test_docker_environment.py covering reuse paths (running, stopped, fallback, opt-out, duplicate preference) and cleanup behavior (persist-mode no-rm, opt-out always-rm, no-Popen, wait_for_cleanup semantics, partial-init safety). Plus one config-bridge regression pin. Refs #20561
2026-07-15 14:22:43 +00:00 · 2026-05-28 14:00:26 +10:00 · 2026-05-28 14:00:26 +10:00 · ac8e238bc8
commit ac8e238bc8
parent 8d129d013b
8 changed files with 612 additions and 51 deletions
--- a/cli.py
+++ b/cli.py
@ -576,6 +576,7 @@ def load_cli_config() -> Dict[str, Any]:
        "docker_env": "TERMINAL_DOCKER_ENV",
        "docker_mount_cwd_to_workspace": "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE",
        "docker_run_as_host_user": "TERMINAL_DOCKER_RUN_AS_HOST_USER",
+        "docker_persist_across_processes": "TERMINAL_DOCKER_PERSIST_ACROSS_PROCESSES",
        "sandbox_dir": "TERMINAL_SANDBOX_DIR",
        # Persistent shell (non-local backends)
        "persistent_shell": "TERMINAL_PERSISTENT_SHELL",
--- a/gateway/run.py
+++ b/gateway/run.py
@ -831,6 +831,7 @@ if _config_path.exists():
                "docker_env": "TERMINAL_DOCKER_ENV",
                "docker_mount_cwd_to_workspace": "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE",
                "docker_run_as_host_user": "TERMINAL_DOCKER_RUN_AS_HOST_USER",
+                "docker_persist_across_processes": "TERMINAL_DOCKER_PERSIST_ACROSS_PROCESSES",
                "sandbox_dir": "TERMINAL_SANDBOX_DIR",
                "persistent_shell": "TERMINAL_PERSISTENT_SHELL",
            }
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@ -5551,6 +5551,7 @@ def set_config_value(key: str, value: str):
        "terminal.daytona_image": "TERMINAL_DAYTONA_IMAGE",
        "terminal.docker_mount_cwd_to_workspace": "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE",
        "terminal.docker_run_as_host_user": "TERMINAL_DOCKER_RUN_AS_HOST_USER",
+        "terminal.docker_persist_across_processes": "TERMINAL_DOCKER_PERSIST_ACROSS_PROCESSES",
        "terminal.docker_env": "TERMINAL_DOCKER_ENV",
        # terminal.cwd intentionally excluded — CLI resolves at runtime,
        # gateway bridges it in gateway/run.py. Persisting to .env causes
--- a/tests/conftest.py
+++ b/tests/conftest.py
@ -227,6 +227,7 @@ _HERMES_BEHAVIORAL_VARS = frozenset({
    "TERMINAL_CONTAINER_DISK",
    "TERMINAL_CONTAINER_MEMORY",
    "TERMINAL_CONTAINER_PERSISTENT",
+    "TERMINAL_DOCKER_PERSIST_ACROSS_PROCESSES",
    "TERMINAL_DOCKER_RUN_AS_HOST_USER",
    "BROWSER_CDP_URL",
    "CAMOFOX_URL",
--- a/tests/tools/test_docker_environment.py
+++ b/tests/tools/test_docker_environment.py
@ -203,25 +203,43 @@ def test_auto_mount_replaces_persistent_workspace_bind(monkeypatch, tmp_path):


 def test_non_persistent_cleanup_removes_container(monkeypatch):
-    """When persistent=false, cleanup() must schedule docker stop + rm."""
+    """When persist_across_processes=false, cleanup() must docker stop AND
+    docker rm so containers don't leak across hermes processes.
+
+    Updated for issue #20561: the previous implementation used fire-and-forget
+    ``subprocess.Popen("... &", shell=True)`` which raced with parent exit;
+    the new implementation uses ``subprocess.run`` on a daemon thread with
+    bounded timeouts. See test_cleanup_with_persist_disabled_stops_and_rms
+    for the full behavior contract.
+    """
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
-    calls = _mock_subprocess_run(monkeypatch)
+    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
+    _mock_subprocess_run(monkeypatch)
+    # Run the worker thread synchronously so assertions can observe its work.
+    import threading
+    monkeypatch.setattr(threading, "Thread", _FakeThread)

-    popen_cmds = []
-    monkeypatch.setattr(
-        docker_env.subprocess, "Popen",
-        lambda cmd, **kw: (popen_cmds.append(cmd), type("P", (), {"poll": lambda s: 0, "wait": lambda s, **k: None, "returncode": 0, "stdout": iter([]), "stdin": None})())[1],
+    env = docker_env.DockerEnvironment(
+        image="python:3.11", cwd="/root", timeout=60,
+        task_id="ephemeral-task", persistent_filesystem=False,
+        persist_across_processes=False,
    )
-
-    env = _make_dummy_env(persistent_filesystem=False, task_id="ephemeral-task")
-    assert env._container_id
    container_id = env._container_id
+    assert container_id

+    # Capture cleanup-time docker calls (everything before this was init).
+    cleanup_calls = []
+    real_run = docker_env.subprocess.run
+
+    def _capture(cmd, **kw):
+        cleanup_calls.append((list(cmd) if isinstance(cmd, list) else cmd, kw))
+        return real_run(cmd, **kw)
+
+    monkeypatch.setattr(docker_env.subprocess, "run", _capture)
    env.cleanup()

-    # Should have stop and rm calls via Popen
-    stop_cmds = [c for c in popen_cmds if container_id in str(c) and "stop" in str(c)]
-    assert len(stop_cmds) >= 1, f"cleanup() should schedule docker stop for {container_id}"
+    stops = [c for c in cleanup_calls if isinstance(c[0], list) and c[0][1:2] == ["stop"]]
+    assert stops, f"cleanup() should docker stop {container_id}; got {cleanup_calls}"


 class _FakePopen:
@ -622,3 +640,338 @@ def test_labels_attribute_populated_after_init(monkeypatch):
        "hermes-task-id": "abc",
        "hermes-profile": "default",
    }
+
+
+# ── Cross-process container reuse (issue #20561) ──────────────────
+
+
+def _mock_subprocess_run_with_reuse(monkeypatch, ps_state: str | None,
+                                     start_succeeds: bool = True):
+    """Reuse-aware subprocess.run mock.
+
+    ``ps_state`` controls what ``docker ps -a --filter ...`` returns:
+      * ``None`` → no match (empty stdout). Forces a fresh ``docker run``.
+      * ``"running"`` / ``"exited"`` / ... → emit ``CID\\tSTATE`` so the reuse
+        path picks it up. ``"running"`` skips ``docker start``; other states
+        trigger ``docker start`` (which can be forced to fail via
+        ``start_succeeds=False``).
+
+    Returns the captured call list so the test can verify which docker
+    commands actually ran.
+    """
+    calls = []
+
+    def _run(cmd, **kwargs):
+        calls.append((list(cmd) if isinstance(cmd, list) else cmd, kwargs))
+        if isinstance(cmd, list) and len(cmd) >= 2:
+            sub = cmd[1]
+            if sub == "version":
+                return subprocess.CompletedProcess(cmd, 0, stdout="Docker version", stderr="")
+            if sub == "ps":
+                if ps_state is None:
+                    return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
+                return subprocess.CompletedProcess(
+                    cmd, 0, stdout=f"reused-cid\t{ps_state}\n", stderr="",
+                )
+            if sub == "start":
+                if not start_succeeds:
+                    # Real subprocess.run with check=True raises on non-zero exit;
+                    # mirror that so the production code's except clause fires.
+                    raise subprocess.CalledProcessError(1, cmd, output="", stderr="no such container")
+                return subprocess.CompletedProcess(cmd, 0, stdout="reused-cid\n", stderr="")
+            if sub == "run":
+                return subprocess.CompletedProcess(cmd, 0, stdout="fresh-cid\n", stderr="")
+        return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
+
+    monkeypatch.setattr(docker_env.subprocess, "run", _run)
+    return calls
+
+
+def test_reuse_attaches_to_running_container_without_docker_run(monkeypatch):
+    """When a labeled container is already ``running``, the reuse probe
+    must pick it up and skip ``docker run`` entirely. Regression for the
+    issue #20561 root cause: every Hermes process spawning a new container
+    despite docs claiming "ONE long-lived container shared across sessions"."""
+    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
+    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
+    calls = _mock_subprocess_run_with_reuse(monkeypatch, ps_state="running")
+
+    env = _make_dummy_env(task_id="reuse-test")
+
+    # The reuse path must populate _container_id from the ps probe output.
+    assert env._container_id == "reused-cid", (
+        f"expected reused container id, got {env._container_id!r}"
+    )
+    # And it must NOT have run `docker run`.
+    run_invocations = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"]
+    assert not run_invocations, (
+        f"docker run should be skipped on reuse, got: {run_invocations}"
+    )
+    # And it must have NOT issued a `docker start` for an already-running container.
+    start_invocations = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "start"]
+    assert not start_invocations, (
+        f"docker start should be skipped when container already running, got: {start_invocations}"
+    )
+
+
+def test_reuse_starts_stopped_container_before_attaching(monkeypatch):
+    """A labeled container in ``exited`` state must be restarted via
+    ``docker start`` before the new Hermes process uses it. Without this
+    step, ``docker exec`` against a stopped container errors out and the
+    first agent command fails opaquely."""
+    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
+    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
+    calls = _mock_subprocess_run_with_reuse(monkeypatch, ps_state="exited")
+
+    env = _make_dummy_env(task_id="reuse-stopped")
+
+    assert env._container_id == "reused-cid"
+    start_invocations = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "start"]
+    assert start_invocations, "expected docker start for exited container"
+    run_invocations = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"]
+    assert not run_invocations, "should not docker run when reusing an exited container"
+
+
+def test_reuse_falls_back_to_fresh_run_when_start_fails(monkeypatch):
+    """If ``docker start`` on the matched container fails (container was
+    removed between probe and start, daemon paused, etc.), the code must
+    silently fall through to a fresh ``docker run`` rather than leaving the
+    user with a broken environment. Defensive recovery — the probe is best-
+    effort, not authoritative."""
+    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
+    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
+    calls = _mock_subprocess_run_with_reuse(
+        monkeypatch, ps_state="exited", start_succeeds=False,
+    )
+
+    env = _make_dummy_env(task_id="reuse-broken-start")
+
+    # docker start should be attempted then fail; code falls through to run.
+    assert env._container_id == "fresh-cid", (
+        f"expected fresh container id after fallback, got {env._container_id!r}"
+    )
+    run_invocations = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"]
+    assert run_invocations, "fallback to fresh docker run must happen on start failure"
+
+
+def test_no_reuse_when_persist_across_processes_disabled(monkeypatch):
+    """Opt-out path: ``persist_across_processes=False`` skips the ps probe
+    entirely and always starts a fresh container, matching the pre-fix
+    behavior for users who want hard per-process isolation."""
+    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
+    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
+    # ps_state=running would trigger reuse if the probe ran — assert it doesn't.
+    calls = _mock_subprocess_run_with_reuse(monkeypatch, ps_state="running")
+
+    env = docker_env.DockerEnvironment(
+        image="python:3.11", cwd="/root", timeout=60,
+        task_id="no-reuse", persist_across_processes=False,
+    )
+
+    # Must NOT have issued docker ps (the probe is gated by the flag).
+    ps_invocations = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "ps"]
+    assert not ps_invocations, (
+        f"docker ps probe should be skipped when persist_across_processes=False, got: {ps_invocations}"
+    )
+    # Should have started a fresh container.
+    assert env._container_id == "fresh-cid"
+
+
+def test_find_reusable_container_prefers_running_over_stopped(monkeypatch):
+    """When the probe returns multiple matches (shouldn't normally happen,
+    but can after a crash leaves stale duplicates), a ``running`` container
+    is preferred over any stopped one. The duplicate gets reaped later by
+    the orphan reaper; we don't try to be heroic about it here."""
+    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
+    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
+
+    def _run(cmd, **kwargs):
+        if isinstance(cmd, list) and len(cmd) >= 2:
+            if cmd[1] == "version":
+                return subprocess.CompletedProcess(cmd, 0, stdout="ok", stderr="")
+            if cmd[1] == "ps":
+                # Two matches: stopped first, running second.
+                return subprocess.CompletedProcess(
+                    cmd, 0,
+                    stdout="stopped-cid\texited\nrunning-cid\trunning\n",
+                    stderr="",
+                )
+        return subprocess.CompletedProcess(cmd, 0, stdout="fresh-cid\n", stderr="")
+
+    monkeypatch.setattr(docker_env.subprocess, "run", _run)
+
+    env = _make_dummy_env(task_id="dup-match")
+    assert env._container_id == "running-cid", (
+        f"running container should win over stopped duplicate, got {env._container_id!r}"
+    )
+
+
+# ── Cleanup correctness (issue #20561) ────────────────────────────
+
+
+class _FakeThread:
+    """Stand-in for threading.Thread that captures target/args and calls
+    target() synchronously when .start() runs, so cleanup behavior is
+    observable without actually backgrounding subprocess calls."""
+
+    def __init__(self, target=None, daemon=None, name=None):
+        self._target = target
+        self.daemon = daemon
+        self.name = name
+        self._done = False
+
+    def start(self):
+        if self._target is not None:
+            self._target()
+        self._done = True
+
+    def is_alive(self):
+        return not self._done
+
+    def join(self, timeout=None):
+        self._done = True
+
+
+def _install_fake_thread(monkeypatch):
+    import threading
+    monkeypatch.setattr(threading, "Thread", _FakeThread)
+
+
+def test_cleanup_with_persist_only_stops_no_rm(monkeypatch):
+    """``persist_across_processes=True`` (default) cleanup must docker stop
+    the container but NEVER docker rm — the container has to survive so the
+    next Hermes process can reuse it. Issue #20561 — the previous code
+    matched this on the `_persistent` flag instead of a dedicated
+    cross-process flag, which made reuse impossible."""
+    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
+    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
+    _mock_subprocess_run(monkeypatch)
+    _install_fake_thread(monkeypatch)
+
+    env = _make_dummy_env(task_id="cleanup-persist", persistent_filesystem=False)
+    # Default persist_across_processes=True.
+    container_id = env._container_id
+    assert container_id
+
+    cleanup_calls = []
+    real_run = docker_env.subprocess.run
+
+    def _capturing_run(cmd, **kwargs):
+        cleanup_calls.append((list(cmd) if isinstance(cmd, list) else cmd, kwargs))
+        return real_run(cmd, **kwargs)
+
+    monkeypatch.setattr(docker_env.subprocess, "run", _capturing_run)
+
+    env.cleanup()
+
+    stops = [c for c in cleanup_calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "stop"]
+    rms = [c for c in cleanup_calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "rm"]
+    assert stops, f"expected docker stop call, got cleanup_calls: {cleanup_calls}"
+    assert not rms, (
+        f"docker rm must NOT be called when persist_across_processes=True; "
+        f"reuse would be impossible. Got: {rms}"
+    )
+
+
+def test_cleanup_with_persist_disabled_stops_and_rms(monkeypatch):
+    """``persist_across_processes=False`` cleanup must docker stop AND docker
+    rm so containers don't leak. Crucially, this runs regardless of the
+    ``persistent_filesystem`` setting — the original code only rm'd when
+    ``not self._persistent``, which meant the default-on ``container_persistent:
+    true`` users (the documented happy path) leaked Exited containers forever.
+    Issue #20561 root-cause fix."""
+    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
+    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
+    _mock_subprocess_run(monkeypatch)
+    _install_fake_thread(monkeypatch)
+
+    # Note: persistent_filesystem=True (the prior-leak scenario) + the new
+    # cross-process toggle OFF must still result in a clean rm.
+    env = docker_env.DockerEnvironment(
+        image="python:3.11", cwd="/root", timeout=60,
+        task_id="cleanup-no-persist", persistent_filesystem=True,
+        persist_across_processes=False,
+    )
+
+    cleanup_calls = []
+    real_run = docker_env.subprocess.run
+
+    def _capturing_run(cmd, **kwargs):
+        cleanup_calls.append((list(cmd) if isinstance(cmd, list) else cmd, kwargs))
+        return real_run(cmd, **kwargs)
+
+    monkeypatch.setattr(docker_env.subprocess, "run", _capturing_run)
+
+    env.cleanup()
+
+    stops = [c for c in cleanup_calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "stop"]
+    rms = [c for c in cleanup_calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "rm"]
+    assert stops, "expected docker stop"
+    assert rms, (
+        "docker rm MUST run when persist_across_processes=False, even with "
+        "persistent_filesystem=True — that gating was the leak source in #20561."
+    )
+
+
+def test_cleanup_uses_subprocess_run_not_detached_shell(monkeypatch):
+    """The pre-fix code used ``subprocess.Popen(\"... &\", shell=True)`` which
+    raced with parent-process exit and silently dropped cleanup work. The
+    new code must use ``subprocess.run`` with bounded ``timeout=`` so the
+    work actually completes within the process lifetime.
+
+    Asserts cleanup never reaches into shell-mode Popen.
+    """
+    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
+    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
+    _mock_subprocess_run(monkeypatch)
+    _install_fake_thread(monkeypatch)
+
+    def _forbidden_popen(*args, **kwargs):
+        raise AssertionError(
+            f"cleanup must not use subprocess.Popen anymore (issue #20561); "
+            f"got args={args} kwargs={kwargs}"
+        )
+
+    monkeypatch.setattr(docker_env.subprocess, "Popen", _forbidden_popen)
+
+    env = _make_dummy_env(task_id="no-popen-cleanup")
+    env.cleanup()  # must not raise
+
+
+def test_wait_for_cleanup_returns_true_when_no_thread_started():
+    """``wait_for_cleanup`` must be a no-op when ``cleanup`` was never called
+    (or the env has no live cleanup thread) — atexit calls it unconditionally
+    across all active envs, so a False return would falsely flag healthy
+    shutdowns."""
+    env = docker_env.DockerEnvironment.__new__(docker_env.DockerEnvironment)
+    # No _cleanup_thread set — simulates an env that was never cleanup()'d.
+    assert env.wait_for_cleanup(timeout=1.0) is True
+
+
+def test_wait_for_cleanup_after_cleanup_returns_true(monkeypatch):
+    """End-to-end: cleanup() starts a thread, wait_for_cleanup() joins it
+    and reports completion. Atexit relies on this contract to ensure docker
+    stop/rm actually finishes before the Python interpreter exits."""
+    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
+    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
+    _mock_subprocess_run(monkeypatch)
+    _install_fake_thread(monkeypatch)
+
+    env = _make_dummy_env(task_id="wait-test")
+    env.cleanup()
+    assert env.wait_for_cleanup(timeout=5.0) is True
+
+
+def test_cleanup_on_env_with_no_container_id_does_not_raise(monkeypatch):
+    """A DockerEnvironment whose ``__init__`` failed before the container_id
+    was set (image-pull error, docker daemon down) should still be safe to
+    cleanup() — the post-creation failure path in callers always tries.
+    Without this guard the daemon-down case used to NameError on the cleanup
+    branch."""
+    env = docker_env.DockerEnvironment.__new__(docker_env.DockerEnvironment)
+    env._container_id = None
+    env._persistent = False
+    env._workspace_dir = None
+    env._home_dir = None
+    # No exception expected.
+    env.cleanup()
--- a/tests/tools/test_terminal_config_env_sync.py
+++ b/tests/tools/test_terminal_config_env_sync.py
@ -224,3 +224,23 @@ def test_docker_env_is_bridged_everywhere():
    assert "docker_env" in _gateway_env_map_keys()
    assert "docker_env" in _save_config_env_sync_keys()
    assert "TERMINAL_DOCKER_ENV" in _terminal_tool_env_var_names()
+
+
+def test_docker_persist_across_processes_is_bridged_everywhere():
+    """Regression pin for the cross-process container reuse toggle.
+
+    ``terminal.docker_persist_across_processes`` (issue #20561) controls
+    whether ``DockerEnvironment.__init__`` probes for and reuses an existing
+    labeled container at startup, and whether ``cleanup()`` removes the
+    container on Hermes exit or just stops it (keeping it for the next
+    process).  Same four-bridge invariant as docker_run_as_host_user /
+    docker_env / docker_mount_cwd_to_workspace — drift between any of the
+    four sites means ``terminal.docker_persist_across_processes: false`` in
+    config.yaml silently does nothing for that entry point, leaving the
+    user unable to opt out of the documented "ONE long-lived container
+    shared across sessions" behavior.
+    """
+    assert "docker_persist_across_processes" in _cli_env_map_keys()
+    assert "docker_persist_across_processes" in _gateway_env_map_keys()
+    assert "docker_persist_across_processes" in _save_config_env_sync_keys()
+    assert "TERMINAL_DOCKER_PERSIST_ACROSS_PROCESSES" in _terminal_tool_env_var_names()
--- a/tools/environments/docker.py
+++ b/tools/environments/docker.py
@ -339,11 +339,13 @@ class DockerEnvironment(BaseEnvironment):
        auto_mount_cwd: bool = False,
        run_as_host_user: bool = False,
        extra_args: list = None,
+        persist_across_processes: bool = True,
    ):
        if cwd == "~":
            cwd = "/root"
        super().__init__(cwd=cwd, timeout=timeout)
        self._persistent = persistent_filesystem
+        self._persist_across_processes = persist_across_processes
        self._task_id = task_id
        self._forward_env = _normalize_forward_env_names(forward_env)
        self._env = _normalize_env_dict(env)
@ -561,26 +563,69 @@ class DockerEnvironment(BaseEnvironment):
            "hermes-task-id": task_label,
            "hermes-profile": profile_name,
        }
-        run_cmd = [
-            self._docker_exe, "run", "-d",
-            "--init",           # tini/catatonit as PID 1 — reaps zombie children
-            "--name", container_name,
-            *label_args,
-            "-w", cwd,
-            *all_run_args,
-            image,
-            "sleep", "infinity",  # no fixed lifetime — idle reaper handles cleanup
-        ]
-        logger.debug(f"Starting container: {' '.join(run_cmd)}")
-        result = subprocess.run(
-            run_cmd,
-            capture_output=True,
-            text=True,
-            timeout=120,  # image pull may take a while
-            check=True,
-        )
-        self._container_id = result.stdout.strip()
-        logger.info(f"Started container {container_name} ({self._container_id[:12]})")
+
+        # Cross-process reuse (issue #20561 — docs claim "ONE long-lived
+        # container shared across sessions").  If a prior Hermes process
+        # already started a container for this (task_id, profile) and it
+        # still exists, attach to it instead of starting a fresh one.  This
+        # restores the documented contract; opt out via
+        # ``terminal.docker_persist_across_processes: false``.
+        #
+        # Reuse matches on labels only — we deliberately do NOT compare image
+        # / mounts / resources.  Operators who need a fresh container after
+        # changing those settings should set ``docker_persist_across_processes:
+        # false`` (or run ``docker rm -f`` against the labeled container) to
+        # force a clean start.
+        reused = False
+        if persist_across_processes:
+            existing = self._find_reusable_container(task_label, profile_name)
+            if existing is not None:
+                container_id, state = existing
+                self._container_id = container_id
+                if state != "running":
+                    try:
+                        subprocess.run(
+                            [self._docker_exe, "start", container_id],
+                            capture_output=True,
+                            text=True,
+                            timeout=30,
+                            check=True,
+                        )
+                    except (subprocess.CalledProcessError, subprocess.TimeoutExpired) as e:
+                        logger.warning(
+                            "Failed to start existing container %s (state=%s): "
+                            "%s — falling back to a fresh container.",
+                            container_id[:12], state, e,
+                        )
+                        self._container_id = None
+                if self._container_id:
+                    logger.info(
+                        "Reusing container %s (task=%s, profile=%s, prior state=%s)",
+                        container_id[:12], task_label, profile_name, state,
+                    )
+                    reused = True
+
+        if not reused:
+            run_cmd = [
+                self._docker_exe, "run", "-d",
+                "--init",           # tini/catatonit as PID 1 — reaps zombie children
+                "--name", container_name,
+                *label_args,
+                "-w", cwd,
+                *all_run_args,
+                image,
+                "sleep", "infinity",  # no fixed lifetime — idle reaper handles cleanup
+            ]
+            logger.debug(f"Starting container: {' '.join(run_cmd)}")
+            result = subprocess.run(
+                run_cmd,
+                capture_output=True,
+                text=True,
+                timeout=120,  # image pull may take a while
+                check=True,
+            )
+            self._container_id = result.stdout.strip()
+            logger.info(f"Started container {container_name} ({self._container_id[:12]})")

        # Build the init-time env forwarding args (used only by init_session
        # to inject host env vars into the snapshot; subsequent commands get
@ -685,31 +730,143 @@ class DockerEnvironment(BaseEnvironment):
        logger.debug("Docker --storage-opt support: %s", _storage_opt_ok)
        return _storage_opt_ok

-    def cleanup(self):
-        """Stop and remove the container. Bind-mount dirs persist if persistent=True."""
-        if self._container_id:
-            try:
-                # Stop in background so cleanup doesn't block
-                stop_cmd = (
-                    f"(timeout 60 {self._docker_exe} stop {self._container_id} || "
-                    f"{self._docker_exe} rm -f {self._container_id}) >/dev/null 2>&1 &"
-                )
-                subprocess.Popen(stop_cmd, shell=True)
-            except Exception as e:
-                logger.warning("Failed to stop container %s: %s", self._container_id, e)
+    def _find_reusable_container(self, task_label: str, profile_label: str) -> Optional[tuple[str, str]]:
+        """Look for an existing container labeled for this (task, profile).

+        Returns ``(container_id, state)`` on hit, ``None`` on miss / on any
+        failure (including ``docker ps`` itself failing). State is one of the
+        values Docker reports via ``{{.State}}`` — e.g. ``running``, ``exited``,
+        ``created``, ``paused``, ``restarting``, ``dead``. The caller decides
+        whether the state warrants ``docker start`` before reuse.
+
+        Restricted to the docker-stored label set this class creates; never
+        matches containers that happened to be named ``hermes-*`` but were
+        started by some other tool.
+        """
+        try:
+            result = subprocess.run(
+                [
+                    self._docker_exe, "ps", "-a",
+                    "--filter", "label=hermes-agent=1",
+                    "--filter", f"label=hermes-task-id={task_label}",
+                    "--filter", f"label=hermes-profile={profile_label}",
+                    "--format", "{{.ID}}\t{{.State}}",
+                ],
+                capture_output=True,
+                text=True,
+                timeout=10,
+                check=False,
+            )
+        except (subprocess.TimeoutExpired, OSError) as e:
+            logger.debug("docker ps probe failed: %s — will start a fresh container", e)
+            return None
+        if result.returncode != 0:
+            logger.debug(
+                "docker ps probe returned %d: %s — will start a fresh container",
+                result.returncode, result.stderr.strip(),
+            )
+            return None
+        lines = [ln.strip() for ln in result.stdout.splitlines() if ln.strip()]
+        if not lines:
+            return None
+        # Multiple matches are unusual (one (task, profile) should produce one
+        # container) but can happen if a previous Hermes process crashed
+        # mid-cleanup. Prefer a running one if present; otherwise pick the
+        # first listed. Stale duplicates get reaped by the orphan-reaper in a
+        # follow-up commit; we don't try to be heroic about them here.
+        running = None
+        first = None
+        for ln in lines:
+            parts = ln.split("\t", 1)
+            if len(parts) != 2:
+                continue
+            cid, state = parts[0], parts[1].lower()
+            if first is None:
+                first = (cid, state)
+            if state == "running" and running is None:
+                running = (cid, state)
+        return running or first
+
+    def cleanup(self):
+        """Stop (and optionally remove) the container.
+
+        Behavior depends on ``persist_across_processes`` (init kwarg):
+
+        * **True** (default) — only ``docker stop`` so the container is
+          available for reuse by the next Hermes process. The orphan-reaper
+          eventually removes it if no subsequent process picks it up.
+        * **False** — ``docker stop`` followed by ``docker rm -f``, regardless
+          of ``persistent_filesystem``. The previous ``rm`` path was gated on
+          ``not self._persistent`` which meant ``container_persistent: true``
+          users (the default) leaked Exited containers forever (issue #20561).
+
+        Cleanup runs on a daemon thread with bounded ``subprocess.run`` calls,
+        not the previous fire-and-forget ``Popen(... &)`` shell construct.
+        That pattern raced with parent-process exit and silently dropped
+        cleanup work when the parent didn't outlive the detached shell — the
+        primary mechanism behind Exited-container accumulation under SIGTERM
+        / ``hermes /quit`` / dead terminals.
+        """
+        container_id = self._container_id
+        if not container_id:
+            # Still drop the bind-mount dirs if any were allocated.
            if not self._persistent:
-                # Also schedule removal (stop only leaves it as stopped)
+                for d in (self._workspace_dir, self._home_dir):
+                    if d:
+                        shutil.rmtree(d, ignore_errors=True)
+            return
+
+        # Capture state needed by the worker before we null out the attrs —
+        # the worker thread can outlive ``self``.
+        docker_exe = self._docker_exe
+        should_remove = not self._persist_across_processes
+        log_id = container_id[:12]
+
+        def _do_cleanup() -> None:
+            try:
+                subprocess.run(
+                    [docker_exe, "stop", "-t", "10", container_id],
+                    capture_output=True, timeout=30,
+                )
+            except (subprocess.TimeoutExpired, OSError) as e:
+                logger.warning("docker stop %s timed out / failed: %s", log_id, e)
+            if should_remove:
                try:
-                    subprocess.Popen(
-                        f"sleep 3 && {self._docker_exe} rm -f {self._container_id} >/dev/null 2>&1 &",
-                        shell=True,
+                    subprocess.run(
+                        [docker_exe, "rm", "-f", container_id],
+                        capture_output=True, timeout=30,
                    )
-                except Exception:
-                    pass
-            self._container_id = None
+                except (subprocess.TimeoutExpired, OSError) as e:
+                    logger.warning("docker rm -f %s failed: %s", log_id, e)
+
+        # Daemon thread: doesn't block interpreter exit (atexit returns
+        # promptly), but unlike the old ``Popen(... &)`` shell trick the
+        # Python-level join semantics let the thread actually run to
+        # completion if the interpreter is still alive. atexit registers
+        # ``_atexit_cleanup`` in terminal_tool.py which waits up to ~60s for
+        # outstanding cleanups, so most exits complete the work cleanly.
+        import threading
+        t = threading.Thread(target=_do_cleanup, daemon=True, name=f"hermes-cleanup-{log_id}")
+        t.start()
+        self._cleanup_thread = t
+        self._container_id = None

        if not self._persistent:
            for d in (self._workspace_dir, self._home_dir):
                if d:
                    shutil.rmtree(d, ignore_errors=True)
+
+    def wait_for_cleanup(self, timeout: float = 30.0) -> bool:
+        """Block up to *timeout* seconds for the cleanup worker thread.
+
+        Returns ``True`` if the thread finished (or no thread was started),
+        ``False`` on timeout. The atexit hook in terminal_tool.py calls this
+        on every active environment so docker stop/rm actually completes
+        before the Python process exits — without this, ``hermes /quit``
+        races the interpreter shutdown and leaves stopped containers behind.
+        """
+        thread = getattr(self, "_cleanup_thread", None)
+        if thread is None or not thread.is_alive():
+            return True
+        thread.join(timeout=timeout)
+        return not thread.is_alive()
--- a/tools/terminal_tool.py
+++ b/tools/terminal_tool.py
@ -1024,6 +1024,15 @@ def _get_env_config() -> Dict[str, Any]:
        "docker_env": _parse_env_var("TERMINAL_DOCKER_ENV", "{}", json.loads, "valid JSON"),
        "docker_run_as_host_user": os.getenv("TERMINAL_DOCKER_RUN_AS_HOST_USER", "false").lower() in {"true", "1", "yes"},
        "docker_extra_args": _parse_env_var("TERMINAL_DOCKER_EXTRA_ARGS", "[]", json.loads, "valid JSON"),
+        # Cross-process container reuse (issue #20561).  The docs claim
+        # "ONE long-lived container shared across sessions" — this toggle
+        # makes that real by probing for a labeled container at startup and
+        # attaching to it instead of always starting a fresh one.  Set to
+        # ``false`` for hard per-process isolation (no reuse, container is
+        # removed on exit).
+        "docker_persist_across_processes": os.getenv(
+            "TERMINAL_DOCKER_PERSIST_ACROSS_PROCESSES", "true"
+        ).lower() in {"true", "1", "yes"},
    }


@ -1083,6 +1092,7 @@ def _create_environment(env_type: str, image: str, cwd: str, timeout: int,
            env=docker_env,
            run_as_host_user=cc.get("docker_run_as_host_user", False),
            extra_args=docker_extra_args,
+            persist_across_processes=cc.get("docker_persist_across_processes", True),
        )
    
    elif env_type == "singularity":
@ -1378,7 +1388,23 @@ def _atexit_cleanup():
    if _active_environments:
        count = len(_active_environments)
        logger.info("Shutting down %d remaining sandbox(es)...", count)
+        # Snapshot the env objects BEFORE cleanup_all_environments empties
+        # the dict; we need them to wait on docker cleanup threads after the
+        # registry has been cleared.
+        envs_to_wait = list(_active_environments.values())
        cleanup_all_environments()
+        # Block briefly so docker stop/rm actually completes before the
+        # interpreter exits. Issue #20561 — without this join, the daemon
+        # cleanup threads were getting torn down mid-`docker stop`, leaving
+        # Exited containers piled up on the host.
+        for env in envs_to_wait:
+            wait_fn = getattr(env, "wait_for_cleanup", None)
+            if wait_fn is None:
+                continue
+            try:
+                wait_fn(timeout=15.0)
+            except Exception as e:  # never block shutdown on a bad backend
+                logger.debug("wait_for_cleanup raised on exit: %s", e)

 atexit.register(_atexit_cleanup)

@ -1746,6 +1772,7 @@ def terminal_tool(
                                "docker_env": config.get("docker_env", {}),
                                "docker_run_as_host_user": config.get("docker_run_as_host_user", False),
                                "docker_extra_args": config.get("docker_extra_args", []),
+                                "docker_persist_across_processes": config.get("docker_persist_across_processes", True),
                            }

                        local_config = None