diff --git a/tests/tools/test_docker_environment.py b/tests/tools/test_docker_environment.py index 8073813f71c..00da8160a01 100644 --- a/tests/tools/test_docker_environment.py +++ b/tests/tools/test_docker_environment.py @@ -837,12 +837,16 @@ def _install_fake_thread(monkeypatch): monkeypatch.setattr(threading, "Thread", _FakeThread) -def test_cleanup_with_persist_only_stops_no_rm(monkeypatch): - """``persist_across_processes=True`` (default) cleanup must docker stop - the container but NEVER docker rm — the container has to survive so the - next Hermes process can reuse it. Issue #20561 — the previous code - matched this on the `_persistent` flag instead of a dedicated - cross-process flag, which made reuse impossible.""" +def test_cleanup_with_persist_is_noop_for_container(monkeypatch): + """``persist_across_processes=True`` (default) cleanup must NEITHER stop + NOR remove the container — the docs promise "ONE long-lived container + shared across sessions", and any docker stop would kill background + processes inside the container (npm watchers, pytest watchers, etc.). + + Resource reclamation in this mode happens via the orphan reaper on next + Hermes startup, not on graceful exit. Issue #20561 — the first iteration + of this PR did docker stop here, which Ben caught as contradicting the + "ONE long-lived container" semantics.""" monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker") monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default") _mock_subprocess_run(monkeypatch) @@ -866,11 +870,55 @@ def test_cleanup_with_persist_only_stops_no_rm(monkeypatch): stops = [c for c in cleanup_calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "stop"] rms = [c for c in cleanup_calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "rm"] - assert stops, f"expected docker stop call, got cleanup_calls: {cleanup_calls}" + assert not stops, ( + f"docker stop must NOT be called when persist_across_processes=True; " + f"container has to stay running so background processes survive. " + f"Got: {stops}" + ) assert not rms, ( f"docker rm must NOT be called when persist_across_processes=True; " f"reuse would be impossible. Got: {rms}" ) + # The in-process handle must still be cleared so the next __init__ + # re-probes via labels (and reuses the still-running container). + assert env._container_id is None, ( + "in-process container_id should be cleared even in no-op cleanup" + ) + + +def test_cleanup_force_remove_stops_and_rms_even_in_persist_mode(monkeypatch): + """``cleanup(force_remove=True)`` must stop AND rm the container even + when ``persist_across_processes=True``. This is the explicit-teardown + path for ``/reset``, ``cleanup_vm(task_id, force_remove=True)``, and any + future caller that wants a guaranteed fresh container. + + Without this kwarg, callers in persist mode would have no way to force a + fresh container without also flipping the global config — too coarse for + a per-task reset. + """ + monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker") + monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default") + _mock_subprocess_run(monkeypatch) + _install_fake_thread(monkeypatch) + + env = _make_dummy_env(task_id="cleanup-force", persistent_filesystem=False) + assert env._container_id + + cleanup_calls = [] + real_run = docker_env.subprocess.run + + def _capturing_run(cmd, **kwargs): + cleanup_calls.append((list(cmd) if isinstance(cmd, list) else cmd, kwargs)) + return real_run(cmd, **kwargs) + + monkeypatch.setattr(docker_env.subprocess, "run", _capturing_run) + + env.cleanup(force_remove=True) + + stops = [c for c in cleanup_calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "stop"] + rms = [c for c in cleanup_calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "rm"] + assert stops, f"force_remove must docker stop; got: {cleanup_calls}" + assert rms, f"force_remove must docker rm; got: {cleanup_calls}" def test_cleanup_with_persist_disabled_stops_and_rms(monkeypatch): @@ -914,12 +962,15 @@ def test_cleanup_with_persist_disabled_stops_and_rms(monkeypatch): def test_cleanup_uses_subprocess_run_not_detached_shell(monkeypatch): - """The pre-fix code used ``subprocess.Popen(\"... &\", shell=True)`` which + """The pre-fix code used ``subprocess.Popen("... &", shell=True)`` which raced with parent-process exit and silently dropped cleanup work. The new code must use ``subprocess.run`` with bounded ``timeout=`` so the work actually completes within the process lifetime. - Asserts cleanup never reaches into shell-mode Popen. + Asserts cleanup never reaches into shell-mode Popen. Uses + ``force_remove=True`` so cleanup actually issues docker calls — the + default persist-mode path is now a no-op (commit 4) and would trivially + pass this assertion without exercising the docker code at all. """ monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker") monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default") @@ -935,7 +986,7 @@ def test_cleanup_uses_subprocess_run_not_detached_shell(monkeypatch): monkeypatch.setattr(docker_env.subprocess, "Popen", _forbidden_popen) env = _make_dummy_env(task_id="no-popen-cleanup") - env.cleanup() # must not raise + env.cleanup(force_remove=True) # must not raise def test_wait_for_cleanup_returns_true_when_no_thread_started(): @@ -951,14 +1002,20 @@ def test_wait_for_cleanup_returns_true_when_no_thread_started(): def test_wait_for_cleanup_after_cleanup_returns_true(monkeypatch): """End-to-end: cleanup() starts a thread, wait_for_cleanup() joins it and reports completion. Atexit relies on this contract to ensure docker - stop/rm actually finishes before the Python interpreter exits.""" + stop/rm actually finishes before the Python interpreter exits. + + Uses ``force_remove=True`` so cleanup actually starts a worker thread — + the default persist-mode cleanup is a no-op (commit 4) and never spawns + a thread, so the trivial "no thread" branch of wait_for_cleanup is + already covered by the previous test. + """ monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker") monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default") _mock_subprocess_run(monkeypatch) _install_fake_thread(monkeypatch) env = _make_dummy_env(task_id="wait-test") - env.cleanup() + env.cleanup(force_remove=True) assert env.wait_for_cleanup(timeout=5.0) is True diff --git a/tools/environments/docker.py b/tools/environments/docker.py index 7ab97916f30..7a2728ebfe2 100644 --- a/tools/environments/docker.py +++ b/tools/environments/docker.py @@ -913,49 +913,94 @@ class DockerEnvironment(BaseEnvironment): running = (cid, state) return running or first - def cleanup(self): - """Stop (and optionally remove) the container. + def cleanup(self, *, force_remove: bool = False): + """Tear down the container according to persist mode and *force_remove*. - Behavior depends on ``persist_across_processes`` (init kwarg): + Persist-mode (``persist_across_processes=True``, the default) leaves the + container **running** untouched. The docs promise "ONE long-lived + container shared across sessions" and stopping it on every Hermes exit + breaks that promise: - * **True** (default) — only ``docker stop`` so the container is - available for reuse by the next Hermes process. The orphan-reaper - eventually removes it if no subsequent process picks it up. - * **False** — ``docker stop`` followed by ``docker rm -f``, regardless - of ``persistent_filesystem``. The previous ``rm`` path was gated on - ``not self._persistent`` which meant ``container_persistent: true`` - users (the default) leaked Exited containers forever (issue #20561). + * Background processes inside the container (``npm run dev``, watchers, + long-running pytest) get killed every time the user runs ``/quit``. + * Every reuse requires ``docker start`` + waiting for the container to + come back up, adding 1–2s to the first tool call of the new session. + * The user-visible difference between "ONE long-lived container" and + "a new container that happens to share state" is exactly this: + processes survive in the former, die in the latter. - Cleanup runs on a daemon thread with bounded ``subprocess.run`` calls, - not the previous fire-and-forget ``Popen(... &)`` shell construct. - That pattern raced with parent-process exit and silently dropped - cleanup work when the parent didn't outlive the detached shell — the - primary mechanism behind Exited-container accumulation under SIGTERM - / ``hermes /quit`` / dead terminals. + Resource reclamation for the persist-mode case lives in the + ``reap_orphan_containers()`` path (see issue #20561 commit 3): if no + Hermes process touches a labeled container for ``2 × lifetime_seconds`` + it gets ``docker rm -f``'d at the next Hermes startup. That covers the + SIGKILL / OOM / abandoned-laptop cases without us needing to stop the + container on every graceful exit. + + Opt-out mode (``persist_across_processes=False``) still does + ``docker stop`` + ``docker rm -f`` on every cleanup, matching the + pre-PR behavior for users who explicitly want per-process isolation. + + ``force_remove=True`` overrides persist mode and always tears the + container down (``docker stop`` + ``docker rm -f``). This is the + explicit-teardown path for ``/reset``, ``cleanup_vm(task_id)``-driven + resets, or any caller that wants a guaranteed fresh container on next + ``DockerEnvironment(task_id=...)``. No current caller passes + ``force_remove=True``; the parameter is here so the explicit-teardown + semantics can be wired up later without changing this method's + signature. + + Cleanup runs on a daemon thread with bounded ``subprocess.run`` calls + (not the racy ``Popen(... &)`` pattern from before PR #33645). The + atexit hook in ``tools/terminal_tool.py`` waits up to 15s for the + thread to finish before the interpreter exits, so ``docker stop`` / + ``docker rm`` actually completes when we do trigger it. """ container_id = self._container_id if not container_id: - # Still drop the bind-mount dirs if any were allocated. + # Still drop the bind-mount dirs if any were allocated and we're + # NOT in persist mode (persist mode preserves them). if not self._persistent: for d in (self._workspace_dir, self._home_dir): if d: shutil.rmtree(d, ignore_errors=True) return + # Decide what to actually do. Three cases: + # + # force_remove=True → stop + rm (explicit teardown) + # persist_across_processes=True → no-op (leave container running) + # persist_across_processes=False → stop + rm (per-process isolation) + # + # The persist-mode no-op is the issue-#20561 contract: the container + # outlives Hermes processes, processes inside it stay alive, and + # reuse on next startup is instant. + if force_remove: + should_stop = True + should_remove = True + elif self._persist_across_processes: + # No-op for the container. Drop the in-process handle so a fresh + # __init__ will re-probe via labels (and find the running + # container) instead of trying to reuse a stale Python reference. + self._container_id = None + return + else: + should_stop = True + should_remove = True + # Capture state needed by the worker before we null out the attrs — # the worker thread can outlive ``self``. docker_exe = self._docker_exe - should_remove = not self._persist_across_processes log_id = container_id[:12] def _do_cleanup() -> None: - try: - subprocess.run( - [docker_exe, "stop", "-t", "10", container_id], - capture_output=True, timeout=30, - ) - except (subprocess.TimeoutExpired, OSError) as e: - logger.warning("docker stop %s timed out / failed: %s", log_id, e) + if should_stop: + try: + subprocess.run( + [docker_exe, "stop", "-t", "10", container_id], + capture_output=True, timeout=30, + ) + except (subprocess.TimeoutExpired, OSError) as e: + logger.warning("docker stop %s timed out / failed: %s", log_id, e) if should_remove: try: subprocess.run( @@ -977,7 +1022,10 @@ class DockerEnvironment(BaseEnvironment): self._cleanup_thread = t self._container_id = None - if not self._persistent: + # Bind-mount dir teardown only runs when we actually removed the + # container (the dirs are the container's filesystem state; keeping + # them around with no container would orphan the data on disk). + if should_remove and not self._persistent: for d in (self._workspace_dir, self._home_dir): if d: shutil.rmtree(d, ignore_errors=True) diff --git a/tools/terminal_tool.py b/tools/terminal_tool.py index 6b0d42aa886..2ff281c81bf 100644 --- a/tools/terminal_tool.py +++ b/tools/terminal_tool.py @@ -1426,8 +1426,21 @@ def cleanup_all_environments(): return cleaned -def cleanup_vm(task_id: str): - """Manually clean up a specific environment by task_id.""" +def cleanup_vm(task_id: str, *, force_remove: bool = True): + """Manually clean up a specific environment by task_id. + + *force_remove* (default True) is forwarded to backends that accept it + — currently only ``DockerEnvironment``. The default of True matches the + semantics callers expect from this function: ``cleanup_vm(task_id)`` is + the explicit-teardown path (called from ``/reset``-style flows, + ``AIAgent.close()``, and the idle reaper after a long inactivity window), + so the container should be removed regardless of persist mode. + + The idle reaper passes the env through ``env.cleanup()`` directly (not + via this function), so persist-mode idle envs are NOT removed by idle + reaping — only by explicit ``cleanup_vm()`` or the orphan reaper at next + startup. See ``_cleanup_inactive_envs()`` for the idle path. + """ # Remove from tracking dicts while holding the lock, but defer the # actual (potentially slow) env.cleanup() call to outside the lock # so other tool calls aren't blocked. @@ -1452,7 +1465,14 @@ def cleanup_vm(task_id: str): try: if hasattr(env, 'cleanup'): - env.cleanup() + # Pass force_remove only if the env's cleanup() accepts it + # (DockerEnvironment after issue #20561; other backends don't). + import inspect + sig = inspect.signature(env.cleanup) + if "force_remove" in sig.parameters: + env.cleanup(force_remove=force_remove) + else: + env.cleanup() elif hasattr(env, 'stop'): env.stop() elif hasattr(env, 'terminate'):