fix(docker): cleanup_vm() default honors persist mode (don't kill container on session close)

Commit 4 made cleanup_vm() default to force_remove=True, which was wrong:
cleanup_vm() is called from AIAgent.close() (TUI session close at
tui_gateway/server.py:2991, gateway session teardown at gateway/run.py:3569)
and from per-turn cleanup (agent/chat_completion_helpers.py:1517). All
three are session-lifecycle events that should honor persist mode, not
explicit user-initiated teardown.

Ben reported the symptom: container shared between multiple TUI sessions
(good) but killed as soon as any session closed (bad). With force_remove=True
as the default, every `session.close` JSON-RPC tore down the container.

The fix is to flip cleanup_vm()'s force_remove default back to False.
The kwarg still exists for future explicit-teardown paths (`/reset`-style
flows, "destroy my sandbox" commands) that haven't been wired up yet.

Two new unit tests pin the behavior:

* `test_cleanup_vm_default_honors_persist_mode` — asserts
  `cleanup_vm(task_id)` does neither docker stop nor docker rm on a
  persist-mode container (the regression Ben caught).
* `test_cleanup_vm_force_remove_tears_down_persist_container` —
  asserts the kwarg still flows through the runtime-signature-inspection
  plumbing to the backend's cleanup().

E2E verified against real Docker (in addition to all 17 existing checks):

  ✓ Default cleanup_vm() leaves persist-mode container running
  ✓ cleanup_vm(force_remove=True) removed the container

Refs #20561
This commit is contained in:
Ben 2026-05-29 08:09:37 +10:00 committed by Ben Barclay
parent 5c2170a7c6
commit 2f0f03c40d
2 changed files with 101 additions and 10 deletions

View file

@ -921,6 +921,91 @@ def test_cleanup_force_remove_stops_and_rms_even_in_persist_mode(monkeypatch):
assert rms, f"force_remove must docker rm; got: {cleanup_calls}"
def test_cleanup_vm_default_honors_persist_mode(monkeypatch):
"""``cleanup_vm(task_id)`` without ``force_remove=True`` must be a no-op
for a persist-mode container.
Regression for the bug Ben caught after commit 4: ``AIAgent.close()``
(which is called from ``tui_gateway/server.py`` on session.close, from
``gateway/run.py`` on per-session teardown, and from per-turn cleanup)
calls ``cleanup_vm(task_id)``. If that defaulted to ``force_remove=True``
we'd tear down the container on every TUI session close, defeating the
"ONE long-lived container shared across sessions" contract.
"""
monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
_mock_subprocess_run(monkeypatch)
_install_fake_thread(monkeypatch)
from tools import terminal_tool
env = _make_dummy_env(task_id="session-close-test")
container_id = env._container_id
terminal_tool._active_environments["session-close-test"] = env
cleanup_calls = []
real_run = docker_env.subprocess.run
def _capturing_run(cmd, **kwargs):
cleanup_calls.append((list(cmd) if isinstance(cmd, list) else cmd, kwargs))
return real_run(cmd, **kwargs)
monkeypatch.setattr(docker_env.subprocess, "run", _capturing_run)
try:
terminal_tool.cleanup_vm("session-close-test")
finally:
terminal_tool._active_environments.pop("session-close-test", None)
stops = [c for c in cleanup_calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "stop"]
rms = [c for c in cleanup_calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "rm"]
assert not stops, (
f"cleanup_vm() default must not docker stop a persist-mode container; "
f"got: {stops}"
)
assert not rms, (
f"cleanup_vm() default must not docker rm a persist-mode container; "
f"got: {rms}"
)
def test_cleanup_vm_force_remove_tears_down_persist_container(monkeypatch):
"""``cleanup_vm(task_id, force_remove=True)`` tears down a persist-mode
container the explicit-teardown path for ``/reset``-style flows.
Also pins the runtime-signature-inspection plumbing: the kwarg must
actually flow through ``cleanup_vm`` into the backend's ``cleanup()``.
"""
monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
_mock_subprocess_run(monkeypatch)
_install_fake_thread(monkeypatch)
from tools import terminal_tool
env = _make_dummy_env(task_id="explicit-teardown-test")
terminal_tool._active_environments["explicit-teardown-test"] = env
cleanup_calls = []
real_run = docker_env.subprocess.run
def _capturing_run(cmd, **kwargs):
cleanup_calls.append((list(cmd) if isinstance(cmd, list) else cmd, kwargs))
return real_run(cmd, **kwargs)
monkeypatch.setattr(docker_env.subprocess, "run", _capturing_run)
try:
terminal_tool.cleanup_vm("explicit-teardown-test", force_remove=True)
finally:
terminal_tool._active_environments.pop("explicit-teardown-test", None)
stops = [c for c in cleanup_calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "stop"]
rms = [c for c in cleanup_calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "rm"]
assert stops, f"force_remove must reach docker stop; got: {cleanup_calls}"
assert rms, f"force_remove must reach docker rm; got: {cleanup_calls}"
def test_cleanup_with_persist_disabled_stops_and_rms(monkeypatch):
"""``persist_across_processes=False`` cleanup must docker stop AND docker
rm so containers don't leak. Crucially, this runs regardless of the

View file

@ -1426,20 +1426,26 @@ def cleanup_all_environments():
return cleaned
def cleanup_vm(task_id: str, *, force_remove: bool = True):
def cleanup_vm(task_id: str, *, force_remove: bool = False):
"""Manually clean up a specific environment by task_id.
*force_remove* (default True) is forwarded to backends that accept it
currently only ``DockerEnvironment``. The default of True matches the
semantics callers expect from this function: ``cleanup_vm(task_id)`` is
the explicit-teardown path (called from ``/reset``-style flows,
``AIAgent.close()``, and the idle reaper after a long inactivity window),
so the container should be removed regardless of persist mode.
*force_remove* (default False) is forwarded to backends that accept it
currently only ``DockerEnvironment``. The default of False matches
session-lifecycle semantics: this function is called from
``AIAgent.close()`` (TUI session close, gateway session teardown) and the
per-turn cleanup branch for non-persistent envs, both of which should
honor the user's persist-mode preference. Stopping the container here
would defeat the "ONE long-lived container shared across sessions"
contract exactly the bug Ben reported when the container was killed
on every TUI session close.
Pass ``force_remove=True`` for actual user-initiated teardown
(e.g. ``/reset``-style flows that haven't been wired yet, or future
"destroy my sandbox" commands).
The idle reaper passes the env through ``env.cleanup()`` directly (not
via this function), so persist-mode idle envs are NOT removed by idle
reaping only by explicit ``cleanup_vm()`` or the orphan reaper at next
startup. See ``_cleanup_inactive_envs()`` for the idle path.
via this function), so persist-mode idle envs are similarly no-op'd —
only the orphan reaper at next startup reclaims them.
"""
# Remove from tracking dicts while holding the lock, but defer the
# actual (potentially slow) env.cleanup() call to outside the lock