diff --git a/tests/tools/test_docker_environment.py b/tests/tools/test_docker_environment.py
index 8073813f71c..00da8160a01 100644
--- a/tests/tools/test_docker_environment.py
+++ b/tests/tools/test_docker_environment.py
@@ -837,12 +837,16 @@ def _install_fake_thread(monkeypatch):
     monkeypatch.setattr(threading, "Thread", _FakeThread)
 
 
-def test_cleanup_with_persist_only_stops_no_rm(monkeypatch):
-    """``persist_across_processes=True`` (default) cleanup must docker stop
-    the container but NEVER docker rm — the container has to survive so the
-    next Hermes process can reuse it. Issue #20561 — the previous code
-    matched this on the `_persistent` flag instead of a dedicated
-    cross-process flag, which made reuse impossible."""
+def test_cleanup_with_persist_is_noop_for_container(monkeypatch):
+    """``persist_across_processes=True`` (default) cleanup must NEITHER stop
+    NOR remove the container — the docs promise "ONE long-lived container
+    shared across sessions", and any docker stop would kill background
+    processes inside the container (npm watchers, pytest watchers, etc.).
+
+    Resource reclamation in this mode happens via the orphan reaper on next
+    Hermes startup, not on graceful exit. Issue #20561 — the first iteration
+    of this PR did docker stop here, which Ben caught as contradicting the
+    "ONE long-lived container" semantics."""
     monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
     monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
     _mock_subprocess_run(monkeypatch)
@@ -866,11 +870,55 @@ def test_cleanup_with_persist_only_stops_no_rm(monkeypatch):
 
     stops = [c for c in cleanup_calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "stop"]
     rms = [c for c in cleanup_calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "rm"]
-    assert stops, f"expected docker stop call, got cleanup_calls: {cleanup_calls}"
+    assert not stops, (
+        f"docker stop must NOT be called when persist_across_processes=True; "
+        f"container has to stay running so background processes survive. "
+        f"Got: {stops}"
+    )
     assert not rms, (
         f"docker rm must NOT be called when persist_across_processes=True; "
         f"reuse would be impossible. Got: {rms}"
     )
+    # The in-process handle must still be cleared so the next __init__
+    # re-probes via labels (and reuses the still-running container).
+    assert env._container_id is None, (
+        "in-process container_id should be cleared even in no-op cleanup"
+    )
+
+
+def test_cleanup_force_remove_stops_and_rms_even_in_persist_mode(monkeypatch):
+    """``cleanup(force_remove=True)`` must stop AND rm the container even
+    when ``persist_across_processes=True``. This is the explicit-teardown
+    path for ``/reset``, ``cleanup_vm(task_id, force_remove=True)``, and any
+    future caller that wants a guaranteed fresh container.
+
+    Without this kwarg, callers in persist mode would have no way to force a
+    fresh container without also flipping the global config — too coarse for
+    a per-task reset.
+    """
+    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
+    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
+    _mock_subprocess_run(monkeypatch)
+    _install_fake_thread(monkeypatch)
+
+    env = _make_dummy_env(task_id="cleanup-force", persistent_filesystem=False)
+    assert env._container_id
+
+    cleanup_calls = []
+    real_run = docker_env.subprocess.run
+
+    def _capturing_run(cmd, **kwargs):
+        cleanup_calls.append((list(cmd) if isinstance(cmd, list) else cmd, kwargs))
+        return real_run(cmd, **kwargs)
+
+    monkeypatch.setattr(docker_env.subprocess, "run", _capturing_run)
+
+    env.cleanup(force_remove=True)
+
+    stops = [c for c in cleanup_calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "stop"]
+    rms = [c for c in cleanup_calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "rm"]
+    assert stops, f"force_remove must docker stop; got: {cleanup_calls}"
+    assert rms, f"force_remove must docker rm; got: {cleanup_calls}"
 
 
 def test_cleanup_with_persist_disabled_stops_and_rms(monkeypatch):
@@ -914,12 +962,15 @@ def test_cleanup_with_persist_disabled_stops_and_rms(monkeypatch):
 
 
 def test_cleanup_uses_subprocess_run_not_detached_shell(monkeypatch):
-    """The pre-fix code used ``subprocess.Popen(\"... &\", shell=True)`` which
+    """The pre-fix code used ``subprocess.Popen("... &", shell=True)`` which
     raced with parent-process exit and silently dropped cleanup work. The
     new code must use ``subprocess.run`` with bounded ``timeout=`` so the
     work actually completes within the process lifetime.
 
-    Asserts cleanup never reaches into shell-mode Popen.
+    Asserts cleanup never reaches into shell-mode Popen. Uses
+    ``force_remove=True`` so cleanup actually issues docker calls — the
+    default persist-mode path is now a no-op (commit 4) and would trivially
+    pass this assertion without exercising the docker code at all.
     """
     monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
     monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
@@ -935,7 +986,7 @@ def test_cleanup_uses_subprocess_run_not_detached_shell(monkeypatch):
     monkeypatch.setattr(docker_env.subprocess, "Popen", _forbidden_popen)
 
     env = _make_dummy_env(task_id="no-popen-cleanup")
-    env.cleanup()  # must not raise
+    env.cleanup(force_remove=True)  # must not raise
 
 
 def test_wait_for_cleanup_returns_true_when_no_thread_started():
@@ -951,14 +1002,20 @@ def test_wait_for_cleanup_returns_true_when_no_thread_started():
 def test_wait_for_cleanup_after_cleanup_returns_true(monkeypatch):
     """End-to-end: cleanup() starts a thread, wait_for_cleanup() joins it
     and reports completion. Atexit relies on this contract to ensure docker
-    stop/rm actually finishes before the Python interpreter exits."""
+    stop/rm actually finishes before the Python interpreter exits.
+
+    Uses ``force_remove=True`` so cleanup actually starts a worker thread —
+    the default persist-mode cleanup is a no-op (commit 4) and never spawns
+    a thread, so the trivial "no thread" branch of wait_for_cleanup is
+    already covered by the previous test.
+    """
     monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
     monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
     _mock_subprocess_run(monkeypatch)
     _install_fake_thread(monkeypatch)
 
     env = _make_dummy_env(task_id="wait-test")
-    env.cleanup()
+    env.cleanup(force_remove=True)
     assert env.wait_for_cleanup(timeout=5.0) is True
 
 
diff --git a/tools/environments/docker.py b/tools/environments/docker.py
index 7ab97916f30..7a2728ebfe2 100644
--- a/tools/environments/docker.py
+++ b/tools/environments/docker.py
@@ -913,49 +913,94 @@ class DockerEnvironment(BaseEnvironment):
                 running = (cid, state)
         return running or first
 
-    def cleanup(self):
-        """Stop (and optionally remove) the container.
+    def cleanup(self, *, force_remove: bool = False):
+        """Tear down the container according to persist mode and *force_remove*.
 
-        Behavior depends on ``persist_across_processes`` (init kwarg):
+        Persist-mode (``persist_across_processes=True``, the default) leaves the
+        container **running** untouched. The docs promise "ONE long-lived
+        container shared across sessions" and stopping it on every Hermes exit
+        breaks that promise:
 
-        * **True** (default) — only ``docker stop`` so the container is
-          available for reuse by the next Hermes process. The orphan-reaper
-          eventually removes it if no subsequent process picks it up.
-        * **False** — ``docker stop`` followed by ``docker rm -f``, regardless
-          of ``persistent_filesystem``. The previous ``rm`` path was gated on
-          ``not self._persistent`` which meant ``container_persistent: true``
-          users (the default) leaked Exited containers forever (issue #20561).
+        * Background processes inside the container (``npm run dev``, watchers,
+          long-running pytest) get killed every time the user runs ``/quit``.
+        * Every reuse requires ``docker start`` + waiting for the container to
+          come back up, adding 1–2s to the first tool call of the new session.
+        * The user-visible difference between "ONE long-lived container" and
+          "a new container that happens to share state" is exactly this:
+          processes survive in the former, die in the latter.
 
-        Cleanup runs on a daemon thread with bounded ``subprocess.run`` calls,
-        not the previous fire-and-forget ``Popen(... &)`` shell construct.
-        That pattern raced with parent-process exit and silently dropped
-        cleanup work when the parent didn't outlive the detached shell — the
-        primary mechanism behind Exited-container accumulation under SIGTERM
-        / ``hermes /quit`` / dead terminals.
+        Resource reclamation for the persist-mode case lives in the
+        ``reap_orphan_containers()`` path (see issue #20561 commit 3): if no
+        Hermes process touches a labeled container for ``2 × lifetime_seconds``
+        it gets ``docker rm -f``'d at the next Hermes startup. That covers the
+        SIGKILL / OOM / abandoned-laptop cases without us needing to stop the
+        container on every graceful exit.
+
+        Opt-out mode (``persist_across_processes=False``) still does
+        ``docker stop`` + ``docker rm -f`` on every cleanup, matching the
+        pre-PR behavior for users who explicitly want per-process isolation.
+
+        ``force_remove=True`` overrides persist mode and always tears the
+        container down (``docker stop`` + ``docker rm -f``). This is the
+        explicit-teardown path for ``/reset``, ``cleanup_vm(task_id)``-driven
+        resets, or any caller that wants a guaranteed fresh container on next
+        ``DockerEnvironment(task_id=...)``. No current caller passes
+        ``force_remove=True``; the parameter is here so the explicit-teardown
+        semantics can be wired up later without changing this method's
+        signature.
+
+        Cleanup runs on a daemon thread with bounded ``subprocess.run`` calls
+        (not the racy ``Popen(... &)`` pattern from before PR #33645). The
+        atexit hook in ``tools/terminal_tool.py`` waits up to 15s for the
+        thread to finish before the interpreter exits, so ``docker stop`` /
+        ``docker rm`` actually completes when we do trigger it.
         """
         container_id = self._container_id
         if not container_id:
-            # Still drop the bind-mount dirs if any were allocated.
+            # Still drop the bind-mount dirs if any were allocated and we're
+            # NOT in persist mode (persist mode preserves them).
             if not self._persistent:
                 for d in (self._workspace_dir, self._home_dir):
                     if d:
                         shutil.rmtree(d, ignore_errors=True)
             return
 
+        # Decide what to actually do. Three cases:
+        #
+        #   force_remove=True             → stop + rm (explicit teardown)
+        #   persist_across_processes=True → no-op (leave container running)
+        #   persist_across_processes=False → stop + rm (per-process isolation)
+        #
+        # The persist-mode no-op is the issue-#20561 contract: the container
+        # outlives Hermes processes, processes inside it stay alive, and
+        # reuse on next startup is instant.
+        if force_remove:
+            should_stop = True
+            should_remove = True
+        elif self._persist_across_processes:
+            # No-op for the container. Drop the in-process handle so a fresh
+            # __init__ will re-probe via labels (and find the running
+            # container) instead of trying to reuse a stale Python reference.
+            self._container_id = None
+            return
+        else:
+            should_stop = True
+            should_remove = True
+
         # Capture state needed by the worker before we null out the attrs —
         # the worker thread can outlive ``self``.
         docker_exe = self._docker_exe
-        should_remove = not self._persist_across_processes
         log_id = container_id[:12]
 
         def _do_cleanup() -> None:
-            try:
-                subprocess.run(
-                    [docker_exe, "stop", "-t", "10", container_id],
-                    capture_output=True, timeout=30,
-                )
-            except (subprocess.TimeoutExpired, OSError) as e:
-                logger.warning("docker stop %s timed out / failed: %s", log_id, e)
+            if should_stop:
+                try:
+                    subprocess.run(
+                        [docker_exe, "stop", "-t", "10", container_id],
+                        capture_output=True, timeout=30,
+                    )
+                except (subprocess.TimeoutExpired, OSError) as e:
+                    logger.warning("docker stop %s timed out / failed: %s", log_id, e)
             if should_remove:
                 try:
                     subprocess.run(
@@ -977,7 +1022,10 @@ class DockerEnvironment(BaseEnvironment):
         self._cleanup_thread = t
         self._container_id = None
 
-        if not self._persistent:
+        # Bind-mount dir teardown only runs when we actually removed the
+        # container (the dirs are the container's filesystem state; keeping
+        # them around with no container would orphan the data on disk).
+        if should_remove and not self._persistent:
             for d in (self._workspace_dir, self._home_dir):
                 if d:
                     shutil.rmtree(d, ignore_errors=True)
diff --git a/tools/terminal_tool.py b/tools/terminal_tool.py
index 6b0d42aa886..2ff281c81bf 100644
--- a/tools/terminal_tool.py
+++ b/tools/terminal_tool.py
@@ -1426,8 +1426,21 @@ def cleanup_all_environments():
     return cleaned
 
 
-def cleanup_vm(task_id: str):
-    """Manually clean up a specific environment by task_id."""
+def cleanup_vm(task_id: str, *, force_remove: bool = True):
+    """Manually clean up a specific environment by task_id.
+
+    *force_remove* (default True) is forwarded to backends that accept it
+    — currently only ``DockerEnvironment``. The default of True matches the
+    semantics callers expect from this function: ``cleanup_vm(task_id)`` is
+    the explicit-teardown path (called from ``/reset``-style flows,
+    ``AIAgent.close()``, and the idle reaper after a long inactivity window),
+    so the container should be removed regardless of persist mode.
+
+    The idle reaper passes the env through ``env.cleanup()`` directly (not
+    via this function), so persist-mode idle envs are NOT removed by idle
+    reaping — only by explicit ``cleanup_vm()`` or the orphan reaper at next
+    startup. See ``_cleanup_inactive_envs()`` for the idle path.
+    """
     # Remove from tracking dicts while holding the lock, but defer the
     # actual (potentially slow) env.cleanup() call to outside the lock
     # so other tool calls aren't blocked.
@@ -1452,7 +1465,14 @@ def cleanup_vm(task_id: str):
 
     try:
         if hasattr(env, 'cleanup'):
-            env.cleanup()
+            # Pass force_remove only if the env's cleanup() accepts it
+            # (DockerEnvironment after issue #20561; other backends don't).
+            import inspect
+            sig = inspect.signature(env.cleanup)
+            if "force_remove" in sig.parameters:
+                env.cleanup(force_remove=force_remove)
+            else:
+                env.cleanup()
         elif hasattr(env, 'stop'):
             env.stop()
         elif hasattr(env, 'terminate'):