diff --git a/cli.py b/cli.py index d4053c8cb9c..a815175d9fe 100644 --- a/cli.py +++ b/cli.py @@ -577,6 +577,7 @@ def load_cli_config() -> Dict[str, Any]: "docker_mount_cwd_to_workspace": "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE", "docker_run_as_host_user": "TERMINAL_DOCKER_RUN_AS_HOST_USER", "docker_persist_across_processes": "TERMINAL_DOCKER_PERSIST_ACROSS_PROCESSES", + "docker_orphan_reaper": "TERMINAL_DOCKER_ORPHAN_REAPER", "sandbox_dir": "TERMINAL_SANDBOX_DIR", # Persistent shell (non-local backends) "persistent_shell": "TERMINAL_PERSISTENT_SHELL", diff --git a/gateway/run.py b/gateway/run.py index d1b1c744d38..876a23879e1 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -832,6 +832,7 @@ if _config_path.exists(): "docker_mount_cwd_to_workspace": "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE", "docker_run_as_host_user": "TERMINAL_DOCKER_RUN_AS_HOST_USER", "docker_persist_across_processes": "TERMINAL_DOCKER_PERSIST_ACROSS_PROCESSES", + "docker_orphan_reaper": "TERMINAL_DOCKER_ORPHAN_REAPER", "sandbox_dir": "TERMINAL_SANDBOX_DIR", "persistent_shell": "TERMINAL_PERSISTENT_SHELL", } diff --git a/hermes_cli/config.py b/hermes_cli/config.py index f6eb69d3b80..74c04635712 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -5552,6 +5552,7 @@ def set_config_value(key: str, value: str): "terminal.docker_mount_cwd_to_workspace": "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE", "terminal.docker_run_as_host_user": "TERMINAL_DOCKER_RUN_AS_HOST_USER", "terminal.docker_persist_across_processes": "TERMINAL_DOCKER_PERSIST_ACROSS_PROCESSES", + "terminal.docker_orphan_reaper": "TERMINAL_DOCKER_ORPHAN_REAPER", "terminal.docker_env": "TERMINAL_DOCKER_ENV", # terminal.cwd intentionally excluded — CLI resolves at runtime, # gateway bridges it in gateway/run.py. Persisting to .env causes diff --git a/tests/conftest.py b/tests/conftest.py index fd91b558f5f..fcb19c71fb4 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -228,6 +228,7 @@ _HERMES_BEHAVIORAL_VARS = frozenset({ "TERMINAL_CONTAINER_MEMORY", "TERMINAL_CONTAINER_PERSISTENT", "TERMINAL_DOCKER_PERSIST_ACROSS_PROCESSES", + "TERMINAL_DOCKER_ORPHAN_REAPER", "TERMINAL_DOCKER_RUN_AS_HOST_USER", "BROWSER_CDP_URL", "CAMOFOX_URL", diff --git a/tests/tools/test_docker_environment.py b/tests/tools/test_docker_environment.py index abdce91ba5f..8073813f71c 100644 --- a/tests/tools/test_docker_environment.py +++ b/tests/tools/test_docker_environment.py @@ -975,3 +975,254 @@ def test_cleanup_on_env_with_no_container_id_does_not_raise(monkeypatch): env._home_dir = None # No exception expected. env.cleanup() + + +# ── Orphan reaper (issue #20561) ────────────────────────────────── + + +def _now_iso(offset_seconds: int = 0) -> str: + """Return an RFC3339 timestamp ``offset_seconds`` in the past.""" + import datetime + t = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(seconds=offset_seconds) + # Format like Docker emits — with nanoseconds-style trailing digits. + return t.isoformat().replace("+00:00", ".123456789Z") + + +def _reaper_run_mock(monkeypatch, ps_ids: list[str], inspect_responses: dict[str, str], + rm_succeeds: bool = True): + """Build a subprocess.run mock for reaper tests. + + * ``ps_ids`` — what ``docker ps -a --filter ... --format '{{.ID}}'`` returns + * ``inspect_responses[cid]`` — what ``docker inspect ... FinishedAt`` returns + for each cid; ``""`` means "field unset". + * ``rm_succeeds`` — whether ``docker rm -f`` returns 0. + + Captures every call so tests can assert which containers were rm'd. + """ + calls = [] + + def _run(cmd, **kwargs): + calls.append((list(cmd) if isinstance(cmd, list) else cmd, kwargs)) + if not isinstance(cmd, list) or len(cmd) < 2: + return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="") + sub = cmd[1] + if sub == "ps": + return subprocess.CompletedProcess( + cmd, 0, stdout="\n".join(ps_ids) + ("\n" if ps_ids else ""), stderr="", + ) + if sub == "inspect": + # cmd is [docker, inspect, --format, '{{.State.FinishedAt}}', cid] + cid = cmd[-1] + return subprocess.CompletedProcess( + cmd, 0, stdout=inspect_responses.get(cid, "") + "\n", stderr="", + ) + if sub == "rm": + return subprocess.CompletedProcess( + cmd, 0 if rm_succeeds else 1, + stdout="", stderr="" if rm_succeeds else "no such container", + ) + return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="") + + monkeypatch.setattr(docker_env.subprocess, "run", _run) + return calls + + +def test_reap_orphan_returns_zero_when_no_matches(monkeypatch): + """No labeled containers → no rm calls, returns 0. Establishes the + happy-path baseline for the orphan reaper (issue #20561).""" + calls = _reaper_run_mock(monkeypatch, ps_ids=[], inspect_responses={}) + + removed = docker_env.reap_orphan_containers( + max_age_seconds=600, profile_filter="default", docker_exe="/usr/bin/docker", + ) + + assert removed == 0 + rms = [c for c in calls if isinstance(c[0], list) and c[0][1:2] == ["rm"]] + assert not rms, "no rm calls expected when ps returns empty" + + +def test_reap_orphan_removes_stale_exited_container(monkeypatch): + """An Exited container older than max_age_seconds must be removed. + This is the core repair path for issue #20561 — without the reaper, + SIGKILL'd Hermes processes leak containers permanently.""" + old = _now_iso(offset_seconds=900) # 15 minutes ago + calls = _reaper_run_mock( + monkeypatch, ps_ids=["old-cid"], inspect_responses={"old-cid": old}, + ) + + removed = docker_env.reap_orphan_containers( + max_age_seconds=600, profile_filter="default", docker_exe="/usr/bin/docker", + ) + + assert removed == 1 + rms = [c for c in calls if isinstance(c[0], list) and c[0][1:2] == ["rm"]] + assert len(rms) == 1 + assert "old-cid" in rms[0][0], f"expected rm of old-cid, got {rms[0][0]}" + + +def test_reap_orphan_spares_recently_exited_container(monkeypatch): + """A container exited within max_age_seconds must NOT be reaped — that + container belongs to a Hermes process that just finished and may be + about to be replaced. Conservative window prevents racing sibling + processes.""" + recent = _now_iso(offset_seconds=60) # 1 minute ago + calls = _reaper_run_mock( + monkeypatch, ps_ids=["recent-cid"], inspect_responses={"recent-cid": recent}, + ) + + removed = docker_env.reap_orphan_containers( + max_age_seconds=600, profile_filter="default", docker_exe="/usr/bin/docker", + ) + + assert removed == 0 + rms = [c for c in calls if isinstance(c[0], list) and c[0][1:2] == ["rm"]] + assert not rms, f"recent container must not be reaped, got rm calls: {rms}" + + +def test_reap_orphan_scopes_to_profile_filter_via_label(monkeypatch): + """The reaper must pass ``--filter label=hermes-profile=`` to + docker ps so it never sweeps another profile's containers. A research + profile must not tear down the default profile's stragglers.""" + calls = _reaper_run_mock(monkeypatch, ps_ids=[], inspect_responses={}) + + docker_env.reap_orphan_containers( + max_age_seconds=600, profile_filter="research-bot", docker_exe="/usr/bin/docker", + ) + + ps_calls = [c for c in calls if isinstance(c[0], list) and c[0][1:2] == ["ps"]] + assert ps_calls, "expected at least one docker ps call" + flat = " ".join(ps_calls[0][0]) + assert "label=hermes-profile=research-bot" in flat, ( + f"profile filter not applied to docker ps; got args: {ps_calls[0][0]}" + ) + assert "label=hermes-agent=1" in flat, ( + f"hermes-agent label filter must also be applied; got: {ps_calls[0][0]}" + ) + assert "status=exited" in flat, ( + "must filter to exited containers only — running containers may " + "belong to a sibling Hermes process and must NEVER be reaped" + ) + + +def test_reap_orphan_skips_container_with_unparseable_finished_at(monkeypatch): + """If docker inspect returns the zero-value ``0001-01-01T00:00:00Z`` (no + FinishedAt yet) or an unparseable timestamp, the reaper must leave the + container alone. Defensive — never reap a container whose age we can't + determine.""" + calls = _reaper_run_mock( + monkeypatch, + ps_ids=["never-finished", "garbage-ts"], + inspect_responses={ + "never-finished": "0001-01-01T00:00:00Z", + "garbage-ts": "not-a-timestamp", + }, + ) + + removed = docker_env.reap_orphan_containers( + max_age_seconds=600, profile_filter="default", docker_exe="/usr/bin/docker", + ) + + assert removed == 0 + rms = [c for c in calls if isinstance(c[0], list) and c[0][1:2] == ["rm"]] + assert not rms, ( + f"reaper must NOT remove containers with unparseable FinishedAt; got: {rms}" + ) + + +def test_reap_orphan_handles_docker_ps_failure_gracefully(monkeypatch): + """If docker ps itself fails (daemon down, permission denied), the + reaper returns 0 without crashing. The reaper is best-effort plumbing, + not a critical path — it must never block container creation.""" + def _failing_ps(cmd, **kwargs): + if isinstance(cmd, list) and len(cmd) >= 2 and cmd[1] == "ps": + return subprocess.CompletedProcess(cmd, 1, stdout="", stderr="Cannot connect to daemon") + return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="") + + monkeypatch.setattr(docker_env.subprocess, "run", _failing_ps) + + # Must not raise + removed = docker_env.reap_orphan_containers( + max_age_seconds=600, profile_filter="default", docker_exe="/usr/bin/docker", + ) + assert removed == 0 + + +def test_reap_orphan_continues_after_individual_rm_failure(monkeypatch): + """If ``docker rm -f`` fails on one container (already removed by a + concurrent process, container locked, etc.), the reaper must log and + continue to the next candidate rather than aborting the whole sweep.""" + old = _now_iso(offset_seconds=900) + rm_calls = [] + + def _run(cmd, **kwargs): + if not isinstance(cmd, list) or len(cmd) < 2: + return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="") + sub = cmd[1] + if sub == "ps": + return subprocess.CompletedProcess( + cmd, 0, stdout="cid-a\ncid-b\ncid-c\n", stderr="", + ) + if sub == "inspect": + return subprocess.CompletedProcess(cmd, 0, stdout=old + "\n", stderr="") + if sub == "rm": + rm_calls.append(cmd[-1]) + # cid-b fails; cid-a and cid-c succeed. + if cmd[-1] == "cid-b": + return subprocess.CompletedProcess(cmd, 1, stdout="", stderr="no such container") + return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="") + return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="") + + monkeypatch.setattr(docker_env.subprocess, "run", _run) + + removed = docker_env.reap_orphan_containers( + max_age_seconds=600, profile_filter="default", docker_exe="/usr/bin/docker", + ) + + # All three were attempted, two succeeded. + assert removed == 2 + assert set(rm_calls) == {"cid-a", "cid-b", "cid-c"}, ( + f"reaper must attempt all candidates even when one fails; got: {rm_calls}" + ) + + +def test_container_finished_at_parses_nanosecond_timestamp(monkeypatch): + """Docker emits FinishedAt with nanosecond precision (RFC3339 with up to + 9 fractional digits), but Python's fromisoformat caps at microseconds. + The helper must trim the extra digits without raising — otherwise every + candidate gets skipped and the reaper does nothing.""" + + def _run(cmd, **kwargs): + return subprocess.CompletedProcess( + cmd, 0, + stdout="2026-05-28T13:45:00.123456789Z\n", + stderr="", + ) + + monkeypatch.setattr(docker_env.subprocess, "run", _run) + + result = docker_env._container_finished_at("/usr/bin/docker", "test-cid") + assert result is not None, "must parse RFC3339 with nanoseconds" + import datetime + assert result.tzinfo == datetime.timezone.utc + assert result.year == 2026 and result.month == 5 and result.day == 28 + + +def test_container_finished_at_returns_none_on_zero_value(): + """Docker's zero-value ``0001-01-01T00:00:00Z`` (never finished) must + map to None so the reaper treats the container as unreapable.""" + # Direct test of the parsing helper — no subprocess needed since the + # check happens after the inspect call returns. + import subprocess as _subprocess + + class _MockRun: + def __init__(self, stdout): + self.returncode = 0 + self.stdout = stdout + self.stderr = "" + + import unittest.mock + with unittest.mock.patch.object( + docker_env.subprocess, "run", return_value=_MockRun("0001-01-01T00:00:00Z\n"), + ): + result = docker_env._container_finished_at("/usr/bin/docker", "never-finished") + assert result is None diff --git a/tests/tools/test_docker_orphan_reaper_integration.py b/tests/tools/test_docker_orphan_reaper_integration.py new file mode 100644 index 00000000000..d52dbcdaec7 --- /dev/null +++ b/tests/tools/test_docker_orphan_reaper_integration.py @@ -0,0 +1,139 @@ +"""Integration tests for the docker orphan-reaper wiring in terminal_tool. + +The reaper itself is unit-tested in tests/tools/test_docker_environment.py +under the "Orphan reaper" section. These tests cover the terminal_tool-side +gates: once-per-process behavior, the disable flag, and the +``lifetime_seconds`` doubling that determines the reaper's age threshold. + +Issue #20561 — without these gates, parallel subagents would each fire the +reaper on container creation, and the ``terminal.docker_orphan_reaper: false`` +opt-out would silently do nothing. +""" + +import os +from unittest.mock import patch + +import tools.terminal_tool as terminal_tool + + +def _reset_reaper_gate(): + """Clear the once-per-process flag between tests.""" + terminal_tool._docker_orphan_reaper_ran = False + + +def test_maybe_reap_runs_once_per_process(monkeypatch): + """The reaper sweep must run at most once per Python interpreter. + Parallel subagents that each call _create_environment(env_type='docker') + would otherwise fire N concurrent docker ps + inspect storms against the + daemon and waste 5–10s of startup.""" + _reset_reaper_gate() + call_count = {"reap": 0} + + def _fake_reap(**kwargs): + call_count["reap"] += 1 + return 0 + + with patch("tools.environments.docker.reap_orphan_containers", _fake_reap): + config = {"docker_orphan_reaper": True} + terminal_tool._maybe_reap_docker_orphans(config) + terminal_tool._maybe_reap_docker_orphans(config) + terminal_tool._maybe_reap_docker_orphans(config) + + assert call_count["reap"] == 1, ( + f"reaper must run exactly once per process; got {call_count['reap']} calls" + ) + + +def test_maybe_reap_respects_disable_flag(monkeypatch): + """``terminal.docker_orphan_reaper: false`` (via container_config) must + skip the sweep entirely — no docker ps, no inspect, no rm. The escape + hatch for operators running multiple Hermes processes in the same + profile.""" + _reset_reaper_gate() + call_count = {"reap": 0} + + def _fake_reap(**kwargs): + call_count["reap"] += 1 + return 0 + + with patch("tools.environments.docker.reap_orphan_containers", _fake_reap): + terminal_tool._maybe_reap_docker_orphans({"docker_orphan_reaper": False}) + + assert call_count["reap"] == 0, "disabled reaper must not run any docker calls" + # The once-per-process gate must NOT be tripped when the reaper is + # disabled — that would prevent a subsequent toggle to true from working. + assert terminal_tool._docker_orphan_reaper_ran is False + + +def test_maybe_reap_doubles_lifetime_for_max_age(monkeypatch): + """The reaper's age threshold is ``2 × lifetime_seconds`` (with a 60s + floor). Generous default — gives sibling Hermes processes ample grace + to be replaced without their just-exited containers being yanked.""" + _reset_reaper_gate() + captured_args = {} + + def _fake_reap(**kwargs): + captured_args.update(kwargs) + return 0 + + monkeypatch.setenv("TERMINAL_LIFETIME_SECONDS", "300") + with patch("tools.environments.docker.reap_orphan_containers", _fake_reap): + terminal_tool._maybe_reap_docker_orphans({"docker_orphan_reaper": True}) + + assert captured_args.get("max_age_seconds") == 600, ( + f"expected 2 × 300 = 600, got {captured_args.get('max_age_seconds')}" + ) + + +def test_maybe_reap_floors_at_60_seconds(monkeypatch): + """A user pinning TERMINAL_LIFETIME_SECONDS=0 (or any value <30) would + otherwise get an effective age threshold of zero, which would race the + user's own just-started container creation. Floor at 60s × 2 = 120s.""" + _reset_reaper_gate() + captured_args = {} + + def _fake_reap(**kwargs): + captured_args.update(kwargs) + return 0 + + monkeypatch.setenv("TERMINAL_LIFETIME_SECONDS", "0") + with patch("tools.environments.docker.reap_orphan_containers", _fake_reap): + terminal_tool._maybe_reap_docker_orphans({"docker_orphan_reaper": True}) + + assert captured_args.get("max_age_seconds") == 120, ( + f"expected floored 60 × 2 = 120, got {captured_args.get('max_age_seconds')}" + ) + + +def test_maybe_reap_passes_current_profile_as_filter(monkeypatch): + """The reaper must be scoped to the current Hermes profile — a research + profile must NEVER reap default's containers. Verifies the + profile-filter wiring.""" + _reset_reaper_gate() + captured_args = {} + + def _fake_reap(**kwargs): + captured_args.update(kwargs) + return 0 + + with patch("tools.environments.docker.reap_orphan_containers", _fake_reap), \ + patch("tools.environments.docker._get_active_profile_name", return_value="research-bot"): + terminal_tool._maybe_reap_docker_orphans({"docker_orphan_reaper": True}) + + assert captured_args.get("profile_filter") == "research-bot", ( + f"expected profile_filter='research-bot', got {captured_args.get('profile_filter')!r}" + ) + + +def test_maybe_reap_swallows_exceptions(monkeypatch): + """A reaper crash (docker daemon down, parse error in helper) must NOT + block env creation. The reaper is best-effort plumbing, not a critical + path; failures get logged at debug level and execution continues.""" + _reset_reaper_gate() + + def _exploding_reap(**kwargs): + raise RuntimeError("docker daemon ate the cat") + + with patch("tools.environments.docker.reap_orphan_containers", _exploding_reap): + # Must not raise + terminal_tool._maybe_reap_docker_orphans({"docker_orphan_reaper": True}) diff --git a/tests/tools/test_terminal_config_env_sync.py b/tests/tools/test_terminal_config_env_sync.py index ba15cc2670e..16131843417 100644 --- a/tests/tools/test_terminal_config_env_sync.py +++ b/tests/tools/test_terminal_config_env_sync.py @@ -244,3 +244,19 @@ def test_docker_persist_across_processes_is_bridged_everywhere(): assert "docker_persist_across_processes" in _gateway_env_map_keys() assert "docker_persist_across_processes" in _save_config_env_sync_keys() assert "TERMINAL_DOCKER_PERSIST_ACROSS_PROCESSES" in _terminal_tool_env_var_names() + + +def test_docker_orphan_reaper_is_bridged_everywhere(): + """Regression pin for the startup orphan reaper toggle (issue #20561). + + ``terminal.docker_orphan_reaper`` controls whether Hermes sweeps stale + Exited containers from prior SIGKILL'd processes at startup. Same + four-site bridge invariant — drift means + ``terminal.docker_orphan_reaper: false`` silently does nothing for one + entry point, and the reaper either runs when the operator disabled it + or fails to run when they enabled it. + """ + assert "docker_orphan_reaper" in _cli_env_map_keys() + assert "docker_orphan_reaper" in _gateway_env_map_keys() + assert "docker_orphan_reaper" in _save_config_env_sync_keys() + assert "TERMINAL_DOCKER_ORPHAN_REAPER" in _terminal_tool_env_var_names() diff --git a/tools/environments/docker.py b/tools/environments/docker.py index 30eb2230957..7ab97916f30 100644 --- a/tools/environments/docker.py +++ b/tools/environments/docker.py @@ -133,6 +133,132 @@ def _get_active_profile_name() -> str: return "default" +def reap_orphan_containers( + *, + max_age_seconds: int = 600, + profile_filter: str | None = None, + docker_exe: str | None = None, +) -> int: + """Remove stale hermes-tagged containers left behind by prior processes. + + Targets containers that match all of: + + * ``label=hermes-agent=1`` (created by this codebase) + * ``status=exited`` (running containers are NEVER reaped — they may + belong to a sibling Hermes process whose reuse path will pick them + up; killing them would crash the sibling mid-command) + * (optional) ``label=hermes-profile=`` (sweep only the + caller's profile by default; a hermes process in profile A must not + tear down profile B's containers) + * ``State.FinishedAt`` older than *max_age_seconds* ago (so a sibling + process that just exited and is about to be replaced doesn't get + its container yanked out from under it) + + Returns the number of containers removed. Best-effort: any failure + (docker daemon unreachable, slow inspect, parse error) is logged at + debug level and the function returns whatever it managed before the + failure. Safe to call repeatedly; idempotent. + + Issue #20561 — this is the safety net for SIGKILL / OOM / crashed + terminal exits that bypass the ``atexit`` cleanup hook. Without it, + even with the cleanup-fix in the prior commit, a hard-killed Hermes + process leaves its container behind permanently because there's no + subsequent Hermes process scheduled to reuse that exact (task, profile) + pair. + """ + docker = docker_exe or find_docker() or "docker" + filters = ["--filter", "label=hermes-agent=1", "--filter", "status=exited"] + if profile_filter: + filters.extend(["--filter", f"label=hermes-profile={_sanitize_label_value(profile_filter)}"]) + + try: + listing = subprocess.run( + [docker, "ps", "-a", *filters, "--format", "{{.ID}}"], + capture_output=True, text=True, timeout=15, check=False, + ) + except (subprocess.TimeoutExpired, OSError) as e: + logger.debug("orphan reaper docker ps failed: %s", e) + return 0 + if listing.returncode != 0: + logger.debug( + "orphan reaper docker ps returned %d: %s", + listing.returncode, listing.stderr.strip(), + ) + return 0 + + candidate_ids = [ln.strip() for ln in listing.stdout.splitlines() if ln.strip()] + if not candidate_ids: + return 0 + + # Inspect each candidate to get FinishedAt; reap only those exited + # long enough ago. Doing this per-container (rather than bulk inspect) + # keeps the failure blast radius to one container at a time. + import datetime + now = datetime.datetime.now(datetime.timezone.utc) + removed = 0 + for cid in candidate_ids: + finished_at = _container_finished_at(docker, cid) + if finished_at is None: + # Couldn't determine age — be conservative and leave it alone. + continue + age = (now - finished_at).total_seconds() + if age < max_age_seconds: + continue + try: + result = subprocess.run( + [docker, "rm", "-f", cid], + capture_output=True, text=True, timeout=30, + ) + if result.returncode == 0: + removed += 1 + logger.info( + "Reaped orphan container %s (exited %d seconds ago)", + cid[:12], int(age), + ) + else: + logger.debug( + "docker rm -f %s failed: %s", + cid[:12], result.stderr.strip(), + ) + except (subprocess.TimeoutExpired, OSError) as e: + logger.debug("orphan reaper docker rm %s failed: %s", cid[:12], e) + return removed + + +def _container_finished_at(docker_exe: str, container_id: str): + """Parse ``docker inspect`` FinishedAt for *container_id*. + + Returns a timezone-aware datetime, or ``None`` if the field is missing, + unparseable, or the zero-value ``0001-01-01T00:00:00Z`` Docker emits + for never-finished containers. ``None`` means "don't reap" — the caller + leaves the container alone. + """ + try: + result = subprocess.run( + [docker_exe, "inspect", "--format", "{{.State.FinishedAt}}", container_id], + capture_output=True, text=True, timeout=10, check=False, + ) + except (subprocess.TimeoutExpired, OSError) as e: + logger.debug("orphan reaper docker inspect %s failed: %s", container_id[:12], e) + return None + if result.returncode != 0: + return None + raw = result.stdout.strip() + if not raw or raw.startswith("0001-01-01"): + return None + # Docker emits RFC3339 with nanoseconds (e.g. "2026-05-28T13:45:00.123456789Z"). + # Python's fromisoformat handles microseconds but not nanoseconds; trim. + import re as _re + raw = _re.sub(r"(\.\d{6})\d+", r"\1", raw) + raw = raw.replace("Z", "+00:00") + try: + import datetime + return datetime.datetime.fromisoformat(raw) + except ValueError as e: + logger.debug("could not parse FinishedAt %r for %s: %s", raw, container_id[:12], e) + return None + + def find_docker() -> Optional[str]: """Locate the docker (or podman) CLI binary. @@ -564,7 +690,7 @@ class DockerEnvironment(BaseEnvironment): "hermes-profile": profile_name, } - # Cross-process reuse (issue #20561 — docs claim "ONE long-lived + # Cross-process container reuse (issue #20561 — docs claim "ONE long-lived # container shared across sessions"). If a prior Hermes process # already started a container for this (task_id, profile) and it # still exists, attach to it instead of starting a fresh one. This diff --git a/tools/terminal_tool.py b/tools/terminal_tool.py index 513fa31b9f9..6b0d42aa886 100644 --- a/tools/terminal_tool.py +++ b/tools/terminal_tool.py @@ -861,6 +861,78 @@ _creation_locks_lock = threading.Lock() # Protects _creation_locks dict itself _cleanup_thread = None _cleanup_running = False +# Once-per-process guard for the docker orphan reaper (issue #20561). +# Set when _maybe_reap_docker_orphans first runs; concurrent _create_environment +# calls for parallel subagents won't re-trigger the sweep. +_docker_orphan_reaper_ran = False +_docker_orphan_reaper_lock = threading.Lock() + + +def _maybe_reap_docker_orphans(container_config: Dict[str, Any]) -> None: + """Run the docker orphan reaper once per process, if enabled. + + Sweeps long-Exited containers labeled ``hermes-agent=1`` for the current + profile that match the issue #20561 leak class — containers left behind + by Hermes processes that exited without firing ``atexit`` (SIGKILL, + OOM, terminal-window-close). The reaper is conservative by default: + only Exited containers older than ``2 × lifetime_seconds`` and scoped to + the current profile. + + Gates: + + * ``terminal.docker_orphan_reaper: false`` disables it entirely (the + operator opted out — usually because they're running multiple + Hermes processes in the same profile and don't trust the + conservative defaults). + * ``_docker_orphan_reaper_ran`` flag — sweep runs once per Python + interpreter, not on every subagent / RL-rollout / parallel + ``terminal()`` call. + """ + global _docker_orphan_reaper_ran + if not container_config.get("docker_orphan_reaper", True): + return + # Cheap double-checked-locking: read without the lock, take the lock + # only on first run, recheck inside. + if _docker_orphan_reaper_ran: + return + with _docker_orphan_reaper_lock: + if _docker_orphan_reaper_ran: + return + _docker_orphan_reaper_ran = True + + # 2 × lifetime_seconds gives sibling Hermes processes a generous grace + # window. Floor at 60s so an operator with TERMINAL_LIFETIME_SECONDS=0 + # doesn't get an instant-reap that races their own setup. + # ``container_config`` only carries container_* keys, so read + # lifetime_seconds from the env var the rest of the module uses. + try: + lifetime = int(os.getenv("TERMINAL_LIFETIME_SECONDS", "300")) + except (TypeError, ValueError): + lifetime = 300 + lifetime = max(60, lifetime) + max_age = lifetime * 2 + + try: + from tools.environments.docker import ( + reap_orphan_containers, _get_active_profile_name, + ) + except ImportError: + return + try: + profile = _get_active_profile_name() + removed = reap_orphan_containers( + max_age_seconds=max_age, profile_filter=profile, + ) + if removed: + logger.info( + "Docker orphan reaper removed %d stale container(s) for profile %s", + removed, profile, + ) + except Exception as e: + # Never fail the env-creation path because of a janitor problem. + logger.debug("Docker orphan reaper raised: %s", e) + + # Per-task environment overrides registry. # Allows environments (e.g., TerminalBench2Env) to specify a custom Docker/Modal # image for a specific task_id BEFORE the agent loop starts. When the terminal or @@ -1033,6 +1105,13 @@ def _get_env_config() -> Dict[str, Any]: "docker_persist_across_processes": os.getenv( "TERMINAL_DOCKER_PERSIST_ACROSS_PROCESSES", "true" ).lower() in {"true", "1", "yes"}, + # Startup orphan reaper for hermes-tagged containers left behind by + # crashed / SIGKILL'd previous processes that bypassed atexit. + # Conservative: only sweeps Exited containers older than 2× the + # idle-reap window AND scoped to the current profile. Issue #20561. + "docker_orphan_reaper": os.getenv( + "TERMINAL_DOCKER_ORPHAN_REAPER", "true" + ).lower() in {"true", "1", "yes"}, } @@ -1081,6 +1160,13 @@ def _create_environment(env_type: str, image: str, cwd: str, timeout: int, return _LocalEnvironment(cwd=cwd, timeout=timeout) elif env_type == "docker": + # One-shot orphan reaper: clean up labeled containers left behind by + # prior Hermes processes that hit SIGKILL / OOM / a closed terminal + # before the atexit cleanup hook could run. Gated to once per + # process so concurrent _create_environment calls (parallel + # subagents, RL benchmarks) don't run the reaper N times. + # Disable via ``terminal.docker_orphan_reaper: false`` (issue #20561). + _maybe_reap_docker_orphans(cc) return _DockerEnvironment( image=image, cwd=cwd, timeout=timeout, cpu=cpu, memory=memory, disk=disk, @@ -1773,6 +1859,7 @@ def terminal_tool( "docker_run_as_host_user": config.get("docker_run_as_host_user", False), "docker_extra_args": config.get("docker_extra_args", []), "docker_persist_across_processes": config.get("docker_persist_across_processes", True), + "docker_orphan_reaper": config.get("docker_orphan_reaper", True), } local_config = None