fix(docker): startup orphan reaper for crashed-process containers

The cleanup-fix in the previous commit handles the graceful-exit leak: a
Hermes process that runs ``atexit`` will now actually wait on the docker
stop/rm worker thread, so containers either survive (persist mode) or are
fully removed (opt-out mode) by the time the interpreter exits.

But ``atexit`` doesn't fire on SIGKILL, OOM-kill, or terminal-window
close. Containers from those exits stay parked with no surviving Python
process to reuse or remove them, so they accumulate until the operator
intervenes with ``docker rm -f``. The cleanup-fix doesn't help this class
— there's no live cleanup() to fix.

This commit adds the safety net: a startup orphan reaper that runs once
per Hermes process and removes long-Exited hermes-labeled containers
that the prior commit couldn't reach.

Implementation:

* New ``reap_orphan_containers()`` in ``tools/environments/docker.py``.
  Filters: ``label=hermes-agent=1`` + ``status=exited`` + (optional)
  ``label=hermes-profile=<current>``. Per-container ``docker inspect``
  parses ``State.FinishedAt`` (with nanosecond-precision trimming for
  Python's microsecond-bound ``fromisoformat``); containers older than
  the threshold get ``docker rm -f``'d. The ``status=exited`` filter is
  load-bearing — a running container may belong to a sibling Hermes
  process whose reuse path will pick it up; killing it would crash the
  sibling mid-command. Single-container failures are logged and the
  sweep continues to the next candidate.

* New ``_maybe_reap_docker_orphans()`` helper in
  ``tools/terminal_tool.py``. Wired into ``_create_environment()`` for
  ``env_type == "docker"``. Gated by:

    - ``terminal.docker_orphan_reaper: true`` (default; opt-out for
      operators running multiple Hermes processes in the same profile
      who don't trust the conservative defaults)
    - ``_docker_orphan_reaper_ran`` module flag with double-checked
      locking — parallel subagents and RL rollouts don't trigger N
      concurrent docker ps storms
    - Age threshold = ``2 × TERMINAL_LIFETIME_SECONDS`` with a 60s floor
      (so ``TERMINAL_LIFETIME_SECONDS=0`` doesn't race the user's own
      setup)
    - Profile scoping — a research profile NEVER reaps the default
      profile's stragglers
    - Exception swallow — a janitor failure must never block container
      creation

* New config ``terminal.docker_orphan_reaper`` wired through all four
  config-bridge sites (cli.py, gateway/run.py, hermes_cli/config.py,
  tests/conftest.py) and pinned by
  ``test_docker_orphan_reaper_is_bridged_everywhere``.

Coverage:

* 9 new unit tests in test_docker_environment.py — happy path, recent-
  container sparing, profile scoping, unparseable-timestamp safety,
  docker-ps-failure handling, partial-failure continuation, nanosecond
  timestamp parsing, zero-value FinishedAt rejection.
* 6 new integration tests in test_docker_orphan_reaper_integration.py
  — once-per-process gate, disable-flag respected, lifetime doubling
  with 60s floor, current-profile filter wiring, exception swallow.
* 1 new bridge-invariant regression test.

Closes #20561 (combined with the two prior commits on this branch).
This commit is contained in:
Ben 2026-05-28 14:09:13 +10:00 committed by Ben Barclay
parent ac8e238bc8
commit d77d877665
9 changed files with 624 additions and 1 deletions

1
cli.py
View file

@ -577,6 +577,7 @@ def load_cli_config() -> Dict[str, Any]:
"docker_mount_cwd_to_workspace": "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE",
"docker_run_as_host_user": "TERMINAL_DOCKER_RUN_AS_HOST_USER",
"docker_persist_across_processes": "TERMINAL_DOCKER_PERSIST_ACROSS_PROCESSES",
"docker_orphan_reaper": "TERMINAL_DOCKER_ORPHAN_REAPER",
"sandbox_dir": "TERMINAL_SANDBOX_DIR",
# Persistent shell (non-local backends)
"persistent_shell": "TERMINAL_PERSISTENT_SHELL",

View file

@ -832,6 +832,7 @@ if _config_path.exists():
"docker_mount_cwd_to_workspace": "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE",
"docker_run_as_host_user": "TERMINAL_DOCKER_RUN_AS_HOST_USER",
"docker_persist_across_processes": "TERMINAL_DOCKER_PERSIST_ACROSS_PROCESSES",
"docker_orphan_reaper": "TERMINAL_DOCKER_ORPHAN_REAPER",
"sandbox_dir": "TERMINAL_SANDBOX_DIR",
"persistent_shell": "TERMINAL_PERSISTENT_SHELL",
}

View file

@ -5552,6 +5552,7 @@ def set_config_value(key: str, value: str):
"terminal.docker_mount_cwd_to_workspace": "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE",
"terminal.docker_run_as_host_user": "TERMINAL_DOCKER_RUN_AS_HOST_USER",
"terminal.docker_persist_across_processes": "TERMINAL_DOCKER_PERSIST_ACROSS_PROCESSES",
"terminal.docker_orphan_reaper": "TERMINAL_DOCKER_ORPHAN_REAPER",
"terminal.docker_env": "TERMINAL_DOCKER_ENV",
# terminal.cwd intentionally excluded — CLI resolves at runtime,
# gateway bridges it in gateway/run.py. Persisting to .env causes

View file

@ -228,6 +228,7 @@ _HERMES_BEHAVIORAL_VARS = frozenset({
"TERMINAL_CONTAINER_MEMORY",
"TERMINAL_CONTAINER_PERSISTENT",
"TERMINAL_DOCKER_PERSIST_ACROSS_PROCESSES",
"TERMINAL_DOCKER_ORPHAN_REAPER",
"TERMINAL_DOCKER_RUN_AS_HOST_USER",
"BROWSER_CDP_URL",
"CAMOFOX_URL",

View file

@ -975,3 +975,254 @@ def test_cleanup_on_env_with_no_container_id_does_not_raise(monkeypatch):
env._home_dir = None
# No exception expected.
env.cleanup()
# ── Orphan reaper (issue #20561) ──────────────────────────────────
def _now_iso(offset_seconds: int = 0) -> str:
"""Return an RFC3339 timestamp ``offset_seconds`` in the past."""
import datetime
t = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(seconds=offset_seconds)
# Format like Docker emits — with nanoseconds-style trailing digits.
return t.isoformat().replace("+00:00", ".123456789Z")
def _reaper_run_mock(monkeypatch, ps_ids: list[str], inspect_responses: dict[str, str],
rm_succeeds: bool = True):
"""Build a subprocess.run mock for reaper tests.
* ``ps_ids`` what ``docker ps -a --filter ... --format '{{.ID}}'`` returns
* ``inspect_responses[cid]`` what ``docker inspect ... FinishedAt`` returns
for each cid; ``""`` means "field unset".
* ``rm_succeeds`` whether ``docker rm -f`` returns 0.
Captures every call so tests can assert which containers were rm'd.
"""
calls = []
def _run(cmd, **kwargs):
calls.append((list(cmd) if isinstance(cmd, list) else cmd, kwargs))
if not isinstance(cmd, list) or len(cmd) < 2:
return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
sub = cmd[1]
if sub == "ps":
return subprocess.CompletedProcess(
cmd, 0, stdout="\n".join(ps_ids) + ("\n" if ps_ids else ""), stderr="",
)
if sub == "inspect":
# cmd is [docker, inspect, --format, '{{.State.FinishedAt}}', cid]
cid = cmd[-1]
return subprocess.CompletedProcess(
cmd, 0, stdout=inspect_responses.get(cid, "") + "\n", stderr="",
)
if sub == "rm":
return subprocess.CompletedProcess(
cmd, 0 if rm_succeeds else 1,
stdout="", stderr="" if rm_succeeds else "no such container",
)
return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
monkeypatch.setattr(docker_env.subprocess, "run", _run)
return calls
def test_reap_orphan_returns_zero_when_no_matches(monkeypatch):
"""No labeled containers → no rm calls, returns 0. Establishes the
happy-path baseline for the orphan reaper (issue #20561)."""
calls = _reaper_run_mock(monkeypatch, ps_ids=[], inspect_responses={})
removed = docker_env.reap_orphan_containers(
max_age_seconds=600, profile_filter="default", docker_exe="/usr/bin/docker",
)
assert removed == 0
rms = [c for c in calls if isinstance(c[0], list) and c[0][1:2] == ["rm"]]
assert not rms, "no rm calls expected when ps returns empty"
def test_reap_orphan_removes_stale_exited_container(monkeypatch):
"""An Exited container older than max_age_seconds must be removed.
This is the core repair path for issue #20561 — without the reaper,
SIGKILL'd Hermes processes leak containers permanently."""
old = _now_iso(offset_seconds=900) # 15 minutes ago
calls = _reaper_run_mock(
monkeypatch, ps_ids=["old-cid"], inspect_responses={"old-cid": old},
)
removed = docker_env.reap_orphan_containers(
max_age_seconds=600, profile_filter="default", docker_exe="/usr/bin/docker",
)
assert removed == 1
rms = [c for c in calls if isinstance(c[0], list) and c[0][1:2] == ["rm"]]
assert len(rms) == 1
assert "old-cid" in rms[0][0], f"expected rm of old-cid, got {rms[0][0]}"
def test_reap_orphan_spares_recently_exited_container(monkeypatch):
"""A container exited within max_age_seconds must NOT be reaped — that
container belongs to a Hermes process that just finished and may be
about to be replaced. Conservative window prevents racing sibling
processes."""
recent = _now_iso(offset_seconds=60) # 1 minute ago
calls = _reaper_run_mock(
monkeypatch, ps_ids=["recent-cid"], inspect_responses={"recent-cid": recent},
)
removed = docker_env.reap_orphan_containers(
max_age_seconds=600, profile_filter="default", docker_exe="/usr/bin/docker",
)
assert removed == 0
rms = [c for c in calls if isinstance(c[0], list) and c[0][1:2] == ["rm"]]
assert not rms, f"recent container must not be reaped, got rm calls: {rms}"
def test_reap_orphan_scopes_to_profile_filter_via_label(monkeypatch):
"""The reaper must pass ``--filter label=hermes-profile=<profile>`` to
docker ps so it never sweeps another profile's containers. A research
profile must not tear down the default profile's stragglers."""
calls = _reaper_run_mock(monkeypatch, ps_ids=[], inspect_responses={})
docker_env.reap_orphan_containers(
max_age_seconds=600, profile_filter="research-bot", docker_exe="/usr/bin/docker",
)
ps_calls = [c for c in calls if isinstance(c[0], list) and c[0][1:2] == ["ps"]]
assert ps_calls, "expected at least one docker ps call"
flat = " ".join(ps_calls[0][0])
assert "label=hermes-profile=research-bot" in flat, (
f"profile filter not applied to docker ps; got args: {ps_calls[0][0]}"
)
assert "label=hermes-agent=1" in flat, (
f"hermes-agent label filter must also be applied; got: {ps_calls[0][0]}"
)
assert "status=exited" in flat, (
"must filter to exited containers only — running containers may "
"belong to a sibling Hermes process and must NEVER be reaped"
)
def test_reap_orphan_skips_container_with_unparseable_finished_at(monkeypatch):
"""If docker inspect returns the zero-value ``0001-01-01T00:00:00Z`` (no
FinishedAt yet) or an unparseable timestamp, the reaper must leave the
container alone. Defensive never reap a container whose age we can't
determine."""
calls = _reaper_run_mock(
monkeypatch,
ps_ids=["never-finished", "garbage-ts"],
inspect_responses={
"never-finished": "0001-01-01T00:00:00Z",
"garbage-ts": "not-a-timestamp",
},
)
removed = docker_env.reap_orphan_containers(
max_age_seconds=600, profile_filter="default", docker_exe="/usr/bin/docker",
)
assert removed == 0
rms = [c for c in calls if isinstance(c[0], list) and c[0][1:2] == ["rm"]]
assert not rms, (
f"reaper must NOT remove containers with unparseable FinishedAt; got: {rms}"
)
def test_reap_orphan_handles_docker_ps_failure_gracefully(monkeypatch):
"""If docker ps itself fails (daemon down, permission denied), the
reaper returns 0 without crashing. The reaper is best-effort plumbing,
not a critical path it must never block container creation."""
def _failing_ps(cmd, **kwargs):
if isinstance(cmd, list) and len(cmd) >= 2 and cmd[1] == "ps":
return subprocess.CompletedProcess(cmd, 1, stdout="", stderr="Cannot connect to daemon")
return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
monkeypatch.setattr(docker_env.subprocess, "run", _failing_ps)
# Must not raise
removed = docker_env.reap_orphan_containers(
max_age_seconds=600, profile_filter="default", docker_exe="/usr/bin/docker",
)
assert removed == 0
def test_reap_orphan_continues_after_individual_rm_failure(monkeypatch):
"""If ``docker rm -f`` fails on one container (already removed by a
concurrent process, container locked, etc.), the reaper must log and
continue to the next candidate rather than aborting the whole sweep."""
old = _now_iso(offset_seconds=900)
rm_calls = []
def _run(cmd, **kwargs):
if not isinstance(cmd, list) or len(cmd) < 2:
return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
sub = cmd[1]
if sub == "ps":
return subprocess.CompletedProcess(
cmd, 0, stdout="cid-a\ncid-b\ncid-c\n", stderr="",
)
if sub == "inspect":
return subprocess.CompletedProcess(cmd, 0, stdout=old + "\n", stderr="")
if sub == "rm":
rm_calls.append(cmd[-1])
# cid-b fails; cid-a and cid-c succeed.
if cmd[-1] == "cid-b":
return subprocess.CompletedProcess(cmd, 1, stdout="", stderr="no such container")
return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
monkeypatch.setattr(docker_env.subprocess, "run", _run)
removed = docker_env.reap_orphan_containers(
max_age_seconds=600, profile_filter="default", docker_exe="/usr/bin/docker",
)
# All three were attempted, two succeeded.
assert removed == 2
assert set(rm_calls) == {"cid-a", "cid-b", "cid-c"}, (
f"reaper must attempt all candidates even when one fails; got: {rm_calls}"
)
def test_container_finished_at_parses_nanosecond_timestamp(monkeypatch):
"""Docker emits FinishedAt with nanosecond precision (RFC3339 with up to
9 fractional digits), but Python's fromisoformat caps at microseconds.
The helper must trim the extra digits without raising otherwise every
candidate gets skipped and the reaper does nothing."""
def _run(cmd, **kwargs):
return subprocess.CompletedProcess(
cmd, 0,
stdout="2026-05-28T13:45:00.123456789Z\n",
stderr="",
)
monkeypatch.setattr(docker_env.subprocess, "run", _run)
result = docker_env._container_finished_at("/usr/bin/docker", "test-cid")
assert result is not None, "must parse RFC3339 with nanoseconds"
import datetime
assert result.tzinfo == datetime.timezone.utc
assert result.year == 2026 and result.month == 5 and result.day == 28
def test_container_finished_at_returns_none_on_zero_value():
"""Docker's zero-value ``0001-01-01T00:00:00Z`` (never finished) must
map to None so the reaper treats the container as unreapable."""
# Direct test of the parsing helper — no subprocess needed since the
# check happens after the inspect call returns.
import subprocess as _subprocess
class _MockRun:
def __init__(self, stdout):
self.returncode = 0
self.stdout = stdout
self.stderr = ""
import unittest.mock
with unittest.mock.patch.object(
docker_env.subprocess, "run", return_value=_MockRun("0001-01-01T00:00:00Z\n"),
):
result = docker_env._container_finished_at("/usr/bin/docker", "never-finished")
assert result is None

View file

@ -0,0 +1,139 @@
"""Integration tests for the docker orphan-reaper wiring in terminal_tool.
The reaper itself is unit-tested in tests/tools/test_docker_environment.py
under the "Orphan reaper" section. These tests cover the terminal_tool-side
gates: once-per-process behavior, the disable flag, and the
``lifetime_seconds`` doubling that determines the reaper's age threshold.
Issue #20561 — without these gates, parallel subagents would each fire the
reaper on container creation, and the ``terminal.docker_orphan_reaper: false``
opt-out would silently do nothing.
"""
import os
from unittest.mock import patch
import tools.terminal_tool as terminal_tool
def _reset_reaper_gate():
"""Clear the once-per-process flag between tests."""
terminal_tool._docker_orphan_reaper_ran = False
def test_maybe_reap_runs_once_per_process(monkeypatch):
"""The reaper sweep must run at most once per Python interpreter.
Parallel subagents that each call _create_environment(env_type='docker')
would otherwise fire N concurrent docker ps + inspect storms against the
daemon and waste 510s of startup."""
_reset_reaper_gate()
call_count = {"reap": 0}
def _fake_reap(**kwargs):
call_count["reap"] += 1
return 0
with patch("tools.environments.docker.reap_orphan_containers", _fake_reap):
config = {"docker_orphan_reaper": True}
terminal_tool._maybe_reap_docker_orphans(config)
terminal_tool._maybe_reap_docker_orphans(config)
terminal_tool._maybe_reap_docker_orphans(config)
assert call_count["reap"] == 1, (
f"reaper must run exactly once per process; got {call_count['reap']} calls"
)
def test_maybe_reap_respects_disable_flag(monkeypatch):
"""``terminal.docker_orphan_reaper: false`` (via container_config) must
skip the sweep entirely no docker ps, no inspect, no rm. The escape
hatch for operators running multiple Hermes processes in the same
profile."""
_reset_reaper_gate()
call_count = {"reap": 0}
def _fake_reap(**kwargs):
call_count["reap"] += 1
return 0
with patch("tools.environments.docker.reap_orphan_containers", _fake_reap):
terminal_tool._maybe_reap_docker_orphans({"docker_orphan_reaper": False})
assert call_count["reap"] == 0, "disabled reaper must not run any docker calls"
# The once-per-process gate must NOT be tripped when the reaper is
# disabled — that would prevent a subsequent toggle to true from working.
assert terminal_tool._docker_orphan_reaper_ran is False
def test_maybe_reap_doubles_lifetime_for_max_age(monkeypatch):
"""The reaper's age threshold is ``2 × lifetime_seconds`` (with a 60s
floor). Generous default gives sibling Hermes processes ample grace
to be replaced without their just-exited containers being yanked."""
_reset_reaper_gate()
captured_args = {}
def _fake_reap(**kwargs):
captured_args.update(kwargs)
return 0
monkeypatch.setenv("TERMINAL_LIFETIME_SECONDS", "300")
with patch("tools.environments.docker.reap_orphan_containers", _fake_reap):
terminal_tool._maybe_reap_docker_orphans({"docker_orphan_reaper": True})
assert captured_args.get("max_age_seconds") == 600, (
f"expected 2 × 300 = 600, got {captured_args.get('max_age_seconds')}"
)
def test_maybe_reap_floors_at_60_seconds(monkeypatch):
"""A user pinning TERMINAL_LIFETIME_SECONDS=0 (or any value <30) would
otherwise get an effective age threshold of zero, which would race the
user's own just-started container creation. Floor at 60s × 2 = 120s."""
_reset_reaper_gate()
captured_args = {}
def _fake_reap(**kwargs):
captured_args.update(kwargs)
return 0
monkeypatch.setenv("TERMINAL_LIFETIME_SECONDS", "0")
with patch("tools.environments.docker.reap_orphan_containers", _fake_reap):
terminal_tool._maybe_reap_docker_orphans({"docker_orphan_reaper": True})
assert captured_args.get("max_age_seconds") == 120, (
f"expected floored 60 × 2 = 120, got {captured_args.get('max_age_seconds')}"
)
def test_maybe_reap_passes_current_profile_as_filter(monkeypatch):
"""The reaper must be scoped to the current Hermes profile — a research
profile must NEVER reap default's containers. Verifies the
profile-filter wiring."""
_reset_reaper_gate()
captured_args = {}
def _fake_reap(**kwargs):
captured_args.update(kwargs)
return 0
with patch("tools.environments.docker.reap_orphan_containers", _fake_reap), \
patch("tools.environments.docker._get_active_profile_name", return_value="research-bot"):
terminal_tool._maybe_reap_docker_orphans({"docker_orphan_reaper": True})
assert captured_args.get("profile_filter") == "research-bot", (
f"expected profile_filter='research-bot', got {captured_args.get('profile_filter')!r}"
)
def test_maybe_reap_swallows_exceptions(monkeypatch):
"""A reaper crash (docker daemon down, parse error in helper) must NOT
block env creation. The reaper is best-effort plumbing, not a critical
path; failures get logged at debug level and execution continues."""
_reset_reaper_gate()
def _exploding_reap(**kwargs):
raise RuntimeError("docker daemon ate the cat")
with patch("tools.environments.docker.reap_orphan_containers", _exploding_reap):
# Must not raise
terminal_tool._maybe_reap_docker_orphans({"docker_orphan_reaper": True})

View file

@ -244,3 +244,19 @@ def test_docker_persist_across_processes_is_bridged_everywhere():
assert "docker_persist_across_processes" in _gateway_env_map_keys()
assert "docker_persist_across_processes" in _save_config_env_sync_keys()
assert "TERMINAL_DOCKER_PERSIST_ACROSS_PROCESSES" in _terminal_tool_env_var_names()
def test_docker_orphan_reaper_is_bridged_everywhere():
"""Regression pin for the startup orphan reaper toggle (issue #20561).
``terminal.docker_orphan_reaper`` controls whether Hermes sweeps stale
Exited containers from prior SIGKILL'd processes at startup. Same
four-site bridge invariant drift means
``terminal.docker_orphan_reaper: false`` silently does nothing for one
entry point, and the reaper either runs when the operator disabled it
or fails to run when they enabled it.
"""
assert "docker_orphan_reaper" in _cli_env_map_keys()
assert "docker_orphan_reaper" in _gateway_env_map_keys()
assert "docker_orphan_reaper" in _save_config_env_sync_keys()
assert "TERMINAL_DOCKER_ORPHAN_REAPER" in _terminal_tool_env_var_names()

View file

@ -133,6 +133,132 @@ def _get_active_profile_name() -> str:
return "default"
def reap_orphan_containers(
*,
max_age_seconds: int = 600,
profile_filter: str | None = None,
docker_exe: str | None = None,
) -> int:
"""Remove stale hermes-tagged containers left behind by prior processes.
Targets containers that match all of:
* ``label=hermes-agent=1`` (created by this codebase)
* ``status=exited`` (running containers are NEVER reaped they may
belong to a sibling Hermes process whose reuse path will pick them
up; killing them would crash the sibling mid-command)
* (optional) ``label=hermes-profile=<profile_filter>`` (sweep only the
caller's profile by default; a hermes process in profile A must not
tear down profile B's containers)
* ``State.FinishedAt`` older than *max_age_seconds* ago (so a sibling
process that just exited and is about to be replaced doesn't get
its container yanked out from under it)
Returns the number of containers removed. Best-effort: any failure
(docker daemon unreachable, slow inspect, parse error) is logged at
debug level and the function returns whatever it managed before the
failure. Safe to call repeatedly; idempotent.
Issue #20561 — this is the safety net for SIGKILL / OOM / crashed
terminal exits that bypass the ``atexit`` cleanup hook. Without it,
even with the cleanup-fix in the prior commit, a hard-killed Hermes
process leaves its container behind permanently because there's no
subsequent Hermes process scheduled to reuse that exact (task, profile)
pair.
"""
docker = docker_exe or find_docker() or "docker"
filters = ["--filter", "label=hermes-agent=1", "--filter", "status=exited"]
if profile_filter:
filters.extend(["--filter", f"label=hermes-profile={_sanitize_label_value(profile_filter)}"])
try:
listing = subprocess.run(
[docker, "ps", "-a", *filters, "--format", "{{.ID}}"],
capture_output=True, text=True, timeout=15, check=False,
)
except (subprocess.TimeoutExpired, OSError) as e:
logger.debug("orphan reaper docker ps failed: %s", e)
return 0
if listing.returncode != 0:
logger.debug(
"orphan reaper docker ps returned %d: %s",
listing.returncode, listing.stderr.strip(),
)
return 0
candidate_ids = [ln.strip() for ln in listing.stdout.splitlines() if ln.strip()]
if not candidate_ids:
return 0
# Inspect each candidate to get FinishedAt; reap only those exited
# long enough ago. Doing this per-container (rather than bulk inspect)
# keeps the failure blast radius to one container at a time.
import datetime
now = datetime.datetime.now(datetime.timezone.utc)
removed = 0
for cid in candidate_ids:
finished_at = _container_finished_at(docker, cid)
if finished_at is None:
# Couldn't determine age — be conservative and leave it alone.
continue
age = (now - finished_at).total_seconds()
if age < max_age_seconds:
continue
try:
result = subprocess.run(
[docker, "rm", "-f", cid],
capture_output=True, text=True, timeout=30,
)
if result.returncode == 0:
removed += 1
logger.info(
"Reaped orphan container %s (exited %d seconds ago)",
cid[:12], int(age),
)
else:
logger.debug(
"docker rm -f %s failed: %s",
cid[:12], result.stderr.strip(),
)
except (subprocess.TimeoutExpired, OSError) as e:
logger.debug("orphan reaper docker rm %s failed: %s", cid[:12], e)
return removed
def _container_finished_at(docker_exe: str, container_id: str):
"""Parse ``docker inspect`` FinishedAt for *container_id*.
Returns a timezone-aware datetime, or ``None`` if the field is missing,
unparseable, or the zero-value ``0001-01-01T00:00:00Z`` Docker emits
for never-finished containers. ``None`` means "don't reap" the caller
leaves the container alone.
"""
try:
result = subprocess.run(
[docker_exe, "inspect", "--format", "{{.State.FinishedAt}}", container_id],
capture_output=True, text=True, timeout=10, check=False,
)
except (subprocess.TimeoutExpired, OSError) as e:
logger.debug("orphan reaper docker inspect %s failed: %s", container_id[:12], e)
return None
if result.returncode != 0:
return None
raw = result.stdout.strip()
if not raw or raw.startswith("0001-01-01"):
return None
# Docker emits RFC3339 with nanoseconds (e.g. "2026-05-28T13:45:00.123456789Z").
# Python's fromisoformat handles microseconds but not nanoseconds; trim.
import re as _re
raw = _re.sub(r"(\.\d{6})\d+", r"\1", raw)
raw = raw.replace("Z", "+00:00")
try:
import datetime
return datetime.datetime.fromisoformat(raw)
except ValueError as e:
logger.debug("could not parse FinishedAt %r for %s: %s", raw, container_id[:12], e)
return None
def find_docker() -> Optional[str]:
"""Locate the docker (or podman) CLI binary.
@ -564,7 +690,7 @@ class DockerEnvironment(BaseEnvironment):
"hermes-profile": profile_name,
}
# Cross-process reuse (issue #20561 — docs claim "ONE long-lived
# Cross-process container reuse (issue #20561 — docs claim "ONE long-lived
# container shared across sessions"). If a prior Hermes process
# already started a container for this (task_id, profile) and it
# still exists, attach to it instead of starting a fresh one. This

View file

@ -861,6 +861,78 @@ _creation_locks_lock = threading.Lock() # Protects _creation_locks dict itself
_cleanup_thread = None
_cleanup_running = False
# Once-per-process guard for the docker orphan reaper (issue #20561).
# Set when _maybe_reap_docker_orphans first runs; concurrent _create_environment
# calls for parallel subagents won't re-trigger the sweep.
_docker_orphan_reaper_ran = False
_docker_orphan_reaper_lock = threading.Lock()
def _maybe_reap_docker_orphans(container_config: Dict[str, Any]) -> None:
"""Run the docker orphan reaper once per process, if enabled.
Sweeps long-Exited containers labeled ``hermes-agent=1`` for the current
profile that match the issue #20561 leak class — containers left behind
by Hermes processes that exited without firing ``atexit`` (SIGKILL,
OOM, terminal-window-close). The reaper is conservative by default:
only Exited containers older than ``2 × lifetime_seconds`` and scoped to
the current profile.
Gates:
* ``terminal.docker_orphan_reaper: false`` disables it entirely (the
operator opted out usually because they're running multiple
Hermes processes in the same profile and don't trust the
conservative defaults).
* ``_docker_orphan_reaper_ran`` flag sweep runs once per Python
interpreter, not on every subagent / RL-rollout / parallel
``terminal()`` call.
"""
global _docker_orphan_reaper_ran
if not container_config.get("docker_orphan_reaper", True):
return
# Cheap double-checked-locking: read without the lock, take the lock
# only on first run, recheck inside.
if _docker_orphan_reaper_ran:
return
with _docker_orphan_reaper_lock:
if _docker_orphan_reaper_ran:
return
_docker_orphan_reaper_ran = True
# 2 × lifetime_seconds gives sibling Hermes processes a generous grace
# window. Floor at 60s so an operator with TERMINAL_LIFETIME_SECONDS=0
# doesn't get an instant-reap that races their own setup.
# ``container_config`` only carries container_* keys, so read
# lifetime_seconds from the env var the rest of the module uses.
try:
lifetime = int(os.getenv("TERMINAL_LIFETIME_SECONDS", "300"))
except (TypeError, ValueError):
lifetime = 300
lifetime = max(60, lifetime)
max_age = lifetime * 2
try:
from tools.environments.docker import (
reap_orphan_containers, _get_active_profile_name,
)
except ImportError:
return
try:
profile = _get_active_profile_name()
removed = reap_orphan_containers(
max_age_seconds=max_age, profile_filter=profile,
)
if removed:
logger.info(
"Docker orphan reaper removed %d stale container(s) for profile %s",
removed, profile,
)
except Exception as e:
# Never fail the env-creation path because of a janitor problem.
logger.debug("Docker orphan reaper raised: %s", e)
# Per-task environment overrides registry.
# Allows environments (e.g., TerminalBench2Env) to specify a custom Docker/Modal
# image for a specific task_id BEFORE the agent loop starts. When the terminal or
@ -1033,6 +1105,13 @@ def _get_env_config() -> Dict[str, Any]:
"docker_persist_across_processes": os.getenv(
"TERMINAL_DOCKER_PERSIST_ACROSS_PROCESSES", "true"
).lower() in {"true", "1", "yes"},
# Startup orphan reaper for hermes-tagged containers left behind by
# crashed / SIGKILL'd previous processes that bypassed atexit.
# Conservative: only sweeps Exited containers older than 2× the
# idle-reap window AND scoped to the current profile. Issue #20561.
"docker_orphan_reaper": os.getenv(
"TERMINAL_DOCKER_ORPHAN_REAPER", "true"
).lower() in {"true", "1", "yes"},
}
@ -1081,6 +1160,13 @@ def _create_environment(env_type: str, image: str, cwd: str, timeout: int,
return _LocalEnvironment(cwd=cwd, timeout=timeout)
elif env_type == "docker":
# One-shot orphan reaper: clean up labeled containers left behind by
# prior Hermes processes that hit SIGKILL / OOM / a closed terminal
# before the atexit cleanup hook could run. Gated to once per
# process so concurrent _create_environment calls (parallel
# subagents, RL benchmarks) don't run the reaper N times.
# Disable via ``terminal.docker_orphan_reaper: false`` (issue #20561).
_maybe_reap_docker_orphans(cc)
return _DockerEnvironment(
image=image, cwd=cwd, timeout=timeout,
cpu=cpu, memory=memory, disk=disk,
@ -1773,6 +1859,7 @@ def terminal_tool(
"docker_run_as_host_user": config.get("docker_run_as_host_user", False),
"docker_extra_args": config.get("docker_extra_args", []),
"docker_persist_across_processes": config.get("docker_persist_across_processes", True),
"docker_orphan_reaper": config.get("docker_orphan_reaper", True),
}
local_config = None