hermes-agent/tests/tools/test_docker_environment.py
Ben 2f0f03c40d fix(docker): cleanup_vm() default honors persist mode (don't kill container on session close)
Commit 4 made cleanup_vm() default to force_remove=True, which was wrong:
cleanup_vm() is called from AIAgent.close() (TUI session close at
tui_gateway/server.py:2991, gateway session teardown at gateway/run.py:3569)
and from per-turn cleanup (agent/chat_completion_helpers.py:1517). All
three are session-lifecycle events that should honor persist mode, not
explicit user-initiated teardown.

Ben reported the symptom: container shared between multiple TUI sessions
(good) but killed as soon as any session closed (bad). With force_remove=True
as the default, every `session.close` JSON-RPC tore down the container.

The fix is to flip cleanup_vm()'s force_remove default back to False.
The kwarg still exists for future explicit-teardown paths (`/reset`-style
flows, "destroy my sandbox" commands) that haven't been wired up yet.

Two new unit tests pin the behavior:

* `test_cleanup_vm_default_honors_persist_mode` — asserts
  `cleanup_vm(task_id)` does neither docker stop nor docker rm on a
  persist-mode container (the regression Ben caught).
* `test_cleanup_vm_force_remove_tears_down_persist_container` —
  asserts the kwarg still flows through the runtime-signature-inspection
  plumbing to the backend's cleanup().

E2E verified against real Docker (in addition to all 17 existing checks):

  ✓ Default cleanup_vm() leaves persist-mode container running
  ✓ cleanup_vm(force_remove=True) removed the container

Refs #20561
2026-05-29 11:49:54 +10:00

1370 lines
56 KiB
Python

import logging
from io import StringIO
import subprocess
import sys
import types
import pytest
from tools.environments import docker as docker_env
def _mock_subprocess_run(monkeypatch):
"""Mock subprocess.run to intercept docker run -d and docker version calls.
Returns a list of captured (cmd, kwargs) tuples for inspection.
"""
calls = []
def _run(cmd, **kwargs):
calls.append((list(cmd) if isinstance(cmd, list) else cmd, kwargs))
if isinstance(cmd, list) and len(cmd) >= 2:
if cmd[1] == "version":
return subprocess.CompletedProcess(cmd, 0, stdout="Docker version", stderr="")
if cmd[1] == "run":
return subprocess.CompletedProcess(cmd, 0, stdout="fake-container-id\n", stderr="")
return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
monkeypatch.setattr(docker_env.subprocess, "run", _run)
return calls
def _make_dummy_env(**kwargs):
"""Helper to construct DockerEnvironment with minimal required args."""
return docker_env.DockerEnvironment(
image=kwargs.get("image", "python:3.11"),
cwd=kwargs.get("cwd", "/root"),
timeout=kwargs.get("timeout", 60),
cpu=kwargs.get("cpu", 0),
memory=kwargs.get("memory", 0),
disk=kwargs.get("disk", 0),
persistent_filesystem=kwargs.get("persistent_filesystem", False),
task_id=kwargs.get("task_id", "test-task"),
volumes=kwargs.get("volumes", []),
network=kwargs.get("network", True),
host_cwd=kwargs.get("host_cwd"),
auto_mount_cwd=kwargs.get("auto_mount_cwd", False),
env=kwargs.get("env"),
run_as_host_user=kwargs.get("run_as_host_user", False),
)
def test_ensure_docker_available_logs_and_raises_when_not_found(monkeypatch, caplog):
"""When docker cannot be found, raise a clear error before container setup."""
monkeypatch.setattr(docker_env, "find_docker", lambda: None)
monkeypatch.setattr(
docker_env.subprocess,
"run",
lambda *args, **kwargs: pytest.fail("subprocess.run should not be called when docker is missing"),
)
with caplog.at_level(logging.ERROR):
with pytest.raises(RuntimeError) as excinfo:
_make_dummy_env()
assert "Docker executable not found in PATH or known install locations" in str(excinfo.value)
assert any(
"no docker executable was found in PATH or known install locations"
in record.getMessage()
for record in caplog.records
)
def test_ensure_docker_available_logs_and_raises_on_timeout(monkeypatch, caplog):
"""When docker version times out, surface a helpful error instead of hanging."""
def _raise_timeout(*args, **kwargs):
raise subprocess.TimeoutExpired(cmd=["/custom/docker", "version"], timeout=5)
monkeypatch.setattr(docker_env, "find_docker", lambda: "/custom/docker")
monkeypatch.setattr(docker_env.subprocess, "run", _raise_timeout)
with caplog.at_level(logging.ERROR):
with pytest.raises(RuntimeError) as excinfo:
_make_dummy_env()
assert "Docker daemon is not responding" in str(excinfo.value)
assert any(
"/custom/docker version' timed out" in record.getMessage()
for record in caplog.records
)
def test_ensure_docker_available_uses_resolved_executable(monkeypatch):
"""When docker is found outside PATH, preflight should use that resolved path."""
calls = []
def _run(cmd, **kwargs):
calls.append((cmd, kwargs))
return subprocess.CompletedProcess(cmd, 0, stdout="Docker version", stderr="")
monkeypatch.setattr(docker_env, "find_docker", lambda: "/opt/homebrew/bin/docker")
monkeypatch.setattr(docker_env.subprocess, "run", _run)
docker_env._ensure_docker_available()
assert calls == [
(["/opt/homebrew/bin/docker", "version"], {
"capture_output": True,
"text": True,
"timeout": 5,
})
]
def test_auto_mount_host_cwd_adds_volume(monkeypatch, tmp_path):
"""Opt-in docker cwd mounting should bind the host cwd to /workspace."""
project_dir = tmp_path / "my-project"
project_dir.mkdir()
monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
calls = _mock_subprocess_run(monkeypatch)
_make_dummy_env(
cwd="/workspace",
host_cwd=str(project_dir),
auto_mount_cwd=True,
)
# Find the docker run call and check its args
run_calls = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"]
assert run_calls, "docker run should have been called"
run_args_str = " ".join(run_calls[0][0])
assert f"{project_dir}:/workspace" in run_args_str
def test_auto_mount_disabled_by_default(monkeypatch, tmp_path):
"""Host cwd should not be mounted unless the caller explicitly opts in."""
project_dir = tmp_path / "my-project"
project_dir.mkdir()
monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
calls = _mock_subprocess_run(monkeypatch)
_make_dummy_env(
cwd="/root",
host_cwd=str(project_dir),
auto_mount_cwd=False,
)
run_calls = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"]
assert run_calls, "docker run should have been called"
run_args_str = " ".join(run_calls[0][0])
assert f"{project_dir}:/workspace" not in run_args_str
def test_auto_mount_skipped_when_workspace_already_mounted(monkeypatch, tmp_path):
"""Explicit user volumes for /workspace should take precedence over cwd mount."""
project_dir = tmp_path / "my-project"
project_dir.mkdir()
other_dir = tmp_path / "other"
other_dir.mkdir()
monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
calls = _mock_subprocess_run(monkeypatch)
_make_dummy_env(
cwd="/workspace",
host_cwd=str(project_dir),
auto_mount_cwd=True,
volumes=[f"{other_dir}:/workspace"],
)
run_calls = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"]
assert run_calls, "docker run should have been called"
run_args_str = " ".join(run_calls[0][0])
assert f"{other_dir}:/workspace" in run_args_str
assert run_args_str.count(":/workspace") == 1
def test_auto_mount_replaces_persistent_workspace_bind(monkeypatch, tmp_path):
"""Persistent mode should still prefer the configured host cwd at /workspace."""
project_dir = tmp_path / "my-project"
project_dir.mkdir()
monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
calls = _mock_subprocess_run(monkeypatch)
_make_dummy_env(
cwd="/workspace",
persistent_filesystem=True,
host_cwd=str(project_dir),
auto_mount_cwd=True,
task_id="test-persistent-auto-mount",
)
run_calls = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"]
assert run_calls, "docker run should have been called"
run_args_str = " ".join(run_calls[0][0])
assert f"{project_dir}:/workspace" in run_args_str
assert "/sandboxes/docker/test-persistent-auto-mount/workspace:/workspace" not in run_args_str
def test_non_persistent_cleanup_removes_container(monkeypatch):
"""When persist_across_processes=false, cleanup() must docker stop AND
docker rm so containers don't leak across hermes processes.
Updated for issue #20561: the previous implementation used fire-and-forget
``subprocess.Popen("... &", shell=True)`` which raced with parent exit;
the new implementation uses ``subprocess.run`` on a daemon thread with
bounded timeouts. See test_cleanup_with_persist_disabled_stops_and_rms
for the full behavior contract.
"""
monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
_mock_subprocess_run(monkeypatch)
# Run the worker thread synchronously so assertions can observe its work.
import threading
monkeypatch.setattr(threading, "Thread", _FakeThread)
env = docker_env.DockerEnvironment(
image="python:3.11", cwd="/root", timeout=60,
task_id="ephemeral-task", persistent_filesystem=False,
persist_across_processes=False,
)
container_id = env._container_id
assert container_id
# Capture cleanup-time docker calls (everything before this was init).
cleanup_calls = []
real_run = docker_env.subprocess.run
def _capture(cmd, **kw):
cleanup_calls.append((list(cmd) if isinstance(cmd, list) else cmd, kw))
return real_run(cmd, **kw)
monkeypatch.setattr(docker_env.subprocess, "run", _capture)
env.cleanup()
stops = [c for c in cleanup_calls if isinstance(c[0], list) and c[0][1:2] == ["stop"]]
assert stops, f"cleanup() should docker stop {container_id}; got {cleanup_calls}"
class _FakePopen:
def __init__(self, cmd, **kwargs):
self.cmd = cmd
self.kwargs = kwargs
self.stdout = StringIO("")
self.stdin = None
self.returncode = 0
def poll(self):
return self.returncode
def _make_execute_only_env(forward_env=None):
env = docker_env.DockerEnvironment.__new__(docker_env.DockerEnvironment)
env.cwd = "/root"
env.timeout = 60
env._forward_env = forward_env or []
env._env = {}
env._prepare_command = lambda command: (command, None)
env._timeout_result = lambda timeout: {"output": f"timed out after {timeout}", "returncode": 124}
env._container_id = "test-container"
env._docker_exe = "/usr/bin/docker"
# Base class attributes needed by unified execute()
env._session_id = "test123"
env._snapshot_path = "/tmp/hermes-snap-test123.sh"
env._cwd_file = "/tmp/hermes-cwd-test123.txt"
env._cwd_marker = "__HERMES_CWD_test123__"
env._snapshot_ready = True
env._last_sync_time = None
env._init_env_args = []
return env
def test_init_env_args_uses_hermes_dotenv_for_allowlisted_env(monkeypatch):
"""_build_init_env_args picks up forwarded env vars from .env file at init time."""
# Use a var that is NOT in _HERMES_PROVIDER_ENV_BLOCKLIST (GITHUB_TOKEN
# is in the copilot provider's api_key_env_vars and gets stripped).
env = _make_execute_only_env(["DATABASE_URL"])
monkeypatch.delenv("DATABASE_URL", raising=False)
monkeypatch.setattr(docker_env, "_load_hermes_env_vars", lambda: {"DATABASE_URL": "value_from_dotenv"})
args = env._build_init_env_args()
args_str = " ".join(args)
assert "DATABASE_URL=value_from_dotenv" in args_str
def test_init_env_args_prefers_shell_env_over_hermes_dotenv(monkeypatch):
"""Shell env vars take priority over .env file values in init env args."""
env = _make_execute_only_env(["DATABASE_URL"])
monkeypatch.setenv("DATABASE_URL", "value_from_shell")
monkeypatch.setattr(docker_env, "_load_hermes_env_vars", lambda: {"DATABASE_URL": "value_from_dotenv"})
args = env._build_init_env_args()
args_str = " ".join(args)
assert "DATABASE_URL=value_from_shell" in args_str
assert "value_from_dotenv" not in args_str
# ── docker_env tests ──────────────────────────────────────────────
def test_docker_env_appears_in_run_command(monkeypatch):
"""Explicit docker_env values should be passed via -e at docker run time."""
monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
calls = _mock_subprocess_run(monkeypatch)
_make_dummy_env(env={"SSH_AUTH_SOCK": "/run/user/1000/ssh-agent.sock", "GNUPGHOME": "/root/.gnupg"})
run_calls = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"]
assert run_calls, "docker run should have been called"
run_args = run_calls[0][0]
run_args_str = " ".join(run_args)
assert "SSH_AUTH_SOCK=/run/user/1000/ssh-agent.sock" in run_args_str
assert "GNUPGHOME=/root/.gnupg" in run_args_str
def test_docker_env_appears_in_init_env_args(monkeypatch):
"""Explicit docker_env values should appear in _build_init_env_args."""
env = _make_execute_only_env()
env._env = {"MY_VAR": "my_value"}
args = env._build_init_env_args()
args_str = " ".join(args)
assert "MY_VAR=my_value" in args_str
def test_forward_env_overrides_docker_env_in_init_args(monkeypatch):
"""docker_forward_env should override docker_env for the same key."""
env = _make_execute_only_env(forward_env=["MY_KEY"])
env._env = {"MY_KEY": "static_value"}
monkeypatch.setenv("MY_KEY", "dynamic_value")
monkeypatch.setattr(docker_env, "_load_hermes_env_vars", lambda: {})
args = env._build_init_env_args()
args_str = " ".join(args)
assert "MY_KEY=dynamic_value" in args_str
assert "MY_KEY=static_value" not in args_str
def test_docker_env_and_forward_env_merge_in_init_args(monkeypatch):
"""docker_env and docker_forward_env with different keys should both appear."""
env = _make_execute_only_env(forward_env=["TOKEN"])
env._env = {"SSH_AUTH_SOCK": "/run/user/1000/agent.sock"}
monkeypatch.setenv("TOKEN", "secret123")
monkeypatch.setattr(docker_env, "_load_hermes_env_vars", lambda: {})
args = env._build_init_env_args()
args_str = " ".join(args)
assert "SSH_AUTH_SOCK=/run/user/1000/agent.sock" in args_str
assert "TOKEN=secret123" in args_str
def test_normalize_env_dict_filters_invalid_keys():
"""_normalize_env_dict should reject invalid variable names."""
result = docker_env._normalize_env_dict({
"VALID_KEY": "ok",
"123bad": "rejected",
"": "rejected",
"also valid": "rejected", # spaces invalid
"GOOD": "ok",
})
assert result == {"VALID_KEY": "ok", "GOOD": "ok"}
def test_normalize_env_dict_coerces_scalars():
"""_normalize_env_dict should coerce int/float/bool to str."""
result = docker_env._normalize_env_dict({
"PORT": 8080,
"DEBUG": True,
"RATIO": 0.5,
})
assert result == {"PORT": "8080", "DEBUG": "True", "RATIO": "0.5"}
def test_normalize_env_dict_rejects_non_dict():
"""_normalize_env_dict should return empty dict for non-dict input."""
assert docker_env._normalize_env_dict("not a dict") == {}
assert docker_env._normalize_env_dict(None) == {}
assert docker_env._normalize_env_dict([]) == {}
def test_normalize_env_dict_rejects_complex_values():
"""_normalize_env_dict should reject list/dict values."""
result = docker_env._normalize_env_dict({
"GOOD": "string",
"BAD_LIST": [1, 2, 3],
"BAD_DICT": {"nested": True},
})
assert result == {"GOOD": "string"}
def test_security_args_include_setuid_setgid_for_privdrop(monkeypatch):
"""The default (run_as_host_user=False) invocation must include SETUID and
SETGID caps so the image's init can drop from root to a non-root user
(e.g. via ``s6-setuidgid`` in the bundled Hermes image, or ``gosu``/``su``
in user-provided images).
Without these caps the privilege-drop helper fails with
``operation not permitted`` and the container exits immediately (exit 1)
before running any work.
``no-new-privileges`` is kept, so the dropped process still cannot
escalate back to root after the drop — the drop is a one-way transition
performed before the ``no_new_privs`` bit is enforced on the exec boundary.
"""
monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
calls = _mock_subprocess_run(monkeypatch)
_make_dummy_env()
run_calls = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"]
assert run_calls, "docker run should have been called"
run_args = run_calls[0][0]
added = {
run_args[i + 1]
for i, flag in enumerate(run_args[:-1])
if flag == "--cap-add"
}
assert "SETUID" in added, "SETUID cap missing — image privilege-drop will fail"
assert "SETGID" in added, "SETGID cap missing — image privilege-drop will fail"
# ── run_as_host_user tests ────────────────────────────────────────
def test_run_as_host_user_passes_uid_gid(monkeypatch):
"""With run_as_host_user=True, --user <uid>:<gid> is added to docker run."""
monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
monkeypatch.setattr(docker_env.os, "getuid", lambda: 1234, raising=False)
monkeypatch.setattr(docker_env.os, "getgid", lambda: 5678, raising=False)
calls = _mock_subprocess_run(monkeypatch)
_make_dummy_env(run_as_host_user=True)
run_calls = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"]
assert run_calls, "docker run should have been called"
run_args = run_calls[0][0]
# --user must be present and must be paired with "1234:5678"
assert "--user" in run_args, f"--user flag missing from docker run args: {run_args}"
idx = run_args.index("--user")
assert run_args[idx + 1] == "1234:5678", (
f"expected --user 1234:5678, got --user {run_args[idx + 1]}"
)
def test_run_as_host_user_drops_setuid_setgid_caps(monkeypatch):
"""When --user is passed, the container already starts unprivileged and
never needs a privilege drop, so SETUID/SETGID caps are omitted for a
tighter security posture."""
monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
monkeypatch.setattr(docker_env.os, "getuid", lambda: 1000, raising=False)
monkeypatch.setattr(docker_env.os, "getgid", lambda: 1000, raising=False)
calls = _mock_subprocess_run(monkeypatch)
_make_dummy_env(run_as_host_user=True)
run_calls = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"]
run_args = run_calls[0][0]
added = {
run_args[i + 1]
for i, flag in enumerate(run_args[:-1])
if flag == "--cap-add"
}
assert "SETUID" not in added, (
"SETUID cap should be dropped when running as host user — no privilege drop is needed"
)
assert "SETGID" not in added, (
"SETGID cap should be dropped when running as host user — no privilege drop is needed"
)
# Core non-privilege-drop caps must still be there (pip/npm/apt need them).
assert "DAC_OVERRIDE" in added
assert "CHOWN" in added
assert "FOWNER" in added
def test_run_as_host_user_default_off(monkeypatch):
"""Without the opt-in, no --user flag is emitted — preserving existing behavior."""
monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
calls = _mock_subprocess_run(monkeypatch)
_make_dummy_env() # run_as_host_user defaults to False
run_calls = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"]
run_args = run_calls[0][0]
assert "--user" not in run_args, (
f"--user should not be in docker run args when opt-in is off: {run_args}"
)
def test_run_as_host_user_warns_and_skips_when_no_posix_ids(monkeypatch, caplog):
"""On platforms without POSIX getuid/getgid, log a warning and leave the
container at its image default user (no --user flag, full cap set)."""
monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
# Simulate a platform where os.getuid is absent (e.g. Windows host).
monkeypatch.delattr(docker_env.os, "getuid", raising=False)
monkeypatch.delattr(docker_env.os, "getgid", raising=False)
calls = _mock_subprocess_run(monkeypatch)
with caplog.at_level(logging.WARNING):
_make_dummy_env(run_as_host_user=True)
run_calls = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"]
run_args = run_calls[0][0]
assert "--user" not in run_args
# Fall back to the full cap set since the container still starts as root.
added = {
run_args[i + 1]
for i, flag in enumerate(run_args[:-1])
if flag == "--cap-add"
}
assert "SETUID" in added
assert "SETGID" in added
assert any(
"does not expose POSIX uid/gid" in rec.getMessage()
for rec in caplog.records
), "expected a warning when POSIX ids are unavailable"
# ── Docker labels (issue #20561) ──────────────────────────────────
def _run_args_from_calls(calls):
"""Pull the argv list passed to the first ``docker run`` invocation."""
run_calls = [
c for c in calls
if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"
]
assert run_calls, "docker run should have been called"
return run_calls[0][0]
def _labels_in_run_args(run_args):
"""Return the set of ``key=value`` strings passed via ``--label``."""
return {
run_args[i + 1]
for i, flag in enumerate(run_args[:-1])
if flag == "--label"
}
def test_run_command_tags_hermes_agent_label(monkeypatch):
"""Every container hermes-agent starts must carry the hermes-agent=1 label
so the orphan reaper (and external operators) can identify them with a
single ``docker ps --filter label=hermes-agent=1`` call. Regression test
for issue #20561 — without the label there is no global sweep target."""
monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
calls = _mock_subprocess_run(monkeypatch)
_make_dummy_env(task_id="my-task")
labels = _labels_in_run_args(_run_args_from_calls(calls))
assert "hermes-agent=1" in labels, (
f"hermes-agent=1 label missing; got labels: {sorted(labels)}"
)
def test_run_command_tags_task_and_profile_labels(monkeypatch):
"""task_id and the active profile name are surfaced as labels so future
cross-process reuse logic can filter to a specific (task, profile) pair
without parsing container names. Profile resolution uses the helper that
returns ``"default"`` for the root Hermes home."""
monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "research-bot")
calls = _mock_subprocess_run(monkeypatch)
_make_dummy_env(task_id="kanban-42")
labels = _labels_in_run_args(_run_args_from_calls(calls))
assert "hermes-task-id=kanban-42" in labels, (
f"hermes-task-id=kanban-42 missing; got: {sorted(labels)}"
)
assert "hermes-profile=research-bot" in labels, (
f"hermes-profile=research-bot missing; got: {sorted(labels)}"
)
def test_label_sanitizer_rejects_invalid_characters():
"""Docker label values must be alnum + ``_.-`` and ≤63 chars. Profile or
task names containing slashes, colons, or unicode would otherwise emit
invalid labels that round-trip badly through ``docker ps --filter``."""
assert docker_env._sanitize_label_value("plain-name_1.0") == "plain-name_1.0"
assert docker_env._sanitize_label_value("with/slash") == "with_slash"
assert docker_env._sanitize_label_value("with:colon") == "with_colon"
assert docker_env._sanitize_label_value("emoji-😀-here") == "emoji-_-here"
# Empty / non-string inputs must collapse to a queryable token, not "".
assert docker_env._sanitize_label_value("") == "unknown"
assert docker_env._sanitize_label_value(None) == "unknown" # type: ignore[arg-type]
# >63 chars must truncate, not error.
long_value = "x" * 100
assert len(docker_env._sanitize_label_value(long_value)) == 63
def test_run_command_sanitizes_unsafe_task_id(monkeypatch):
"""A task_id containing characters Docker rejects in label values must be
sanitized before reaching ``docker run --label``; otherwise the daemon
refuses the run with an inscrutable error and the agent's first command
blows up."""
monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
calls = _mock_subprocess_run(monkeypatch)
_make_dummy_env(task_id="task/with:weird*chars")
labels = _labels_in_run_args(_run_args_from_calls(calls))
# Each non-OK character becomes an underscore; the safe chars survive.
assert "hermes-task-id=task_with_weird_chars" in labels, (
f"sanitized task-id label missing; got: {sorted(labels)}"
)
def test_labels_attribute_populated_after_init(monkeypatch):
"""``self._labels`` must be set to the same key/value pairs that went onto
docker run, so subsequent reuse / reaper paths can match without re-running
the sanitizer or re-importing the profile module."""
monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
_mock_subprocess_run(monkeypatch)
env = _make_dummy_env(task_id="abc")
assert env._labels == {
"hermes-agent": "1",
"hermes-task-id": "abc",
"hermes-profile": "default",
}
# ── Cross-process container reuse (issue #20561) ──────────────────
def _mock_subprocess_run_with_reuse(monkeypatch, ps_state: str | None,
start_succeeds: bool = True):
"""Reuse-aware subprocess.run mock.
``ps_state`` controls what ``docker ps -a --filter ...`` returns:
* ``None`` → no match (empty stdout). Forces a fresh ``docker run``.
* ``"running"`` / ``"exited"`` / ... → emit ``CID\\tSTATE`` so the reuse
path picks it up. ``"running"`` skips ``docker start``; other states
trigger ``docker start`` (which can be forced to fail via
``start_succeeds=False``).
Returns the captured call list so the test can verify which docker
commands actually ran.
"""
calls = []
def _run(cmd, **kwargs):
calls.append((list(cmd) if isinstance(cmd, list) else cmd, kwargs))
if isinstance(cmd, list) and len(cmd) >= 2:
sub = cmd[1]
if sub == "version":
return subprocess.CompletedProcess(cmd, 0, stdout="Docker version", stderr="")
if sub == "ps":
if ps_state is None:
return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
return subprocess.CompletedProcess(
cmd, 0, stdout=f"reused-cid\t{ps_state}\n", stderr="",
)
if sub == "start":
if not start_succeeds:
# Real subprocess.run with check=True raises on non-zero exit;
# mirror that so the production code's except clause fires.
raise subprocess.CalledProcessError(1, cmd, output="", stderr="no such container")
return subprocess.CompletedProcess(cmd, 0, stdout="reused-cid\n", stderr="")
if sub == "run":
return subprocess.CompletedProcess(cmd, 0, stdout="fresh-cid\n", stderr="")
return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
monkeypatch.setattr(docker_env.subprocess, "run", _run)
return calls
def test_reuse_attaches_to_running_container_without_docker_run(monkeypatch):
"""When a labeled container is already ``running``, the reuse probe
must pick it up and skip ``docker run`` entirely. Regression for the
issue #20561 root cause: every Hermes process spawning a new container
despite docs claiming "ONE long-lived container shared across sessions"."""
monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
calls = _mock_subprocess_run_with_reuse(monkeypatch, ps_state="running")
env = _make_dummy_env(task_id="reuse-test")
# The reuse path must populate _container_id from the ps probe output.
assert env._container_id == "reused-cid", (
f"expected reused container id, got {env._container_id!r}"
)
# And it must NOT have run `docker run`.
run_invocations = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"]
assert not run_invocations, (
f"docker run should be skipped on reuse, got: {run_invocations}"
)
# And it must have NOT issued a `docker start` for an already-running container.
start_invocations = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "start"]
assert not start_invocations, (
f"docker start should be skipped when container already running, got: {start_invocations}"
)
def test_reuse_starts_stopped_container_before_attaching(monkeypatch):
"""A labeled container in ``exited`` state must be restarted via
``docker start`` before the new Hermes process uses it. Without this
step, ``docker exec`` against a stopped container errors out and the
first agent command fails opaquely."""
monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
calls = _mock_subprocess_run_with_reuse(monkeypatch, ps_state="exited")
env = _make_dummy_env(task_id="reuse-stopped")
assert env._container_id == "reused-cid"
start_invocations = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "start"]
assert start_invocations, "expected docker start for exited container"
run_invocations = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"]
assert not run_invocations, "should not docker run when reusing an exited container"
def test_reuse_falls_back_to_fresh_run_when_start_fails(monkeypatch):
"""If ``docker start`` on the matched container fails (container was
removed between probe and start, daemon paused, etc.), the code must
silently fall through to a fresh ``docker run`` rather than leaving the
user with a broken environment. Defensive recovery — the probe is best-
effort, not authoritative."""
monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
calls = _mock_subprocess_run_with_reuse(
monkeypatch, ps_state="exited", start_succeeds=False,
)
env = _make_dummy_env(task_id="reuse-broken-start")
# docker start should be attempted then fail; code falls through to run.
assert env._container_id == "fresh-cid", (
f"expected fresh container id after fallback, got {env._container_id!r}"
)
run_invocations = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"]
assert run_invocations, "fallback to fresh docker run must happen on start failure"
def test_no_reuse_when_persist_across_processes_disabled(monkeypatch):
"""Opt-out path: ``persist_across_processes=False`` skips the ps probe
entirely and always starts a fresh container, matching the pre-fix
behavior for users who want hard per-process isolation."""
monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
# ps_state=running would trigger reuse if the probe ran — assert it doesn't.
calls = _mock_subprocess_run_with_reuse(monkeypatch, ps_state="running")
env = docker_env.DockerEnvironment(
image="python:3.11", cwd="/root", timeout=60,
task_id="no-reuse", persist_across_processes=False,
)
# Must NOT have issued docker ps (the probe is gated by the flag).
ps_invocations = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "ps"]
assert not ps_invocations, (
f"docker ps probe should be skipped when persist_across_processes=False, got: {ps_invocations}"
)
# Should have started a fresh container.
assert env._container_id == "fresh-cid"
def test_find_reusable_container_prefers_running_over_stopped(monkeypatch):
"""When the probe returns multiple matches (shouldn't normally happen,
but can after a crash leaves stale duplicates), a ``running`` container
is preferred over any stopped one. The duplicate gets reaped later by
the orphan reaper; we don't try to be heroic about it here."""
monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
def _run(cmd, **kwargs):
if isinstance(cmd, list) and len(cmd) >= 2:
if cmd[1] == "version":
return subprocess.CompletedProcess(cmd, 0, stdout="ok", stderr="")
if cmd[1] == "ps":
# Two matches: stopped first, running second.
return subprocess.CompletedProcess(
cmd, 0,
stdout="stopped-cid\texited\nrunning-cid\trunning\n",
stderr="",
)
return subprocess.CompletedProcess(cmd, 0, stdout="fresh-cid\n", stderr="")
monkeypatch.setattr(docker_env.subprocess, "run", _run)
env = _make_dummy_env(task_id="dup-match")
assert env._container_id == "running-cid", (
f"running container should win over stopped duplicate, got {env._container_id!r}"
)
# ── Cleanup correctness (issue #20561) ────────────────────────────
class _FakeThread:
"""Stand-in for threading.Thread that captures target/args and calls
target() synchronously when .start() runs, so cleanup behavior is
observable without actually backgrounding subprocess calls."""
def __init__(self, target=None, daemon=None, name=None):
self._target = target
self.daemon = daemon
self.name = name
self._done = False
def start(self):
if self._target is not None:
self._target()
self._done = True
def is_alive(self):
return not self._done
def join(self, timeout=None):
self._done = True
def _install_fake_thread(monkeypatch):
import threading
monkeypatch.setattr(threading, "Thread", _FakeThread)
def test_cleanup_with_persist_is_noop_for_container(monkeypatch):
"""``persist_across_processes=True`` (default) cleanup must NEITHER stop
NOR remove the container — the docs promise "ONE long-lived container
shared across sessions", and any docker stop would kill background
processes inside the container (npm watchers, pytest watchers, etc.).
Resource reclamation in this mode happens via the orphan reaper on next
Hermes startup, not on graceful exit. Issue #20561 — the first iteration
of this PR did docker stop here, which Ben caught as contradicting the
"ONE long-lived container" semantics."""
monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
_mock_subprocess_run(monkeypatch)
_install_fake_thread(monkeypatch)
env = _make_dummy_env(task_id="cleanup-persist", persistent_filesystem=False)
# Default persist_across_processes=True.
container_id = env._container_id
assert container_id
cleanup_calls = []
real_run = docker_env.subprocess.run
def _capturing_run(cmd, **kwargs):
cleanup_calls.append((list(cmd) if isinstance(cmd, list) else cmd, kwargs))
return real_run(cmd, **kwargs)
monkeypatch.setattr(docker_env.subprocess, "run", _capturing_run)
env.cleanup()
stops = [c for c in cleanup_calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "stop"]
rms = [c for c in cleanup_calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "rm"]
assert not stops, (
f"docker stop must NOT be called when persist_across_processes=True; "
f"container has to stay running so background processes survive. "
f"Got: {stops}"
)
assert not rms, (
f"docker rm must NOT be called when persist_across_processes=True; "
f"reuse would be impossible. Got: {rms}"
)
# The in-process handle must still be cleared so the next __init__
# re-probes via labels (and reuses the still-running container).
assert env._container_id is None, (
"in-process container_id should be cleared even in no-op cleanup"
)
def test_cleanup_force_remove_stops_and_rms_even_in_persist_mode(monkeypatch):
"""``cleanup(force_remove=True)`` must stop AND rm the container even
when ``persist_across_processes=True``. This is the explicit-teardown
path for ``/reset``, ``cleanup_vm(task_id, force_remove=True)``, and any
future caller that wants a guaranteed fresh container.
Without this kwarg, callers in persist mode would have no way to force a
fresh container without also flipping the global config — too coarse for
a per-task reset.
"""
monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
_mock_subprocess_run(monkeypatch)
_install_fake_thread(monkeypatch)
env = _make_dummy_env(task_id="cleanup-force", persistent_filesystem=False)
assert env._container_id
cleanup_calls = []
real_run = docker_env.subprocess.run
def _capturing_run(cmd, **kwargs):
cleanup_calls.append((list(cmd) if isinstance(cmd, list) else cmd, kwargs))
return real_run(cmd, **kwargs)
monkeypatch.setattr(docker_env.subprocess, "run", _capturing_run)
env.cleanup(force_remove=True)
stops = [c for c in cleanup_calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "stop"]
rms = [c for c in cleanup_calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "rm"]
assert stops, f"force_remove must docker stop; got: {cleanup_calls}"
assert rms, f"force_remove must docker rm; got: {cleanup_calls}"
def test_cleanup_vm_default_honors_persist_mode(monkeypatch):
"""``cleanup_vm(task_id)`` without ``force_remove=True`` must be a no-op
for a persist-mode container.
Regression for the bug Ben caught after commit 4: ``AIAgent.close()``
(which is called from ``tui_gateway/server.py`` on session.close, from
``gateway/run.py`` on per-session teardown, and from per-turn cleanup)
calls ``cleanup_vm(task_id)``. If that defaulted to ``force_remove=True``
we'd tear down the container on every TUI session close, defeating the
"ONE long-lived container shared across sessions" contract.
"""
monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
_mock_subprocess_run(monkeypatch)
_install_fake_thread(monkeypatch)
from tools import terminal_tool
env = _make_dummy_env(task_id="session-close-test")
container_id = env._container_id
terminal_tool._active_environments["session-close-test"] = env
cleanup_calls = []
real_run = docker_env.subprocess.run
def _capturing_run(cmd, **kwargs):
cleanup_calls.append((list(cmd) if isinstance(cmd, list) else cmd, kwargs))
return real_run(cmd, **kwargs)
monkeypatch.setattr(docker_env.subprocess, "run", _capturing_run)
try:
terminal_tool.cleanup_vm("session-close-test")
finally:
terminal_tool._active_environments.pop("session-close-test", None)
stops = [c for c in cleanup_calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "stop"]
rms = [c for c in cleanup_calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "rm"]
assert not stops, (
f"cleanup_vm() default must not docker stop a persist-mode container; "
f"got: {stops}"
)
assert not rms, (
f"cleanup_vm() default must not docker rm a persist-mode container; "
f"got: {rms}"
)
def test_cleanup_vm_force_remove_tears_down_persist_container(monkeypatch):
"""``cleanup_vm(task_id, force_remove=True)`` tears down a persist-mode
container — the explicit-teardown path for ``/reset``-style flows.
Also pins the runtime-signature-inspection plumbing: the kwarg must
actually flow through ``cleanup_vm`` into the backend's ``cleanup()``.
"""
monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
_mock_subprocess_run(monkeypatch)
_install_fake_thread(monkeypatch)
from tools import terminal_tool
env = _make_dummy_env(task_id="explicit-teardown-test")
terminal_tool._active_environments["explicit-teardown-test"] = env
cleanup_calls = []
real_run = docker_env.subprocess.run
def _capturing_run(cmd, **kwargs):
cleanup_calls.append((list(cmd) if isinstance(cmd, list) else cmd, kwargs))
return real_run(cmd, **kwargs)
monkeypatch.setattr(docker_env.subprocess, "run", _capturing_run)
try:
terminal_tool.cleanup_vm("explicit-teardown-test", force_remove=True)
finally:
terminal_tool._active_environments.pop("explicit-teardown-test", None)
stops = [c for c in cleanup_calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "stop"]
rms = [c for c in cleanup_calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "rm"]
assert stops, f"force_remove must reach docker stop; got: {cleanup_calls}"
assert rms, f"force_remove must reach docker rm; got: {cleanup_calls}"
def test_cleanup_with_persist_disabled_stops_and_rms(monkeypatch):
"""``persist_across_processes=False`` cleanup must docker stop AND docker
rm so containers don't leak. Crucially, this runs regardless of the
``persistent_filesystem`` setting — the original code only rm'd when
``not self._persistent``, which meant the default-on ``container_persistent:
true`` users (the documented happy path) leaked Exited containers forever.
Issue #20561 root-cause fix."""
monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
_mock_subprocess_run(monkeypatch)
_install_fake_thread(monkeypatch)
# Note: persistent_filesystem=True (the prior-leak scenario) + the new
# cross-process toggle OFF must still result in a clean rm.
env = docker_env.DockerEnvironment(
image="python:3.11", cwd="/root", timeout=60,
task_id="cleanup-no-persist", persistent_filesystem=True,
persist_across_processes=False,
)
cleanup_calls = []
real_run = docker_env.subprocess.run
def _capturing_run(cmd, **kwargs):
cleanup_calls.append((list(cmd) if isinstance(cmd, list) else cmd, kwargs))
return real_run(cmd, **kwargs)
monkeypatch.setattr(docker_env.subprocess, "run", _capturing_run)
env.cleanup()
stops = [c for c in cleanup_calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "stop"]
rms = [c for c in cleanup_calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "rm"]
assert stops, "expected docker stop"
assert rms, (
"docker rm MUST run when persist_across_processes=False, even with "
"persistent_filesystem=True — that gating was the leak source in #20561."
)
def test_cleanup_uses_subprocess_run_not_detached_shell(monkeypatch):
"""The pre-fix code used ``subprocess.Popen("... &", shell=True)`` which
raced with parent-process exit and silently dropped cleanup work. The
new code must use ``subprocess.run`` with bounded ``timeout=`` so the
work actually completes within the process lifetime.
Asserts cleanup never reaches into shell-mode Popen. Uses
``force_remove=True`` so cleanup actually issues docker calls — the
default persist-mode path is now a no-op (commit 4) and would trivially
pass this assertion without exercising the docker code at all.
"""
monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
_mock_subprocess_run(monkeypatch)
_install_fake_thread(monkeypatch)
def _forbidden_popen(*args, **kwargs):
raise AssertionError(
f"cleanup must not use subprocess.Popen anymore (issue #20561); "
f"got args={args} kwargs={kwargs}"
)
monkeypatch.setattr(docker_env.subprocess, "Popen", _forbidden_popen)
env = _make_dummy_env(task_id="no-popen-cleanup")
env.cleanup(force_remove=True) # must not raise
def test_wait_for_cleanup_returns_true_when_no_thread_started():
"""``wait_for_cleanup`` must be a no-op when ``cleanup`` was never called
(or the env has no live cleanup thread) — atexit calls it unconditionally
across all active envs, so a False return would falsely flag healthy
shutdowns."""
env = docker_env.DockerEnvironment.__new__(docker_env.DockerEnvironment)
# No _cleanup_thread set — simulates an env that was never cleanup()'d.
assert env.wait_for_cleanup(timeout=1.0) is True
def test_wait_for_cleanup_after_cleanup_returns_true(monkeypatch):
"""End-to-end: cleanup() starts a thread, wait_for_cleanup() joins it
and reports completion. Atexit relies on this contract to ensure docker
stop/rm actually finishes before the Python interpreter exits.
Uses ``force_remove=True`` so cleanup actually starts a worker thread —
the default persist-mode cleanup is a no-op (commit 4) and never spawns
a thread, so the trivial "no thread" branch of wait_for_cleanup is
already covered by the previous test.
"""
monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
_mock_subprocess_run(monkeypatch)
_install_fake_thread(monkeypatch)
env = _make_dummy_env(task_id="wait-test")
env.cleanup(force_remove=True)
assert env.wait_for_cleanup(timeout=5.0) is True
def test_cleanup_on_env_with_no_container_id_does_not_raise(monkeypatch):
"""A DockerEnvironment whose ``__init__`` failed before the container_id
was set (image-pull error, docker daemon down) should still be safe to
cleanup() — the post-creation failure path in callers always tries.
Without this guard the daemon-down case used to NameError on the cleanup
branch."""
env = docker_env.DockerEnvironment.__new__(docker_env.DockerEnvironment)
env._container_id = None
env._persistent = False
env._workspace_dir = None
env._home_dir = None
# No exception expected.
env.cleanup()
# ── Orphan reaper (issue #20561) ──────────────────────────────────
def _now_iso(offset_seconds: int = 0) -> str:
"""Return an RFC3339 timestamp ``offset_seconds`` in the past."""
import datetime
t = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(seconds=offset_seconds)
# Format like Docker emits — with nanoseconds-style trailing digits.
return t.isoformat().replace("+00:00", ".123456789Z")
def _reaper_run_mock(monkeypatch, ps_ids: list[str], inspect_responses: dict[str, str],
rm_succeeds: bool = True):
"""Build a subprocess.run mock for reaper tests.
* ``ps_ids`` — what ``docker ps -a --filter ... --format '{{.ID}}'`` returns
* ``inspect_responses[cid]`` — what ``docker inspect ... FinishedAt`` returns
for each cid; ``""`` means "field unset".
* ``rm_succeeds`` — whether ``docker rm -f`` returns 0.
Captures every call so tests can assert which containers were rm'd.
"""
calls = []
def _run(cmd, **kwargs):
calls.append((list(cmd) if isinstance(cmd, list) else cmd, kwargs))
if not isinstance(cmd, list) or len(cmd) < 2:
return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
sub = cmd[1]
if sub == "ps":
return subprocess.CompletedProcess(
cmd, 0, stdout="\n".join(ps_ids) + ("\n" if ps_ids else ""), stderr="",
)
if sub == "inspect":
# cmd is [docker, inspect, --format, '{{.State.FinishedAt}}', cid]
cid = cmd[-1]
return subprocess.CompletedProcess(
cmd, 0, stdout=inspect_responses.get(cid, "") + "\n", stderr="",
)
if sub == "rm":
return subprocess.CompletedProcess(
cmd, 0 if rm_succeeds else 1,
stdout="", stderr="" if rm_succeeds else "no such container",
)
return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
monkeypatch.setattr(docker_env.subprocess, "run", _run)
return calls
def test_reap_orphan_returns_zero_when_no_matches(monkeypatch):
"""No labeled containers → no rm calls, returns 0. Establishes the
happy-path baseline for the orphan reaper (issue #20561)."""
calls = _reaper_run_mock(monkeypatch, ps_ids=[], inspect_responses={})
removed = docker_env.reap_orphan_containers(
max_age_seconds=600, profile_filter="default", docker_exe="/usr/bin/docker",
)
assert removed == 0
rms = [c for c in calls if isinstance(c[0], list) and c[0][1:2] == ["rm"]]
assert not rms, "no rm calls expected when ps returns empty"
def test_reap_orphan_removes_stale_exited_container(monkeypatch):
"""An Exited container older than max_age_seconds must be removed.
This is the core repair path for issue #20561 — without the reaper,
SIGKILL'd Hermes processes leak containers permanently."""
old = _now_iso(offset_seconds=900) # 15 minutes ago
calls = _reaper_run_mock(
monkeypatch, ps_ids=["old-cid"], inspect_responses={"old-cid": old},
)
removed = docker_env.reap_orphan_containers(
max_age_seconds=600, profile_filter="default", docker_exe="/usr/bin/docker",
)
assert removed == 1
rms = [c for c in calls if isinstance(c[0], list) and c[0][1:2] == ["rm"]]
assert len(rms) == 1
assert "old-cid" in rms[0][0], f"expected rm of old-cid, got {rms[0][0]}"
def test_reap_orphan_spares_recently_exited_container(monkeypatch):
"""A container exited within max_age_seconds must NOT be reaped — that
container belongs to a Hermes process that just finished and may be
about to be replaced. Conservative window prevents racing sibling
processes."""
recent = _now_iso(offset_seconds=60) # 1 minute ago
calls = _reaper_run_mock(
monkeypatch, ps_ids=["recent-cid"], inspect_responses={"recent-cid": recent},
)
removed = docker_env.reap_orphan_containers(
max_age_seconds=600, profile_filter="default", docker_exe="/usr/bin/docker",
)
assert removed == 0
rms = [c for c in calls if isinstance(c[0], list) and c[0][1:2] == ["rm"]]
assert not rms, f"recent container must not be reaped, got rm calls: {rms}"
def test_reap_orphan_scopes_to_profile_filter_via_label(monkeypatch):
"""The reaper must pass ``--filter label=hermes-profile=<profile>`` to
docker ps so it never sweeps another profile's containers. A research
profile must not tear down the default profile's stragglers."""
calls = _reaper_run_mock(monkeypatch, ps_ids=[], inspect_responses={})
docker_env.reap_orphan_containers(
max_age_seconds=600, profile_filter="research-bot", docker_exe="/usr/bin/docker",
)
ps_calls = [c for c in calls if isinstance(c[0], list) and c[0][1:2] == ["ps"]]
assert ps_calls, "expected at least one docker ps call"
flat = " ".join(ps_calls[0][0])
assert "label=hermes-profile=research-bot" in flat, (
f"profile filter not applied to docker ps; got args: {ps_calls[0][0]}"
)
assert "label=hermes-agent=1" in flat, (
f"hermes-agent label filter must also be applied; got: {ps_calls[0][0]}"
)
assert "status=exited" in flat, (
"must filter to exited containers only — running containers may "
"belong to a sibling Hermes process and must NEVER be reaped"
)
def test_reap_orphan_skips_container_with_unparseable_finished_at(monkeypatch):
"""If docker inspect returns the zero-value ``0001-01-01T00:00:00Z`` (no
FinishedAt yet) or an unparseable timestamp, the reaper must leave the
container alone. Defensive — never reap a container whose age we can't
determine."""
calls = _reaper_run_mock(
monkeypatch,
ps_ids=["never-finished", "garbage-ts"],
inspect_responses={
"never-finished": "0001-01-01T00:00:00Z",
"garbage-ts": "not-a-timestamp",
},
)
removed = docker_env.reap_orphan_containers(
max_age_seconds=600, profile_filter="default", docker_exe="/usr/bin/docker",
)
assert removed == 0
rms = [c for c in calls if isinstance(c[0], list) and c[0][1:2] == ["rm"]]
assert not rms, (
f"reaper must NOT remove containers with unparseable FinishedAt; got: {rms}"
)
def test_reap_orphan_handles_docker_ps_failure_gracefully(monkeypatch):
"""If docker ps itself fails (daemon down, permission denied), the
reaper returns 0 without crashing. The reaper is best-effort plumbing,
not a critical path — it must never block container creation."""
def _failing_ps(cmd, **kwargs):
if isinstance(cmd, list) and len(cmd) >= 2 and cmd[1] == "ps":
return subprocess.CompletedProcess(cmd, 1, stdout="", stderr="Cannot connect to daemon")
return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
monkeypatch.setattr(docker_env.subprocess, "run", _failing_ps)
# Must not raise
removed = docker_env.reap_orphan_containers(
max_age_seconds=600, profile_filter="default", docker_exe="/usr/bin/docker",
)
assert removed == 0
def test_reap_orphan_continues_after_individual_rm_failure(monkeypatch):
"""If ``docker rm -f`` fails on one container (already removed by a
concurrent process, container locked, etc.), the reaper must log and
continue to the next candidate rather than aborting the whole sweep."""
old = _now_iso(offset_seconds=900)
rm_calls = []
def _run(cmd, **kwargs):
if not isinstance(cmd, list) or len(cmd) < 2:
return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
sub = cmd[1]
if sub == "ps":
return subprocess.CompletedProcess(
cmd, 0, stdout="cid-a\ncid-b\ncid-c\n", stderr="",
)
if sub == "inspect":
return subprocess.CompletedProcess(cmd, 0, stdout=old + "\n", stderr="")
if sub == "rm":
rm_calls.append(cmd[-1])
# cid-b fails; cid-a and cid-c succeed.
if cmd[-1] == "cid-b":
return subprocess.CompletedProcess(cmd, 1, stdout="", stderr="no such container")
return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
monkeypatch.setattr(docker_env.subprocess, "run", _run)
removed = docker_env.reap_orphan_containers(
max_age_seconds=600, profile_filter="default", docker_exe="/usr/bin/docker",
)
# All three were attempted, two succeeded.
assert removed == 2
assert set(rm_calls) == {"cid-a", "cid-b", "cid-c"}, (
f"reaper must attempt all candidates even when one fails; got: {rm_calls}"
)
def test_container_finished_at_parses_nanosecond_timestamp(monkeypatch):
"""Docker emits FinishedAt with nanosecond precision (RFC3339 with up to
9 fractional digits), but Python's fromisoformat caps at microseconds.
The helper must trim the extra digits without raising — otherwise every
candidate gets skipped and the reaper does nothing."""
def _run(cmd, **kwargs):
return subprocess.CompletedProcess(
cmd, 0,
stdout="2026-05-28T13:45:00.123456789Z\n",
stderr="",
)
monkeypatch.setattr(docker_env.subprocess, "run", _run)
result = docker_env._container_finished_at("/usr/bin/docker", "test-cid")
assert result is not None, "must parse RFC3339 with nanoseconds"
import datetime
assert result.tzinfo == datetime.timezone.utc
assert result.year == 2026 and result.month == 5 and result.day == 28
def test_container_finished_at_returns_none_on_zero_value():
"""Docker's zero-value ``0001-01-01T00:00:00Z`` (never finished) must
map to None so the reaper treats the container as unreapable."""
# Direct test of the parsing helper — no subprocess needed since the
# check happens after the inspect call returns.
import subprocess as _subprocess
class _MockRun:
def __init__(self, stdout):
self.returncode = 0
self.stdout = stdout
self.stderr = ""
import unittest.mock
with unittest.mock.patch.object(
docker_env.subprocess, "run", return_value=_MockRun("0001-01-01T00:00:00Z\n"),
):
result = docker_env._container_finished_at("/usr/bin/docker", "never-finished")
assert result is None