hermes-agent/tests/tools/test_docker_environment.py
Ben ac8e238bc8 fix(docker): reuse containers across processes + fix cleanup leaks
The Docker backend docs claim "Single persistent container — ONE long-
lived container shared across sessions, /new, /reset, and delegate_task
subagents. Stopped/removed on shutdown." In practice the code only
honored that contract within a single Python process via the in-memory
`_active_environments[task_id]` cache. Every `hermes chat` invocation
spawned a fresh `hermes-<hex>` container; older containers piled up in
`Exited` state and accumulated until manual `docker rm` (issue #20561).

Three root causes, all addressed by this commit:

1. No cross-process container discovery.
2. `cleanup()` used fire-and-forget `subprocess.Popen("... &", shell=True)`
   which raced with parent-process exit — when Python exited promptly the
   detached shell child got killed mid-`docker stop`, leaving stopped
   containers behind.
3. The `docker rm` step in cleanup was gated on `not self._persistent`
   (the bind-mount-persistence flag). Default config sets
   `container_persistent: true`, so the default happy path skipped `rm`
   entirely — even when the user explicitly didn't want cross-process
   reuse, containers leaked.

Fix:

* Add `DockerEnvironment.__init__(persist_across_processes=True)`. When
  true, init probes
  `docker ps -a --filter label=hermes-agent=1
                  --filter label=hermes-task-id=<task>
                  --filter label=hermes-profile=<profile>`
  and reuses a matching container (running → attach; stopped →
  `docker start` → attach; `docker start` failure → fall through to a
  fresh `docker run`). Multiple matches prefer the running one, with the
  stragglers left for the orphan reaper (next commit) to clean up.

* Rewrite `cleanup()`. Uses `subprocess.run(..., timeout=30)` on a
  daemon `threading.Thread`, not the racy `Popen(... &)`. The
  `_persistent` guard is dropped on the `rm` step — `rm` now runs
  whenever `persist_across_processes` is false, regardless of the
  bind-mount-persistence setting. The leak class is gone in all
  combinations.

* Add `wait_for_cleanup(timeout)`. `tools/terminal_tool.py`'s atexit
  hook calls this on every active env, blocking up to 15s for the
  cleanup thread before interpreter exit. Without this, `hermes /quit`
  raced the daemon-thread teardown and dropped the stop/rm work.

* New config `terminal.docker_persist_across_processes` (default
  `true` — restores the documented contract). Set `false` for hard
  per-process isolation. Wired through all four config-bridge sites
  (cli.py env_mappings, gateway/run.py _terminal_env_map,
  hermes_cli/config.py _config_to_env_sync, tests/conftest.py env-strip
  list); regression-pinned by
  `test_docker_persist_across_processes_is_bridged_everywhere` matching
  the existing pattern for docker_run_as_host_user / docker_env.

Reuse intentionally does NOT compare image / mounts / resources — only
the labels. Operators changing those settings should set
`docker_persist_across_processes: false` (or `docker rm -f` the
labeled container) to force a fresh start. This keeps the probe cheap
and the failure mode obvious.

Coverage: 12 new unit tests in tests/tools/test_docker_environment.py
covering reuse paths (running, stopped, fallback, opt-out, duplicate
preference) and cleanup behavior (persist-mode no-rm, opt-out always-rm,
no-Popen, wait_for_cleanup semantics, partial-init safety). Plus one
config-bridge regression pin.

Refs #20561
2026-05-29 11:49:54 +10:00

977 lines
40 KiB
Python

import logging
from io import StringIO
import subprocess
import sys
import types
import pytest
from tools.environments import docker as docker_env
def _mock_subprocess_run(monkeypatch):
    """Replace ``docker_env.subprocess.run`` with a recording fake.

    The fake always reports exit code 0. ``docker version`` and
    ``docker run`` get canned stdout; every other command gets empty stdout.

    Returns:
        The list that accumulates one ``(cmd, kwargs)`` tuple per call
        (list commands are copied before being stored).
    """
    recorded = []

    def _fake_run(cmd, **kwargs):
        recorded.append((list(cmd) if isinstance(cmd, list) else cmd, kwargs))
        # Only list-form commands with at least two items have a subcommand.
        subcommand = cmd[1] if isinstance(cmd, list) and len(cmd) >= 2 else None
        if subcommand == "version":
            stdout = "Docker version"
        elif subcommand == "run":
            stdout = "fake-container-id\n"
        else:
            stdout = ""
        return subprocess.CompletedProcess(cmd, 0, stdout=stdout, stderr="")

    monkeypatch.setattr(docker_env.subprocess, "run", _fake_run)
    return recorded
def _make_dummy_env(**kwargs):
    """Build a ``DockerEnvironment`` from test defaults, overridable by keyword.

    Only the keys listed in ``defaults`` are forwarded to the constructor;
    any other keyword passed in is ignored.
    """
    defaults = {
        "image": "python:3.11",
        "cwd": "/root",
        "timeout": 60,
        "cpu": 0,
        "memory": 0,
        "disk": 0,
        "persistent_filesystem": False,
        "task_id": "test-task",
        "volumes": [],
        "network": True,
        "host_cwd": None,
        "auto_mount_cwd": False,
        "env": None,
        "run_as_host_user": False,
    }
    ctor_args = {key: kwargs.get(key, fallback) for key, fallback in defaults.items()}
    return docker_env.DockerEnvironment(**ctor_args)
def test_ensure_docker_available_logs_and_raises_when_not_found(monkeypatch, caplog):
    """A missing docker binary aborts construction with a clear error and an ERROR log."""

    def _unexpected_run(*args, **kwargs):
        # Fails the test outright if anything tries to shell out.
        pytest.fail("subprocess.run should not be called when docker is missing")

    monkeypatch.setattr(docker_env, "find_docker", lambda: None)
    monkeypatch.setattr(docker_env.subprocess, "run", _unexpected_run)

    with caplog.at_level(logging.ERROR), pytest.raises(RuntimeError) as excinfo:
        _make_dummy_env()

    assert "Docker executable not found in PATH or known install locations" in str(excinfo.value)
    logged_messages = [record.getMessage() for record in caplog.records]
    assert any(
        "no docker executable was found in PATH or known install locations" in message
        for message in logged_messages
    )
def test_ensure_docker_available_logs_and_raises_on_timeout(monkeypatch, caplog):
    """A ``docker version`` timeout becomes a RuntimeError plus an ERROR log entry."""

    def _always_time_out(*args, **kwargs):
        raise subprocess.TimeoutExpired(cmd=["/custom/docker", "version"], timeout=5)

    monkeypatch.setattr(docker_env, "find_docker", lambda: "/custom/docker")
    monkeypatch.setattr(docker_env.subprocess, "run", _always_time_out)

    with caplog.at_level(logging.ERROR), pytest.raises(RuntimeError) as excinfo:
        _make_dummy_env()

    assert "Docker daemon is not responding" in str(excinfo.value)
    logged_messages = [record.getMessage() for record in caplog.records]
    assert any(
        "/custom/docker version' timed out" in message
        for message in logged_messages
    )
def test_ensure_docker_available_uses_resolved_executable(monkeypatch):
    """The preflight check invokes the path returned by ``find_docker``."""
    observed = []

    def _record(cmd, **kwargs):
        observed.append((cmd, kwargs))
        return subprocess.CompletedProcess(cmd, 0, stdout="Docker version", stderr="")

    monkeypatch.setattr(docker_env, "find_docker", lambda: "/opt/homebrew/bin/docker")
    monkeypatch.setattr(docker_env.subprocess, "run", _record)

    docker_env._ensure_docker_available()

    # Exactly one call, with these exact keyword arguments.
    expected_kwargs = {"capture_output": True, "text": True, "timeout": 5}
    assert observed == [(["/opt/homebrew/bin/docker", "version"], expected_kwargs)]
def test_auto_mount_host_cwd_adds_volume(monkeypatch, tmp_path):
    """With ``auto_mount_cwd=True`` the host cwd is bound to /workspace in ``docker run``."""
    host_project = tmp_path / "my-project"
    host_project.mkdir()
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    recorded = _mock_subprocess_run(monkeypatch)

    _make_dummy_env(cwd="/workspace", host_cwd=str(host_project), auto_mount_cwd=True)

    # Inspect the argv of the first ``docker run`` invocation.
    joined_argv = " ".join(_run_args_from_calls(recorded))
    assert f"{host_project}:/workspace" in joined_argv
def test_auto_mount_disabled_by_default(monkeypatch, tmp_path):
    """With ``auto_mount_cwd=False`` the host cwd never appears as a /workspace bind."""
    host_project = tmp_path / "my-project"
    host_project.mkdir()
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    recorded = _mock_subprocess_run(monkeypatch)

    _make_dummy_env(cwd="/root", host_cwd=str(host_project), auto_mount_cwd=False)

    joined_argv = " ".join(_run_args_from_calls(recorded))
    assert f"{host_project}:/workspace" not in joined_argv
def test_auto_mount_skipped_when_workspace_already_mounted(monkeypatch, tmp_path):
    """A user-supplied /workspace volume wins over the automatic cwd mount."""
    host_project = tmp_path / "my-project"
    explicit_mount = tmp_path / "other"
    for directory in (host_project, explicit_mount):
        directory.mkdir()
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    recorded = _mock_subprocess_run(monkeypatch)

    _make_dummy_env(
        cwd="/workspace",
        host_cwd=str(host_project),
        auto_mount_cwd=True,
        volumes=[f"{explicit_mount}:/workspace"],
    )

    joined_argv = " ".join(_run_args_from_calls(recorded))
    assert f"{explicit_mount}:/workspace" in joined_argv
    # Only one bind may target /workspace.
    assert joined_argv.count(":/workspace") == 1
def test_auto_mount_replaces_persistent_workspace_bind(monkeypatch, tmp_path):
    """In persistent mode the host cwd, not the sandbox directory, backs /workspace."""
    host_project = tmp_path / "my-project"
    host_project.mkdir()
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    recorded = _mock_subprocess_run(monkeypatch)

    _make_dummy_env(
        cwd="/workspace",
        persistent_filesystem=True,
        host_cwd=str(host_project),
        auto_mount_cwd=True,
        task_id="test-persistent-auto-mount",
    )

    joined_argv = " ".join(_run_args_from_calls(recorded))
    assert f"{host_project}:/workspace" in joined_argv
    assert "/sandboxes/docker/test-persistent-auto-mount/workspace:/workspace" not in joined_argv
def test_non_persistent_cleanup_removes_container(monkeypatch):
    """When persist_across_processes=false, cleanup() must docker stop AND
    docker rm so containers don't leak across hermes processes.

    Updated for issue #20561: the previous implementation used fire-and-forget
    ``subprocess.Popen("... &", shell=True)`` which raced with parent exit;
    the new implementation uses ``subprocess.run`` on a daemon thread with
    bounded timeouts. See test_cleanup_with_persist_disabled_stops_and_rms
    for the full behavior contract.

    Both halves of the contract are asserted here: a ``docker stop`` and a
    ``docker rm`` must be issued during cleanup. Checking only ``stop`` would
    let a leaked (stopped but never removed) container pass unnoticed.
    """
    import threading

    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
    _mock_subprocess_run(monkeypatch)
    # Run the worker thread synchronously so assertions can observe its work.
    monkeypatch.setattr(threading, "Thread", _FakeThread)

    env = docker_env.DockerEnvironment(
        image="python:3.11", cwd="/root", timeout=60,
        task_id="ephemeral-task", persistent_filesystem=False,
        persist_across_processes=False,
    )
    container_id = env._container_id
    assert container_id

    # Capture cleanup-time docker calls (everything before this was init).
    cleanup_calls = []
    real_run = docker_env.subprocess.run  # the mock installed above

    def _capture(cmd, **kw):
        cleanup_calls.append((list(cmd) if isinstance(cmd, list) else cmd, kw))
        return real_run(cmd, **kw)

    monkeypatch.setattr(docker_env.subprocess, "run", _capture)
    env.cleanup()

    def _invocations(subcommand):
        """Return the captured list-form calls whose argv[1] is *subcommand*."""
        return [
            c for c in cleanup_calls
            if isinstance(c[0], list) and c[0][1:2] == [subcommand]
        ]

    assert _invocations("stop"), (
        f"cleanup() should docker stop {container_id}; got {cleanup_calls}"
    )
    assert _invocations("rm"), (
        f"cleanup() should docker rm {container_id}; got {cleanup_calls}"
    )
class _FakePopen:
def __init__(self, cmd, **kwargs):
self.cmd = cmd
self.kwargs = kwargs
self.stdout = StringIO("")
self.stdin = None
self.returncode = 0
def poll(self):
return self.returncode
def _make_execute_only_env(forward_env=None):
    """Return a ``DockerEnvironment`` created via ``__new__`` with attributes set by hand.

    ``__init__`` is bypassed, so every attribute the tests rely on is assigned
    explicitly below. A falsy ``forward_env`` is stored as an empty list.
    """

    def _passthrough_command(command):
        return (command, None)

    def _fake_timeout_result(timeout):
        return {"output": f"timed out after {timeout}", "returncode": 124}

    env_cls = docker_env.DockerEnvironment
    stub = env_cls.__new__(env_cls)

    stub.cwd = "/root"
    stub.timeout = 60
    stub._forward_env = forward_env if forward_env else []
    stub._env = {}
    stub._prepare_command = _passthrough_command
    stub._timeout_result = _fake_timeout_result
    stub._container_id = "test-container"
    stub._docker_exe = "/usr/bin/docker"

    # Base class attributes needed by unified execute()
    stub._session_id = "test123"
    stub._snapshot_path = "/tmp/hermes-snap-test123.sh"
    stub._cwd_file = "/tmp/hermes-cwd-test123.txt"
    stub._cwd_marker = "__HERMES_CWD_test123__"
    stub._snapshot_ready = True
    stub._last_sync_time = None
    stub._init_env_args = []
    return stub
def test_init_env_args_uses_hermes_dotenv_for_allowlisted_env(monkeypatch):
    """A forwarded variable absent from the shell is taken from the loaded .env values."""
    # Use a var that is NOT in _HERMES_PROVIDER_ENV_BLOCKLIST (GITHUB_TOKEN
    # is in the copilot provider's api_key_env_vars and gets stripped).
    monkeypatch.delenv("DATABASE_URL", raising=False)
    monkeypatch.setattr(
        docker_env,
        "_load_hermes_env_vars",
        lambda: {"DATABASE_URL": "value_from_dotenv"},
    )
    env = _make_execute_only_env(["DATABASE_URL"])

    rendered = " ".join(env._build_init_env_args())

    assert "DATABASE_URL=value_from_dotenv" in rendered
def test_init_env_args_prefers_shell_env_over_hermes_dotenv(monkeypatch):
    """When both sources define a forwarded variable, the shell value is the one emitted."""
    monkeypatch.setenv("DATABASE_URL", "value_from_shell")
    monkeypatch.setattr(
        docker_env,
        "_load_hermes_env_vars",
        lambda: {"DATABASE_URL": "value_from_dotenv"},
    )
    env = _make_execute_only_env(["DATABASE_URL"])

    rendered = " ".join(env._build_init_env_args())

    assert "DATABASE_URL=value_from_shell" in rendered
    assert "value_from_dotenv" not in rendered
# ── docker_env tests ──────────────────────────────────────────────
def test_docker_env_appears_in_run_command(monkeypatch):
    """Values passed through ``env=`` show up as ``KEY=value`` in the ``docker run`` argv."""
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    recorded = _mock_subprocess_run(monkeypatch)
    explicit_env = {
        "SSH_AUTH_SOCK": "/run/user/1000/ssh-agent.sock",
        "GNUPGHOME": "/root/.gnupg",
    }

    _make_dummy_env(env=explicit_env)

    joined_argv = " ".join(_run_args_from_calls(recorded))
    assert "SSH_AUTH_SOCK=/run/user/1000/ssh-agent.sock" in joined_argv
    assert "GNUPGHOME=/root/.gnupg" in joined_argv
def test_docker_env_appears_in_init_env_args(monkeypatch):
    """Entries stored in ``_env`` are rendered by ``_build_init_env_args``."""
    env = _make_execute_only_env()
    env._env = {"MY_VAR": "my_value"}

    rendered = " ".join(env._build_init_env_args())

    assert "MY_VAR=my_value" in rendered
def test_forward_env_overrides_docker_env_in_init_args(monkeypatch):
    """For a key present in both, the forwarded shell value replaces the static one."""
    monkeypatch.setenv("MY_KEY", "dynamic_value")
    monkeypatch.setattr(docker_env, "_load_hermes_env_vars", lambda: {})
    env = _make_execute_only_env(forward_env=["MY_KEY"])
    env._env = {"MY_KEY": "static_value"}

    rendered = " ".join(env._build_init_env_args())

    assert "MY_KEY=dynamic_value" in rendered
    assert "MY_KEY=static_value" not in rendered
def test_docker_env_and_forward_env_merge_in_init_args(monkeypatch):
    """Static and forwarded variables with distinct keys are both rendered."""
    monkeypatch.setenv("TOKEN", "secret123")
    monkeypatch.setattr(docker_env, "_load_hermes_env_vars", lambda: {})
    env = _make_execute_only_env(forward_env=["TOKEN"])
    env._env = {"SSH_AUTH_SOCK": "/run/user/1000/agent.sock"}

    rendered = " ".join(env._build_init_env_args())

    assert "SSH_AUTH_SOCK=/run/user/1000/agent.sock" in rendered
    assert "TOKEN=secret123" in rendered
def test_normalize_env_dict_filters_invalid_keys():
    """Keys that are not valid variable names are dropped from the result."""
    raw = {
        "VALID_KEY": "ok",
        "123bad": "rejected",
        "": "rejected",
        "also valid": "rejected",  # spaces invalid
        "GOOD": "ok",
    }
    expected = {"VALID_KEY": "ok", "GOOD": "ok"}

    assert docker_env._normalize_env_dict(raw) == expected
def test_normalize_env_dict_coerces_scalars():
    """int, bool and float values come back as their ``str`` form."""
    raw = {"PORT": 8080, "DEBUG": True, "RATIO": 0.5}
    expected = {"PORT": "8080", "DEBUG": "True", "RATIO": "0.5"}

    assert docker_env._normalize_env_dict(raw) == expected
def test_normalize_env_dict_rejects_non_dict():
    """Anything that is not a dict normalizes to an empty dict."""
    for not_a_dict in ("not a dict", None, []):
        assert docker_env._normalize_env_dict(not_a_dict) == {}
def test_normalize_env_dict_rejects_complex_values():
    """Entries whose value is a list or dict are dropped."""
    raw = {
        "GOOD": "string",
        "BAD_LIST": [1, 2, 3],
        "BAD_DICT": {"nested": True},
    }

    assert docker_env._normalize_env_dict(raw) == {"GOOD": "string"}
def test_security_args_include_setuid_setgid_for_privdrop(monkeypatch):
    """Default invocations (run_as_host_user=False) must ``--cap-add`` SETUID and SETGID.

    Those capabilities let the image's init drop from root to a non-root user
    (e.g. ``s6-setuidgid`` in the bundled Hermes image, or ``gosu``/``su`` in
    user-provided images). Without them the privilege-drop helper fails with
    ``operation not permitted`` and the container exits immediately (exit 1)
    before doing any work.

    ``no-new-privileges`` stays in place, so the process cannot escalate back
    to root afterwards — the drop is a one-way transition performed before the
    ``no_new_privs`` bit is enforced on the exec boundary.
    """
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    recorded = _mock_subprocess_run(monkeypatch)

    _make_dummy_env()

    run_args = _run_args_from_calls(recorded)
    # Every value that directly follows a ``--cap-add`` flag.
    added = {
        value
        for flag, value in zip(run_args, run_args[1:])
        if flag == "--cap-add"
    }
    assert "SETUID" in added, "SETUID cap missing — image privilege-drop will fail"
    assert "SETGID" in added, "SETGID cap missing — image privilege-drop will fail"
# ── run_as_host_user tests ────────────────────────────────────────
def test_run_as_host_user_passes_uid_gid(monkeypatch):
    """``run_as_host_user=True`` adds ``--user <uid>:<gid>`` built from os.getuid/getgid."""
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    for attr, fake_id in (("getuid", 1234), ("getgid", 5678)):
        monkeypatch.setattr(docker_env.os, attr, lambda fake_id=fake_id: fake_id, raising=False)
    recorded = _mock_subprocess_run(monkeypatch)

    _make_dummy_env(run_as_host_user=True)

    run_args = _run_args_from_calls(recorded)
    # --user must be present and must be paired with "1234:5678"
    assert "--user" in run_args, f"--user flag missing from docker run args: {run_args}"
    user_value = run_args[run_args.index("--user") + 1]
    assert user_value == "1234:5678", (
        f"expected --user 1234:5678, got --user {user_value}"
    )
def test_run_as_host_user_drops_setuid_setgid_caps(monkeypatch):
    """With ``--user`` in effect the container starts unprivileged, so no
    privilege drop is needed and SETUID/SETGID are left out of ``--cap-add``."""
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    monkeypatch.setattr(docker_env.os, "getuid", lambda: 1000, raising=False)
    monkeypatch.setattr(docker_env.os, "getgid", lambda: 1000, raising=False)
    recorded = _mock_subprocess_run(monkeypatch)

    _make_dummy_env(run_as_host_user=True)

    docker_runs = [
        entry for entry in recorded
        if isinstance(entry[0], list) and entry[0][1:2] == ["run"]
    ]
    run_args = docker_runs[0][0]
    added = {
        value
        for flag, value in zip(run_args, run_args[1:])
        if flag == "--cap-add"
    }
    assert "SETUID" not in added, (
        "SETUID cap should be dropped when running as host user — no privilege drop is needed"
    )
    assert "SETGID" not in added, (
        "SETGID cap should be dropped when running as host user — no privilege drop is needed"
    )
    # Core non-privilege-drop caps must still be there (pip/npm/apt need them).
    for required_cap in ("DAC_OVERRIDE", "CHOWN", "FOWNER"):
        assert required_cap in added
def test_run_as_host_user_default_off(monkeypatch):
    """No ``--user`` flag is emitted unless the caller opts in."""
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    recorded = _mock_subprocess_run(monkeypatch)

    _make_dummy_env()  # run_as_host_user defaults to False

    docker_runs = [
        entry for entry in recorded
        if isinstance(entry[0], list) and entry[0][1:2] == ["run"]
    ]
    run_args = docker_runs[0][0]
    assert "--user" not in run_args, (
        f"--user should not be in docker run args when opt-in is off: {run_args}"
    )
def test_run_as_host_user_warns_and_skips_when_no_posix_ids(monkeypatch, caplog):
    """Without ``os.getuid``/``os.getgid`` the opt-in is skipped with a warning:
    no ``--user`` flag, and the full capability set is kept."""
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    # Simulate a platform where os.getuid is absent (e.g. Windows host).
    for posix_only in ("getuid", "getgid"):
        monkeypatch.delattr(docker_env.os, posix_only, raising=False)
    recorded = _mock_subprocess_run(monkeypatch)

    with caplog.at_level(logging.WARNING):
        _make_dummy_env(run_as_host_user=True)

    docker_runs = [
        entry for entry in recorded
        if isinstance(entry[0], list) and entry[0][1:2] == ["run"]
    ]
    run_args = docker_runs[0][0]
    assert "--user" not in run_args

    # Fall back to the full cap set since the container still starts as root.
    added = {
        value
        for flag, value in zip(run_args, run_args[1:])
        if flag == "--cap-add"
    }
    assert "SETUID" in added
    assert "SETGID" in added

    warned = any(
        "does not expose POSIX uid/gid" in rec.getMessage()
        for rec in caplog.records
    )
    assert warned, "expected a warning when POSIX ids are unavailable"
# ── Docker labels (issue #20561) ──────────────────────────────────
def _run_args_from_calls(calls):
"""Pull the argv list passed to the first ``docker run`` invocation."""
run_calls = [
c for c in calls
if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"
]
assert run_calls, "docker run should have been called"
return run_calls[0][0]
def _labels_in_run_args(run_args):
"""Return the set of ``key=value`` strings passed via ``--label``."""
return {
run_args[i + 1]
for i, flag in enumerate(run_args[:-1])
if flag == "--label"
}
def test_run_command_tags_hermes_agent_label(monkeypatch):
    """``docker run`` must carry the ``hermes-agent=1`` label.

    The label is what lets the orphan reaper (and external operators) find
    every Hermes container with a single
    ``docker ps --filter label=hermes-agent=1`` call. Regression test for
    issue #20561 — without the label there is no global sweep target.
    """
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    recorded = _mock_subprocess_run(monkeypatch)

    _make_dummy_env(task_id="my-task")

    run_args = _run_args_from_calls(recorded)
    labels = _labels_in_run_args(run_args)
    assert "hermes-agent=1" in labels, (
        f"hermes-agent=1 label missing; got labels: {sorted(labels)}"
    )
def test_run_command_tags_task_and_profile_labels(monkeypatch):
    """The task id and active profile name are both emitted as labels.

    That lets future cross-process reuse logic filter on a specific
    (task, profile) pair without parsing container names. The profile name
    comes from ``_get_active_profile_name``, stubbed here to
    ``"research-bot"``.
    """
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "research-bot")
    recorded = _mock_subprocess_run(monkeypatch)

    _make_dummy_env(task_id="kanban-42")

    run_args = _run_args_from_calls(recorded)
    labels = _labels_in_run_args(run_args)
    assert "hermes-task-id=kanban-42" in labels, (
        f"hermes-task-id=kanban-42 missing; got: {sorted(labels)}"
    )
    assert "hermes-profile=research-bot" in labels, (
        f"hermes-profile=research-bot missing; got: {sorted(labels)}"
    )
def test_label_sanitizer_rejects_invalid_characters():
    """Label values are restricted to alnum + ``_.-`` and at most 63 chars.

    Slashes, colons and non-ASCII characters are replaced with ``_`` so the
    label round-trips cleanly through ``docker ps --filter``.
    """
    expectations = [
        ("plain-name_1.0", "plain-name_1.0"),
        ("with/slash", "with_slash"),
        ("with:colon", "with_colon"),
        ("emoji-😀-here", "emoji-_-here"),
        # Empty / non-string inputs must collapse to a queryable token, not "".
        ("", "unknown"),
        (None, "unknown"),
    ]
    for raw, expected in expectations:
        assert docker_env._sanitize_label_value(raw) == expected  # type: ignore[arg-type]

    # >63 chars must truncate, not error.
    oversized = "x" * 100
    assert len(docker_env._sanitize_label_value(oversized)) == 63
def test_run_command_sanitizes_unsafe_task_id(monkeypatch):
    """A task id with label-unsafe characters is sanitized before ``docker run --label``.

    Otherwise the daemon would refuse the run with an inscrutable error and
    the agent's first command would blow up.
    """
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    recorded = _mock_subprocess_run(monkeypatch)

    _make_dummy_env(task_id="task/with:weird*chars")

    run_args = _run_args_from_calls(recorded)
    labels = _labels_in_run_args(run_args)
    # Each non-OK character becomes an underscore; the safe chars survive.
    assert "hermes-task-id=task_with_weird_chars" in labels, (
        f"sanitized task-id label missing; got: {sorted(labels)}"
    )
def test_labels_attribute_populated_after_init(monkeypatch):
    """After construction ``_labels`` holds the agent, task-id and profile labels.

    Later reuse / reaper paths can then match on it without re-running the
    sanitizer or re-importing the profile module.
    """
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
    _mock_subprocess_run(monkeypatch)

    env = _make_dummy_env(task_id="abc")

    expected_labels = {
        "hermes-agent": "1",
        "hermes-task-id": "abc",
        "hermes-profile": "default",
    }
    assert env._labels == expected_labels
# ── Cross-process container reuse (issue #20561) ──────────────────
def _mock_subprocess_run_with_reuse(monkeypatch, ps_state: str | None,
                                    start_succeeds: bool = True):
    """Install a recording ``subprocess.run`` fake that can simulate container reuse.

    Args:
        ps_state: What the ``docker ps`` probe reports. ``None`` yields empty
            stdout (no match); any other string yields one tab-separated
            ``reused-cid<TAB>{ps_state}`` line.
        start_succeeds: When false, ``docker start`` raises
            ``subprocess.CalledProcessError`` instead of succeeding.

    Returns:
        The list that accumulates one ``(cmd, kwargs)`` tuple per call, so
        tests can verify which docker commands actually ran.
    """
    recorded = []

    def _fake_run(cmd, **kwargs):
        recorded.append((list(cmd) if isinstance(cmd, list) else cmd, kwargs))
        subcommand = cmd[1] if isinstance(cmd, list) and len(cmd) >= 2 else None

        if subcommand == "start" and not start_succeeds:
            # Real subprocess.run with check=True raises on non-zero exit;
            # mirror that so the production code's except clause fires.
            raise subprocess.CalledProcessError(1, cmd, output="", stderr="no such container")

        if subcommand == "version":
            stdout = "Docker version"
        elif subcommand == "ps":
            stdout = "" if ps_state is None else f"reused-cid\t{ps_state}\n"
        elif subcommand == "start":
            stdout = "reused-cid\n"
        elif subcommand == "run":
            stdout = "fresh-cid\n"
        else:
            stdout = ""
        return subprocess.CompletedProcess(cmd, 0, stdout=stdout, stderr="")

    monkeypatch.setattr(docker_env.subprocess, "run", _fake_run)
    return recorded
def test_reuse_attaches_to_running_container_without_docker_run(monkeypatch):
    """A labeled container reported as ``running`` is adopted as-is.

    Neither ``docker run`` nor ``docker start`` may be issued. Regression for
    the issue #20561 root cause: every Hermes process spawned a new container
    despite the docs claiming "ONE long-lived container shared across sessions".
    """
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
    recorded = _mock_subprocess_run_with_reuse(monkeypatch, ps_state="running")

    def _calls_to(subcommand):
        return [
            entry for entry in recorded
            if isinstance(entry[0], list) and entry[0][1:2] == [subcommand]
        ]

    env = _make_dummy_env(task_id="reuse-test")

    # The reuse path must populate _container_id from the ps probe output.
    assert env._container_id == "reused-cid", (
        f"expected reused container id, got {env._container_id!r}"
    )
    # And it must NOT have run `docker run`.
    run_invocations = _calls_to("run")
    assert not run_invocations, (
        f"docker run should be skipped on reuse, got: {run_invocations}"
    )
    # And it must have NOT issued a `docker start` for an already-running container.
    start_invocations = _calls_to("start")
    assert not start_invocations, (
        f"docker start should be skipped when container already running, got: {start_invocations}"
    )
def test_reuse_starts_stopped_container_before_attaching(monkeypatch):
    """A labeled container in ``exited`` state is revived with ``docker start``.

    No ``docker run`` may be issued. Without the restart, ``docker exec``
    against the stopped container would error out and the first agent command
    would fail opaquely.
    """
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
    recorded = _mock_subprocess_run_with_reuse(monkeypatch, ps_state="exited")

    def _calls_to(subcommand):
        return [
            entry for entry in recorded
            if isinstance(entry[0], list) and entry[0][1:2] == [subcommand]
        ]

    env = _make_dummy_env(task_id="reuse-stopped")

    assert env._container_id == "reused-cid"
    assert _calls_to("start"), "expected docker start for exited container"
    assert not _calls_to("run"), "should not docker run when reusing an exited container"
def test_reuse_falls_back_to_fresh_run_when_start_fails(monkeypatch):
    """A failing ``docker start`` on the matched container falls back to ``docker run``.

    The container may have been removed between probe and start, the daemon
    may be paused, etc. The probe is best-effort, not authoritative, so the
    user must still end up with a working environment.
    """
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
    recorded = _mock_subprocess_run_with_reuse(
        monkeypatch, ps_state="exited", start_succeeds=False,
    )

    env = _make_dummy_env(task_id="reuse-broken-start")

    # docker start should be attempted then fail; code falls through to run.
    assert env._container_id == "fresh-cid", (
        f"expected fresh container id after fallback, got {env._container_id!r}"
    )
    fresh_runs = [
        entry for entry in recorded
        if isinstance(entry[0], list) and entry[0][1:2] == ["run"]
    ]
    assert fresh_runs, "fallback to fresh docker run must happen on start failure"
def test_no_reuse_when_persist_across_processes_disabled(monkeypatch):
    """With ``persist_across_processes=False`` no ``docker ps`` probe is issued.

    A fresh container is always started, matching the pre-fix behavior for
    users who want hard per-process isolation.
    """
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
    # ps_state=running would trigger reuse if the probe ran — assert it doesn't.
    recorded = _mock_subprocess_run_with_reuse(monkeypatch, ps_state="running")

    env = docker_env.DockerEnvironment(
        image="python:3.11",
        cwd="/root",
        timeout=60,
        task_id="no-reuse",
        persist_across_processes=False,
    )

    # Must NOT have issued docker ps (the probe is gated by the flag).
    ps_invocations = [
        entry for entry in recorded
        if isinstance(entry[0], list) and entry[0][1:2] == ["ps"]
    ]
    assert not ps_invocations, (
        f"docker ps probe should be skipped when persist_across_processes=False, got: {ps_invocations}"
    )
    # Should have started a fresh container.
    assert env._container_id == "fresh-cid"
def test_find_reusable_container_prefers_running_over_stopped(monkeypatch):
    """A ``running`` container wins over a stopped one when the probe
    returns several matches.

    Multiple matches shouldn't normally happen, but a crash can leave stale
    duplicates behind. The losing duplicate is left for the orphan reaper;
    nothing heroic is attempted here.
    """
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")

    # Two matches, stopped listed first and running second, so simply taking
    # the first row would pick the wrong container.
    duplicate_listing = "stopped-cid\texited\nrunning-cid\trunning\n"

    def _fake_run(cmd, **_kwargs):
        has_subcommand = isinstance(cmd, list) and len(cmd) >= 2
        subcommand = cmd[1] if has_subcommand else None
        if subcommand == "version":
            return subprocess.CompletedProcess(cmd, 0, stdout="ok", stderr="")
        if subcommand == "ps":
            return subprocess.CompletedProcess(
                cmd, 0, stdout=duplicate_listing, stderr="",
            )
        # Every other invocation answers like a successful ``docker run``.
        return subprocess.CompletedProcess(cmd, 0, stdout="fresh-cid\n", stderr="")

    monkeypatch.setattr(docker_env.subprocess, "run", _fake_run)

    env = _make_dummy_env(task_id="dup-match")
    assert env._container_id == "running-cid", (
        f"running container should win over stopped duplicate, got {env._container_id!r}"
    )
# ── Cleanup correctness (issue #20561) ────────────────────────────
class _FakeThread:
"""Stand-in for threading.Thread that captures target/args and calls
target() synchronously when .start() runs, so cleanup behavior is
observable without actually backgrounding subprocess calls."""
def __init__(self, target=None, daemon=None, name=None):
self._target = target
self.daemon = daemon
self.name = name
self._done = False
def start(self):
if self._target is not None:
self._target()
self._done = True
def is_alive(self):
return not self._done
def join(self, timeout=None):
self._done = True
def _install_fake_thread(monkeypatch):
    """Replace ``threading.Thread`` with ``_FakeThread`` via ``monkeypatch``,
    so threads started afterwards run their target synchronously."""
    import threading as threading_module

    monkeypatch.setattr(threading_module, "Thread", _FakeThread)
def test_cleanup_with_persist_only_stops_no_rm(monkeypatch):
    """With ``persist_across_processes=True`` (the default), cleanup must
    ``docker stop`` the container but NEVER ``docker rm`` it.

    The container has to survive so the next Hermes process can reuse it.
    Issue #20561: the previous code keyed this decision on the
    ``_persistent`` flag instead of a dedicated cross-process flag, which
    made reuse impossible.
    """
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
    _mock_subprocess_run(monkeypatch)
    _install_fake_thread(monkeypatch)

    # persist_across_processes is left at its default (True).
    env = _make_dummy_env(task_id="cleanup-persist", persistent_filesystem=False)
    assert env._container_id

    # Wrap the already-mocked ``run`` so only calls made from here on
    # (i.e. during cleanup) are recorded.
    observed = []
    delegate = docker_env.subprocess.run

    def _recording_run(cmd, **kwargs):
        snapshot = list(cmd) if isinstance(cmd, list) else cmd
        observed.append((snapshot, kwargs))
        return delegate(cmd, **kwargs)

    monkeypatch.setattr(docker_env.subprocess, "run", _recording_run)
    env.cleanup()

    def _with_subcommand(name):
        return [
            entry
            for entry in observed
            if isinstance(entry[0], list) and entry[0][1:2] == [name]
        ]

    stops = _with_subcommand("stop")
    rms = _with_subcommand("rm")
    assert stops, f"expected docker stop call, got cleanup_calls: {observed}"
    assert not rms, (
        "docker rm must NOT be called when persist_across_processes=True; "
        f"reuse would be impossible. Got: {rms}"
    )
def test_cleanup_with_persist_disabled_stops_and_rms(monkeypatch):
    """With ``persist_across_processes=False``, cleanup must ``docker stop``
    AND ``docker rm`` so containers don't leak.

    Crucially this holds regardless of ``persistent_filesystem``. The
    original code only removed the container when ``not self._persistent``,
    so users on the default ``container_persistent: true`` (the documented
    happy path) leaked Exited containers forever. Root-cause fix for
    issue #20561.
    """
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
    _mock_subprocess_run(monkeypatch)
    _install_fake_thread(monkeypatch)

    # persistent_filesystem=True reproduces the prior-leak scenario; with the
    # cross-process toggle OFF the container must still be removed.
    env = docker_env.DockerEnvironment(
        image="python:3.11",
        cwd="/root",
        timeout=60,
        task_id="cleanup-no-persist",
        persistent_filesystem=True,
        persist_across_processes=False,
    )

    # Wrap the already-mocked ``run`` so only cleanup-time calls are recorded.
    observed = []
    delegate = docker_env.subprocess.run

    def _recording_run(cmd, **kwargs):
        snapshot = list(cmd) if isinstance(cmd, list) else cmd
        observed.append((snapshot, kwargs))
        return delegate(cmd, **kwargs)

    monkeypatch.setattr(docker_env.subprocess, "run", _recording_run)
    env.cleanup()

    def _with_subcommand(name):
        return [
            entry
            for entry in observed
            if isinstance(entry[0], list) and entry[0][1:2] == [name]
        ]

    stops = _with_subcommand("stop")
    rms = _with_subcommand("rm")
    assert stops, "expected docker stop"
    assert rms, (
        "docker rm MUST run when persist_across_processes=False, even with "
        "persistent_filesystem=True — that gating was the leak source in #20561."
    )
def test_cleanup_uses_subprocess_run_not_detached_shell(monkeypatch):
    """Cleanup must never reach into shell-mode ``Popen``.

    The pre-fix code used ``subprocess.Popen("... &", shell=True)``, which
    raced with parent-process exit and silently dropped cleanup work. The
    new code is expected to use ``subprocess.run`` with a bounded
    ``timeout=`` so the work completes within the process lifetime. This
    test enforces the "no Popen" half by making any ``Popen`` call fail.
    """
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
    _mock_subprocess_run(monkeypatch)
    _install_fake_thread(monkeypatch)

    def _popen_tripwire(*args, **kwargs):
        message = (
            "cleanup must not use subprocess.Popen anymore (issue #20561); "
            f"got args={args} kwargs={kwargs}"
        )
        raise AssertionError(message)

    # Installed before the env is built, so the tripwire is armed for
    # construction as well as for cleanup.
    monkeypatch.setattr(docker_env.subprocess, "Popen", _popen_tripwire)

    env = _make_dummy_env(task_id="no-popen-cleanup")
    # Passing means cleanup returned without tripping the Popen guard.
    env.cleanup()
def test_wait_for_cleanup_returns_true_when_no_thread_started():
    """``wait_for_cleanup`` is a no-op returning ``True`` when ``cleanup`` was
    never called (or the env has no live cleanup thread).

    atexit calls it unconditionally across all active envs, so a ``False``
    return would falsely flag healthy shutdowns.
    """
    env_cls = docker_env.DockerEnvironment
    # ``__new__`` bypasses ``__init__``, so ``_cleanup_thread`` is never
    # assigned. This simulates an env that was never cleanup()'d.
    bare_env = env_cls.__new__(env_cls)

    finished = bare_env.wait_for_cleanup(timeout=1.0)
    assert finished is True
def test_wait_for_cleanup_after_cleanup_returns_true(monkeypatch):
    """End-to-end: ``cleanup()`` starts a thread and ``wait_for_cleanup()``
    joins it and reports completion.

    atexit relies on this contract to make sure docker stop/rm actually
    finishes before the Python interpreter exits.
    """
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
    _mock_subprocess_run(monkeypatch)
    _install_fake_thread(monkeypatch)

    env = _make_dummy_env(task_id="wait-test")
    env.cleanup()

    completed = env.wait_for_cleanup(timeout=5.0)
    assert completed is True
def test_cleanup_on_env_with_no_container_id_does_not_raise(monkeypatch):
    """``cleanup()`` must be safe on an env whose ``__init__`` failed before
    ``_container_id`` was set (image-pull error, docker daemon down).

    The post-creation failure path in callers always attempts cleanup.
    Without this guard the daemon-down case used to NameError on the
    cleanup branch.
    """
    env_cls = docker_env.DockerEnvironment
    env = env_cls.__new__(env_cls)

    # Hand-populate only these attributes, with no container behind them.
    blank_state = (
        ("_container_id", None),
        ("_persistent", False),
        ("_workspace_dir", None),
        ("_home_dir", None),
    )
    for attr_name, attr_value in blank_state:
        setattr(env, attr_name, attr_value)

    # Passing means no exception was raised.
    env.cleanup()