hermes-agent/tests/tools/test_docker_environment.py

import logging
from io import StringIO
import subprocess
import sys
import types

import pytest

from tools.environments import docker as docker_env


def _mock_subprocess_run(monkeypatch):
    """Mock subprocess.run to intercept docker run -d and docker version calls.

    Returns a list of captured (cmd, kwargs) tuples for inspection.
    """
    calls = []

    def _run(cmd, **kwargs):
        calls.append((list(cmd) if isinstance(cmd, list) else cmd, kwargs))
        if isinstance(cmd, list) and len(cmd) >= 2:
            if cmd[1] == "version":
                return subprocess.CompletedProcess(cmd, 0, stdout="Docker version", stderr="")
            if cmd[1] == "run":
                return subprocess.CompletedProcess(cmd, 0, stdout="fake-container-id\n", stderr="")
        return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")

    monkeypatch.setattr(docker_env.subprocess, "run", _run)
    return calls


def _make_dummy_env(**kwargs):
    """Helper to construct DockerEnvironment with minimal required args."""
    return docker_env.DockerEnvironment(
        image=kwargs.get("image", "python:3.11"),
        cwd=kwargs.get("cwd", "/root"),
        timeout=kwargs.get("timeout", 60),
        cpu=kwargs.get("cpu", 0),
        memory=kwargs.get("memory", 0),
        disk=kwargs.get("disk", 0),
        persistent_filesystem=kwargs.get("persistent_filesystem", False),
        task_id=kwargs.get("task_id", "test-task"),
        volumes=kwargs.get("volumes", []),
        network=kwargs.get("network", True),
        host_cwd=kwargs.get("host_cwd"),
        auto_mount_cwd=kwargs.get("auto_mount_cwd", False),
        env=kwargs.get("env"),
        run_as_host_user=kwargs.get("run_as_host_user", False),
    )


def test_ensure_docker_available_logs_and_raises_when_not_found(monkeypatch, caplog):
    """When docker cannot be found, raise a clear error before container setup."""

    monkeypatch.setattr(docker_env, "find_docker", lambda: None)
    monkeypatch.setattr(
        docker_env.subprocess,
        "run",
        lambda *args, **kwargs: pytest.fail("subprocess.run should not be called when docker is missing"),
    )

    with caplog.at_level(logging.ERROR):
        with pytest.raises(RuntimeError) as excinfo:
            _make_dummy_env()

    assert "Docker executable not found in PATH or known install locations" in str(excinfo.value)
    assert any(
        "no docker executable was found in PATH or known install locations"
        in record.getMessage()
        for record in caplog.records
    )


def test_ensure_docker_available_logs_and_raises_on_timeout(monkeypatch, caplog):
    """When docker version times out, surface a helpful error instead of hanging."""

    def _raise_timeout(*args, **kwargs):
        raise subprocess.TimeoutExpired(cmd=["/custom/docker", "version"], timeout=5)

    monkeypatch.setattr(docker_env, "find_docker", lambda: "/custom/docker")
    monkeypatch.setattr(docker_env.subprocess, "run", _raise_timeout)

    with caplog.at_level(logging.ERROR):
        with pytest.raises(RuntimeError) as excinfo:
            _make_dummy_env()

    assert "Docker daemon is not responding" in str(excinfo.value)
    assert any(
        "/custom/docker version' timed out" in record.getMessage()
        for record in caplog.records
    )


def test_ensure_docker_available_uses_resolved_executable(monkeypatch):
    """When docker is found outside PATH, preflight should use that resolved path."""

    calls = []

    def _run(cmd, **kwargs):
        calls.append((cmd, kwargs))
        return subprocess.CompletedProcess(cmd, 0, stdout="Docker version", stderr="")

    monkeypatch.setattr(docker_env, "find_docker", lambda: "/opt/homebrew/bin/docker")
    monkeypatch.setattr(docker_env.subprocess, "run", _run)

    docker_env._ensure_docker_available()

    assert calls == [
        (["/opt/homebrew/bin/docker", "version"], {
            "capture_output": True,
            "text": True,
            "timeout": 5,
        })
    ]


def test_auto_mount_host_cwd_adds_volume(monkeypatch, tmp_path):
    """Opt-in docker cwd mounting should bind the host cwd to /workspace."""
    project_dir = tmp_path / "my-project"
    project_dir.mkdir()

    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    calls = _mock_subprocess_run(monkeypatch)

    _make_dummy_env(
        cwd="/workspace",
        host_cwd=str(project_dir),
        auto_mount_cwd=True,
    )

    # Find the docker run call and check its args
    run_calls = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"]
    assert run_calls, "docker run should have been called"
    run_args_str = " ".join(run_calls[0][0])
    assert f"{project_dir}:/workspace" in run_args_str


def test_auto_mount_disabled_by_default(monkeypatch, tmp_path):
    """Host cwd should not be mounted unless the caller explicitly opts in."""
    project_dir = tmp_path / "my-project"
    project_dir.mkdir()

    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    calls = _mock_subprocess_run(monkeypatch)

    _make_dummy_env(
        cwd="/root",
        host_cwd=str(project_dir),
        auto_mount_cwd=False,
    )

    run_calls = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"]
    assert run_calls, "docker run should have been called"
    run_args_str = " ".join(run_calls[0][0])
    assert f"{project_dir}:/workspace" not in run_args_str


def test_auto_mount_skipped_when_workspace_already_mounted(monkeypatch, tmp_path):
    """Explicit user volumes for /workspace should take precedence over cwd mount."""
    project_dir = tmp_path / "my-project"
    project_dir.mkdir()
    other_dir = tmp_path / "other"
    other_dir.mkdir()

    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    calls = _mock_subprocess_run(monkeypatch)

    _make_dummy_env(
        cwd="/workspace",
        host_cwd=str(project_dir),
        auto_mount_cwd=True,
        volumes=[f"{other_dir}:/workspace"],
    )

    run_calls = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"]
    assert run_calls, "docker run should have been called"
    run_args_str = " ".join(run_calls[0][0])
    assert f"{other_dir}:/workspace" in run_args_str
    assert run_args_str.count(":/workspace") == 1


def test_auto_mount_replaces_persistent_workspace_bind(monkeypatch, tmp_path):
    """Persistent mode should still prefer the configured host cwd at /workspace."""
    project_dir = tmp_path / "my-project"
    project_dir.mkdir()

    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    calls = _mock_subprocess_run(monkeypatch)

    _make_dummy_env(
        cwd="/workspace",
        persistent_filesystem=True,
        host_cwd=str(project_dir),
        auto_mount_cwd=True,
        task_id="test-persistent-auto-mount",
    )

    run_calls = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"]
    assert run_calls, "docker run should have been called"
    run_args_str = " ".join(run_calls[0][0])
    assert f"{project_dir}:/workspace" in run_args_str
    assert "/sandboxes/docker/test-persistent-auto-mount/workspace:/workspace" not in run_args_str


def test_non_persistent_cleanup_removes_container(monkeypatch):
    """When persist_across_processes=false, cleanup() must docker stop AND
    docker rm so containers don't leak across hermes processes.

    Updated for issue #20561: the previous implementation used fire-and-forget
    ``subprocess.Popen("... &", shell=True)`` which raced with parent exit;
    the new implementation uses ``subprocess.run`` on a daemon thread with
    bounded timeouts. See test_cleanup_with_persist_disabled_stops_and_rms
    for the full behavior contract.
    """
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
    _mock_subprocess_run(monkeypatch)
    # Run the worker thread synchronously so assertions can observe its work.
    import threading
    monkeypatch.setattr(threading, "Thread", _FakeThread)

    env = docker_env.DockerEnvironment(
        image="python:3.11", cwd="/root", timeout=60,
        task_id="ephemeral-task", persistent_filesystem=False,
        persist_across_processes=False,
    )
    container_id = env._container_id
    assert container_id

    # Capture cleanup-time docker calls (everything before this was init).
    cleanup_calls = []
    real_run = docker_env.subprocess.run

    def _capture(cmd, **kw):
        cleanup_calls.append((list(cmd) if isinstance(cmd, list) else cmd, kw))
        return real_run(cmd, **kw)

    monkeypatch.setattr(docker_env.subprocess, "run", _capture)
    env.cleanup()

    stops = [c for c in cleanup_calls if isinstance(c[0], list) and c[0][1:2] == ["stop"]]
    assert stops, f"cleanup() should docker stop {container_id}; got {cleanup_calls}"


class _FakePopen:
    def __init__(self, cmd, **kwargs):
        self.cmd = cmd
        self.kwargs = kwargs
        self.stdout = StringIO("")
        self.stdin = None
        self.returncode = 0

    def poll(self):
        return self.returncode


def _make_execute_only_env(forward_env=None):
    env = docker_env.DockerEnvironment.__new__(docker_env.DockerEnvironment)
    env.cwd = "/root"
    env.timeout = 60
    env._forward_env = forward_env or []
    env._env = {}
    env._prepare_command = lambda command: (command, None)
    env._timeout_result = lambda timeout: {"output": f"timed out after {timeout}", "returncode": 124}
    env._container_id = "test-container"
    env._docker_exe = "/usr/bin/docker"
    # Base class attributes needed by unified execute()
    env._session_id = "test123"
    env._snapshot_path = "/tmp/hermes-snap-test123.sh"
    env._cwd_file = "/tmp/hermes-cwd-test123.txt"
    env._cwd_marker = "__HERMES_CWD_test123__"
    env._snapshot_ready = True
    env._last_sync_time = None
    env._init_env_args = []
    return env


def test_init_env_args_uses_hermes_dotenv_for_allowlisted_env(monkeypatch):
    """_build_init_env_args picks up forwarded env vars from .env file at init time."""
    # Use a var that is NOT in _HERMES_PROVIDER_ENV_BLOCKLIST (GITHUB_TOKEN
    # is in the copilot provider's api_key_env_vars and gets stripped).
    env = _make_execute_only_env(["DATABASE_URL"])

    monkeypatch.delenv("DATABASE_URL", raising=False)
    monkeypatch.setattr(docker_env, "_load_hermes_env_vars", lambda: {"DATABASE_URL": "value_from_dotenv"})

    args = env._build_init_env_args()
    args_str = " ".join(args)

    assert "DATABASE_URL=value_from_dotenv" in args_str


def test_init_env_args_prefers_shell_env_over_hermes_dotenv(monkeypatch):
    """Shell env vars take priority over .env file values in init env args."""
    env = _make_execute_only_env(["DATABASE_URL"])

    monkeypatch.setenv("DATABASE_URL", "value_from_shell")
    monkeypatch.setattr(docker_env, "_load_hermes_env_vars", lambda: {"DATABASE_URL": "value_from_dotenv"})

    args = env._build_init_env_args()
    args_str = " ".join(args)

    assert "DATABASE_URL=value_from_shell" in args_str
    assert "value_from_dotenv" not in args_str


# ── docker_env tests ──────────────────────────────────────────────


def test_docker_env_appears_in_run_command(monkeypatch):
    """Explicit docker_env values should be passed via -e at docker run time."""
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    calls = _mock_subprocess_run(monkeypatch)

    _make_dummy_env(env={"SSH_AUTH_SOCK": "/run/user/1000/ssh-agent.sock", "GNUPGHOME": "/root/.gnupg"})

    run_calls = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"]
    assert run_calls, "docker run should have been called"
    run_args = run_calls[0][0]
    run_args_str = " ".join(run_args)
    assert "SSH_AUTH_SOCK=/run/user/1000/ssh-agent.sock" in run_args_str
    assert "GNUPGHOME=/root/.gnupg" in run_args_str


def test_docker_env_appears_in_init_env_args(monkeypatch):
    """Explicit docker_env values should appear in _build_init_env_args."""
    env = _make_execute_only_env()
    env._env = {"MY_VAR": "my_value"}

    args = env._build_init_env_args()
    args_str = " ".join(args)

    assert "MY_VAR=my_value" in args_str


def test_forward_env_overrides_docker_env_in_init_args(monkeypatch):
    """docker_forward_env should override docker_env for the same key."""
    env = _make_execute_only_env(forward_env=["MY_KEY"])
    env._env = {"MY_KEY": "static_value"}

    monkeypatch.setenv("MY_KEY", "dynamic_value")
    monkeypatch.setattr(docker_env, "_load_hermes_env_vars", lambda: {})

    args = env._build_init_env_args()
    args_str = " ".join(args)

    assert "MY_KEY=dynamic_value" in args_str
    assert "MY_KEY=static_value" not in args_str


def test_docker_env_and_forward_env_merge_in_init_args(monkeypatch):
    """docker_env and docker_forward_env with different keys should both appear."""
    env = _make_execute_only_env(forward_env=["TOKEN"])
    env._env = {"SSH_AUTH_SOCK": "/run/user/1000/agent.sock"}

    monkeypatch.setenv("TOKEN", "secret123")
    monkeypatch.setattr(docker_env, "_load_hermes_env_vars", lambda: {})

    args = env._build_init_env_args()
    args_str = " ".join(args)

    assert "SSH_AUTH_SOCK=/run/user/1000/agent.sock" in args_str
    assert "TOKEN=secret123" in args_str


def test_normalize_env_dict_filters_invalid_keys():
    """_normalize_env_dict should reject invalid variable names."""
    result = docker_env._normalize_env_dict({
        "VALID_KEY": "ok",
        "123bad": "rejected",
        "": "rejected",
        "also valid": "rejected",  # spaces invalid
        "GOOD": "ok",
    })
    assert result == {"VALID_KEY": "ok", "GOOD": "ok"}


def test_normalize_env_dict_coerces_scalars():
    """_normalize_env_dict should coerce int/float/bool to str."""
    result = docker_env._normalize_env_dict({
        "PORT": 8080,
        "DEBUG": True,
        "RATIO": 0.5,
    })
    assert result == {"PORT": "8080", "DEBUG": "True", "RATIO": "0.5"}


def test_normalize_env_dict_rejects_non_dict():
    """_normalize_env_dict should return empty dict for non-dict input."""
    assert docker_env._normalize_env_dict("not a dict") == {}
    assert docker_env._normalize_env_dict(None) == {}
    assert docker_env._normalize_env_dict([]) == {}


def test_normalize_env_dict_rejects_complex_values():
    """_normalize_env_dict should reject list/dict values."""
    result = docker_env._normalize_env_dict({
        "GOOD": "string",
        "BAD_LIST": [1, 2, 3],
        "BAD_DICT": {"nested": True},
    })
    assert result == {"GOOD": "string"}


def test_security_args_include_setuid_setgid_for_privdrop(monkeypatch):
    """The default (run_as_host_user=False) invocation must include SETUID and
    SETGID caps so the image's init can drop from root to a non-root user
    (e.g. via ``s6-setuidgid`` in the bundled Hermes image, or ``gosu``/``su``
    in user-provided images).

    Without these caps the privilege-drop helper fails with
    ``operation not permitted`` and the container exits immediately (exit 1)
    before running any work.

    ``no-new-privileges`` is kept, so the dropped process still cannot
    escalate back to root after the drop — the drop is a one-way transition
    performed before the ``no_new_privs`` bit is enforced on the exec boundary.
    """
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    calls = _mock_subprocess_run(monkeypatch)

    _make_dummy_env()

    run_calls = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"]
    assert run_calls, "docker run should have been called"
    run_args = run_calls[0][0]

    added = {
        run_args[i + 1]
        for i, flag in enumerate(run_args[:-1])
        if flag == "--cap-add"
    }
    assert "SETUID" in added, "SETUID cap missing — image privilege-drop will fail"
    assert "SETGID" in added, "SETGID cap missing — image privilege-drop will fail"


# ── run_as_host_user tests ────────────────────────────────────────


def test_run_as_host_user_passes_uid_gid(monkeypatch):
    """With run_as_host_user=True, --user <uid>:<gid> is added to docker run."""
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    monkeypatch.setattr(docker_env.os, "getuid", lambda: 1234, raising=False)
    monkeypatch.setattr(docker_env.os, "getgid", lambda: 5678, raising=False)
    calls = _mock_subprocess_run(monkeypatch)

    _make_dummy_env(run_as_host_user=True)

    run_calls = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"]
    assert run_calls, "docker run should have been called"
    run_args = run_calls[0][0]

    # --user must be present and must be paired with "1234:5678"
    assert "--user" in run_args, f"--user flag missing from docker run args: {run_args}"
    idx = run_args.index("--user")
    assert run_args[idx + 1] == "1234:5678", (
        f"expected --user 1234:5678, got --user {run_args[idx + 1]}"
    )


def test_run_as_host_user_drops_setuid_setgid_caps(monkeypatch):
    """When --user is passed, the container already starts unprivileged and
    never needs a privilege drop, so SETUID/SETGID caps are omitted for a
    tighter security posture."""
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    monkeypatch.setattr(docker_env.os, "getuid", lambda: 1000, raising=False)
    monkeypatch.setattr(docker_env.os, "getgid", lambda: 1000, raising=False)
    calls = _mock_subprocess_run(monkeypatch)

    _make_dummy_env(run_as_host_user=True)

    run_calls = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"]
    run_args = run_calls[0][0]

    added = {
        run_args[i + 1]
        for i, flag in enumerate(run_args[:-1])
        if flag == "--cap-add"
    }
    assert "SETUID" not in added, (
        "SETUID cap should be dropped when running as host user — no privilege drop is needed"
    )
    assert "SETGID" not in added, (
        "SETGID cap should be dropped when running as host user — no privilege drop is needed"
    )
    # Core non-privilege-drop caps must still be there (pip/npm/apt need them).
    assert "DAC_OVERRIDE" in added
    assert "CHOWN" in added
    assert "FOWNER" in added


def test_run_as_host_user_default_off(monkeypatch):
    """Without the opt-in, no --user flag is emitted — preserving existing behavior."""
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    calls = _mock_subprocess_run(monkeypatch)

    _make_dummy_env()  # run_as_host_user defaults to False

    run_calls = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"]
    run_args = run_calls[0][0]
    assert "--user" not in run_args, (
        f"--user should not be in docker run args when opt-in is off: {run_args}"
    )


def test_run_as_host_user_warns_and_skips_when_no_posix_ids(monkeypatch, caplog):
    """On platforms without POSIX getuid/getgid, log a warning and leave the
    container at its image default user (no --user flag, full cap set)."""
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    # Simulate a platform where os.getuid is absent (e.g. Windows host).
    monkeypatch.delattr(docker_env.os, "getuid", raising=False)
    monkeypatch.delattr(docker_env.os, "getgid", raising=False)
    calls = _mock_subprocess_run(monkeypatch)

    with caplog.at_level(logging.WARNING):
        _make_dummy_env(run_as_host_user=True)

    run_calls = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"]
    run_args = run_calls[0][0]

    assert "--user" not in run_args
    # Fall back to the full cap set since the container still starts as root.
    added = {
        run_args[i + 1]
        for i, flag in enumerate(run_args[:-1])
        if flag == "--cap-add"
    }
    assert "SETUID" in added
    assert "SETGID" in added
    assert any(
        "does not expose POSIX uid/gid" in rec.getMessage()
        for rec in caplog.records
    ), "expected a warning when POSIX ids are unavailable"


# ── Docker labels (issue #20561) ──────────────────────────────────


def _run_args_from_calls(calls):
    """Pull the argv list passed to the first ``docker run`` invocation."""
    run_calls = [
        c for c in calls
        if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"
    ]
    assert run_calls, "docker run should have been called"
    return run_calls[0][0]


def _labels_in_run_args(run_args):
    """Return the set of ``key=value`` strings passed via ``--label``."""
    return {
        run_args[i + 1]
        for i, flag in enumerate(run_args[:-1])
        if flag == "--label"
    }


def test_run_command_tags_hermes_agent_label(monkeypatch):
    """Every container hermes-agent starts must carry the hermes-agent=1 label
    so the orphan reaper (and external operators) can identify them with a
    single ``docker ps --filter label=hermes-agent=1`` call. Regression test
    for issue #20561 — without the label there is no global sweep target."""
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    calls = _mock_subprocess_run(monkeypatch)

    _make_dummy_env(task_id="my-task")

    labels = _labels_in_run_args(_run_args_from_calls(calls))
    assert "hermes-agent=1" in labels, (
        f"hermes-agent=1 label missing; got labels: {sorted(labels)}"
    )


def test_run_command_tags_task_and_profile_labels(monkeypatch):
    """task_id and the active profile name are surfaced as labels so future
    cross-process reuse logic can filter to a specific (task, profile) pair
    without parsing container names. Profile resolution uses the helper that
    returns ``"default"`` for the root Hermes home."""
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "research-bot")
    calls = _mock_subprocess_run(monkeypatch)

    _make_dummy_env(task_id="kanban-42")

    labels = _labels_in_run_args(_run_args_from_calls(calls))
    assert "hermes-task-id=kanban-42" in labels, (
        f"hermes-task-id=kanban-42 missing; got: {sorted(labels)}"
    )
    assert "hermes-profile=research-bot" in labels, (
        f"hermes-profile=research-bot missing; got: {sorted(labels)}"
    )


def test_label_sanitizer_rejects_invalid_characters():
    """Docker label values must be alnum + ``_.-`` and ≤63 chars. Profile or
    task names containing slashes, colons, or unicode would otherwise emit
    invalid labels that round-trip badly through ``docker ps --filter``."""
    assert docker_env._sanitize_label_value("plain-name_1.0") == "plain-name_1.0"
    assert docker_env._sanitize_label_value("with/slash") == "with_slash"
    assert docker_env._sanitize_label_value("with:colon") == "with_colon"
    assert docker_env._sanitize_label_value("emoji-😀-here") == "emoji-_-here"
    # Empty / non-string inputs must collapse to a queryable token, not "".
    assert docker_env._sanitize_label_value("") == "unknown"
    assert docker_env._sanitize_label_value(None) == "unknown"  # type: ignore[arg-type]
    # >63 chars must truncate, not error.
    long_value = "x" * 100
    assert len(docker_env._sanitize_label_value(long_value)) == 63


def test_run_command_sanitizes_unsafe_task_id(monkeypatch):
    """A task_id containing characters Docker rejects in label values must be
    sanitized before reaching ``docker run --label``; otherwise the daemon
    refuses the run with an inscrutable error and the agent's first command
    blows up."""
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    calls = _mock_subprocess_run(monkeypatch)

    _make_dummy_env(task_id="task/with:weird*chars")

    labels = _labels_in_run_args(_run_args_from_calls(calls))
    # Each non-OK character becomes an underscore; the safe chars survive.
    assert "hermes-task-id=task_with_weird_chars" in labels, (
        f"sanitized task-id label missing; got: {sorted(labels)}"
    )


def test_labels_attribute_populated_after_init(monkeypatch):
    """``self._labels`` must be set to the same key/value pairs that went onto
    docker run, so subsequent reuse / reaper paths can match without re-running
    the sanitizer or re-importing the profile module."""
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
    _mock_subprocess_run(monkeypatch)

    env = _make_dummy_env(task_id="abc")

    assert env._labels == {
        "hermes-agent": "1",
        "hermes-task-id": "abc",
        "hermes-profile": "default",
    }


# ── Cross-process container reuse (issue #20561) ──────────────────


def _mock_subprocess_run_with_reuse(monkeypatch, ps_state: str | None,
                                     start_succeeds: bool = True):
    """Reuse-aware subprocess.run mock.

    ``ps_state`` controls what ``docker ps -a --filter ...`` returns:
      * ``None`` → no match (empty stdout). Forces a fresh ``docker run``.
      * ``"running"`` / ``"exited"`` / ... → emit ``CID\\tSTATE`` so the reuse
        path picks it up. ``"running"`` skips ``docker start``; other states
        trigger ``docker start`` (which can be forced to fail via
        ``start_succeeds=False``).

    Returns the captured call list so the test can verify which docker
    commands actually ran.
    """
    calls = []

    def _run(cmd, **kwargs):
        calls.append((list(cmd) if isinstance(cmd, list) else cmd, kwargs))
        if isinstance(cmd, list) and len(cmd) >= 2:
            sub = cmd[1]
            if sub == "version":
                return subprocess.CompletedProcess(cmd, 0, stdout="Docker version", stderr="")
            if sub == "ps":
                if ps_state is None:
                    return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
                return subprocess.CompletedProcess(
                    cmd, 0, stdout=f"reused-cid\t{ps_state}\n", stderr="",
                )
            if sub == "start":
                if not start_succeeds:
                    # Real subprocess.run with check=True raises on non-zero exit;
                    # mirror that so the production code's except clause fires.
                    raise subprocess.CalledProcessError(1, cmd, output="", stderr="no such container")
                return subprocess.CompletedProcess(cmd, 0, stdout="reused-cid\n", stderr="")
            if sub == "run":
                return subprocess.CompletedProcess(cmd, 0, stdout="fresh-cid\n", stderr="")
        return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")

    monkeypatch.setattr(docker_env.subprocess, "run", _run)
    return calls


def test_reuse_attaches_to_running_container_without_docker_run(monkeypatch):
    """When a labeled container is already ``running``, the reuse probe
    must pick it up and skip ``docker run`` entirely. Regression for the
    issue #20561 root cause: every Hermes process spawning a new container
    despite docs claiming "ONE long-lived container shared across sessions"."""
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
    calls = _mock_subprocess_run_with_reuse(monkeypatch, ps_state="running")

    env = _make_dummy_env(task_id="reuse-test")

    # The reuse path must populate _container_id from the ps probe output.
    assert env._container_id == "reused-cid", (
        f"expected reused container id, got {env._container_id!r}"
    )
    # And it must NOT have run `docker run`.
    run_invocations = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"]
    assert not run_invocations, (
        f"docker run should be skipped on reuse, got: {run_invocations}"
    )
    # And it must have NOT issued a `docker start` for an already-running container.
    start_invocations = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "start"]
    assert not start_invocations, (
        f"docker start should be skipped when container already running, got: {start_invocations}"
    )


def test_reuse_starts_stopped_container_before_attaching(monkeypatch):
    """A labeled container in ``exited`` state must be restarted via
    ``docker start`` before the new Hermes process uses it. Without this
    step, ``docker exec`` against a stopped container errors out and the
    first agent command fails opaquely."""
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
    calls = _mock_subprocess_run_with_reuse(monkeypatch, ps_state="exited")

    env = _make_dummy_env(task_id="reuse-stopped")

    assert env._container_id == "reused-cid"
    start_invocations = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "start"]
    assert start_invocations, "expected docker start for exited container"
    run_invocations = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"]
    assert not run_invocations, "should not docker run when reusing an exited container"


def test_reuse_falls_back_to_fresh_run_when_start_fails(monkeypatch):
    """If ``docker start`` on the matched container fails (container was
    removed between probe and start, daemon paused, etc.), the code must
    silently fall through to a fresh ``docker run`` rather than leaving the
    user with a broken environment. Defensive recovery — the probe is best-
    effort, not authoritative."""
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
    calls = _mock_subprocess_run_with_reuse(
        monkeypatch, ps_state="exited", start_succeeds=False,
    )

    env = _make_dummy_env(task_id="reuse-broken-start")

    # docker start should be attempted then fail; code falls through to run.
    assert env._container_id == "fresh-cid", (
        f"expected fresh container id after fallback, got {env._container_id!r}"
    )
    run_invocations = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"]
    assert run_invocations, "fallback to fresh docker run must happen on start failure"


def test_no_reuse_when_persist_across_processes_disabled(monkeypatch):
    """Opt-out path: ``persist_across_processes=False`` skips the ps probe
    entirely and always starts a fresh container, matching the pre-fix
    behavior for users who want hard per-process isolation."""
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
    # ps_state=running would trigger reuse if the probe ran — assert it doesn't.
    calls = _mock_subprocess_run_with_reuse(monkeypatch, ps_state="running")

    env = docker_env.DockerEnvironment(
        image="python:3.11", cwd="/root", timeout=60,
        task_id="no-reuse", persist_across_processes=False,
    )

    # Must NOT have issued docker ps (the probe is gated by the flag).
    ps_invocations = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "ps"]
    assert not ps_invocations, (
        f"docker ps probe should be skipped when persist_across_processes=False, got: {ps_invocations}"
    )
    # Should have started a fresh container.
    assert env._container_id == "fresh-cid"


def test_find_reusable_container_prefers_running_over_stopped(monkeypatch):
    """When the probe returns multiple matches (shouldn't normally happen,
    but can after a crash leaves stale duplicates), a ``running`` container
    is preferred over any stopped one. The duplicate gets reaped later by
    the orphan reaper; we don't try to be heroic about it here."""
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")

    def _run(cmd, **kwargs):
        if isinstance(cmd, list) and len(cmd) >= 2:
            if cmd[1] == "version":
                return subprocess.CompletedProcess(cmd, 0, stdout="ok", stderr="")
            if cmd[1] == "ps":
                # Two matches: stopped first, running second.
                return subprocess.CompletedProcess(
                    cmd, 0,
                    stdout="stopped-cid\texited\nrunning-cid\trunning\n",
                    stderr="",
                )
        return subprocess.CompletedProcess(cmd, 0, stdout="fresh-cid\n", stderr="")

    monkeypatch.setattr(docker_env.subprocess, "run", _run)

    env = _make_dummy_env(task_id="dup-match")
    assert env._container_id == "running-cid", (
        f"running container should win over stopped duplicate, got {env._container_id!r}"
    )


# ── Cleanup correctness (issue #20561) ────────────────────────────


class _FakeThread:
    """Stand-in for threading.Thread that captures target/args and calls
    target() synchronously when .start() runs, so cleanup behavior is
    observable without actually backgrounding subprocess calls."""

    def __init__(self, target=None, daemon=None, name=None):
        self._target = target
        self.daemon = daemon
        self.name = name
        self._done = False

    def start(self):
        if self._target is not None:
            self._target()
        self._done = True

    def is_alive(self):
        return not self._done

    def join(self, timeout=None):
        self._done = True


def _install_fake_thread(monkeypatch):
    import threading
    monkeypatch.setattr(threading, "Thread", _FakeThread)


def test_cleanup_with_persist_only_stops_no_rm(monkeypatch):
    """``persist_across_processes=True`` (default) cleanup must docker stop
    the container but NEVER docker rm — the container has to survive so the
    next Hermes process can reuse it. Issue #20561 — the previous code
    matched this on the `_persistent` flag instead of a dedicated
    cross-process flag, which made reuse impossible."""
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
    _mock_subprocess_run(monkeypatch)
    _install_fake_thread(monkeypatch)

    env = _make_dummy_env(task_id="cleanup-persist", persistent_filesystem=False)
    # Default persist_across_processes=True.
    container_id = env._container_id
    assert container_id

    cleanup_calls = []
    real_run = docker_env.subprocess.run

    def _capturing_run(cmd, **kwargs):
        cleanup_calls.append((list(cmd) if isinstance(cmd, list) else cmd, kwargs))
        return real_run(cmd, **kwargs)

    monkeypatch.setattr(docker_env.subprocess, "run", _capturing_run)

    env.cleanup()

    stops = [c for c in cleanup_calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "stop"]
    rms = [c for c in cleanup_calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "rm"]
    assert stops, f"expected docker stop call, got cleanup_calls: {cleanup_calls}"
    assert not rms, (
        f"docker rm must NOT be called when persist_across_processes=True; "
        f"reuse would be impossible. Got: {rms}"
    )


def test_cleanup_with_persist_disabled_stops_and_rms(monkeypatch):
    """``persist_across_processes=False`` cleanup must docker stop AND docker
    rm so containers don't leak. Crucially, this runs regardless of the
    ``persistent_filesystem`` setting — the original code only rm'd when
    ``not self._persistent``, which meant the default-on ``container_persistent:
    true`` users (the documented happy path) leaked Exited containers forever.
    Issue #20561 root-cause fix."""
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
    _mock_subprocess_run(monkeypatch)
    _install_fake_thread(monkeypatch)

    # Note: persistent_filesystem=True (the prior-leak scenario) + the new
    # cross-process toggle OFF must still result in a clean rm.
    env = docker_env.DockerEnvironment(
        image="python:3.11", cwd="/root", timeout=60,
        task_id="cleanup-no-persist", persistent_filesystem=True,
        persist_across_processes=False,
    )

    cleanup_calls = []
    real_run = docker_env.subprocess.run

    def _capturing_run(cmd, **kwargs):
        cleanup_calls.append((list(cmd) if isinstance(cmd, list) else cmd, kwargs))
        return real_run(cmd, **kwargs)

    monkeypatch.setattr(docker_env.subprocess, "run", _capturing_run)

    env.cleanup()

    stops = [c for c in cleanup_calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "stop"]
    rms = [c for c in cleanup_calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "rm"]
    assert stops, "expected docker stop"
    assert rms, (
        "docker rm MUST run when persist_across_processes=False, even with "
        "persistent_filesystem=True — that gating was the leak source in #20561."
    )


def test_cleanup_uses_subprocess_run_not_detached_shell(monkeypatch):
    """The pre-fix code used ``subprocess.Popen(\"... &\", shell=True)`` which
    raced with parent-process exit and silently dropped cleanup work. The
    new code must use ``subprocess.run`` with bounded ``timeout=`` so the
    work actually completes within the process lifetime.

    Asserts cleanup never reaches into shell-mode Popen.
    """
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
    _mock_subprocess_run(monkeypatch)
    _install_fake_thread(monkeypatch)

    def _forbidden_popen(*args, **kwargs):
        raise AssertionError(
            f"cleanup must not use subprocess.Popen anymore (issue #20561); "
            f"got args={args} kwargs={kwargs}"
        )

    monkeypatch.setattr(docker_env.subprocess, "Popen", _forbidden_popen)

    env = _make_dummy_env(task_id="no-popen-cleanup")
    env.cleanup()  # must not raise


def test_wait_for_cleanup_returns_true_when_no_thread_started():
    """``wait_for_cleanup`` must be a no-op when ``cleanup`` was never called
    (or the env has no live cleanup thread) — atexit calls it unconditionally
    across all active envs, so a False return would falsely flag healthy
    shutdowns."""
    env = docker_env.DockerEnvironment.__new__(docker_env.DockerEnvironment)
    # No _cleanup_thread set — simulates an env that was never cleanup()'d.
    assert env.wait_for_cleanup(timeout=1.0) is True


def test_wait_for_cleanup_after_cleanup_returns_true(monkeypatch):
    """End-to-end: cleanup() starts a thread, wait_for_cleanup() joins it
    and reports completion. Atexit relies on this contract to ensure docker
    stop/rm actually finishes before the Python interpreter exits."""
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    monkeypatch.setattr(docker_env, "_get_active_profile_name", lambda: "default")
    _mock_subprocess_run(monkeypatch)
    _install_fake_thread(monkeypatch)

    env = _make_dummy_env(task_id="wait-test")
    env.cleanup()
    assert env.wait_for_cleanup(timeout=5.0) is True


def test_cleanup_on_env_with_no_container_id_does_not_raise(monkeypatch):
    """A DockerEnvironment whose ``__init__`` failed before the container_id
    was set (image-pull error, docker daemon down) should still be safe to
    cleanup() — the post-creation failure path in callers always tries.
    Without this guard the daemon-down case used to NameError on the cleanup
    branch."""
    env = docker_env.DockerEnvironment.__new__(docker_env.DockerEnvironment)
    env._container_id = None
    env._persistent = False
    env._workspace_dir = None
    env._home_dir = None
    # No exception expected.
    env.cleanup()