mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-29 06:31:32 +00:00
test(docker): lock baseline behavior for Phase 0 harness
Tasks 0.2-0.6 of the s6-overlay supervision plan. Locks the user-visible behavior we must preserve through the Phase 2 init-system swap: - test_main_invocation.py (Task 0.2): docker run <image> with no args, chat subcommand passthrough, bare executable passthrough, bash pattern, exit-code propagation - test_tui_passthrough.py (Task 0.3): TTY allocation via docker -t using the host's script(1) for a PTY - test_dashboard.py (Task 0.4): HERMES_DASHBOARD=1 opt-in, HERMES_DASHBOARD_PORT override - test_profile_gateway.py (Task 0.5): per-profile gateway start/stop and profile-delete-stops-gateway. Both marked xfail(strict=True) because the current tini image refuses gateway lifecycle commands inside the container; Phase 4 Task 4.3 flips them to passing. - test_zombie_reaping.py (Task 0.6): PID 1 reaps orphaned zombies. tini does this today; s6-overlay's /init must continue to. Refs: docs/plans/2026-05-07-s6-overlay-dynamic-subagent-gateways.md
This commit is contained in:
parent
08302135b6
commit
6e6acdea2a
5 changed files with 346 additions and 0 deletions
75
tests/docker/test_dashboard.py
Normal file
75
tests/docker/test_dashboard.py
Normal file
|
|
@ -0,0 +1,75 @@
|
|||
"""Harness: dashboard opt-in via HERMES_DASHBOARD.
|
||||
|
||||
Today (tini): dashboard starts once when HERMES_DASHBOARD=1; if it crashes
|
||||
it stays dead. After Phase 2 (s6): dashboard starts once; if it crashes
|
||||
it is restarted under supervision. The restart-after-crash test lives in
|
||||
Phase 2 Task 2.5; this file only locks the opt-in surface (which must
|
||||
not change between tini and s6).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import subprocess
|
||||
import time
|
||||
|
||||
|
||||
def test_dashboard_not_running_by_default(
    built_image: str, container_name: str,
) -> None:
    """Without HERMES_DASHBOARD, no dashboard process should be running.

    Starts a detached container running ``sleep 30``, waits a few seconds,
    then probes the container's process list with ``pgrep -f``.
    """
    subprocess.run(
        ["docker", "run", "-d", "--name", container_name, built_image,
         "sleep", "30"],
        check=True, capture_output=True, timeout=30,
    )
    time.sleep(3)
    r = subprocess.run(
        ["docker", "exec", container_name,
         "pgrep", "-f", "hermes dashboard"],
        capture_output=True, text=True, timeout=10,
    )
    # pgrep exits 1 specifically for "no match". Accepting any non-zero
    # status would also accept a broken probe (e.g. pgrep not executable
    # inside the image), letting the test pass without checking anything.
    assert r.returncode == 1, (
        "Dashboard should not be running without HERMES_DASHBOARD "
        f"(pgrep exit={r.returncode}, stdout={r.stdout!r}, "
        f"stderr={r.stderr!r})"
    )
|
||||
|
||||
|
||||
def test_dashboard_opt_in_starts(
    built_image: str, container_name: str,
) -> None:
    """With HERMES_DASHBOARD=1, a dashboard process should be visible.

    The container is started detached with the opt-in variable set and
    given five seconds before ``pgrep -f`` looks for the dashboard.
    """
    launch_argv = [
        "docker", "run", "-d", "--name", container_name,
        "-e", "HERMES_DASHBOARD=1",
        built_image, "sleep", "30",
    ]
    subprocess.run(launch_argv, check=True, capture_output=True, timeout=30)

    time.sleep(5)

    probe_argv = [
        "docker", "exec", container_name,
        "pgrep", "-f", "hermes dashboard",
    ]
    probe = subprocess.run(
        probe_argv, capture_output=True, text=True, timeout=10,
    )
    # pgrep exits 0 only when at least one process matched.
    dashboard_found = probe.returncode == 0
    assert dashboard_found, (
        "Dashboard should be running with HERMES_DASHBOARD=1"
    )
|
||||
|
||||
|
||||
def test_dashboard_port_override(
    built_image: str, container_name: str,
) -> None:
    """HERMES_DASHBOARD_PORT changes the dashboard's listen port.

    Starts the container with the dashboard enabled on port 9120 and then
    lists listening TCP sockets inside it, trying ``ss`` first and falling
    back to ``netstat``.
    """
    launch_argv = [
        "docker", "run", "-d", "--name", container_name,
        "-e", "HERMES_DASHBOARD=1",
        "-e", "HERMES_DASHBOARD_PORT=9120",
        built_image, "sleep", "30",
    ]
    subprocess.run(launch_argv, check=True, capture_output=True, timeout=30)

    time.sleep(5)

    # Either tool may be absent from the image, hence the `||` fallback
    # with stderr discarded.
    listen_probe = (
        "ss -tlnp 2>/dev/null | grep ':9120' || "
        "netstat -tln 2>/dev/null | grep ':9120'"
    )
    sockets = subprocess.run(
        ["docker", "exec", container_name, "sh", "-c", listen_probe],
        capture_output=True, text=True, timeout=10,
    )
    listing = sockets.stdout
    assert "9120" in listing, (
        f"Dashboard not listening on port 9120: stdout={listing!r}"
    )
|
||||
79
tests/docker/test_main_invocation.py
Normal file
79
tests/docker/test_main_invocation.py
Normal file
|
|
@ -0,0 +1,79 @@
|
|||
"""Harness: docker run <image> [cmd...] invocation patterns.
|
||||
|
||||
These tests MUST pass on the current tini-based image AND continue to
|
||||
pass after the Phase 2 s6 migration. Any behavior drift is a regression.
|
||||
|
||||
The harness expects ``built_image`` and ``container_name`` fixtures from
|
||||
``tests/docker/conftest.py``. When Docker isn't available every test
|
||||
here is skipped at collection time.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import subprocess
|
||||
|
||||
|
||||
def test_no_args_starts_hermes(built_image: str) -> None:
    """``docker run <image>`` should start hermes cleanly.

    ``--version`` is passed so the call exits without needing a configured
    model. Exit code 0 (version printed) and 1 (config bootstrapping
    failure on a fresh volume) are both accepted; a Python traceback on
    stderr is not.
    """
    argv = ["docker", "run", "--rm", built_image, "--version"]
    result = subprocess.run(argv, capture_output=True, text=True, timeout=60)

    exit_code = result.returncode
    assert exit_code in (0, 1), (
        f"Unexpected exit {exit_code}: stderr={result.stderr!r}"
    )
    assert "Traceback" not in result.stderr
|
||||
|
||||
|
||||
def test_chat_subcommand_passthrough(built_image: str) -> None:
    """``docker run <image> chat --help`` should exec ``hermes chat --help``.

    ``--help`` keeps the call independent of any upstream model
    configuration. The help text may land on either stream, so both are
    searched case-insensitively.
    """
    argv = ["docker", "run", "--rm", built_image, "chat", "--help"]
    result = subprocess.run(argv, capture_output=True, text=True, timeout=60)

    assert result.returncode == 0
    help_text = f"{result.stdout}{result.stderr}".lower()
    assert any(word in help_text for word in ("chat", "usage"))
|
||||
|
||||
|
||||
def test_bare_executable_passthrough(built_image: str) -> None:
    """``docker run <image> sleep 1`` should exec ``sleep`` directly.

    The entrypoint is expected to notice that ``sleep`` is on PATH and
    bypass the hermes wrapper, which is what long-lived sandbox mode and
    the rest of this harness rely on.
    """
    argv = ["docker", "run", "--rm", built_image, "sleep", "1"]
    completed = subprocess.run(
        argv, capture_output=True, text=True, timeout=30,
    )
    assert completed.returncode == 0
|
||||
|
||||
|
||||
def test_bash_pattern(built_image: str) -> None:
    """``docker run <image> bash -c 'echo ok'`` should exec bash directly."""
    argv = ["docker", "run", "--rm", built_image, "bash", "-c", "echo ok"]
    completed = subprocess.run(
        argv, capture_output=True, text=True, timeout=30,
    )
    assert completed.returncode == 0
    # The echoed marker must make it back through the container's stdout.
    assert "ok" in completed.stdout
|
||||
|
||||
|
||||
def test_container_exit_code_matches_inner_exit(built_image: str) -> None:
    """The container exit code must match the inner process's exit code.

    Critical for CI: ``docker run <image> hermes batch ...`` returns a
    non-zero status when batch fails, and Phase 2 (s6) must preserve that.
    An arbitrary, easily recognised status (42) is used as the probe.
    """
    argv = ["docker", "run", "--rm", built_image, "sh", "-c", "exit 42"]
    completed = subprocess.run(
        argv, capture_output=True, text=True, timeout=30,
    )
    assert completed.returncode == 42
|
||||
97
tests/docker/test_profile_gateway.py
Normal file
97
tests/docker/test_profile_gateway.py
Normal file
|
|
@ -0,0 +1,97 @@
|
|||
"""Harness: per-profile gateway start/stop inside the container.
|
||||
|
||||
Phase 4 will change the *implementation* of these commands inside the
|
||||
container — they'll talk to s6 instead of refusing. The user-visible
|
||||
surface that should result is locked here.
|
||||
|
||||
NOTE: These tests are marked ``xfail(strict=True)`` until Phase 4 lands.
|
||||
The current tini image deliberately refuses gateway start/stop inside
|
||||
containers — ``pgrep`` finds nothing and the tests fail. After Phase 4
|
||||
they should flip to passing automatically; ``strict=True`` means an
|
||||
unexpected pass also fails the test, protecting against side-channel
|
||||
fixes outside the planned Phase 4 mechanism.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import subprocess
|
||||
import time
|
||||
|
||||
import pytest
|
||||
|
||||
# Name of the throwaway profile the gateway tests create inside the container.
PROFILE = "test-harness-profile"

# Shared ``xfail`` reason for every test in this module that depends on the
# Phase 4 gateway supervision work.
_PHASE4_REASON = " ".join(
    [
        "Phase 4 not yet landed: container-side `hermes gateway start`",
        "currently exits 0 with an informational message instead of",
        "spawning/supervising a gateway. Remove this marker after Task 4.3.",
    ]
)
|
||||
|
||||
|
||||
def _sh(
    container: str, command: str, timeout: int = 30,
) -> subprocess.CompletedProcess[str]:
    """Run ``command`` through ``sh -c`` inside ``container``.

    The command is executed with ``docker exec``; stdout and stderr are
    captured as text. The call is not checked, so a failing command is
    reported through ``returncode`` on the returned object rather than by
    raising.
    """
    exec_argv = ["docker", "exec", container, "sh", "-c", command]
    return subprocess.run(
        exec_argv,
        capture_output=True,
        text=True,
        timeout=timeout,
    )
|
||||
|
||||
|
||||
@pytest.mark.xfail(reason=_PHASE4_REASON, strict=True)
def test_profile_create_then_gateway_start(
    built_image: str, container_name: str,
) -> None:
    """Create a profile, start its gateway, then stop it again.

    After ``gateway start`` a process matching the profile must be
    visible to ``pgrep``; after ``gateway stop`` it must be gone.
    """
    # ``pgrep -f`` matches full command lines, and ``_sh`` wraps the probe
    # in ``sh -c '<probe>'`` whose own command line contains the pattern
    # text. A plain ``gateway.*<profile>`` regex matches that text, so the
    # probe could match its own wrapper shell. Writing the first letter as
    # a bracket expression keeps the regex equivalent for real gateway
    # processes while no longer matching the pattern's literal spelling.
    probe = f"pgrep -f '[g]ateway.*{PROFILE}'"

    subprocess.run(
        ["docker", "run", "-d", "--name", container_name, built_image,
         "sleep", "120"],
        check=True, capture_output=True, timeout=30,
    )
    time.sleep(3)

    r = _sh(container_name, f"hermes profile create {PROFILE}")
    assert r.returncode == 0, f"profile create failed: {r.stderr}"

    r = _sh(container_name, f"hermes -p {PROFILE} gateway start", timeout=60)
    assert r.returncode == 0, (
        f"gateway start failed: stderr={r.stderr!r} stdout={r.stdout!r}"
    )

    time.sleep(3)

    r = _sh(container_name, probe)
    assert r.returncode == 0, "gateway process not running"

    r = _sh(container_name, f"hermes -p {PROFILE} gateway stop", timeout=30)
    assert r.returncode == 0, (
        f"gateway stop failed: stderr={r.stderr!r} stdout={r.stdout!r}"
    )

    time.sleep(2)

    r = _sh(container_name, probe)
    # pgrep exits 1 for "no match"; any other status means the probe
    # itself failed and proves nothing about the gateway.
    assert r.returncode == 1, (
        "gateway process still running after stop "
        f"(pgrep exit={r.returncode}, stdout={r.stdout!r}, "
        f"stderr={r.stderr!r})"
    )
|
||||
|
||||
|
||||
@pytest.mark.xfail(reason=_PHASE4_REASON, strict=True)
def test_profile_delete_stops_gateway(
    built_image: str, container_name: str,
) -> None:
    """Deleting a profile should stop its gateway if running.

    The gateway is verified to be running *before* the delete. Without
    that precondition the final "not running" check is also satisfied
    when no gateway was ever started, so the test would pass without
    exercising the delete-stops-gateway behavior at all.
    """
    # Bracket expression: ``_sh`` runs the probe via ``sh -c '<probe>'``,
    # whose own command line contains the pattern text; a plain
    # ``gateway.*<profile>`` regex would match that text under ``pgrep -f``.
    probe = f"pgrep -f '[g]ateway.*{PROFILE}'"

    subprocess.run(
        ["docker", "run", "-d", "--name", container_name, built_image,
         "sleep", "120"],
        check=True, capture_output=True, timeout=30,
    )
    time.sleep(3)

    r = _sh(container_name, f"hermes profile create {PROFILE}")
    assert r.returncode == 0, f"profile create failed: {r.stderr}"

    r = _sh(container_name, f"hermes -p {PROFILE} gateway start", timeout=60)
    assert r.returncode == 0, (
        f"gateway start failed: stderr={r.stderr!r} stdout={r.stdout!r}"
    )
    time.sleep(3)

    # Precondition: there must be a gateway for the delete to stop.
    r = _sh(container_name, probe)
    assert r.returncode == 0, "gateway process not running before delete"

    r = _sh(
        container_name,
        f"hermes profile delete {PROFILE} --yes",
        timeout=30,
    )
    assert r.returncode == 0, (
        f"profile delete failed: stderr={r.stderr!r} stdout={r.stdout!r}"
    )

    time.sleep(2)
    r = _sh(container_name, probe)
    # pgrep exits 1 for "no match"; other statuses mean the probe failed.
    assert r.returncode == 1, (
        "gateway still running after profile delete "
        f"(pgrep exit={r.returncode}, stdout={r.stdout!r}, "
        f"stderr={r.stderr!r})"
    )
|
||||
51
tests/docker/test_tui_passthrough.py
Normal file
51
tests/docker/test_tui_passthrough.py
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
"""Harness: interactive TUI TTY passthrough.
|
||||
|
||||
Uses ``script -qc`` on the host to allocate a PTY for the docker client,
|
||||
which then allocates a container-side PTY via ``-t``. The probe inside
|
||||
the container is ``tput cols``, which returns a real column count when
|
||||
stdout is a TTY and either prints ``80`` (the terminfo fallback) or
|
||||
nothing when it is not.
|
||||
|
||||
These tests MUST pass on the current tini-based image AND continue to
|
||||
pass after the Phase 2 s6 migration. Any drift is a regression.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import shlex
|
||||
import shutil
|
||||
import subprocess
|
||||
|
||||
import pytest
|
||||
|
||||
# Every test in this module drives docker through the host's ``script``
# binary to obtain a PTY, so the whole module is skipped when it is absent.
_SCRIPT_MISSING = shutil.which("script") is None

pytestmark = pytest.mark.skipif(
    _SCRIPT_MISSING,
    reason="`script` command not available on this host",
)
|
||||
|
||||
|
||||
def test_tty_passthrough_to_container(built_image: str) -> None:
    """``docker run -t`` must deliver a real TTY to the container process.

    The host's ``script`` allocates a PTY for the docker client. Inside
    the container the probe prints ``tput cols`` when stdout is a
    terminal and the marker ``NO_TTY`` otherwise.
    """
    probe = "if [ -t 1 ]; then tput cols; else echo NO_TTY; fi"
    # ``script -c`` takes a single shell command string, so every
    # interpolated piece is quoted — the image reference as well as the
    # probe — to keep it a single word.
    cmd = (
        f"docker run --rm -t -e COLUMNS=123 {shlex.quote(built_image)} "
        f"sh -c {shlex.quote(probe)}"
    )
    r = subprocess.run(
        ["script", "-qc", cmd, "/dev/null"],
        capture_output=True, text=True, timeout=120,
    )
    output = r.stdout.strip()
    assert "NO_TTY" not in output, f"TTY passthrough failed: {output!r}"
    # str.split() already yields whitespace-free tokens.
    numeric_tokens = [tok for tok in output.split() if tok.isdigit()]
    assert numeric_tokens, f"No numeric width in output: {output!r}"
    assert int(numeric_tokens[0]) > 0
|
||||
|
||||
|
||||
def test_tui_flag_recognized(built_image: str) -> None:
    """``docker run -t <image> --help`` should run without crashing.

    The command runs under the host's ``script`` so the docker client
    has a PTY to hand to the container.
    """
    cmd = f"docker run --rm -t {shlex.quote(built_image)} --help"
    r = subprocess.run(
        # -e/--return makes ``script`` exit with the child's status.
        # Without it the exit code reflects ``script`` itself, so the
        # assertion below would not detect a failing container.
        ["script", "-q", "-e", "-c", cmd, "/dev/null"],
        capture_output=True, text=True, timeout=60,
    )
    assert r.returncode == 0, (
        f"--help under a TTY exited {r.returncode}: "
        f"stdout={r.stdout!r} stderr={r.stderr!r}"
    )
|
||||
44
tests/docker/test_zombie_reaping.py
Normal file
44
tests/docker/test_zombie_reaping.py
Normal file
|
|
@ -0,0 +1,44 @@
|
|||
"""Harness: PID 1 must reap orphaned zombie processes.
|
||||
|
||||
tini (current PID 1) reaps zombies via its built-in subreaper behavior.
|
||||
s6-overlay's ``/init`` (Phase 2 PID 1) does the same. This invariant is
|
||||
required for long-running containers spawning subprocesses (subagents,
|
||||
dashboard, dynamic gateways) — otherwise the process table fills with
|
||||
defunct entries and eventually exhausts the kernel PID space.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import subprocess
|
||||
import time
|
||||
|
||||
|
||||
def test_orphan_zombies_reaped(
    built_image: str, container_name: str,
) -> None:
    """Spawn an orphan child that exits immediately. PID 1 must reap it.

    After the orphan has exited, the container's process table is listed
    with ``ps`` and must contain no entry whose state starts with ``Z``.
    """
    subprocess.run(
        ["docker", "run", "-d", "--name", container_name, built_image,
         "sleep", "60"],
        check=True, capture_output=True, timeout=30,
    )
    time.sleep(2)

    # `( ( sleep 0.1 & ) & ); sleep 1` creates a grandchild detached from
    # the original docker exec session — it becomes an orphan reparented
    # to PID 1 in the container. When it exits, PID 1 must reap it.
    spawn = subprocess.run(
        ["docker", "exec", container_name, "sh", "-c",
         "( ( sleep 0.1 & ) & ); sleep 1"],
        capture_output=True, text=True, timeout=10,
    )
    # If the orphan was never spawned there is nothing to reap and the
    # zombie check below would pass without testing anything.
    assert spawn.returncode == 0, (
        f"could not spawn orphan: exit={spawn.returncode} "
        f"stderr={spawn.stderr!r}"
    )
    time.sleep(1)

    r = subprocess.run(
        ["docker", "exec", container_name, "ps", "axo", "stat,pid,comm"],
        capture_output=True, text=True, timeout=10,
    )
    # A failed ``ps`` produces no process rows, which would otherwise be
    # indistinguishable from "no zombies".
    assert r.returncode == 0, (
        f"ps failed inside container: exit={r.returncode} "
        f"stderr={r.stderr!r}"
    )
    zombies = [
        line for line in r.stdout.splitlines()
        if line.lstrip().startswith("Z")
    ]
    assert not zombies, f"Zombies not reaped by PID 1: {zombies}"
|
||||
Loading…
Add table
Add a link
Reference in a new issue