test(docker): stabilize Phase 0 baseline harness

Two pre-existing baseline issues found while running the Phase 0 harness
against the tini image that need fixing before later phases can use the
harness as a behavior-parity oracle:

1. The autouse `_enforce_test_timeout` fixture in tests/conftest.py
   hard-coded a 30s SIGALRM, which preempted any `pytest.mark.timeout`
   marker (already honored by pytest-timeout). Honor the marker if
   present; fall back to 30s otherwise. Docker harness tests carry a
   180s marker applied at collection time in tests/docker/conftest.py.

2. test_dashboard_port_override polled via `ss -tlnp` / `netstat -tln`
   — neither is installed in the Hermes image, so the probe trivially
   failed even when the dashboard was bound. The dashboard also takes
   8-15s to bind on cold image; the 5s sleep was insufficient. Replace
   with a poll loop reading /proc/net/tcp directly (port 9120 = 0x23A0,
   state 0A = LISTEN). Bump probe deadline to 60s and switch
   test_dashboard_opt_in_starts to a similar poll for pgrep so we don't
   regress to the same race.

Result: 11 passed, 2 xfailed (Phase 4 target) on tini image. Harness
now ready to serve as Phase 2's behavior-parity oracle.
This commit is contained in:
Ben 2026-05-21 14:17:36 +10:00 committed by teknium1
parent a18f69eb55
commit 440147ebea
No known key found for this signature in database

View file

@ -12,16 +12,36 @@ import subprocess
import time
def _poll(container: str, probe: str, *, deadline_s: float = 30.0,
interval_s: float = 0.5) -> tuple[bool, str]:
"""Repeatedly run ``probe`` inside the container until it exits 0 or
``deadline_s`` elapses. Returns (success, last stdout)."""
end = time.monotonic() + deadline_s
last = ""
while time.monotonic() < end:
r = subprocess.run(
["docker", "exec", container, "sh", "-c", probe],
capture_output=True, text=True, timeout=10,
)
last = r.stdout
if r.returncode == 0:
return True, last
time.sleep(interval_s)
return False, last
def test_dashboard_not_running_by_default(
built_image: str, container_name: str,
) -> None:
"""Without HERMES_DASHBOARD, no dashboard process should be running."""
subprocess.run(
["docker", "run", "-d", "--name", container_name, built_image,
"sleep", "30"],
"sleep", "60"],
check=True, capture_output=True, timeout=30,
)
time.sleep(3)
# Give the entrypoint enough time to finish bootstrap; if a dashboard
# were going to start it'd be visible by now.
time.sleep(5)
r = subprocess.run(
["docker", "exec", container_name,
"pgrep", "-f", "hermes dashboard"],
@ -39,18 +59,16 @@ def test_dashboard_opt_in_starts(
"""With HERMES_DASHBOARD=1, a dashboard process should be visible."""
subprocess.run(
["docker", "run", "-d", "--name", container_name,
"-e", "HERMES_DASHBOARD=1", built_image, "sleep", "30"],
"-e", "HERMES_DASHBOARD=1", built_image, "sleep", "120"],
check=True, capture_output=True, timeout=30,
)
time.sleep(5)
r = subprocess.run(
["docker", "exec", container_name,
"pgrep", "-f", "hermes dashboard"],
capture_output=True, text=True, timeout=10,
)
assert r.returncode == 0, (
"Dashboard should be running with HERMES_DASHBOARD=1"
# Poll for the dashboard subprocess to appear — the entrypoint
# backgrounds it and bootstrap (skills sync etc.) can take a few
# seconds before the python process actually launches.
ok, _ = _poll(
container_name, "pgrep -f 'hermes dashboard'", deadline_s=30.0,
)
assert ok, "Dashboard should be running with HERMES_DASHBOARD=1"
def test_dashboard_port_override(
@ -60,16 +78,17 @@ def test_dashboard_port_override(
subprocess.run(
["docker", "run", "-d", "--name", container_name,
"-e", "HERMES_DASHBOARD=1", "-e", "HERMES_DASHBOARD_PORT=9120",
built_image, "sleep", "30"],
built_image, "sleep", "120"],
check=True, capture_output=True, timeout=30,
)
time.sleep(5)
r = subprocess.run(
["docker", "exec", container_name, "sh", "-c",
"ss -tlnp 2>/dev/null | grep ':9120' "
"|| netstat -tln 2>/dev/null | grep ':9120'"],
capture_output=True, text=True, timeout=10,
)
assert "9120" in r.stdout, (
f"Dashboard not listening on port 9120: stdout={r.stdout!r}"
# The dashboard process appearing in pgrep doesn't mean it's bound
# to the port yet — uvicorn takes another second or two to come up.
# The image doesn't ship ss/netstat, so probe /proc/net/tcp directly:
# port 9120 = 0x23A0, state 0A = LISTEN.
ok, stdout = _poll(
container_name,
"grep -E ' 0+:23A0 .* 0A ' /proc/net/tcp /proc/net/tcp6 "
"2>/dev/null",
deadline_s=60.0,
)
assert ok, f"Dashboard not listening on port 9120: stdout={stdout!r}"