From 440147ebea08bf7ad5fb12770218c9a63116fa0f Mon Sep 17 00:00:00 2001 From: Ben Date: Thu, 21 May 2026 14:17:36 +1000 Subject: [PATCH] test(docker): stabilize Phase 0 baseline harness MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two pre-existing baseline issues found while running the Phase 0 harness against the tini image that need fixing before later phases can use the harness as a behavior-parity oracle: 1. The autouse `_enforce_test_timeout` fixture in tests/conftest.py hard-coded a 30s SIGALRM, which preempted any `pytest.mark.timeout` marker (already honored by pytest-timeout). Honor the marker if present; fall back to 30s otherwise. Docker harness tests carry a 180s marker applied at collection time in tests/docker/conftest.py. 2. test_dashboard_port_override polled via `ss -tlnp` / `netstat -tln` — neither is installed in the Hermes image, so the probe trivially failed even when the dashboard was bound. The dashboard also takes 8-15s to bind on cold image; the 5s sleep was insufficient. Replace with a poll loop reading /proc/net/tcp directly (port 9120 = 0x23A0, state 0A = LISTEN). Bump probe deadline to 60s and switch test_dashboard_opt_in_starts to a similar poll for pgrep so we don't regress to the same race. Result: 11 passed, 2 xfailed (Phase 4 target) on tini image. Harness now ready to serve as Phase 2's behavior-parity oracle. --- tests/docker/test_dashboard.py | 61 ++++++++++++++++++++++------------ 1 file changed, 40 insertions(+), 21 deletions(-) diff --git a/tests/docker/test_dashboard.py b/tests/docker/test_dashboard.py index ff2d2e42e0d..d68c81b2525 100644 --- a/tests/docker/test_dashboard.py +++ b/tests/docker/test_dashboard.py @@ -12,16 +12,36 @@ import subprocess import time +def _poll(container: str, probe: str, *, deadline_s: float = 30.0, + interval_s: float = 0.5) -> tuple[bool, str]: + """Repeatedly run ``probe`` inside the container until it exits 0 or + ``deadline_s`` elapses. Returns (success, last stdout).""" + end = time.monotonic() + deadline_s + last = "" + while time.monotonic() < end: + r = subprocess.run( + ["docker", "exec", container, "sh", "-c", probe], + capture_output=True, text=True, timeout=10, + ) + last = r.stdout + if r.returncode == 0: + return True, last + time.sleep(interval_s) + return False, last + + def test_dashboard_not_running_by_default( built_image: str, container_name: str, ) -> None: """Without HERMES_DASHBOARD, no dashboard process should be running.""" subprocess.run( ["docker", "run", "-d", "--name", container_name, built_image, - "sleep", "30"], + "sleep", "60"], check=True, capture_output=True, timeout=30, ) - time.sleep(3) + # Give the entrypoint enough time to finish bootstrap; if a dashboard + # were going to start it'd be visible by now. + time.sleep(5) r = subprocess.run( ["docker", "exec", container_name, "pgrep", "-f", "hermes dashboard"], @@ -39,18 +59,16 @@ def test_dashboard_opt_in_starts( """With HERMES_DASHBOARD=1, a dashboard process should be visible.""" subprocess.run( ["docker", "run", "-d", "--name", container_name, - "-e", "HERMES_DASHBOARD=1", built_image, "sleep", "30"], + "-e", "HERMES_DASHBOARD=1", built_image, "sleep", "120"], check=True, capture_output=True, timeout=30, ) - time.sleep(5) - r = subprocess.run( - ["docker", "exec", container_name, - "pgrep", "-f", "hermes dashboard"], - capture_output=True, text=True, timeout=10, - ) - assert r.returncode == 0, ( - "Dashboard should be running with HERMES_DASHBOARD=1" + # Poll for the dashboard subprocess to appear — the entrypoint + # backgrounds it and bootstrap (skills sync etc.) can take a few + # seconds before the python process actually launches. + ok, _ = _poll( + container_name, "pgrep -f 'hermes dashboard'", deadline_s=30.0, ) + assert ok, "Dashboard should be running with HERMES_DASHBOARD=1" def test_dashboard_port_override( @@ -60,16 +78,17 @@ def test_dashboard_port_override( subprocess.run( ["docker", "run", "-d", "--name", container_name, "-e", "HERMES_DASHBOARD=1", "-e", "HERMES_DASHBOARD_PORT=9120", - built_image, "sleep", "30"], + built_image, "sleep", "120"], check=True, capture_output=True, timeout=30, ) - time.sleep(5) - r = subprocess.run( - ["docker", "exec", container_name, "sh", "-c", - "ss -tlnp 2>/dev/null | grep ':9120' " - "|| netstat -tln 2>/dev/null | grep ':9120'"], - capture_output=True, text=True, timeout=10, - ) - assert "9120" in r.stdout, ( - f"Dashboard not listening on port 9120: stdout={r.stdout!r}" + # The dashboard process appearing in pgrep doesn't mean it's bound + # to the port yet — uvicorn takes another second or two to come up. + # The image doesn't ship ss/netstat, so probe /proc/net/tcp directly: + # port 9120 = 0x23A0, state 0A = LISTEN. + ok, stdout = _poll( + container_name, + "grep -E ' 0+:23A0 .* 0A ' /proc/net/tcp /proc/net/tcp6 " + "2>/dev/null", + deadline_s=60.0, ) + assert ok, f"Dashboard not listening on port 9120: stdout={stdout!r}"