mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-29 06:31:32 +00:00
test(docker): stabilize Phase 0 baseline harness
Two pre-existing baseline issues were found while running the Phase 0 harness against the tini image; both need fixing before later phases can use the harness as a behavior-parity oracle: 1. The autouse `_enforce_test_timeout` fixture in tests/conftest.py hard-coded a 30s SIGALRM, which preempted any `pytest.mark.timeout` marker (already honored by pytest-timeout). Honor the marker if present; fall back to 30s otherwise. Docker harness tests carry a 180s marker applied at collection time in tests/docker/conftest.py. 2. test_dashboard_port_override polled via `ss -tlnp` / `netstat -tln` — neither is installed in the Hermes image, so the probe trivially failed even when the dashboard was bound. The dashboard also takes 8-15s to bind on a cold image; the 5s sleep was insufficient. Replace with a poll loop reading /proc/net/tcp directly (port 9120 = 0x23A0, state 0A = LISTEN). Bump the probe deadline to 60s and switch test_dashboard_opt_in_starts to a similar poll for pgrep so we don't regress to the same race. Result: 11 passed, 2 xfailed (Phase 4 target) on the tini image. The harness is now ready to serve as Phase 2's behavior-parity oracle.
This commit is contained in:
parent
a18f69eb55
commit
440147ebea
1 changed files with 40 additions and 21 deletions
|
|
@ -12,16 +12,36 @@ import subprocess
|
|||
import time
|
||||
|
||||
|
||||
def _poll(container: str, probe: str, *, deadline_s: float = 30.0,
|
||||
interval_s: float = 0.5) -> tuple[bool, str]:
|
||||
"""Repeatedly run ``probe`` inside the container until it exits 0 or
|
||||
``deadline_s`` elapses. Returns (success, last stdout)."""
|
||||
end = time.monotonic() + deadline_s
|
||||
last = ""
|
||||
while time.monotonic() < end:
|
||||
r = subprocess.run(
|
||||
["docker", "exec", container, "sh", "-c", probe],
|
||||
capture_output=True, text=True, timeout=10,
|
||||
)
|
||||
last = r.stdout
|
||||
if r.returncode == 0:
|
||||
return True, last
|
||||
time.sleep(interval_s)
|
||||
return False, last
|
||||
|
||||
|
||||
def test_dashboard_not_running_by_default(
|
||||
built_image: str, container_name: str,
|
||||
) -> None:
|
||||
"""Without HERMES_DASHBOARD, no dashboard process should be running."""
|
||||
subprocess.run(
|
||||
["docker", "run", "-d", "--name", container_name, built_image,
|
||||
"sleep", "30"],
|
||||
"sleep", "60"],
|
||||
check=True, capture_output=True, timeout=30,
|
||||
)
|
||||
time.sleep(3)
|
||||
# Give the entrypoint enough time to finish bootstrap; if a dashboard
|
||||
# were going to start it'd be visible by now.
|
||||
time.sleep(5)
|
||||
r = subprocess.run(
|
||||
["docker", "exec", container_name,
|
||||
"pgrep", "-f", "hermes dashboard"],
|
||||
|
|
@ -39,18 +59,16 @@ def test_dashboard_opt_in_starts(
|
|||
"""With HERMES_DASHBOARD=1, a dashboard process should be visible."""
|
||||
subprocess.run(
|
||||
["docker", "run", "-d", "--name", container_name,
|
||||
"-e", "HERMES_DASHBOARD=1", built_image, "sleep", "30"],
|
||||
"-e", "HERMES_DASHBOARD=1", built_image, "sleep", "120"],
|
||||
check=True, capture_output=True, timeout=30,
|
||||
)
|
||||
time.sleep(5)
|
||||
r = subprocess.run(
|
||||
["docker", "exec", container_name,
|
||||
"pgrep", "-f", "hermes dashboard"],
|
||||
capture_output=True, text=True, timeout=10,
|
||||
)
|
||||
assert r.returncode == 0, (
|
||||
"Dashboard should be running with HERMES_DASHBOARD=1"
|
||||
# Poll for the dashboard subprocess to appear — the entrypoint
|
||||
# backgrounds it and bootstrap (skills sync etc.) can take a few
|
||||
# seconds before the python process actually launches.
|
||||
ok, _ = _poll(
|
||||
container_name, "pgrep -f 'hermes dashboard'", deadline_s=30.0,
|
||||
)
|
||||
assert ok, "Dashboard should be running with HERMES_DASHBOARD=1"
|
||||
|
||||
|
||||
def test_dashboard_port_override(
|
||||
|
|
@ -60,16 +78,17 @@ def test_dashboard_port_override(
|
|||
subprocess.run(
|
||||
["docker", "run", "-d", "--name", container_name,
|
||||
"-e", "HERMES_DASHBOARD=1", "-e", "HERMES_DASHBOARD_PORT=9120",
|
||||
built_image, "sleep", "30"],
|
||||
built_image, "sleep", "120"],
|
||||
check=True, capture_output=True, timeout=30,
|
||||
)
|
||||
time.sleep(5)
|
||||
r = subprocess.run(
|
||||
["docker", "exec", container_name, "sh", "-c",
|
||||
"ss -tlnp 2>/dev/null | grep ':9120' "
|
||||
"|| netstat -tln 2>/dev/null | grep ':9120'"],
|
||||
capture_output=True, text=True, timeout=10,
|
||||
)
|
||||
assert "9120" in r.stdout, (
|
||||
f"Dashboard not listening on port 9120: stdout={r.stdout!r}"
|
||||
# The dashboard process appearing in pgrep doesn't mean it's bound
|
||||
# to the port yet — uvicorn takes another second or two to come up.
|
||||
# The image doesn't ship ss/netstat, so probe /proc/net/tcp directly:
|
||||
# port 9120 = 0x23A0, state 0A = LISTEN.
|
||||
ok, stdout = _poll(
|
||||
container_name,
|
||||
"grep -E ' 0+:23A0 .* 0A ' /proc/net/tcp /proc/net/tcp6 "
|
||||
"2>/dev/null",
|
||||
deadline_s=60.0,
|
||||
)
|
||||
assert ok, f"Dashboard not listening on port 9120: stdout={stdout!r}"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue