mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-01 07:01:41 +00:00
fix(docker): dashboard slot stays 'down' when HERMES_DASHBOARD unset
PR #30136 review caught a false positive: when HERMES_DASHBOARD was unset, the dashboard run script did `exec sleep infinity`, so `s6-svstat /run/service/dashboard` reported the slot as 'up'. `hermes doctor` and any other s6-svstat-based health check saw the dashboard as supervised-running even though no dashboard process existed. Add cont-init.d/03-dashboard-toggle: writes a `down` marker file into `/run/service/dashboard/` when HERMES_DASHBOARD is falsy, removes any leftover marker when it's truthy. s6-supervise honors `down` by not starting the service, so s6-svstat reports 'down' — matching reality. The run script's HERMES_DASHBOARD case-statement stays in place as a belt-and-suspenders guard, so the two layers can never disagree. Two new integration tests lock the behavior: slot reports down when unset; slot reports up when set to 1.
This commit is contained in:
parent
b28b3f51d3
commit
1dfabe47b3
3 changed files with 110 additions and 0 deletions
|
|
@ -52,6 +52,60 @@ def test_dashboard_not_running_by_default(
|
|||
)
|
||||
|
||||
|
||||
def test_dashboard_slot_reports_down_when_disabled(
    built_image: str, container_name: str,
) -> None:
    """Without HERMES_DASHBOARD, s6-svstat must report the dashboard slot DOWN.

    A slot that is "up" running ``sleep infinity`` would false-positive
    `hermes doctor` and any other s6-svstat-based health check.

    Locks the PR #30136 review item I3 fix: cont-init.d/03-dashboard-toggle
    writes a `down` marker file in the live service-dir when
    HERMES_DASHBOARD is unset, so the slot reflects reality.

    Args:
        built_image: Tag of the image under test.
        container_name: Name to give the container started by this test.
    """
    subprocess.run(
        ["docker", "run", "-d", "--name", container_name, built_image,
         "sleep", "60"],
        check=True, capture_output=True, timeout=30,
    )
    # Give the container's init a moment to bring up supervision before
    # querying the slot.
    time.sleep(5)
    # /command/ isn't on PATH for docker-exec sessions, so call by
    # absolute path.
    r = docker_exec(
        container_name, "/command/s6-svstat", "/run/service/dashboard",
    )
    assert r.returncode == 0, f"s6-svstat failed: {r.stderr!r} / {r.stdout!r}"
    # Anchor on the leading state token. A plain substring test would also
    # accept "up (pid N) ... normally down", which is what s6-svstat prints
    # when the `down` marker exists but the service is running anyway --
    # exactly the state this test must reject.
    status = r.stdout.strip()
    assert status.startswith("down"), (
        f"Dashboard slot should be 'down' without HERMES_DASHBOARD; "
        f"svstat reports: {r.stdout!r}"
    )
|
||||
|
||||
|
||||
def test_dashboard_slot_reports_up_when_enabled(
    built_image: str, container_name: str,
) -> None:
    """Counterpart check: HERMES_DASHBOARD=1 makes s6-svstat report the slot up.

    Starts a detached container with the variable set, then polls
    s6-svstat for up to 30 seconds until the slot shows as up.

    Args:
        built_image: Tag of the image under test.
        container_name: Name to give the container started by this test.

    Raises:
        AssertionError: If the slot never reports up before the deadline.
    """
    run_cmd = [
        "docker", "run", "-d", "--name", container_name,
        "-e", "HERMES_DASHBOARD=1", built_image, "sleep", "120",
    ]
    subprocess.run(run_cmd, check=True, capture_output=True, timeout=30)

    # The dashboard needs a moment to come up, so poll rather than
    # checking once.
    give_up_at = time.monotonic() + 30.0
    last_status = ""
    while time.monotonic() < give_up_at:
        probe = docker_exec(
            container_name, "/command/s6-svstat", "/run/service/dashboard",
        )
        last_status = probe.stdout
        if probe.returncode != 0 or "up " not in last_status:
            # Not up yet: back off briefly and try again.
            time.sleep(0.5)
            continue
        return  # slot is up

    raise AssertionError(
        f"Dashboard slot never reached up state; last svstat: {last_status!r}"
    )
|
||||
|
||||
|
||||
def test_dashboard_opt_in_starts(
|
||||
built_image: str, container_name: str,
|
||||
) -> None:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue