mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-30 06:41:51 +00:00
Two parallel public-path allowlists drifted: _PUBLIC_API_PATHS in
hermes_cli/web_server.py (legacy _SESSION_TOKEN middleware) and
_GATE_PUBLIC_PREFIXES in hermes_cli/dashboard_auth/middleware.py
(OAuth gate). The legacy list included /api/status (documented as a
non-sensitive read-only liveness target); the OAuth gate's list did not.
Effect: every wildcard-subdomain agent surfaced as STARTING/down to the
portal even though the dashboard was serving correctly. Nous account
service (src/server/agents/fly-provider.ts
getInstanceRuntimeStatus) fetches ``/api/status`` without a cookie
as its sole liveness probe; the OAuth gate's 401 looked identical to
'agent dead' on the portal side.
Fix: lift the allowlist into hermes_cli/dashboard_auth/public_paths.py
and have both middlewares import it. _path_is_public now consults
the shared frozenset first, then falls back to the gate's
auth-bootstrap/static prefix list. Future additions to the public list
hit both gates automatically.
Endpoint inventory (verified safe to remain public):
* /api/status — version, gateway state, active session count,
auth-gate shape. Portal liveness probe target.
* /api/config/defaults — config-defaults feed for the SPA's Config page
* /api/config/schema — config schema for the SPA's Config page
* /api/model/info — model catalogue metadata (context windows)
* /api/dashboard/themes — theme manifests for the skin engine
* /api/dashboard/plugins — plugin manifests for the dashboard
No user data, no session content, no secrets. Same shape an external
monitoring agent would hit on /healthz.
Tests:
* New: test_gated_status_is_public (regression guard with the NAS
fly-provider.ts liveness-probe rationale spelled out in the docstring)
* New: test_other_public_api_paths_are_public_under_gate (parametrised
over the rest of PUBLIC_API_PATHS — proves 401 / 302-to-login is
never the response)
* New: docker integration check #3 in
test_dashboard_oauth_gate_engaged_by_default — /api/status
remains 200 under the gate AND reports auth_required=True so the
portal can distinguish modes
* Updated: test_full_login_round_trip_unlocks_gated_api now probes
/api/sessions instead of /api/status (status is public, so it
can no longer distinguish 'logged in' from 'gate accidentally
disabled')
* Updated: TestApi401Envelope (the no-cookie / invalid-cookie /
dead-cookie tests) probes /api/sessions for the same reason
* Updated: docker integration check #2 in
test_dashboard_oauth_gate_engaged_by_default probes
/api/sessions to prove the gate is intercepting
* Removed: dead _login() helper in
test_dashboard_auth_status_endpoint.py (no longer needed since
/api/status is reachable cold)
Companion to docs/handover/hermes-agent-dashboard-s6-insecure-fix.md
(the --insecure flag fix that shipped earlier).
415 lines
17 KiB
Python
415 lines
17 KiB
Python
"""Harness: dashboard opt-in via HERMES_DASHBOARD.
|
|
|
|
Today (tini): dashboard starts once when HERMES_DASHBOARD=1; if it crashes
|
|
it stays dead. After Phase 2 (s6): dashboard starts once; if it crashes
|
|
it is restarted under supervision. The restart-after-crash test lives in
|
|
Phase 2 Task 2.5; this file only locks the opt-in surface (which must
|
|
not change between tini and s6).
|
|
|
|
Every ``docker exec`` here runs as the unprivileged ``hermes`` user
|
|
(via :func:`docker_exec`/:func:`docker_exec_sh` in conftest), matching
|
|
the realistic runtime context. See the conftest module docstring.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import subprocess
|
|
import time
|
|
|
|
from tests.docker.conftest import docker_exec, docker_exec_sh
|
|
|
|
|
|
def _poll(container: str, probe: str, *, deadline_s: float = 30.0,
|
|
interval_s: float = 0.5) -> tuple[bool, str]:
|
|
"""Repeatedly run ``probe`` inside the container until it exits 0 or
|
|
``deadline_s`` elapses. Returns (success, last stdout)."""
|
|
end = time.monotonic() + deadline_s
|
|
last = ""
|
|
while time.monotonic() < end:
|
|
r = docker_exec_sh(container, probe, timeout=10)
|
|
last = r.stdout
|
|
if r.returncode == 0:
|
|
return True, last
|
|
time.sleep(interval_s)
|
|
return False, last
|
|
|
|
|
|
def test_dashboard_not_running_by_default(
|
|
built_image: str, container_name: str,
|
|
) -> None:
|
|
"""Without HERMES_DASHBOARD, no dashboard process should be running."""
|
|
subprocess.run(
|
|
["docker", "run", "-d", "--name", container_name, built_image,
|
|
"sleep", "60"],
|
|
check=True, capture_output=True, timeout=30,
|
|
)
|
|
# Give the entrypoint enough time to finish bootstrap; if a dashboard
|
|
# were going to start it'd be visible by now.
|
|
time.sleep(5)
|
|
r = docker_exec(container_name, "pgrep", "-f", "hermes dashboard")
|
|
# pgrep exits non-zero when no match found
|
|
assert r.returncode != 0, (
|
|
"Dashboard should not be running without HERMES_DASHBOARD"
|
|
)
|
|
|
|
|
|
def test_dashboard_slot_reports_down_when_disabled(
|
|
built_image: str, container_name: str,
|
|
) -> None:
|
|
"""Without HERMES_DASHBOARD, s6-svstat should report the dashboard
|
|
slot as DOWN (not up-with-sleep-infinity, which would
|
|
false-positive `hermes doctor` and any other health check).
|
|
|
|
Locks the PR #30136 review item I3 fix: cont-init.d/03-dashboard-toggle
|
|
writes a `down` marker file in the live service-dir when
|
|
HERMES_DASHBOARD is unset, so the slot reflects reality.
|
|
"""
|
|
subprocess.run(
|
|
["docker", "run", "-d", "--name", container_name, built_image,
|
|
"sleep", "60"],
|
|
check=True, capture_output=True, timeout=30,
|
|
)
|
|
time.sleep(5)
|
|
# /command/ isn't on PATH for docker-exec sessions, so call by
|
|
# absolute path.
|
|
r = docker_exec(
|
|
container_name, "/command/s6-svstat", "/run/service/dashboard",
|
|
)
|
|
assert r.returncode == 0, f"s6-svstat failed: {r.stderr!r} / {r.stdout!r}"
|
|
assert "down" in r.stdout, (
|
|
f"Dashboard slot should be 'down' without HERMES_DASHBOARD; "
|
|
f"svstat reports: {r.stdout!r}"
|
|
)
|
|
|
|
|
|
def test_dashboard_slot_reports_up_when_enabled(
|
|
built_image: str, container_name: str,
|
|
) -> None:
|
|
"""Symmetry: with HERMES_DASHBOARD=1, s6-svstat reports the slot as up."""
|
|
subprocess.run(
|
|
["docker", "run", "-d", "--name", container_name,
|
|
"-e", "HERMES_DASHBOARD=1",
|
|
# The default dashboard host is 0.0.0.0, which now engages the
|
|
# OAuth auth gate. Without a provider registered (no
|
|
# HERMES_DASHBOARD_OAUTH_CLIENT_ID in this test env), start_server
|
|
# would fail closed and the slot would never come up. Pin the
|
|
# explicit insecure opt-in to keep this test focused on the s6
|
|
# supervision contract, not the auth gate.
|
|
"-e", "HERMES_DASHBOARD_INSECURE=1",
|
|
built_image, "sleep", "120"],
|
|
check=True, capture_output=True, timeout=30,
|
|
)
|
|
# uvicorn takes a moment to bind; poll svstat.
|
|
deadline = time.monotonic() + 30.0
|
|
last = ""
|
|
while time.monotonic() < deadline:
|
|
r = docker_exec(
|
|
container_name, "/command/s6-svstat", "/run/service/dashboard",
|
|
)
|
|
last = r.stdout
|
|
if r.returncode == 0 and "up " in r.stdout:
|
|
return # success
|
|
time.sleep(0.5)
|
|
raise AssertionError(
|
|
f"Dashboard slot never reached up state; last svstat: {last!r}"
|
|
)
|
|
|
|
|
|
def test_dashboard_opt_in_starts(
|
|
built_image: str, container_name: str,
|
|
) -> None:
|
|
"""With HERMES_DASHBOARD=1, a dashboard process should be visible."""
|
|
subprocess.run(
|
|
["docker", "run", "-d", "--name", container_name,
|
|
"-e", "HERMES_DASHBOARD=1",
|
|
# Default bind is 0.0.0.0; pin insecure opt-in so the auth gate
|
|
# doesn't fail-closed before the process can come up. See
|
|
# test_dashboard_slot_reports_up_when_enabled for the full rationale.
|
|
"-e", "HERMES_DASHBOARD_INSECURE=1",
|
|
built_image, "sleep", "120"],
|
|
check=True, capture_output=True, timeout=30,
|
|
)
|
|
# Poll for the dashboard subprocess to appear — the entrypoint
|
|
# backgrounds it and bootstrap (skills sync etc.) can take a few
|
|
# seconds before the python process actually launches.
|
|
ok, _ = _poll(
|
|
container_name, "pgrep -f 'hermes dashboard'", deadline_s=30.0,
|
|
)
|
|
assert ok, "Dashboard should be running with HERMES_DASHBOARD=1"
|
|
|
|
|
|
def test_dashboard_port_override(
|
|
built_image: str, container_name: str,
|
|
) -> None:
|
|
"""HERMES_DASHBOARD_PORT changes the dashboard's listen port."""
|
|
subprocess.run(
|
|
["docker", "run", "-d", "--name", container_name,
|
|
"-e", "HERMES_DASHBOARD=1", "-e", "HERMES_DASHBOARD_PORT=9120",
|
|
# Default bind is 0.0.0.0; pin insecure opt-in so the auth gate
|
|
# doesn't fail-closed before the port is bound. See
|
|
# test_dashboard_slot_reports_up_when_enabled for the full rationale.
|
|
"-e", "HERMES_DASHBOARD_INSECURE=1",
|
|
built_image, "sleep", "120"],
|
|
check=True, capture_output=True, timeout=30,
|
|
)
|
|
# The dashboard process appearing in pgrep doesn't mean it's bound
|
|
# to the port yet — uvicorn takes another second or two to come up.
|
|
# The image doesn't ship ss/netstat, so probe /proc/net/tcp directly:
|
|
# port 9120 = 0x23A0, state 0A = LISTEN.
|
|
ok, stdout = _poll(
|
|
container_name,
|
|
"grep -E ' 0+:23A0 .* 0A ' /proc/net/tcp /proc/net/tcp6 "
|
|
"2>/dev/null",
|
|
deadline_s=60.0,
|
|
)
|
|
assert ok, f"Dashboard not listening on port 9120: stdout={stdout!r}"
|
|
|
|
|
|
def test_dashboard_restarts_after_crash(
|
|
built_image: str, container_name: str,
|
|
) -> None:
|
|
"""Phase 2 invariant: under s6 supervision, killing the dashboard
|
|
process should be recovered automatically.
|
|
|
|
Pre-s6 (tini) behavior was "stays dead" — the test wouldn't have
|
|
passed against that image. After the s6-overlay migration the
|
|
dashboard runs as a longrun s6-rc service and s6-supervise restarts
|
|
it after a ~1s backoff (the default).
|
|
"""
|
|
subprocess.run(
|
|
["docker", "run", "-d", "--name", container_name,
|
|
"-e", "HERMES_DASHBOARD=1",
|
|
# Default bind is 0.0.0.0; pin insecure opt-in so the auth gate
|
|
# doesn't fail-closed before the supervised dashboard can come up.
|
|
# See test_dashboard_slot_reports_up_when_enabled for the full
|
|
# rationale.
|
|
"-e", "HERMES_DASHBOARD_INSECURE=1",
|
|
built_image, "sleep", "120"],
|
|
check=True, capture_output=True, timeout=30,
|
|
)
|
|
# Wait for the first dashboard to come up.
|
|
ok, _ = _poll(
|
|
container_name, "pgrep -f 'hermes dashboard'", deadline_s=30.0,
|
|
)
|
|
assert ok, "Dashboard never started initially"
|
|
|
|
# Grab the initial PID. s6 may briefly transition through restart
|
|
# state between our poll-success and the follow-up pgrep, so retry
|
|
# a couple of times before giving up.
|
|
first_pid: str | None = None
|
|
for _attempt in range(10):
|
|
first_pid_result = docker_exec(
|
|
container_name, "pgrep", "-f", "hermes dashboard",
|
|
)
|
|
first_pids = first_pid_result.stdout.strip().split()
|
|
if first_pids:
|
|
first_pid = first_pids[0]
|
|
break
|
|
time.sleep(0.5)
|
|
assert first_pid is not None, "Could not capture initial dashboard PID"
|
|
|
|
# Kill the dashboard. The dashboard process runs as hermes, so the
|
|
# hermes user can kill it (same UID).
|
|
docker_exec(container_name, "kill", "-9", first_pid)
|
|
|
|
# s6 backs off ~1s before restart; allow up to 15s for the new
|
|
# process to appear with a different PID.
|
|
deadline = time.monotonic() + 15.0
|
|
while time.monotonic() < deadline:
|
|
r = docker_exec(container_name, "pgrep", "-f", "hermes dashboard")
|
|
pids = r.stdout.strip().split() if r.returncode == 0 else []
|
|
if pids and pids[0] != first_pid:
|
|
return # success
|
|
time.sleep(0.5)
|
|
|
|
raise AssertionError(
|
|
f"Dashboard not restarted after kill (first_pid={first_pid})"
|
|
)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# OAuth auth-gate behaviour — regression guard for the dashboard-insecure
|
|
# auto-injection bug. Pre-fix, the s6 run script appended `--insecure`
|
|
# whenever `HERMES_DASHBOARD_HOST` was non-loopback, silently disabling
|
|
# the OAuth gate on every container-deployed dashboard. The matching
|
|
# static-text guard lives in tests/test_docker_home_override_scripts.py;
|
|
# this is the behavioural end-to-end check.
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def _http_probe(
|
|
container: str,
|
|
path: str,
|
|
*,
|
|
deadline_s: float = 60.0,
|
|
) -> tuple[int, str]:
|
|
"""Poll ``http://127.0.0.1:9119<path>`` from inside the container.
|
|
|
|
Returns ``(status_code, body)`` as soon as the dashboard answers any
|
|
HTTP response — 200, 401, 503, anything. The image doesn't ship
|
|
``curl`` but the venv's stdlib ``urllib`` is good enough; we use a
|
|
proper ``try``/``except`` to intercept ``HTTPError`` because
|
|
``urlopen`` raises on 4xx/5xx, and we treat those as legitimate
|
|
responses (the OAuth gate's 401 IS the success signal for the
|
|
gate-engaged test).
|
|
|
|
Connection errors (uvicorn still starting, fail-closed exited) keep
|
|
the poll loop running until ``deadline_s`` elapses.
|
|
|
|
The probe Python program is fed over stdin (``python -``) rather
|
|
than ``python -c`` so we can use proper multi-line syntax with
|
|
``try``/``except`` blocks without escaping hell.
|
|
|
|
Raises ``AssertionError`` on timeout.
|
|
"""
|
|
py_program = f"""\
|
|
import urllib.request, urllib.error
|
|
req = urllib.request.Request("http://127.0.0.1:9119{path}")
|
|
try:
|
|
r = urllib.request.urlopen(req, timeout=5)
|
|
print(r.status)
|
|
print(r.read().decode(), end="")
|
|
except urllib.error.HTTPError as h:
|
|
print(h.code)
|
|
print(h.read().decode(), end="")
|
|
"""
|
|
# Feed the program over stdin via a heredoc so docker_exec_sh's
|
|
# single bash string stays clean. The 'PY' delimiter is quoted to
|
|
# disable shell expansion inside the heredoc body.
|
|
probe = (
|
|
"/opt/hermes/.venv/bin/python - <<'PY'\n"
|
|
f"{py_program}"
|
|
"PY"
|
|
)
|
|
end = time.monotonic() + deadline_s
|
|
last_err = ""
|
|
while time.monotonic() < end:
|
|
r = docker_exec_sh(container, probe, timeout=10)
|
|
if r.returncode == 0 and r.stdout.strip():
|
|
lines = r.stdout.split("\n", 1)
|
|
try:
|
|
status = int(lines[0].strip())
|
|
body = lines[1] if len(lines) > 1 else ""
|
|
return status, body
|
|
except (ValueError, IndexError) as exc:
|
|
last_err = f"parse: {exc!r} / stdout={r.stdout!r}"
|
|
else:
|
|
last_err = f"rc={r.returncode} stderr={r.stderr!r}"
|
|
time.sleep(0.5)
|
|
raise AssertionError(
|
|
f"Probe of {path} never returned HTTP within {deadline_s}s; "
|
|
f"last error: {last_err}"
|
|
)
|
|
|
|
|
|
def test_dashboard_oauth_gate_engages_on_non_loopback_bind(
|
|
built_image: str, container_name: str,
|
|
) -> None:
|
|
"""The s6 dashboard run script must NOT auto-add ``--insecure`` when the
|
|
dashboard binds to ``0.0.0.0``. The OAuth auth gate engages on its own
|
|
when a ``DashboardAuthProvider`` is registered (the bundled nous
|
|
provider activates whenever ``HERMES_DASHBOARD_OAUTH_CLIENT_ID`` is
|
|
set).
|
|
|
|
Regression guard for the wildcard-subdomain rollout where every
|
|
portal-provisioned agent binds ``0.0.0.0`` and relies on the OAuth
|
|
gate to authenticate browser callers. Before this fix, the run script
|
|
flipped ``--insecure`` on for any non-loopback bind, which routed
|
|
``start_server`` straight back into the legacy ``allow_public=True``
|
|
branch and disabled the gate every time.
|
|
|
|
We verify two independent observable consequences of the gate being
|
|
on:
|
|
|
|
1. ``/api/auth/providers`` (publicly reachable through the gate so
|
|
the login page can bootstrap) returns 200 with ``nous`` in the
|
|
provider list — proves the bundled provider registered.
|
|
2. ``/api/sessions`` (a gated route under both the legacy
|
|
``_SESSION_TOKEN`` middleware and the OAuth gate) returns 401
|
|
to an unauthenticated caller — proves the OAuth gate is actively
|
|
intercepting browser traffic. We deliberately probe a gated route
|
|
here rather than ``/api/status``: status sits in the shared
|
|
``PUBLIC_API_PATHS`` allowlist (portal liveness probe target) and
|
|
responds 200 without a cookie under both gates, so it cannot
|
|
distinguish "gate on" from "gate off".
|
|
"""
|
|
subprocess.run(
|
|
["docker", "run", "-d", "--name", container_name,
|
|
"-e", "HERMES_DASHBOARD=1",
|
|
"-e", "HERMES_DASHBOARD_HOST=0.0.0.0",
|
|
"-e", "HERMES_DASHBOARD_OAUTH_CLIENT_ID=agent:test-instance",
|
|
built_image, "sleep", "120"],
|
|
check=True, capture_output=True, timeout=30,
|
|
)
|
|
|
|
# (1) Provider registry visible via the public bootstrap endpoint.
|
|
status_code, body = _http_probe(container_name, "/api/auth/providers")
|
|
assert status_code == 200, (
|
|
f"/api/auth/providers should return 200 when a provider is "
|
|
f"registered; got {status_code} body={body!r}"
|
|
)
|
|
payload = json.loads(body)
|
|
provider_names = [p.get("name") for p in payload.get("providers", [])]
|
|
assert "nous" in provider_names, (
|
|
"Bundled dashboard_auth/nous provider should register when "
|
|
f"HERMES_DASHBOARD_OAUTH_CLIENT_ID is set. Got: {payload!r}"
|
|
)
|
|
|
|
# (2) A gated route (``/api/sessions``) returns 401 to an
|
|
# unauthenticated caller — the OAuth gate is intercepting.
|
|
status_code, body = _http_probe(container_name, "/api/sessions")
|
|
assert status_code == 401, (
|
|
"OAuth gate must intercept gated /api/* routes on 0.0.0.0 bind "
|
|
"when a provider is registered and HERMES_DASHBOARD_INSECURE "
|
|
f"is unset. Got: status={status_code} body={body!r}"
|
|
)
|
|
|
|
# (3) ``/api/status`` remains 200 under the gate — it's in the shared
|
|
# ``PUBLIC_API_PATHS`` allowlist so NAS's wildcard-subdomain
|
|
# liveness probe (``fly-provider.ts`` ``getInstanceRuntimeStatus``)
|
|
# can reach it without a cookie. Regression guard: this allowlist
|
|
# drifted once already and surfaced every healthy agent as
|
|
# STARTING/down in the portal UI.
|
|
status_code, body = _http_probe(container_name, "/api/status")
|
|
assert status_code == 200, (
|
|
"/api/status must remain publicly reachable under the OAuth gate "
|
|
"— the portal uses it as the wildcard-subdomain liveness probe. "
|
|
f"Got: status={status_code} body={body!r}"
|
|
)
|
|
status = json.loads(body)
|
|
assert status.get("auth_required") is True, (
|
|
"/api/status must report auth_required=True when the OAuth gate "
|
|
f"is engaged so the SPA/portal can distinguish modes. Got: {status!r}"
|
|
)
|
|
|
|
|
|
def test_dashboard_insecure_env_var_opts_out_of_gate(
|
|
built_image: str, container_name: str,
|
|
) -> None:
|
|
"""``HERMES_DASHBOARD_INSECURE=1`` re-enables the legacy no-gate mode
|
|
for operators running on trusted LANs behind a reverse proxy without
|
|
the OAuth contract. Same opt-out shape as the rest of the s6 boolean
|
|
envs (``HERMES_DASHBOARD``, ``HERMES_DASHBOARD_TUI``).
|
|
|
|
With the gate off, ``/api/status`` (a public endpoint under the
|
|
legacy ``_SESSION_TOKEN`` middleware) returns 200 with the
|
|
``auth_required: false`` body — proves the gate is bypassed.
|
|
"""
|
|
subprocess.run(
|
|
["docker", "run", "-d", "--name", container_name,
|
|
"-e", "HERMES_DASHBOARD=1",
|
|
"-e", "HERMES_DASHBOARD_HOST=0.0.0.0",
|
|
"-e", "HERMES_DASHBOARD_INSECURE=1",
|
|
built_image, "sleep", "120"],
|
|
check=True, capture_output=True, timeout=30,
|
|
)
|
|
status_code, body = _http_probe(container_name, "/api/status")
|
|
assert status_code == 200, (
|
|
f"/api/status should return 200 with the auth gate disabled; "
|
|
f"got {status_code} body={body!r}"
|
|
)
|
|
status = json.loads(body)
|
|
assert status.get("auth_required") is False, (
|
|
"HERMES_DASHBOARD_INSECURE=1 must disable the auth gate (explicit "
|
|
f"opt-in for trusted-LAN deployments). Got: {status!r}"
|
|
)
|