mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-29 06:31:32 +00:00
fix(dashboard-auth): share /api/* public allowlist between legacy and OAuth gates
Two parallel public-path allowlists drifted: _PUBLIC_API_PATHS in
hermes_cli/web_server.py (legacy _SESSION_TOKEN middleware) and
_GATE_PUBLIC_PREFIXES in hermes_cli/dashboard_auth/middleware.py
(OAuth gate). The legacy list included /api/status (documented as a
non-sensitive read-only liveness target); the OAuth gate's list did not.
Effect: every wildcard-subdomain agent surfaced as STARTING/down to the
portal even though the dashboard was serving correctly. Nous account
service (src/server/agents/fly-provider.ts
getInstanceRuntimeStatus) fetches ``/api/status`` without a cookie
as its sole liveness probe; the OAuth gate's 401 looked identical to
'agent dead' on the portal side.
Fix: lift the allowlist into hermes_cli/dashboard_auth/public_paths.py
and have both middlewares import it. _path_is_public now consults
the shared frozenset first, then falls back to the gate's
auth-bootstrap/static prefix list. Future additions to the public list
hit both gates automatically.
Endpoint inventory (verified safe to remain public):
* /api/status — version, gateway state, active session count,
auth-gate shape. Portal liveness probe target.
* /api/config/defaults — config-defaults feed for the SPA's Config page
* /api/config/schema — config schema for the SPA's Config page
* /api/model/info — model catalogue metadata (context windows)
* /api/dashboard/themes — theme manifests for the skin engine
* /api/dashboard/plugins — plugin manifests for the dashboard
No user data, no session content, no secrets. Same shape an external
monitoring agent would hit on /healthz.
Tests:
* New: test_gated_status_is_public (regression guard with the NAS
fly-provider.ts liveness-probe rationale spelled out in the docstring)
* New: test_other_public_api_paths_are_public_under_gate (parametrised
over the rest of PUBLIC_API_PATHS — proves 401 / 302-to-login is
never the response)
* New: docker integration check #3 in
test_dashboard_oauth_gate_engaged_by_default — /api/status
remains 200 under the gate AND reports auth_required=True so the
portal can distinguish modes
* Updated: test_full_login_round_trip_unlocks_gated_api now probes
/api/sessions instead of /api/status (status is public, so it
can no longer distinguish 'logged in' from 'gate accidentally
disabled')
* Updated: TestApi401Envelope (the no-cookie / invalid-cookie /
dead-cookie tests) probes /api/sessions for the same reason
* Updated: docker integration check #2 in
test_dashboard_oauth_gate_engaged_by_default probes
/api/sessions to prove the gate is intercepting
* Removed: dead _login() helper in
test_dashboard_auth_status_endpoint.py (no longer needed since
/api/status is reachable cold)
Companion to docs/handover/hermes-agent-dashboard-s6-insecure-fix.md
(the --insecure flag fix that shipped earlier).
This commit is contained in:
parent
3b6347af15
commit
a618789dba
7 changed files with 190 additions and 44 deletions
|
|
@ -26,10 +26,15 @@ from hermes_cli.dashboard_auth import list_providers
|
|||
from hermes_cli.dashboard_auth.audit import AuditEvent, audit_log
|
||||
from hermes_cli.dashboard_auth.base import ProviderError
|
||||
from hermes_cli.dashboard_auth.cookies import read_session_cookies
|
||||
from hermes_cli.dashboard_auth.public_paths import PUBLIC_API_PATHS
|
||||
|
||||
_log = logging.getLogger(__name__)
|
||||
|
||||
# Paths that bypass the auth gate. Order matters: prefix match.
|
||||
# Prefixes that bypass the auth gate. Match via ``path == prefix`` or
|
||||
# ``path.startswith(prefix)`` — so ``/assets/`` (with trailing slash)
|
||||
# matches ``/assets/foo.css`` but not ``/assetsleak``. Auth-bootstrap
|
||||
# (login page, OAuth round trip, provider listing) and static asset
|
||||
# mounts go here.
|
||||
_GATE_PUBLIC_PREFIXES: tuple[str, ...] = (
|
||||
"/auth/login",
|
||||
"/auth/callback",
|
||||
|
|
@ -45,6 +50,20 @@ _GATE_PUBLIC_PREFIXES: tuple[str, ...] = (
|
|||
|
||||
|
||||
def _path_is_public(path: str) -> bool:
|
||||
"""True if ``path`` bypasses the OAuth auth gate.
|
||||
|
||||
Two sources of public-ness:
|
||||
|
||||
* :data:`PUBLIC_API_PATHS` — the shared ``/api/*`` allowlist that
|
||||
the legacy ``_SESSION_TOKEN`` middleware also honours. Matched
|
||||
exactly (no prefix expansion) so adding ``/api/status`` doesn't
|
||||
accidentally expose ``/api/status/secret-extension``.
|
||||
* :data:`_GATE_PUBLIC_PREFIXES` — auth-bootstrap routes and static
|
||||
mounts. Prefix-matched so ``/assets/foo.css`` lights up via
|
||||
``/assets/``.
|
||||
"""
|
||||
if path in PUBLIC_API_PATHS:
|
||||
return True
|
||||
return any(
|
||||
path == prefix or path.startswith(prefix)
|
||||
for prefix in _GATE_PUBLIC_PREFIXES
|
||||
|
|
|
|||
49
hermes_cli/dashboard_auth/public_paths.py
Normal file
49
hermes_cli/dashboard_auth/public_paths.py
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
"""Shared allowlist of ``/api/*`` paths that bypass dashboard auth.
|
||||
|
||||
Two middlewares enforce dashboard auth and previously kept independent
|
||||
copies of this list:
|
||||
|
||||
* ``hermes_cli.web_server.auth_middleware`` — loopback / ``--insecure``
|
||||
mode, gates on the ephemeral ``_SESSION_TOKEN``.
|
||||
* ``hermes_cli.dashboard_auth.middleware.gated_auth_middleware`` —
|
||||
non-loopback mode, gates on the OAuth session cookie.
|
||||
|
||||
When the lists drifted, ``/api/status`` ended up public under the legacy
|
||||
gate but 401'd under the OAuth gate. That broke the portal's wildcard
|
||||
liveness probe (``nous-account-service`` ``fly-provider.ts``
|
||||
``getInstanceRuntimeStatus``), which fetches ``/api/status`` without a
|
||||
cookie as its sole signal of "agent dashboard is alive": every healthy
|
||||
wildcard-subdomain agent surfaced as STARTING/down in the portal UI even
|
||||
though the dashboard was serving correctly.
|
||||
|
||||
Centralising the allowlist here so both middlewares import the same
|
||||
frozenset prevents the next drift. Keep this list minimal — only truly
|
||||
non-sensitive, read-only endpoints belong here. As a sanity check, every
|
||||
entry should be safe to expose to:
|
||||
|
||||
* external uptime probes (Pingdom, Better Stack, NAS),
|
||||
* the dashboard SPA before the user has logged in,
|
||||
* anyone who happens to ``curl`` the hostname.
|
||||
|
||||
If a new endpoint doesn't pass all three tests, it should be gated and
|
||||
the SPA should bootstrap it after login instead.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
PUBLIC_API_PATHS: frozenset[str] = frozenset({
|
||||
# Liveness probe target. Returns version, gateway state, active
|
||||
# session count, and the dashboard auth-gate shape. No bodies, no
|
||||
# session content, no secrets. Documented as the portal's wildcard
|
||||
# liveness probe in
|
||||
# ``docs/agent-dashboard-public-url-contract.md`` (NAS side).
|
||||
"/api/status",
|
||||
# Read-only config-defaults / schema feeds for the SPA's Config page.
|
||||
"/api/config/defaults",
|
||||
"/api/config/schema",
|
||||
# Read-only model metadata (context windows, etc.) — same shape as
|
||||
# provider catalogs already exposed on the public internet.
|
||||
"/api/model/info",
|
||||
# Read-only theme + plugin manifests for the dashboard skin engine.
|
||||
"/api/dashboard/themes",
|
||||
"/api/dashboard/plugins",
|
||||
})
|
||||
|
|
@ -110,17 +110,20 @@ app.add_middleware(
|
|||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Endpoints that do NOT require the session token. Everything else under
|
||||
# /api/ is gated by the auth middleware below. Keep this list minimal —
|
||||
# only truly non-sensitive, read-only endpoints belong here.
|
||||
# /api/ is gated by the auth middleware below.
|
||||
#
|
||||
# This list is defined in ``hermes_cli.dashboard_auth.public_paths`` so the
|
||||
# OAuth gate middleware can honour the same allowlist — keeping the two
|
||||
# gates in lockstep avoids drift like the wildcard-subdomain regression
|
||||
# where ``/api/status`` was public under the legacy gate but 401'd under
|
||||
# the OAuth gate (breaking the portal's liveness probe).
|
||||
#
|
||||
# Keep the upstream list minimal — only truly non-sensitive, read-only
|
||||
# endpoints belong there.
|
||||
# ---------------------------------------------------------------------------
|
||||
_PUBLIC_API_PATHS: frozenset = frozenset({
|
||||
"/api/status",
|
||||
"/api/config/defaults",
|
||||
"/api/config/schema",
|
||||
"/api/model/info",
|
||||
"/api/dashboard/themes",
|
||||
"/api/dashboard/plugins",
|
||||
})
|
||||
from hermes_cli.dashboard_auth.public_paths import (
|
||||
PUBLIC_API_PATHS as _PUBLIC_API_PATHS,
|
||||
)
|
||||
|
||||
|
||||
def _has_valid_session_token(request: Request) -> bool:
|
||||
|
|
|
|||
|
|
@ -324,10 +324,14 @@ def test_dashboard_oauth_gate_engages_on_non_loopback_bind(
|
|||
1. ``/api/auth/providers`` (publicly reachable through the gate so
|
||||
the login page can bootstrap) returns 200 with ``nous`` in the
|
||||
provider list — proves the bundled provider registered.
|
||||
2. ``/api/status`` (a public endpoint under the legacy
|
||||
``_SESSION_TOKEN`` middleware) returns 401 — proves the OAuth gate
|
||||
runs upstream of the legacy public list and is actively
|
||||
intercepting unauthenticated callers.
|
||||
2. ``/api/sessions`` (a gated route under both the legacy
|
||||
``_SESSION_TOKEN`` middleware and the OAuth gate) returns 401
|
||||
to an unauthenticated caller — proves the OAuth gate is actively
|
||||
intercepting browser traffic. We deliberately probe a gated route
|
||||
here rather than ``/api/status``: status sits in the shared
|
||||
``PUBLIC_API_PATHS`` allowlist (portal liveness probe target) and
|
||||
responds 200 without a cookie under both gates, so it cannot
|
||||
distinguish "gate on" from "gate off".
|
||||
"""
|
||||
subprocess.run(
|
||||
["docker", "run", "-d", "--name", container_name,
|
||||
|
|
@ -351,14 +355,32 @@ def test_dashboard_oauth_gate_engages_on_non_loopback_bind(
|
|||
f"HERMES_DASHBOARD_OAUTH_CLIENT_ID is set. Got: {payload!r}"
|
||||
)
|
||||
|
||||
# (2) /api/status is gated by the OAuth middleware → unauthenticated
|
||||
# callers get 401, not the legacy public 200 JSON.
|
||||
status_code, body = _http_probe(container_name, "/api/status")
|
||||
# (2) A gated route (``/api/sessions``) returns 401 to an
|
||||
# unauthenticated caller — the OAuth gate is intercepting.
|
||||
status_code, body = _http_probe(container_name, "/api/sessions")
|
||||
assert status_code == 401, (
|
||||
"OAuth gate must intercept /api/status on 0.0.0.0 bind when a "
|
||||
"provider is registered and HERMES_DASHBOARD_INSECURE is unset. "
|
||||
"OAuth gate must intercept gated /api/* routes on 0.0.0.0 bind "
|
||||
"when a provider is registered and HERMES_DASHBOARD_INSECURE "
|
||||
f"is unset. Got: status={status_code} body={body!r}"
|
||||
)
|
||||
|
||||
# (3) ``/api/status`` remains 200 under the gate — it's in the shared
|
||||
# ``PUBLIC_API_PATHS`` allowlist so NAS's wildcard-subdomain
|
||||
# liveness probe (``fly-provider.ts`` ``getInstanceRuntimeStatus``)
|
||||
# can reach it without a cookie. Regression guard: this allowlist
|
||||
# drifted once already and surfaced every healthy agent as
|
||||
# STARTING/down in the portal UI.
|
||||
status_code, body = _http_probe(container_name, "/api/status")
|
||||
assert status_code == 200, (
|
||||
"/api/status must remain publicly reachable under the OAuth gate "
|
||||
"— the portal uses it as the wildcard-subdomain liveness probe. "
|
||||
f"Got: status={status_code} body={body!r}"
|
||||
)
|
||||
status = json.loads(body)
|
||||
assert status.get("auth_required") is True, (
|
||||
"/api/status must report auth_required=True when the OAuth gate "
|
||||
f"is engaged so the SPA/portal can distinguish modes. Got: {status!r}"
|
||||
)
|
||||
|
||||
|
||||
def test_dashboard_insecure_env_var_opts_out_of_gate(
|
||||
|
|
|
|||
|
|
@ -131,8 +131,13 @@ class TestRefreshTokenCookieDeprecation:
|
|||
|
||||
|
||||
class TestApi401Envelope:
|
||||
# NOTE: probe a gated route (``/api/sessions``) here rather than
|
||||
# ``/api/status`` — status is in the shared ``PUBLIC_API_PATHS``
|
||||
# allowlist (portal liveness probe) so it would 200 even without a
|
||||
# cookie and never exercise the 401-envelope code path.
|
||||
|
||||
def test_no_cookie_returns_unauthenticated_envelope(self, gated_app):
|
||||
r = gated_app.get("/api/status")
|
||||
r = gated_app.get("/api/sessions")
|
||||
assert r.status_code == 401
|
||||
body = r.json()
|
||||
assert body["error"] == "unauthenticated"
|
||||
|
|
@ -141,7 +146,7 @@ class TestApi401Envelope:
|
|||
|
||||
def test_invalid_cookie_returns_session_expired_envelope(self, gated_app):
|
||||
gated_app.cookies.set(SESSION_AT_COOKIE, "garbage")
|
||||
r = gated_app.get("/api/status")
|
||||
r = gated_app.get("/api/sessions")
|
||||
assert r.status_code == 401
|
||||
body = r.json()
|
||||
assert body["error"] == "session_expired"
|
||||
|
|
@ -151,7 +156,7 @@ class TestApi401Envelope:
|
|||
"""Dead-cookie cleanup — Phase 6 requirement so the browser
|
||||
doesn't keep replaying the stale token on every request."""
|
||||
gated_app.cookies.set(SESSION_AT_COOKIE, "garbage")
|
||||
r = gated_app.get("/api/status")
|
||||
r = gated_app.get("/api/sessions")
|
||||
set_cookies = r.headers.get_list("set-cookie")
|
||||
assert any(
|
||||
c.startswith(f"{SESSION_AT_COOKIE}=") and "Max-Age=0" in c
|
||||
|
|
|
|||
|
|
@ -56,10 +56,61 @@ def gated_app():
|
|||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_gated_status_now_requires_auth(gated_app):
|
||||
"""When gate is on, /api/status is NOT public — login bootstrap uses /api/auth/providers."""
|
||||
def test_gated_status_is_public(gated_app):
|
||||
"""``/api/status`` MUST be public under the OAuth gate.
|
||||
|
||||
Regression guard for the wildcard-subdomain rollout: NAS
|
||||
(``fly-provider.ts`` ``getInstanceRuntimeStatus``) hits
|
||||
``/api/status`` without a cookie as its sole liveness probe. A 401
|
||||
here surfaces every healthy agent as STARTING/down in the portal
|
||||
UI. The endpoint returns only version + gateway/auth-gate metadata
|
||||
(no user data, no session content), so it stays in the shared
|
||||
``PUBLIC_API_PATHS`` allowlist under both the legacy ``_SESSION_TOKEN``
|
||||
gate and the OAuth gate.
|
||||
|
||||
The body also reports the gate's shape (``auth_required``,
|
||||
``auth_providers``) so the SPA's StatusPage and external monitors
|
||||
can distinguish loopback / gated / no-providers without a separate
|
||||
round trip.
|
||||
"""
|
||||
r = gated_app.get("/api/status")
|
||||
assert r.status_code == 401
|
||||
assert r.status_code == 200, (
|
||||
f"Expected 200, got {r.status_code}: {r.text}"
|
||||
)
|
||||
body = r.json()
|
||||
assert body["auth_required"] is True
|
||||
assert "version" in body
|
||||
assert "gateway_state" in body
|
||||
|
||||
|
||||
@pytest.mark.parametrize("path", [
|
||||
"/api/config/defaults",
|
||||
"/api/config/schema",
|
||||
"/api/model/info",
|
||||
"/api/dashboard/themes",
|
||||
"/api/dashboard/plugins",
|
||||
])
|
||||
def test_other_public_api_paths_are_public_under_gate(gated_app, path):
|
||||
"""The remaining ``PUBLIC_API_PATHS`` entries must also bypass the
|
||||
gate. They're documented as non-sensitive read-only endpoints that
|
||||
the SPA pre-loads before login (themes, config schema, model
|
||||
metadata). A 401 / 302-to-login here would block the dashboard
|
||||
shell from rendering pre-auth.
|
||||
|
||||
Accept any non-auth-failure status: 200 when the route succeeds,
|
||||
or any route-specific error (e.g. 400 / 404 / 500 from a missing
|
||||
dependency) — but NEVER 401, and NEVER a 302 to ``/login``.
|
||||
"""
|
||||
r = gated_app.get(path, follow_redirects=False)
|
||||
assert r.status_code != 401, (
|
||||
f"{path} returned 401 under the OAuth gate — should be public"
|
||||
)
|
||||
if r.status_code == 302:
|
||||
location = r.headers.get("location", "")
|
||||
assert "/login" not in location, (
|
||||
f"{path} redirected to {location} — should be public, "
|
||||
"not bounced to /login"
|
||||
)
|
||||
|
||||
|
||||
def test_gated_html_redirects_to_login(gated_app):
|
||||
|
|
@ -98,7 +149,7 @@ def test_gated_static_asset_path_is_public(gated_app):
|
|||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_full_login_round_trip_unlocks_api_status(gated_app):
|
||||
def test_full_login_round_trip_unlocks_gated_api(gated_app):
|
||||
# 1) Click "Sign in with Stub IdP" — /auth/login redirects to the stub
|
||||
# with a PKCE cookie on the response.
|
||||
r1 = gated_app.get("/auth/login?provider=stub", follow_redirects=False)
|
||||
|
|
@ -128,11 +179,16 @@ def test_full_login_round_trip_unlocks_api_status(gated_app):
|
|||
assert any("hermes_session_at" in c for c in set_cookies)
|
||||
assert any("hermes_session_rt" in c for c in set_cookies)
|
||||
|
||||
# 3) /api/status now succeeds because we're authenticated.
|
||||
r3 = gated_app.get("/api/status")
|
||||
assert r3.status_code == 200
|
||||
body = r3.json()
|
||||
assert "version" in body
|
||||
# 3) A gated API route (``/api/sessions``) now succeeds because we
|
||||
# have a valid session cookie. (We deliberately don't probe
|
||||
# ``/api/status`` here — it's in the shared PUBLIC_API_PATHS
|
||||
# allowlist and would 200 even without a login, so it can't
|
||||
# distinguish "logged in" from "gate accidentally disabled".)
|
||||
r3 = gated_app.get("/api/sessions")
|
||||
assert r3.status_code == 200, (
|
||||
f"Expected 200 for /api/sessions post-login, got {r3.status_code}: "
|
||||
f"{r3.text}"
|
||||
)
|
||||
|
||||
|
||||
def test_login_unknown_provider_returns_404(gated_app):
|
||||
|
|
|
|||
|
|
@ -59,19 +59,11 @@ def loopback_client():
|
|||
web_server.app.state.auth_required = prev_required
|
||||
|
||||
|
||||
def _login(client: TestClient) -> None:
|
||||
"""Drive the stub OAuth round trip so the gated client is authed."""
|
||||
r1 = client.get("/auth/login?provider=stub", follow_redirects=False)
|
||||
assert r1.status_code == 302
|
||||
state = r1.headers["location"].split("state=")[1]
|
||||
r2 = client.get(
|
||||
f"/auth/callback?code=stub_code&state={state}", follow_redirects=False
|
||||
)
|
||||
assert r2.status_code == 302
|
||||
|
||||
|
||||
def test_status_reports_auth_required_in_gated_mode(gated_client):
|
||||
_login(gated_client)
|
||||
# No ``_login()`` call — ``/api/status`` is in the shared
|
||||
# ``PUBLIC_API_PATHS`` allowlist precisely so external probes (and
|
||||
# the SPA's pre-login bootstrap) can read the gate's shape without
|
||||
# a cookie. Hit it cold.
|
||||
r = gated_client.get("/api/status")
|
||||
assert r.status_code == 200
|
||||
body = r.json()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue