diff --git a/hermes_cli/dashboard_auth/middleware.py b/hermes_cli/dashboard_auth/middleware.py index 5b42c90ebf7..3400a0cd979 100644 --- a/hermes_cli/dashboard_auth/middleware.py +++ b/hermes_cli/dashboard_auth/middleware.py @@ -26,10 +26,15 @@ from hermes_cli.dashboard_auth import list_providers from hermes_cli.dashboard_auth.audit import AuditEvent, audit_log from hermes_cli.dashboard_auth.base import ProviderError from hermes_cli.dashboard_auth.cookies import read_session_cookies +from hermes_cli.dashboard_auth.public_paths import PUBLIC_API_PATHS _log = logging.getLogger(__name__) -# Paths that bypass the auth gate. Order matters: prefix match. +# Prefixes that bypass the auth gate. Match via ``path == prefix`` or +# ``path.startswith(prefix)`` — so ``/assets/`` (with trailing slash) +# matches ``/assets/foo.css`` but not ``/assetsleak``. Auth-bootstrap +# (login page, OAuth round trip, provider listing) and static asset +# mounts go here. _GATE_PUBLIC_PREFIXES: tuple[str, ...] = ( "/auth/login", "/auth/callback", @@ -45,6 +50,20 @@ _GATE_PUBLIC_PREFIXES: tuple[str, ...] = ( def _path_is_public(path: str) -> bool: + """True if ``path`` bypasses the OAuth auth gate. + + Two sources of public-ness: + + * :data:`PUBLIC_API_PATHS` — the shared ``/api/*`` allowlist that + the legacy ``_SESSION_TOKEN`` middleware also honours. Matched + exactly (no prefix expansion) so adding ``/api/status`` doesn't + accidentally expose ``/api/status/secret-extension``. + * :data:`_GATE_PUBLIC_PREFIXES` — auth-bootstrap routes and static + mounts. Prefix-matched so ``/assets/foo.css`` lights up via + ``/assets/``. + """ + if path in PUBLIC_API_PATHS: + return True return any( path == prefix or path.startswith(prefix) for prefix in _GATE_PUBLIC_PREFIXES diff --git a/hermes_cli/dashboard_auth/public_paths.py b/hermes_cli/dashboard_auth/public_paths.py new file mode 100644 index 00000000000..2699e15c979 --- /dev/null +++ b/hermes_cli/dashboard_auth/public_paths.py @@ -0,0 +1,49 @@ +"""Shared allowlist of ``/api/*`` paths that bypass dashboard auth. + +Two middlewares enforce dashboard auth and previously kept independent +copies of this list: + +* ``hermes_cli.web_server.auth_middleware`` — loopback / ``--insecure`` + mode, gates on the ephemeral ``_SESSION_TOKEN``. +* ``hermes_cli.dashboard_auth.middleware.gated_auth_middleware`` — + non-loopback mode, gates on the OAuth session cookie. + +When the lists drifted, ``/api/status`` ended up public under the legacy +gate but 401'd under the OAuth gate. That broke the portal's wildcard +liveness probe (``nous-account-service`` ``fly-provider.ts`` +``getInstanceRuntimeStatus``), which fetches ``/api/status`` without a +cookie as its sole signal of "agent dashboard is alive": every healthy +wildcard-subdomain agent surfaced as STARTING/down in the portal UI even +though the dashboard was serving correctly. + +Centralising the allowlist here so both middlewares import the same +frozenset prevents the next drift. Keep this list minimal — only truly +non-sensitive, read-only endpoints belong here. As a sanity check, every +entry should be safe to expose to: + + * external uptime probes (Pingdom, Better Stack, NAS), + * the dashboard SPA before the user has logged in, + * anyone who happens to ``curl`` the hostname. + +If a new endpoint doesn't pass all three tests, it should be gated and +the SPA should bootstrap it after login instead. +""" +from __future__ import annotations + +PUBLIC_API_PATHS: frozenset[str] = frozenset({ + # Liveness probe target. Returns version, gateway state, active + # session count, and the dashboard auth-gate shape. No bodies, no + # session content, no secrets. Documented as the portal's wildcard + # liveness probe in + # ``docs/agent-dashboard-public-url-contract.md`` (NAS side). + "/api/status", + # Read-only config-defaults / schema feeds for the SPA's Config page. + "/api/config/defaults", + "/api/config/schema", + # Read-only model metadata (context windows, etc.) — same shape as + # provider catalogs already exposed on the public internet. + "/api/model/info", + # Read-only theme + plugin manifests for the dashboard skin engine. + "/api/dashboard/themes", + "/api/dashboard/plugins", +}) diff --git a/hermes_cli/web_server.py b/hermes_cli/web_server.py index 872546196c5..d16e2fbcede 100644 --- a/hermes_cli/web_server.py +++ b/hermes_cli/web_server.py @@ -110,17 +110,20 @@ app.add_middleware( # --------------------------------------------------------------------------- # Endpoints that do NOT require the session token. Everything else under -# /api/ is gated by the auth middleware below. Keep this list minimal — -# only truly non-sensitive, read-only endpoints belong here. +# /api/ is gated by the auth middleware below. +# +# This list is defined in ``hermes_cli.dashboard_auth.public_paths`` so the +# OAuth gate middleware can honour the same allowlist — keeping the two +# gates in lockstep avoids drift like the wildcard-subdomain regression +# where ``/api/status`` was public under the legacy gate but 401'd under +# the OAuth gate (breaking the portal's liveness probe). +# +# Keep the upstream list minimal — only truly non-sensitive, read-only +# endpoints belong there. # --------------------------------------------------------------------------- -_PUBLIC_API_PATHS: frozenset = frozenset({ - "/api/status", - "/api/config/defaults", - "/api/config/schema", - "/api/model/info", - "/api/dashboard/themes", - "/api/dashboard/plugins", -}) +from hermes_cli.dashboard_auth.public_paths import ( + PUBLIC_API_PATHS as _PUBLIC_API_PATHS, +) def _has_valid_session_token(request: Request) -> bool: diff --git a/tests/docker/test_dashboard.py b/tests/docker/test_dashboard.py index a2d8e65fb40..d615ef94dd0 100644 --- a/tests/docker/test_dashboard.py +++ b/tests/docker/test_dashboard.py @@ -324,10 +324,14 @@ def test_dashboard_oauth_gate_engages_on_non_loopback_bind( 1. ``/api/auth/providers`` (publicly reachable through the gate so the login page can bootstrap) returns 200 with ``nous`` in the provider list — proves the bundled provider registered. - 2. ``/api/status`` (a public endpoint under the legacy - ``_SESSION_TOKEN`` middleware) returns 401 — proves the OAuth gate - runs upstream of the legacy public list and is actively - intercepting unauthenticated callers. + 2. ``/api/sessions`` (a gated route under both the legacy + ``_SESSION_TOKEN`` middleware and the OAuth gate) returns 401 + to an unauthenticated caller — proves the OAuth gate is actively + intercepting browser traffic. We deliberately probe a gated route + here rather than ``/api/status``: status sits in the shared + ``PUBLIC_API_PATHS`` allowlist (portal liveness probe target) and + responds 200 without a cookie under both gates, so it cannot + distinguish "gate on" from "gate off". """ subprocess.run( ["docker", "run", "-d", "--name", container_name, @@ -351,14 +355,32 @@ def test_dashboard_oauth_gate_engages_on_non_loopback_bind( f"HERMES_DASHBOARD_OAUTH_CLIENT_ID is set. Got: {payload!r}" ) - # (2) /api/status is gated by the OAuth middleware → unauthenticated - # callers get 401, not the legacy public 200 JSON. - status_code, body = _http_probe(container_name, "/api/status") + # (2) A gated route (``/api/sessions``) returns 401 to an + # unauthenticated caller — the OAuth gate is intercepting. + status_code, body = _http_probe(container_name, "/api/sessions") assert status_code == 401, ( - "OAuth gate must intercept /api/status on 0.0.0.0 bind when a " - "provider is registered and HERMES_DASHBOARD_INSECURE is unset. " + "OAuth gate must intercept gated /api/* routes on 0.0.0.0 bind " + "when a provider is registered and HERMES_DASHBOARD_INSECURE " + f"is unset. Got: status={status_code} body={body!r}" + ) + + # (3) ``/api/status`` remains 200 under the gate — it's in the shared + # ``PUBLIC_API_PATHS`` allowlist so NAS's wildcard-subdomain + # liveness probe (``fly-provider.ts`` ``getInstanceRuntimeStatus``) + # can reach it without a cookie. Regression guard: this allowlist + # drifted once already and surfaced every healthy agent as + # STARTING/down in the portal UI. + status_code, body = _http_probe(container_name, "/api/status") + assert status_code == 200, ( + "/api/status must remain publicly reachable under the OAuth gate " + "— the portal uses it as the wildcard-subdomain liveness probe. " f"Got: status={status_code} body={body!r}" ) + status = json.loads(body) + assert status.get("auth_required") is True, ( + "/api/status must report auth_required=True when the OAuth gate " + f"is engaged so the SPA/portal can distinguish modes. Got: {status!r}" + ) def test_dashboard_insecure_env_var_opts_out_of_gate( diff --git a/tests/hermes_cli/test_dashboard_auth_401_reauth.py b/tests/hermes_cli/test_dashboard_auth_401_reauth.py index c866fad8252..28d410dc3ac 100644 --- a/tests/hermes_cli/test_dashboard_auth_401_reauth.py +++ b/tests/hermes_cli/test_dashboard_auth_401_reauth.py @@ -131,8 +131,13 @@ class TestRefreshTokenCookieDeprecation: class TestApi401Envelope: + # NOTE: probe a gated route (``/api/sessions``) here rather than + # ``/api/status`` — status is in the shared ``PUBLIC_API_PATHS`` + # allowlist (portal liveness probe) so it would 200 even without a + # cookie and never exercise the 401-envelope code path. + def test_no_cookie_returns_unauthenticated_envelope(self, gated_app): - r = gated_app.get("/api/status") + r = gated_app.get("/api/sessions") assert r.status_code == 401 body = r.json() assert body["error"] == "unauthenticated" @@ -141,7 +146,7 @@ class TestApi401Envelope: def test_invalid_cookie_returns_session_expired_envelope(self, gated_app): gated_app.cookies.set(SESSION_AT_COOKIE, "garbage") - r = gated_app.get("/api/status") + r = gated_app.get("/api/sessions") assert r.status_code == 401 body = r.json() assert body["error"] == "session_expired" @@ -151,7 +156,7 @@ class TestApi401Envelope: """Dead-cookie cleanup — Phase 6 requirement so the browser doesn't keep replaying the stale token on every request.""" gated_app.cookies.set(SESSION_AT_COOKIE, "garbage") - r = gated_app.get("/api/status") + r = gated_app.get("/api/sessions") set_cookies = r.headers.get_list("set-cookie") assert any( c.startswith(f"{SESSION_AT_COOKIE}=") and "Max-Age=0" in c diff --git a/tests/hermes_cli/test_dashboard_auth_middleware.py b/tests/hermes_cli/test_dashboard_auth_middleware.py index 011767604f4..cbbcc6d287f 100644 --- a/tests/hermes_cli/test_dashboard_auth_middleware.py +++ b/tests/hermes_cli/test_dashboard_auth_middleware.py @@ -56,10 +56,61 @@ def gated_app(): # --------------------------------------------------------------------------- -def test_gated_status_now_requires_auth(gated_app): - """When gate is on, /api/status is NOT public — login bootstrap uses /api/auth/providers.""" +def test_gated_status_is_public(gated_app): + """``/api/status`` MUST be public under the OAuth gate. + + Regression guard for the wildcard-subdomain rollout: NAS + (``fly-provider.ts`` ``getInstanceRuntimeStatus``) hits + ``/api/status`` without a cookie as its sole liveness probe. A 401 + here surfaces every healthy agent as STARTING/down in the portal + UI. The endpoint returns only version + gateway/auth-gate metadata + (no user data, no session content), so it stays in the shared + ``PUBLIC_API_PATHS`` allowlist under both the legacy ``_SESSION_TOKEN`` + gate and the OAuth gate. + + The body also reports the gate's shape (``auth_required``, + ``auth_providers``) so the SPA's StatusPage and external monitors + can distinguish loopback / gated / no-providers without a separate + round trip. + """ r = gated_app.get("/api/status") - assert r.status_code == 401 + assert r.status_code == 200, ( + f"Expected 200, got {r.status_code}: {r.text}" + ) + body = r.json() + assert body["auth_required"] is True + assert "version" in body + assert "gateway_state" in body + + +@pytest.mark.parametrize("path", [ + "/api/config/defaults", + "/api/config/schema", + "/api/model/info", + "/api/dashboard/themes", + "/api/dashboard/plugins", +]) +def test_other_public_api_paths_are_public_under_gate(gated_app, path): + """The remaining ``PUBLIC_API_PATHS`` entries must also bypass the + gate. They're documented as non-sensitive read-only endpoints that + the SPA pre-loads before login (themes, config schema, model + metadata). A 401 / 302-to-login here would block the dashboard + shell from rendering pre-auth. + + Accept any non-auth-failure status: 200 when the route succeeds, + or any route-specific error (e.g. 400 / 404 / 500 from a missing + dependency) — but NEVER 401, and NEVER a 302 to ``/login``. + """ + r = gated_app.get(path, follow_redirects=False) + assert r.status_code != 401, ( + f"{path} returned 401 under the OAuth gate — should be public" + ) + if r.status_code == 302: + location = r.headers.get("location", "") + assert "/login" not in location, ( + f"{path} redirected to {location} — should be public, " + "not bounced to /login" + ) def test_gated_html_redirects_to_login(gated_app): @@ -98,7 +149,7 @@ def test_gated_static_asset_path_is_public(gated_app): # --------------------------------------------------------------------------- -def test_full_login_round_trip_unlocks_api_status(gated_app): +def test_full_login_round_trip_unlocks_gated_api(gated_app): # 1) Click "Sign in with Stub IdP" — /auth/login redirects to the stub # with a PKCE cookie on the response. r1 = gated_app.get("/auth/login?provider=stub", follow_redirects=False) @@ -128,11 +179,16 @@ def test_full_login_round_trip_unlocks_api_status(gated_app): assert any("hermes_session_at" in c for c in set_cookies) assert any("hermes_session_rt" in c for c in set_cookies) - # 3) /api/status now succeeds because we're authenticated. - r3 = gated_app.get("/api/status") - assert r3.status_code == 200 - body = r3.json() - assert "version" in body + # 3) A gated API route (``/api/sessions``) now succeeds because we + # have a valid session cookie. (We deliberately don't probe + # ``/api/status`` here — it's in the shared PUBLIC_API_PATHS + # allowlist and would 200 even without a login, so it can't + # distinguish "logged in" from "gate accidentally disabled".) + r3 = gated_app.get("/api/sessions") + assert r3.status_code == 200, ( + f"Expected 200 for /api/sessions post-login, got {r3.status_code}: " + f"{r3.text}" + ) def test_login_unknown_provider_returns_404(gated_app): diff --git a/tests/hermes_cli/test_dashboard_auth_status_endpoint.py b/tests/hermes_cli/test_dashboard_auth_status_endpoint.py index 3b10917a1d4..9e1de3e76eb 100644 --- a/tests/hermes_cli/test_dashboard_auth_status_endpoint.py +++ b/tests/hermes_cli/test_dashboard_auth_status_endpoint.py @@ -59,19 +59,11 @@ def loopback_client(): web_server.app.state.auth_required = prev_required -def _login(client: TestClient) -> None: - """Drive the stub OAuth round trip so the gated client is authed.""" - r1 = client.get("/auth/login?provider=stub", follow_redirects=False) - assert r1.status_code == 302 - state = r1.headers["location"].split("state=")[1] - r2 = client.get( - f"/auth/callback?code=stub_code&state={state}", follow_redirects=False - ) - assert r2.status_code == 302 - - def test_status_reports_auth_required_in_gated_mode(gated_client): - _login(gated_client) + # No ``_login()`` call — ``/api/status`` is in the shared + # ``PUBLIC_API_PATHS`` allowlist precisely so external probes (and + # the SPA's pre-login bootstrap) can read the gate's shape without + # a cookie. Hit it cold. r = gated_client.get("/api/status") assert r.status_code == 200 body = r.json()