feat(dashboard): auto-initiate portal SSO redirect on unauthenticated load

When the dashboard gateway has no local session cookie, it rendered a
click-through /login interstitial — even though the Nous portal's
/oauth/authorize auto-approves any current member of the dashboard's org
and is a silent 302 when the user already holds a portal session. For the
common case (clicking a hosted-agent dashboard link while signed in to the
portal) that interstitial click is pure friction.

This makes the gate auto-initiate the OAuth redirect on an unauthenticated
HTML document load instead of rendering the interstitial, when exactly one
interactive provider is registered. A one-shot loop-guard cookie
(hermes_sso_attempt, 60s TTL) ensures that a genuinely absent portal
session (the portal bounces back still-unauthenticated) falls back to the
/login page after exactly one bounce rather than ping-ponging forever. The
marker is cleared on a successful callback and whenever the gate falls back
to /login.

Security: this removes a human CLICK, not a security check. The redirect
lands on the existing /auth/login route and runs the unchanged PKCE
auth-code flow; token verification, audience checks, redirect-URI match,
and org-membership checks are all untouched. /api/* fetches still get the
401 JSON envelope (never a 302 a fetch() would follow opaquely), and with
two or more providers the /login chooser still renders.

Phase 1 of the cloud-auto-discovery work.
This commit is contained in:
Ben 2026-06-29 17:33:16 +10:00 committed by Teknium
parent dc5ef20d89
commit f5ecbe1ec6
6 changed files with 276 additions and 14 deletions

View file

@ -67,6 +67,15 @@ from fastapi.responses import Response
SESSION_AT_COOKIE = "hermes_session_at"
SESSION_RT_COOKIE = "hermes_session_rt"
PKCE_COOKIE = "hermes_session_pkce"
# One-shot loop-guard marker for the auto-SSO redirect (Phase 1,
# cloud-auto-discovery). Set when the gate auto-initiates the portal OAuth
# redirect on an unauthenticated document load; its mere PRESENCE on the next
# unauthenticated load tells the gate "we already bounced once" so a genuinely
# absent portal session degrades to the /login page instead of ping-ponging.
# Carries no secret — it's a boolean breadcrumb — but is set HttpOnly/Lax/Secure
# like the others for consistency. Short TTL so a user who returns later gets a
# fresh silent attempt rather than a permanently-disabled one.
SSO_ATTEMPT_COOKIE = "hermes_sso_attempt"
# Possible name variants we may have to read back. Sorted so most-strict
# wins on iteration when both happen to be present (shouldn't happen in
@ -82,6 +91,13 @@ _NAME_VARIANTS = ("__Host-", "__Secure-", "")
# stale-cookie refresh churn ever matters.)
_RT_MAX_AGE = 30 * 24 * 60 * 60
_PKCE_MAX_AGE = 10 * 60
# Auto-SSO loop-guard marker TTL. Just long enough to cover one redirect
# round trip to the portal and back (a few seconds in practice); kept at 60s
# so a slow portal hop or a manual back-button still trips the guard, while a
# user returning minutes later gets a fresh silent attempt rather than being
# stuck on /login forever. The marker is also cleared explicitly on a
# successful callback and whenever the gate falls back to /login.
_SSO_ATTEMPT_MAX_AGE = 60
def _resolved_name(bare: str, *, use_https: bool, prefix: str) -> str:
@ -236,6 +252,43 @@ def read_pkce_cookie(request: Request) -> Optional[str]:
return _read_with_fallback(request, PKCE_COOKIE)
def set_sso_attempt_cookie(
response: Response, *, use_https: bool, prefix: str = "",
) -> None:
"""Set the one-shot auto-SSO loop-guard marker (Phase 1).
Written by the gate the moment it auto-initiates the portal OAuth
redirect on an unauthenticated document load. The value is a constant
(``"1"``) only its presence matters. Short Max-Age so a stale marker
can't permanently suppress a future silent attempt.
"""
response.set_cookie(
_resolved_name(SSO_ATTEMPT_COOKIE, use_https=use_https, prefix=prefix),
"1",
max_age=_SSO_ATTEMPT_MAX_AGE,
**_common_attrs(use_https=use_https, prefix=prefix),
)
def read_sso_attempt_cookie(request: Request) -> Optional[str]:
"""Return the auto-SSO marker value if present (any variant), else None."""
return _read_with_fallback(request, SSO_ATTEMPT_COOKIE)
def clear_sso_attempt_cookie(response: Response, *, prefix: str = "") -> None:
"""Emit Max-Age=0 deletions for the auto-SSO marker, every name variant.
Called on a successful callback and whenever the gate falls back to
/login, so the marker never lingers to suppress a later silent attempt.
"""
path = _cookie_path(prefix)
for variant in _NAME_VARIANTS:
response.set_cookie(
f"{variant}{SSO_ATTEMPT_COOKIE}", "", max_age=0,
path=path, httponly=True, samesite="lax",
)
def detect_https(request: Request) -> bool:
"""Decide whether to set the ``Secure`` cookie flag.

View file

@ -25,7 +25,12 @@ from fastapi.responses import JSONResponse, RedirectResponse, Response
from hermes_cli.dashboard_auth import list_session_providers
from hermes_cli.dashboard_auth.audit import AuditEvent, audit_log
from hermes_cli.dashboard_auth.base import ProviderError, RefreshExpiredError
from hermes_cli.dashboard_auth.cookies import read_session_cookies
from hermes_cli.dashboard_auth.cookies import (
clear_sso_attempt_cookie,
read_session_cookies,
read_sso_attempt_cookie,
set_sso_attempt_cookie,
)
from hermes_cli.dashboard_auth.public_paths import PUBLIC_API_PATHS
_log = logging.getLogger(__name__)
@ -132,6 +137,89 @@ def _unauth_response(request: Request, *, reason: str) -> Response:
return RedirectResponse(url=login_url, status_code=302)
def _interactive_providers():
"""Registered providers that can drive an interactive browser login.
A token-only credential (e.g. a drain/service provider) advertises
``supports_session = False`` and is not an auto-SSO candidate.
"""
return [
p for p in list_session_providers()
if getattr(p, "supports_session", True)
]
def _auto_sso_response(request: Request) -> Response | None:
"""Maybe auto-initiate the portal OAuth redirect on an unauth HTML load.
Returns a 302 ``/auth/login`` (the existing OAuth-initiation route)
when ALL of the following hold, else ``None`` (caller falls back to the
ordinary ``/login`` interstitial):
* the request is an HTML document navigation, not an ``/api/*`` fetch
(a fetch() would follow the 302 into the cross-origin OAuth dance
opaquely same reason ``_unauth_response`` never redirects APIs);
* exactly ONE interactive provider is registered with two or more we
can't pick for the user, so the ``/login`` chooser must render; with
zero there's nothing to redirect to;
* the one-shot loop-guard marker is ABSENT. Its presence means we
already bounced to the portal once and came back still
unauthenticated (no portal session) auto-redirecting again would
ping-pong, so we fall through to ``/login`` and clear the marker.
The portal ``/oauth/authorize`` auto-approves any current member of the
dashboard's org and is a silent 302 when the user already holds a portal
session, so for the common case (clicked a dashboard link while signed
in to the portal) this removes the interstitial CLICK entirely. It
removes a click, not a security check: the redirect lands on
``/auth/login`` which runs the unchanged PKCE auth-code flow.
"""
path = request.url.path
# APIs never auto-redirect (see _unauth_response). Only document loads.
if path.startswith("/api/"):
return None
# Already bounced once and still no session → portal has no session for
# this user. Stop here, clear the marker, let /login render.
if read_sso_attempt_cookie(request):
from hermes_cli.dashboard_auth.prefix import prefix_from_request
resp = _unauth_response(request, reason="no_cookie")
clear_sso_attempt_cookie(resp, prefix=prefix_from_request(request))
return resp
providers = _interactive_providers()
if len(providers) != 1:
# Zero → nothing to redirect to. Two+ → user must choose at /login.
return None
from hermes_cli.dashboard_auth.prefix import prefix_from_request
provider = providers[0]
prefix = prefix_from_request(request)
next_param = _safe_next_target(request)
from urllib.parse import quote
auth_login = f"{prefix}/auth/login?provider={quote(provider.name, safe='')}"
if next_param:
auth_login = f"{auth_login}&next={next_param}"
resp = RedirectResponse(url=auth_login, status_code=302)
# Drop the one-shot marker so a return trip that's STILL unauthenticated
# (portal had no session) trips the guard above next time instead of
# looping. Detect HTTPS for the Secure flag the same way the auth routes
# do; bind Path via the active prefix.
from hermes_cli.dashboard_auth.cookies import detect_https
set_sso_attempt_cookie(
resp, use_https=detect_https(request), prefix=prefix,
)
audit_log(
AuditEvent.LOGIN_START,
provider=provider.name,
reason="auto_sso",
ip=_client_ip(request),
)
return resp
def _safe_next_target(request: Request) -> str:
"""Build the URL-encoded ``next`` query value, or empty string.
@ -195,7 +283,15 @@ async def gated_auth_middleware(
at, _rt = read_session_cookies(request)
if not at and not _rt:
# Neither token present — no session at all. Nothing to verify or
# refresh; force login.
# refresh. Before falling back to the /login interstitial, try to
# silently bounce the user through the portal OAuth flow: the portal
# auto-approves org members and 302s straight back when they already
# hold a portal session, so the interstitial click is pure friction
# for the common case. The one-shot loop-guard inside _auto_sso_response
# prevents a ping-pong when the portal genuinely has no session.
auto = _auto_sso_response(request)
if auto is not None:
return auto
return _unauth_response(request, reason="no_cookie")
# Try every registered provider's verify_session in turn. Providers

View file

@ -39,6 +39,7 @@ from hermes_cli.dashboard_auth.base import (
from hermes_cli.dashboard_auth.cookies import (
clear_pkce_cookie,
clear_session_cookies,
clear_sso_attempt_cookie,
detect_https,
read_pkce_cookie,
read_session_cookies,
@ -358,6 +359,9 @@ async def auth_callback(
prefix=_prefix(request),
)
clear_pkce_cookie(resp, prefix=_prefix(request))
# Clear the one-shot auto-SSO loop-guard marker now that login succeeded,
# so it never lingers to suppress a future silent attempt after logout.
clear_sso_attempt_cookie(resp, prefix=_prefix(request))
return resp

View file

@ -281,14 +281,24 @@ class TestTransparentRefreshOnAccessTokenEviction:
class TestHtmlRedirectNext:
def test_deep_html_path_redirects_with_next(self, gated_app):
def test_deep_html_path_auto_sso_with_next(self, gated_app):
# Single interactive provider registered (the stub) → an unauth HTML
# load auto-initiates the OAuth redirect (Phase 1 cloud-auto-discovery)
# rather than rendering the /login interstitial. The original path is
# preserved as next= so the post-login landing returns there.
r = gated_app.get("/sessions", follow_redirects=False)
assert r.status_code == 302
assert r.headers["location"] == "/login?next=%2Fsessions"
assert r.headers["location"] == (
"/auth/login?provider=stub&next=%2Fsessions"
)
def test_root_path_redirects_with_next(self, gated_app):
def test_root_path_auto_sso(self, gated_app):
r = gated_app.get("/", follow_redirects=False)
assert r.headers["location"] in ("/login", "/login?next=%2F")
# Root has no useful next= (login lands at "/" anyway).
assert r.headers["location"] in (
"/auth/login?provider=stub",
"/auth/login?provider=stub&next=%2F",
)
def test_login_loop_avoided(self, gated_app):
"""A request to /login itself must not produce ``?next=/login``
@ -309,6 +319,94 @@ class TestHtmlRedirectNext:
assert "next=" not in body["login_url"]
# ---------------------------------------------------------------------------
# Gate middleware: auto-SSO redirect + one-shot loop guard (Phase 1)
# ---------------------------------------------------------------------------
class TestAutoSsoRedirect:
"""The dashboard auto-initiates the portal OAuth redirect on an
unauthenticated HTML document load (single interactive provider), and a
one-shot cookie guard prevents an infinite redirect loop when the portal
has no session for the user.
"""
from hermes_cli.dashboard_auth.cookies import SSO_ATTEMPT_COOKIE
def test_unauth_html_load_auto_redirects_to_oauth(self, gated_app):
"""Common case: clicked a dashboard link, no local session cookie.
We bounce straight to /auth/login (the OAuth-initiation route) rather
than the /login interstitial, and arm the one-shot guard cookie."""
r = gated_app.get("/sessions", follow_redirects=False)
assert r.status_code == 302
assert r.headers["location"].startswith("/auth/login?provider=stub")
# The one-shot loop-guard marker is set on the redirect.
set_cookie = r.headers.get_list("set-cookie")
assert any(self.SSO_ATTEMPT_COOKIE in c for c in set_cookie)
def test_second_unauth_load_with_guard_falls_back_to_login(self, gated_app):
"""Loop-guard: the user came back from the portal STILL
unauthenticated (no portal session). The guard cookie is now present,
so instead of auto-redirecting again (which would ping-pong forever)
we fall back to the /login interstitial and clear the marker."""
# Simulate the return trip: guard cookie present, still no session.
gated_app.cookies.set(self.SSO_ATTEMPT_COOKIE, "1")
r = gated_app.get("/sessions", follow_redirects=False)
assert r.status_code == 302
# Falls back to the interstitial, NOT another /auth/login bounce.
assert r.headers["location"].startswith("/login")
assert "/auth/login" not in r.headers["location"]
# And the one-shot marker is cleared so a later visit gets a fresh
# silent attempt rather than being stuck on /login forever.
set_cookie = r.headers.get_list("set-cookie")
assert any(
self.SSO_ATTEMPT_COOKIE in c and "Max-Age=0" in c
for c in set_cookie
)
def test_no_infinite_loop_following_redirects(self, gated_app):
"""End-to-end loop safety: following redirects from an unauth load,
with the stub IdP unable to mint a session (it bounces back to the
callback but we never land a cookie in this no-portal-session
simulation), must terminate not loop forever. We assert the guard
makes the SECOND unauth gate decision fall back to /login.
Concretely: first load arms the guard + 302s to /auth/login; a
subsequent unauth load (guard present) lands on /login. Two distinct
outcomes, no third bounce."""
first = gated_app.get("/dashboard", follow_redirects=False)
assert first.headers["location"].startswith("/auth/login?provider=stub")
# Carry the guard cookie the first response set into the next request
# (TestClient persists set-cookie automatically). A second unauth load:
second = gated_app.get("/dashboard", follow_redirects=False)
assert second.headers["location"].startswith("/login")
assert "/auth/login" not in second.headers["location"]
def test_api_path_never_auto_redirects(self, gated_app):
"""Auto-SSO is for HTML document loads only. An /api/* fetch with no
cookie still gets the 401 JSON envelope (a fetch() would otherwise
follow the 302 into the cross-origin OAuth dance opaquely)."""
r = gated_app.get("/api/sessions", follow_redirects=False)
assert r.status_code == 401
assert r.json()["error"] == "unauthenticated"
def test_multiple_providers_render_chooser_not_auto_sso(self, gated_app):
"""With two interactive providers we can't pick for the user, so the
/login chooser must render rather than auto-redirecting to one."""
from tests.hermes_cli.conftest_dashboard_auth import StubAuthProvider
from hermes_cli.dashboard_auth import register_provider
class _SecondStub(StubAuthProvider):
name = "stub2"
display_name = "Second Stub IdP"
register_provider(_SecondStub())
r = gated_app.get("/sessions", follow_redirects=False)
assert r.status_code == 302
assert r.headers["location"].startswith("/login")
assert "/auth/login" not in r.headers["location"]
# ---------------------------------------------------------------------------
# Gate middleware: same-origin next= validation
# ---------------------------------------------------------------------------

View file

@ -110,8 +110,11 @@ def test_other_public_api_paths_are_public_under_gate(gated_app, path):
def test_gated_html_redirects_to_login(gated_app):
r = gated_app.get("/", follow_redirects=False)
assert r.status_code == 302
# Phase 6: gate carries a ``next=`` so post-login bounces back to /.
assert r.headers["location"] in ("/login", "/login?next=%2F")
# Phase 1 (cloud-auto-discovery): with a single interactive provider, an
# unauthenticated HTML load auto-initiates the OAuth redirect to
# /auth/login rather than rendering the /login interstitial. The /login
# page remains the fallback (multiple/zero providers, or loop-guard trip).
assert r.headers["location"].startswith("/auth/login?provider=stub")
def test_gated_auth_providers_is_public(gated_app):

View file

@ -105,10 +105,13 @@ class TestGateRedirectsCarryPrefix:
follow_redirects=False,
)
assert r.status_code == 302
# /login redirect must include the prefix or the browser will
# follow it to mission-control.tilos.com/login (which the proxy
# doesn't route to the dashboard).
assert r.headers["location"].startswith("/hermes/login"), (
# Phase 1 (cloud-auto-discovery): a single-provider unauth HTML load
# auto-initiates the OAuth redirect to /auth/login. That redirect must
# ALSO carry the prefix, or the browser follows it to
# mission-control.tilos.com/auth/login (which the proxy doesn't route
# to the dashboard). The prefix-carrying invariant is what's under
# test; only the target path moved from /login to /auth/login.
assert r.headers["location"].startswith("/hermes/auth/login"), (
f"Location header lost prefix: {r.headers['location']!r}"
)
@ -132,7 +135,11 @@ class TestGateRedirectsCarryPrefix:
proxy at all."""
r = gated_app_direct.get("/sessions", follow_redirects=False)
assert r.status_code == 302
assert r.headers["location"] == "/login?next=%2Fsessions"
# Phase 1: single-provider unauth HTML load auto-initiates OAuth to
# /auth/login (no phantom prefix), carrying the original path as next=.
assert r.headers["location"] == (
"/auth/login?provider=stub&next=%2Fsessions"
)
def test_malformed_prefix_header_is_ignored(self, gated_app_proxied):
"""A hostile proxy injects ``X-Forwarded-Prefix: <script>``;
@ -145,7 +152,8 @@ class TestGateRedirectsCarryPrefix:
)
assert r.status_code == 302
assert "<script>" not in r.headers["location"]
assert r.headers["location"].startswith("/login")
# Phase 1: malformed prefix dropped → unprefixed auto-SSO redirect.
assert r.headers["location"].startswith("/auth/login")
# ---------------------------------------------------------------------------