feat(dashboard-auth): _ws_auth_ok helper + ticket auth on all 4 WS endpoints

Phase 5 task 5.2. Four WebSocket endpoints — /api/pty, /api/ws, /api/pub,
/api/events — previously each authenticated with the same constant-time
check against `_SESSION_TOKEN`. They now share a single helper,
`_ws_auth_ok`, that branches on `app.state.auth_required`:

  Loopback / --insecure: legacy ?token=<_SESSION_TOKEN> path (unchanged).
  Gated:                  ?ticket=<single-use> consumed against the
                          dashboard-auth ticket store.

Critical security property: gated mode UNCONDITIONALLY rejects the
?token= path. A leaked _SESSION_TOKEN value from a log line is not
replayable for WS access in gated deployments.

`_build_sidecar_url` now branches too: loopback uses the legacy token;
gated mode mints a server-internal ticket via mint_ticket() with
pseudo-user 'pty-sidecar' / provider 'server-internal' so audit logs can
distinguish PTY-internal sidecar tickets from browser tickets. PTY
children open /api/pub exactly once at startup so single-use suffices.

Ticket rejections audit-log as WS_TICKET_REJECTED with truncated reason
+ client IP + WS path. Operators debugging 'WS keeps closing' issues see
which endpoint and why.

17 new tests:
- POST /api/auth/ws-ticket: 200 with cookie, 401/302 without, distinct
  per call, GET-not-allowed.
- _ws_auth_ok loopback: token accept/reject, missing-token reject,
  ticket-param-ignored.
- _ws_auth_ok gated: ticket accept, single-use rejection, unknown reject,
  legacy-token-rejected-in-gated assertion, audit-log emission.
- _build_sidecar_url: loopback uses token=, gated uses ticket=, no-bound
  returns None.
This commit is contained in:
Ben 2026-05-21 16:02:29 +10:00
parent a3a1284110
commit bb72e4614a
2 changed files with 329 additions and 11 deletions

View file

@ -3347,6 +3347,50 @@ def _ws_client_is_allowed(ws: "WebSocket") -> bool:
return True
return client_host in _LOOPBACK_HOSTS
def _ws_auth_ok(ws: "WebSocket") -> bool:
    """Validate WS-upgrade auth in either loopback or gated mode.

    Loopback / ``--insecure`` (``app.state.auth_required`` falsy or unset):
    legacy ``?token=<_SESSION_TOKEN>`` query parameter, constant-time
    compared.

    Gated (public bind, no ``--insecure``): ``?ticket=<single-use>`` query
    parameter consumed against the dashboard-auth ticket store. The legacy
    token path is unconditionally rejected in this mode; a ``?token=``
    value is never read when ``auth_required`` is set.

    Every gated-mode rejection is audit-logged as ``WS_TICKET_REJECTED``
    with the reason, client IP and WS path. This includes the case where no
    ``ticket`` parameter was supplied, so operators can debug "WS keeps
    closing" issues (for example a client still sending ``?token=``) from
    the log.

    Args:
        ws: The not-yet-accepted WebSocket whose query string carries the
            credential.

    Returns:
        True if the WS should be accepted. On False the caller closes the
        socket with the appropriate WS code (4401).
    """
    if not getattr(app.state, "auth_required", False):
        token = ws.query_params.get("token", "")
        # Encode to bytes so compare_digest accepts non-ASCII input instead
        # of raising TypeError on attacker-controlled strings.
        return hmac.compare_digest(token.encode(), _SESSION_TOKEN.encode())

    # Lazy import — keeps this function importable in test harnesses
    # that don't bring in the dashboard_auth layer.
    from hermes_cli.dashboard_auth.audit import AuditEvent, audit_log
    from hermes_cli.dashboard_auth.ws_tickets import (
        TicketInvalid,
        consume_ticket,
    )

    ticket = ws.query_params.get("ticket", "")
    if not ticket:
        # Previously a silent rejection; log it like any other bad ticket.
        reason = "missing ticket"
    else:
        try:
            consume_ticket(ticket)
        except TicketInvalid as exc:
            reason = str(exc)
        else:
            return True

    audit_log(
        AuditEvent.WS_TICKET_REJECTED,
        reason=reason,
        ip=(ws.client.host if ws.client else ""),
        path=ws.url.path,
    )
    return False
# Per-channel subscriber registry used by /api/pub (PTY-side gateway → dashboard)
# and /api/events (dashboard → browser sidebar). Keyed by an opaque channel id
# the chat tab generates on mount; entries auto-evict when the last subscriber
@ -3401,7 +3445,21 @@ def _resolve_chat_argv(
def _build_sidecar_url(channel: str) -> Optional[str]:
"""ws:// URL the PTY child should publish events to, or None when unbound."""
"""ws:// URL the PTY child should publish events to, or None when unbound.
Loopback / ``--insecure``: uses ``?token=<_SESSION_TOKEN>``.
Gated mode: mints a single-use ticket via the dashboard-auth ticket
store (server-side mint, no HTTP round trip — the PTY child is a
server-spawned process and we trust it). The ticket binds to the
pseudo-user ``"pty-sidecar"`` so audit logs can distinguish these from
browser-initiated tickets.
The single-use lifetime means the PTY child cannot reconnect without a
new sidecar URL. PTY children open ``/api/pub`` once at startup; if
reconnect semantics ever become important, this should be upgraded to
a long-lived process-scoped token.
"""
host = getattr(app.state, "bound_host", None)
port = getattr(app.state, "bound_port", None)
@ -3409,7 +3467,15 @@ def _build_sidecar_url(channel: str) -> Optional[str]:
return None
netloc = f"[{host}]:{port}" if ":" in host and not host.startswith("[") else f"{host}:{port}"
qs = urllib.parse.urlencode({"token": _SESSION_TOKEN, "channel": channel})
if getattr(app.state, "auth_required", False):
# Gated mode — mint a ticket so the WS upgrade survives _ws_auth_ok.
from hermes_cli.dashboard_auth.ws_tickets import mint_ticket
ticket = mint_ticket(user_id="pty-sidecar", provider="server-internal")
qs = urllib.parse.urlencode({"ticket": ticket, "channel": channel})
else:
qs = urllib.parse.urlencode({"token": _SESSION_TOKEN, "channel": channel})
return f"ws://{netloc}/api/pub?{qs}"
@ -3442,9 +3508,7 @@ async def pty_ws(ws: WebSocket) -> None:
return
# --- auth + loopback check (before accept so we can close cleanly) ---
token = ws.query_params.get("token", "")
expected = _SESSION_TOKEN
if not hmac.compare_digest(token.encode(), expected.encode()):
if not _ws_auth_ok(ws):
await ws.close(code=4401)
return
@ -3562,8 +3626,7 @@ async def gateway_ws(ws: WebSocket) -> None:
await ws.close(code=4403)
return
token = ws.query_params.get("token", "")
if not hmac.compare_digest(token.encode(), _SESSION_TOKEN.encode()):
if not _ws_auth_ok(ws):
await ws.close(code=4401)
return
@ -3594,8 +3657,7 @@ async def pub_ws(ws: WebSocket) -> None:
await ws.close(code=4403)
return
token = ws.query_params.get("token", "")
if not hmac.compare_digest(token.encode(), _SESSION_TOKEN.encode()):
if not _ws_auth_ok(ws):
await ws.close(code=4401)
return
@ -3623,8 +3685,7 @@ async def events_ws(ws: WebSocket) -> None:
await ws.close(code=4403)
return
token = ws.query_params.get("token", "")
if not hmac.compare_digest(token.encode(), _SESSION_TOKEN.encode()):
if not _ws_auth_ok(ws):
await ws.close(code=4401)
return