fix(auth): fix a few cases where refresh tokens were not rotated.

This commit is contained in:
Robin Fernandes 2026-05-17 22:29:40 +10:00 committed by Teknium
parent 20bffa5b37
commit 569bc94b59
6 changed files with 166 additions and 109 deletions

View file

@ -41,7 +41,7 @@ from dataclasses import dataclass, field
from datetime import datetime, timezone
from http.server import BaseHTTPRequestHandler, HTTPServer
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
from typing import Any, Callable, Dict, List, Optional, Tuple
from urllib.parse import parse_qs, urlencode, urlparse
import httpx
@ -89,11 +89,6 @@ NOUS_INFERENCE_AUTH_MODES = frozenset({
NOUS_AUTH_PATH_INVOKE_JWT = "invoke_jwt"
NOUS_AUTH_PATH_LEGACY_SESSION_KEY_CACHE = "legacy_session_key_cache"
NOUS_AUTH_PATH_LEGACY_SESSION_KEY_MINT = "legacy_session_key_mint"
NOUS_AUTH_PATHS = frozenset({
NOUS_AUTH_PATH_INVOKE_JWT,
NOUS_AUTH_PATH_LEGACY_SESSION_KEY_CACHE,
NOUS_AUTH_PATH_LEGACY_SESSION_KEY_MINT,
})
DEFAULT_AGENT_KEY_MIN_TTL_SECONDS = 30 * 60 # 30 minutes
ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120 # refresh 2 min before expiry
NOUS_INVOKE_JWT_MIN_TTL_SECONDS = ACCESS_TOKEN_REFRESH_SKEW_SECONDS
@ -3991,7 +3986,7 @@ def _is_terminal_nous_refresh_error(exc: Exception) -> bool:
return (
isinstance(exc, AuthError)
and exc.provider == "nous"
and exc.code in {"invalid_grant", "invalid_token"}
and exc.code in {"invalid_grant", "invalid_token", "refresh_token_reused"}
and bool(exc.relogin_required)
)
@ -4103,12 +4098,16 @@ def _try_import_shared_nous_state(
"tls": {"insecure": False, "ca_bundle": None},
}
def _persist_shared_refresh(updated_state: Dict[str, Any], _reason: str) -> None:
_write_shared_nous_state(updated_state)
refreshed = refresh_nous_oauth_from_state(
state,
min_key_ttl_seconds=min_key_ttl_seconds,
timeout_seconds=timeout_seconds,
force_refresh=True,
inference_auth_mode=NOUS_INFERENCE_AUTH_MODE_FRESH,
on_state_update=_persist_shared_refresh,
)
_write_shared_nous_state(refreshed)
except AuthError as exc:
@ -4163,7 +4162,7 @@ def _refresh_access_token(
code = str(error_payload.get("error", "invalid_grant"))
description = str(error_payload.get("error_description") or "Refresh token exchange failed")
relogin = code in {"invalid_grant", "invalid_token"}
relogin = code in {"invalid_grant", "invalid_token", "refresh_token_reused"}
# Detect the OAuth 2.1 "refresh token reuse" signal from the Nous portal
# server and surface an actionable message. This fires when an external
@ -4173,7 +4172,7 @@ def _refresh_access_token(
# retires the original RT, Hermes's next refresh uses it, and the whole
# session chain gets revoked as a token-theft signal (#15099).
lowered = description.lower()
if "reuse" in lowered or "reuse detected" in lowered:
if code == "refresh_token_reused" or "reuse" in lowered or "reuse detected" in lowered:
description = (
"Nous Portal detected refresh-token reuse and revoked this session.\n"
"This usually means an external process (monitoring script, "
@ -4185,6 +4184,7 @@ def _refresh_access_token(
"instead.\n"
"Re-authenticate with: hermes auth add nous"
)
relogin = True
raise AuthError(description, provider="nous", code=code, relogin_required=relogin)
@ -4418,8 +4418,14 @@ def refresh_nous_oauth_pure(
ca_bundle: Optional[str] = None,
force_refresh: bool = False,
inference_auth_mode: str = NOUS_INFERENCE_AUTH_MODE_AUTO,
on_state_update: Optional[Callable[[Dict[str, Any], str], None]] = None,
) -> Dict[str, Any]:
"""Refresh Nous OAuth state without mutating auth.json."""
"""Refresh Nous OAuth state without mutating auth.json directly.
``on_state_update`` is called after a successful access-token refresh and
before any subsequent agent-key mint. Callers that own persistent state can
use it to save the newly rotated refresh token before later work can fail.
"""
inference_auth_mode = _normalize_nous_inference_auth_mode(inference_auth_mode)
state: Dict[str, Any] = {
"access_token": access_token,
@ -4479,6 +4485,8 @@ def refresh_nous_oauth_pure(
state["expires_at"] = datetime.fromtimestamp(
now.timestamp() + access_ttl, tz=timezone.utc
).isoformat()
if on_state_update is not None:
on_state_update(dict(state), "post_refresh_access_token")
selected_auth_path, fallback_reason = _choose_nous_inference_auth_path(
state,
@ -4519,6 +4527,7 @@ def refresh_nous_oauth_from_state(
timeout_seconds: float = 15.0,
force_refresh: bool = False,
inference_auth_mode: str = NOUS_INFERENCE_AUTH_MODE_AUTO,
on_state_update: Optional[Callable[[Dict[str, Any], str], None]] = None,
) -> Dict[str, Any]:
"""Refresh Nous OAuth from a state dict. Thin wrapper around refresh_nous_oauth_pure."""
tls = state.get("tls") or {}
@ -4540,6 +4549,7 @@ def refresh_nous_oauth_from_state(
ca_bundle=tls.get("ca_bundle"),
force_refresh=force_refresh,
inference_auth_mode=inference_auth_mode,
on_state_update=on_state_update,
)
@ -4603,6 +4613,7 @@ def persist_nous_credentials(
def _sync_nous_pool_from_auth_store() -> None:
"""Best-effort pool reseed after providers.nous changes; never fail login."""
try:
from agent.credential_pool import load_pool

View file

@ -1,13 +1,13 @@
"""Nous Portal upstream adapter.
Reads the user's Nous OAuth state from ``~/.hermes/auth.json``, refreshes
the access token and resolves the ``agent_key`` compatibility credential
when needed, then exposes the upstream base URL plus bearer for the proxy
server to forward to.
Reads the user's Nous OAuth state from ``~/.hermes/auth.json`` through the
shared runtime resolver, refreshes the access token and resolves the
``agent_key`` compatibility credential when needed, then exposes the upstream
base URL plus bearer for the proxy server to forward to.
The ``agent_key`` field may hold either a NAS invoke JWT or the legacy
opaque session key. The refresh helper handles both — see
:func:`hermes_cli.auth.refresh_nous_oauth_from_state`.
:func:`hermes_cli.auth.resolve_nous_runtime_credentials`.
"""
from __future__ import annotations
@ -22,12 +22,13 @@ from hermes_cli.auth import (
NOUS_INFERENCE_AUTH_MODE_AUTO,
NOUS_INFERENCE_AUTH_MODE_LEGACY,
_load_auth_store,
_auth_store_lock,
_is_terminal_nous_refresh_error,
_quarantine_nous_oauth_state,
_quarantine_nous_pool_entries,
_save_auth_store,
_write_shared_nous_state,
refresh_nous_oauth_from_state,
resolve_nous_runtime_credentials,
)
from hermes_cli.proxy.adapters.base import UpstreamAdapter, UpstreamCredential
@ -50,9 +51,8 @@ class NousPortalAdapter(UpstreamAdapter):
"""Proxy upstream for the Nous Portal inference API."""
def __init__(self) -> None:
# Lock guards _load → refresh → _save against parallel proxy requests
# racing to refresh expired tokens. Refresh itself is HTTP, so we
# hold the lock across the network call (brief; OAuth refresh is fast).
# Serialize proxy requests in this process; cross-process token refresh
# and persistence are handled by resolve_nous_runtime_credentials().
self._lock = threading.Lock()
@property
@ -107,8 +107,7 @@ class NousPortalAdapter(UpstreamAdapter):
)
try:
refreshed = refresh_nous_oauth_from_state(
state,
refreshed = resolve_nous_runtime_credentials(
inference_auth_mode=inference_auth_mode,
)
except AuthError as exc:
@ -131,22 +130,20 @@ class NousPortalAdapter(UpstreamAdapter):
f"Failed to refresh Nous Portal credentials: {exc}"
) from exc
self._save_state(refreshed)
agent_key = refreshed.get("agent_key")
agent_key = refreshed.get("api_key")
if not agent_key:
raise RuntimeError(
"Nous Portal refresh did not return a usable agent_key. "
"Try `hermes login nous` to re-authenticate."
)
base_url = refreshed.get("inference_base_url") or DEFAULT_NOUS_INFERENCE_URL
base_url = refreshed.get("base_url") or DEFAULT_NOUS_INFERENCE_URL
base_url = base_url.rstrip("/")
return UpstreamCredential(
bearer=agent_key,
base_url=base_url,
expires_at=refreshed.get("agent_key_expires_at"),
expires_at=refreshed.get("expires_at"),
)
# ------------------------------------------------------------------
@ -156,7 +153,8 @@ class NousPortalAdapter(UpstreamAdapter):
def _read_state(self) -> Optional[Dict[str, Any]]:
try:
store = _load_auth_store()
with _auth_store_lock():
store = _load_auth_store()
except Exception as exc:
logger.warning("proxy: failed to load auth store: %s", exc)
return None
@ -174,21 +172,20 @@ class NousPortalAdapter(UpstreamAdapter):
quarantine_reason: Optional[str] = None,
) -> None:
try:
store = _load_auth_store()
if quarantine_error is not None and quarantine_reason:
_quarantine_nous_pool_entries(
store,
quarantine_error,
reason=quarantine_reason,
)
providers = store.setdefault("providers", {})
providers["nous"] = state
_save_auth_store(store)
with _auth_store_lock():
store = _load_auth_store()
if quarantine_error is not None and quarantine_reason:
_quarantine_nous_pool_entries(
store,
quarantine_error,
reason=quarantine_reason,
)
providers = store.setdefault("providers", {})
providers["nous"] = state
_save_auth_store(store)
_write_shared_nous_state(state)
except Exception as exc:
# Best effort — we still return the fresh credential. The next
# request just won't see cached state, which means another refresh.
logger.warning("proxy: failed to persist refreshed Nous state: %s", exc)
logger.warning("proxy: failed to persist Nous quarantine state: %s", exc)
__all__ = ["NousPortalAdapter"]