refactor(auth): collapse Nous inference fallback controls

This commit is contained in:
Robin Fernandes 2026-05-17 20:34:39 +10:00 committed by Teknium
parent 89a3d038cf
commit 0bac7dd05b
13 changed files with 1071 additions and 240 deletions

View file

@ -231,6 +231,83 @@ def test_resolve_nous_runtime_credentials_prefers_invoke_jwt_and_mirrors(
assert pool_entries[0]["source"] == auth_mod.NOUS_DEVICE_CODE_SOURCE
def test_resolve_nous_runtime_credentials_invoke_jwt_is_idempotent(
    tmp_path,
    monkeypatch,
):
    """Resolving with a stored, unexpired invoke JWT must not touch auth.json.

    The store is seeded with one JWT used as both ``access_token`` and
    ``agent_key``.  Resolution has to hand back that token with source
    ``invoke_jwt`` while never minting a legacy key, never writing the
    shared Nous state, never syncing the credential pool, and leaving the
    file's text and mtime exactly as they were before the call.
    """
    import hermes_cli.auth as auth_mod

    home_dir = tmp_path / "hermes"
    home_dir.mkdir(parents=True, exist_ok=True)

    expiry_epoch = int(time.time() + 3600)
    expiry_iso = datetime.fromtimestamp(expiry_epoch, tz=timezone.utc).isoformat()
    jwt = _jwt_with_claims({
        "sub": "test-user",
        "scope": auth_mod.DEFAULT_NOUS_SCOPE,
        "exp": expiry_epoch,
    })
    seeded_obtained_at = "2026-04-17T22:00:10+00:00"

    nous_state = {
        "portal_base_url": "https://portal.example.com",
        "inference_base_url": "https://inference.example.com/v1",
        "client_id": "hermes-cli",
        "token_type": "Bearer",
        "scope": auth_mod.DEFAULT_NOUS_SCOPE,
        "access_token": jwt,
        "refresh_token": "refresh-token",
        "obtained_at": "2026-02-01T00:00:00+00:00",
        "expires_in": 123,
        "expires_at": expiry_iso,
        "agent_key": jwt,
        "agent_key_id": None,
        "agent_key_expires_at": expiry_iso,
        "agent_key_expires_in": 123,
        "agent_key_reused": False,
        "agent_key_obtained_at": seeded_obtained_at,
        "tls": {"insecure": False, "ca_bundle": None},
    }
    store = {
        "version": 1,
        "active_provider": "nous",
        "providers": {"nous": nous_state},
    }

    store_file = home_dir / "auth.json"
    store_file.write_text(json.dumps(store, indent=2))

    # Snapshot the file so any rewrite (even a byte-identical one) is caught.
    text_before = store_file.read_text()
    mtime_before = store_file.stat().st_mtime_ns

    monkeypatch.setenv("HERMES_HOME", str(home_dir))

    def _fail_on_mint(*args, **kwargs):
        raise AssertionError("stable invoke JWT should not mint a legacy key")

    def _fail_on_shared_write(*args, **kwargs):
        raise AssertionError("unchanged invoke JWT resolution should not sync shared store")

    pool_syncs = []

    def _record_pool_sync():
        pool_syncs.append(True)

    monkeypatch.setattr(auth_mod, "_mint_agent_key", _fail_on_mint)
    monkeypatch.setattr(auth_mod, "_write_shared_nous_state", _fail_on_shared_write)
    monkeypatch.setattr(auth_mod, "_sync_nous_pool_from_auth_store", _record_pool_sync)

    resolved = auth_mod.resolve_nous_runtime_credentials(min_key_ttl_seconds=300)

    assert resolved["api_key"] == jwt
    assert resolved["source"] == "invoke_jwt"
    assert store_file.read_text() == text_before
    assert store_file.stat().st_mtime_ns == mtime_before
    assert pool_syncs == []

    stored_nous = json.loads(store_file.read_text())["providers"]["nous"]
    assert stored_nous["agent_key_obtained_at"] == seeded_obtained_at
def test_resolve_nous_runtime_credentials_trusts_invoke_jwt_exp_over_stale_metadata(
tmp_path,
monkeypatch,
@ -301,6 +378,41 @@ def test_resolve_nous_runtime_credentials_does_not_apply_legacy_ttl_to_invoke_jw
assert payload["credential_pool"]["nous"][0]["agent_key"] == token
def test_legacy_auth_mode_bypasses_usable_invoke_jwt(tmp_path, monkeypatch):
    """Legacy auth mode mints a session key even when the invoke JWT is usable.

    The store holds a JWT with an hour left.  Requesting
    ``NOUS_INFERENCE_AUTH_LEGACY`` must call the mint helper exactly once
    with that JWT as the access token, return the minted key, and persist
    it as ``agent_key`` in auth.json.
    """
    import hermes_cli.auth as auth_mod

    home_dir = tmp_path / "hermes"
    jwt = _invoke_jwt(seconds=3600)
    _setup_nous_auth(
        home_dir,
        access_token=jwt,
        scope=auth_mod.DEFAULT_NOUS_SCOPE,
        expires_at=_future_iso(3600),
        expires_in=3600,
    )
    monkeypatch.setenv("HERMES_HOME", str(home_dir))

    minted_with = []

    def _fake_mint_agent_key(*, client, portal_base_url, access_token, min_ttl_seconds):
        # Only the access token matters here; drop the rest explicitly.
        del client, portal_base_url, min_ttl_seconds
        minted_with.append(access_token)
        return _mint_payload(api_key="legacy-after-jwt-401")

    monkeypatch.setattr(auth_mod, "_mint_agent_key", _fake_mint_agent_key)

    resolved = auth_mod.resolve_nous_runtime_credentials(
        min_key_ttl_seconds=300,
        auth_mode=auth_mod.NOUS_INFERENCE_AUTH_LEGACY,
    )

    assert minted_with == [jwt]
    assert resolved["api_key"] == "legacy-after-jwt-401"
    assert resolved["auth_path"] == "legacy_session_key_mint"

    stored = json.loads((home_dir / "auth.json").read_text())
    assert stored["providers"]["nous"]["agent_key"] == "legacy-after-jwt-401"
def test_resolve_nous_runtime_credentials_falls_back_when_invoke_scope_missing(
tmp_path,
monkeypatch,
@ -735,6 +847,9 @@ def test_terminal_refresh_failure_quarantines_tokens(
hermes_home = tmp_path / "hermes"
_setup_nous_auth(hermes_home, refresh_token="refresh-old")
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
from agent.credential_pool import load_pool
assert load_pool("nous").select() is not None
shared_state = _full_state_fixture()
shared_state["access_token"] = "access-old"
@ -765,6 +880,8 @@ def test_terminal_refresh_failure_quarantines_tokens(
assert not state_after_failure.get("agent_key")
assert state_after_failure["last_auth_error"]["code"] == "invalid_grant"
assert auth_mod._read_shared_nous_state() is None
payload = json.loads((hermes_home / "auth.json").read_text())
assert payload.get("credential_pool", {}).get("nous") == []
with pytest.raises(AuthError, match="No access token found"):
auth_mod.resolve_nous_runtime_credentials(min_key_ttl_seconds=300)
@ -780,6 +897,9 @@ def test_managed_access_token_refresh_failure_quarantines_tokens(
hermes_home = tmp_path / "hermes"
_setup_nous_auth(hermes_home, refresh_token="refresh-old")
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
from agent.credential_pool import load_pool
assert load_pool("nous").select() is not None
refresh_calls: list[str] = []
@ -802,6 +922,8 @@ def test_managed_access_token_refresh_failure_quarantines_tokens(
assert not state_after_failure.get("refresh_token")
assert not state_after_failure.get("access_token")
assert state_after_failure["last_auth_error"]["message"] == "Invalid refresh token"
payload = json.loads((hermes_home / "auth.json").read_text())
assert payload.get("credential_pool", {}).get("nous") == []
with pytest.raises(AuthError, match="No access token found"):
auth_mod.resolve_nous_access_token()
@ -1076,7 +1198,11 @@ def test_persist_nous_credentials_allows_recovery_from_401(tmp_path, monkeypatch
calls after a Nous 401 before the fix it would raise AuthError because
providers.nous was empty.
"""
from hermes_cli.auth import persist_nous_credentials, resolve_nous_runtime_credentials
from hermes_cli.auth import (
NOUS_INFERENCE_AUTH_FRESH,
persist_nous_credentials,
resolve_nous_runtime_credentials,
)
hermes_home = tmp_path / "hermes"
hermes_home.mkdir(parents=True, exist_ok=True)
@ -1104,7 +1230,10 @@ def test_persist_nous_credentials_allows_recovery_from_401(tmp_path, monkeypatch
monkeypatch.setattr("hermes_cli.auth._refresh_access_token", _fake_refresh_access_token)
monkeypatch.setattr("hermes_cli.auth._mint_agent_key", _fake_mint_agent_key)
creds = resolve_nous_runtime_credentials(min_key_ttl_seconds=300, force_mint=True)
creds = resolve_nous_runtime_credentials(
min_key_ttl_seconds=300,
auth_mode=NOUS_INFERENCE_AUTH_FRESH,
)
assert creds["api_key"] == "new-agent-key"
@ -1569,7 +1698,7 @@ def test_try_import_shared_rehydrates_on_success(shared_store_env, monkeypatch):
def _fake_refresh(state, **kwargs):
# Simulate portal returning fresh tokens + a new agent_key
assert kwargs.get("force_refresh") is True
assert kwargs.get("force_mint") is True
assert kwargs.get("auth_mode") == auth_mod.NOUS_INFERENCE_AUTH_FRESH
return {
**state,
"access_token": "fresh-access-tok",
@ -1697,7 +1826,7 @@ def test_runtime_refresh_uses_newer_shared_token_before_local_stale_token(
creds = auth_mod.resolve_nous_runtime_credentials(
min_key_ttl_seconds=300,
force_mint=True,
auth_mode=auth_mod.NOUS_INFERENCE_AUTH_FRESH,
)
assert creds["api_key"] == "agent-key-from-shared-token"

View file

@ -141,6 +141,45 @@ def test_nous_adapter_get_credential_refreshes_and_persists(tmp_path, monkeypatc
assert stored["providers"]["nous"]["agent_key"] == "minted-bearer"
def test_nous_adapter_retry_credential_forces_legacy_mint(tmp_path, monkeypatch):
    """After a 401, the adapter's retry credential comes from a legacy refresh.

    The stored ``agent_key`` equals the JWT access token that just failed.
    ``get_retry_credential`` must call the (patched) refresh helper with
    ``auth_mode="legacy"`` and return the bearer from the refreshed state.
    """
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))

    stored_state = {
        "access_token": "jwt-access",
        "refresh_token": "refresh-tok",
        "client_id": "hermes-cli",
        "portal_base_url": "https://portal.nousresearch.com",
        "inference_base_url": "https://inference-api.nousresearch.com/v1",
        "agent_key": "jwt-access",
    }
    # What the patched refresh hands back: same session, legacy agent key.
    refreshed_state = {
        **stored_state,
        "agent_key": "legacy-bearer",
        "agent_key_expires_at": "2099-01-01T00:00:00Z",
    }
    _write_auth_store(tmp_path, stored_state)

    rejected = UpstreamCredential(
        bearer="jwt-access",
        base_url="https://inference-api.nousresearch.com/v1",
    )
    with patch(
        "hermes_cli.proxy.adapters.nous_portal.refresh_nous_oauth_from_state",
        return_value=refreshed_state,
    ) as refresh_mock:
        retry_cred = NousPortalAdapter().get_retry_credential(
            failed_credential=rejected,
            status_code=401,
        )

    assert retry_cred is not None
    assert retry_cred.bearer == "legacy-bearer"
    assert refresh_mock.call_args.kwargs["auth_mode"] == "legacy"
def test_nous_adapter_get_credential_raises_when_not_logged_in(tmp_path, monkeypatch):
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
adapter = NousPortalAdapter()
@ -166,6 +205,7 @@ def test_nous_adapter_get_credential_raises_on_refresh_failure(tmp_path, monkeyp
def test_nous_adapter_quarantines_terminal_refresh_failure(tmp_path, monkeypatch):
from hermes_cli.auth import AuthError
from agent.credential_pool import load_pool
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
_write_auth_store(tmp_path, {
@ -173,6 +213,7 @@ def test_nous_adapter_quarantines_terminal_refresh_failure(tmp_path, monkeypatch
"refresh_token": "refresh-tok",
"agent_key": "stale-agent-key",
})
assert load_pool("nous").select() is not None
with patch(
"hermes_cli.proxy.adapters.nous_portal.refresh_nous_oauth_from_state",
@ -193,6 +234,7 @@ def test_nous_adapter_quarantines_terminal_refresh_failure(tmp_path, monkeypatch
assert not nous_state.get("access_token")
assert not nous_state.get("agent_key")
assert nous_state["last_auth_error"]["code"] == "invalid_grant"
assert stored.get("credential_pool", {}).get("nous") == []
def test_nous_adapter_get_credential_raises_when_no_agent_key_returned(tmp_path, monkeypatch):
@ -291,12 +333,15 @@ class FakeAdapter(UpstreamAdapter):
"""A test adapter that returns a fixed credential without touching disk."""
def __init__(self, base_url: str, bearer: str = "test-bearer",
             allowed=None, raise_on_credential=False,
             retry_bearer: str | None = None):
    """Configure the fake adapter.

    Args:
        base_url: Stored as the adapter's upstream base URL.
        bearer: Stored as the adapter's primary bearer token.
        allowed: Iterable of permitted paths; a falsy value falls back to
            ``["/chat/completions"]``.
        raise_on_credential: Stored flag; presumably makes the credential
            lookup raise (that code is outside this block).
        retry_bearer: Optional bearer stored for the retry-credential hook;
            ``None`` leaves the adapter with no retry credential.
    """
    self._base_url = base_url
    self._bearer = bearer
    self._allowed = frozenset(allowed or ["/chat/completions"])
    self._raise = raise_on_credential
    self._retry_bearer = retry_bearer
    # Call counters start at zero for tests to inspect.
    self.calls = 0
    self.retry_calls = 0
@property
def name(self): return "fake"
@ -318,6 +363,17 @@ class FakeAdapter(UpstreamAdapter):
expires_at="2099-01-01T00:00:00Z",
)
def get_retry_credential(self, *, failed_credential, status_code):
    """Return the configured retry credential for a 401, otherwise ``None``.

    Every call is counted in ``retry_calls``.  The failed credential is
    ignored; a credential is produced only when the status is 401 and a
    retry bearer was configured.
    """
    del failed_credential
    self.retry_calls += 1
    if status_code == 401 and self._retry_bearer:
        return UpstreamCredential(
            bearer=self._retry_bearer,
            base_url=self._base_url,
            expires_at="2099-01-01T00:00:00Z",
        )
    return None
async def _start_runner(app: "web.Application"):
"""Spin up an aiohttp app on an ephemeral localhost port. Returns (runner, base_url)."""
@ -358,6 +414,25 @@ def _build_fake_upstream(captured: Dict[str, Any]) -> "web.Application":
return app
def _build_retrying_fake_upstream(captured: Dict[str, Any]) -> "web.Application":
    """Build an upstream app that rejects the JWT bearer and accepts any other.

    Each request is appended to ``captured["requests"]`` (method, path,
    Authorization header, decoded body) before the response is chosen.
    """
    async def maybe_unauthorized(request):
        raw_body = await request.read()
        auth_header = request.headers.get("Authorization")
        record = {
            "method": request.method,
            "path": request.path,
            "auth": auth_header,
            "body": raw_body.decode("utf-8") if raw_body else "",
        }
        captured["requests"].append(record)

        if auth_header != "Bearer jwt-bearer":
            return web.json_response({"ok": True})
        return web.json_response({"error": "bad token"}, status=401)

    upstream = web.Application()
    upstream.router.add_route("*", "/v1/chat/completions", maybe_unauthorized)
    return upstream
def test_server_forwards_chat_completions():
async def run():
captured: Dict[str, Any] = {"requests": []}
@ -388,6 +463,41 @@ def test_server_forwards_chat_completions():
asyncio.run(run())
def test_server_retries_once_with_adapter_retry_credential_on_401():
    """A 401 from upstream triggers exactly one retry with the retry bearer.

    The fake upstream rejects ``jwt-bearer``.  The proxy must ask the
    adapter for a retry credential once, resend with ``legacy-bearer``,
    and return the successful response to the client.
    """
    async def run():
        captured: Dict[str, Any] = {"requests": []}
        upstream_runner, upstream_base = await _start_runner(
            _build_retrying_fake_upstream(captured)
        )
        adapter = FakeAdapter(
            f"{upstream_base}/v1",
            bearer="jwt-bearer",
            retry_bearer="legacy-bearer",
        )
        proxy_runner, proxy_base = await _start_runner(create_app(adapter))
        try:
            async with aiohttp.ClientSession() as http:
                async with http.post(
                    f"{proxy_base}/v1/chat/completions",
                    json={"model": "Hermes-4-70B"},
                ) as reply:
                    assert reply.status == 200
                    reply_json = await reply.json()
                    assert reply_json["ok"] is True

            assert adapter.retry_calls == 1
            # First attempt with the JWT, second with the retry credential.
            seen_auth = [entry["auth"] for entry in captured["requests"]]
            assert seen_auth == [
                "Bearer jwt-bearer",
                "Bearer legacy-bearer",
            ]
        finally:
            await proxy_runner.cleanup()
            await upstream_runner.cleanup()

    asyncio.run(run())
def test_server_rejects_disallowed_path():
async def run():
adapter = FakeAdapter("http://unused.example/v1", allowed=["/chat/completions"])

View file

@ -19,11 +19,12 @@ The fix:
These tests pin the corrected behavior.
"""
import asyncio
import time
from datetime import datetime, timezone
from unittest.mock import patch
import pytest
import httpx
from fastapi.testclient import TestClient
from hermes_cli.web_server import _SESSION_TOKEN, app
@ -32,6 +33,32 @@ client = TestClient(app)
HEADERS = {"X-Hermes-Session-Token": _SESSION_TOKEN}
def _fake_nous_device_data() -> dict:
    """Return a canned device-code payload for the Nous dashboard flow tests.

    A fresh dict is built on every call, so a test may mutate its copy
    without affecting other callers.
    """
    return {
        "device_code": "device-code",
        "user_code": "NOUS-1234",
        "verification_uri": "https://portal.nousresearch.com/device",
        "verification_uri_complete": (
            "https://portal.nousresearch.com/device?user_code=NOUS-1234"
        ),
        "expires_in": 600,
        "interval": 5,
    }
def _invoke_scope_refusal():
    """Build the ``httpx.HTTPStatusError`` for a refused invoke scope.

    The error wraps a 400 response to the device-code endpoint whose JSON
    body carries ``invalid_scope``.  It is returned, not raised.
    """
    portal_request = httpx.Request(
        "POST", "https://portal.nousresearch.com/oauth/device/code"
    )
    refusal_body = {
        "error": "invalid_scope",
        "error_description": "unsupported scope inference:invoke",
    }
    portal_response = httpx.Response(400, json=refusal_body, request=portal_request)
    return httpx.HTTPStatusError(
        "invalid scope",
        request=portal_request,
        response=portal_response,
    )
def test_minimax_login_does_not_launch_anthropic_flow():
"""Click 'Login' on MiniMax → MUST NOT return claude.ai auth_url."""
fake_user_code_resp = {
@ -48,6 +75,9 @@ def test_minimax_login_does_not_launch_anthropic_flow():
), patch(
"hermes_cli.auth._minimax_pkce_pair",
return_value=("verifier-stub", "challenge-stub", "stub-state"),
), patch(
"hermes_cli.web_server._minimax_poller",
return_value=None,
):
resp = client.post(
"/api/providers/oauth/minimax-oauth/start",
@ -69,6 +99,113 @@ def test_minimax_login_does_not_launch_anthropic_flow():
assert body["expires_in"] == 600
def test_nous_dashboard_device_flow_honors_legacy_scope_override(monkeypatch):
    """With the legacy env override set, the flow requests only the legacy scope.

    The device-code request is stubbed to record the scope it was given,
    and the poller is stubbed out.  The session stored for the returned
    ``session_id`` must carry the legacy scope as well.
    """
    from hermes_cli import auth as auth_mod
    from hermes_cli import web_server as ws

    seen_scopes = []

    def fake_request_device_code(**kwargs):
        seen_scopes.append(kwargs["scope"])
        return _fake_nous_device_data()

    monkeypatch.setenv(auth_mod.NOUS_LEGACY_SESSION_KEYS_ENV, "true")
    monkeypatch.setattr(auth_mod, "_request_device_code", fake_request_device_code)
    monkeypatch.setattr(ws, "_nous_poller", lambda sid: None)

    flow = asyncio.run(ws._start_device_code_flow("nous"))
    try:
        assert seen_scopes == [auth_mod.NOUS_LEGACY_AGENT_KEY_SCOPE]
        assert flow["flow"] == "device_code"
        assert flow["user_code"] == "NOUS-1234"
        session = ws._oauth_sessions[flow["session_id"]]
        assert session["scope"] == auth_mod.NOUS_LEGACY_AGENT_KEY_SCOPE
    finally:
        # Drop the session this test created from the module-level registry.
        ws._oauth_sessions.pop(flow["session_id"], None)
def test_nous_dashboard_device_flow_retries_legacy_scope_on_invoke_refusal(monkeypatch):
    """An ``invalid_scope`` refusal makes the flow retry with the legacy scope.

    The stubbed device-code request fails on its first call and succeeds
    on the second.  The scopes requested must be default then legacy, and
    the stored session must record the legacy scope.
    """
    from hermes_cli import auth as auth_mod
    from hermes_cli import web_server as ws

    seen_scopes = []

    def fake_request_device_code(**kwargs):
        seen_scopes.append(kwargs["scope"])
        first_attempt = len(seen_scopes) == 1
        if first_attempt:
            raise _invoke_scope_refusal()
        return _fake_nous_device_data()

    monkeypatch.delenv(auth_mod.NOUS_LEGACY_SESSION_KEYS_ENV, raising=False)
    monkeypatch.setattr(auth_mod, "_request_device_code", fake_request_device_code)
    monkeypatch.setattr(ws, "_nous_poller", lambda sid: None)

    flow = asyncio.run(ws._start_device_code_flow("nous"))
    try:
        expected_scopes = [
            auth_mod.DEFAULT_NOUS_SCOPE,
            auth_mod.NOUS_LEGACY_AGENT_KEY_SCOPE,
        ]
        assert seen_scopes == expected_scopes
        session = ws._oauth_sessions[flow["session_id"]]
        assert session["scope"] == auth_mod.NOUS_LEGACY_AGENT_KEY_SCOPE
    finally:
        # Drop the session this test created from the module-level registry.
        ws._oauth_sessions.pop(flow["session_id"], None)
def test_nous_dashboard_poller_preserves_effective_scope_when_token_omits_scope(monkeypatch):
    """The poller passes the session's scope on when the token has none.

    A pending session with the legacy scope is registered by hand.  The
    stubbed token poll returns no ``scope`` key, so the state handed to the
    refresh helper must still carry the session's scope, and the session
    must end up ``approved``.
    """
    from hermes_cli import auth as auth_mod
    from hermes_cli import web_server as ws

    session_id = "nous-effective-scope-test"
    ws._oauth_sessions[session_id] = {
        "session_id": session_id,
        "provider": "nous",
        "flow": "device_code",
        "created_at": time.time(),
        "status": "pending",
        "error_message": None,
        "portal_base_url": "https://portal.nousresearch.com",
        "client_id": "hermes-cli",
        "device_code": "device-code",
        "interval": 5,
        "expires_at": time.time() + 600,
        "scope": auth_mod.NOUS_LEGACY_AGENT_KEY_SCOPE,
    }

    state_seen_by_refresh = {}

    def fake_poll_for_token(**kwargs):
        # Deliberately has no "scope" key.
        return {
            "access_token": "access-token",
            "refresh_token": "refresh-token",
            "expires_in": 3600,
            "token_type": "Bearer",
        }

    def fake_refresh_nous_oauth_from_state(state, **kwargs):
        state_seen_by_refresh.update(state)
        return {**state, "agent_key": "legacy-agent-key"}

    monkeypatch.setattr(auth_mod, "_poll_for_token", fake_poll_for_token)
    monkeypatch.setattr(
        auth_mod,
        "refresh_nous_oauth_from_state",
        fake_refresh_nous_oauth_from_state,
    )
    monkeypatch.setattr(auth_mod, "persist_nous_credentials", lambda state: None)

    try:
        ws._nous_poller(session_id)

        assert state_seen_by_refresh["scope"] == auth_mod.NOUS_LEGACY_AGENT_KEY_SCOPE
        assert ws._oauth_sessions[session_id]["status"] == "approved"
    finally:
        # Remove the hand-registered session from the module-level registry.
        ws._oauth_sessions.pop(session_id, None)
def test_minimax_dashboard_poller_accepts_absolute_ms_expired_in():
"""Dashboard MiniMax completion must accept unix-ms token expiry values."""
from hermes_cli import web_server as ws