refactor(auth): collapse Nous inference fallback controls

2026-06-08 08:11:38 +00:00 · 2026-05-17 20:34:39 +10:00 · 2026-05-17 20:34:39 +10:00 · 0bac7dd05b
commit 0bac7dd05b
parent 89a3d038cf
13 changed files with 1071 additions and 240 deletions
--- a/tests/hermes_cli/test_auth_nous_provider.py
+++ b/tests/hermes_cli/test_auth_nous_provider.py
@ -231,6 +231,83 @@ def test_resolve_nous_runtime_credentials_prefers_invoke_jwt_and_mirrors(
    assert pool_entries[0]["source"] == auth_mod.NOUS_DEVICE_CODE_SOURCE


+def test_resolve_nous_runtime_credentials_invoke_jwt_is_idempotent(
+    tmp_path,
+    monkeypatch,
+):
+    """Resolving an unchanged, unexpired invoke JWT must leave auth.json untouched.
+
+    The stored access token and agent key are the same JWT (expiring in one
+    hour). The resolver is expected to return it as-is: no mint, no shared
+    store write, no pool sync, and no rewrite of auth.json.
+    """
+    import hermes_cli.auth as auth_mod
+
+    hermes_home = tmp_path / "hermes"
+    hermes_home.mkdir(parents=True, exist_ok=True)
+    exp = int(time.time() + 3600)
+    expires_at = datetime.fromtimestamp(exp, tz=timezone.utc).isoformat()
+    token = _jwt_with_claims({
+        "sub": "test-user",
+        "scope": auth_mod.DEFAULT_NOUS_SCOPE,
+        "exp": exp,
+    })
+    original_obtained_at = "2026-04-17T22:00:10+00:00"
+    auth_store = {
+        "version": 1,
+        "active_provider": "nous",
+        "providers": {
+            "nous": {
+                "portal_base_url": "https://portal.example.com",
+                "inference_base_url": "https://inference.example.com/v1",
+                "client_id": "hermes-cli",
+                "token_type": "Bearer",
+                "scope": auth_mod.DEFAULT_NOUS_SCOPE,
+                "access_token": token,
+                "refresh_token": "refresh-token",
+                "obtained_at": "2026-02-01T00:00:00+00:00",
+                "expires_in": 123,
+                "expires_at": expires_at,
+                "agent_key": token,
+                "agent_key_id": None,
+                "agent_key_expires_at": expires_at,
+                "agent_key_expires_in": 123,
+                "agent_key_reused": False,
+                "agent_key_obtained_at": original_obtained_at,
+                "tls": {"insecure": False, "ca_bundle": None},
+            },
+        },
+    }
+    auth_path = hermes_home / "auth.json"
+    auth_path.write_text(json.dumps(auth_store, indent=2))
+    # Snapshot both content and mtime: the mtime check below also catches a
+    # rewrite that happens to produce byte-identical content.
+    before_content = auth_path.read_text()
+    before_mtime = auth_path.stat().st_mtime_ns
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+    # Both stand-ins fail the test as soon as they are called.
+    def _unexpected_mint(*args, **kwargs):
+        raise AssertionError("stable invoke JWT should not mint a legacy key")
+
+    def _unexpected_shared_write(*args, **kwargs):
+        raise AssertionError("unchanged invoke JWT resolution should not sync shared store")
+
+    # Records every pool-sync call so the test can assert there were none.
+    sync_calls = []
+
+    monkeypatch.setattr(auth_mod, "_mint_agent_key", _unexpected_mint)
+    monkeypatch.setattr(auth_mod, "_write_shared_nous_state", _unexpected_shared_write)
+    monkeypatch.setattr(
+        auth_mod,
+        "_sync_nous_pool_from_auth_store",
+        lambda: sync_calls.append(True),
+    )
+
+    creds = auth_mod.resolve_nous_runtime_credentials(min_key_ttl_seconds=300)
+
+    assert creds["api_key"] == token
+    assert creds["source"] == "invoke_jwt"
+    assert auth_path.read_text() == before_content
+    assert auth_path.stat().st_mtime_ns == before_mtime
+    assert sync_calls == []
+    # Explicit check on the one timestamp field, on top of the whole-file
+    # comparison above.
+    payload = json.loads(auth_path.read_text())
+    assert (
+        payload["providers"]["nous"]["agent_key_obtained_at"]
+        == original_obtained_at
+    )
+
+
 def test_resolve_nous_runtime_credentials_trusts_invoke_jwt_exp_over_stale_metadata(
    tmp_path,
    monkeypatch,
@ -301,6 +378,41 @@ def test_resolve_nous_runtime_credentials_does_not_apply_legacy_ttl_to_invoke_jw
    assert payload["credential_pool"]["nous"][0]["agent_key"] == token


+def test_legacy_auth_mode_bypasses_usable_invoke_jwt(tmp_path, monkeypatch):
+    """``auth_mode=NOUS_INFERENCE_AUTH_LEGACY`` must mint instead of reusing the token.
+
+    The mint helper is expected to be called exactly once with the stored
+    access token, and the minted key must be both returned and persisted
+    to auth.json.
+    """
+    import hermes_cli.auth as auth_mod
+
+    hermes_home = tmp_path / "hermes"
+    # NOTE(review): presumably a JWT valid for another hour with the invoke
+    # scope; confirm against the _invoke_jwt helper.
+    token = _invoke_jwt(seconds=3600)
+    _setup_nous_auth(
+        hermes_home,
+        access_token=token,
+        scope=auth_mod.DEFAULT_NOUS_SCOPE,
+        expires_at=_future_iso(3600),
+        expires_in=3600,
+    )
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+    # Access tokens passed to the fake mint, in call order.
+    mint_calls = []
+
+    def _fake_mint_agent_key(*, client, portal_base_url, access_token, min_ttl_seconds):
+        # The remaining keyword arguments are accepted but deliberately unused.
+        del client, portal_base_url, min_ttl_seconds
+        mint_calls.append(access_token)
+        return _mint_payload(api_key="legacy-after-jwt-401")
+
+    monkeypatch.setattr(auth_mod, "_mint_agent_key", _fake_mint_agent_key)
+
+    creds = auth_mod.resolve_nous_runtime_credentials(
+        min_key_ttl_seconds=300,
+        auth_mode=auth_mod.NOUS_INFERENCE_AUTH_LEGACY,
+    )
+
+    assert mint_calls == [token]
+    assert creds["api_key"] == "legacy-after-jwt-401"
+    assert creds["auth_path"] == "legacy_session_key_mint"
+    payload = json.loads((hermes_home / "auth.json").read_text())
+    assert payload["providers"]["nous"]["agent_key"] == "legacy-after-jwt-401"
+
+
 def test_resolve_nous_runtime_credentials_falls_back_when_invoke_scope_missing(
    tmp_path,
    monkeypatch,
@ -735,6 +847,9 @@ def test_terminal_refresh_failure_quarantines_tokens(
    hermes_home = tmp_path / "hermes"
    _setup_nous_auth(hermes_home, refresh_token="refresh-old")
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+    from agent.credential_pool import load_pool
+
+    assert load_pool("nous").select() is not None

    shared_state = _full_state_fixture()
    shared_state["access_token"] = "access-old"
@ -765,6 +880,8 @@ def test_terminal_refresh_failure_quarantines_tokens(
    assert not state_after_failure.get("agent_key")
    assert state_after_failure["last_auth_error"]["code"] == "invalid_grant"
    assert auth_mod._read_shared_nous_state() is None
+    payload = json.loads((hermes_home / "auth.json").read_text())
+    assert payload.get("credential_pool", {}).get("nous") == []

    with pytest.raises(AuthError, match="No access token found"):
        auth_mod.resolve_nous_runtime_credentials(min_key_ttl_seconds=300)
@ -780,6 +897,9 @@ def test_managed_access_token_refresh_failure_quarantines_tokens(
    hermes_home = tmp_path / "hermes"
    _setup_nous_auth(hermes_home, refresh_token="refresh-old")
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+    from agent.credential_pool import load_pool
+
+    assert load_pool("nous").select() is not None

    refresh_calls: list[str] = []

@ -802,6 +922,8 @@ def test_managed_access_token_refresh_failure_quarantines_tokens(
    assert not state_after_failure.get("refresh_token")
    assert not state_after_failure.get("access_token")
    assert state_after_failure["last_auth_error"]["message"] == "Invalid refresh token"
+    payload = json.loads((hermes_home / "auth.json").read_text())
+    assert payload.get("credential_pool", {}).get("nous") == []

    with pytest.raises(AuthError, match="No access token found"):
        auth_mod.resolve_nous_access_token()
@ -1076,7 +1198,11 @@ def test_persist_nous_credentials_allows_recovery_from_401(tmp_path, monkeypatch
    calls after a Nous 401 — before the fix it would raise AuthError because
    providers.nous was empty.
    """
-    from hermes_cli.auth import persist_nous_credentials, resolve_nous_runtime_credentials
+    from hermes_cli.auth import (
+        NOUS_INFERENCE_AUTH_FRESH,
+        persist_nous_credentials,
+        resolve_nous_runtime_credentials,
+    )

    hermes_home = tmp_path / "hermes"
    hermes_home.mkdir(parents=True, exist_ok=True)
@ -1104,7 +1230,10 @@ def test_persist_nous_credentials_allows_recovery_from_401(tmp_path, monkeypatch
    monkeypatch.setattr("hermes_cli.auth._refresh_access_token", _fake_refresh_access_token)
    monkeypatch.setattr("hermes_cli.auth._mint_agent_key", _fake_mint_agent_key)

-    creds = resolve_nous_runtime_credentials(min_key_ttl_seconds=300, force_mint=True)
+    creds = resolve_nous_runtime_credentials(
+        min_key_ttl_seconds=300,
+        auth_mode=NOUS_INFERENCE_AUTH_FRESH,
+    )
    assert creds["api_key"] == "new-agent-key"


@ -1569,7 +1698,7 @@ def test_try_import_shared_rehydrates_on_success(shared_store_env, monkeypatch):
    def _fake_refresh(state, **kwargs):
        # Simulate portal returning fresh tokens + a new agent_key
        assert kwargs.get("force_refresh") is True
-        assert kwargs.get("force_mint") is True
+        assert kwargs.get("auth_mode") == auth_mod.NOUS_INFERENCE_AUTH_FRESH
        return {
            **state,
            "access_token": "fresh-access-tok",
@ -1697,7 +1826,7 @@ def test_runtime_refresh_uses_newer_shared_token_before_local_stale_token(

    creds = auth_mod.resolve_nous_runtime_credentials(
        min_key_ttl_seconds=300,
-        force_mint=True,
+        auth_mode=auth_mod.NOUS_INFERENCE_AUTH_FRESH,
    )

    assert creds["api_key"] == "agent-key-from-shared-token"
--- a/tests/hermes_cli/test_proxy.py
+++ b/tests/hermes_cli/test_proxy.py
@ -141,6 +141,45 @@ def test_nous_adapter_get_credential_refreshes_and_persists(tmp_path, monkeypatc
    assert stored["providers"]["nous"]["agent_key"] == "minted-bearer"


+def test_nous_adapter_retry_credential_forces_legacy_mint(tmp_path, monkeypatch):
+    """After a 401, the adapter's retry credential must come from a legacy-mode refresh.
+
+    The patched refresh returns a state whose ``agent_key`` differs from the
+    failed bearer; the test expects that key back and expects the refresh to
+    have been called with ``auth_mode="legacy"``.
+    """
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    # Stored state where the agent key is the same value as the access token.
+    _write_auth_store(tmp_path, {
+        "access_token": "jwt-access",
+        "refresh_token": "refresh-tok",
+        "client_id": "hermes-cli",
+        "portal_base_url": "https://portal.nousresearch.com",
+        "inference_base_url": "https://inference-api.nousresearch.com/v1",
+        "agent_key": "jwt-access",
+    })
+
+    # What the mocked refresh hands back: same tokens, but a distinct agent key.
+    refreshed_state = {
+        "access_token": "jwt-access",
+        "refresh_token": "refresh-tok",
+        "client_id": "hermes-cli",
+        "portal_base_url": "https://portal.nousresearch.com",
+        "inference_base_url": "https://inference-api.nousresearch.com/v1",
+        "agent_key": "legacy-bearer",
+        "agent_key_expires_at": "2099-01-01T00:00:00Z",
+    }
+
+    with patch(
+        "hermes_cli.proxy.adapters.nous_portal.refresh_nous_oauth_from_state",
+        return_value=refreshed_state,
+    ) as mock_refresh:
+        adapter = NousPortalAdapter()
+        cred = adapter.get_retry_credential(
+            failed_credential=UpstreamCredential(
+                bearer="jwt-access",
+                base_url="https://inference-api.nousresearch.com/v1",
+            ),
+            status_code=401,
+        )
+
+    assert cred is not None
+    assert cred.bearer == "legacy-bearer"
+    assert mock_refresh.call_args.kwargs["auth_mode"] == "legacy"
+
+
 def test_nous_adapter_get_credential_raises_when_not_logged_in(tmp_path, monkeypatch):
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    adapter = NousPortalAdapter()
@ -166,6 +205,7 @@ def test_nous_adapter_get_credential_raises_on_refresh_failure(tmp_path, monkeyp

 def test_nous_adapter_quarantines_terminal_refresh_failure(tmp_path, monkeypatch):
    from hermes_cli.auth import AuthError
+    from agent.credential_pool import load_pool

    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    _write_auth_store(tmp_path, {
@ -173,6 +213,7 @@ def test_nous_adapter_quarantines_terminal_refresh_failure(tmp_path, monkeypatch
        "refresh_token": "refresh-tok",
        "agent_key": "stale-agent-key",
    })
+    assert load_pool("nous").select() is not None

    with patch(
        "hermes_cli.proxy.adapters.nous_portal.refresh_nous_oauth_from_state",
@ -193,6 +234,7 @@ def test_nous_adapter_quarantines_terminal_refresh_failure(tmp_path, monkeypatch
    assert not nous_state.get("access_token")
    assert not nous_state.get("agent_key")
    assert nous_state["last_auth_error"]["code"] == "invalid_grant"
+    assert stored.get("credential_pool", {}).get("nous") == []


 def test_nous_adapter_get_credential_raises_when_no_agent_key_returned(tmp_path, monkeypatch):
@ -291,12 +333,15 @@ class FakeAdapter(UpstreamAdapter):
    """A test adapter that returns a fixed credential without touching disk."""

    def __init__(self, base_url: str, bearer: str = "test-bearer",
-                 allowed=None, raise_on_credential=False):
+                 allowed=None, raise_on_credential=False,
+                 retry_bearer: str | None = None):
        self._base_url = base_url
        self._bearer = bearer
        self._allowed = frozenset(allowed or ["/chat/completions"])
        self._raise = raise_on_credential
+        self._retry_bearer = retry_bearer
        self.calls = 0
+        self.retry_calls = 0

    @property
    def name(self): return "fake"
@ -318,6 +363,17 @@ class FakeAdapter(UpstreamAdapter):
            expires_at="2099-01-01T00:00:00Z",
        )

+    def get_retry_credential(self, *, failed_credential, status_code):
+        """Return the configured retry credential for a 401, otherwise ``None``.
+
+        Every call increments ``retry_calls``, including calls that return
+        ``None`` because the status is not 401 or no ``retry_bearer`` was set.
+        """
+        # The failed credential is irrelevant to this fake.
+        del failed_credential
+        self.retry_calls += 1
+        if status_code != 401 or not self._retry_bearer:
+            return None
+        return UpstreamCredential(
+            bearer=self._retry_bearer,
+            base_url=self._base_url,
+            expires_at="2099-01-01T00:00:00Z",
+        )
+

 async def _start_runner(app: "web.Application"):
    """Spin up an aiohttp app on an ephemeral localhost port. Returns (runner, base_url)."""
@ -358,6 +414,25 @@ def _build_fake_upstream(captured: Dict[str, Any]) -> "web.Application":
    return app


+def _build_retrying_fake_upstream(captured: Dict[str, Any]) -> "web.Application":
+    """Build a fake upstream that rejects one specific bearer with a 401.
+
+    Each request to ``/v1/chat/completions`` (any method) is appended to
+    ``captured["requests"]`` with its method, path, Authorization header and
+    decoded body. ``Bearer jwt-bearer`` gets a 401 JSON error; any other
+    Authorization value gets ``{"ok": true}``.
+    """
+    async def maybe_unauthorized(request):
+        body = await request.read()
+        auth = request.headers.get("Authorization")
+        # Record before responding so rejected attempts are captured too.
+        captured["requests"].append({
+            "method": request.method,
+            "path": request.path,
+            "auth": auth,
+            "body": body.decode("utf-8") if body else "",
+        })
+        if auth == "Bearer jwt-bearer":
+            return web.json_response({"error": "bad token"}, status=401)
+        return web.json_response({"ok": True})
+
+    app = web.Application()
+    app.router.add_route("*", "/v1/chat/completions", maybe_unauthorized)
+    return app
+
+
 def test_server_forwards_chat_completions():
    async def run():
        captured: Dict[str, Any] = {"requests": []}
@ -388,6 +463,41 @@ def test_server_forwards_chat_completions():
    asyncio.run(run())


+def test_server_retries_once_with_adapter_retry_credential_on_401():
+    """A 401 from upstream must trigger exactly one retry with the retry bearer.
+
+    The client is expected to see a single 200 response, while the fake
+    upstream sees the original bearer followed by the retry bearer.
+    """
+    async def run():
+        captured: Dict[str, Any] = {"requests": []}
+        upstream_runner, upstream_base = await _start_runner(
+            _build_retrying_fake_upstream(captured)
+        )
+        # The first bearer is the one the fake upstream rejects with a 401.
+        adapter = FakeAdapter(
+            f"{upstream_base}/v1",
+            bearer="jwt-bearer",
+            retry_bearer="legacy-bearer",
+        )
+        proxy_runner, proxy_base = await _start_runner(create_app(adapter))
+
+        try:
+            async with aiohttp.ClientSession() as session:
+                async with session.post(
+                    f"{proxy_base}/v1/chat/completions",
+                    json={"model": "Hermes-4-70B"},
+                ) as resp:
+                    assert resp.status == 200
+                    data = await resp.json()
+                    assert data["ok"] is True
+
+            assert adapter.retry_calls == 1
+            assert [req["auth"] for req in captured["requests"]] == [
+                "Bearer jwt-bearer",
+                "Bearer legacy-bearer",
+            ]
+        finally:
+            # Tear down both servers even when an assertion above fails.
+            await proxy_runner.cleanup()
+            await upstream_runner.cleanup()
+
+    asyncio.run(run())
+
+
 def test_server_rejects_disallowed_path():
    async def run():
        adapter = FakeAdapter("http://unused.example/v1", allowed=["/chat/completions"])
--- a/tests/hermes_cli/test_web_oauth_dispatch.py
+++ b/tests/hermes_cli/test_web_oauth_dispatch.py
@ -19,11 +19,12 @@ The fix:

 These tests pin the corrected behavior.
 """
+import asyncio
 import time
 from datetime import datetime, timezone
 from unittest.mock import patch

-import pytest
+import httpx
 from fastapi.testclient import TestClient

 from hermes_cli.web_server import _SESSION_TOKEN, app
@ -32,6 +33,32 @@ client = TestClient(app)
 HEADERS = {"X-Hermes-Session-Token": _SESSION_TOKEN}


+def _fake_nous_device_data():
+    """Return a fixed device-code payload used as the fake ``_request_device_code`` result."""
+    return {
+        "device_code": "device-code",
+        "user_code": "NOUS-1234",
+        "verification_uri": "https://portal.nousresearch.com/device",
+        "verification_uri_complete": (
+            "https://portal.nousresearch.com/device?user_code=NOUS-1234"
+        ),
+        "expires_in": 600,
+        "interval": 5,
+    }
+
+
+def _invoke_scope_refusal():
+    """Build an ``httpx.HTTPStatusError`` for a 400 ``invalid_scope`` response.
+
+    The error is returned, not raised; callers raise it themselves.
+    """
+    request = httpx.Request("POST", "https://portal.nousresearch.com/oauth/device/code")
+    response = httpx.Response(
+        400,
+        json={
+            "error": "invalid_scope",
+            "error_description": "unsupported scope inference:invoke",
+        },
+        request=request,
+    )
+    return httpx.HTTPStatusError("invalid scope", request=request, response=response)
+
+
 def test_minimax_login_does_not_launch_anthropic_flow():
    """Click 'Login' on MiniMax → MUST NOT return claude.ai auth_url."""
    fake_user_code_resp = {
@ -48,6 +75,9 @@ def test_minimax_login_does_not_launch_anthropic_flow():
    ), patch(
        "hermes_cli.auth._minimax_pkce_pair",
        return_value=("verifier-stub", "challenge-stub", "stub-state"),
+    ), patch(
+        "hermes_cli.web_server._minimax_poller",
+        return_value=None,
    ):
        resp = client.post(
            "/api/providers/oauth/minimax-oauth/start",
@ -69,6 +99,113 @@ def test_minimax_login_does_not_launch_anthropic_flow():
    assert body["expires_in"] == 600


+def test_nous_dashboard_device_flow_honors_legacy_scope_override(monkeypatch):
+    """With the legacy-session-keys env var set, only the legacy scope is requested.
+
+    The dashboard flow is expected to make a single device-code request with
+    ``NOUS_LEGACY_AGENT_KEY_SCOPE`` and to store that scope on the session.
+    """
+    from hermes_cli import auth as auth_mod
+    from hermes_cli import web_server as ws
+
+    # Scopes passed to the fake device-code request, in call order.
+    requested_scopes = []
+
+    def fake_request_device_code(**kwargs):
+        requested_scopes.append(kwargs["scope"])
+        return _fake_nous_device_data()
+
+    monkeypatch.setenv(auth_mod.NOUS_LEGACY_SESSION_KEYS_ENV, "true")
+    monkeypatch.setattr(auth_mod, "_request_device_code", fake_request_device_code)
+    # Replace the poller with a no-op for this test.
+    monkeypatch.setattr(ws, "_nous_poller", lambda sid: None)
+
+    result = asyncio.run(ws._start_device_code_flow("nous"))
+    try:
+        assert requested_scopes == [auth_mod.NOUS_LEGACY_AGENT_KEY_SCOPE]
+        assert result["flow"] == "device_code"
+        assert result["user_code"] == "NOUS-1234"
+        assert (
+            ws._oauth_sessions[result["session_id"]]["scope"]
+            == auth_mod.NOUS_LEGACY_AGENT_KEY_SCOPE
+        )
+    finally:
+        # Remove the session this test created from the module-level registry.
+        ws._oauth_sessions.pop(result["session_id"], None)
+
+
+def test_nous_dashboard_device_flow_retries_legacy_scope_on_invoke_refusal(monkeypatch):
+    """An ``invalid_scope`` refusal of the default scope must trigger a legacy-scope retry.
+
+    The first device-code request raises a 400 ``invalid_scope`` error; the
+    flow is expected to retry with ``NOUS_LEGACY_AGENT_KEY_SCOPE`` and store
+    that scope on the session.
+    """
+    from hermes_cli import auth as auth_mod
+    from hermes_cli import web_server as ws
+
+    # Scopes passed to the fake device-code request, in call order.
+    requested_scopes = []
+
+    def fake_request_device_code(**kwargs):
+        requested_scopes.append(kwargs["scope"])
+        # Only the first attempt is refused; later attempts succeed.
+        if len(requested_scopes) == 1:
+            raise _invoke_scope_refusal()
+        return _fake_nous_device_data()
+
+    # Ensure the env override is absent for this test.
+    monkeypatch.delenv(auth_mod.NOUS_LEGACY_SESSION_KEYS_ENV, raising=False)
+    monkeypatch.setattr(auth_mod, "_request_device_code", fake_request_device_code)
+    monkeypatch.setattr(ws, "_nous_poller", lambda sid: None)
+
+    result = asyncio.run(ws._start_device_code_flow("nous"))
+    try:
+        assert requested_scopes == [
+            auth_mod.DEFAULT_NOUS_SCOPE,
+            auth_mod.NOUS_LEGACY_AGENT_KEY_SCOPE,
+        ]
+        assert (
+            ws._oauth_sessions[result["session_id"]]["scope"]
+            == auth_mod.NOUS_LEGACY_AGENT_KEY_SCOPE
+        )
+    finally:
+        # Remove the session this test created from the module-level registry.
+        ws._oauth_sessions.pop(result["session_id"], None)
+
+
+def test_nous_dashboard_poller_preserves_effective_scope_when_token_omits_scope(monkeypatch):
+    """The poller must pass the session's scope on when the token response has none.
+
+    The fake token response carries no ``scope`` key; the state handed to
+    ``refresh_nous_oauth_from_state`` is expected to contain the scope
+    stored on the session, and the session must end up ``approved``.
+    """
+    from hermes_cli import auth as auth_mod
+    from hermes_cli import web_server as ws
+
+    session_id = "nous-effective-scope-test"
+    # Hand-built pending session that already records the legacy scope.
+    ws._oauth_sessions[session_id] = {
+        "session_id": session_id,
+        "provider": "nous",
+        "flow": "device_code",
+        "created_at": time.time(),
+        "status": "pending",
+        "error_message": None,
+        "portal_base_url": "https://portal.nousresearch.com",
+        "client_id": "hermes-cli",
+        "device_code": "device-code",
+        "interval": 5,
+        "expires_at": time.time() + 600,
+        "scope": auth_mod.NOUS_LEGACY_AGENT_KEY_SCOPE,
+    }
+    # Receives a copy of the state the poller passes to the refresh helper.
+    captured_state = {}
+
+    def fake_refresh_nous_oauth_from_state(state, **kwargs):
+        captured_state.update(state)
+        return {**state, "agent_key": "legacy-agent-key"}
+
+    # Token response deliberately omits "scope".
+    monkeypatch.setattr(
+        auth_mod,
+        "_poll_for_token",
+        lambda **kwargs: {
+            "access_token": "access-token",
+            "refresh_token": "refresh-token",
+            "expires_in": 3600,
+            "token_type": "Bearer",
+        },
+    )
+    monkeypatch.setattr(
+        auth_mod,
+        "refresh_nous_oauth_from_state",
+        fake_refresh_nous_oauth_from_state,
+    )
+    # Persistence is replaced with a no-op for this test.
+    monkeypatch.setattr(auth_mod, "persist_nous_credentials", lambda state: None)
+
+    try:
+        ws._nous_poller(session_id)
+        assert captured_state["scope"] == auth_mod.NOUS_LEGACY_AGENT_KEY_SCOPE
+        assert ws._oauth_sessions[session_id]["status"] == "approved"
+    finally:
+        # Remove the hand-built session from the module-level registry.
+        ws._oauth_sessions.pop(session_id, None)
+
+
 def test_minimax_dashboard_poller_accepts_absolute_ms_expired_in():
    """Dashboard MiniMax completion must accept unix-ms token expiry values."""
    from hermes_cli import web_server as ws