diff --git a/agent/credential_pool.py b/agent/credential_pool.py index 7f27873a7fb..93e3d609ee8 100644 --- a/agent/credential_pool.py +++ b/agent/credential_pool.py @@ -929,6 +929,47 @@ class CredentialPool: self._persist() self._sync_device_code_entry_to_auth_store(updated) return updated + if auth_mod._is_terminal_nous_refresh_error(exc): + logger.debug("Nous refresh token is terminally invalid; clearing local token state") + try: + with _auth_store_lock(): + auth_store = _load_auth_store() + state = _load_provider_state(auth_store, "nous") or { + "client_id": entry.client_id, + "portal_base_url": entry.portal_base_url, + "inference_base_url": entry.inference_base_url, + "token_type": entry.token_type, + "scope": entry.scope, + "tls": entry.tls, + } + store_refresh = str(state.get("refresh_token") or "").strip() + entry_refresh = str(entry.refresh_token or "").strip() + if not store_refresh or store_refresh == entry_refresh: + auth_mod._quarantine_nous_oauth_state( + state, + exc, + reason="credential_pool_refresh_failure", + ) + _save_provider_state(auth_store, "nous", state) + _save_auth_store(auth_store) + except Exception as clear_exc: + logger.debug("Failed to clear terminal Nous OAuth state: %s", clear_exc) + + cleared = replace( + entry, + access_token=None, + refresh_token=None, + agent_key=None, + agent_key_expires_at=None, + ) + self._replace_entry(entry, cleared) + self._persist() + self._mark_exhausted( + cleared, + 401, + {"reason": getattr(exc, "code", None), "message": str(exc)}, + ) + return None self._mark_exhausted(entry, None) return None diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index 8b154db7468..50f105de10a 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -3616,6 +3616,63 @@ def _read_shared_nous_state() -> Optional[Dict[str, Any]]: return payload +def _clear_shared_nous_state(reason: str) -> None: + """Remove the shared Nous OAuth store after a terminal token failure.""" + try: + with _nous_shared_store_lock(): + path = _nous_shared_store_path() + try: + path.unlink() + except FileNotFoundError: + pass + _oauth_trace("nous_shared_store_cleared", reason=reason) + except Exception as exc: + logger.debug("Failed to clear shared Nous auth store: %s", exc) + + +def _is_terminal_nous_refresh_error(exc: Exception) -> bool: + """True when retrying the same Nous refresh token cannot succeed.""" + return ( + isinstance(exc, AuthError) + and exc.provider == "nous" + and exc.code in {"invalid_grant", "invalid_token"} + and bool(exc.relogin_required) + ) + + +def _quarantine_nous_oauth_state( + state: Dict[str, Any], + error: AuthError, + *, + reason: str, +) -> None: + """Keep routing metadata but remove dead OAuth material so it is not replayed.""" + for key in ( + "access_token", + "refresh_token", + "expires_at", + "expires_in", + "obtained_at", + "agent_key", + "agent_key_id", + "agent_key_expires_at", + "agent_key_expires_in", + "agent_key_reused", + "agent_key_obtained_at", + ): + state.pop(key, None) + state["last_auth_error"] = { + "provider": "nous", + "code": error.code, + "message": str(error), + "reason": reason, + "relogin_required": True, + "at": datetime.now(timezone.utc).isoformat(), + } + _clear_shared_nous_state(reason) + invalidate_nous_auth_status_cache() + + def _try_import_shared_nous_state( *, timeout_seconds: float = 15.0, @@ -3671,6 +3728,8 @@ def _try_import_shared_nous_state( error_type=type(exc).__name__, error_code=getattr(exc, "code", None), ) + if _is_terminal_nous_refresh_error(exc): + _clear_shared_nous_state("shared_import_terminal_refresh_failure") logger.debug("Shared Nous import failed: %s", exc) return None except Exception as exc: @@ -3896,12 +3955,23 @@ def resolve_nous_access_token( headers={"Accept": "application/json"}, verify=verify, ) as client: - refreshed = _refresh_access_token( - client=client, - portal_base_url=portal_base_url, - client_id=client_id, - refresh_token=refresh_token, - ) + try: + refreshed = _refresh_access_token( + client=client, + portal_base_url=portal_base_url, + client_id=client_id, + refresh_token=refresh_token, + ) + except AuthError as exc: + if _is_terminal_nous_refresh_error(exc): + _quarantine_nous_oauth_state( + state, + exc, + reason="managed_access_token_refresh_failure", + ) + _save_provider_state(auth_store, "nous", state) + _save_auth_store(auth_store) + raise now = datetime.now(timezone.utc) access_ttl = _coerce_ttl_seconds(refreshed.get("expires_in")) @@ -4209,10 +4279,20 @@ def resolve_nous_runtime_credentials( reason="access_expiring", refresh_token_fp=_token_fingerprint(refresh_token), ) - refreshed = _refresh_access_token( - client=client, portal_base_url=portal_base_url, - client_id=client_id, refresh_token=refresh_token, - ) + try: + refreshed = _refresh_access_token( + client=client, portal_base_url=portal_base_url, + client_id=client_id, refresh_token=refresh_token, + ) + except AuthError as exc: + if _is_terminal_nous_refresh_error(exc): + _quarantine_nous_oauth_state( + state, + exc, + reason="runtime_access_refresh_failure", + ) + _persist_state("terminal_runtime_access_refresh_failure") + raise now = datetime.now(timezone.utc) access_ttl = _coerce_ttl_seconds(refreshed.get("expires_in")) previous_refresh_token = refresh_token @@ -4283,10 +4363,20 @@ def resolve_nous_runtime_credentials( reason="mint_retry_after_invalid_token", refresh_token_fp=_token_fingerprint(latest_refresh_token), ) - refreshed = _refresh_access_token( - client=client, portal_base_url=portal_base_url, - client_id=client_id, refresh_token=latest_refresh_token, - ) + try: + refreshed = _refresh_access_token( + client=client, portal_base_url=portal_base_url, + client_id=client_id, refresh_token=latest_refresh_token, + ) + except AuthError as exc: + if _is_terminal_nous_refresh_error(exc): + _quarantine_nous_oauth_state( + state, + exc, + reason="runtime_mint_retry_refresh_failure", + ) + _persist_state("terminal_runtime_mint_retry_refresh_failure") + raise now = datetime.now(timezone.utc) access_ttl = _coerce_ttl_seconds(refreshed.get("expires_in")) state["access_token"] = refreshed["access_token"] diff --git a/hermes_cli/proxy/adapters/nous_portal.py b/hermes_cli/proxy/adapters/nous_portal.py index b72cbd305b3..842489659a4 100644 --- a/hermes_cli/proxy/adapters/nous_portal.py +++ b/hermes_cli/proxy/adapters/nous_portal.py @@ -16,8 +16,11 @@ import threading from typing import Any, Dict, FrozenSet, Optional from hermes_cli.auth import ( + AuthError, DEFAULT_NOUS_INFERENCE_URL, _load_auth_store, + _is_terminal_nous_refresh_error, + _quarantine_nous_oauth_state, _save_auth_store, _write_shared_nous_state, refresh_nous_oauth_from_state, @@ -81,6 +84,17 @@ class NousPortalAdapter(UpstreamAdapter): try: refreshed = refresh_nous_oauth_from_state(state) + except AuthError as exc: + if _is_terminal_nous_refresh_error(exc): + _quarantine_nous_oauth_state( + state, + exc, + reason="proxy_refresh_failure", + ) + self._save_state(state) + raise RuntimeError( + f"Failed to refresh Nous Portal credentials: {exc}" + ) from exc except Exception as exc: raise RuntimeError( f"Failed to refresh Nous Portal credentials: {exc}" diff --git a/tests/agent/test_credential_pool.py b/tests/agent/test_credential_pool.py index 299567a9a6f..e2d2726f21b 100644 --- a/tests/agent/test_credential_pool.py +++ b/tests/agent/test_credential_pool.py @@ -510,6 +510,70 @@ def test_load_pool_migrates_nous_provider_state(tmp_path, monkeypatch): assert entry.agent_key == "agent-key" +def test_nous_pool_terminal_refresh_clears_tokens(tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes")) + monkeypatch.setenv("HERMES_SHARED_AUTH_DIR", str(tmp_path / "shared")) + _write_auth_store( + tmp_path, + { + "version": 1, + "active_provider": "nous", + "providers": { + "nous": { + "portal_base_url": "https://portal.example.com", + "inference_base_url": "https://inference.example.com/v1", + "client_id": "hermes-cli", + "token_type": "Bearer", + "scope": "inference:mint_agent_key", + "access_token": "access-token", + "refresh_token": "refresh-token", + "expires_at": "2026-03-24T12:00:00+00:00", + "agent_key": "agent-key", + "agent_key_expires_at": "2026-03-24T13:30:00+00:00", + } + }, + }, + ) + + from agent.credential_pool import load_pool + from hermes_cli import auth as auth_mod + from hermes_cli.auth import AuthError + + refresh_calls = {"count": 0} + + def _terminal_refresh_failure(*_args, **_kwargs): + refresh_calls["count"] += 1 + raise AuthError( + "Refresh session has been revoked", + provider="nous", + code="invalid_grant", + relogin_required=True, + ) + + monkeypatch.setattr(auth_mod, "refresh_nous_oauth_from_state", _terminal_refresh_failure) + + pool = load_pool("nous") + assert pool.select() is not None + assert pool.try_refresh_current() is None + + entry = pool.entries()[0] + assert entry.last_status == "exhausted" + assert entry.last_error_code == 401 + assert entry.refresh_token is None + assert entry.access_token is None + assert entry.agent_key is None + + auth_payload = json.loads((tmp_path / "hermes" / "auth.json").read_text()) + nous_state = auth_payload["providers"]["nous"] + assert not nous_state.get("refresh_token") + assert not nous_state.get("access_token") + assert not nous_state.get("agent_key") + assert nous_state["last_auth_error"]["code"] == "invalid_grant" + + assert pool.try_refresh_current() is None + assert refresh_calls["count"] == 1 + + def test_load_pool_removes_stale_file_backed_singleton_entry(tmp_path, monkeypatch): monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes")) monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False) diff --git a/tests/hermes_cli/test_auth_nous_provider.py b/tests/hermes_cli/test_auth_nous_provider.py index 5cd546462dd..37662c77ece 100644 --- a/tests/hermes_cli/test_auth_nous_provider.py +++ b/tests/hermes_cli/test_auth_nous_provider.py @@ -373,6 +373,89 @@ def test_refresh_token_persisted_when_mint_times_out(tmp_path, monkeypatch): assert state_after_failure["access_token"] == "access-1" +def test_terminal_refresh_failure_quarantines_tokens( + tmp_path, monkeypatch, shared_store_env, +): + """A revoked/invalid Nous refresh token must not be replayed forever.""" + from hermes_cli import auth as auth_mod + + hermes_home = tmp_path / "hermes" + _setup_nous_auth(hermes_home, refresh_token="refresh-old") + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + shared_state = _full_state_fixture() + shared_state["access_token"] = "access-old" + shared_state["refresh_token"] = "refresh-old" + shared_state["expires_at"] = "2026-02-01T00:00:00+00:00" + auth_mod._write_shared_nous_state(shared_state) + + refresh_calls: list[str] = [] + + def _terminal_refresh_failure(*, client, portal_base_url, client_id, refresh_token): + refresh_calls.append(refresh_token) + raise AuthError( + "Refresh session has been revoked", + provider="nous", + code="invalid_grant", + relogin_required=True, + ) + + monkeypatch.setattr(auth_mod, "_refresh_access_token", _terminal_refresh_failure) + + with pytest.raises(AuthError, match="Refresh session has been revoked"): + auth_mod.resolve_nous_runtime_credentials(min_key_ttl_seconds=300) + + state_after_failure = auth_mod.get_provider_auth_state("nous") + assert state_after_failure is not None + assert not state_after_failure.get("refresh_token") + assert not state_after_failure.get("access_token") + assert not state_after_failure.get("agent_key") + assert state_after_failure["last_auth_error"]["code"] == "invalid_grant" + assert auth_mod._read_shared_nous_state() is None + + with pytest.raises(AuthError, match="No access token found"): + auth_mod.resolve_nous_runtime_credentials(min_key_ttl_seconds=300) + + assert refresh_calls == ["refresh-old"] + + +def test_managed_access_token_refresh_failure_quarantines_tokens( + tmp_path, monkeypatch, shared_store_env, +): + from hermes_cli import auth as auth_mod + + hermes_home = tmp_path / "hermes" + _setup_nous_auth(hermes_home, refresh_token="refresh-old") + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + refresh_calls: list[str] = [] + + def _terminal_refresh_failure(*, client, portal_base_url, client_id, refresh_token): + refresh_calls.append(refresh_token) + raise AuthError( + "Invalid refresh token", + provider="nous", + code="invalid_grant", + relogin_required=True, + ) + + monkeypatch.setattr(auth_mod, "_refresh_access_token", _terminal_refresh_failure) + + with pytest.raises(AuthError, match="Invalid refresh token"): + auth_mod.resolve_nous_access_token() + + state_after_failure = auth_mod.get_provider_auth_state("nous") + assert state_after_failure is not None + assert not state_after_failure.get("refresh_token") + assert not state_after_failure.get("access_token") + assert state_after_failure["last_auth_error"]["message"] == "Invalid refresh token" + + with pytest.raises(AuthError, match="No access token found"): + auth_mod.resolve_nous_access_token() + + assert refresh_calls == ["refresh-old"] + + def test_mint_retry_uses_latest_rotated_refresh_token(tmp_path, monkeypatch): hermes_home = tmp_path / "hermes" _setup_nous_auth(hermes_home, refresh_token="refresh-old") @@ -1118,6 +1201,7 @@ def test_try_import_shared_returns_none_on_refresh_failure( monkeypatch.setattr(auth_mod, "refresh_nous_oauth_from_state", _boom) assert auth_mod._try_import_shared_nous_state() is None + assert auth_mod._read_shared_nous_state() is None def test_try_import_shared_rehydrates_on_success(shared_store_env, monkeypatch): diff --git a/tests/hermes_cli/test_proxy.py b/tests/hermes_cli/test_proxy.py index 0c874facac7..3ab06eeb92f 100644 --- a/tests/hermes_cli/test_proxy.py +++ b/tests/hermes_cli/test_proxy.py @@ -164,6 +164,37 @@ def test_nous_adapter_get_credential_raises_on_refresh_failure(tmp_path, monkeyp adapter.get_credential() +def test_nous_adapter_quarantines_terminal_refresh_failure(tmp_path, monkeypatch): + from hermes_cli.auth import AuthError + + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + _write_auth_store(tmp_path, { + "access_token": "access-tok", + "refresh_token": "refresh-tok", + "agent_key": "stale-agent-key", + }) + + with patch( + "hermes_cli.proxy.adapters.nous_portal.refresh_nous_oauth_from_state", + side_effect=AuthError( + "Refresh session has been revoked", + provider="nous", + code="invalid_grant", + relogin_required=True, + ), + ): + adapter = NousPortalAdapter() + with pytest.raises(RuntimeError, match="Refresh session has been revoked"): + adapter.get_credential() + + stored = json.loads((tmp_path / "auth.json").read_text()) + nous_state = stored["providers"]["nous"] + assert not nous_state.get("refresh_token") + assert not nous_state.get("access_token") + assert not nous_state.get("agent_key") + assert nous_state["last_auth_error"]["code"] == "invalid_grant" + + def test_nous_adapter_get_credential_raises_when_no_agent_key_returned(tmp_path, monkeypatch): """If the refresh helper succeeds but produces no agent_key, we surface a clear error.""" monkeypatch.setenv("HERMES_HOME", str(tmp_path))