mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-29 06:31:32 +00:00
fix(auth): stop replaying invalid Nous refresh tokens
Quarantine Nous OAuth state when refresh fails with terminal invalid_grant/invalid_token errors. Clear local and shared refresh material across runtime, managed access-token, proxy, and credential-pool paths so Hermes stops retrying revoked refresh sessions.
This commit is contained in:
parent
4c46c35ed0
commit
c905562623
6 changed files with 338 additions and 14 deletions
|
|
@ -929,6 +929,47 @@ class CredentialPool:
|
|||
self._persist()
|
||||
self._sync_device_code_entry_to_auth_store(updated)
|
||||
return updated
|
||||
if auth_mod._is_terminal_nous_refresh_error(exc):
|
||||
logger.debug("Nous refresh token is terminally invalid; clearing local token state")
|
||||
try:
|
||||
with _auth_store_lock():
|
||||
auth_store = _load_auth_store()
|
||||
state = _load_provider_state(auth_store, "nous") or {
|
||||
"client_id": entry.client_id,
|
||||
"portal_base_url": entry.portal_base_url,
|
||||
"inference_base_url": entry.inference_base_url,
|
||||
"token_type": entry.token_type,
|
||||
"scope": entry.scope,
|
||||
"tls": entry.tls,
|
||||
}
|
||||
store_refresh = str(state.get("refresh_token") or "").strip()
|
||||
entry_refresh = str(entry.refresh_token or "").strip()
|
||||
if not store_refresh or store_refresh == entry_refresh:
|
||||
auth_mod._quarantine_nous_oauth_state(
|
||||
state,
|
||||
exc,
|
||||
reason="credential_pool_refresh_failure",
|
||||
)
|
||||
_save_provider_state(auth_store, "nous", state)
|
||||
_save_auth_store(auth_store)
|
||||
except Exception as clear_exc:
|
||||
logger.debug("Failed to clear terminal Nous OAuth state: %s", clear_exc)
|
||||
|
||||
cleared = replace(
|
||||
entry,
|
||||
access_token=None,
|
||||
refresh_token=None,
|
||||
agent_key=None,
|
||||
agent_key_expires_at=None,
|
||||
)
|
||||
self._replace_entry(entry, cleared)
|
||||
self._persist()
|
||||
self._mark_exhausted(
|
||||
cleared,
|
||||
401,
|
||||
{"reason": getattr(exc, "code", None), "message": str(exc)},
|
||||
)
|
||||
return None
|
||||
self._mark_exhausted(entry, None)
|
||||
return None
|
||||
|
||||
|
|
|
|||
|
|
@ -3616,6 +3616,63 @@ def _read_shared_nous_state() -> Optional[Dict[str, Any]]:
|
|||
return payload
|
||||
|
||||
|
||||
def _clear_shared_nous_state(reason: str) -> None:
    """Best-effort deletion of the shared Nous OAuth store file.

    Intended for use after a terminal token failure. The store path is
    unlinked while the shared-store lock is held; a missing file is not
    treated as an error. A ``nous_shared_store_cleared`` trace event
    carrying *reason* is emitted whether or not a file was actually
    removed.

    Any other exception (from the lock, the path lookup, the unlink, or
    the trace call) is logged at debug level and swallowed, so this
    function never raises to its caller.

    Args:
        reason: Short tag recorded on the trace event describing why the
            shared store is being cleared.
    """
    try:
        with _nous_shared_store_lock():
            shared_store_file = _nous_shared_store_path()
            try:
                shared_store_file.unlink()
            except FileNotFoundError:
                # Nothing on disk to remove; still trace the clear below.
                pass
            _oauth_trace("nous_shared_store_cleared", reason=reason)
    except Exception as clear_error:
        logger.debug("Failed to clear shared Nous auth store: %s", clear_error)
|
||||
|
||||
|
||||
def _is_terminal_nous_refresh_error(exc: Exception) -> bool:
    """Tell whether *exc* means the current Nous refresh token is dead.

    Returns ``True`` only when all of the following hold, checked in order:

    * *exc* is an ``AuthError``;
    * its ``provider`` is ``"nous"``;
    * its ``code`` is ``"invalid_grant"`` or ``"invalid_token"``;
    * its ``relogin_required`` attribute is truthy.

    Any other exception yields ``False``.
    """
    if not isinstance(exc, AuthError):
        return False
    if exc.provider != "nous":
        return False
    if exc.code not in {"invalid_grant", "invalid_token"}:
        return False
    return bool(exc.relogin_required)
|
||||
|
||||
|
||||
def _quarantine_nous_oauth_state(
    state: Dict[str, Any],
    error: AuthError,
    *,
    reason: str,
) -> None:
    """Strip dead OAuth material from *state* and record why it was removed.

    The access/refresh token fields, their expiry bookkeeping, and every
    ``agent_key*`` field are removed from *state* in place; all other keys
    are left untouched. A ``last_auth_error`` record describing *error* is
    then stored on *state*, the shared Nous store is cleared, and the Nous
    auth status cache is invalidated.

    This function only mutates the in-memory mapping; it does not write
    *state* to the auth store itself.

    Args:
        state: Nous provider state mapping, modified in place.
        error: The auth failure that triggered the quarantine; its ``code``
            and string form are copied into ``last_auth_error``.
        reason: Short tag identifying the calling path; stored in
            ``last_auth_error`` and passed to the shared-store clear.
    """
    dead_token_keys = (
        "access_token",
        "refresh_token",
        "expires_at",
        "expires_in",
        "obtained_at",
        "agent_key",
        "agent_key_id",
        "agent_key_expires_at",
        "agent_key_expires_in",
        "agent_key_reused",
        "agent_key_obtained_at",
    )
    for dead_key in dead_token_keys:
        state.pop(dead_key, None)

    failure_record = {
        "provider": "nous",
        "code": error.code,
        "message": str(error),
        "reason": reason,
        "relogin_required": True,
        # Timezone-aware UTC timestamp of the quarantine.
        "at": datetime.now(timezone.utc).isoformat(),
    }
    state["last_auth_error"] = failure_record

    _clear_shared_nous_state(reason)
    invalidate_nous_auth_status_cache()
|
||||
|
||||
|
||||
def _try_import_shared_nous_state(
|
||||
*,
|
||||
timeout_seconds: float = 15.0,
|
||||
|
|
@ -3671,6 +3728,8 @@ def _try_import_shared_nous_state(
|
|||
error_type=type(exc).__name__,
|
||||
error_code=getattr(exc, "code", None),
|
||||
)
|
||||
if _is_terminal_nous_refresh_error(exc):
|
||||
_clear_shared_nous_state("shared_import_terminal_refresh_failure")
|
||||
logger.debug("Shared Nous import failed: %s", exc)
|
||||
return None
|
||||
except Exception as exc:
|
||||
|
|
@ -3896,12 +3955,23 @@ def resolve_nous_access_token(
|
|||
headers={"Accept": "application/json"},
|
||||
verify=verify,
|
||||
) as client:
|
||||
refreshed = _refresh_access_token(
|
||||
client=client,
|
||||
portal_base_url=portal_base_url,
|
||||
client_id=client_id,
|
||||
refresh_token=refresh_token,
|
||||
)
|
||||
try:
|
||||
refreshed = _refresh_access_token(
|
||||
client=client,
|
||||
portal_base_url=portal_base_url,
|
||||
client_id=client_id,
|
||||
refresh_token=refresh_token,
|
||||
)
|
||||
except AuthError as exc:
|
||||
if _is_terminal_nous_refresh_error(exc):
|
||||
_quarantine_nous_oauth_state(
|
||||
state,
|
||||
exc,
|
||||
reason="managed_access_token_refresh_failure",
|
||||
)
|
||||
_save_provider_state(auth_store, "nous", state)
|
||||
_save_auth_store(auth_store)
|
||||
raise
|
||||
|
||||
now = datetime.now(timezone.utc)
|
||||
access_ttl = _coerce_ttl_seconds(refreshed.get("expires_in"))
|
||||
|
|
@ -4209,10 +4279,20 @@ def resolve_nous_runtime_credentials(
|
|||
reason="access_expiring",
|
||||
refresh_token_fp=_token_fingerprint(refresh_token),
|
||||
)
|
||||
refreshed = _refresh_access_token(
|
||||
client=client, portal_base_url=portal_base_url,
|
||||
client_id=client_id, refresh_token=refresh_token,
|
||||
)
|
||||
try:
|
||||
refreshed = _refresh_access_token(
|
||||
client=client, portal_base_url=portal_base_url,
|
||||
client_id=client_id, refresh_token=refresh_token,
|
||||
)
|
||||
except AuthError as exc:
|
||||
if _is_terminal_nous_refresh_error(exc):
|
||||
_quarantine_nous_oauth_state(
|
||||
state,
|
||||
exc,
|
||||
reason="runtime_access_refresh_failure",
|
||||
)
|
||||
_persist_state("terminal_runtime_access_refresh_failure")
|
||||
raise
|
||||
now = datetime.now(timezone.utc)
|
||||
access_ttl = _coerce_ttl_seconds(refreshed.get("expires_in"))
|
||||
previous_refresh_token = refresh_token
|
||||
|
|
@ -4283,10 +4363,20 @@ def resolve_nous_runtime_credentials(
|
|||
reason="mint_retry_after_invalid_token",
|
||||
refresh_token_fp=_token_fingerprint(latest_refresh_token),
|
||||
)
|
||||
refreshed = _refresh_access_token(
|
||||
client=client, portal_base_url=portal_base_url,
|
||||
client_id=client_id, refresh_token=latest_refresh_token,
|
||||
)
|
||||
try:
|
||||
refreshed = _refresh_access_token(
|
||||
client=client, portal_base_url=portal_base_url,
|
||||
client_id=client_id, refresh_token=latest_refresh_token,
|
||||
)
|
||||
except AuthError as exc:
|
||||
if _is_terminal_nous_refresh_error(exc):
|
||||
_quarantine_nous_oauth_state(
|
||||
state,
|
||||
exc,
|
||||
reason="runtime_mint_retry_refresh_failure",
|
||||
)
|
||||
_persist_state("terminal_runtime_mint_retry_refresh_failure")
|
||||
raise
|
||||
now = datetime.now(timezone.utc)
|
||||
access_ttl = _coerce_ttl_seconds(refreshed.get("expires_in"))
|
||||
state["access_token"] = refreshed["access_token"]
|
||||
|
|
|
|||
|
|
@ -16,8 +16,11 @@ import threading
|
|||
from typing import Any, Dict, FrozenSet, Optional
|
||||
|
||||
from hermes_cli.auth import (
|
||||
AuthError,
|
||||
DEFAULT_NOUS_INFERENCE_URL,
|
||||
_load_auth_store,
|
||||
_is_terminal_nous_refresh_error,
|
||||
_quarantine_nous_oauth_state,
|
||||
_save_auth_store,
|
||||
_write_shared_nous_state,
|
||||
refresh_nous_oauth_from_state,
|
||||
|
|
@ -81,6 +84,17 @@ class NousPortalAdapter(UpstreamAdapter):
|
|||
|
||||
try:
|
||||
refreshed = refresh_nous_oauth_from_state(state)
|
||||
except AuthError as exc:
|
||||
if _is_terminal_nous_refresh_error(exc):
|
||||
_quarantine_nous_oauth_state(
|
||||
state,
|
||||
exc,
|
||||
reason="proxy_refresh_failure",
|
||||
)
|
||||
self._save_state(state)
|
||||
raise RuntimeError(
|
||||
f"Failed to refresh Nous Portal credentials: {exc}"
|
||||
) from exc
|
||||
except Exception as exc:
|
||||
raise RuntimeError(
|
||||
f"Failed to refresh Nous Portal credentials: {exc}"
|
||||
|
|
|
|||
|
|
@ -510,6 +510,70 @@ def test_load_pool_migrates_nous_provider_state(tmp_path, monkeypatch):
|
|||
assert entry.agent_key == "agent-key"
|
||||
|
||||
|
||||
def test_nous_pool_terminal_refresh_clears_tokens(tmp_path, monkeypatch):
    """A terminal refresh failure clears pool and auth-store tokens and is not retried."""
    hermes_home = tmp_path / "hermes"
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
    monkeypatch.setenv("HERMES_SHARED_AUTH_DIR", str(tmp_path / "shared"))

    seeded_nous_state = {
        "portal_base_url": "https://portal.example.com",
        "inference_base_url": "https://inference.example.com/v1",
        "client_id": "hermes-cli",
        "token_type": "Bearer",
        "scope": "inference:mint_agent_key",
        "access_token": "access-token",
        "refresh_token": "refresh-token",
        "expires_at": "2026-03-24T12:00:00+00:00",
        "agent_key": "agent-key",
        "agent_key_expires_at": "2026-03-24T13:30:00+00:00",
    }
    _write_auth_store(
        tmp_path,
        {
            "version": 1,
            "active_provider": "nous",
            "providers": {"nous": seeded_nous_state},
        },
    )

    # These imports intentionally stay below the environment/auth-store setup.
    from agent.credential_pool import load_pool
    from hermes_cli import auth as auth_mod
    from hermes_cli.auth import AuthError

    refresh_attempts = []

    def _revoked_refresh(*_args, **_kwargs):
        # Record the attempt, then fail the way a revoked session would.
        refresh_attempts.append(True)
        raise AuthError(
            "Refresh session has been revoked",
            provider="nous",
            code="invalid_grant",
            relogin_required=True,
        )

    monkeypatch.setattr(auth_mod, "refresh_nous_oauth_from_state", _revoked_refresh)

    pool = load_pool("nous")
    assert pool.select() is not None
    assert pool.try_refresh_current() is None

    # The pool entry is exhausted with a 401 and carries no token material.
    cleared_entry = pool.entries()[0]
    assert cleared_entry.last_status == "exhausted"
    assert cleared_entry.last_error_code == 401
    for token_field in ("refresh_token", "access_token", "agent_key"):
        assert getattr(cleared_entry, token_field) is None

    # The persisted auth store is scrubbed and remembers the failure code.
    stored_payload = json.loads((hermes_home / "auth.json").read_text())
    stored_nous = stored_payload["providers"]["nous"]
    for token_key in ("refresh_token", "access_token", "agent_key"):
        assert not stored_nous.get(token_key)
    assert stored_nous["last_auth_error"]["code"] == "invalid_grant"

    # A second refresh attempt must not reach the refresh helper again.
    assert pool.try_refresh_current() is None
    assert len(refresh_attempts) == 1
|
||||
|
||||
|
||||
def test_load_pool_removes_stale_file_backed_singleton_entry(tmp_path, monkeypatch):
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
|
||||
monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
|
||||
|
|
|
|||
|
|
@ -373,6 +373,89 @@ def test_refresh_token_persisted_when_mint_times_out(tmp_path, monkeypatch):
|
|||
assert state_after_failure["access_token"] == "access-1"
|
||||
|
||||
|
||||
def test_terminal_refresh_failure_quarantines_tokens(
    tmp_path, monkeypatch, shared_store_env,
):
    """A revoked Nous refresh token is quarantined instead of being replayed.

    After the runtime resolver hits a terminal ``invalid_grant`` failure, the
    local state and the shared store must hold no tokens, and a second
    resolve must fail without calling the refresh endpoint again.
    """
    from hermes_cli import auth as auth_mod

    hermes_home = tmp_path / "hermes"
    _setup_nous_auth(hermes_home, refresh_token="refresh-old")
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))

    # Seed the shared store with the same (soon to be rejected) refresh token.
    shared_state = _full_state_fixture()
    for field, value in (
        ("access_token", "access-old"),
        ("refresh_token", "refresh-old"),
        ("expires_at", "2026-02-01T00:00:00+00:00"),
    ):
        shared_state[field] = value
    auth_mod._write_shared_nous_state(shared_state)

    tokens_sent: list[str] = []

    def _revoked_refresh(*, client, portal_base_url, client_id, refresh_token):
        # Remember which refresh token was presented, then reject it.
        tokens_sent.append(refresh_token)
        raise AuthError(
            "Refresh session has been revoked",
            provider="nous",
            code="invalid_grant",
            relogin_required=True,
        )

    monkeypatch.setattr(auth_mod, "_refresh_access_token", _revoked_refresh)

    with pytest.raises(AuthError, match="Refresh session has been revoked"):
        auth_mod.resolve_nous_runtime_credentials(min_key_ttl_seconds=300)

    quarantined = auth_mod.get_provider_auth_state("nous")
    assert quarantined is not None
    for token_key in ("refresh_token", "access_token", "agent_key"):
        assert not quarantined.get(token_key)
    assert quarantined["last_auth_error"]["code"] == "invalid_grant"
    assert auth_mod._read_shared_nous_state() is None

    # The second attempt fails locally; the refresh stub is not hit again.
    with pytest.raises(AuthError, match="No access token found"):
        auth_mod.resolve_nous_runtime_credentials(min_key_ttl_seconds=300)

    assert tokens_sent == ["refresh-old"]
|
||||
|
||||
|
||||
def test_managed_access_token_refresh_failure_quarantines_tokens(
    tmp_path, monkeypatch, shared_store_env,
):
    """The managed access-token path quarantines tokens on a terminal refresh failure.

    A terminal ``invalid_grant`` from the refresh call must leave no access or
    refresh token in the stored Nous state, record the error message, and make
    a second resolve fail without another refresh call.
    """
    from hermes_cli import auth as auth_mod

    hermes_home = tmp_path / "hermes"
    _setup_nous_auth(hermes_home, refresh_token="refresh-old")
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))

    tokens_sent: list[str] = []

    def _rejected_refresh(*, client, portal_base_url, client_id, refresh_token):
        # Remember which refresh token was presented, then reject it.
        tokens_sent.append(refresh_token)
        raise AuthError(
            "Invalid refresh token",
            provider="nous",
            code="invalid_grant",
            relogin_required=True,
        )

    monkeypatch.setattr(auth_mod, "_refresh_access_token", _rejected_refresh)

    with pytest.raises(AuthError, match="Invalid refresh token"):
        auth_mod.resolve_nous_access_token()

    quarantined = auth_mod.get_provider_auth_state("nous")
    assert quarantined is not None
    for token_key in ("refresh_token", "access_token"):
        assert not quarantined.get(token_key)
    assert quarantined["last_auth_error"]["message"] == "Invalid refresh token"

    # The second attempt fails locally; the refresh stub is not hit again.
    with pytest.raises(AuthError, match="No access token found"):
        auth_mod.resolve_nous_access_token()

    assert tokens_sent == ["refresh-old"]
|
||||
|
||||
|
||||
def test_mint_retry_uses_latest_rotated_refresh_token(tmp_path, monkeypatch):
|
||||
hermes_home = tmp_path / "hermes"
|
||||
_setup_nous_auth(hermes_home, refresh_token="refresh-old")
|
||||
|
|
@ -1118,6 +1201,7 @@ def test_try_import_shared_returns_none_on_refresh_failure(
|
|||
monkeypatch.setattr(auth_mod, "refresh_nous_oauth_from_state", _boom)
|
||||
|
||||
assert auth_mod._try_import_shared_nous_state() is None
|
||||
assert auth_mod._read_shared_nous_state() is None
|
||||
|
||||
|
||||
def test_try_import_shared_rehydrates_on_success(shared_store_env, monkeypatch):
|
||||
|
|
|
|||
|
|
@ -164,6 +164,37 @@ def test_nous_adapter_get_credential_raises_on_refresh_failure(tmp_path, monkeyp
|
|||
adapter.get_credential()
|
||||
|
||||
|
||||
def test_nous_adapter_quarantines_terminal_refresh_failure(tmp_path, monkeypatch):
    """The proxy adapter scrubs stored tokens when the refresh fails terminally.

    The adapter must surface the failure as ``RuntimeError`` and the persisted
    Nous state must end up without tokens but with the error code recorded.
    """
    from hermes_cli.auth import AuthError

    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    seeded_tokens = {
        "access_token": "access-tok",
        "refresh_token": "refresh-tok",
        "agent_key": "stale-agent-key",
    }
    _write_auth_store(tmp_path, seeded_tokens)

    revoked = AuthError(
        "Refresh session has been revoked",
        provider="nous",
        code="invalid_grant",
        relogin_required=True,
    )
    with patch(
        "hermes_cli.proxy.adapters.nous_portal.refresh_nous_oauth_from_state",
        side_effect=revoked,
    ):
        adapter = NousPortalAdapter()
        with pytest.raises(RuntimeError, match="Refresh session has been revoked"):
            adapter.get_credential()

    # Inspect what was written back to the auth store on disk.
    persisted = json.loads((tmp_path / "auth.json").read_text())
    persisted_nous = persisted["providers"]["nous"]
    for token_key in ("refresh_token", "access_token", "agent_key"):
        assert not persisted_nous.get(token_key)
    assert persisted_nous["last_auth_error"]["code"] == "invalid_grant"
|
||||
|
||||
|
||||
def test_nous_adapter_get_credential_raises_when_no_agent_key_returned(tmp_path, monkeypatch):
|
||||
"""If the refresh helper succeeds but produces no agent_key, we surface a clear error."""
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue