From 78450c4bd60043384a5b1ce182db7adc8acec02c Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Fri, 24 Apr 2026 05:08:46 -0700 Subject: [PATCH] fix(nous-oauth): preserve obtained_at in pool + actionable message on RT reuse (#15111) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two narrow fixes motivated by #15099. 1. _seed_from_singletons() was dropping obtained_at, agent_key_obtained_at, expires_in, and friends when seeding device_code pool entries from the providers.nous singleton. Fresh credentials showed up with obtained_at=None, which broke downstream freshness-sensitive consumers (self-heal hooks, pool pruning by age) — they treated just-minted credentials as older than they actually were and evicted them. 2. When the Nous Portal OAuth 2.1 server returns invalid_grant with 'Refresh token reuse detected' in the error_description, rewrite the message to explain the likely cause (an external process consumed the rotated RT without persisting it back) and the mitigation. The generic reuse message led users to report this as a Hermes persistence bug when the actual trigger was typically a third-party monitoring script calling /api/oauth/token directly. Non-reuse errors keep their original server description untouched. Closes #15099. Regression tests: - tests/agent/test_credential_pool.py::test_nous_seed_from_singletons_preserves_obtained_at_timestamps - tests/hermes_cli/test_auth_nous_provider.py::test_refresh_token_reuse_detection_surfaces_actionable_message - tests/hermes_cli/test_auth_nous_provider.py::test_refresh_non_reuse_error_keeps_original_description --- agent/credential_pool.py | 12 ++++ hermes_cli/auth.py | 22 ++++++ tests/agent/test_credential_pool.py | 69 ++++++++++++++++++ tests/hermes_cli/test_auth_nous_provider.py | 80 +++++++++++++++++++++ 4 files changed, 183 insertions(+) diff --git a/agent/credential_pool.py b/agent/credential_pool.py index de8d03185..2d5accd41 100644 --- a/agent/credential_pool.py +++ b/agent/credential_pool.py @@ -1056,6 +1056,18 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup "inference_base_url": state.get("inference_base_url"), "agent_key": state.get("agent_key"), "agent_key_expires_at": state.get("agent_key_expires_at"), + # Carry the mint/refresh timestamps into the pool so + # freshness-sensitive consumers (self-heal hooks, pool + # pruning by age) can distinguish just-minted credentials + # from stale ones. Without these, fresh device_code + # entries get obtained_at=None and look older than they + # are (#15099). + "obtained_at": state.get("obtained_at"), + "expires_in": state.get("expires_in"), + "agent_key_id": state.get("agent_key_id"), + "agent_key_expires_in": state.get("agent_key_expires_in"), + "agent_key_reused": state.get("agent_key_reused"), + "agent_key_obtained_at": state.get("agent_key_obtained_at"), "tls": state.get("tls") if isinstance(state.get("tls"), dict) else None, "label": seeded_label, }, diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index dbce736cc..ff0028875 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -1889,6 +1889,28 @@ def _refresh_access_token( code = str(error_payload.get("error", "invalid_grant")) description = str(error_payload.get("error_description") or "Refresh token exchange failed") relogin = code in {"invalid_grant", "invalid_token"} + + # Detect the OAuth 2.1 "refresh token reuse" signal from the Nous portal + # server and surface an actionable message. This fires when an external + # process (health-check script, monitoring tool, custom self-heal hook) + # called POST /api/oauth/token with Hermes's refresh_token without + # persisting the rotated token back to auth.json — the server then + # retires the original RT, Hermes's next refresh uses it, and the whole + # session chain gets revoked as a token-theft signal (#15099). + lowered = description.lower() + if "reuse" in lowered or "reuse detected" in lowered: + description = ( + "Nous Portal detected refresh-token reuse and revoked this session.\n" + "This usually means an external process (monitoring script, " + "custom self-heal hook, or another Hermes install sharing " + "~/.hermes/auth.json) called POST /api/oauth/token with Hermes's " + "refresh token without persisting the rotated token back.\n" + "Nous refresh tokens are single-use — only Hermes may call the " + "refresh endpoint. For health checks, use `hermes auth status` " + "instead.\n" + "Re-authenticate with: hermes auth add nous" + ) + raise AuthError(description, provider="nous", code=code, relogin_required=relogin) diff --git a/tests/agent/test_credential_pool.py b/tests/agent/test_credential_pool.py index 76e1412bf..ce7b1a22d 100644 --- a/tests/agent/test_credential_pool.py +++ b/tests/agent/test_credential_pool.py @@ -1102,3 +1102,72 @@ def test_load_pool_does_not_seed_qwen_oauth_when_no_token(tmp_path, monkeypatch) assert not pool.has_credentials() assert pool.entries() == [] + + +def test_nous_seed_from_singletons_preserves_obtained_at_timestamps(tmp_path, monkeypatch): + """Regression test for #15099 secondary issue. + + When ``_seed_from_singletons`` materialises a device_code pool entry from + the ``providers.nous`` singleton, it must carry the mint/refresh + timestamps (``obtained_at``, ``agent_key_obtained_at``, ``expires_in``, + etc.) into the pool entry. Without them, freshness-sensitive consumers + (self-heal hooks, pool pruning by age) treat just-minted credentials as + older than they actually are and evict them. + """ + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes")) + _write_auth_store( + tmp_path, + { + "version": 1, + "providers": { + "nous": { + "access_token": "at_XXXXXXXX", + "refresh_token": "rt_YYYYYYYY", + "client_id": "hermes-cli", + "portal_base_url": "https://portal.nousresearch.com", + "inference_base_url": "https://inference.nousresearch.com/v1", + "token_type": "Bearer", + "scope": "openid profile", + "obtained_at": "2026-04-24T10:00:00+00:00", + "expires_at": "2026-04-24T11:00:00+00:00", + "expires_in": 3600, + "agent_key": "sk-nous-AAAA", + "agent_key_id": "ak_123", + "agent_key_expires_at": "2026-04-25T10:00:00+00:00", + "agent_key_expires_in": 86400, + "agent_key_reused": False, + "agent_key_obtained_at": "2026-04-24T10:00:05+00:00", + "tls": {"insecure": False, "ca_bundle": None}, + }, + }, + }, + ) + + from agent.credential_pool import load_pool + + pool = load_pool("nous") + entries = pool.entries() + + device_entries = [e for e in entries if e.source == "device_code"] + assert len(device_entries) == 1, f"expected single device_code entry; got {len(device_entries)}" + e = device_entries[0] + + # Direct dataclass fields — must survive the singleton → pool copy. + assert e.access_token == "at_XXXXXXXX" + assert e.refresh_token == "rt_YYYYYYYY" + assert e.expires_at == "2026-04-24T11:00:00+00:00" + assert e.agent_key == "sk-nous-AAAA" + assert e.agent_key_expires_at == "2026-04-25T10:00:00+00:00" + + # Extra fields — this is what regressed. These must be carried through + # via ``extra`` dict or __getattr__, NOT silently dropped. + assert e.obtained_at == "2026-04-24T10:00:00+00:00", ( + f"obtained_at was dropped during seed; got {e.obtained_at!r}. This breaks " + f"downstream pool-freshness consumers (#15099)." + ) + assert e.agent_key_obtained_at == "2026-04-24T10:00:05+00:00" + assert e.expires_in == 3600 + assert e.agent_key_id == "ak_123" + assert e.agent_key_expires_in == 86400 + assert e.agent_key_reused is False + diff --git a/tests/hermes_cli/test_auth_nous_provider.py b/tests/hermes_cli/test_auth_nous_provider.py index b94c3c50c..cf74e97f5 100644 --- a/tests/hermes_cli/test_auth_nous_provider.py +++ b/tests/hermes_cli/test_auth_nous_provider.py @@ -732,3 +732,83 @@ def test_persist_nous_credentials_no_label_uses_auto_derived(tmp_path, monkeypat # No "label" key embedded in providers.nous when the caller didn't supply one. payload = json.loads((hermes_home / "auth.json").read_text()) assert "label" not in payload["providers"]["nous"] + + +def test_refresh_token_reuse_detection_surfaces_actionable_message(): + """Regression for #15099. + + When the Nous Portal server returns ``invalid_grant`` with + ``error_description`` containing "reuse detected", Hermes must surface an + actionable message explaining that an external process consumed the + refresh token. The default opaque "Refresh token reuse detected; please + re-authenticate" string led users to report this as a Hermes persistence + bug when the true cause is external RT consumption (monitoring scripts, + custom self-heal hooks). + """ + from hermes_cli.auth import _refresh_access_token + + class _FakeResponse: + status_code = 400 + + def json(self): + return { + "error": "invalid_grant", + "error_description": "Refresh token reuse detected; please re-authenticate", + } + + class _FakeClient: + def post(self, *args, **kwargs): + return _FakeResponse() + + with pytest.raises(AuthError) as exc_info: + _refresh_access_token( + client=_FakeClient(), + portal_base_url="https://portal.nousresearch.com", + client_id="hermes-cli", + refresh_token="rt_consumed_elsewhere", + ) + + message = str(exc_info.value) + assert "refresh-token reuse" in message.lower() or "refresh token reuse" in message.lower() + # The message must mention the external-process cause and give next steps. + assert "external process" in message.lower() or "monitoring script" in message.lower() + assert "hermes auth add nous" in message.lower() + # Must still be classified as invalid_grant + relogin_required. + assert exc_info.value.code == "invalid_grant" + assert exc_info.value.relogin_required is True + + +def test_refresh_non_reuse_error_keeps_original_description(): + """Non-reuse invalid_grant errors must keep their original description untouched. + + Only the "reuse detected" signature should trigger the actionable message; + generic ``invalid_grant: Refresh session has been revoked`` (the + downstream consequence) keeps its original text so we don't overwrite + useful server context for unrelated failure modes. + """ + from hermes_cli.auth import _refresh_access_token + + class _FakeResponse: + status_code = 400 + + def json(self): + return { + "error": "invalid_grant", + "error_description": "Refresh session has been revoked", + } + + class _FakeClient: + def post(self, *args, **kwargs): + return _FakeResponse() + + with pytest.raises(AuthError) as exc_info: + _refresh_access_token( + client=_FakeClient(), + portal_base_url="https://portal.nousresearch.com", + client_id="hermes-cli", + refresh_token="rt_anything", + ) + + assert "Refresh session has been revoked" in str(exc_info.value) + # Must not have been rewritten with the reuse message. + assert "external process" not in str(exc_info.value).lower()