fix(nous-oauth): preserve obtained_at in pool + actionable message on RT reuse (#15111)

Two narrow fixes motivated by #15099.

1. _seed_from_singletons() was dropping obtained_at, agent_key_obtained_at,
   expires_in, and friends when seeding device_code pool entries from the
   providers.nous singleton. Fresh credentials showed up with
   obtained_at=None, which broke downstream freshness-sensitive consumers
   (self-heal hooks, pool pruning by age) — they treated just-minted
   credentials as older than they actually were and evicted them.

2. When the Nous Portal OAuth 2.1 server returns invalid_grant with
   'Refresh token reuse detected' in the error_description, rewrite the
   message to explain the likely cause (an external process consumed the
   rotated RT without persisting it back) and the mitigation. The generic
   reuse message led users to report this as a Hermes persistence bug when
   the actual trigger was typically a third-party monitoring script calling
   /api/oauth/token directly. Non-reuse errors keep their original server
   description untouched.

Closes #15099.

Regression tests:
- tests/agent/test_credential_pool.py::test_nous_seed_from_singletons_preserves_obtained_at_timestamps
- tests/hermes_cli/test_auth_nous_provider.py::test_refresh_token_reuse_detection_surfaces_actionable_message
- tests/hermes_cli/test_auth_nous_provider.py::test_refresh_non_reuse_error_keeps_original_description
This commit is contained in:
Teknium 2026-04-24 05:08:46 -07:00 committed by GitHub
parent 852c7f3be3
commit 78450c4bd6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 183 additions and 0 deletions

View file

@ -1102,3 +1102,72 @@ def test_load_pool_does_not_seed_qwen_oauth_when_no_token(tmp_path, monkeypatch)
assert not pool.has_credentials()
assert pool.entries() == []
def test_nous_seed_from_singletons_preserves_obtained_at_timestamps(tmp_path, monkeypatch):
"""Regression test for #15099 secondary issue.
When ``_seed_from_singletons`` materialises a device_code pool entry from
the ``providers.nous`` singleton, it must carry the mint/refresh
timestamps (``obtained_at``, ``agent_key_obtained_at``, ``expires_in``,
etc.) into the pool entry. Without them, freshness-sensitive consumers
(self-heal hooks, pool pruning by age) treat just-minted credentials as
older than they actually are and evict them.
"""
monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
_write_auth_store(
tmp_path,
{
"version": 1,
"providers": {
"nous": {
"access_token": "at_XXXXXXXX",
"refresh_token": "rt_YYYYYYYY",
"client_id": "hermes-cli",
"portal_base_url": "https://portal.nousresearch.com",
"inference_base_url": "https://inference.nousresearch.com/v1",
"token_type": "Bearer",
"scope": "openid profile",
"obtained_at": "2026-04-24T10:00:00+00:00",
"expires_at": "2026-04-24T11:00:00+00:00",
"expires_in": 3600,
"agent_key": "sk-nous-AAAA",
"agent_key_id": "ak_123",
"agent_key_expires_at": "2026-04-25T10:00:00+00:00",
"agent_key_expires_in": 86400,
"agent_key_reused": False,
"agent_key_obtained_at": "2026-04-24T10:00:05+00:00",
"tls": {"insecure": False, "ca_bundle": None},
},
},
},
)
from agent.credential_pool import load_pool
pool = load_pool("nous")
entries = pool.entries()
device_entries = [e for e in entries if e.source == "device_code"]
assert len(device_entries) == 1, f"expected single device_code entry; got {len(device_entries)}"
e = device_entries[0]
# Direct dataclass fields — must survive the singleton → pool copy.
assert e.access_token == "at_XXXXXXXX"
assert e.refresh_token == "rt_YYYYYYYY"
assert e.expires_at == "2026-04-24T11:00:00+00:00"
assert e.agent_key == "sk-nous-AAAA"
assert e.agent_key_expires_at == "2026-04-25T10:00:00+00:00"
# Extra fields — this is what regressed. These must be carried through
# via ``extra`` dict or __getattr__, NOT silently dropped.
assert e.obtained_at == "2026-04-24T10:00:00+00:00", (
f"obtained_at was dropped during seed; got {e.obtained_at!r}. This breaks "
f"downstream pool-freshness consumers (#15099)."
)
assert e.agent_key_obtained_at == "2026-04-24T10:00:05+00:00"
assert e.expires_in == 3600
assert e.agent_key_id == "ak_123"
assert e.agent_key_expires_in == 86400
assert e.agent_key_reused is False

View file

@ -732,3 +732,83 @@ def test_persist_nous_credentials_no_label_uses_auto_derived(tmp_path, monkeypat
# No "label" key embedded in providers.nous when the caller didn't supply one.
payload = json.loads((hermes_home / "auth.json").read_text())
assert "label" not in payload["providers"]["nous"]
def test_refresh_token_reuse_detection_surfaces_actionable_message():
"""Regression for #15099.
When the Nous Portal server returns ``invalid_grant`` with
``error_description`` containing "reuse detected", Hermes must surface an
actionable message explaining that an external process consumed the
refresh token. The default opaque "Refresh token reuse detected; please
re-authenticate" string led users to report this as a Hermes persistence
bug when the true cause is external RT consumption (monitoring scripts,
custom self-heal hooks).
"""
from hermes_cli.auth import _refresh_access_token
class _FakeResponse:
status_code = 400
def json(self):
return {
"error": "invalid_grant",
"error_description": "Refresh token reuse detected; please re-authenticate",
}
class _FakeClient:
def post(self, *args, **kwargs):
return _FakeResponse()
with pytest.raises(AuthError) as exc_info:
_refresh_access_token(
client=_FakeClient(),
portal_base_url="https://portal.nousresearch.com",
client_id="hermes-cli",
refresh_token="rt_consumed_elsewhere",
)
message = str(exc_info.value)
assert "refresh-token reuse" in message.lower() or "refresh token reuse" in message.lower()
# The message must mention the external-process cause and give next steps.
assert "external process" in message.lower() or "monitoring script" in message.lower()
assert "hermes auth add nous" in message.lower()
# Must still be classified as invalid_grant + relogin_required.
assert exc_info.value.code == "invalid_grant"
assert exc_info.value.relogin_required is True
def test_refresh_non_reuse_error_keeps_original_description():
"""Non-reuse invalid_grant errors must keep their original description untouched.
Only the "reuse detected" signature should trigger the actionable message;
generic ``invalid_grant: Refresh session has been revoked`` (the
downstream consequence) keeps its original text so we don't overwrite
useful server context for unrelated failure modes.
"""
from hermes_cli.auth import _refresh_access_token
class _FakeResponse:
status_code = 400
def json(self):
return {
"error": "invalid_grant",
"error_description": "Refresh session has been revoked",
}
class _FakeClient:
def post(self, *args, **kwargs):
return _FakeResponse()
with pytest.raises(AuthError) as exc_info:
_refresh_access_token(
client=_FakeClient(),
portal_base_url="https://portal.nousresearch.com",
client_id="hermes-cli",
refresh_token="rt_anything",
)
assert "Refresh session has been revoked" in str(exc_info.value)
# Must not have been rewritten with the reuse message.
assert "external process" not in str(exc_info.value).lower()