From 785d168d50e1b2e4496ff000c67494774883db85 Mon Sep 17 00:00:00 2001 From: konsisumer Date: Fri, 24 Apr 2026 05:18:53 -0700 Subject: [PATCH] fix(credential_pool): add Nous OAuth cross-process auth-store sync MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Concurrent Hermes processes (e.g. cron jobs) refreshing a Nous OAuth token via resolve_nous_runtime_credentials() write the rotated tokens to auth.json. The calling process's pool entry becomes stale, and the next refresh against the already-rotated token triggers a 'refresh token reuse' revocation on the Nous Portal. _sync_nous_entry_from_auth_store() reads auth.json under the same lock used by resolve_nous_runtime_credentials, and adopts the newer token pair before refreshing the pool entry. This complements #15111 (which preserved the obtained_at timestamps through seeding). Partial salvage of #10160 by @konsisumer — only the agent/credential_pool.py changes + the 3 Nous-specific regression tests. The PR also touched 10 unrelated files (Dockerfile, tips.py, various tool tests) which were dropped as scope creep. Regression tests: - test_sync_nous_entry_from_auth_store_adopts_newer_tokens - test_sync_nous_entry_noop_when_tokens_match - test_nous_exhausted_entry_recovers_via_auth_store_sync --- agent/credential_pool.py | 89 +++++++++++++++ tests/agent/test_credential_pool.py | 169 ++++++++++++++++++++++++++++ 2 files changed, 258 insertions(+) diff --git a/agent/credential_pool.py b/agent/credential_pool.py index 98064a85f..f6cb24dd6 100644 --- a/agent/credential_pool.py +++ b/agent/credential_pool.py @@ -455,6 +455,61 @@ class CredentialPool: logger.debug("Failed to sync from credentials file: %s", exc) return entry + def _sync_nous_entry_from_auth_store(self, entry: PooledCredential) -> PooledCredential: + """Sync a Nous pool entry from auth.json if tokens differ. + + Nous OAuth refresh tokens are single-use. When another process + (e.g. a concurrent cron) refreshes the token via + ``resolve_nous_runtime_credentials``, it writes fresh tokens to + auth.json under ``_auth_store_lock``. The pool entry's tokens + become stale. This method detects that and adopts the newer pair, + avoiding a "refresh token reuse" revocation on the Nous Portal. + """ + if self.provider != "nous" or entry.source != "device_code": + return entry + try: + with _auth_store_lock(): + auth_store = _load_auth_store() + state = _load_provider_state(auth_store, "nous") + if not state: + return entry + store_refresh = state.get("refresh_token", "") + store_access = state.get("access_token", "") + if store_refresh and store_refresh != entry.refresh_token: + logger.debug( + "Pool entry %s: syncing tokens from auth.json (Nous refresh token changed)", + entry.id, + ) + field_updates: Dict[str, Any] = { + "access_token": store_access, + "refresh_token": store_refresh, + "last_status": None, + "last_status_at": None, + "last_error_code": None, + } + if state.get("expires_at"): + field_updates["expires_at"] = state["expires_at"] + if state.get("agent_key"): + field_updates["agent_key"] = state["agent_key"] + if state.get("agent_key_expires_at"): + field_updates["agent_key_expires_at"] = state["agent_key_expires_at"] + if state.get("inference_base_url"): + field_updates["inference_base_url"] = state["inference_base_url"] + extra_updates = dict(entry.extra) + for extra_key in ("obtained_at", "expires_in", "agent_key_id", + "agent_key_expires_in", "agent_key_reused", + "agent_key_obtained_at"): + val = state.get(extra_key) + if val is not None: + extra_updates[extra_key] = val + updated = replace(entry, extra=extra_updates, **field_updates) + self._replace_entry(entry, updated) + self._persist() + return updated + except Exception as exc: + logger.debug("Failed to sync Nous entry from auth.json: %s", exc) + return entry + def _sync_device_code_entry_to_auth_store(self, entry: PooledCredential) -> None: """Write refreshed pool entry tokens back to auth.json providers. @@ -561,6 +616,9 @@ class CredentialPool: last_refresh=refreshed.get("last_refresh"), ) elif self.provider == "nous": + synced = self._sync_nous_entry_from_auth_store(entry) + if synced is not entry: + entry = synced nous_state = { "access_token": entry.access_token, "refresh_token": entry.refresh_token, @@ -635,6 +693,26 @@ class CredentialPool: # Credentials file had a valid (non-expired) token — use it directly logger.debug("Credentials file has valid token, using without refresh") return synced + # For nous: another process may have consumed the refresh token + # between our proactive sync and the HTTP call. Re-sync from + # auth.json and adopt the fresh tokens if available. + if self.provider == "nous": + synced = self._sync_nous_entry_from_auth_store(entry) + if synced.refresh_token != entry.refresh_token: + logger.debug("Nous refresh failed but auth.json has newer tokens — adopting") + updated = replace( + synced, + last_status=STATUS_OK, + last_status_at=None, + last_error_code=None, + last_error_reason=None, + last_error_message=None, + last_error_reset_at=None, + ) + self._replace_entry(synced, updated) + self._persist() + self._sync_device_code_entry_to_auth_store(updated) + return updated self._mark_exhausted(entry, None) return None @@ -698,6 +776,17 @@ class CredentialPool: if synced is not entry: entry = synced cleared_any = True + # For nous entries, sync from auth.json before status checks. + # Another process may have successfully refreshed via + # resolve_nous_runtime_credentials(), making this entry's + # exhausted status stale. + if (self.provider == "nous" + and entry.source == "device_code" + and entry.last_status == STATUS_EXHAUSTED): + synced = self._sync_nous_entry_from_auth_store(entry) + if synced is not entry: + entry = synced + cleared_any = True if entry.last_status == STATUS_EXHAUSTED: exhausted_until = _exhausted_until(entry) if exhausted_until is not None and now < exhausted_until: diff --git a/tests/agent/test_credential_pool.py b/tests/agent/test_credential_pool.py index 6baee3d04..7f3a835f1 100644 --- a/tests/agent/test_credential_pool.py +++ b/tests/agent/test_credential_pool.py @@ -1201,3 +1201,172 @@ class TestLeastUsedStrategy: assert e2.id != e1.id or e2.request_count == 2, ( "least_used should alternate or increment" ) + + +# ── PR #10160 salvage: Nous OAuth cross-process sync tests ───────────────── + +def test_sync_nous_entry_from_auth_store_adopts_newer_tokens(tmp_path, monkeypatch): + """When auth.json has a newer refresh token, the pool entry should adopt it.""" + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes")) + _write_auth_store( + tmp_path, + { + "version": 1, + "active_provider": "nous", + "providers": { + "nous": { + "portal_base_url": "https://portal.example.com", + "inference_base_url": "https://inference.example.com/v1", + "client_id": "hermes-cli", + "token_type": "Bearer", + "scope": "inference:mint_agent_key", + "access_token": "access-OLD", + "refresh_token": "refresh-OLD", + "expires_at": "2026-03-24T12:00:00+00:00", + "agent_key": "agent-key-OLD", + "agent_key_expires_at": "2026-03-24T13:30:00+00:00", + } + }, + }, + ) + + from agent.credential_pool import load_pool + + pool = load_pool("nous") + entry = pool.select() + assert entry is not None + assert entry.refresh_token == "refresh-OLD" + + # Simulate another process refreshing the token in auth.json + _write_auth_store( + tmp_path, + { + "version": 1, + "active_provider": "nous", + "providers": { + "nous": { + "portal_base_url": "https://portal.example.com", + "inference_base_url": "https://inference.example.com/v1", + "client_id": "hermes-cli", + "token_type": "Bearer", + "scope": "inference:mint_agent_key", + "access_token": "access-NEW", + "refresh_token": "refresh-NEW", + "expires_at": "2026-03-24T12:30:00+00:00", + "agent_key": "agent-key-NEW", + "agent_key_expires_at": "2026-03-24T14:00:00+00:00", + } + }, + }, + ) + + synced = pool._sync_nous_entry_from_auth_store(entry) + assert synced is not entry + assert synced.access_token == "access-NEW" + assert synced.refresh_token == "refresh-NEW" + assert synced.agent_key == "agent-key-NEW" + assert synced.agent_key_expires_at == "2026-03-24T14:00:00+00:00" + +def test_sync_nous_entry_noop_when_tokens_match(tmp_path, monkeypatch): + """When auth.json has the same refresh token, sync should be a no-op.""" + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes")) + _write_auth_store( + tmp_path, + { + "version": 1, + "active_provider": "nous", + "providers": { + "nous": { + "portal_base_url": "https://portal.example.com", + "inference_base_url": "https://inference.example.com/v1", + "client_id": "hermes-cli", + "token_type": "Bearer", + "scope": "inference:mint_agent_key", + "access_token": "access-token", + "refresh_token": "refresh-token", + "expires_at": "2026-03-24T12:00:00+00:00", + "agent_key": "agent-key", + "agent_key_expires_at": "2026-03-24T13:30:00+00:00", + } + }, + }, + ) + + from agent.credential_pool import load_pool + + pool = load_pool("nous") + entry = pool.select() + assert entry is not None + + synced = pool._sync_nous_entry_from_auth_store(entry) + assert synced is entry + +def test_nous_exhausted_entry_recovers_via_auth_store_sync(tmp_path, monkeypatch): + """An exhausted Nous entry should recover when auth.json has newer tokens.""" + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes")) + from agent.credential_pool import load_pool, STATUS_EXHAUSTED + from dataclasses import replace as dc_replace + + _write_auth_store( + tmp_path, + { + "version": 1, + "active_provider": "nous", + "providers": { + "nous": { + "portal_base_url": "https://portal.example.com", + "inference_base_url": "https://inference.example.com/v1", + "client_id": "hermes-cli", + "token_type": "Bearer", + "scope": "inference:mint_agent_key", + "access_token": "access-OLD", + "refresh_token": "refresh-OLD", + "expires_at": "2026-03-24T12:00:00+00:00", + "agent_key": "agent-key", + "agent_key_expires_at": "2026-03-24T13:30:00+00:00", + } + }, + }, + ) + + pool = load_pool("nous") + entry = pool.select() + assert entry is not None + + # Mark entry as exhausted (simulating a failed refresh) + exhausted = dc_replace( + entry, + last_status=STATUS_EXHAUSTED, + last_status_at=time.time(), + last_error_code=401, + ) + pool._replace_entry(entry, exhausted) + pool._persist() + + # Simulate another process having successfully refreshed + _write_auth_store( + tmp_path, + { + "version": 1, + "active_provider": "nous", + "providers": { + "nous": { + "portal_base_url": "https://portal.example.com", + "inference_base_url": "https://inference.example.com/v1", + "client_id": "hermes-cli", + "token_type": "Bearer", + "scope": "inference:mint_agent_key", + "access_token": "access-FRESH", + "refresh_token": "refresh-FRESH", + "expires_at": "2026-03-24T12:30:00+00:00", + "agent_key": "agent-key-FRESH", + "agent_key_expires_at": "2026-03-24T14:00:00+00:00", + } + }, + }, + ) + + available = pool._available_entries(clear_expired=True) + assert len(available) == 1 + assert available[0].refresh_token == "refresh-FRESH" + assert available[0].last_status is None