From a70ee1b898fb66fbe75757ba28d8070b28c65f8a Mon Sep 17 00:00:00 2001 From: Stefan Vandermeulen Date: Fri, 3 Apr 2026 12:30:28 +0200 Subject: [PATCH] fix: sync OAuth tokens between credential pool and credentials file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OAuth refresh tokens are single-use. When multiple consumers share the same Anthropic OAuth session (credential pool entries, Claude Code CLI, multiple Hermes profiles), whichever refreshes first invalidates the refresh token for all others. This causes a cascade: 1. Pool entry tries to refresh with a consumed refresh token → 400 2. Pool marks the credential as "exhausted" with a 24-hour cooldown 3. All subsequent heartbeats skip the credential entirely 4. The fallback to resolve_anthropic_token() only works while the access token in ~/.claude/.credentials.json hasn't expired 5. Once it expires, nothing can auto-recover without manual re-login Fix: - Add _sync_anthropic_entry_from_credentials_file() to detect when ~/.claude/.credentials.json has a newer refresh token and sync it into the pool entry, clearing exhaustion status - After a successful pool refresh, write the new tokens back to ~/.claude/.credentials.json so other consumers stay in sync - On refresh failure, check if the credentials file has a different (newer) refresh token and retry once before marking exhausted - In _available_entries(), sync exhausted claude_code entries from the credentials file before applying the 24-hour cooldown, so a manual re-login or external refresh immediately unblocks agents Co-Authored-By: Claude Opus 4.6 (1M context) --- agent/credential_pool.py | 99 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) diff --git a/agent/credential_pool.py b/agent/credential_pool.py index 003a5a8e763..204d154116d 100644 --- a/agent/credential_pool.py +++ b/agent/credential_pool.py @@ -303,6 +303,43 @@ class CredentialPool: self._persist() return updated + def _sync_anthropic_entry_from_credentials_file(self, entry: PooledCredential) -> PooledCredential: + """Sync a claude_code pool entry from ~/.claude/.credentials.json if tokens differ. + + OAuth refresh tokens are single-use. When something external (e.g. + Claude Code CLI, or another profile's pool) refreshes the token, it + writes the new pair to ~/.claude/.credentials.json. The pool entry's + refresh token becomes stale. This method detects that and syncs. + """ + if self.provider != "anthropic" or entry.source != "claude_code": + return entry + try: + from agent.anthropic_adapter import read_claude_code_credentials + creds = read_claude_code_credentials() + if not creds: + return entry + file_refresh = creds.get("refreshToken", "") + file_access = creds.get("accessToken", "") + file_expires = creds.get("expiresAt", 0) + # If the credentials file has a different token pair, sync it + if file_refresh and file_refresh != entry.refresh_token: + logger.debug("Pool entry %s: syncing tokens from credentials file (refresh token changed)", entry.id) + updated = replace( + entry, + access_token=file_access, + refresh_token=file_refresh, + expires_at_ms=file_expires, + last_status=None, + last_status_at=None, + last_error_code=None, + ) + self._replace_entry(entry, updated) + self._persist() + return updated + except Exception as exc: + logger.debug("Failed to sync from credentials file: %s", exc) + return entry + def _refresh_entry(self, entry: PooledCredential, *, force: bool) -> Optional[PooledCredential]: if entry.auth_type != AUTH_TYPE_OAUTH or not entry.refresh_token: if force: @@ -323,6 +360,19 @@ class CredentialPool: refresh_token=refreshed["refresh_token"], expires_at_ms=refreshed["expires_at_ms"], ) + # Keep ~/.claude/.credentials.json in sync so that the + # fallback path (resolve_anthropic_token) and other profiles + # see the latest tokens. + if entry.source == "claude_code": + try: + from agent.anthropic_adapter import _write_claude_code_credentials + _write_claude_code_credentials( + refreshed["access_token"], + refreshed["refresh_token"], + refreshed["expires_at_ms"], + ) + except Exception as wexc: + logger.debug("Failed to write refreshed token to credentials file: %s", wexc) elif self.provider == "openai-codex": refreshed = auth_mod.refresh_codex_oauth_pure( entry.access_token, @@ -369,6 +419,46 @@ class CredentialPool: return entry except Exception as exc: logger.debug("Credential refresh failed for %s/%s: %s", self.provider, entry.id, exc) + # For anthropic claude_code entries: the refresh token may have been + # consumed by another process. Check if ~/.claude/.credentials.json + # has a newer token pair and retry once. + if self.provider == "anthropic" and entry.source == "claude_code": + synced = self._sync_anthropic_entry_from_credentials_file(entry) + if synced.refresh_token != entry.refresh_token: + logger.debug("Retrying refresh with synced token from credentials file") + try: + from agent.anthropic_adapter import refresh_anthropic_oauth_pure + refreshed = refresh_anthropic_oauth_pure( + synced.refresh_token, + use_json=synced.source.endswith("hermes_pkce"), + ) + updated = replace( + synced, + access_token=refreshed["access_token"], + refresh_token=refreshed["refresh_token"], + expires_at_ms=refreshed["expires_at_ms"], + last_status=STATUS_OK, + last_status_at=None, + last_error_code=None, + ) + self._replace_entry(synced, updated) + self._persist() + try: + from agent.anthropic_adapter import _write_claude_code_credentials + _write_claude_code_credentials( + refreshed["access_token"], + refreshed["refresh_token"], + refreshed["expires_at_ms"], + ) + except Exception: + pass + return updated + except Exception as retry_exc: + logger.debug("Retry refresh also failed: %s", retry_exc) + elif not self._entry_needs_refresh(synced): + # Credentials file had a valid (non-expired) token — use it directly + logger.debug("Credentials file has valid token, using without refresh") + return synced self._mark_exhausted(entry, None) return None @@ -422,6 +512,15 @@ class CredentialPool: cleared_any = False available: List[PooledCredential] = [] for entry in self._entries: + # For anthropic claude_code entries, sync from the credentials file + # before any status/refresh checks. This picks up tokens refreshed + # by other processes (Claude Code CLI, other Hermes profiles). + if (self.provider == "anthropic" and entry.source == "claude_code" + and entry.last_status == STATUS_EXHAUSTED): + synced = self._sync_anthropic_entry_from_credentials_file(entry) + if synced is not entry: + entry = synced + cleared_any = True if entry.last_status == STATUS_EXHAUSTED: ttl = _exhausted_ttl(entry.last_error_code) if entry.last_status_at and now - entry.last_status_at < ttl: