From a4091e49f10ddceaac1a902848aabfb1b9aae210 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Thu, 25 Jun 2026 19:14:06 -0700 Subject: [PATCH] fix(auth): write rotated Codex/xAI pool grant through to global root (#48415) (#52760) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CredentialPool._sync_device_code_entry_to_auth_store rotated single-use OAuth refresh tokens but wrote the new chain only into the active profile store. When a profile resolves a grant from the global-root fallback (read_credential_pool, #18594) and the pool then refreshes it, root was left holding a now-revoked refresh token — every other profile reading the stale root grant subsequently died with refresh_token_reused / invalid_grant once its access token expired. This is the credential-pool analog of #43589 (which fixed the non-pool xAI refresh path in _save_xai_oauth_tokens). Detect the read-from-root case (profile lacks its own providers. block) BEFORE the profile save and, after it, write the rotated chain back to the global root via a best-effort, seat-belted write-through. A profile that genuinely shadows root (owns the block) is untouched; classic mode (profile == root) is a no-op; a failed root write never breaks the profile's own save. Covers openai-codex (reported), xai-oauth, and nous through the shared sync path. --- agent/credential_pool.py | 84 ++++++++ ...test_credential_pool_oauth_writethrough.py | 190 ++++++++++++++++++ 2 files changed, 274 insertions(+) create mode 100644 tests/agent/test_credential_pool_oauth_writethrough.py diff --git a/agent/credential_pool.py b/agent/credential_pool.py index 4e883cffaa0..e4aa575ab04 100644 --- a/agent/credential_pool.py +++ b/agent/credential_pool.py @@ -11,6 +11,7 @@ import uuid import re from dataclasses import dataclass, fields, replace from datetime import datetime, timezone +from pathlib import Path from typing import Any, Dict, List, Optional, Set, Tuple from hermes_constants import OPENROUTER_BASE_URL @@ -447,6 +448,63 @@ def get_pool_strategy(provider: str) -> str: DEFAULT_MAX_CONCURRENT_PER_CREDENTIAL = 1 +def _write_through_provider_state_to_global_root( + provider_id: str, state: Dict[str, Any] +) -> None: + """Persist a rotated OAuth ``state`` into the global-root auth.json. + + Best-effort write-through for the multi-profile rotation hazard + (#48415 / #43589): nous, openai-codex, and xai-oauth rotate the + refresh_token on refresh, so when a profile pool refresh rotates a grant + it resolved from the root fallback, the rotated chain must land back in + root. Otherwise root keeps a now-revoked refresh token and every other + profile reading the stale root grant dies with ``refresh_token_reused`` / + ``invalid_grant`` once its access token expires. + + Only updates ``providers.`` in the root store; never touches + the profile store (the caller already saved that). Swallows all errors — a + failed write-through degrades to the pre-existing behavior (root stale), it + must never break the profile's own successful save. Mirrors + ``hermes_cli.auth._write_through_xai_oauth_to_global_root`` (which covers + the non-pool xAI refresh path) for the credential-pool refresh path. + """ + try: + global_path = auth_mod._global_auth_file_path() + except Exception: + return + if global_path is None: + # Classic mode (profile == root); the profile save already hit root. + return + # Seat belt: under pytest, refuse to write the real user's + # ~/.hermes/auth.json even when HERMES_HOME points at a profile path + # (mirrors the read-side guard in _load_global_auth_store). Uses the + # unmodified HOME env, not Path.home() which fixtures may monkeypatch. + if os.environ.get("PYTEST_CURRENT_TEST"): + real_home_env = os.environ.get("HOME", "") + if real_home_env: + real_root = Path(real_home_env) / ".hermes" / "auth.json" + try: + if global_path.resolve(strict=False) == real_root.resolve(strict=False): + return + except Exception: + return + try: + if global_path.exists(): + global_store = _load_auth_store(global_path) + else: + global_store = {} + if not isinstance(global_store, dict): + return + _store_provider_state(global_store, provider_id, dict(state), set_active=False) + auth_mod._save_auth_store(global_store, global_path) + except Exception as exc: # pragma: no cover - best effort + logger.debug( + "%s pool refresh: write-through to global root failed: %s", + provider_id, + exc, + ) + + class CredentialPool: def __init__(self, provider: str, entries: List[PooledCredential]): self.provider = provider @@ -800,6 +858,28 @@ class CredentialPool: try: with _auth_store_lock(): auth_store = _load_auth_store() + # Decide BEFORE writing whether this profile is reading the + # grant from the global root (no own providers. block) vs. + # genuinely shadowing it. A pool refresh rotates single-use + # OAuth refresh tokens, so a profile that resolved the grant + # from root MUST write the rotated chain back to root too — + # otherwise root keeps a revoked refresh token and every other + # profile reading the stale root grant dies with + # refresh_token_reused / invalid_grant once its access token + # expires. This mirrors the xAI write-through in + # hermes_cli.auth._save_xai_oauth_tokens (#43589); the pool + # refresh path is the Codex/xAI analog reported in #48415. + _wt_provider_id = { + "nous": "nous", + "openai-codex": "openai-codex", + "xai-oauth": "xai-oauth", + }.get(self.provider) + write_through_to_root = bool(_wt_provider_id) and not ( + isinstance(auth_store.get("providers"), dict) + and isinstance( + auth_store["providers"].get(_wt_provider_id), dict + ) + ) if self.provider == "nous": state = _load_provider_state(auth_store, "nous") if state is None: @@ -855,6 +935,10 @@ class CredentialPool: return _save_auth_store(auth_store) + if write_through_to_root and _wt_provider_id: + _write_through_provider_state_to_global_root( + _wt_provider_id, state + ) except Exception as exc: logger.debug("Failed to sync %s pool entry back to auth store: %s", self.provider, exc) diff --git a/tests/agent/test_credential_pool_oauth_writethrough.py b/tests/agent/test_credential_pool_oauth_writethrough.py new file mode 100644 index 00000000000..819e304f469 --- /dev/null +++ b/tests/agent/test_credential_pool_oauth_writethrough.py @@ -0,0 +1,190 @@ +"""Regression tests for credential-pool OAuth refresh write-through to root. + +Companion to ``tests/hermes_cli/test_xai_oauth_writethrough.py``. That file +covers the *non-pool* xAI refresh path (``_save_xai_oauth_tokens``). These +cover the **credential-pool** refresh path +(``CredentialPool._sync_device_code_entry_to_auth_store``): when a profile +that has no own ``providers.`` block refreshes — via the pool — a rotating +OAuth grant it resolved from the global-root fallback, the rotated chain must +be written back to the global root too. Otherwise root keeps a revoked refresh +token and every other profile reading root's stale grant dies with +``refresh_token_reused`` / ``invalid_grant`` once its access token expires +(issue #48415, the Codex/xAI analog of #43589). + +The tests drive the real ``_sync_device_code_entry_to_auth_store`` against +real on-disk auth stores (profile + root under ``tmp_path``) rather than +mocking the save boundary, so they exercise the actual atomic write path. +""" + +import json + +import pytest + +from agent import credential_pool as CP +from agent.credential_pool import ( + AUTH_TYPE_OAUTH, + CredentialPool, + PooledCredential, +) +from hermes_cli import auth as A + + +def _write_store(path, store): + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(json.dumps(store), encoding="utf-8") + + +def _read_store(path): + return json.loads(path.read_text(encoding="utf-8")) + + +def _entry(provider: str, *, id: str, access_token: str, refresh_token: str): + return PooledCredential( + provider=provider, + id=id, + label="cred", + auth_type=AUTH_TYPE_OAUTH, + priority=0, + source="device_code", + access_token=access_token, + refresh_token=refresh_token, + ) + + +@pytest.fixture +def profile_and_root(tmp_path, monkeypatch): + """Wire a profile auth store + a distinct global-root auth store on disk. + + The pytest seat belt in ``_write_through_provider_state_to_global_root`` + only refuses the *real* user's ``$HOME/.hermes/auth.json``; a tmp_path + root is allowed, so point HOME away from the tmp root to keep the guard + from tripping on these fixtures. + """ + profile_path = tmp_path / "profiles" / "work" / "auth.json" + root_path = tmp_path / "root" / "auth.json" + + monkeypatch.setattr(A, "_auth_file_path", lambda: profile_path) + monkeypatch.setattr(A, "_global_auth_file_path", lambda: root_path) + monkeypatch.setenv("HOME", str(tmp_path / "not-the-root")) + return profile_path, root_path + + +@pytest.mark.parametrize( + "provider", + ["openai-codex", "xai-oauth"], +) +def test_pool_refresh_writes_through_to_root_when_profile_reads_root( + profile_and_root, provider +): + """A profile reading root's grant must push rotated tokens back to root.""" + profile_path, root_path = profile_and_root + # Profile has NO own provider block (reads root via fallback). + _write_store(profile_path, {"version": 1, "providers": {}}) + _write_store( + root_path, + { + "version": 1, + "providers": { + provider: { + "tokens": { + "access_token": "old-access", + "refresh_token": "old-refresh", + } + } + }, + }, + ) + + pool = CredentialPool(provider, []) + pool._sync_device_code_entry_to_auth_store( + _entry(provider, id="e1", access_token="new-access", refresh_token="new-refresh") + ) + + # Profile got the rotated chain (existing behavior). + profile = _read_store(profile_path) + assert ( + profile["providers"][provider]["tokens"]["refresh_token"] == "new-refresh" + ) + + # AND the global root no longer holds the revoked refresh token (#48415). + root = _read_store(root_path) + assert root["providers"][provider]["tokens"]["access_token"] == "new-access" + assert root["providers"][provider]["tokens"]["refresh_token"] == "new-refresh" + + +@pytest.mark.parametrize( + "provider", + ["openai-codex", "xai-oauth"], +) +def test_pool_refresh_does_not_touch_root_when_profile_shadows( + profile_and_root, provider +): + """A profile that genuinely shadows root must NOT clobber the root grant.""" + profile_path, root_path = profile_and_root + # Profile has its OWN provider block: it shadows root legitimately. + _write_store( + profile_path, + { + "version": 1, + "providers": { + provider: { + "tokens": { + "access_token": "profile-old", + "refresh_token": "profile-old-refresh", + } + } + }, + }, + ) + _write_store( + root_path, + { + "version": 1, + "providers": { + provider: { + "tokens": { + "access_token": "root-untouched", + "refresh_token": "root-untouched-refresh", + } + } + }, + }, + ) + + pool = CredentialPool(provider, []) + pool._sync_device_code_entry_to_auth_store( + _entry( + provider, + id="e2", + access_token="profile-new", + refresh_token="profile-new-refresh", + ) + ) + + profile = _read_store(profile_path) + assert ( + profile["providers"][provider]["tokens"]["refresh_token"] + == "profile-new-refresh" + ) + + # Root keeps its own grant — write-through must not run when the profile + # owns the block. + root = _read_store(root_path) + assert ( + root["providers"][provider]["tokens"]["refresh_token"] + == "root-untouched-refresh" + ) + + +def test_write_through_helper_is_noop_in_classic_mode(monkeypatch, tmp_path): + """When profile == root (classic mode), the helper must be a no-op. + + ``_global_auth_file_path`` returns None in classic mode; the profile save + already wrote to root, so a second write would be redundant (and the + helper has nothing to target). + """ + monkeypatch.setattr(A, "_global_auth_file_path", lambda: None) + # Must not raise and must not attempt any write. + CP._write_through_provider_state_to_global_root( + "openai-codex", {"tokens": {"access_token": "a", "refresh_token": "r"}} + )