diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index c037dab7f11..87069b3de8d 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -3516,8 +3516,36 @@ def resolve_codex_runtime_credentials( refresh_if_expiring: bool = True, refresh_skew_seconds: int = CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS, ) -> Dict[str, Any]: - """Resolve runtime credentials from Hermes's own Codex token store.""" - data = _read_codex_tokens() + """Resolve runtime credentials from Hermes's own Codex token store. + + Falls back to the credential pool when reading the singleton (``providers.openai-codex.tokens``) + raises ``AuthError`` but the pool (``credential_pool.openai-codex``) has a usable access_token. This + closes the divergence between the chat path (singleton-only via this function) and + the auxiliary path (pool-first via ``_read_codex_access_token``). Without this + fallback, a user whose tokens live only in the pool — for example after a manual + pool seed, a partial re-auth, or pool-only restoration from a backup — gets a bare + HTTP 401 ``Missing Authentication header`` from the wire instead of a usable + credential. The pool token is returned as-is (no refresh is attempted on this path). See issue #32992. + """ + try: + data = _read_codex_tokens() + except AuthError: + pool_token = _pool_codex_access_token() + if pool_token: + base_url = ( + os.getenv("HERMES_CODEX_BASE_URL", "").strip().rstrip("/") + or DEFAULT_CODEX_BASE_URL + ) + return { + "provider": "openai-codex", + "base_url": base_url, + "api_key": pool_token, + "source": "credential_pool", + "last_refresh": None, + "auth_mode": "chatgpt", + } + raise + tokens = dict(data["tokens"]) access_token = str(tokens.get("access_token", "") or "").strip() refresh_timeout_seconds = float(os.getenv("HERMES_CODEX_REFRESH_TIMEOUT_SECONDS", "20")) @@ -3555,6 +3583,46 @@ def resolve_codex_runtime_credentials( } +def _pool_codex_access_token() -> str: + """Return the first usable access_token from the openai-codex pool. 
+ + Used as a fallback by ``resolve_codex_runtime_credentials`` when reading the + singleton raises ``AuthError``. Reads ``credential_pool.openai-codex`` entries + directly from auth.json and picks the first non-empty access_token in list order, + skipping entries that are currently in an exhaustion cooldown. + Returns ``""`` when no usable entry is found or the lookup raises (caller handles by re-raising + the original AuthError). + """ + try: + with _auth_store_lock(): + auth_store = _load_auth_store() + pool = auth_store.get("credential_pool") + if not isinstance(pool, dict): + return "" + entries = pool.get("openai-codex") + if not isinstance(entries, list): + return "" + + def _entry_usable(entry: Dict[str, Any]) -> bool: + if not isinstance(entry, dict): + return False + token = entry.get("access_token") + if not isinstance(token, str) or not token.strip(): + return False + # Skip entries currently in an exhaustion cooldown window. + reset_at = entry.get("last_error_reset_at") + if isinstance(reset_at, (int, float)) and reset_at > time.time(): + return False + return True + + for entry in entries: + if _entry_usable(entry): + return str(entry.get("access_token", "")).strip() + except Exception: + logger.debug("Codex pool fallback lookup failed", exc_info=True) + return "" + + # ============================================================================= # xAI Grok OAuth — tokens stored in ~/.hermes/auth.json # ============================================================================= diff --git a/tests/hermes_cli/test_auth_codex_provider.py b/tests/hermes_cli/test_auth_codex_provider.py index 1fc0bc8e02d..7b1bec33929 100644 --- a/tests/hermes_cli/test_auth_codex_provider.py +++ b/tests/hermes_cli/test_auth_codex_provider.py @@ -125,6 +125,98 @@ def test_resolve_codex_runtime_credentials_force_refresh(tmp_path, monkeypatch): assert resolved["api_key"] == "access-forced" +def test_resolve_codex_runtime_credentials_falls_back_to_pool_when_singleton_empty(tmp_path, monkeypatch): + """Regression for #32992 
— chat path returns 401 when singleton is empty but pool has creds. + + The chat path historically went through ``resolve_codex_runtime_credentials`` which + only consulted ``providers.openai-codex.tokens`` and raised ``AuthError`` when that + was empty. The auxiliary path went through ``_read_codex_access_token`` which + checks the pool first. Users with creds only in the pool (manual seed, partial + re-auth, restore from backup) hit a bare HTTP 401 on chat but worked fine on + auxiliary calls. The fallback closes that divergence. + """ + hermes_home = tmp_path / "hermes" + hermes_home.mkdir(parents=True, exist_ok=True) + # Singleton: empty tokens (would normally raise AuthError). + # Pool: valid access_token. + auth_store = { + "version": 1, + "providers": {}, # no openai-codex singleton at all + "credential_pool": { + "openai-codex": [ + { + "source": "device_code", + "access_token": "pool-fallback-token", + "refresh_token": "pool-refresh", + "last_status": "ok", + "auth_type": "oauth", + }, + ], + }, + } + (hermes_home / "auth.json").write_text(json.dumps(auth_store)) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + resolved = resolve_codex_runtime_credentials() + assert resolved["api_key"] == "pool-fallback-token" + assert resolved["source"] == "credential_pool" + assert resolved["base_url"] # default codex backend URL + + +def test_resolve_codex_runtime_credentials_pool_fallback_skips_exhausted(tmp_path, monkeypatch): + """The pool fallback skips entries currently in an exhaustion cooldown window.""" + import time as _time + + hermes_home = tmp_path / "hermes" + hermes_home.mkdir(parents=True, exist_ok=True) + future_reset = _time.time() + 3600 # 1h cooldown remaining + auth_store = { + "version": 1, + "providers": {}, + "credential_pool": { + "openai-codex": [ + { + "source": "device_code", + "access_token": "wedged-token", + "last_error_reset_at": future_reset, # in cooldown + }, + { + "source": "device_code", + "access_token": "usable-token", 
"last_status": "ok", + }, + ], + }, + } + (hermes_home / "auth.json").write_text(json.dumps(auth_store)) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + resolved = resolve_codex_runtime_credentials() + assert resolved["api_key"] == "usable-token" + assert resolved["source"] == "credential_pool" + + +def test_resolve_codex_runtime_credentials_pool_fallback_no_usable_entry(tmp_path, monkeypatch): + """When both singleton and pool are empty/unusable, the original AuthError propagates.""" + hermes_home = tmp_path / "hermes" + hermes_home.mkdir(parents=True, exist_ok=True) + auth_store = { + "version": 1, + "providers": {}, + "credential_pool": { + "openai-codex": [ + {"source": "device_code", "access_token": ""}, # empty + ], + }, + } + (hermes_home / "auth.json").write_text(json.dumps(auth_store)) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + with pytest.raises(AuthError) as exc: + resolve_codex_runtime_credentials() + assert exc.value.code == "codex_auth_missing" + + def test_resolve_provider_explicit_codex_does_not_fallback(monkeypatch): monkeypatch.delenv("OPENAI_API_KEY", raising=False) monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)