fix(auth): codex chat path falls back to credential_pool when singleton is empty

Closes #32992.

The chat path resolves Codex credentials via `resolve_codex_runtime_credentials`
which only reads `providers.openai-codex.tokens` (the singleton). The auxiliary
path uses `_read_codex_access_token` which checks the credential_pool first.
For users whose tokens live only in the pool — manual seed, partial re-auth,
restore from backup, or any state where the singleton is empty but the pool
is healthy — the chat path raised AuthError or (worse, since OpenAI(api_key='')
silently attaches no header) the wire saw HTTP 401 "Missing Authentication header"
while the auxiliary path worked fine.

This adds a pool fallback to `resolve_codex_runtime_credentials`: when the
singleton has no usable access_token, scan `credential_pool.openai-codex` for
the first entry that has a non-empty access_token and isn't in an exhaustion
cooldown window (`last_error_reset_at` in the future). If found, return that
token with `source="credential_pool"`. If no usable entry exists, the original
AuthError propagates as before.

Regression tests cover:
- Empty singleton + healthy pool entry → pool token returned
- Pool fallback skips entries currently in cooldown
- Empty singleton + empty/wedged pool → AuthError propagates (existing contract preserved)
This commit is contained in:
Teknium 2026-05-27 03:33:52 -07:00
parent 3e33e14335
commit 69dfcdcc15
2 changed files with 162 additions and 2 deletions

View file

@ -125,6 +125,98 @@ def test_resolve_codex_runtime_credentials_force_refresh(tmp_path, monkeypatch):
assert resolved["api_key"] == "access-forced"
def test_resolve_codex_runtime_credentials_falls_back_to_pool_when_singleton_empty(tmp_path, monkeypatch):
"""Regression for #32992 — chat path returns 401 when singleton is empty but pool has creds.
The chat path historically went through ``resolve_codex_runtime_credentials`` which
only consulted ``providers.openai-codex.tokens`` and raised ``AuthError`` when that
was empty. The auxiliary path went through ``_read_codex_access_token`` which
checks the pool first. Users with creds only in the pool (manual seed, partial
re-auth, restore from backup) hit a bare HTTP 401 on chat but worked fine on
auxiliary calls. The fallback closes that divergence.
"""
hermes_home = tmp_path / "hermes"
hermes_home.mkdir(parents=True, exist_ok=True)
# Singleton: empty tokens (would normally raise AuthError).
# Pool: valid access_token.
auth_store = {
"version": 1,
"providers": {}, # no openai-codex singleton at all
"credential_pool": {
"openai-codex": [
{
"source": "device_code",
"access_token": "pool-fallback-token",
"refresh_token": "pool-refresh",
"last_status": "ok",
"auth_type": "oauth",
},
],
},
}
(hermes_home / "auth.json").write_text(json.dumps(auth_store))
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
resolved = resolve_codex_runtime_credentials()
assert resolved["api_key"] == "pool-fallback-token"
assert resolved["source"] == "credential_pool"
assert resolved["base_url"] # default codex backend URL
def test_resolve_codex_runtime_credentials_pool_fallback_skips_exhausted(tmp_path, monkeypatch):
"""The pool fallback skips entries currently in an exhaustion cooldown window."""
import time as _time
hermes_home = tmp_path / "hermes"
hermes_home.mkdir(parents=True, exist_ok=True)
future_reset = _time.time() + 3600 # 1h cooldown remaining
auth_store = {
"version": 1,
"providers": {},
"credential_pool": {
"openai-codex": [
{
"source": "device_code",
"access_token": "wedged-token",
"last_error_reset_at": future_reset, # in cooldown
},
{
"source": "device_code",
"access_token": "usable-token",
"last_status": "ok",
},
],
},
}
(hermes_home / "auth.json").write_text(json.dumps(auth_store))
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
resolved = resolve_codex_runtime_credentials()
assert resolved["api_key"] == "usable-token"
assert resolved["source"] == "credential_pool"
def test_resolve_codex_runtime_credentials_pool_fallback_no_usable_entry(tmp_path, monkeypatch):
"""When both singleton and pool are empty/unusable, the original AuthError propagates."""
hermes_home = tmp_path / "hermes"
hermes_home.mkdir(parents=True, exist_ok=True)
auth_store = {
"version": 1,
"providers": {},
"credential_pool": {
"openai-codex": [
{"source": "device_code", "access_token": ""}, # empty
],
},
}
(hermes_home / "auth.json").write_text(json.dumps(auth_store))
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
with pytest.raises(AuthError) as exc:
resolve_codex_runtime_credentials()
assert exc.value.code == "codex_auth_missing"
def test_resolve_provider_explicit_codex_does_not_fallback(monkeypatch):
monkeypatch.delenv("OPENAI_API_KEY", raising=False)
monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)