diff --git a/agent/credential_pool.py b/agent/credential_pool.py
index 34c8f6db77..0043c70ca2 100644
--- a/agent/credential_pool.py
+++ b/agent/credential_pool.py
@@ -68,8 +68,10 @@ SUPPORTED_POOL_STRATEGIES = {
 }
 
 # Cooldown before retrying an exhausted credential.
-# 429 (rate-limited) and 402 (billing/quota) both cool down after 1 hour.
+# Transient 401 auth failures cool down briefly so single-key setups can recover.
+# 429 (rate-limited), 402 (billing/quota), and other failures cool down after 1 hour.
 # Provider-supplied reset_at timestamps override these defaults.
+EXHAUSTED_TTL_401_SECONDS = 5 * 60  # 5 minutes
 EXHAUSTED_TTL_429_SECONDS = 60 * 60  # 1 hour
 EXHAUSTED_TTL_DEFAULT_SECONDS = 60 * 60  # 1 hour
 
@@ -190,6 +192,8 @@ def _is_manual_source(source: str) -> bool:
 
 def _exhausted_ttl(error_code: Optional[int]) -> int:
     """Return cooldown seconds based on the HTTP status that caused exhaustion."""
+    if error_code == 401:
+        return EXHAUSTED_TTL_401_SECONDS
     if error_code == 429:
         return EXHAUSTED_TTL_429_SECONDS
     return EXHAUSTED_TTL_DEFAULT_SECONDS
diff --git a/tests/agent/test_credential_pool.py b/tests/agent/test_credential_pool.py
index e656a3e0b3..299567a9a6 100644
--- a/tests/agent/test_credential_pool.py
+++ b/tests/agent/test_credential_pool.py
@@ -250,6 +250,42 @@ def test_exhausted_402_entry_resets_after_one_hour(tmp_path, monkeypatch):
     assert entry.last_status == "ok"
 
 
+def test_exhausted_401_entry_resets_after_five_minutes(tmp_path, monkeypatch):
+    """Transient auth failures should not strand single-key setups for an hour."""
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
+    _write_auth_store(
+        tmp_path,
+        {
+            "version": 1,
+            "credential_pool": {
+                "openrouter": [
+                    {
+                        "id": "cred-1",
+                        "label": "primary",
+                        "auth_type": "api_key",
+                        "priority": 0,
+                        "source": "manual",
+                        "access_token": "***",
+                        "base_url": "https://openrouter.ai/api/v1",
+                        "last_status": "exhausted",
+                        "last_status_at": time.time() - 310,
+                        "last_error_code": 401,
+                    }
+                ]
+            },
+        },
+    )
+
+    from agent.credential_pool import load_pool
+
+    pool = load_pool("openrouter")
+    entry = pool.select()
+
+    assert entry is not None
+    assert entry.id == "cred-1"
+    assert entry.last_status == "ok"
+
+
 def test_explicit_reset_timestamp_overrides_default_429_ttl(tmp_path, monkeypatch):
     monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
     # Prevent auto-seeding from Codex CLI tokens on the host