fix(agent): fall back on rate limit when pool has no rotation room

Extracts pool-rotation-room logic into `_pool_may_recover_from_rate_limit`
so single-credential pools no longer block the eager-fallback path on 429.

The existing check `pool is not None and pool.has_available()` lets
fallback fire only after the pool marks every entry as exhausted.  With
exactly one credential in the pool (the common shape for Gemini OAuth,
Vertex service accounts, and any personal-key setup), `has_available()`
flips back to True as soon as the cooldown expires — Hermes retries
against the same entry, hits the same daily-quota 429, and burns the
retry budget in a tight loop before ever reaching the configured
`fallback_model`.  Observed in the wild as 4+ hours of 429 noise on a
single Gemini key instead of falling through to Vertex as configured.

Rotation is only meaningful with more than one credential — gate on
`len(pool.entries()) > 1`.  Multi-credential pools keep the current
wait-for-rotation behaviour unchanged.

Fixes #11314.  Related to #8947, #10210, #7230.  Narrower scope than
open PRs #8023 (classifier change) and #11492 (503/529 credential-pool
bypass) — this addresses the single-credential 429 case specifically
and does not conflict with either.

Tests: 6 new unit tests in tests/run_agent/test_provider_fallback.py
covering (a) None pool, (b) single-cred available, (c) single-cred in
cooldown, (d) 2-cred available rotates, (e) multi-cred all cooling-down
falls back, (f) many-cred available rotates.  All 18 tests in the file
pass.
This commit is contained in:
Prasad Subrahmanya 2026-04-19 14:25:59 +02:00 committed by Teknium
parent 1af44a13c0
commit 1fc77f995b
2 changed files with 70 additions and 6 deletions

View file

@ -7,7 +7,7 @@ advancement through multiple providers.
from unittest.mock import MagicMock, patch
from run_agent import AIAgent
from run_agent import AIAgent, _pool_may_recover_from_rate_limit
def _make_agent(fallback_model=None):
@ -181,3 +181,42 @@ class TestFallbackChainAdvancement:
):
assert agent._try_activate_fallback() is True
assert mock_rpc.call_args.kwargs["explicit_api_key"] == "env-secret"
# ── Pool-rotation vs fallback gating (#11314) ────────────────────────────
def _pool(n_entries: int, has_available: bool = True):
"""Make a minimal credential-pool stand-in for rotation-room checks."""
pool = MagicMock()
pool.entries.return_value = [MagicMock() for _ in range(n_entries)]
pool.has_available.return_value = has_available
return pool
class TestPoolRotationRoom:
    """Pin the return value of ``_pool_may_recover_from_rate_limit``.

    Each case feeds the helper either ``None`` or a mock pool with a given
    entry count and availability flag, and asserts the exact boolean result.
    """

    def test_none_pool_returns_false(self):
        """A missing pool (``None``) yields False."""
        outcome = _pool_may_recover_from_rate_limit(None)
        assert outcome is False

    def test_single_credential_returns_false(self):
        """A lone credential cannot be rotated away from, so expect False.

        Even though the pool reports ``has_available()`` as True (as it would
        once the cooldown lapses), a retry would land on the very same entry
        and hit the same daily-quota 429, draining the retry budget. The
        caller must fall back instead.
        """
        lone_pool = _pool(1)
        outcome = _pool_may_recover_from_rate_limit(lone_pool)
        assert outcome is False

    def test_single_credential_in_cooldown_returns_false(self):
        """A lone credential that is unavailable also yields False."""
        cooling_pool = _pool(1, has_available=False)
        outcome = _pool_may_recover_from_rate_limit(cooling_pool)
        assert outcome is False

    def test_two_credentials_available_returns_true(self):
        """Two credentials with one available: rotate rather than fall back."""
        pair_pool = _pool(2)
        outcome = _pool_may_recover_from_rate_limit(pair_pool)
        assert outcome is True

    def test_multiple_credentials_all_in_cooldown_returns_false(self):
        """Every credential is cooling down: fall back instead of waiting."""
        exhausted_pool = _pool(3, has_available=False)
        outcome = _pool_may_recover_from_rate_limit(exhausted_pool)
        assert outcome is False

    def test_many_credentials_available_returns_true(self):
        """A large pool reporting availability yields True."""
        big_pool = _pool(10)
        outcome = _pool_may_recover_from_rate_limit(big_pool)
        assert outcome is True