diff --git a/run_agent.py b/run_agent.py
index fe4dbc68c6..b605585f21 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -693,6 +693,31 @@ def _routermint_headers() -> dict:
     }
 
 
+def _pool_may_recover_from_rate_limit(pool) -> bool:
+    """Decide whether to wait for credential-pool rotation instead of falling back.
+
+    Rotation is treated as viable only when the pool (1) exists, (2) has at
+    least one entry not currently in exhaustion cooldown, and (3) holds more
+    than one entry, so there is a different credential to rotate to.
+
+    With a single-credential pool (common for Gemini OAuth, Vertex service
+    accounts, and any "one personal key" configuration), the primary entry
+    just 429'd and there is nothing to rotate to. Waiting for the pool
+    cooldown to expire means retrying against the same exhausted quota — the
+    daily-quota 429 will recur immediately, and the retry budget is burned.
+
+    In that case we must fall back to the configured ``fallback_model``
+    instead. Returns True only when rotation has somewhere to go.
+
+    See issue #11314.
+    """
+    if pool is None:
+        return False
+    if not pool.has_available():
+        return False
+    return len(pool.entries()) > 1
+
+
 def _qwen_portal_headers() -> dict:
     """Return default HTTP headers required by Qwen Portal API."""
     import platform as _plat
@@ -10582,11 +10607,11 @@ class AIAgent:
                     )
                     if is_rate_limited and self._fallback_index < len(self._fallback_chain):
                         # Don't eagerly fallback if credential pool rotation may
-                        # still recover. The pool's retry-then-rotate cycle needs
-                        # at least one more attempt to fire — jumping to a fallback
-                        # provider here short-circuits it.
-                        pool = self._credential_pool
-                        pool_may_recover = pool is not None and pool.has_available()
+                        # still recover. See _pool_may_recover_from_rate_limit
+                        # for the single-credential-pool exception. Fixes #11314.
+                        pool_may_recover = _pool_may_recover_from_rate_limit(
+                            self._credential_pool
+                        )
                         if not pool_may_recover:
                             self._emit_status("⚠️ Rate limited — switching to fallback provider...")
                             if self._try_activate_fallback():
diff --git a/tests/run_agent/test_provider_fallback.py b/tests/run_agent/test_provider_fallback.py
index 88982437e3..44de0846f4 100644
--- a/tests/run_agent/test_provider_fallback.py
+++ b/tests/run_agent/test_provider_fallback.py
@@ -7,7 +7,7 @@ advancement through multiple providers.
 
 from unittest.mock import MagicMock, patch
 
-from run_agent import AIAgent
+from run_agent import AIAgent, _pool_may_recover_from_rate_limit
 
 
 def _make_agent(fallback_model=None):
@@ -181,3 +181,42 @@ class TestFallbackChainAdvancement:
         ):
             assert agent._try_activate_fallback() is True
             assert mock_rpc.call_args.kwargs["explicit_api_key"] == "env-secret"
+
+
+# ── Pool-rotation vs fallback gating (#11314) ────────────────────────────
+
+
+def _pool(n_entries: int, has_available: bool = True):
+    """Make a minimal credential-pool stand-in for rotation-room checks."""
+    pool = MagicMock()
+    pool.entries.return_value = [MagicMock() for _ in range(n_entries)]
+    pool.has_available.return_value = has_available
+    return pool
+
+
+class TestPoolRotationRoom:
+    def test_none_pool_returns_false(self):
+        assert _pool_may_recover_from_rate_limit(None) is False
+
+    def test_single_credential_returns_false(self):
+        """With one credential that just 429'd, rotation has nowhere to go.
+
+        The pool may still report has_available() True once cooldown expires,
+        but retrying against the same entry will hit the same daily-quota
+        429 and burn the retry budget. Must fall back.
+        """
+        assert _pool_may_recover_from_rate_limit(_pool(1)) is False
+
+    def test_single_credential_in_cooldown_returns_false(self):
+        assert _pool_may_recover_from_rate_limit(_pool(1, has_available=False)) is False
+
+    def test_two_credentials_available_returns_true(self):
+        """With >1 credentials and at least one available, rotate instead of fallback."""
+        assert _pool_may_recover_from_rate_limit(_pool(2)) is True
+
+    def test_multiple_credentials_all_in_cooldown_returns_false(self):
+        """All credentials cooling down — fall back rather than wait."""
+        assert _pool_may_recover_from_rate_limit(_pool(3, has_available=False)) is False
+
+    def test_many_credentials_available_returns_true(self):
+        assert _pool_may_recover_from_rate_limit(_pool(10)) is True