From 1fc77f995ba892b675f7120cf29fcd2d90add31c Mon Sep 17 00:00:00 2001 From: Prasad Subrahmanya Date: Sun, 19 Apr 2026 14:25:59 +0200 Subject: [PATCH] fix(agent): fall back on rate limit when pool has no rotation room MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extracts pool-rotation-room logic into `_pool_may_recover_from_rate_limit` so single-credential pools no longer block the eager-fallback path on 429. The existing check `pool is not None and pool.has_available()` lets fallback fire only after the pool marks every entry as exhausted. With exactly one credential in the pool (the common shape for Gemini OAuth, Vertex service accounts, and any personal-key setup), `has_available()` flips back to True as soon as the cooldown expires — Hermes retries against the same entry, hits the same daily-quota 429, and burns the retry budget in a tight loop before ever reaching the configured `fallback_model`. Observed in the wild as 4+ hours of 429 noise on a single Gemini key instead of falling through to Vertex as configured. Rotation is only meaningful with more than one credential — gate on `len(pool.entries()) > 1`. Multi-credential pools keep the current wait-for-rotation behaviour unchanged. Fixes #11314. Related to #8947, #10210, #7230. Narrower scope than open PRs #8023 (classifier change) and #11492 (503/529 credential-pool bypass) — this addresses the single-credential 429 case specifically and does not conflict with either. Tests: 6 new unit tests in tests/run_agent/test_provider_fallback.py covering (a) None pool, (b) single-cred available, (c) single-cred in cooldown, (d) 2-cred available rotates, (e) multi-cred all cooling-down falls back, (f) many-cred available rotates. All 18 tests in the file pass. --- run_agent.py | 35 ++++++++++++++++--- tests/run_agent/test_provider_fallback.py | 41 ++++++++++++++++++++++- 2 files changed, 70 insertions(+), 6 deletions(-) diff --git a/run_agent.py b/run_agent.py index fe4dbc68c..b605585f2 100644 --- a/run_agent.py +++ b/run_agent.py @@ -693,6 +693,31 @@ def _routermint_headers() -> dict: } +def _pool_may_recover_from_rate_limit(pool) -> bool: + """Decide whether to wait for credential-pool rotation instead of falling back. + + The existing pool-rotation path requires the pool to (1) exist and (2) have + at least one entry not currently in exhaustion cooldown. But rotation is + only meaningful when the pool has more than one entry. + + With a single-credential pool (common for Gemini OAuth, Vertex service + accounts, and any "one personal key" configuration), the primary entry + just 429'd and there is nothing to rotate to. Waiting for the pool + cooldown to expire means retrying against the same exhausted quota — the + daily-quota 429 will recur immediately, and the retry budget is burned. + + In that case we must fall back to the configured ``fallback_model`` + instead. Returns True only when rotation has somewhere to go. + + See issue #11314. + """ + if pool is None: + return False + if not pool.has_available(): + return False + return len(pool.entries()) > 1 + + def _qwen_portal_headers() -> dict: """Return default HTTP headers required by Qwen Portal API.""" import platform as _plat @@ -10582,11 +10607,11 @@ class AIAgent: ) if is_rate_limited and self._fallback_index < len(self._fallback_chain): # Don't eagerly fallback if credential pool rotation may - # still recover. The pool's retry-then-rotate cycle needs - # at least one more attempt to fire — jumping to a fallback - # provider here short-circuits it. - pool = self._credential_pool - pool_may_recover = pool is not None and pool.has_available() + # still recover. See _pool_may_recover_from_rate_limit + # for the single-credential-pool exception. Fixes #11314. + pool_may_recover = _pool_may_recover_from_rate_limit( + self._credential_pool + ) if not pool_may_recover: self._emit_status("⚠️ Rate limited — switching to fallback provider...") if self._try_activate_fallback(): diff --git a/tests/run_agent/test_provider_fallback.py b/tests/run_agent/test_provider_fallback.py index 88982437e..44de0846f 100644 --- a/tests/run_agent/test_provider_fallback.py +++ b/tests/run_agent/test_provider_fallback.py @@ -7,7 +7,7 @@ advancement through multiple providers. from unittest.mock import MagicMock, patch -from run_agent import AIAgent +from run_agent import AIAgent, _pool_may_recover_from_rate_limit def _make_agent(fallback_model=None): @@ -181,3 +181,42 @@ class TestFallbackChainAdvancement: ): assert agent._try_activate_fallback() is True assert mock_rpc.call_args.kwargs["explicit_api_key"] == "env-secret" + + +# ── Pool-rotation vs fallback gating (#11314) ──────────────────────────── + + +def _pool(n_entries: int, has_available: bool = True): + """Make a minimal credential-pool stand-in for rotation-room checks.""" + pool = MagicMock() + pool.entries.return_value = [MagicMock() for _ in range(n_entries)] + pool.has_available.return_value = has_available + return pool + + +class TestPoolRotationRoom: + def test_none_pool_returns_false(self): + assert _pool_may_recover_from_rate_limit(None) is False + + def test_single_credential_returns_false(self): + """With one credential that just 429'd, rotation has nowhere to go. + + The pool may still report has_available() True once cooldown expires, + but retrying against the same entry will hit the same daily-quota + 429 and burn the retry budget. Must fall back. + """ + assert _pool_may_recover_from_rate_limit(_pool(1)) is False + + def test_single_credential_in_cooldown_returns_false(self): + assert _pool_may_recover_from_rate_limit(_pool(1, has_available=False)) is False + + def test_two_credentials_available_returns_true(self): + """With >1 credentials and at least one available, rotate instead of fallback.""" + assert _pool_may_recover_from_rate_limit(_pool(2)) is True + + def test_multiple_credentials_all_in_cooldown_returns_false(self): + """All credentials cooling down — fall back rather than wait.""" + assert _pool_may_recover_from_rate_limit(_pool(3, has_available=False)) is False + + def test_many_credentials_available_returns_true(self): + assert _pool_may_recover_from_rate_limit(_pool(10)) is True