From 526742199bef2ab184c92b87d4f78edb75aeeaa6 Mon Sep 17 00:00:00 2001 From: Justin Kausel Date: Tue, 21 Apr 2026 14:23:36 -0400 Subject: [PATCH] Prefer fallback for Gemini CloudCode rate limits --- run_agent.py | 38 +++++++++++++-- tests/agent/test_gemini_fast_fallback.py | 62 ++++++++++++++++++++++++ 2 files changed, 95 insertions(+), 5 deletions(-) create mode 100644 tests/agent/test_gemini_fast_fallback.py diff --git a/run_agent.py b/run_agent.py index bf1e2258bb..546cc0ef65 100644 --- a/run_agent.py +++ b/run_agent.py @@ -834,7 +834,9 @@ def _routermint_headers() -> dict: } -def _pool_may_recover_from_rate_limit(pool) -> bool: +def _pool_may_recover_from_rate_limit( + pool, *, provider: str | None = None, base_url: str | None = None +) -> bool: """Decide whether to wait for credential-pool rotation instead of falling back. The existing pool-rotation path requires the pool to (1) exist and (2) have @@ -847,15 +849,23 @@ def _pool_may_recover_from_rate_limit(pool) -> bool: cooldown to expire means retrying against the same exhausted quota — the daily-quota 429 will recur immediately, and the retry budget is burned. - In that case we must fall back to the configured ``fallback_model`` + Additionally, Google CloudCode / Gemini CLI rate limits are ACCOUNT-level + throttles — even a multi-entry pool shares the same quota window, so + rotation won't recover. Skip straight to the fallback for those (#13636). + + In those cases we must fall back to the configured ``fallback_model`` instead. Returns True only when rotation has somewhere to go. - See issue #11314. + See issues #11314 and #13636. """ if pool is None: return False if not pool.has_available(): return False + # CloudCode / Gemini CLI quotas are account-wide — all pool entries share + # the same throttle window, so rotation can't recover. Prefer fallback. + if provider == "google-gemini-cli" or str(base_url or "").startswith("cloudcode-pa://"): + return False return len(pool.entries()) > 1 @@ -6367,6 +6377,21 @@ class AIAgent: return False, has_retried_429 + def _credential_pool_may_recover_rate_limit(self) -> bool: + """Whether a rate-limit retry should wait for same-provider credentials.""" + pool = self._credential_pool + if pool is None: + return False + if ( + self.provider == "google-gemini-cli" + or str(getattr(self, "base_url", "")).startswith("cloudcode-pa://") + ): + # CloudCode/Gemini quota windows are usually account-level throttles. + # Prefer the configured fallback immediately instead of waiting out + # Retry-After while a pooled OAuth credential may still appear usable. + return False + return pool.has_available() + def _anthropic_messages_create(self, api_kwargs: dict): if self.api_mode == "anthropic_messages": self._try_refresh_anthropic_client_credentials() @@ -12447,9 +12472,12 @@ class AIAgent: if is_rate_limited and self._fallback_index < len(self._fallback_chain): # Don't eagerly fallback if credential pool rotation may # still recover. See _pool_may_recover_from_rate_limit - # for the single-credential-pool exception. Fixes #11314. + # for the single-credential-pool and CloudCode-quota + # exceptions. Fixes #11314 and #13636. pool_may_recover = _pool_may_recover_from_rate_limit( - self._credential_pool + self._credential_pool, + provider=self.provider, + base_url=getattr(self, "base_url", None), ) if not pool_may_recover: self._emit_status("⚠️ Rate limited — switching to fallback provider...") diff --git a/tests/agent/test_gemini_fast_fallback.py b/tests/agent/test_gemini_fast_fallback.py new file mode 100644 index 0000000000..3a842e57ae --- /dev/null +++ b/tests/agent/test_gemini_fast_fallback.py @@ -0,0 +1,62 @@ +"""Regression tests for #13636 — CloudCode / Gemini CLI rate-limit fallback. + +_pool_may_recover_from_rate_limit() is the hinge between credential-pool +rotation and fallback-provider activation. For CloudCode (Gemini CLI / +Gemini OAuth) the 429 is an account-wide throttle, so waiting for pool +rotation is pointless — prefer fallback immediately. +""" +from unittest.mock import MagicMock + +from run_agent import _pool_may_recover_from_rate_limit + + +def _pool(entries: int = 2): + p = MagicMock() + p.has_available.return_value = True + p.entries.return_value = list(range(entries)) + return p + + +def test_cloudcode_provider_skips_pool_rotation(): + assert _pool_may_recover_from_rate_limit( + _pool(entries=3), + provider="google-gemini-cli", + base_url="cloudcode-pa://google", + ) is False + + +def test_cloudcode_base_url_skips_pool_rotation_even_on_alias_provider(): + # Even if the provider label is something else, a cloudcode-pa:// URL + # signals the account-wide quota regime. + assert _pool_may_recover_from_rate_limit( + _pool(entries=3), + provider="custom-provider", + base_url="cloudcode-pa://google", + ) is False + + +def test_non_cloudcode_multi_entry_pool_still_recovers(): + assert _pool_may_recover_from_rate_limit( + _pool(entries=3), + provider="openrouter", + base_url="https://openrouter.ai/api/v1", + ) is True + + +def test_single_entry_pool_skips_rotation_regardless_of_provider(): + # Pre-existing single-entry-pool exception (#11314) still holds. + assert _pool_may_recover_from_rate_limit( + _pool(entries=1), + provider="openrouter", + base_url="https://openrouter.ai/api/v1", + ) is False + + +def test_exhausted_pool_skips_rotation(): + p = MagicMock() + p.has_available.return_value = False + assert _pool_may_recover_from_rate_limit(p) is False + + +def test_no_pool_skips_rotation(): + assert _pool_may_recover_from_rate_limit(None) is False