Prefer fallback for Gemini CloudCode rate limits

2026-05-08 03:01:47 +00:00 · 2026-04-21 14:23:36 -04:00 · 2026-04-21 14:23:36 -04:00 · 526742199b
commit 526742199b
parent 12135b4c8a
2 changed files with 95 additions and 5 deletions
--- a/run_agent.py
+++ b/run_agent.py
@@ -834,7 +834,9 @@ def _routermint_headers() -> dict:
    }


-def _pool_may_recover_from_rate_limit(pool) -> bool:
+def _pool_may_recover_from_rate_limit(
+    pool, *, provider: str | None = None, base_url: str | None = None
+) -> bool:
    """Decide whether to wait for credential-pool rotation instead of falling back.

    The existing pool-rotation path requires the pool to (1) exist and (2) have
@@ -847,15 +849,23 @@ def _pool_may_recover_from_rate_limit(pool) -> bool:
    cooldown to expire means retrying against the same exhausted quota — the
    daily-quota 429 will recur immediately, and the retry budget is burned.

-    In that case we must fall back to the configured ``fallback_model``
+    Additionally, Google CloudCode / Gemini CLI rate limits are ACCOUNT-level
+    throttles — even a multi-entry pool shares the same quota window, so
+    rotation won't recover.  Skip straight to the fallback for those (#13636).
+
+    In those cases we must fall back to the configured ``fallback_model``
    instead.  Returns True only when rotation has somewhere to go.

-    See issue #11314.
+    See issues #11314 and #13636.
    """
    if pool is None:
        return False
    if not pool.has_available():
        return False
+    # CloudCode / Gemini CLI quotas are account-wide — all pool entries share
+    # the same throttle window, so rotation can't recover.  Prefer fallback.
+    if provider == "google-gemini-cli" or str(base_url or "").startswith("cloudcode-pa://"):
+        return False
    return len(pool.entries()) > 1


@@ -6367,6 +6377,21 @@ class AIAgent:

        return False, has_retried_429

+    def _credential_pool_may_recover_rate_limit(self) -> bool:
+        """Whether a rate-limit retry should wait for same-provider credentials."""
+        pool = self._credential_pool
+        if pool is None:
+            return False
+        if (
+            self.provider == "google-gemini-cli"
+            or str(getattr(self, "base_url", "")).startswith("cloudcode-pa://")
+        ):
+            # CloudCode/Gemini quota windows are usually account-level throttles.
+            # Prefer the configured fallback immediately instead of waiting out
+            # Retry-After while a pooled OAuth credential may still appear usable.
+            return False
+        return pool.has_available()
+
    def _anthropic_messages_create(self, api_kwargs: dict):
        if self.api_mode == "anthropic_messages":
            self._try_refresh_anthropic_client_credentials()
@@ -12447,9 +12472,12 @@ class AIAgent:
                    if is_rate_limited and self._fallback_index < len(self._fallback_chain):
                        # Don't eagerly fallback if credential pool rotation may
                        # still recover.  See _pool_may_recover_from_rate_limit
-                        # for the single-credential-pool exception.  Fixes #11314.
+                        # for the single-credential-pool and CloudCode-quota
+                        # exceptions.  Fixes #11314 and #13636.
                        pool_may_recover = _pool_may_recover_from_rate_limit(
-                            self._credential_pool
+                            self._credential_pool,
+                            provider=self.provider,
+                            base_url=getattr(self, "base_url", None),
                        )
                        if not pool_may_recover:
                            self._emit_status("⚠️ Rate limited — switching to fallback provider...")
--- a/tests/agent/test_gemini_fast_fallback.py
+++ b/tests/agent/test_gemini_fast_fallback.py
@@ -0,0 +1,62 @@
+"""Regression tests for #13636 — CloudCode / Gemini CLI rate-limit fallback.
+
+_pool_may_recover_from_rate_limit() is the hinge between credential-pool
+rotation and fallback-provider activation.  For CloudCode (Gemini CLI /
+Gemini OAuth) the 429 is an account-wide throttle, so waiting for pool
+rotation is pointless — prefer fallback immediately.
+"""
+from unittest.mock import MagicMock
+
+from run_agent import _pool_may_recover_from_rate_limit
+
+
+def _pool(entries: int = 2):
+    p = MagicMock()
+    p.has_available.return_value = True
+    p.entries.return_value = list(range(entries))
+    return p
+
+
+def test_cloudcode_provider_skips_pool_rotation():
+    assert _pool_may_recover_from_rate_limit(
+        _pool(entries=3),
+        provider="google-gemini-cli",
+        base_url="cloudcode-pa://google",
+    ) is False
+
+
+def test_cloudcode_base_url_skips_pool_rotation_even_on_alias_provider():
+    # Even if the provider label is something else, a cloudcode-pa:// URL
+    # signals the account-wide quota regime.
+    assert _pool_may_recover_from_rate_limit(
+        _pool(entries=3),
+        provider="custom-provider",
+        base_url="cloudcode-pa://google",
+    ) is False
+
+
+def test_non_cloudcode_multi_entry_pool_still_recovers():
+    assert _pool_may_recover_from_rate_limit(
+        _pool(entries=3),
+        provider="openrouter",
+        base_url="https://openrouter.ai/api/v1",
+    ) is True
+
+
+def test_single_entry_pool_skips_rotation_regardless_of_provider():
+    # Pre-existing single-entry-pool exception (#11314) still holds.
+    assert _pool_may_recover_from_rate_limit(
+        _pool(entries=1),
+        provider="openrouter",
+        base_url="https://openrouter.ai/api/v1",
+    ) is False
+
+
+def test_exhausted_pool_skips_rotation():
+    p = MagicMock()
+    p.has_available.return_value = False
+    assert _pool_may_recover_from_rate_limit(p) is False
+
+
+def test_no_pool_skips_rotation():
+    assert _pool_may_recover_from_rate_limit(None) is False