Prefer fallback for Gemini CloudCode rate limits

This commit is contained in:
Justin Kausel 2026-04-21 14:23:36 -04:00 committed by Teknium
parent 12135b4c8a
commit 526742199b
2 changed files with 95 additions and 5 deletions

View file

@ -834,7 +834,9 @@ def _routermint_headers() -> dict:
}
def _pool_may_recover_from_rate_limit(pool) -> bool:
def _pool_may_recover_from_rate_limit(
pool, *, provider: str | None = None, base_url: str | None = None
) -> bool:
"""Decide whether to wait for credential-pool rotation instead of falling back.
The existing pool-rotation path requires the pool to (1) exist and (2) have
@ -847,15 +849,23 @@ def _pool_may_recover_from_rate_limit(pool) -> bool:
cooldown to expire means retrying against the same exhausted quota — the
daily-quota 429 will recur immediately, and the retry budget is burned.
In that case we must fall back to the configured ``fallback_model``
Additionally, Google CloudCode / Gemini CLI rate limits are ACCOUNT-level
throttles — even a multi-entry pool shares the same quota window, so
rotation won't recover. Skip straight to the fallback for those (#13636).
In those cases we must fall back to the configured ``fallback_model``
instead. Returns True only when rotation has somewhere to go.
See issue #11314.
See issues #11314 and #13636.
"""
if pool is None:
return False
if not pool.has_available():
return False
# CloudCode / Gemini CLI quotas are account-wide — all pool entries share
# the same throttle window, so rotation can't recover. Prefer fallback.
if provider == "google-gemini-cli" or str(base_url or "").startswith("cloudcode-pa://"):
return False
return len(pool.entries()) > 1
@ -6367,6 +6377,21 @@ class AIAgent:
return False, has_retried_429
def _credential_pool_may_recover_rate_limit(self) -> bool:
"""Whether a rate-limit retry should wait for same-provider credentials."""
pool = self._credential_pool
if pool is None:
return False
if (
self.provider == "google-gemini-cli"
or str(getattr(self, "base_url", "")).startswith("cloudcode-pa://")
):
# CloudCode/Gemini quota windows are usually account-level throttles.
# Prefer the configured fallback immediately instead of waiting out
# Retry-After while a pooled OAuth credential may still appear usable.
return False
return pool.has_available()
def _anthropic_messages_create(self, api_kwargs: dict):
if self.api_mode == "anthropic_messages":
self._try_refresh_anthropic_client_credentials()
@ -12447,9 +12472,12 @@ class AIAgent:
if is_rate_limited and self._fallback_index < len(self._fallback_chain):
# Don't eagerly fallback if credential pool rotation may
# still recover. See _pool_may_recover_from_rate_limit
# for the single-credential-pool exception. Fixes #11314.
# for the single-credential-pool and CloudCode-quota
# exceptions. Fixes #11314 and #13636.
pool_may_recover = _pool_may_recover_from_rate_limit(
self._credential_pool
self._credential_pool,
provider=self.provider,
base_url=getattr(self, "base_url", None),
)
if not pool_may_recover:
self._emit_status("⚠️ Rate limited — switching to fallback provider...")

View file

@ -0,0 +1,62 @@
"""Regression tests for #13636 — CloudCode / Gemini CLI rate-limit fallback.
_pool_may_recover_from_rate_limit() is the hinge between credential-pool
rotation and fallback-provider activation. For CloudCode (Gemini CLI /
Gemini OAuth) the 429 is an account-wide throttle, so waiting for pool
rotation is pointless — prefer fallback immediately.
"""
from unittest.mock import MagicMock
from run_agent import _pool_may_recover_from_rate_limit
def _pool(entries: int = 2):
p = MagicMock()
p.has_available.return_value = True
p.entries.return_value = list(range(entries))
return p
def test_cloudcode_provider_skips_pool_rotation():
    """The ``google-gemini-cli`` provider must not wait on pool rotation."""
    multi_entry_pool = _pool(entries=3)
    may_recover = _pool_may_recover_from_rate_limit(
        multi_entry_pool,
        provider="google-gemini-cli",
        base_url="cloudcode-pa://google",
    )
    assert may_recover is False
def test_cloudcode_base_url_skips_pool_rotation_even_on_alias_provider():
    """A ``cloudcode-pa://`` base URL alone is enough to skip rotation."""
    # The provider label here is unrelated to Gemini; only the URL scheme
    # marks the account-wide quota regime.
    multi_entry_pool = _pool(entries=3)
    may_recover = _pool_may_recover_from_rate_limit(
        multi_entry_pool,
        provider="custom-provider",
        base_url="cloudcode-pa://google",
    )
    assert may_recover is False
def test_non_cloudcode_multi_entry_pool_still_recovers():
    """A multi-entry pool on a non-CloudCode provider may still rotate."""
    multi_entry_pool = _pool(entries=3)
    may_recover = _pool_may_recover_from_rate_limit(
        multi_entry_pool,
        provider="openrouter",
        base_url="https://openrouter.ai/api/v1",
    )
    assert may_recover is True
def test_single_entry_pool_skips_rotation_regardless_of_provider():
    """A one-entry pool has nowhere to rotate to, so rotation is skipped."""
    # Guards the earlier single-entry-pool exception (#11314).
    lone_entry_pool = _pool(entries=1)
    may_recover = _pool_may_recover_from_rate_limit(
        lone_entry_pool,
        provider="openrouter",
        base_url="https://openrouter.ai/api/v1",
    )
    assert may_recover is False
def test_exhausted_pool_skips_rotation():
    """A pool reporting no available credentials cannot recover."""
    drained_pool = MagicMock()
    drained_pool.has_available.return_value = False
    may_recover = _pool_may_recover_from_rate_limit(drained_pool)
    assert may_recover is False
def test_no_pool_skips_rotation():
    """With no pool at all (``None``), rotation is never an option."""
    may_recover = _pool_may_recover_from_rate_limit(None)
    assert may_recover is False