mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-08 03:01:47 +00:00
Prefer fallback for Gemini CloudCode rate limits
This commit is contained in:
parent
12135b4c8a
commit
526742199b
2 changed files with 95 additions and 5 deletions
38
run_agent.py
38
run_agent.py
|
|
@ -834,7 +834,9 @@ def _routermint_headers() -> dict:
|
|||
}
|
||||
|
||||
|
||||
def _pool_may_recover_from_rate_limit(pool) -> bool:
|
||||
def _pool_may_recover_from_rate_limit(
|
||||
pool, *, provider: str | None = None, base_url: str | None = None
|
||||
) -> bool:
|
||||
"""Decide whether to wait for credential-pool rotation instead of falling back.
|
||||
|
||||
The existing pool-rotation path requires the pool to (1) exist and (2) have
|
||||
|
|
@ -847,15 +849,23 @@ def _pool_may_recover_from_rate_limit(pool) -> bool:
|
|||
cooldown to expire means retrying against the same exhausted quota — the
|
||||
daily-quota 429 will recur immediately, and the retry budget is burned.
|
||||
|
||||
In that case we must fall back to the configured ``fallback_model``
|
||||
Additionally, Google CloudCode / Gemini CLI rate limits are ACCOUNT-level
|
||||
throttles — even a multi-entry pool shares the same quota window, so
|
||||
rotation won't recover. Skip straight to the fallback for those (#13636).
|
||||
|
||||
In those cases we must fall back to the configured ``fallback_model``
|
||||
instead. Returns True only when rotation has somewhere to go.
|
||||
|
||||
See issue #11314.
|
||||
See issues #11314 and #13636.
|
||||
"""
|
||||
if pool is None:
|
||||
return False
|
||||
if not pool.has_available():
|
||||
return False
|
||||
# CloudCode / Gemini CLI quotas are account-wide — all pool entries share
|
||||
# the same throttle window, so rotation can't recover. Prefer fallback.
|
||||
if provider == "google-gemini-cli" or str(base_url or "").startswith("cloudcode-pa://"):
|
||||
return False
|
||||
return len(pool.entries()) > 1
|
||||
|
||||
|
||||
|
|
@ -6367,6 +6377,21 @@ class AIAgent:
|
|||
|
||||
return False, has_retried_429
|
||||
|
||||
def _credential_pool_may_recover_rate_limit(self) -> bool:
|
||||
"""Whether a rate-limit retry should wait for same-provider credentials."""
|
||||
pool = self._credential_pool
|
||||
if pool is None:
|
||||
return False
|
||||
if (
|
||||
self.provider == "google-gemini-cli"
|
||||
or str(getattr(self, "base_url", "")).startswith("cloudcode-pa://")
|
||||
):
|
||||
# CloudCode/Gemini quota windows are usually account-level throttles.
|
||||
# Prefer the configured fallback immediately instead of waiting out
|
||||
# Retry-After while a pooled OAuth credential may still appear usable.
|
||||
return False
|
||||
return pool.has_available()
|
||||
|
||||
def _anthropic_messages_create(self, api_kwargs: dict):
|
||||
if self.api_mode == "anthropic_messages":
|
||||
self._try_refresh_anthropic_client_credentials()
|
||||
|
|
@ -12447,9 +12472,12 @@ class AIAgent:
|
|||
if is_rate_limited and self._fallback_index < len(self._fallback_chain):
|
||||
# Don't eagerly fallback if credential pool rotation may
|
||||
# still recover. See _pool_may_recover_from_rate_limit
|
||||
# for the single-credential-pool exception. Fixes #11314.
|
||||
# for the single-credential-pool and CloudCode-quota
|
||||
# exceptions. Fixes #11314 and #13636.
|
||||
pool_may_recover = _pool_may_recover_from_rate_limit(
|
||||
self._credential_pool
|
||||
self._credential_pool,
|
||||
provider=self.provider,
|
||||
base_url=getattr(self, "base_url", None),
|
||||
)
|
||||
if not pool_may_recover:
|
||||
self._emit_status("⚠️ Rate limited — switching to fallback provider...")
|
||||
|
|
|
|||
62
tests/agent/test_gemini_fast_fallback.py
Normal file
62
tests/agent/test_gemini_fast_fallback.py
Normal file
|
|
@ -0,0 +1,62 @@
|
|||
"""Regression tests for #13636 — CloudCode / Gemini CLI rate-limit fallback.
|
||||
|
||||
_pool_may_recover_from_rate_limit() is the hinge between credential-pool
|
||||
rotation and fallback-provider activation. For CloudCode (Gemini CLI /
|
||||
Gemini OAuth) the 429 is an account-wide throttle, so waiting for pool
|
||||
rotation is pointless — prefer fallback immediately.
|
||||
"""
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
from run_agent import _pool_may_recover_from_rate_limit
|
||||
|
||||
|
||||
def _pool(entries: int = 2):
|
||||
p = MagicMock()
|
||||
p.has_available.return_value = True
|
||||
p.entries.return_value = list(range(entries))
|
||||
return p
|
||||
|
||||
|
||||
def test_cloudcode_provider_skips_pool_rotation():
    """A ``google-gemini-cli`` provider never waits on a multi-entry pool."""
    may_recover = _pool_may_recover_from_rate_limit(
        _pool(entries=3),
        provider="google-gemini-cli",
        base_url="cloudcode-pa://google",
    )
    assert may_recover is False
|
||||
|
||||
|
||||
def test_cloudcode_base_url_skips_pool_rotation_even_on_alias_provider():
    """A ``cloudcode-pa://`` base URL alone is enough to skip pool rotation."""
    # The provider label may be an alias; the cloudcode-pa:// URL still
    # marks the account-wide quota regime.
    may_recover = _pool_may_recover_from_rate_limit(
        _pool(entries=3),
        provider="custom-provider",
        base_url="cloudcode-pa://google",
    )
    assert may_recover is False
|
||||
|
||||
|
||||
def test_non_cloudcode_multi_entry_pool_still_recovers():
    """A multi-entry pool on a non-CloudCode provider can still rotate."""
    may_recover = _pool_may_recover_from_rate_limit(
        _pool(entries=3),
        provider="openrouter",
        base_url="https://openrouter.ai/api/v1",
    )
    assert may_recover is True
|
||||
|
||||
|
||||
def test_single_entry_pool_skips_rotation_regardless_of_provider():
    """A one-entry pool has nowhere to rotate to, whatever the provider."""
    # Guards the earlier single-entry-pool exception (#11314).
    may_recover = _pool_may_recover_from_rate_limit(
        _pool(entries=1),
        provider="openrouter",
        base_url="https://openrouter.ai/api/v1",
    )
    assert may_recover is False
|
||||
|
||||
|
||||
def test_exhausted_pool_skips_rotation():
    """A pool reporting no available credentials cannot recover."""
    drained_pool = MagicMock(**{"has_available.return_value": False})
    may_recover = _pool_may_recover_from_rate_limit(drained_pool)
    assert may_recover is False
|
||||
|
||||
|
||||
def test_no_pool_skips_rotation():
    """With no pool at all, rotation is never an option."""
    may_recover = _pool_may_recover_from_rate_limit(None)
    assert may_recover is False
|
||||
Loading…
Add table
Add a link
Reference in a new issue