mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-14 04:02:26 +00:00
Prefer fallback for Gemini CloudCode rate limits
This commit is contained in:
parent
12135b4c8a
commit
526742199b
2 changed files with 95 additions and 5 deletions
38
run_agent.py
38
run_agent.py
|
|
@ -834,7 +834,9 @@ def _routermint_headers() -> dict:
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def _pool_may_recover_from_rate_limit(pool) -> bool:
|
def _pool_may_recover_from_rate_limit(
|
||||||
|
pool, *, provider: str | None = None, base_url: str | None = None
|
||||||
|
) -> bool:
|
||||||
"""Decide whether to wait for credential-pool rotation instead of falling back.
|
"""Decide whether to wait for credential-pool rotation instead of falling back.
|
||||||
|
|
||||||
The existing pool-rotation path requires the pool to (1) exist and (2) have
|
The existing pool-rotation path requires the pool to (1) exist and (2) have
|
||||||
|
|
@ -847,15 +849,23 @@ def _pool_may_recover_from_rate_limit(pool) -> bool:
|
||||||
cooldown to expire means retrying against the same exhausted quota — the
|
cooldown to expire means retrying against the same exhausted quota — the
|
||||||
daily-quota 429 will recur immediately, and the retry budget is burned.
|
daily-quota 429 will recur immediately, and the retry budget is burned.
|
||||||
|
|
||||||
In that case we must fall back to the configured ``fallback_model``
|
Additionally, Google CloudCode / Gemini CLI rate limits are ACCOUNT-level
|
||||||
|
throttles — even a multi-entry pool shares the same quota window, so
|
||||||
|
rotation won't recover. Skip straight to the fallback for those (#13636).
|
||||||
|
|
||||||
|
In those cases we must fall back to the configured ``fallback_model``
|
||||||
instead. Returns True only when rotation has somewhere to go.
|
instead. Returns True only when rotation has somewhere to go.
|
||||||
|
|
||||||
See issue #11314.
|
See issues #11314 and #13636.
|
||||||
"""
|
"""
|
||||||
if pool is None:
|
if pool is None:
|
||||||
return False
|
return False
|
||||||
if not pool.has_available():
|
if not pool.has_available():
|
||||||
return False
|
return False
|
||||||
|
# CloudCode / Gemini CLI quotas are account-wide — all pool entries share
|
||||||
|
# the same throttle window, so rotation can't recover. Prefer fallback.
|
||||||
|
if provider == "google-gemini-cli" or str(base_url or "").startswith("cloudcode-pa://"):
|
||||||
|
return False
|
||||||
return len(pool.entries()) > 1
|
return len(pool.entries()) > 1
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -6367,6 +6377,21 @@ class AIAgent:
|
||||||
|
|
||||||
return False, has_retried_429
|
return False, has_retried_429
|
||||||
|
|
||||||
|
def _credential_pool_may_recover_rate_limit(self) -> bool:
|
||||||
|
"""Whether a rate-limit retry should wait for same-provider credentials."""
|
||||||
|
pool = self._credential_pool
|
||||||
|
if pool is None:
|
||||||
|
return False
|
||||||
|
if (
|
||||||
|
self.provider == "google-gemini-cli"
|
||||||
|
or str(getattr(self, "base_url", "")).startswith("cloudcode-pa://")
|
||||||
|
):
|
||||||
|
# CloudCode/Gemini quota windows are usually account-level throttles.
|
||||||
|
# Prefer the configured fallback immediately instead of waiting out
|
||||||
|
# Retry-After while a pooled OAuth credential may still appear usable.
|
||||||
|
return False
|
||||||
|
return pool.has_available()
|
||||||
|
|
||||||
def _anthropic_messages_create(self, api_kwargs: dict):
|
def _anthropic_messages_create(self, api_kwargs: dict):
|
||||||
if self.api_mode == "anthropic_messages":
|
if self.api_mode == "anthropic_messages":
|
||||||
self._try_refresh_anthropic_client_credentials()
|
self._try_refresh_anthropic_client_credentials()
|
||||||
|
|
@ -12447,9 +12472,12 @@ class AIAgent:
|
||||||
if is_rate_limited and self._fallback_index < len(self._fallback_chain):
|
if is_rate_limited and self._fallback_index < len(self._fallback_chain):
|
||||||
# Don't eagerly fallback if credential pool rotation may
|
# Don't eagerly fallback if credential pool rotation may
|
||||||
# still recover. See _pool_may_recover_from_rate_limit
|
# still recover. See _pool_may_recover_from_rate_limit
|
||||||
# for the single-credential-pool exception. Fixes #11314.
|
# for the single-credential-pool and CloudCode-quota
|
||||||
|
# exceptions. Fixes #11314 and #13636.
|
||||||
pool_may_recover = _pool_may_recover_from_rate_limit(
|
pool_may_recover = _pool_may_recover_from_rate_limit(
|
||||||
self._credential_pool
|
self._credential_pool,
|
||||||
|
provider=self.provider,
|
||||||
|
base_url=getattr(self, "base_url", None),
|
||||||
)
|
)
|
||||||
if not pool_may_recover:
|
if not pool_may_recover:
|
||||||
self._emit_status("⚠️ Rate limited — switching to fallback provider...")
|
self._emit_status("⚠️ Rate limited — switching to fallback provider...")
|
||||||
|
|
|
||||||
62
tests/agent/test_gemini_fast_fallback.py
Normal file
62
tests/agent/test_gemini_fast_fallback.py
Normal file
|
|
@ -0,0 +1,62 @@
|
||||||
|
"""Regression tests for #13636 — CloudCode / Gemini CLI rate-limit fallback.
|
||||||
|
|
||||||
|
_pool_may_recover_from_rate_limit() is the hinge between credential-pool
|
||||||
|
rotation and fallback-provider activation. For CloudCode (Gemini CLI /
|
||||||
|
Gemini OAuth) the 429 is an account-wide throttle, so waiting for pool
|
||||||
|
rotation is pointless — prefer fallback immediately.
|
||||||
|
"""
|
||||||
|
from unittest.mock import MagicMock
|
||||||
|
|
||||||
|
from run_agent import _pool_may_recover_from_rate_limit
|
||||||
|
|
||||||
|
|
||||||
|
def _pool(entries: int = 2):
|
||||||
|
p = MagicMock()
|
||||||
|
p.has_available.return_value = True
|
||||||
|
p.entries.return_value = list(range(entries))
|
||||||
|
return p
|
||||||
|
|
||||||
|
|
||||||
|
def test_cloudcode_provider_skips_pool_rotation():
|
||||||
|
assert _pool_may_recover_from_rate_limit(
|
||||||
|
_pool(entries=3),
|
||||||
|
provider="google-gemini-cli",
|
||||||
|
base_url="cloudcode-pa://google",
|
||||||
|
) is False
|
||||||
|
|
||||||
|
|
||||||
|
def test_cloudcode_base_url_skips_pool_rotation_even_on_alias_provider():
|
||||||
|
# Even if the provider label is something else, a cloudcode-pa:// URL
|
||||||
|
# signals the account-wide quota regime.
|
||||||
|
assert _pool_may_recover_from_rate_limit(
|
||||||
|
_pool(entries=3),
|
||||||
|
provider="custom-provider",
|
||||||
|
base_url="cloudcode-pa://google",
|
||||||
|
) is False
|
||||||
|
|
||||||
|
|
||||||
|
def test_non_cloudcode_multi_entry_pool_still_recovers():
|
||||||
|
assert _pool_may_recover_from_rate_limit(
|
||||||
|
_pool(entries=3),
|
||||||
|
provider="openrouter",
|
||||||
|
base_url="https://openrouter.ai/api/v1",
|
||||||
|
) is True
|
||||||
|
|
||||||
|
|
||||||
|
def test_single_entry_pool_skips_rotation_regardless_of_provider():
|
||||||
|
# Pre-existing single-entry-pool exception (#11314) still holds.
|
||||||
|
assert _pool_may_recover_from_rate_limit(
|
||||||
|
_pool(entries=1),
|
||||||
|
provider="openrouter",
|
||||||
|
base_url="https://openrouter.ai/api/v1",
|
||||||
|
) is False
|
||||||
|
|
||||||
|
|
||||||
|
def test_exhausted_pool_skips_rotation():
|
||||||
|
p = MagicMock()
|
||||||
|
p.has_available.return_value = False
|
||||||
|
assert _pool_may_recover_from_rate_limit(p) is False
|
||||||
|
|
||||||
|
|
||||||
|
def test_no_pool_skips_rotation():
|
||||||
|
assert _pool_may_recover_from_rate_limit(None) is False
|
||||||
Loading…
Add table
Add a link
Reference in a new issue