mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix(agent): fall back on rate limit when pool has no rotation room
Extracts pool-rotation-room logic into `_pool_may_recover_from_rate_limit` so single-credential pools no longer block the eager-fallback path on 429. The existing check `pool is not None and pool.has_available()` lets fallback fire only after the pool marks every entry as exhausted. With exactly one credential in the pool (the common shape for Gemini OAuth, Vertex service accounts, and any personal-key setup), `has_available()` flips back to True as soon as the cooldown expires — Hermes retries against the same entry, hits the same daily-quota 429, and burns the retry budget in a tight loop before ever reaching the configured `fallback_model`. Observed in the wild as 4+ hours of 429 noise on a single Gemini key instead of falling through to Vertex as configured. Rotation is only meaningful with more than one credential — gate on `len(pool.entries()) > 1`. Multi-credential pools keep the current wait-for-rotation behaviour unchanged. Fixes #11314. Related to #8947, #10210, #7230. Narrower scope than open PRs #8023 (classifier change) and #11492 (503/529 credential-pool bypass) — this addresses the single-credential 429 case specifically and does not conflict with either. Tests: 6 new unit tests in tests/run_agent/test_provider_fallback.py covering (a) None pool, (b) single-cred available, (c) single-cred in cooldown, (d) 2-cred available rotates, (e) multi-cred all cooling-down falls back, (f) many-cred available rotates. All 18 tests in the file pass.
This commit is contained in:
parent
1af44a13c0
commit
1fc77f995b
2 changed files with 70 additions and 6 deletions
35
run_agent.py
35
run_agent.py
|
|
@ -693,6 +693,31 @@ def _routermint_headers() -> dict:
|
|||
}
|
||||
|
||||
|
||||
def _pool_may_recover_from_rate_limit(pool) -> bool:
|
||||
"""Decide whether to wait for credential-pool rotation instead of falling back.
|
||||
|
||||
The existing pool-rotation path requires the pool to (1) exist and (2) have
|
||||
at least one entry not currently in exhaustion cooldown. But rotation is
|
||||
only meaningful when the pool has more than one entry.
|
||||
|
||||
With a single-credential pool (common for Gemini OAuth, Vertex service
|
||||
accounts, and any "one personal key" configuration), the primary entry
|
||||
just 429'd and there is nothing to rotate to. Waiting for the pool
|
||||
cooldown to expire means retrying against the same exhausted quota — the
|
||||
daily-quota 429 will recur immediately, and the retry budget is burned.
|
||||
|
||||
In that case we must fall back to the configured ``fallback_model``
|
||||
instead. Returns True only when rotation has somewhere to go.
|
||||
|
||||
See issue #11314.
|
||||
"""
|
||||
if pool is None:
|
||||
return False
|
||||
if not pool.has_available():
|
||||
return False
|
||||
return len(pool.entries()) > 1
|
||||
|
||||
|
||||
def _qwen_portal_headers() -> dict:
|
||||
"""Return default HTTP headers required by Qwen Portal API."""
|
||||
import platform as _plat
|
||||
|
|
@ -10582,11 +10607,11 @@ class AIAgent:
|
|||
)
|
||||
if is_rate_limited and self._fallback_index < len(self._fallback_chain):
|
||||
# Don't eagerly fallback if credential pool rotation may
|
||||
# still recover. The pool's retry-then-rotate cycle needs
|
||||
# at least one more attempt to fire — jumping to a fallback
|
||||
# provider here short-circuits it.
|
||||
pool = self._credential_pool
|
||||
pool_may_recover = pool is not None and pool.has_available()
|
||||
# still recover. See _pool_may_recover_from_rate_limit
|
||||
# for the single-credential-pool exception. Fixes #11314.
|
||||
pool_may_recover = _pool_may_recover_from_rate_limit(
|
||||
self._credential_pool
|
||||
)
|
||||
if not pool_may_recover:
|
||||
self._emit_status("⚠️ Rate limited — switching to fallback provider...")
|
||||
if self._try_activate_fallback():
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue