mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix(agent): fall back on rate limit when pool has no rotation room
Extracts pool-rotation-room logic into `_pool_may_recover_from_rate_limit` so single-credential pools no longer block the eager-fallback path on 429. The existing check `pool is not None and pool.has_available()` lets fallback fire only after the pool marks every entry as exhausted. With exactly one credential in the pool (the common shape for Gemini OAuth, Vertex service accounts, and any personal-key setup), `has_available()` flips back to True as soon as the cooldown expires — Hermes retries against the same entry, hits the same daily-quota 429, and burns the retry budget in a tight loop before ever reaching the configured `fallback_model`. Observed in the wild as 4+ hours of 429 noise on a single Gemini key instead of falling through to Vertex as configured. Rotation is only meaningful with more than one credential — gate on `len(pool.entries()) > 1`. Multi-credential pools keep the current wait-for-rotation behaviour unchanged. Fixes #11314. Related to #8947, #10210, #7230. Narrower scope than open PRs #8023 (classifier change) and #11492 (503/529 credential-pool bypass) — this addresses the single-credential 429 case specifically and does not conflict with either. Tests: 6 new unit tests in tests/run_agent/test_provider_fallback.py covering (a) None pool, (b) single-cred available, (c) single-cred in cooldown, (d) 2-cred available rotates, (e) multi-cred all cooling-down falls back, (f) many-cred available rotates. All 18 tests in the file pass.
This commit is contained in:
parent
1af44a13c0
commit
1fc77f995b
2 changed files with 70 additions and 6 deletions
|
|
@ -7,7 +7,7 @@ advancement through multiple providers.
|
|||
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from run_agent import AIAgent
|
||||
from run_agent import AIAgent, _pool_may_recover_from_rate_limit
|
||||
|
||||
|
||||
def _make_agent(fallback_model=None):
|
||||
|
|
@ -181,3 +181,42 @@ class TestFallbackChainAdvancement:
|
|||
):
|
||||
assert agent._try_activate_fallback() is True
|
||||
assert mock_rpc.call_args.kwargs["explicit_api_key"] == "env-secret"
|
||||
|
||||
|
||||
# ── Pool-rotation vs fallback gating (#11314) ────────────────────────────
|
||||
|
||||
|
||||
def _pool(n_entries: int, has_available: bool = True):
|
||||
"""Make a minimal credential-pool stand-in for rotation-room checks."""
|
||||
pool = MagicMock()
|
||||
pool.entries.return_value = [MagicMock() for _ in range(n_entries)]
|
||||
pool.has_available.return_value = has_available
|
||||
return pool
|
||||
|
||||
|
||||
class TestPoolRotationRoom:
|
||||
def test_none_pool_returns_false(self):
|
||||
assert _pool_may_recover_from_rate_limit(None) is False
|
||||
|
||||
def test_single_credential_returns_false(self):
|
||||
"""With one credential that just 429'd, rotation has nowhere to go.
|
||||
|
||||
The pool may still report has_available() True once cooldown expires,
|
||||
but retrying against the same entry will hit the same daily-quota
|
||||
429 and burn the retry budget. Must fall back.
|
||||
"""
|
||||
assert _pool_may_recover_from_rate_limit(_pool(1)) is False
|
||||
|
||||
def test_single_credential_in_cooldown_returns_false(self):
|
||||
assert _pool_may_recover_from_rate_limit(_pool(1, has_available=False)) is False
|
||||
|
||||
def test_two_credentials_available_returns_true(self):
|
||||
"""With >1 credentials and at least one available, rotate instead of fallback."""
|
||||
assert _pool_may_recover_from_rate_limit(_pool(2)) is True
|
||||
|
||||
def test_multiple_credentials_all_in_cooldown_returns_false(self):
|
||||
"""All credentials cooling down — fall back rather than wait."""
|
||||
assert _pool_may_recover_from_rate_limit(_pool(3, has_available=False)) is False
|
||||
|
||||
def test_many_credentials_available_returns_true(self):
|
||||
assert _pool_may_recover_from_rate_limit(_pool(10)) is True
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue