mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-27 11:22:03 +00:00
fix: include rate-limit in auxiliary capacity-error fallback gate
Auxiliary tasks pinned to an explicit provider failed silently on rate-limit (429) errors instead of triggering the fallback chain. The `is_capacity_error` gate only checked payment and connection errors, excluding rate limits — so when a configured provider like openai-codex hit its rate limit, auxiliary tasks (kanban_decomposer, vision, web_extract, approval, etc.) had zero resilience. Add an `_is_rate_limit_error()` check to `is_capacity_error` at both call sites (sync and async paths) so rate limits trigger fallback regardless of whether the provider was auto-detected or explicitly configured. Fixes #52228.
This commit is contained in:
parent
ff8920299c
commit
751adfa6b9
2 changed files with 46 additions and 4 deletions
|
|
@ -5669,7 +5669,9 @@ def call_llm(
|
|||
is_auto = resolved_provider in {"auto", "", None}
|
||||
# Capacity errors bypass the explicit-provider gate: the provider
|
||||
# literally cannot serve this request regardless of user intent.
|
||||
is_capacity_error = _is_payment_error(first_err) or _is_connection_error(first_err)
|
||||
# Rate limits are included: after retries are exhausted, a 429 means
|
||||
# the provider cannot serve this request — fall back. See #52228.
|
||||
is_capacity_error = _is_payment_error(first_err) or _is_connection_error(first_err) or _is_rate_limit_error(first_err)
|
||||
if should_fallback and (is_auto or is_capacity_error):
|
||||
if _is_payment_error(first_err):
|
||||
reason = "payment error"
|
||||
|
|
@ -6112,11 +6114,13 @@ async def async_call_llm(
|
|||
or _is_connection_error(first_err)
|
||||
or _is_rate_limit_error(first_err)
|
||||
)
|
||||
# Capacity errors (payment/quota/connection) bypass the explicit-provider
|
||||
# gate — the provider cannot serve the request regardless of user intent.
|
||||
# Capacity errors (payment/quota/connection/rate-limit) bypass the
|
||||
# explicit-provider gate — the provider cannot serve the request
|
||||
# regardless of user intent. Rate limits are included: after retries
|
||||
# are exhausted, a 429 means the provider is at capacity. See #52228.
|
||||
# See #26803: daily token quota must fall back like a 402 credit error.
|
||||
is_auto = resolved_provider in {"auto", "", None}
|
||||
is_capacity_error = _is_payment_error(first_err) or _is_connection_error(first_err)
|
||||
is_capacity_error = _is_payment_error(first_err) or _is_connection_error(first_err) or _is_rate_limit_error(first_err)
|
||||
if should_fallback and (is_auto or is_capacity_error):
|
||||
if _is_payment_error(first_err):
|
||||
reason = "payment error"
|
||||
|
|
|
|||
|
|
@ -1885,6 +1885,44 @@ class TestAuxiliaryFallbackLayering:
|
|||
|
||||
assert main_client.chat.completions.create.called
|
||||
|
||||
def test_explicit_provider_rate_limit_triggers_fallback(self, monkeypatch):
    """An explicitly configured provider that returns 429 must hand off to the fallback chain.

    Regression test for #52228: ``is_capacity_error`` did not cover rate
    limits, so auxiliary calls resolved to an explicit provider never fell
    back on a 429 -- only auto-provider calls did.
    """
    monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")

    # Primary client: every completion attempt raises a 429-style error.
    rate_limited = Exception("Rate limit exceeded, try again in 60 seconds")
    rate_limited.status_code = 429
    primary = MagicMock()
    primary.chat.completions.create.side_effect = rate_limited

    # Fallback client: answers successfully.
    fallback = MagicMock()
    fallback.chat.completions.create.return_value = MagicMock(
        choices=[MagicMock(message=MagicMock(content="from fallback chain"))]
    )

    # Build the patchers up front; nothing is patched until the ``with``
    # statements below enter them, in the same order as listed here.
    cached_client_patch = patch(
        "agent.auxiliary_client._get_cached_client",
        return_value=(primary, "gpt-5.5"),
    )
    task_resolution_patch = patch(
        "agent.auxiliary_client._resolve_task_provider_model",
        return_value=("openai-codex", "gpt-5.5", None, None, None),
    )
    chain_patch = patch(
        "agent.auxiliary_client._try_configured_fallback_chain",
        return_value=(fallback, "deepseek-v4-pro", "fallback_chain[0](opencode-go)"),
    )
    main_model_patch = patch("agent.auxiliary_client._try_main_agent_model_fallback")

    with cached_client_patch, task_resolution_patch:
        with chain_patch as chain_mock, main_model_patch as main_mock:
            # The return value is not inspected; the assertions below check
            # which clients and fallback helpers were invoked.
            call_llm(
                task="kanban_decomposer",
                messages=[{"role": "user", "content": "decompose this"}],
            )

    # Fallback chain MUST be tried for rate-limit on explicit provider
    chain_mock.assert_called()
    assert fallback.chat.completions.create.called
    # Main agent fallback should NOT be needed when chain succeeds
    main_mock.assert_not_called()
|
||||
|
||||
|
||||
def test_warning_emitted_when_all_fallbacks_exhausted(self, monkeypatch, caplog):
|
||||
"""When chain AND main model both fail, a user-visible warning fires before re-raise."""
|
||||
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue