From 751adfa6b94147ed7e2981f5db35f0686f8f7394 Mon Sep 17 00:00:00 2001 From: pyxl-dev Date: Thu, 25 Jun 2026 01:17:40 +0000 Subject: [PATCH] fix: include rate-limit in auxiliary capacity-error fallback gate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rate-limit (429) errors on explicit-provider auxiliary tasks were silently failing instead of triggering the fallback chain. The is_capacity_error gate only checked payment and connection errors, excluding rate limits — so when a configured provider like openai-codex hit its rate limit, auxiliary tasks (kanban_decomposer, vision, web_extract, approval, etc.) had zero resilience. Add _is_rate_limit_error() to is_capacity_error at both call sites (sync and async paths) so rate limits trigger fallback regardless of whether the provider was auto-detected or explicitly configured. Fixes #52228 --- agent/auxiliary_client.py | 12 ++++++--- tests/agent/test_auxiliary_client.py | 38 ++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 4 deletions(-) diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 18a6f9bfa73..d20f1f948d5 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -5669,7 +5669,9 @@ def call_llm( is_auto = resolved_provider in {"auto", "", None} # Capacity errors bypass the explicit-provider gate: the provider # literally cannot serve this request regardless of user intent. - is_capacity_error = _is_payment_error(first_err) or _is_connection_error(first_err) + # Rate limits are included: after retries are exhausted, a 429 means + # the provider cannot serve this request — fall back. See #52228. + is_capacity_error = _is_payment_error(first_err) or _is_connection_error(first_err) or _is_rate_limit_error(first_err) if should_fallback and (is_auto or is_capacity_error): if _is_payment_error(first_err): reason = "payment error" @@ -6112,11 +6114,13 @@ async def async_call_llm( or _is_connection_error(first_err) or _is_rate_limit_error(first_err) ) - # Capacity errors (payment/quota/connection) bypass the explicit-provider - # gate — the provider cannot serve the request regardless of user intent. + # Capacity errors (payment/quota/connection/rate-limit) bypass the + # explicit-provider gate — the provider cannot serve the request + # regardless of user intent. Rate limits are included: after retries + # are exhausted, a 429 means the provider is at capacity. See #52228. # See #26803: daily token quota must fall back like a 402 credit error. is_auto = resolved_provider in {"auto", "", None} - is_capacity_error = _is_payment_error(first_err) or _is_connection_error(first_err) + is_capacity_error = _is_payment_error(first_err) or _is_connection_error(first_err) or _is_rate_limit_error(first_err) if should_fallback and (is_auto or is_capacity_error): if _is_payment_error(first_err): reason = "payment error" diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py index 060a817998e..aa75b993495 100644 --- a/tests/agent/test_auxiliary_client.py +++ b/tests/agent/test_auxiliary_client.py @@ -1885,6 +1885,44 @@ class TestAuxiliaryFallbackLayering: assert main_client.chat.completions.create.called + def test_explicit_provider_rate_limit_triggers_fallback(self, monkeypatch): + """429 rate-limit on an explicit provider must trigger fallback (not be ignored). + + Regression test for #52228: rate limits were excluded from + ``is_capacity_error``, so explicit-provider auxiliary calls never + fell back on 429 — only auto-provider calls did. + """ + monkeypatch.setenv("OPENROUTER_API_KEY", "or-key") + + primary_client = MagicMock() + rate_err = Exception("Rate limit exceeded, try again in 60 seconds") + rate_err.status_code = 429 + primary_client.chat.completions.create.side_effect = rate_err + + fallback_client = MagicMock() + fallback_client.chat.completions.create.return_value = MagicMock(choices=[ + MagicMock(message=MagicMock(content="from fallback chain")) + ]) + + with patch("agent.auxiliary_client._get_cached_client", + return_value=(primary_client, "gpt-5.5")), \ + patch("agent.auxiliary_client._resolve_task_provider_model", + return_value=("openai-codex", "gpt-5.5", None, None, None)), \ + patch("agent.auxiliary_client._try_configured_fallback_chain", + return_value=(fallback_client, "deepseek-v4-pro", "fallback_chain[0](opencode-go)")) as mock_chain, \ + patch("agent.auxiliary_client._try_main_agent_model_fallback") as mock_main: + result = call_llm( + task="kanban_decomposer", + messages=[{"role": "user", "content": "decompose this"}], + ) + + # Fallback chain MUST be tried for rate-limit on explicit provider + mock_chain.assert_called() + assert fallback_client.chat.completions.create.called + # Main agent fallback should NOT be needed when chain succeeds + mock_main.assert_not_called() + + def test_warning_emitted_when_all_fallbacks_exhausted(self, monkeypatch, caplog): """When chain AND main model both fail, a user-visible warning fires before re-raise.""" monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")