From 751adfa6b94147ed7e2981f5db35f0686f8f7394 Mon Sep 17 00:00:00 2001
From: pyxl-dev <pyxl-dev@users.noreply.github.com>
Date: Thu, 25 Jun 2026 01:17:40 +0000
Subject: [PATCH] fix: include rate-limit in auxiliary capacity-error fallback
 gate
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Explicit-provider auxiliary tasks that hit a rate-limit (429) error
were silently failing instead of triggering the fallback chain. The
is_capacity_error gate only checked payment and connection errors,
excluding rate limits — so when a configured provider like
openai-codex hit its rate limit, auxiliary tasks (kanban_decomposer,
vision, web_extract, approval, etc.) had zero resilience.

Add _is_rate_limit_error() to is_capacity_error at both call sites
(sync and async paths) so rate limits trigger fallback regardless
of whether the provider was auto-detected or explicitly configured.

Fixes #52228
---
 agent/auxiliary_client.py            | 12 ++++++---
 tests/agent/test_auxiliary_client.py | 38 ++++++++++++++++++++++++++++
 2 files changed, 46 insertions(+), 4 deletions(-)

diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py
index 18a6f9bfa73..d20f1f948d5 100644
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -5669,7 +5669,9 @@ def call_llm(
         is_auto = resolved_provider in {"auto", "", None}
         # Capacity errors bypass the explicit-provider gate: the provider
         # literally cannot serve this request regardless of user intent.
-        is_capacity_error = _is_payment_error(first_err) or _is_connection_error(first_err)
+        # Rate limits are included: after retries are exhausted, a 429 means
+        # the provider cannot serve this request — fall back. See #52228.
+        is_capacity_error = _is_payment_error(first_err) or _is_connection_error(first_err) or _is_rate_limit_error(first_err)
         if should_fallback and (is_auto or is_capacity_error):
             if _is_payment_error(first_err):
                 reason = "payment error"
@@ -6112,11 +6114,13 @@ async def async_call_llm(
             or _is_connection_error(first_err)
             or _is_rate_limit_error(first_err)
         )
-        # Capacity errors (payment/quota/connection) bypass the explicit-provider
-        # gate — the provider cannot serve the request regardless of user intent.
+        # Capacity errors (payment/quota/connection/rate-limit) bypass the
+        # explicit-provider gate — the provider cannot serve the request
+        # regardless of user intent. Rate limits are included: after retries
+        # are exhausted, a 429 means the provider is at capacity. See #52228.
         # See #26803: daily token quota must fall back like a 402 credit error.
         is_auto = resolved_provider in {"auto", "", None}
-        is_capacity_error = _is_payment_error(first_err) or _is_connection_error(first_err)
+        is_capacity_error = _is_payment_error(first_err) or _is_connection_error(first_err) or _is_rate_limit_error(first_err)
         if should_fallback and (is_auto or is_capacity_error):
             if _is_payment_error(first_err):
                 reason = "payment error"
diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py
index 060a817998e..aa75b993495 100644
--- a/tests/agent/test_auxiliary_client.py
+++ b/tests/agent/test_auxiliary_client.py
@@ -1885,6 +1885,44 @@ class TestAuxiliaryFallbackLayering:
 
         assert main_client.chat.completions.create.called
 
+    def test_explicit_provider_rate_limit_triggers_fallback(self, monkeypatch):
+        """429 rate-limit on an explicit provider must trigger fallback (not be ignored).
+
+        Regression test for #52228: rate limits were excluded from
+        ``is_capacity_error``, so explicit-provider auxiliary calls never
+        fell back on 429 — only auto-provider calls did.
+        """
+        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
+
+        primary_client = MagicMock()
+        rate_err = Exception("Rate limit exceeded, try again in 60 seconds")
+        rate_err.status_code = 429
+        primary_client.chat.completions.create.side_effect = rate_err
+
+        fallback_client = MagicMock()
+        fallback_client.chat.completions.create.return_value = MagicMock(choices=[
+            MagicMock(message=MagicMock(content="from fallback chain"))
+        ])
+
+        with patch("agent.auxiliary_client._get_cached_client",
+                   return_value=(primary_client, "gpt-5.5")), \
+             patch("agent.auxiliary_client._resolve_task_provider_model",
+                   return_value=("openai-codex", "gpt-5.5", None, None, None)), \
+             patch("agent.auxiliary_client._try_configured_fallback_chain",
+                   return_value=(fallback_client, "deepseek-v4-pro", "fallback_chain[0](opencode-go)")) as mock_chain, \
+             patch("agent.auxiliary_client._try_main_agent_model_fallback") as mock_main:
+            result = call_llm(
+                task="kanban_decomposer",
+                messages=[{"role": "user", "content": "decompose this"}],
+            )
+
+        # Fallback chain MUST be tried for rate-limit on explicit provider
+        mock_chain.assert_called()
+        assert fallback_client.chat.completions.create.called
+        # Main agent fallback should NOT be needed when chain succeeds
+        mock_main.assert_not_called()
+
+
     def test_warning_emitted_when_all_fallbacks_exhausted(self, monkeypatch, caplog):
         """When chain AND main model both fail, a user-visible warning fires before re-raise."""
         monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")