fix(aux): trigger fallback on 429 rate-limit errors in auxiliary client

When a provider returns a 429 rate-limit error (not billing-related),
the auxiliary client's call_llm/async_call_llm previously did NOT trigger
the fallback chain. This caused auxiliary tasks like session_search to
exhaust all 3 retries against the same rate-limited endpoint, losing
session metadata that depended on the summarization completing.

Root cause: `_is_payment_error()` only matched 429s containing billing
keywords ("credits", "insufficient funds", etc.). Provider-specific
rate-limit messages like Nous's "Hold up for a bit, you've exceeded the
rate limit on your API key" didn't match, so `_is_payment_error` returned
False, `_is_connection_error` returned False, and `should_fallback` was
False — all retries hit the same rate-limited provider.

Fix:
- New `_is_rate_limit_error()` function that detects 429 + rate-limit
  keywords, generic 429 without billing keywords, and OpenAI SDK
  `RateLimitError` class instances (which may omit .status_code).
- Updated `should_fallback` in both `call_llm` and `async_call_llm` to
  include `_is_rate_limit_error`.
- Updated the max_tokens retry path to also check for rate-limit errors.
- Updated the reason string to include "rate limit".

This complements the Nous rate guard (PR #10568) which prevents new calls
to Nous when already rate-limited — this fix handles the case where a
request is already in flight when the 429 arrives.

Related: #8023, #12554, #11034
Co-authored-by: Zeejay <zjtan1@gmail.com>
This commit is contained in:
Zeejay 2026-04-22 01:14:32 +10:00 committed by Teknium
parent 8c0f254c06
commit f8ba265340
2 changed files with 155 additions and 8 deletions

View file

@ -20,6 +20,7 @@ from agent.auxiliary_client import (
_read_codex_access_token,
_get_provider_chain,
_is_payment_error,
_is_rate_limit_error,
_normalize_aux_provider,
_try_payment_fallback,
_resolve_auto,
@ -789,6 +790,65 @@ class TestIsPaymentError:
assert _is_payment_error(exc) is False
class TestIsRateLimitError:
"""_is_rate_limit_error detects 429 rate-limit errors warranting fallback."""
def test_429_with_rate_limit_message(self):
exc = Exception("Rate limit exceeded, try again in 2 seconds")
exc.status_code = 429
assert _is_rate_limit_error(exc) is True
def test_429_with_resets_in_message(self):
"""Nous-style 429: 'resets in 3508s'."""
exc = Exception("Hold up for a bit, you've exceeded the rate limit on your API key")
exc.status_code = 429
assert _is_rate_limit_error(exc) is True
def test_429_with_too_many_requests(self):
exc = Exception("Too many requests")
exc.status_code = 429
assert _is_rate_limit_error(exc) is True
def test_429_without_billing_keywords_is_rate_limit(self):
"""Generic 429 without billing keywords = likely a rate limit."""
exc = Exception("Something went wrong")
exc.status_code = 429
assert _is_rate_limit_error(exc) is True
def test_429_with_credits_message_is_not_rate_limit(self):
"""Billing-related 429 should NOT be classified as rate limit."""
exc = Exception("insufficient credits remaining")
exc.status_code = 429
assert _is_rate_limit_error(exc) is False
def test_429_with_billing_message_is_not_rate_limit(self):
exc = Exception("you can only afford 1000 tokens")
exc.status_code = 429
assert _is_rate_limit_error(exc) is False
def test_402_is_not_rate_limit(self):
exc = Exception("Payment Required")
exc.status_code = 402
assert _is_rate_limit_error(exc) is False
def test_500_is_not_rate_limit(self):
exc = Exception("Internal Server Error")
exc.status_code = 500
assert _is_rate_limit_error(exc) is False
def test_openai_ratelimiterror_classname(self):
"""OpenAI SDK RateLimitError may omit .status_code — detect by class name."""
class RateLimitError(Exception):
pass
exc = RateLimitError("rate limit exceeded")
# No status_code set, but class name matches
assert _is_rate_limit_error(exc) is True
def test_no_status_code_no_keywords_is_not_rate_limit(self):
exc = Exception("connection reset")
assert _is_rate_limit_error(exc) is False
class TestGetProviderChain:
"""_get_provider_chain() resolves functions at call time (testable)."""
@ -860,13 +920,18 @@ class TestTryPaymentFallback:
class TestCallLlmPaymentFallback:
"""call_llm() retries with a different provider on 402 / payment errors."""
"""call_llm() retries with a different provider on 402 / payment / rate-limit errors."""
def _make_402_error(self, msg="Payment Required: insufficient credits"):
exc = Exception(msg)
exc.status_code = 402
return exc
def _make_429_rate_limit_error(self, msg="Rate limit exceeded, try again in 60 seconds"):
exc = Exception(msg)
exc.status_code = 429
return exc
def test_non_payment_error_not_caught(self, monkeypatch):
"""Non-payment/non-connection errors (500) should NOT trigger fallback."""
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
@ -886,6 +951,32 @@ class TestCallLlmPaymentFallback:
messages=[{"role": "user", "content": "hello"}],
)
def test_429_rate_limit_triggers_fallback(self, monkeypatch):
"""429 rate-limit errors should trigger fallback to next provider."""
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
primary_client = MagicMock()
rate_err = self._make_429_rate_limit_error()
primary_client.chat.completions.create.side_effect = rate_err
fallback_client = MagicMock()
fallback_client.chat.completions.create.return_value = MagicMock(choices=[
MagicMock(message=MagicMock(content="fallback response"))
])
with patch("agent.auxiliary_client._get_cached_client",
return_value=(primary_client, "xiaomi/mimo-v2-pro")), \
patch("agent.auxiliary_client._resolve_task_provider_model",
return_value=("auto", "xiaomi/mimo-v2-pro", None, None, None)), \
patch("agent.auxiliary_client._try_payment_fallback",
return_value=(fallback_client, "fallback-model", "openrouter")):
result = call_llm(
task="session_search",
messages=[{"role": "user", "content": "hello"}],
)
# Fallback client should have been used
assert fallback_client.chat.completions.create.called
# ---------------------------------------------------------------------------
# Gate: _resolve_api_key_provider must skip anthropic when not configured
# ---------------------------------------------------------------------------