fix(agent): only set rate-limit cooldown when leaving primary; add tests

This commit is contained in:
vlwkaos 2026-04-12 14:14:26 +09:00 committed by Teknium
parent a9fd8d7c88
commit f7f7588893
4 changed files with 135 additions and 2 deletions

View file

@@ -343,6 +343,11 @@ def classify_api_error(
"""
status_code = _extract_status_code(error)
error_type = type(error).__name__
# Copilot/GitHub Models RateLimitError may not set .status_code; force 429
# so downstream rate-limit handling (classifier reason, pool rotation,
# fallback gating) fires correctly instead of misclassifying as generic.
if status_code is None and error_type == "RateLimitError":
status_code = 429
body = _extract_error_body(error)
error_code = _extract_error_code(body)

View file

@@ -6494,7 +6494,7 @@ class AIAgent:
# ── Provider fallback ──────────────────────────────────────────────────
def _try_activate_fallback(self) -> bool:
def _try_activate_fallback(self, reason: "FailoverReason | None" = None) -> bool:
"""Switch to the next fallback model/provider in the chain.
Called when the current model is failing after retries. Swaps the
@@ -6506,6 +6506,15 @@ class AIAgent:
auth resolution and client construction — no duplicated provider→key
mappings.
"""
if reason in (FailoverReason.rate_limit, FailoverReason.billing):
# Only start cooldown when leaving the primary provider. If we're
# already on a fallback and chain-switching, the primary wasn't the
# source of the 429 so the cooldown should not be reset/extended.
fallback_already_active = bool(getattr(self, "_fallback_activated", False))
current_provider = (getattr(self, "provider", "") or "").strip().lower()
primary_provider = ((self._primary_runtime or {}).get("provider") or "").strip().lower()
if (not fallback_already_active) or (primary_provider and current_provider == primary_provider):
self._rate_limited_until = time.monotonic() + 60
if self._fallback_index >= len(self._fallback_chain):
return False
@@ -6689,6 +6698,9 @@ class AIAgent:
if not self._fallback_activated:
return False
if getattr(self, "_rate_limited_until", 0) > time.monotonic():
return False # primary still in rate-limit cooldown, stay on fallback
rt = self._primary_runtime
try:
# ── Core runtime state ──
@@ -10665,7 +10677,7 @@ class AIAgent:
)
if not pool_may_recover:
self._emit_status("⚠️ Rate limited — switching to fallback provider...")
if self._try_activate_fallback():
if self._try_activate_fallback(reason=classified.reason):
retry_count = 0
compression_attempts = 0
primary_recovery_attempted = False

View file

@@ -1094,3 +1094,37 @@ class TestSSLTransientPatterns:
result = classify_api_error(e)
assert result.reason == FailoverReason.timeout
assert result.retryable is True
# ── Test: RateLimitError without status_code (Copilot/GitHub Models) ──────────
class TestRateLimitErrorWithoutStatusCode:
    """Regression tests for the Copilot/GitHub Models edge case where the
    OpenAI SDK raises RateLimitError but does not populate .status_code."""

    @staticmethod
    def _build_rate_limit_error(status_code=None):
        """Build an exception whose class name is 'RateLimitError' carrying
        an optionally absent status_code, mimicking the OpenAI SDK shape."""
        error_cls = type("RateLimitError", (Exception,), {})
        err = error_cls("You have exceeded your rate limit.")
        err.status_code = status_code  # None reproduces the Copilot case
        return err

    def test_rate_limit_error_without_status_code_classified_as_rate_limit(self):
        """RateLimitError with status_code=None must classify as rate_limit."""
        err = self._build_rate_limit_error(status_code=None)
        classified = classify_api_error(err, provider="copilot", model="gpt-4o")
        assert classified.reason == FailoverReason.rate_limit

    def test_rate_limit_error_with_status_code_429_classified_as_rate_limit(self):
        """RateLimitError that does set status_code=429 still classifies correctly."""
        err = self._build_rate_limit_error(status_code=429)
        classified = classify_api_error(err, provider="copilot", model="gpt-4o")
        assert classified.reason == FailoverReason.rate_limit

    def test_other_error_without_status_code_not_forced_to_rate_limit(self):
        """A non-RateLimitError with missing status_code must NOT be forced to 429."""
        generic_cls = type("APIError", (Exception,), {})
        err = generic_cls("something went wrong")
        err.status_code = None
        classified = classify_api_error(err, provider="copilot", model="gpt-4o")
        assert classified.reason != FailoverReason.rate_limit

View file

@@ -446,3 +446,85 @@ class TestRestoreInRunConversation:
assert agent._fallback_index == 0
assert agent.provider == "custom"
assert agent.base_url == "https://my-llm.example.com/v1"
# =============================================================================
# Rate-limit cooldown gate
# =============================================================================
class TestRateLimitCooldown:
    """Verify _restore_primary_runtime() respects the 60s rate-limit cooldown."""

    def test_restore_blocked_during_cooldown(self):
        """While _rate_limited_until is in the future, restore returns False."""
        agent = _make_agent(
            fallback_model={"provider": "openrouter", "model": "anthropic/claude-sonnet-4"},
        )
        mock_client = _mock_resolve()
        # Patch client resolution so activating the fallback needs no network.
        with patch("agent.auxiliary_client.resolve_provider_client", return_value=(mock_client, None)):
            agent._try_activate_fallback()
        assert agent._fallback_activated is True
        # Manually set cooldown well into the future
        agent._rate_limited_until = time.monotonic() + 60
        result = agent._restore_primary_runtime()
        assert result is False
        assert agent._fallback_activated is True  # still on fallback

    def test_restore_allowed_after_cooldown_expires(self):
        """Once the cooldown window passes, restore proceeds normally."""
        agent = _make_agent(
            fallback_model={"provider": "openrouter", "model": "anthropic/claude-sonnet-4"},
        )
        mock_client = _mock_resolve()
        with patch("agent.auxiliary_client.resolve_provider_client", return_value=(mock_client, None)):
            agent._try_activate_fallback()
        assert agent._fallback_activated is True
        # Cooldown already expired
        agent._rate_limited_until = time.monotonic() - 1
        # Patch the primary-client constructor so restore can rebuild it offline.
        with patch("run_agent.OpenAI", return_value=MagicMock()):
            result = agent._restore_primary_runtime()
        assert result is True
        assert agent._fallback_activated is False

    def test_cooldown_set_on_rate_limit_reason(self):
        """_try_activate_fallback with rate_limit reason sets _rate_limited_until."""
        from run_agent import FailoverReason
        agent = _make_agent(
            fallback_model={"provider": "openrouter", "model": "anthropic/claude-sonnet-4"},
        )
        # Capture a monotonic baseline before activation to bound the cooldown check.
        before = time.monotonic()
        mock_client = _mock_resolve()
        with patch("agent.auxiliary_client.resolve_provider_client", return_value=(mock_client, None)):
            agent._try_activate_fallback(reason=FailoverReason.rate_limit)
        assert hasattr(agent, "_rate_limited_until")
        assert agent._rate_limited_until > before + 50  # ~60s from now

    def test_cooldown_not_set_when_already_on_fallback(self):
        """Chain-switching while already on fallback must not reset cooldown."""
        from run_agent import FailoverReason
        # Two-entry chain so a second activation switches fallback→fallback.
        agent = _make_agent(
            fallback_model=[
                {"provider": "openrouter", "model": "model-a"},
                {"provider": "anthropic", "model": "model-b"},
            ],
        )
        mock_client = _mock_resolve()
        with patch("agent.auxiliary_client.resolve_provider_client", return_value=(mock_client, None)):
            # First call: leaving primary → cooldown should be set
            agent._try_activate_fallback(reason=FailoverReason.rate_limit)
            first_cooldown = getattr(agent, "_rate_limited_until", 0)
            # Second call: already on fallback (provider != primary) → cooldown must not advance
            agent._try_activate_fallback(reason=FailoverReason.rate_limit)
            second_cooldown = getattr(agent, "_rate_limited_until", 0)
        # second call should not have extended the cooldown
        assert second_cooldown == first_cooldown