From f7f7588893ef2f1bae7531c37394f1dbbe38ef1f Mon Sep 17 00:00:00 2001 From: vlwkaos Date: Sun, 12 Apr 2026 14:14:26 +0900 Subject: [PATCH] fix(agent): only set rate-limit cooldown when leaving primary; add tests --- agent/error_classifier.py | 5 ++ run_agent.py | 16 +++- tests/agent/test_error_classifier.py | 34 ++++++++ .../run_agent/test_primary_runtime_restore.py | 82 +++++++++++++++++++ 4 files changed, 135 insertions(+), 2 deletions(-) diff --git a/agent/error_classifier.py b/agent/error_classifier.py index 570000525..87324d676 100644 --- a/agent/error_classifier.py +++ b/agent/error_classifier.py @@ -343,6 +343,11 @@ def classify_api_error( """ status_code = _extract_status_code(error) error_type = type(error).__name__ + # Copilot/GitHub Models RateLimitError may not set .status_code; force 429 + # so downstream rate-limit handling (classifier reason, pool rotation, + # fallback gating) fires correctly instead of misclassifying as generic. + if status_code is None and error_type == "RateLimitError": + status_code = 429 body = _extract_error_body(error) error_code = _extract_error_code(body) diff --git a/run_agent.py b/run_agent.py index bf88c3719..7af6d3ab0 100644 --- a/run_agent.py +++ b/run_agent.py @@ -6494,7 +6494,7 @@ class AIAgent: # ── Provider fallback ────────────────────────────────────────────────── - def _try_activate_fallback(self) -> bool: + def _try_activate_fallback(self, reason: "FailoverReason | None" = None) -> bool: """Switch to the next fallback model/provider in the chain. Called when the current model is failing after retries. Swaps the @@ -6506,6 +6506,15 @@ class AIAgent: auth resolution and client construction — no duplicated provider→key mappings. """ + if reason in (FailoverReason.rate_limit, FailoverReason.billing): + # Only start cooldown when leaving the primary provider. If we're + # already on a fallback and chain-switching, the primary wasn't the + # source of the 429 so the cooldown should not be reset/extended. + fallback_already_active = bool(getattr(self, "_fallback_activated", False)) + current_provider = (getattr(self, "provider", "") or "").strip().lower() + primary_provider = ((self._primary_runtime or {}).get("provider") or "").strip().lower() + if (not fallback_already_active) or (primary_provider and current_provider == primary_provider): + self._rate_limited_until = time.monotonic() + 60 if self._fallback_index >= len(self._fallback_chain): return False @@ -6689,6 +6698,9 @@ class AIAgent: if not self._fallback_activated: return False + if getattr(self, "_rate_limited_until", 0) > time.monotonic(): + return False # primary still in rate-limit cooldown, stay on fallback + rt = self._primary_runtime try: # ── Core runtime state ── @@ -10665,7 +10677,7 @@ class AIAgent: ) if not pool_may_recover: self._emit_status("⚠️ Rate limited — switching to fallback provider...") - if self._try_activate_fallback(): + if self._try_activate_fallback(reason=classified.reason): retry_count = 0 compression_attempts = 0 primary_recovery_attempted = False diff --git a/tests/agent/test_error_classifier.py b/tests/agent/test_error_classifier.py index 9bab14548..e8a92774b 100644 --- a/tests/agent/test_error_classifier.py +++ b/tests/agent/test_error_classifier.py @@ -1094,3 +1094,37 @@ class TestSSLTransientPatterns: result = classify_api_error(e) assert result.reason == FailoverReason.timeout assert result.retryable is True + +# ── Test: RateLimitError without status_code (Copilot/GitHub Models) ────────── + +class TestRateLimitErrorWithoutStatusCode: + """Regression tests for the Copilot/GitHub Models edge case where the + OpenAI SDK raises RateLimitError but does not populate .status_code.""" + + def _make_rate_limit_error(self, status_code=None): + """Create an exception whose class name is 'RateLimitError' with + an optionally missing status_code, mirroring the OpenAI SDK shape.""" + cls = type("RateLimitError", (Exception,), {}) + e = cls("You have exceeded your rate limit.") + e.status_code = status_code # None simulates the Copilot case + return e + + def test_rate_limit_error_without_status_code_classified_as_rate_limit(self): + """RateLimitError with status_code=None must classify as rate_limit.""" + e = self._make_rate_limit_error(status_code=None) + result = classify_api_error(e, provider="copilot", model="gpt-4o") + assert result.reason == FailoverReason.rate_limit + + def test_rate_limit_error_with_status_code_429_classified_as_rate_limit(self): + """RateLimitError that does set status_code=429 still classifies correctly.""" + e = self._make_rate_limit_error(status_code=429) + result = classify_api_error(e, provider="copilot", model="gpt-4o") + assert result.reason == FailoverReason.rate_limit + + def test_other_error_without_status_code_not_forced_to_rate_limit(self): + """A non-RateLimitError with missing status_code must NOT be forced to 429.""" + cls = type("APIError", (Exception,), {}) + e = cls("something went wrong") + e.status_code = None + result = classify_api_error(e, provider="copilot", model="gpt-4o") + assert result.reason != FailoverReason.rate_limit diff --git a/tests/run_agent/test_primary_runtime_restore.py b/tests/run_agent/test_primary_runtime_restore.py index 74119c30e..d082f047f 100644 --- a/tests/run_agent/test_primary_runtime_restore.py +++ b/tests/run_agent/test_primary_runtime_restore.py @@ -446,3 +446,85 @@ class TestRestoreInRunConversation: assert agent._fallback_index == 0 assert agent.provider == "custom" assert agent.base_url == "https://my-llm.example.com/v1" + + +# ============================================================================= +# Rate-limit cooldown gate +# ============================================================================= + +class TestRateLimitCooldown: + """Verify _restore_primary_runtime() respects the 60s rate-limit cooldown.""" + + def test_restore_blocked_during_cooldown(self): + """While _rate_limited_until is in the future, restore returns False.""" + agent = _make_agent( + fallback_model={"provider": "openrouter", "model": "anthropic/claude-sonnet-4"}, + ) + mock_client = _mock_resolve() + with patch("agent.auxiliary_client.resolve_provider_client", return_value=(mock_client, None)): + agent._try_activate_fallback() + + assert agent._fallback_activated is True + + # Manually set cooldown well into the future + agent._rate_limited_until = time.monotonic() + 60 + + result = agent._restore_primary_runtime() + assert result is False + assert agent._fallback_activated is True # still on fallback + + def test_restore_allowed_after_cooldown_expires(self): + """Once the cooldown window passes, restore proceeds normally.""" + agent = _make_agent( + fallback_model={"provider": "openrouter", "model": "anthropic/claude-sonnet-4"}, + ) + mock_client = _mock_resolve() + with patch("agent.auxiliary_client.resolve_provider_client", return_value=(mock_client, None)): + agent._try_activate_fallback() + + assert agent._fallback_activated is True + + # Cooldown already expired + agent._rate_limited_until = time.monotonic() - 1 + + with patch("run_agent.OpenAI", return_value=MagicMock()): + result = agent._restore_primary_runtime() + + assert result is True + assert agent._fallback_activated is False + + def test_cooldown_set_on_rate_limit_reason(self): + """_try_activate_fallback with rate_limit reason sets _rate_limited_until.""" + from run_agent import FailoverReason + agent = _make_agent( + fallback_model={"provider": "openrouter", "model": "anthropic/claude-sonnet-4"}, + ) + before = time.monotonic() + mock_client = _mock_resolve() + with patch("agent.auxiliary_client.resolve_provider_client", return_value=(mock_client, None)): + agent._try_activate_fallback(reason=FailoverReason.rate_limit) + + assert hasattr(agent, "_rate_limited_until") + assert agent._rate_limited_until > before + 50 # ~60s from now + + def test_cooldown_not_set_when_already_on_fallback(self): + """Chain-switching while already on fallback must not reset cooldown.""" + from run_agent import FailoverReason + agent = _make_agent( + fallback_model=[ + {"provider": "openrouter", "model": "model-a"}, + {"provider": "anthropic", "model": "model-b"}, + ], + ) + mock_client = _mock_resolve() + with patch("agent.auxiliary_client.resolve_provider_client", return_value=(mock_client, None)): + # First call: leaving primary → cooldown should be set + agent._try_activate_fallback(reason=FailoverReason.rate_limit) + first_cooldown = getattr(agent, "_rate_limited_until", 0) + + # Second call: already on fallback (provider != primary) → cooldown must not advance + agent._try_activate_fallback(reason=FailoverReason.rate_limit) + second_cooldown = getattr(agent, "_rate_limited_until", 0) + + # second call should not have extended the cooldown + assert second_cooldown == first_cooldown