fix(agent): only set rate-limit cooldown when leaving primary; add tests

This commit is contained in:
vlwkaos 2026-04-12 14:14:26 +09:00 committed by Teknium
parent a9fd8d7c88
commit f7f7588893
4 changed files with 135 additions and 2 deletions

View file

@@ -343,6 +343,11 @@ def classify_api_error(
"""
status_code = _extract_status_code(error)
error_type = type(error).__name__
# Copilot/GitHub Models RateLimitError may not set .status_code; force 429
# so downstream rate-limit handling (classifier reason, pool rotation,
# fallback gating) fires correctly instead of misclassifying as generic.
if status_code is None and error_type == "RateLimitError":
status_code = 429
body = _extract_error_body(error)
error_code = _extract_error_code(body)

View file

@@ -6494,7 +6494,7 @@ class AIAgent:
# ── Provider fallback ──────────────────────────────────────────────────
def _try_activate_fallback(self) -> bool:
def _try_activate_fallback(self, reason: "FailoverReason | None" = None) -> bool:
"""Switch to the next fallback model/provider in the chain.
Called when the current model is failing after retries. Swaps the
@@ -6506,6 +6506,15 @@ class AIAgent:
auth resolution and client construction — no duplicated provider→key
mappings.
"""
if reason in (FailoverReason.rate_limit, FailoverReason.billing):
# Only start cooldown when leaving the primary provider. If we're
# already on a fallback and chain-switching, the primary wasn't the
# source of the 429 so the cooldown should not be reset/extended.
fallback_already_active = bool(getattr(self, "_fallback_activated", False))
current_provider = (getattr(self, "provider", "") or "").strip().lower()
primary_provider = ((self._primary_runtime or {}).get("provider") or "").strip().lower()
if (not fallback_already_active) or (primary_provider and current_provider == primary_provider):
self._rate_limited_until = time.monotonic() + 60
if self._fallback_index >= len(self._fallback_chain):
return False
@@ -6689,6 +6698,9 @@ class AIAgent:
if not self._fallback_activated:
return False
if getattr(self, "_rate_limited_until", 0) > time.monotonic():
return False # primary still in rate-limit cooldown, stay on fallback
rt = self._primary_runtime
try:
# ── Core runtime state ──
@@ -10665,7 +10677,7 @@ class AIAgent:
)
if not pool_may_recover:
self._emit_status("⚠️ Rate limited — switching to fallback provider...")
if self._try_activate_fallback():
if self._try_activate_fallback(reason=classified.reason):
retry_count = 0
compression_attempts = 0
primary_recovery_attempted = False

View file

@@ -1094,3 +1094,37 @@ class TestSSLTransientPatterns:
result = classify_api_error(e)
assert result.reason == FailoverReason.timeout
assert result.retryable is True
# ── Test: RateLimitError without status_code (Copilot/GitHub Models) ──────────
class TestRateLimitErrorWithoutStatusCode:
    """Regression tests for the Copilot/GitHub Models edge case where the
    OpenAI SDK raises RateLimitError but does not populate .status_code."""

    @staticmethod
    def _build_rate_limit_error(status_code=None):
        """Build an exception whose class name is 'RateLimitError' carrying
        an optionally absent status_code, mimicking the OpenAI SDK shape."""
        error_cls = type("RateLimitError", (Exception,), {})
        err = error_cls("You have exceeded your rate limit.")
        err.status_code = status_code  # None reproduces the Copilot case
        return err

    def test_rate_limit_error_without_status_code_classified_as_rate_limit(self):
        """RateLimitError with status_code=None must classify as rate_limit."""
        err = self._build_rate_limit_error(status_code=None)
        classified = classify_api_error(err, provider="copilot", model="gpt-4o")
        assert classified.reason == FailoverReason.rate_limit

    def test_rate_limit_error_with_status_code_429_classified_as_rate_limit(self):
        """RateLimitError that does set status_code=429 still classifies correctly."""
        err = self._build_rate_limit_error(status_code=429)
        classified = classify_api_error(err, provider="copilot", model="gpt-4o")
        assert classified.reason == FailoverReason.rate_limit

    def test_other_error_without_status_code_not_forced_to_rate_limit(self):
        """A non-RateLimitError with missing status_code must NOT be forced to 429."""
        generic_cls = type("APIError", (Exception,), {})
        err = generic_cls("something went wrong")
        err.status_code = None
        classified = classify_api_error(err, provider="copilot", model="gpt-4o")
        assert classified.reason != FailoverReason.rate_limit

View file

@@ -446,3 +446,85 @@ class TestRestoreInRunConversation:
assert agent._fallback_index == 0
assert agent.provider == "custom"
assert agent.base_url == "https://my-llm.example.com/v1"
# =============================================================================
# Rate-limit cooldown gate
# =============================================================================
class TestRateLimitCooldown:
    """Verify _restore_primary_runtime() respects the 60s rate-limit cooldown."""

    def test_restore_blocked_during_cooldown(self):
        """While _rate_limited_until is in the future, restore returns False."""
        agent = _make_agent(
            fallback_model={"provider": "openrouter", "model": "anthropic/claude-sonnet-4"},
        )
        mock_client = _mock_resolve()
        # Patch client resolution so activating the fallback needs no network.
        with patch("agent.auxiliary_client.resolve_provider_client", return_value=(mock_client, None)):
            agent._try_activate_fallback()
        assert agent._fallback_activated is True
        # Manually set cooldown well into the future
        agent._rate_limited_until = time.monotonic() + 60
        result = agent._restore_primary_runtime()
        assert result is False
        assert agent._fallback_activated is True  # still on fallback

    def test_restore_allowed_after_cooldown_expires(self):
        """Once the cooldown window passes, restore proceeds normally."""
        agent = _make_agent(
            fallback_model={"provider": "openrouter", "model": "anthropic/claude-sonnet-4"},
        )
        mock_client = _mock_resolve()
        with patch("agent.auxiliary_client.resolve_provider_client", return_value=(mock_client, None)):
            agent._try_activate_fallback()
        assert agent._fallback_activated is True
        # Cooldown already expired
        agent._rate_limited_until = time.monotonic() - 1
        # Patch the primary-client constructor so restore can rebuild it offline.
        with patch("run_agent.OpenAI", return_value=MagicMock()):
            result = agent._restore_primary_runtime()
        assert result is True
        assert agent._fallback_activated is False

    def test_cooldown_set_on_rate_limit_reason(self):
        """_try_activate_fallback with rate_limit reason sets _rate_limited_until."""
        from run_agent import FailoverReason
        agent = _make_agent(
            fallback_model={"provider": "openrouter", "model": "anthropic/claude-sonnet-4"},
        )
        # Capture a monotonic baseline before activation to bound the cooldown check.
        before = time.monotonic()
        mock_client = _mock_resolve()
        with patch("agent.auxiliary_client.resolve_provider_client", return_value=(mock_client, None)):
            agent._try_activate_fallback(reason=FailoverReason.rate_limit)
        assert hasattr(agent, "_rate_limited_until")
        assert agent._rate_limited_until > before + 50  # ~60s from now

    def test_cooldown_not_set_when_already_on_fallback(self):
        """Chain-switching while already on fallback must not reset cooldown."""
        from run_agent import FailoverReason
        # Two-entry chain so a second activation switches fallback→fallback.
        agent = _make_agent(
            fallback_model=[
                {"provider": "openrouter", "model": "model-a"},
                {"provider": "anthropic", "model": "model-b"},
            ],
        )
        mock_client = _mock_resolve()
        with patch("agent.auxiliary_client.resolve_provider_client", return_value=(mock_client, None)):
            # First call: leaving primary → cooldown should be set
            agent._try_activate_fallback(reason=FailoverReason.rate_limit)
            first_cooldown = getattr(agent, "_rate_limited_until", 0)
            # Second call: already on fallback (provider != primary) → cooldown must not advance
            agent._try_activate_fallback(reason=FailoverReason.rate_limit)
            second_cooldown = getattr(agent, "_rate_limited_until", 0)
        # second call should not have extended the cooldown
        assert second_cooldown == first_cooldown