From f7f7588893ef2f1bae7531c37394f1dbbe38ef1f Mon Sep 17 00:00:00 2001
From: vlwkaos <mgparkprint@gmail.com>
Date: Sun, 12 Apr 2026 14:14:26 +0900
Subject: [PATCH] fix(agent): only set rate-limit cooldown when leaving
 primary; add tests

---
 agent/error_classifier.py                     |  5 ++
 run_agent.py                                  | 16 +++-
 tests/agent/test_error_classifier.py          | 34 ++++++++
 .../run_agent/test_primary_runtime_restore.py | 82 +++++++++++++++++++
 4 files changed, 135 insertions(+), 2 deletions(-)

diff --git a/agent/error_classifier.py b/agent/error_classifier.py
index 570000525..87324d676 100644
--- a/agent/error_classifier.py
+++ b/agent/error_classifier.py
@@ -343,6 +343,11 @@ def classify_api_error(
     """
     status_code = _extract_status_code(error)
     error_type = type(error).__name__
+    # Copilot/GitHub Models RateLimitError may not set .status_code; force 429
+    # so downstream rate-limit handling (classifier reason, pool rotation,
+    # fallback gating) fires correctly instead of misclassifying as generic.
+    if status_code is None and error_type == "RateLimitError":
+        status_code = 429
     body = _extract_error_body(error)
     error_code = _extract_error_code(body)
 
diff --git a/run_agent.py b/run_agent.py
index bf88c3719..7af6d3ab0 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -6494,7 +6494,7 @@ class AIAgent:
 
     # ── Provider fallback ──────────────────────────────────────────────────
 
-    def _try_activate_fallback(self) -> bool:
+    def _try_activate_fallback(self, reason: "FailoverReason | None" = None) -> bool:
         """Switch to the next fallback model/provider in the chain.
 
         Called when the current model is failing after retries.  Swaps the
@@ -6506,6 +6506,15 @@ class AIAgent:
         auth resolution and client construction — no duplicated provider→key
         mappings.
         """
+        if reason in (FailoverReason.rate_limit, FailoverReason.billing):
+            # Only start cooldown when leaving the primary provider.  If we're
+            # already on a fallback and chain-switching, the primary wasn't the
+            # source of the 429 so the cooldown should not be reset/extended.
+            fallback_already_active = bool(getattr(self, "_fallback_activated", False))
+            current_provider = (getattr(self, "provider", "") or "").strip().lower()
+            primary_provider = ((self._primary_runtime or {}).get("provider") or "").strip().lower()
+            if (not fallback_already_active) or (primary_provider and current_provider == primary_provider):
+                self._rate_limited_until = time.monotonic() + 60
         if self._fallback_index >= len(self._fallback_chain):
             return False
 
@@ -6689,6 +6698,9 @@ class AIAgent:
         if not self._fallback_activated:
             return False
 
+        if getattr(self, "_rate_limited_until", 0) > time.monotonic():
+            return False  # primary still in rate-limit cooldown, stay on fallback
+
         rt = self._primary_runtime
         try:
             # ── Core runtime state ──
@@ -10665,7 +10677,7 @@ class AIAgent:
                         )
                         if not pool_may_recover:
                             self._emit_status("⚠️ Rate limited — switching to fallback provider...")
-                            if self._try_activate_fallback():
+                            if self._try_activate_fallback(reason=classified.reason):
                                 retry_count = 0
                                 compression_attempts = 0
                                 primary_recovery_attempted = False
diff --git a/tests/agent/test_error_classifier.py b/tests/agent/test_error_classifier.py
index 9bab14548..e8a92774b 100644
--- a/tests/agent/test_error_classifier.py
+++ b/tests/agent/test_error_classifier.py
@@ -1094,3 +1094,37 @@ class TestSSLTransientPatterns:
         result = classify_api_error(e)
         assert result.reason == FailoverReason.timeout
         assert result.retryable is True
+
+# ── Test: RateLimitError without status_code (Copilot/GitHub Models) ──────────
+
+class TestRateLimitErrorWithoutStatusCode:
+    """Regression tests for the Copilot/GitHub Models edge case where the
+    OpenAI SDK raises RateLimitError but does not populate .status_code."""
+
+    def _make_rate_limit_error(self, status_code=None):
+        """Create an exception whose class name is 'RateLimitError' with
+        an optionally missing status_code, mirroring the OpenAI SDK shape."""
+        cls = type("RateLimitError", (Exception,), {})
+        e = cls("You have exceeded your rate limit.")
+        e.status_code = status_code  # None simulates the Copilot case
+        return e
+
+    def test_rate_limit_error_without_status_code_classified_as_rate_limit(self):
+        """RateLimitError with status_code=None must classify as rate_limit."""
+        e = self._make_rate_limit_error(status_code=None)
+        result = classify_api_error(e, provider="copilot", model="gpt-4o")
+        assert result.reason == FailoverReason.rate_limit
+
+    def test_rate_limit_error_with_status_code_429_classified_as_rate_limit(self):
+        """RateLimitError that does set status_code=429 still classifies correctly."""
+        e = self._make_rate_limit_error(status_code=429)
+        result = classify_api_error(e, provider="copilot", model="gpt-4o")
+        assert result.reason == FailoverReason.rate_limit
+
+    def test_other_error_without_status_code_not_forced_to_rate_limit(self):
+        """A non-RateLimitError with missing status_code must NOT be forced to 429."""
+        cls = type("APIError", (Exception,), {})
+        e = cls("something went wrong")
+        e.status_code = None
+        result = classify_api_error(e, provider="copilot", model="gpt-4o")
+        assert result.reason != FailoverReason.rate_limit
diff --git a/tests/run_agent/test_primary_runtime_restore.py b/tests/run_agent/test_primary_runtime_restore.py
index 74119c30e..d082f047f 100644
--- a/tests/run_agent/test_primary_runtime_restore.py
+++ b/tests/run_agent/test_primary_runtime_restore.py
@@ -446,3 +446,85 @@ class TestRestoreInRunConversation:
         assert agent._fallback_index == 0
         assert agent.provider == "custom"
         assert agent.base_url == "https://my-llm.example.com/v1"
+
+
+# =============================================================================
+# Rate-limit cooldown gate
+# =============================================================================
+
+class TestRateLimitCooldown:
+    """Verify _restore_primary_runtime() respects the 60s rate-limit cooldown."""
+
+    def test_restore_blocked_during_cooldown(self):
+        """While _rate_limited_until is in the future, restore returns False."""
+        agent = _make_agent(
+            fallback_model={"provider": "openrouter", "model": "anthropic/claude-sonnet-4"},
+        )
+        mock_client = _mock_resolve()
+        with patch("agent.auxiliary_client.resolve_provider_client", return_value=(mock_client, None)):
+            agent._try_activate_fallback()
+
+        assert agent._fallback_activated is True
+
+        # Manually set cooldown well into the future
+        agent._rate_limited_until = time.monotonic() + 60
+
+        result = agent._restore_primary_runtime()
+        assert result is False
+        assert agent._fallback_activated is True  # still on fallback
+
+    def test_restore_allowed_after_cooldown_expires(self):
+        """Once the cooldown window passes, restore proceeds normally."""
+        agent = _make_agent(
+            fallback_model={"provider": "openrouter", "model": "anthropic/claude-sonnet-4"},
+        )
+        mock_client = _mock_resolve()
+        with patch("agent.auxiliary_client.resolve_provider_client", return_value=(mock_client, None)):
+            agent._try_activate_fallback()
+
+        assert agent._fallback_activated is True
+
+        # Cooldown already expired
+        agent._rate_limited_until = time.monotonic() - 1
+
+        with patch("run_agent.OpenAI", return_value=MagicMock()):
+            result = agent._restore_primary_runtime()
+
+        assert result is True
+        assert agent._fallback_activated is False
+
+    def test_cooldown_set_on_rate_limit_reason(self):
+        """_try_activate_fallback with rate_limit reason sets _rate_limited_until."""
+        from run_agent import FailoverReason
+        agent = _make_agent(
+            fallback_model={"provider": "openrouter", "model": "anthropic/claude-sonnet-4"},
+        )
+        before = time.monotonic()
+        mock_client = _mock_resolve()
+        with patch("agent.auxiliary_client.resolve_provider_client", return_value=(mock_client, None)):
+            agent._try_activate_fallback(reason=FailoverReason.rate_limit)
+
+        assert hasattr(agent, "_rate_limited_until")
+        assert agent._rate_limited_until > before + 50  # ~60s from now
+
+    def test_cooldown_not_set_when_already_on_fallback(self):
+        """Chain-switching while already on fallback must not reset cooldown."""
+        from run_agent import FailoverReason
+        agent = _make_agent(
+            fallback_model=[
+                {"provider": "openrouter", "model": "model-a"},
+                {"provider": "anthropic", "model": "model-b"},
+            ],
+        )
+        mock_client = _mock_resolve()
+        with patch("agent.auxiliary_client.resolve_provider_client", return_value=(mock_client, None)):
+            # First call: leaving primary → cooldown should be set
+            agent._try_activate_fallback(reason=FailoverReason.rate_limit)
+            first_cooldown = getattr(agent, "_rate_limited_until", 0)
+
+            # Second call: already on fallback (provider != primary) → cooldown must not advance
+            agent._try_activate_fallback(reason=FailoverReason.rate_limit)
+            second_cooldown = getattr(agent, "_rate_limited_until", 0)
+
+        # second call should not have extended the cooldown
+        assert second_cooldown == first_cooldown