From 38e7bd8a08a9df450b7d8661778ecc47f25323c2 Mon Sep 17 00:00:00 2001
From: teknium1 <127238744+teknium1@users.noreply.github.com>
Date: Sat, 27 Jun 2026 03:55:17 -0700
Subject: [PATCH] fix(agent): classify 429 'overloaded' bodies as overloaded,
 not rate_limit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Z.AI / Zhipu reuse HTTP 429 for server-wide overload. The 429 status
path classified these unconditionally as rate_limit with
should_rotate_credential=True, so an overloaded provider exhausted the
credential pool after two errors — fatal for a single-key user, who has
nothing to rotate to.

The credential is valid; the server is just busy. Disambiguate the 429
body against a shared _OVERLOADED_PATTERNS list and route overload
language to FailoverReason.overloaded (retryable, no rotation), matching
the existing 503/529 path and the message-only path (#52890). Genuine
rate limits (no overload language) still rotate.

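Extracted the inline overload-pattern tuple that #52890 added into a
shared _OVERLOADED_PATTERNS constant so the status-code and message paths
use one list. The shared list carries more phrasings than the old tuple
(e.g. "at capacity", "over capacity"), so the message-only path now
matches those as well.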

Closes #14038.
---
 agent/error_classifier.py            | 44 ++++++++++++++++++++++++----
 tests/agent/test_error_classifier.py | 24 +++++++++++++++
 2 files changed, 63 insertions(+), 5 deletions(-)

diff --git a/agent/error_classifier.py b/agent/error_classifier.py
index a54738befb7..2d43806bc07 100644
--- a/agent/error_classifier.py
+++ b/agent/error_classifier.py
@@ -133,6 +133,31 @@ _RATE_LIMIT_PATTERNS = [
     "servicequotaexceededexception",
 ]
 
+# Patterns that indicate provider-side overload, NOT a per-credential rate
+# limit or billing problem.  The credential is valid — the server is just
+# busy — so the correct recovery is "back off and retry the same key", never
+# "rotate the credential" (rotating exhausts the pool while the endpoint is
+# still busy; a single-key user has nothing to rotate to).  Some providers
+# (notably Z.AI / Zhipu) reuse HTTP 429 for server-wide overload, so the 429
+# status path matches the body against this list before falling through to
+# the rate_limit default.  Matching is by containment: bare "overloaded"
+# subsumes the longer "... overloaded" entries, while a plain rate-limit
+# message ("you have been rate-limited") misses this bucket. (#14038, #15297)
+_OVERLOADED_PATTERNS = [
+    "overloaded",
+    "temporarily overloaded",
+    "service is temporarily overloaded",
+    "service may be temporarily overloaded",
+    "server is overloaded",
+    "server overloaded",
+    "service overloaded",
+    "service is overloaded",
+    "upstream overloaded",
+    "currently overloaded",
+    "at capacity",
+    "over capacity",
+]
+
 # Usage-limit patterns that need disambiguation (could be billing OR rate_limit)
 _USAGE_LIMIT_PATTERNS = [
     "usage limit",
@@ -863,7 +888,19 @@ def _classify_by_status(
         )
 
     if status_code == 429:
-        # Already checked long_context_tier above; this is a normal rate limit
+        # Already checked long_context_tier above. Some providers (notably
+        # Z.AI / Zhipu) reuse HTTP 429 for server-wide overload — same status
+        # code as a true per-credential rate limit, but the credential is
+        # valid and the correct recovery is "back off and retry the same key",
+        # NOT "rotate the credential" (which exhausts the pool while the
+        # endpoint is still busy, and does nothing for a single-key user).
+        # Disambiguate on the error body so an overload 429 takes the
+        # transient-overload path instead of burning the pool. (#14038)
+        if any(p in error_msg for p in _OVERLOADED_PATTERNS):
+            return result_fn(
+                FailoverReason.overloaded,
+                retryable=True,
+            )
         return result_fn(
             FailoverReason.rate_limit,
             retryable=True,
@@ -1219,10 +1256,7 @@ def _classify_by_message(
     # e.g. some Anthropic-compatible proxies) classifies as a transient
     # overload (backoff + retry) instead of falling through to `unknown` or
     # incorrectly triggering credential rotation.
-    if any(p in error_msg for p in (
-        "overloaded", "temporarily overloaded",
-        "service is temporarily overloaded",
-    )):
+    if any(p in error_msg for p in _OVERLOADED_PATTERNS):
         return result_fn(
             FailoverReason.overloaded,
             retryable=True,
diff --git a/tests/agent/test_error_classifier.py b/tests/agent/test_error_classifier.py
index 5d72bc2f142..270c0d8b8d1 100644
--- a/tests/agent/test_error_classifier.py
+++ b/tests/agent/test_error_classifier.py
@@ -386,6 +386,30 @@ class TestClassifyApiError:
         assert result.retryable is True
         assert result.should_rotate_credential is False
 
+    def test_429_with_overloaded_body_is_overloaded_not_rate_limit(self):
+        """Z.AI / Zhipu reuse HTTP 429 for server-wide overload. The credential
+        is valid — the server is just busy — so it must classify as overloaded
+        (back off + retry the same key), NOT rate_limit (which would rotate and
+        exhaust the pool, doing nothing for a single-key user). (#14038)"""
+        e = MockAPIError(
+            "The service may be temporarily overloaded, please try again later",
+            status_code=429,
+        )
+        result = classify_api_error(e, provider="zai")
+        assert result.reason == FailoverReason.overloaded
+        assert result.retryable is True
+        assert result.should_rotate_credential is False
+
+    def test_429_normal_rate_limit_still_rotates(self):
+        """Guard: a genuine 429 rate limit (no overload language) must still
+        classify as rate_limit and rotate the credential. (#14038)"""
+        e = MockAPIError(
+            "Rate limit exceeded: too many requests", status_code=429
+        )
+        result = classify_api_error(e, provider="zai")
+        assert result.reason == FailoverReason.rate_limit
+        assert result.should_rotate_credential is True
+
     # ── 5xx that are actually request-validation errors ──
     # Some OpenAI-compatible gateways (e.g. codex.nekos.me) return
     # request-validation failures with a 5xx status. These are