From 05f53f4e6a6bb5aa99ec089f998dc995f054e55a Mon Sep 17 00:00:00 2001 From: pander <> Date: Thu, 23 Apr 2026 00:06:02 +0800 Subject: [PATCH] fix(error_classifier): classify 'overloaded' as FailoverReason.overloaded before rate_limit When a provider (e.g. Z.AI) returns 'The service may be temporarily overloaded, please try again later' as HTTP 200 or HTTP 400, the error was matched against _RATE_LIMIT_PATTERNS (which includes 'servicequotaexceededexception') and classified as rate_limit with should_rotate_credential=True. After 2 failures the single API key was marked exhausted and all further retries failed. The fix adds an 'overloaded' / 'temporarily overloaded' pattern check BEFORE the rate_limit check in both _classify_400 and _classify_by_message. Overloaded errors now get FailoverReason.overloaded (retryable, should_fallback) instead of rate_limit, preventing unnecessary credential rotation. Closes #14038 --- agent/error_classifier.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/agent/error_classifier.py b/agent/error_classifier.py index 14a2609d8..85764b0a6 100644 --- a/agent/error_classifier.py +++ b/agent/error_classifier.py @@ -590,6 +590,16 @@ def _classify_400( # Some providers return rate limit / billing errors as 400 instead of 429/402. # Check these patterns before falling through to format_error. + + # Overloaded patterns — server-side overload, NOT a credential/billing issue. + # Must come before rate_limit check to avoid rotating credentials unnecessarily. + if "overloaded" in error_msg or "temporarily overloaded" in error_msg: + return result_fn( + FailoverReason.overloaded, + retryable=True, + should_fallback=True, + ) + if any(p in error_msg for p in _RATE_LIMIT_PATTERNS): return result_fn( FailoverReason.rate_limit, @@ -723,7 +733,14 @@ def _classify_by_message( should_fallback=True, ) - # Rate limit patterns + # Rate limit patterns — but overloaded must come first to avoid credential rotation. + if "overloaded" in error_msg or "temporarily overloaded" in error_msg: + return result_fn( + FailoverReason.overloaded, + retryable=True, + should_fallback=True, + ) + if any(p in error_msg for p in _RATE_LIMIT_PATTERNS): return result_fn( FailoverReason.rate_limit,