diff --git a/agent/error_classifier.py b/agent/error_classifier.py
index 0f1450113..1f6b48a09 100644
--- a/agent/error_classifier.py
+++ b/agent/error_classifier.py
@@ -677,6 +677,19 @@ def _classify_by_message(
             should_compress=True,
         )
 
+    # Some providers report "usage limit" failures with no HTTP status code,
+    # so apply the same disambiguation used for 402 here: a transient hint
+    # ("try again", "resets at", …) marks a periodic quota window, while its
+    # absence is treated as billing exhaustion.
+    if any(p in error_msg for p in _USAGE_LIMIT_PATTERNS):
+        quota_will_reset = any(p in error_msg for p in _USAGE_LIMIT_TRANSIENT_SIGNALS)
+        return result_fn(
+            FailoverReason.rate_limit if quota_will_reset else FailoverReason.billing,
+            retryable=quota_will_reset,
+            should_rotate_credential=True,
+            should_fallback=True,
+        )
+
     # Billing patterns
     if any(p in error_msg for p in _BILLING_PATTERNS):
         return result_fn(
diff --git a/tests/agent/test_error_classifier.py b/tests/agent/test_error_classifier.py
index c59735589..44e891f0c 100644
--- a/tests/agent/test_error_classifier.py
+++ b/tests/agent/test_error_classifier.py
@@ -480,6 +480,39 @@ class TestClassifyApiError:
         result = classify_api_error(e)
         assert result.reason == FailoverReason.context_overflow
 
+    # ── Usage-limit wording without a status code: quota window vs. billing ──
+
+    def test_message_usage_limit_transient_is_rate_limit(self):
+        """Status-less 'usage limit' text that says 'try again' is a rate limit."""
+        exc = Exception("usage limit exceeded, try again in 5 minutes")
+        outcome = classify_api_error(exc)
+        assert outcome.reason == FailoverReason.rate_limit
+        assert outcome.retryable is True
+        assert outcome.should_rotate_credential is True
+        assert outcome.should_fallback is True
+
+    def test_message_usage_limit_no_retry_signal_is_billing(self):
+        """Status-less 'usage limit' text with no transient hint is billing."""
+        exc = Exception("usage limit reached")
+        outcome = classify_api_error(exc)
+        assert outcome.reason == FailoverReason.billing
+        assert outcome.retryable is False
+        assert outcome.should_rotate_credential is True
+
+    def test_message_quota_with_reset_window_is_rate_limit(self):
+        """Status-less 'quota' text that mentions 'resets at' is a rate limit."""
+        exc = Exception("quota exceeded, resets at midnight UTC")
+        outcome = classify_api_error(exc)
+        assert outcome.reason == FailoverReason.rate_limit
+        assert outcome.retryable is True
+
+    def test_message_limit_exceeded_with_wait_is_rate_limit(self):
+        """Status-less 'limit exceeded' text that asks to 'wait' is a rate limit."""
+        exc = Exception("key limit exceeded, please wait before retrying")
+        outcome = classify_api_error(exc)
+        assert outcome.reason == FailoverReason.rate_limit
+        assert outcome.retryable is True
+
     # ── Unknown / fallback ──
 
     def test_generic_exception_is_unknown(self):