mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix(error_classifier): disambiguate usage-limit patterns in _classify_by_message
_classify_by_message had no handling for _USAGE_LIMIT_PATTERNS, so messages like 'usage limit exceeded, try again in 5 minutes' arriving without an HTTP status code fell through to FailoverReason.unknown instead of rate_limit. Apply the same billing/rate-limit disambiguation that _classify_402 already uses: USAGE_LIMIT_PATTERNS + transient signal → rate_limit, USAGE_LIMIT_PATTERNS alone → billing. Add 4 tests covering the no-status-code usage-limit path.
This commit is contained in:
parent
1789c2699a
commit
e053433c84
2 changed files with 54 additions and 0 deletions
|
|
@ -677,6 +677,27 @@ def _classify_by_message(
|
|||
should_compress=True,
|
||||
)
|
||||
|
||||
# Usage-limit patterns need the same disambiguation as 402: some providers
|
||||
# surface "usage limit" errors without an HTTP status code. A transient
|
||||
# signal ("try again", "resets at", …) means it's a periodic quota, not
|
||||
# billing exhaustion.
|
||||
has_usage_limit = any(p in error_msg for p in _USAGE_LIMIT_PATTERNS)
|
||||
if has_usage_limit:
|
||||
has_transient_signal = any(p in error_msg for p in _USAGE_LIMIT_TRANSIENT_SIGNALS)
|
||||
if has_transient_signal:
|
||||
return result_fn(
|
||||
FailoverReason.rate_limit,
|
||||
retryable=True,
|
||||
should_rotate_credential=True,
|
||||
should_fallback=True,
|
||||
)
|
||||
return result_fn(
|
||||
FailoverReason.billing,
|
||||
retryable=False,
|
||||
should_rotate_credential=True,
|
||||
should_fallback=True,
|
||||
)
|
||||
|
||||
# Billing patterns
|
||||
if any(p in error_msg for p in _BILLING_PATTERNS):
|
||||
return result_fn(
|
||||
|
|
|
|||
|
|
@ -480,6 +480,39 @@ class TestClassifyApiError:
|
|||
result = classify_api_error(e)
|
||||
assert result.reason == FailoverReason.context_overflow
|
||||
|
||||
# ── Message-only usage limit disambiguation (no status code) ──
|
||||
|
||||
def test_message_usage_limit_transient_is_rate_limit(self):
|
||||
"""'usage limit' + 'try again' with no status code → rate_limit, not billing."""
|
||||
e = Exception("usage limit exceeded, try again in 5 minutes")
|
||||
result = classify_api_error(e)
|
||||
assert result.reason == FailoverReason.rate_limit
|
||||
assert result.retryable is True
|
||||
assert result.should_rotate_credential is True
|
||||
assert result.should_fallback is True
|
||||
|
||||
def test_message_usage_limit_no_retry_signal_is_billing(self):
|
||||
"""'usage limit' with no transient signal and no status code → billing."""
|
||||
e = Exception("usage limit reached")
|
||||
result = classify_api_error(e)
|
||||
assert result.reason == FailoverReason.billing
|
||||
assert result.retryable is False
|
||||
assert result.should_rotate_credential is True
|
||||
|
||||
def test_message_quota_with_reset_window_is_rate_limit(self):
|
||||
"""'quota' + 'resets at' with no status code → rate_limit."""
|
||||
e = Exception("quota exceeded, resets at midnight UTC")
|
||||
result = classify_api_error(e)
|
||||
assert result.reason == FailoverReason.rate_limit
|
||||
assert result.retryable is True
|
||||
|
||||
def test_message_limit_exceeded_with_wait_is_rate_limit(self):
|
||||
"""'limit exceeded' + 'wait' with no status code → rate_limit."""
|
||||
e = Exception("key limit exceeded, please wait before retrying")
|
||||
result = classify_api_error(e)
|
||||
assert result.reason == FailoverReason.rate_limit
|
||||
assert result.retryable is True
|
||||
|
||||
# ── Unknown / fallback ──
|
||||
|
||||
def test_generic_exception_is_unknown(self):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue