mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix: handle Anthropic Sonnet long-context tier 429 by reducing to 200k (#4747)
Anthropic returns HTTP 429 'Extra usage is required for long context requests' when a Claude Max subscription doesn't include the 1M context tier. This is NOT a transient rate limit — retrying won't help. It only applies to Sonnet models (Opus 1M is general access).

The agent detects this specific error before the generic rate-limit handler and:
1. Reduces context_length from 1M to 200k (the standard tier)
2. Triggers context compression to fit
3. Retries with the reduced context

The reduction is session-scoped (not persisted), so it auto-recovers if the user later enables extra usage on their subscription.

Fixes: Sonnet 4.6 instant rate limits on Claude Max without extra usage.
This commit is contained in:
parent
26d6083624
commit
8fd9fafc84
2 changed files with 57 additions and 9 deletions
|
|
@ -7380,17 +7380,19 @@ class AIAgent:
|
|||
# compress history and retry, not abort immediately.
|
||||
status_code = getattr(api_error, "status_code", None)
|
||||
|
||||
# ── Anthropic long-context tier gate ──────────────────
|
||||
# ── Anthropic Sonnet long-context tier gate ───────────
|
||||
# Anthropic returns HTTP 429 "Extra usage is required for
|
||||
# long context requests" when a Claude Max (or similar)
|
||||
# subscription doesn't include the 1M-context tier. This
|
||||
# is NOT a transient rate limit — retrying or switching
|
||||
# credentials won't help. Reduce context to 200k (the
|
||||
# standard tier) and compress.
|
||||
# Only applies to Sonnet — Opus 1M is general access.
|
||||
_is_long_context_tier_error = (
|
||||
status_code == 429
|
||||
and "extra usage" in error_msg
|
||||
and "long context" in error_msg
|
||||
and "sonnet" in self.model.lower()
|
||||
)
|
||||
if _is_long_context_tier_error:
|
||||
_reduced_ctx = 200000
|
||||
|
|
|
|||
|
|
@ -1,9 +1,11 @@
|
|||
"""Tests for Anthropic long-context tier 429 handling.
|
||||
"""Tests for Anthropic Sonnet long-context tier 429 handling.
|
||||
|
||||
When Claude Max users without "extra usage" hit the 1M context tier,
|
||||
Anthropic returns HTTP 429 "Extra usage is required for long context
|
||||
requests." This is NOT a transient rate limit — the agent should
|
||||
When Claude Max users without "extra usage" hit the 1M context tier
|
||||
on Sonnet, Anthropic returns HTTP 429 "Extra usage is required for long
|
||||
context requests." This is NOT a transient rate limit — the agent should
|
||||
reduce context_length to 200k and compress instead of retrying.
|
||||
|
||||
Only Sonnet is affected — Opus 1M is general access.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
|
|
@ -20,12 +22,13 @@ class TestLongContextTierDetection:
|
|||
"""Verify the detection heuristic matches the Anthropic error."""
|
||||
|
||||
@staticmethod
|
||||
def _is_long_context_tier_error(status_code, error_msg):
|
||||
def _is_long_context_tier_error(status_code, error_msg, model="claude-sonnet-4.6"):
|
||||
error_msg = error_msg.lower()
|
||||
return (
|
||||
status_code == 429
|
||||
and "extra usage" in error_msg
|
||||
and "long context" in error_msg
|
||||
and "sonnet" in model.lower()
|
||||
)
|
||||
|
||||
def test_matches_anthropic_error(self):
|
||||
|
|
@ -40,6 +43,35 @@ class TestLongContextTierDetection:
|
|||
"extra usage is required for long context requests.",
|
||||
)
|
||||
|
||||
def test_matches_openrouter_model_id(self):
|
||||
assert self._is_long_context_tier_error(
|
||||
429,
|
||||
"Extra usage is required for long context requests.",
|
||||
model="anthropic/claude-sonnet-4.6",
|
||||
)
|
||||
|
||||
def test_matches_nous_model_id(self):
|
||||
assert self._is_long_context_tier_error(
|
||||
429,
|
||||
"Extra usage is required for long context requests.",
|
||||
model="claude-sonnet-4-6",
|
||||
)
|
||||
|
||||
def test_rejects_opus(self):
|
||||
"""Opus 1M is general access — should NOT trigger reduction."""
|
||||
assert not self._is_long_context_tier_error(
|
||||
429,
|
||||
"Extra usage is required for long context requests.",
|
||||
model="claude-opus-4.6",
|
||||
)
|
||||
|
||||
def test_rejects_opus_openrouter(self):
|
||||
assert not self._is_long_context_tier_error(
|
||||
429,
|
||||
"Extra usage is required for long context requests.",
|
||||
model="anthropic/claude-opus-4.6",
|
||||
)
|
||||
|
||||
def test_rejects_normal_429(self):
|
||||
assert not self._is_long_context_tier_error(
|
||||
429,
|
||||
|
|
@ -132,27 +164,41 @@ class TestAgentErrorPath:
|
|||
is_rate_limited check fires a fallback switch."""
|
||||
error_msg = "extra usage is required for long context requests."
|
||||
status_code = 429
|
||||
model = "claude-sonnet-4.6"
|
||||
|
||||
# The long-context check fires first
|
||||
_is_long_context_tier_error = (
|
||||
status_code == 429
|
||||
and "extra usage" in error_msg
|
||||
and "long context" in error_msg
|
||||
and "sonnet" in model.lower()
|
||||
)
|
||||
assert _is_long_context_tier_error
|
||||
|
||||
# So we never reach the generic rate-limit path
|
||||
# (in the real code, `break` exits the retry loop)
|
||||
def test_opus_429_falls_through_to_rate_limit(self):
|
||||
"""Opus should NOT match — falls through to generic rate-limit."""
|
||||
error_msg = "extra usage is required for long context requests."
|
||||
status_code = 429
|
||||
model = "claude-opus-4.6"
|
||||
|
||||
_is_long_context_tier_error = (
|
||||
status_code == 429
|
||||
and "extra usage" in error_msg
|
||||
and "long context" in error_msg
|
||||
and "sonnet" in model.lower()
|
||||
)
|
||||
assert not _is_long_context_tier_error
|
||||
|
||||
def test_normal_429_still_treated_as_rate_limit(self):
|
||||
"""A normal 429 should NOT match the long-context check."""
|
||||
error_msg = "rate limit exceeded"
|
||||
status_code = 429
|
||||
model = "claude-sonnet-4.6"
|
||||
|
||||
_is_long_context_tier_error = (
|
||||
status_code == 429
|
||||
and "extra usage" in error_msg
|
||||
and "long context" in error_msg
|
||||
and "sonnet" in model.lower()
|
||||
)
|
||||
assert not _is_long_context_tier_error
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue