fix: handle Anthropic Sonnet long-context tier 429 by reducing to 200k (#4747)

Anthropic returns HTTP 429 'Extra usage is required for long context
requests' when a Claude Max subscription doesn't include the 1M context
tier. This is NOT a transient rate limit — retrying won't help.

Only applies to Sonnet models (Opus 1M is general access). Detects
this specific error before the generic rate-limit handler and:
1. Reduces context_length from 1M to 200k (the standard tier)
2. Triggers context compression to fit
3. Retries with the reduced context

The reduction is session-scoped (not persisted) so it auto-recovers
if the user later enables extra usage on their subscription.

Fixes: Sonnet 4.6 instant rate limits on Claude Max without extra usage
This commit is contained in:
Teknium 2026-04-03 02:05:02 -07:00 committed by GitHub
parent 26d6083624
commit 8fd9fafc84
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 57 additions and 9 deletions

View file

@ -7380,17 +7380,19 @@ class AIAgent:
# compress history and retry, not abort immediately.
status_code = getattr(api_error, "status_code", None)
# ── Anthropic long-context tier gate ──────────────────
# ── Anthropic Sonnet long-context tier gate ───────────
# Anthropic returns HTTP 429 "Extra usage is required for
# long context requests" when a Claude Max (or similar)
# subscription doesn't include the 1M-context tier. This
# is NOT a transient rate limit — retrying or switching
# credentials won't help. Reduce context to 200k (the
# standard tier) and compress.
# Only applies to Sonnet — Opus 1M is general access.
_is_long_context_tier_error = (
status_code == 429
and "extra usage" in error_msg
and "long context" in error_msg
and "sonnet" in self.model.lower()
)
if _is_long_context_tier_error:
_reduced_ctx = 200000

View file

@ -1,9 +1,11 @@
"""Tests for Anthropic long-context tier 429 handling.
"""Tests for Anthropic Sonnet long-context tier 429 handling.
When Claude Max users without "extra usage" hit the 1M context tier,
Anthropic returns HTTP 429 "Extra usage is required for long context
requests." This is NOT a transient rate limit — the agent should
When Claude Max users without "extra usage" hit the 1M context tier
on Sonnet, Anthropic returns HTTP 429 "Extra usage is required for long
context requests." This is NOT a transient rate limit — the agent should
reduce context_length to 200k and compress instead of retrying.
Only Sonnet is affected Opus 1M is general access.
"""
import pytest
@ -20,12 +22,13 @@ class TestLongContextTierDetection:
"""Verify the detection heuristic matches the Anthropic error."""
@staticmethod
def _is_long_context_tier_error(status_code, error_msg):
def _is_long_context_tier_error(status_code, error_msg, model="claude-sonnet-4.6"):
error_msg = error_msg.lower()
return (
status_code == 429
and "extra usage" in error_msg
and "long context" in error_msg
and "sonnet" in model.lower()
)
def test_matches_anthropic_error(self):
@ -40,6 +43,35 @@ class TestLongContextTierDetection:
"extra usage is required for long context requests.",
)
def test_matches_openrouter_model_id(self):
assert self._is_long_context_tier_error(
429,
"Extra usage is required for long context requests.",
model="anthropic/claude-sonnet-4.6",
)
def test_matches_nous_model_id(self):
assert self._is_long_context_tier_error(
429,
"Extra usage is required for long context requests.",
model="claude-sonnet-4-6",
)
def test_rejects_opus(self):
"""Opus 1M is general access — should NOT trigger reduction."""
assert not self._is_long_context_tier_error(
429,
"Extra usage is required for long context requests.",
model="claude-opus-4.6",
)
def test_rejects_opus_openrouter(self):
assert not self._is_long_context_tier_error(
429,
"Extra usage is required for long context requests.",
model="anthropic/claude-opus-4.6",
)
def test_rejects_normal_429(self):
assert not self._is_long_context_tier_error(
429,
@ -132,27 +164,41 @@ class TestAgentErrorPath:
is_rate_limited check fires a fallback switch."""
error_msg = "extra usage is required for long context requests."
status_code = 429
model = "claude-sonnet-4.6"
# The long-context check fires first
_is_long_context_tier_error = (
status_code == 429
and "extra usage" in error_msg
and "long context" in error_msg
and "sonnet" in model.lower()
)
assert _is_long_context_tier_error
# So we never reach the generic rate-limit path
# (in the real code, `break` exits the retry loop)
def test_opus_429_falls_through_to_rate_limit(self):
"""Opus should NOT match — falls through to generic rate-limit."""
error_msg = "extra usage is required for long context requests."
status_code = 429
model = "claude-opus-4.6"
_is_long_context_tier_error = (
status_code == 429
and "extra usage" in error_msg
and "long context" in error_msg
and "sonnet" in model.lower()
)
assert not _is_long_context_tier_error
def test_normal_429_still_treated_as_rate_limit(self):
"""A normal 429 should NOT match the long-context check."""
error_msg = "rate limit exceeded"
status_code = 429
model = "claude-sonnet-4.6"
_is_long_context_tier_error = (
status_code == 429
and "extra usage" in error_msg
and "long context" in error_msg
and "sonnet" in model.lower()
)
assert not _is_long_context_tier_error