diff --git a/agent/anthropic_adapter.py b/agent/anthropic_adapter.py index 04232f26c0..efee8f6bf1 100644 --- a/agent/anthropic_adapter.py +++ b/agent/anthropic_adapter.py @@ -461,7 +461,11 @@ def _requires_bearer_auth(base_url: str | None) -> bool: return normalized.startswith(("https://api.minimax.io/anthropic", "https://api.minimaxi.com/anthropic")) -def _common_betas_for_base_url(base_url: str | None) -> list[str]: +def _common_betas_for_base_url( + base_url: str | None, + *, + drop_context_1m_beta: bool = False, +) -> list[str]: """Return the beta headers that are safe for the configured endpoint. MiniMax's Anthropic-compatible endpoints (Bearer-auth) reject requests @@ -472,14 +476,30 @@ def _common_betas_for_base_url(base_url: str | None) -> list[str]: The ``context-1m-2025-08-07`` beta is also stripped for Bearer-auth endpoints — MiniMax hosts its own models, not Claude, so the header is irrelevant at best and risks request rejection at worst. + + ``drop_context_1m_beta=True`` additionally strips the 1M-context beta on + otherwise-unrelated endpoints. The OAuth retry path flips this flag after + a subscription rejects the beta with + "The long context beta is not yet available for this subscription" so + subsequent requests in the same session don't repeat the probe. See the + reactive recovery loop in ``run_agent.py`` and issue-comment history on + PR #17680 for the full rationale. """ if _requires_bearer_auth(base_url): _stripped = {_TOOL_STREAMING_BETA, _CONTEXT_1M_BETA} return [b for b in _COMMON_BETAS if b not in _stripped] + if drop_context_1m_beta: + return [b for b in _COMMON_BETAS if b != _CONTEXT_1M_BETA] return _COMMON_BETAS -def build_anthropic_client(api_key: str, base_url: str = None, timeout: float = None): +def build_anthropic_client( + api_key: str, + base_url: str = None, + timeout: float = None, + *, + drop_context_1m_beta: bool = False, +): """Create an Anthropic client, auto-detecting setup-tokens vs API keys. If *timeout* is provided it overrides the default 900s read timeout. The @@ -488,6 +508,12 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: float = Anthropic-compatible providers respect the same knob as OpenAI-wire providers. + ``drop_context_1m_beta=True`` strips ``context-1m-2025-08-07`` from the + client-level ``anthropic-beta`` header. Used by the reactive OAuth retry + path in ``run_agent.py`` when a subscription rejects the beta; leave at + its default on fresh clients so 1M-capable subscriptions keep the + capability. + Returns an anthropic.Anthropic instance. """ _anthropic_sdk = _get_anthropic_sdk() @@ -517,7 +543,10 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: float = kwargs["default_query"] = {"api-version": "2025-04-15"} else: kwargs["base_url"] = normalized_base_url - common_betas = _common_betas_for_base_url(normalized_base_url) + common_betas = _common_betas_for_base_url( + normalized_base_url, + drop_context_1m_beta=drop_context_1m_beta, + ) if _is_kimi_coding_endpoint(base_url): # Kimi's /coding endpoint requires User-Agent: claude-code/0.1.0 @@ -1689,6 +1718,7 @@ def build_anthropic_kwargs( context_length: Optional[int] = None, base_url: str | None = None, fast_mode: bool = False, + drop_context_1m_beta: bool = False, ) -> Dict[str, Any]: """Build kwargs for anthropic.messages.create(). @@ -1877,7 +1907,10 @@ def build_anthropic_kwargs( kwargs.setdefault("extra_body", {})["speed"] = "fast" # Build extra_headers with ALL applicable betas (the per-request # extra_headers override the client-level anthropic-beta header). - betas = list(_common_betas_for_base_url(base_url)) + betas = list(_common_betas_for_base_url( + base_url, + drop_context_1m_beta=drop_context_1m_beta, + )) if is_oauth: betas.extend(_OAUTH_ONLY_BETAS) betas.append(_FAST_MODE_BETA) diff --git a/agent/error_classifier.py b/agent/error_classifier.py index 511ab353c0..86e99ec1ac 100644 --- a/agent/error_classifier.py +++ b/agent/error_classifier.py @@ -54,6 +54,7 @@ class FailoverReason(enum.Enum): # Provider-specific thinking_signature = "thinking_signature" # Anthropic thinking block sig invalid long_context_tier = "long_context_tier" # Anthropic "extra usage" tier gate + oauth_long_context_beta_forbidden = "oauth_long_context_beta_forbidden" # Anthropic OAuth subscription rejects 1M context beta — disable beta and retry # Catch-all unknown = "unknown" # Unclassifiable — retry with backoff @@ -450,6 +451,25 @@ def classify_api_error( should_compress=True, ) + # Anthropic OAuth subscription rejects the 1M-context beta header. + # Observed error body: "The long context beta is not yet available for + # this subscription." Returned as HTTP 400 from native Anthropic when + # the subscription doesn't include 1M context, even though the request + # carries ``anthropic-beta: context-1m-2025-08-07``. The recovery path + # in run_agent.py rebuilds the Anthropic client with the beta stripped + # and retries once. Pattern is narrow enough that it won't collide with + # the 429 tier-gate pattern above (different status, different phrase). + if ( + status_code == 400 + and "long context beta" in error_msg + and "not yet available" in error_msg + ): + return _result( + FailoverReason.oauth_long_context_beta_forbidden, + retryable=True, + should_compress=False, + ) + # ── 2. HTTP status code classification ────────────────────────── if status_code is not None: diff --git a/agent/transports/anthropic.py b/agent/transports/anthropic.py index 66c485b523..72024ac20f 100644 --- a/agent/transports/anthropic.py +++ b/agent/transports/anthropic.py @@ -58,6 +58,7 @@ class AnthropicTransport(ProviderTransport): context_length: int | None base_url: str | None fast_mode: bool + drop_context_1m_beta: bool """ from agent.anthropic_adapter import build_anthropic_kwargs @@ -73,6 +74,7 @@ class AnthropicTransport(ProviderTransport): context_length=params.get("context_length"), base_url=params.get("base_url"), fast_mode=params.get("fast_mode", False), + drop_context_1m_beta=params.get("drop_context_1m_beta", False), ) def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse: diff --git a/hermes_cli/doctor.py b/hermes_cli/doctor.py index e715298771..8166742f10 100644 --- a/hermes_cli/doctor.py +++ b/hermes_cli/doctor.py @@ -1023,10 +1023,16 @@ def run_doctor(args): print(" Checking Anthropic API...", end="", flush=True) try: import httpx - from agent.anthropic_adapter import _is_oauth_token, _COMMON_BETAS, _OAUTH_ONLY_BETAS + from agent.anthropic_adapter import ( + _is_oauth_token, + _COMMON_BETAS, + _OAUTH_ONLY_BETAS, + _CONTEXT_1M_BETA, + ) headers = {"anthropic-version": "2023-06-01"} - if _is_oauth_token(anthropic_key): + is_oauth = _is_oauth_token(anthropic_key) + if is_oauth: headers["Authorization"] = f"Bearer {anthropic_key}" headers["anthropic-beta"] = ",".join(_COMMON_BETAS + _OAUTH_ONLY_BETAS) else: @@ -1036,6 +1042,25 @@ def run_doctor(args): headers=headers, timeout=10 ) + # Reactive recovery: OAuth subscriptions that don't include 1M + # context reject the request with 400 "long context beta is not + # yet available for this subscription". Retry once with that + # beta stripped so the doctor check doesn't falsely report the + # Anthropic API as unreachable for those users. + if ( + is_oauth + and response.status_code == 400 + and "long context beta" in response.text.lower() + and "not yet available" in response.text.lower() + ): + headers["anthropic-beta"] = ",".join( + [b for b in _COMMON_BETAS if b != _CONTEXT_1M_BETA] + list(_OAUTH_ONLY_BETAS) + ) + response = httpx.get( + "https://api.anthropic.com/v1/models", + headers=headers, + timeout=10, + ) if response.status_code == 200: print(f"\r {color('✓', Colors.GREEN)} Anthropic API ") elif response.status_code == 401: diff --git a/hermes_cli/models.py b/hermes_cli/models.py index 74545254c9..f5ca1a3b22 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -2034,28 +2034,56 @@ def _fetch_anthropic_models(timeout: float = 5.0) -> Optional[list[str]]: return None headers: dict[str, str] = {"anthropic-version": "2023-06-01"} - if _is_oauth_token(token): + is_oauth = _is_oauth_token(token) + if is_oauth: headers["Authorization"] = f"Bearer {token}" - from agent.anthropic_adapter import _COMMON_BETAS, _OAUTH_ONLY_BETAS + from agent.anthropic_adapter import _COMMON_BETAS, _OAUTH_ONLY_BETAS, _CONTEXT_1M_BETA headers["anthropic-beta"] = ",".join(_COMMON_BETAS + _OAUTH_ONLY_BETAS) else: headers["x-api-key"] = token - req = urllib.request.Request( - "https://api.anthropic.com/v1/models", - headers=headers, - ) - try: + def _do_request(h: dict[str, str]): + req = urllib.request.Request( + "https://api.anthropic.com/v1/models", + headers=h, + ) with urllib.request.urlopen(req, timeout=timeout) as resp: - data = json.loads(resp.read().decode()) - models = [m["id"] for m in data.get("data", []) if m.get("id")] - # Sort: latest/largest first (opus > sonnet > haiku, higher version first) - return sorted(models, key=lambda m: ( - "opus" not in m, # opus first - "sonnet" not in m, # then sonnet - "haiku" not in m, # then haiku - m, # alphabetical within tier - )) + return json.loads(resp.read().decode()) + + try: + try: + data = _do_request(headers) + except urllib.error.HTTPError as http_err: + # Reactive recovery for OAuth subscriptions that reject the 1M + # context beta with 400 "long context beta is not yet available + # for this subscription". Retry once without the beta; re-raise + # anything else so the outer except logs it. + if ( + is_oauth + and http_err.code == 400 + ): + try: + body_text = http_err.read().decode(errors="ignore").lower() + except Exception: + body_text = "" + if "long context beta" in body_text and "not yet available" in body_text: + headers["anthropic-beta"] = ",".join( + [b for b in _COMMON_BETAS if b != _CONTEXT_1M_BETA] + + list(_OAUTH_ONLY_BETAS) + ) + data = _do_request(headers) + else: + raise + else: + raise + models = [m["id"] for m in data.get("data", []) if m.get("id")] + # Sort: latest/largest first (opus > sonnet > haiku, higher version first) + return sorted(models, key=lambda m: ( + "opus" not in m, # opus first + "sonnet" not in m, # then sonnet + "haiku" not in m, # then haiku + m, # alphabetical within tier + )) except Exception as e: import logging logging.getLogger(__name__).debug("Failed to fetch Anthropic models: %s", e) diff --git a/run_agent.py b/run_agent.py index 5ce0f03d67..6a5f777959 100644 --- a/run_agent.py +++ b/run_agent.py @@ -6210,7 +6210,12 @@ class AIAgent: correctly — rebuilding with the Bedrock SDK when provider is bedrock, rather than always falling back to build_anthropic_client() which requires a direct Anthropic API key. + + Honors ``self._oauth_1m_beta_disabled`` (set by the reactive recovery + path when an OAuth subscription rejects the 1M-context beta) so the + rebuilt client carries the reduced beta set. """ + _drop_1m = bool(getattr(self, "_oauth_1m_beta_disabled", False)) if getattr(self, "provider", None) == "bedrock": from agent.anthropic_adapter import build_anthropic_bedrock_client region = getattr(self, "_bedrock_region", "us-east-1") or "us-east-1" @@ -6221,6 +6226,7 @@ class AIAgent: self._anthropic_api_key, getattr(self, "_anthropic_base_url", None), timeout=get_provider_request_timeout(self.provider, self.model), + drop_context_1m_beta=_drop_1m, ) def _interruptible_api_call(self, api_kwargs: dict): @@ -8167,6 +8173,7 @@ class AIAgent: context_length=ctx_len, base_url=getattr(self, "_anthropic_base_url", None), fast_mode=(self.request_overrides or {}).get("speed") == "fast", + drop_context_1m_beta=bool(getattr(self, "_oauth_1m_beta_disabled", False)), ) # AWS Bedrock native Converse API — bypasses the OpenAI client entirely. @@ -10752,6 +10759,7 @@ class AIAgent: copilot_auth_retry_attempted=False thinking_sig_retry_attempted = False image_shrink_retry_attempted = False + oauth_1m_beta_retry_attempted = False has_retried_429 = False restart_with_compressed_messages = False restart_with_length_continuation = False @@ -11708,6 +11716,36 @@ class AIAgent: "or shrink didn't reduce size; surfacing original error." ) + # Anthropic OAuth subscription rejected the 1M-context beta + # header ("long context beta is not yet available for this + # subscription"). Disable the beta for the rest of this + # session, rebuild the client, and retry once. 1M-capable + # subscriptions never hit this branch — they accept the + # beta and keep full 1M context. See PR #17680 for the + # original report (we chose reactive recovery over the + # proposed unconditional omit so capable subscriptions + # don't silently lose the capability). + if ( + classified.reason == FailoverReason.oauth_long_context_beta_forbidden + and self.api_mode == "anthropic_messages" + and self._is_anthropic_oauth + and not oauth_1m_beta_retry_attempted + ): + oauth_1m_beta_retry_attempted = True + if not getattr(self, "_oauth_1m_beta_disabled", False): + self._oauth_1m_beta_disabled = True + try: + self._anthropic_client.close() + except Exception: + pass + self._rebuild_anthropic_client() + self._vprint( + f"{self.log_prefix}🔕 OAuth subscription doesn't support " + f"the 1M-context beta — disabled for this session and retrying...", + force=True, + ) + continue + if ( self.api_mode == "codex_responses" and self.provider == "openai-codex" diff --git a/tests/agent/test_anthropic_adapter.py b/tests/agent/test_anthropic_adapter.py index 2089893ae2..8105363b2e 100644 --- a/tests/agent/test_anthropic_adapter.py +++ b/tests/agent/test_anthropic_adapter.py @@ -66,8 +66,30 @@ class TestBuildAnthropicClient: assert "claude-code-20250219" in betas assert "interleaved-thinking-2025-05-14" in betas assert "fine-grained-tool-streaming-2025-05-14" in betas + # Default: 1M-context beta stays IN for OAuth so 1M-capable + # subscriptions keep full context. The reactive recovery path + # in run_agent.py flips it off only after a subscription + # actually rejects the beta. + assert "context-1m-2025-08-07" in betas assert "api_key" not in kwargs + def test_oauth_drop_context_1m_beta_strips_only_1m(self): + """drop_context_1m_beta=True strips context-1m-2025-08-07 while + preserving every other OAuth-relevant beta.""" + with patch("agent.anthropic_adapter._anthropic_sdk") as mock_sdk: + build_anthropic_client( + "sk-ant-oat01-" + "x" * 60, + drop_context_1m_beta=True, + ) + kwargs = mock_sdk.Anthropic.call_args[1] + betas = kwargs["default_headers"]["anthropic-beta"] + assert "context-1m-2025-08-07" not in betas + # Everything else must still be there. + assert "oauth-2025-04-20" in betas + assert "claude-code-20250219" in betas + assert "interleaved-thinking-2025-05-14" in betas + assert "fine-grained-tool-streaming-2025-05-14" in betas + def test_api_key_uses_api_key(self): with patch("agent.anthropic_adapter._anthropic_sdk") as mock_sdk: build_anthropic_client("sk-ant-api03-something") @@ -77,6 +99,7 @@ class TestBuildAnthropicClient: # API key auth should still get common betas betas = kwargs["default_headers"]["anthropic-beta"] assert "interleaved-thinking-2025-05-14" in betas + assert "context-1m-2025-08-07" in betas assert "oauth-2025-04-20" not in betas # OAuth-only beta NOT present assert "claude-code-20250219" not in betas # OAuth-only beta NOT present @@ -963,6 +986,42 @@ class TestBuildAnthropicKwargs: ) assert kwargs["model"] == "claude-sonnet-4-20250514" + def test_fast_mode_oauth_default_keeps_context_1m_beta(self): + """Default OAuth fast-mode requests still carry context-1m-2025-08-07.""" + kwargs = build_anthropic_kwargs( + model="claude-opus-4-6", + messages=[{"role": "user", "content": "Hi"}], + tools=None, + max_tokens=4096, + reasoning_config=None, + is_oauth=True, + fast_mode=True, + ) + betas = kwargs["extra_headers"]["anthropic-beta"] + assert "fast-mode-2026-02-01" in betas + assert "oauth-2025-04-20" in betas + assert "context-1m-2025-08-07" in betas + + def test_fast_mode_oauth_drop_context_1m_beta_strips_only_1m(self): + """drop_context_1m_beta=True strips context-1m from fast-mode + extra_headers while preserving every other OAuth + fast-mode beta.""" + kwargs = build_anthropic_kwargs( + model="claude-opus-4-6", + messages=[{"role": "user", "content": "Hi"}], + tools=None, + max_tokens=4096, + reasoning_config=None, + is_oauth=True, + fast_mode=True, + drop_context_1m_beta=True, + ) + betas = kwargs["extra_headers"]["anthropic-beta"] + assert "context-1m-2025-08-07" not in betas + assert "fast-mode-2026-02-01" in betas + assert "oauth-2025-04-20" in betas + assert "claude-code-20250219" in betas + assert "interleaved-thinking-2025-05-14" in betas + def test_reasoning_config_maps_to_manual_thinking_for_pre_4_6_models(self): kwargs = build_anthropic_kwargs( model="claude-sonnet-4-20250514", diff --git a/tests/agent/test_error_classifier.py b/tests/agent/test_error_classifier.py index d6598b66a3..9d52c7bdf2 100644 --- a/tests/agent/test_error_classifier.py +++ b/tests/agent/test_error_classifier.py @@ -57,7 +57,9 @@ class TestFailoverReason: "context_overflow", "payload_too_large", "image_too_large", "model_not_found", "format_error", "provider_policy_blocked", - "thinking_signature", "long_context_tier", "unknown", + "thinking_signature", "long_context_tier", + "oauth_long_context_beta_forbidden", + "unknown", } actual = {r.value for r in FailoverReason} assert expected == actual @@ -458,6 +460,40 @@ class TestClassifyApiError: result = classify_api_error(e, provider="anthropic") assert result.reason == FailoverReason.rate_limit + # ── Provider-specific: Anthropic OAuth 1M-context beta forbidden ── + + def test_anthropic_oauth_1m_beta_forbidden(self): + """400 + 'long context beta is not yet available for this subscription' + → oauth_long_context_beta_forbidden (retryable, no compression).""" + e = MockAPIError( + "The long context beta is not yet available for this subscription.", + status_code=400, + ) + result = classify_api_error(e, provider="anthropic", model="claude-sonnet-4.6") + assert result.reason == FailoverReason.oauth_long_context_beta_forbidden + assert result.retryable is True + assert result.should_compress is False + + def test_anthropic_oauth_1m_beta_forbidden_does_not_collide_with_tier_gate(self): + """The 429 'extra usage' + 'long context' tier gate keeps its own + classification even though its message mentions 'long context'.""" + e = MockAPIError( + "Extra usage is required for long context requests over 200k tokens", + status_code=429, + ) + result = classify_api_error(e, provider="anthropic", model="claude-sonnet-4.6") + assert result.reason == FailoverReason.long_context_tier + + def test_400_without_beta_phrase_is_not_1m_beta_forbidden(self): + """A generic 400 that happens to mention 'long context' but not the + exact beta-availability phrase should not be misclassified.""" + e = MockAPIError( + "long context window exceeded", + status_code=400, + ) + result = classify_api_error(e, provider="anthropic") + assert result.reason != FailoverReason.oauth_long_context_beta_forbidden + # ── Transport errors ── def test_read_timeout(self):