mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-08 03:01:47 +00:00
fix: avoid unsupported anthropic context beta by default
This commit is contained in:
parent
b9f1ac8c10
commit
e9685a5cf7
2 changed files with 62 additions and 42 deletions
|
|
@ -231,33 +231,30 @@ def _supports_fast_mode(model: str) -> bool:
|
||||||
return any(v in model for v in _FAST_MODE_SUPPORTED_SUBSTRINGS)
|
return any(v in model for v in _FAST_MODE_SUPPORTED_SUBSTRINGS)
|
||||||
|
|
||||||
|
|
||||||
# Beta headers for enhanced features (sent with ALL auth types).
|
# Beta headers for enhanced features that are safe on ordinary/native Anthropic
|
||||||
# As of Opus 4.7 (2026-04-16), the first two are GA on Claude 4.6+ — the
|
# requests. As of Opus 4.7 (2026-04-16), these are GA on Claude 4.6+ — the
|
||||||
# beta headers are still accepted (harmless no-op) but not required. Kept
|
# beta headers are still accepted (harmless no-op) but not required. Kept
|
||||||
# here so older Claude (4.5, 4.1) + third-party Anthropic-compat endpoints
|
# here so older Claude (4.5, 4.1) + compatible endpoints that still gate on
|
||||||
# that still gate on the headers continue to get the enhanced features.
|
# the headers continue to get the enhanced features.
|
||||||
#
|
#
|
||||||
# ``context-1m-2025-08-07`` unlocks the 1M context window on Claude Opus 4.6/4.7
|
# Do NOT include ``context-1m-2025-08-07`` here. Anthropic returns HTTP 400
|
||||||
# and Sonnet 4.6 when served via AWS Bedrock or Azure AI Foundry. 1M is GA on
|
# ("long context beta is not yet available for this subscription") for
|
||||||
# native Anthropic (api.anthropic.com) for Opus 4.6+, but Bedrock/Azure still
|
# accounts without the long-context beta, which breaks normal short auxiliary
|
||||||
# gate it behind this beta header as of 2026-04 — without it Bedrock caps Opus
|
# calls like title generation/session summarization.
|
||||||
# at 200K even though model_metadata.py advertises 1M. The header is a harmless
|
|
||||||
# no-op on endpoints where 1M is GA.
|
|
||||||
#
|
#
|
||||||
# Migration guide: remove these if you no longer support ≤4.5 models or once
|
# ``context-1m-2025-08-07`` is still required to unlock the 1M context window
|
||||||
# Bedrock/Azure promote 1M to GA.
|
# on Claude Opus 4.6/4.7 and Sonnet 4.6 when served via AWS Bedrock or Azure
|
||||||
|
# AI Foundry. Add it only for those endpoint-specific paths below.
|
||||||
_COMMON_BETAS = [
|
_COMMON_BETAS = [
|
||||||
"interleaved-thinking-2025-05-14",
|
"interleaved-thinking-2025-05-14",
|
||||||
"fine-grained-tool-streaming-2025-05-14",
|
"fine-grained-tool-streaming-2025-05-14",
|
||||||
"context-1m-2025-08-07",
|
|
||||||
]
|
]
|
||||||
# MiniMax's Anthropic-compatible endpoints fail tool-use requests when
|
# MiniMax's Anthropic-compatible endpoints fail tool-use requests when
|
||||||
# the fine-grained tool streaming beta is present. Omit it so tool calls
|
# the fine-grained tool streaming beta is present. Omit it so tool calls
|
||||||
# fall back to the provider's default response path.
|
# fall back to the provider's default response path.
|
||||||
_TOOL_STREAMING_BETA = "fine-grained-tool-streaming-2025-05-14"
|
_TOOL_STREAMING_BETA = "fine-grained-tool-streaming-2025-05-14"
|
||||||
# 1M context beta — see comment on _COMMON_BETAS above. Stripped for
|
# 1M context beta. Native Anthropic does not get this by default because some
|
||||||
# Bearer-auth (MiniMax) endpoints since they host their own models and
|
# subscriptions reject it, but Bedrock/Azure still need it for 1M context.
|
||||||
# unknown Anthropic beta headers risk request rejection.
|
|
||||||
_CONTEXT_1M_BETA = "context-1m-2025-08-07"
|
_CONTEXT_1M_BETA = "context-1m-2025-08-07"
|
||||||
|
|
||||||
# Fast mode beta — enables the ``speed: "fast"`` request parameter for
|
# Fast mode beta — enables the ``speed: "fast"`` request parameter for
|
||||||
|
|
@ -476,6 +473,14 @@ def _requires_bearer_auth(base_url: str | None) -> bool:
|
||||||
return normalized.startswith(("https://api.minimax.io/anthropic", "https://api.minimaxi.com/anthropic"))
|
return normalized.startswith(("https://api.minimax.io/anthropic", "https://api.minimaxi.com/anthropic"))
|
||||||
|
|
||||||
|
|
||||||
|
def _base_url_needs_context_1m_beta(base_url: str | None) -> bool:
    """Return True for endpoints that still gate 1M context behind a beta.

    An endpoint opts in when its hostname is ``azure.com`` or a subdomain of
    it. Only the hostname is inspected, so an unrelated endpoint whose path,
    query string, or look-alike domain (``notazure.com``,
    ``azure.com.example.org``) merely contains the text ``azure.com`` does not
    receive a beta header it may reject.

    Args:
        base_url: Endpoint base URL, or ``None`` when no override is set.

    Returns:
        ``True`` when the 1M-context beta should be added for this endpoint;
        ``False`` otherwise, including for empty or unparseable URLs.
    """
    from urllib.parse import urlsplit

    normalized = _normalize_base_url_text(base_url).lower()
    if not normalized:
        return False

    # urlsplit only populates the host when an authority marker is present,
    # so tolerate scheme-less values such as "foo.azure.com/anthropic".
    has_authority = "://" in normalized or normalized.startswith("//")
    candidate = normalized if has_authority else f"//{normalized}"
    try:
        host = urlsplit(candidate).hostname or ""
    except ValueError:
        # Malformed authority (e.g. an unbalanced IPv6 literal).
        return False

    # A fully-qualified host may carry a trailing dot; ignore it.
    host = host.rstrip(".")
    return host == "azure.com" or host.endswith(".azure.com")
|
||||||
|
|
||||||
|
|
||||||
def _common_betas_for_base_url(
|
def _common_betas_for_base_url(
|
||||||
base_url: str | None,
|
base_url: str | None,
|
||||||
*,
|
*,
|
||||||
|
|
@ -485,27 +490,25 @@ def _common_betas_for_base_url(
|
||||||
|
|
||||||
MiniMax's Anthropic-compatible endpoints (Bearer-auth) reject requests
|
MiniMax's Anthropic-compatible endpoints (Bearer-auth) reject requests
|
||||||
that include Anthropic's ``fine-grained-tool-streaming`` beta — every
|
that include Anthropic's ``fine-grained-tool-streaming`` beta — every
|
||||||
tool-use message triggers a connection error. Strip that beta for
|
tool-use message triggers a connection error. That beta (and the 1M-context
beta) is therefore stripped for Bearer-auth endpoints.
|
||||||
Bearer-auth endpoints while keeping all other betas intact.
|
|
||||||
|
|
||||||
The ``context-1m-2025-08-07`` beta is also stripped for Bearer-auth
|
The ``context-1m-2025-08-07`` beta is not sent to native Anthropic by
|
||||||
endpoints — MiniMax hosts its own models, not Claude, so the header is
|
default because some subscriptions reject it. Add it only for endpoint
|
||||||
irrelevant at best and risks request rejection at worst.
|
families that still require it for 1M context, currently Azure AI Foundry.
|
||||||
|
Bedrock uses its own client helper below and opts in explicitly.
|
||||||
|
|
||||||
``drop_context_1m_beta=True`` additionally strips the 1M-context beta on
|
``drop_context_1m_beta=True`` strips the 1M-context beta from any path that
|
||||||
otherwise-unrelated endpoints. The OAuth retry path flips this flag after
|
would otherwise include it after a subscription/endpoint rejects the beta.
|
||||||
a subscription rejects the beta with
|
|
||||||
"The long context beta is not yet available for this subscription" so
|
|
||||||
subsequent requests in the same session don't repeat the probe. See the
|
|
||||||
reactive recovery loop in ``run_agent.py`` and issue-comment history on
|
|
||||||
PR #17680 for the full rationale.
|
|
||||||
"""
|
"""
|
||||||
|
betas = list(_COMMON_BETAS)
|
||||||
|
if _base_url_needs_context_1m_beta(base_url) and not drop_context_1m_beta:
|
||||||
|
betas.append(_CONTEXT_1M_BETA)
|
||||||
if _requires_bearer_auth(base_url):
|
if _requires_bearer_auth(base_url):
|
||||||
_stripped = {_TOOL_STREAMING_BETA, _CONTEXT_1M_BETA}
|
_stripped = {_TOOL_STREAMING_BETA, _CONTEXT_1M_BETA}
|
||||||
return [b for b in _COMMON_BETAS if b not in _stripped]
|
return [b for b in betas if b not in _stripped]
|
||||||
if drop_context_1m_beta:
|
if drop_context_1m_beta:
|
||||||
return [b for b in _COMMON_BETAS if b != _CONTEXT_1M_BETA]
|
return [b for b in betas if b != _CONTEXT_1M_BETA]
|
||||||
return _COMMON_BETAS
|
return betas
|
||||||
|
|
||||||
|
|
||||||
def build_anthropic_client(
|
def build_anthropic_client(
|
||||||
|
|
@ -642,7 +645,7 @@ def build_anthropic_bedrock_client(region: str):
|
||||||
return _anthropic_sdk.AnthropicBedrock(
|
return _anthropic_sdk.AnthropicBedrock(
|
||||||
aws_region=region,
|
aws_region=region,
|
||||||
timeout=Timeout(timeout=900.0, connect=10.0),
|
timeout=Timeout(timeout=900.0, connect=10.0),
|
||||||
default_headers={"anthropic-beta": ",".join(_COMMON_BETAS)},
|
default_headers={"anthropic-beta": ",".join([*_COMMON_BETAS, _CONTEXT_1M_BETA])},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -14,6 +14,7 @@ from agent.anthropic_adapter import (
|
||||||
_to_plain_data,
|
_to_plain_data,
|
||||||
_write_claude_code_credentials,
|
_write_claude_code_credentials,
|
||||||
build_anthropic_client,
|
build_anthropic_client,
|
||||||
|
build_anthropic_bedrock_client,
|
||||||
build_anthropic_kwargs,
|
build_anthropic_kwargs,
|
||||||
convert_messages_to_anthropic,
|
convert_messages_to_anthropic,
|
||||||
convert_tools_to_anthropic,
|
convert_tools_to_anthropic,
|
||||||
|
|
@ -66,11 +67,9 @@ class TestBuildAnthropicClient:
|
||||||
assert "claude-code-20250219" in betas
|
assert "claude-code-20250219" in betas
|
||||||
assert "interleaved-thinking-2025-05-14" in betas
|
assert "interleaved-thinking-2025-05-14" in betas
|
||||||
assert "fine-grained-tool-streaming-2025-05-14" in betas
|
assert "fine-grained-tool-streaming-2025-05-14" in betas
|
||||||
# Default: 1M-context beta stays IN for OAuth so 1M-capable
|
# Native Anthropic does not get context-1m by default; accounts
|
||||||
# subscriptions keep full context. The reactive recovery path
|
# without that beta reject even short auxiliary requests.
|
||||||
# in run_agent.py flips it off only after a subscription
|
assert "context-1m-2025-08-07" not in betas
|
||||||
# actually rejects the beta.
|
|
||||||
assert "context-1m-2025-08-07" in betas
|
|
||||||
assert "api_key" not in kwargs
|
assert "api_key" not in kwargs
|
||||||
|
|
||||||
def test_oauth_drop_context_1m_beta_strips_only_1m(self):
|
def test_oauth_drop_context_1m_beta_strips_only_1m(self):
|
||||||
|
|
@ -99,7 +98,7 @@ class TestBuildAnthropicClient:
|
||||||
# API key auth should still get common betas
|
# API key auth should still get common betas
|
||||||
betas = kwargs["default_headers"]["anthropic-beta"]
|
betas = kwargs["default_headers"]["anthropic-beta"]
|
||||||
assert "interleaved-thinking-2025-05-14" in betas
|
assert "interleaved-thinking-2025-05-14" in betas
|
||||||
assert "context-1m-2025-08-07" in betas
|
assert "context-1m-2025-08-07" not in betas
|
||||||
assert "oauth-2025-04-20" not in betas # OAuth-only beta NOT present
|
assert "oauth-2025-04-20" not in betas # OAuth-only beta NOT present
|
||||||
assert "claude-code-20250219" not in betas # OAuth-only beta NOT present
|
assert "claude-code-20250219" not in betas # OAuth-only beta NOT present
|
||||||
|
|
||||||
|
|
@ -109,9 +108,27 @@ class TestBuildAnthropicClient:
|
||||||
kwargs = mock_sdk.Anthropic.call_args[1]
|
kwargs = mock_sdk.Anthropic.call_args[1]
|
||||||
assert kwargs["base_url"] == "https://custom.api.com"
|
assert kwargs["base_url"] == "https://custom.api.com"
|
||||||
assert kwargs["default_headers"] == {
|
assert kwargs["default_headers"] == {
|
||||||
"anthropic-beta": "interleaved-thinking-2025-05-14,fine-grained-tool-streaming-2025-05-14,context-1m-2025-08-07"
|
"anthropic-beta": "interleaved-thinking-2025-05-14,fine-grained-tool-streaming-2025-05-14"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def test_azure_anthropic_endpoint_keeps_context_1m_beta(self):
    """An Azure-hosted base URL still receives the 1M-context beta header."""
    azure_url = "https://example.services.ai.azure.com/models/anthropic"
    with patch("agent.anthropic_adapter._anthropic_sdk") as sdk:
        build_anthropic_client("azure-key", base_url=azure_url)
        # call_args[1] holds the keyword arguments of the constructor call.
        ctor_kwargs = sdk.Anthropic.call_args[1]
        beta_header = ctor_kwargs["default_headers"]["anthropic-beta"]
        assert "context-1m-2025-08-07" in beta_header
|
||||||
|
|
||||||
|
def test_bedrock_client_keeps_context_1m_beta(self):
    """The Bedrock client helper opts in to the 1M-context beta header."""
    with patch("agent.anthropic_adapter._anthropic_sdk") as sdk:
        sdk.AnthropicBedrock = MagicMock()
        build_anthropic_bedrock_client("us-east-1")
        # call_args[1] holds the keyword arguments of the constructor call.
        ctor_kwargs = sdk.AnthropicBedrock.call_args[1]
        beta_header = ctor_kwargs["default_headers"]["anthropic-beta"]
        assert "context-1m-2025-08-07" in beta_header
|
||||||
|
|
||||||
def test_minimax_anthropic_endpoint_uses_bearer_auth_for_regular_api_keys(self):
|
def test_minimax_anthropic_endpoint_uses_bearer_auth_for_regular_api_keys(self):
|
||||||
with patch("agent.anthropic_adapter._anthropic_sdk") as mock_sdk:
|
with patch("agent.anthropic_adapter._anthropic_sdk") as mock_sdk:
|
||||||
build_anthropic_client(
|
build_anthropic_client(
|
||||||
|
|
@ -986,8 +1003,8 @@ class TestBuildAnthropicKwargs:
|
||||||
)
|
)
|
||||||
assert kwargs["model"] == "claude-sonnet-4-20250514"
|
assert kwargs["model"] == "claude-sonnet-4-20250514"
|
||||||
|
|
||||||
def test_fast_mode_oauth_default_keeps_context_1m_beta(self):
|
def test_fast_mode_oauth_default_omits_context_1m_beta(self):
|
||||||
"""Default OAuth fast-mode requests still carry context-1m-2025-08-07."""
|
"""Default OAuth fast-mode avoids context-1m for subscriptions without it."""
|
||||||
kwargs = build_anthropic_kwargs(
|
kwargs = build_anthropic_kwargs(
|
||||||
model="claude-opus-4-6",
|
model="claude-opus-4-6",
|
||||||
messages=[{"role": "user", "content": "Hi"}],
|
messages=[{"role": "user", "content": "Hi"}],
|
||||||
|
|
@ -1000,7 +1017,7 @@ class TestBuildAnthropicKwargs:
|
||||||
betas = kwargs["extra_headers"]["anthropic-beta"]
|
betas = kwargs["extra_headers"]["anthropic-beta"]
|
||||||
assert "fast-mode-2026-02-01" in betas
|
assert "fast-mode-2026-02-01" in betas
|
||||||
assert "oauth-2025-04-20" in betas
|
assert "oauth-2025-04-20" in betas
|
||||||
assert "context-1m-2025-08-07" in betas
|
assert "context-1m-2025-08-07" not in betas
|
||||||
|
|
||||||
def test_fast_mode_oauth_drop_context_1m_beta_strips_only_1m(self):
|
def test_fast_mode_oauth_drop_context_1m_beta_strips_only_1m(self):
|
||||||
"""drop_context_1m_beta=True strips context-1m from fast-mode
|
"""drop_context_1m_beta=True strips context-1m from fast-mode
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue