hermes-agent/tests/agent/test_bedrock_1m_context.py
Teknium a7cdd4133c
fix(bedrock): send context-1m-2025-08-07 beta so Opus 4.6/4.7 get 1M context (#16793)
On AWS Bedrock (and Azure AI Foundry), Claude Opus 4.6/4.7 and Sonnet 4.6
are capped at 200K context unless the request carries the
`context-1m-2025-08-07` beta header. On native Anthropic (api.anthropic.com)
1M went GA so the header is a harmless no-op, but Bedrock/Azure still gate
it as beta as of 2026-04.

Hermes was advertising 1M in model_metadata.py (`claude-opus-4-7: 1000000`)
while silently sending a request without the beta — so Bedrock users saw
a 200K ceiling with no error message, and no config knob unblocked it.
Claude Code sends this header by default, which is why the same Bedrock
credentials worked there.

- Add `context-1m-2025-08-07` to `_COMMON_BETAS` (alongside interleaved
  thinking and fine-grained tool streaming).
- Strip it in `_common_betas_for_base_url` for MiniMax bearer-auth
  endpoints — they host their own models, not Claude, so Anthropic beta
  headers are irrelevant and could risk rejection.
- Attach `_COMMON_BETAS` as `default_headers` on the AnthropicBedrock
  client. Previously that constructor passed no betas at all, so native
  Anthropic had the 1M unlock via default_headers but Bedrock didn't.
- Fast-mode per-request `extra_headers` already rebuilds from
  `_common_betas_for_base_url`, so it picks up the 1M beta automatically.

Reported by user 'Rodmar' on Discord: Bedrock Opus 4.7 stuck at 200K while
same credentials worked in Claude Code.
2026-04-27 20:41:36 -07:00

105 lines
4.3 KiB
Python

"""Tests for the 1M-context beta header on AWS Bedrock Claude models.
Claude Opus 4.6/4.7 and Sonnet 4.6 support a 1M context window, but on AWS
Bedrock (and Azure AI Foundry) that window is still gated behind the
``context-1m-2025-08-07`` beta header as of 2026-04. Without it, Bedrock
caps these models at 200K even though ``model_metadata.py`` advertises 1M.
These tests guard the invariant that the header is always emitted on the
Bedrock client path, and that it survives the MiniMax bearer-auth strip.
"""
from unittest.mock import MagicMock, patch
class TestBedrockContext1MBeta:
    """``context-1m-2025-08-07`` must reach Bedrock Claude requests."""

    def test_common_betas_includes_1m(self):
        """The 1M beta constant has the expected value and is a common beta."""
        from agent.anthropic_adapter import _COMMON_BETAS, _CONTEXT_1M_BETA

        assert _CONTEXT_1M_BETA == "context-1m-2025-08-07"
        assert _CONTEXT_1M_BETA in _COMMON_BETAS

    def test_common_betas_for_native_anthropic_includes_1m(self):
        """Native Anthropic endpoints (and Bedrock with empty base_url) get 1M."""
        from agent.anthropic_adapter import (
            _common_betas_for_base_url,
            _CONTEXT_1M_BETA,
        )

        assert _CONTEXT_1M_BETA in _common_betas_for_base_url(None)
        assert _CONTEXT_1M_BETA in _common_betas_for_base_url("")
        assert _CONTEXT_1M_BETA in _common_betas_for_base_url(
            "https://api.anthropic.com"
        )

    def test_common_betas_strips_1m_for_minimax(self):
        """MiniMax bearer-auth endpoints host their own models — strip 1M beta."""
        from agent.anthropic_adapter import (
            _common_betas_for_base_url,
            _CONTEXT_1M_BETA,
        )

        for url in (
            "https://api.minimax.io/anthropic",
            "https://api.minimaxi.com/anthropic",
        ):
            betas = _common_betas_for_base_url(url)
            assert _CONTEXT_1M_BETA not in betas, (
                f"1M beta must be stripped for MiniMax bearer endpoint {url}"
            )
            # Other betas still present
            assert "interleaved-thinking-2025-05-14" in betas

    def test_build_anthropic_bedrock_client_sends_1m_beta(self):
        """AnthropicBedrock client must carry the 1M beta in default_headers.

        This is the load-bearing assertion for the reported bug:
        without this header Bedrock serves Opus 4.6/4.7 with a 200K cap.
        """
        import agent.anthropic_adapter as adapter

        fake_sdk = MagicMock()
        fake_sdk.AnthropicBedrock = MagicMock()
        with patch.object(adapter, "_anthropic_sdk", fake_sdk):
            adapter.build_anthropic_bedrock_client(region="us-west-2")

        # ``call_args`` is None on a mock that was never called; fail with a
        # readable message instead of an AttributeError on ``None.kwargs``.
        assert fake_sdk.AnthropicBedrock.called, (
            "build_anthropic_bedrock_client did not construct AnthropicBedrock"
        )
        call_kwargs = fake_sdk.AnthropicBedrock.call_args.kwargs
        assert call_kwargs["aws_region"] == "us-west-2"

        # ``or {}`` tolerates both a missing key and an explicit None value.
        default_headers = call_kwargs.get("default_headers") or {}
        beta_header = default_headers.get("anthropic-beta", "")
        assert "context-1m-2025-08-07" in beta_header, (
            "Bedrock client must send context-1m-2025-08-07 or Opus 4.6/4.7 "
            "silently caps at 200K context"
        )
        # Other common betas still present — no regression.
        assert "interleaved-thinking-2025-05-14" in beta_header
        assert "fine-grained-tool-streaming-2025-05-14" in beta_header

    def test_build_anthropic_kwargs_includes_1m_for_bedrock_fastmode(self):
        """Fast-mode requests (per-request extra_headers) still include 1M beta.

        Per-request extra_headers override client-level default_headers, so
        the fast-mode path must re-include everything in _COMMON_BETAS.
        """
        from agent.anthropic_adapter import build_anthropic_kwargs

        kwargs = build_anthropic_kwargs(
            model="claude-opus-4-7",
            messages=[{"role": "user", "content": "hi"}],
            tools=None,
            max_tokens=1024,
            reasoning_config=None,
            is_oauth=False,
            # Empty base_url mirrors AnthropicBedrock (no HTTP base URL)
            base_url=None,
            fast_mode=True,
        )

        # Same None-tolerant lookup as the Bedrock client test: a present but
        # None ``extra_headers`` should fail the assertion below, not raise
        # AttributeError here.
        extra_headers = kwargs.get("extra_headers") or {}
        beta_header = extra_headers.get("anthropic-beta", "")
        assert "context-1m-2025-08-07" in beta_header, (
            "fast-mode extra_headers must carry the 1M beta or it overrides "
            "client-level default_headers and Bedrock drops back to 200K"
        )