mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-05 02:31:47 +00:00
fix(anthropic): restrict fast mode to Opus 4.6 (Anthropic API contract)
Per https://platform.claude.com/docs/en/build-with-claude/fast-mode: "Fast mode is currently supported on Opus 4.6 only. Sending speed: fast with an unsupported model returns an error."

Pre-fix, _is_anthropic_fast_model() returned True for any claude-* model, so /fast on Opus 4.7 (or Sonnet/Haiku) would persist agent.service_tier=fast in config.yaml and the adapter would inject extra_body["speed"] = "fast" on every subsequent request. Opus 4.7 returns:

    HTTP 400: 'claude-opus-4-7' does not support the `speed` parameter.

This wedged sessions across model upgrades (a user who ran /fast on Opus 4.6 and later switched the default model to 4.7 hit a hard 400 on every turn until they manually edited config.yaml).

Changes:
- _is_anthropic_fast_model: gate on "opus-4-6" / "opus-4.6" only
- anthropic_adapter: add _supports_fast_mode predicate as a defensive guard so stale request_overrides on an unsupported model are dropped silently instead of 400'ing
- Tests: flip the assertions that mirrored the bug (Sonnet/Haiku/Opus 4.7 asserting fast-mode support) to match the documented API contract
This commit is contained in:
parent
a7417f8a4a
commit
d89e7a3cd4
4 changed files with 138 additions and 24 deletions
|
|
@ -76,6 +76,7 @@ _ADAPTIVE_THINKING_SUBSTRINGS = ("4-6", "4.6", "4-7", "4.7")
|
|||
# Models where temperature/top_p/top_k return 400 if set to non-default values.
|
||||
# This is the Opus 4.7 contract; future 4.x+ models are expected to follow it.
|
||||
_NO_SAMPLING_PARAMS_SUBSTRINGS = ("4-7", "4.7")
|
||||
_FAST_MODE_SUPPORTED_SUBSTRINGS = ("opus-4-6", "opus-4.6")
|
||||
|
||||
# ── Max output token limits per Anthropic model ───────────────────────
|
||||
# Source: Anthropic docs + Cline model catalog. Anthropic's API requires
|
||||
|
|
@ -219,6 +220,17 @@ def _forbids_sampling_params(model: str) -> bool:
|
|||
return any(v in model for v in _NO_SAMPLING_PARAMS_SUBSTRINGS)
|
||||
|
||||
|
||||
def _supports_fast_mode(model: str) -> bool:
|
||||
"""Return True for models that support Anthropic Fast Mode (speed=fast).
|
||||
|
||||
Per Anthropic docs, fast mode is currently supported on Opus 4.6 only.
|
||||
Sending ``speed: "fast"`` to any other Claude model (including Opus 4.7)
|
||||
returns HTTP 400. This guard prevents silently 400'ing when stale config
|
||||
or older callers leave fast mode enabled across a model upgrade.
|
||||
"""
|
||||
return any(v in model for v in _FAST_MODE_SUPPORTED_SUBSTRINGS)
|
||||
|
||||
|
||||
# Beta headers for enhanced features (sent with ALL auth types).
|
||||
# As of Opus 4.7 (2026-04-16), the first two are GA on Claude 4.6+ — the
|
||||
# beta headers are still accepted (harmless no-op) but not required. Kept
|
||||
|
|
@ -1932,9 +1944,15 @@ def build_anthropic_kwargs(
|
|||
|
||||
# ── Fast mode (Opus 4.6 only) ────────────────────────────────────
|
||||
# Adds extra_body.speed="fast" + the fast-mode beta header for ~2.5x
|
||||
# output speed. Only for native Anthropic endpoints — third-party
|
||||
# providers would reject the unknown beta header and speed parameter.
|
||||
if fast_mode and not _is_third_party_anthropic_endpoint(base_url):
|
||||
# output speed. Per Anthropic docs, fast mode is only supported on
|
||||
# Opus 4.6 — Opus 4.7 and other models 400 on the speed parameter.
|
||||
# Only for native Anthropic endpoints — third-party providers would
|
||||
# reject the unknown beta header and speed parameter.
|
||||
if (
|
||||
fast_mode
|
||||
and not _is_third_party_anthropic_endpoint(base_url)
|
||||
and _supports_fast_mode(model)
|
||||
):
|
||||
kwargs.setdefault("extra_body", {})["speed"] = "fast"
|
||||
# Build extra_headers with ALL applicable betas (the per-request
|
||||
# extra_headers override the client-level anthropic-beta header).
|
||||
|
|
|
|||
|
|
@ -1740,10 +1740,20 @@ def model_supports_fast_mode(model_id: Optional[str]) -> bool:
|
|||
|
||||
|
||||
def _is_anthropic_fast_model(model_id: Optional[str]) -> bool:
|
||||
"""Return True if the model is a Claude model eligible for Anthropic Fast Mode."""
|
||||
"""Return True if the model is a Claude model eligible for Anthropic Fast Mode.
|
||||
|
||||
Fast mode is currently supported on Claude Opus 4.6 only. Per Anthropic's
|
||||
docs (https://platform.claude.com/docs/en/build-with-claude/fast-mode):
|
||||
"Fast mode is currently supported on Opus 4.6 only. Sending speed: fast
|
||||
with an unsupported model returns an error." Opus 4.7 explicitly rejects
|
||||
the ``speed`` parameter with HTTP 400.
|
||||
"""
|
||||
raw = _strip_vendor_prefix(str(model_id or ""))
|
||||
base = raw.split(":")[0]
|
||||
return base.startswith("claude-")
|
||||
if not base.startswith("claude-"):
|
||||
return False
|
||||
# Only Opus 4.6 supports fast mode at present.
|
||||
return "opus-4-6" in base or "opus-4.6" in base
|
||||
|
||||
|
||||
def resolve_fast_mode_overrides(model_id: Optional[str]) -> dict[str, Any] | None:
|
||||
|
|
|
|||
|
|
@ -1113,6 +1113,45 @@ class TestBuildAnthropicKwargs:
|
|||
assert _forbids_sampling_params("claude-opus-4-6") is False
|
||||
assert _forbids_sampling_params("claude-sonnet-4-5") is False
|
||||
|
||||
def test_supports_fast_mode_predicate(self):
|
||||
"""Fast mode is Opus 4.6 only — Opus 4.7 and others must be excluded."""
|
||||
from agent.anthropic_adapter import _supports_fast_mode
|
||||
assert _supports_fast_mode("claude-opus-4-6") is True
|
||||
assert _supports_fast_mode("anthropic/claude-opus-4-6") is True
|
||||
assert _supports_fast_mode("claude-opus-4-7") is False
|
||||
assert _supports_fast_mode("claude-sonnet-4-6") is False
|
||||
assert _supports_fast_mode("claude-haiku-4-5") is False
|
||||
assert _supports_fast_mode("") is False
|
||||
|
||||
def test_fast_mode_omitted_for_unsupported_model(self):
|
||||
"""fast_mode=True on Opus 4.7 must NOT inject speed=fast (API 400s)."""
|
||||
kwargs = build_anthropic_kwargs(
|
||||
model="claude-opus-4-7",
|
||||
messages=[{"role": "user", "content": "hi"}],
|
||||
tools=None,
|
||||
max_tokens=1024,
|
||||
reasoning_config=None,
|
||||
fast_mode=True,
|
||||
)
|
||||
# extra_body either absent or doesn't carry "speed"
|
||||
assert "speed" not in kwargs.get("extra_body", {})
|
||||
# No fast-mode beta header should be added either
|
||||
beta_header = (kwargs.get("extra_headers") or {}).get("anthropic-beta", "")
|
||||
assert "fast-mode-2026-02-01" not in beta_header
|
||||
|
||||
def test_fast_mode_still_applied_on_opus_46(self):
|
||||
"""Regression guard — fast mode must still work on Opus 4.6."""
|
||||
kwargs = build_anthropic_kwargs(
|
||||
model="claude-opus-4-6",
|
||||
messages=[{"role": "user", "content": "hi"}],
|
||||
tools=None,
|
||||
max_tokens=1024,
|
||||
reasoning_config=None,
|
||||
fast_mode=True,
|
||||
)
|
||||
assert kwargs.get("extra_body", {}).get("speed") == "fast"
|
||||
assert "fast-mode-2026-02-01" in kwargs["extra_headers"]["anthropic-beta"]
|
||||
|
||||
def test_reasoning_disabled(self):
|
||||
kwargs = build_anthropic_kwargs(
|
||||
model="claude-sonnet-4-20250514",
|
||||
|
|
|
|||
|
|
@ -128,17 +128,34 @@ class TestPriorityProcessingModels(unittest.TestCase):
|
|||
assert model_supports_fast_mode(model), f"{model} should support fast mode"
|
||||
|
||||
def test_all_anthropic_models_supported(self):
|
||||
"""Per Anthropic docs, fast mode is currently Opus 4.6 only.
|
||||
|
||||
Sending speed=fast to Opus 4.7, Sonnet, or Haiku returns HTTP 400.
|
||||
Pre-fix this test asserted all Claude variants supported fast mode,
|
||||
which mirrored the bug rather than the API contract.
|
||||
"""
|
||||
from hermes_cli.models import model_supports_fast_mode
|
||||
|
||||
# All Claude models support Anthropic Fast Mode — Opus, Sonnet, Haiku.
|
||||
# Supported: Opus 4.6 in any form
|
||||
supported = [
|
||||
"claude-opus-4-7", "claude-opus-4-6", "claude-opus-4.6",
|
||||
"claude-sonnet-4-6", "claude-sonnet-4.6", "claude-sonnet-4",
|
||||
"claude-haiku-4-5", "claude-3-5-haiku",
|
||||
"claude-opus-4-6", "claude-opus-4.6",
|
||||
"anthropic/claude-opus-4-6", "anthropic/claude-opus-4.6",
|
||||
]
|
||||
for model in supported:
|
||||
assert model_supports_fast_mode(model), f"{model} should support fast mode"
|
||||
|
||||
# Unsupported per Anthropic API: Opus 4.7, Sonnet, Haiku
|
||||
unsupported = [
|
||||
"claude-opus-4-7",
|
||||
"claude-sonnet-4-6", "claude-sonnet-4.6", "claude-sonnet-4",
|
||||
"claude-haiku-4-5", "claude-3-5-haiku",
|
||||
]
|
||||
for model in unsupported:
|
||||
assert not model_supports_fast_mode(model), (
|
||||
f"{model} should NOT support fast mode — Anthropic restricts "
|
||||
f"speed=fast to Opus 4.6"
|
||||
)
|
||||
|
||||
def test_codex_models_excluded(self):
|
||||
"""Codex models route through Responses API and don't accept service_tier."""
|
||||
from hermes_cli.models import model_supports_fast_mode
|
||||
|
|
@ -257,18 +274,20 @@ class TestAnthropicFastMode(unittest.TestCase):
|
|||
assert model_supports_fast_mode("anthropic/claude-opus-4-6") is True
|
||||
assert model_supports_fast_mode("anthropic/claude-opus-4.6") is True
|
||||
|
||||
def test_anthropic_all_claude_models_supported(self):
|
||||
def test_anthropic_non_opus46_models_excluded(self):
|
||||
"""Anthropic restricts fast mode to Opus 4.6 — others must be excluded.
|
||||
|
||||
Per https://platform.claude.com/docs/en/build-with-claude/fast-mode,
|
||||
sending speed=fast to Opus 4.7, Sonnet, or Haiku returns HTTP 400.
|
||||
"""
|
||||
from hermes_cli.models import model_supports_fast_mode
|
||||
|
||||
# All Claude models support fast mode — Opus, Sonnet, Haiku.
|
||||
# The anthropic adapter gates speed=fast on native Anthropic
|
||||
# endpoints only, so third-party proxies that reject the beta
|
||||
# are protected downstream (see _is_third_party_anthropic_endpoint).
|
||||
assert model_supports_fast_mode("claude-sonnet-4-6") is True
|
||||
assert model_supports_fast_mode("claude-sonnet-4.6") is True
|
||||
assert model_supports_fast_mode("claude-haiku-4-5") is True
|
||||
assert model_supports_fast_mode("claude-opus-4-7") is True
|
||||
assert model_supports_fast_mode("anthropic/claude-sonnet-4.6") is True
|
||||
assert model_supports_fast_mode("claude-sonnet-4-6") is False
|
||||
assert model_supports_fast_mode("claude-sonnet-4.6") is False
|
||||
assert model_supports_fast_mode("claude-haiku-4-5") is False
|
||||
assert model_supports_fast_mode("claude-opus-4-7") is False
|
||||
assert model_supports_fast_mode("anthropic/claude-sonnet-4.6") is False
|
||||
assert model_supports_fast_mode("anthropic/claude-opus-4-7") is False
|
||||
|
||||
def test_non_claude_models_not_anthropic_fast(self):
|
||||
"""Non-Claude models should not be treated as Anthropic fast-mode."""
|
||||
|
|
@ -294,6 +313,17 @@ class TestAnthropicFastMode(unittest.TestCase):
|
|||
result = resolve_fast_mode_overrides("anthropic/claude-opus-4.6")
|
||||
assert result == {"speed": "fast"}
|
||||
|
||||
def test_resolve_overrides_returns_none_for_unsupported_claude(self):
|
||||
"""Opus 4.7 and other Claude models don't support fast mode (API 400s).
|
||||
|
||||
Per Anthropic docs, fast mode is currently Opus 4.6 only.
|
||||
"""
|
||||
from hermes_cli.models import resolve_fast_mode_overrides
|
||||
|
||||
assert resolve_fast_mode_overrides("claude-opus-4-7") is None
|
||||
assert resolve_fast_mode_overrides("claude-sonnet-4-6") is None
|
||||
assert resolve_fast_mode_overrides("claude-haiku-4-5") is None
|
||||
|
||||
def test_resolve_overrides_returns_service_tier_for_openai(self):
|
||||
"""OpenAI models should still get service_tier, not speed."""
|
||||
from hermes_cli.models import resolve_fast_mode_overrides
|
||||
|
|
@ -302,13 +332,21 @@ class TestAnthropicFastMode(unittest.TestCase):
|
|||
assert result == {"service_tier": "priority"}
|
||||
|
||||
def test_is_anthropic_fast_model(self):
|
||||
"""Fast mode is currently Opus 4.6 only — other Claude variants must be excluded."""
|
||||
from hermes_cli.models import _is_anthropic_fast_model
|
||||
|
||||
# Supported: Opus 4.6 in any form
|
||||
assert _is_anthropic_fast_model("claude-opus-4-6") is True
|
||||
assert _is_anthropic_fast_model("claude-opus-4.6") is True
|
||||
assert _is_anthropic_fast_model("claude-sonnet-4-6") is True
|
||||
assert _is_anthropic_fast_model("claude-haiku-4-5") is True
|
||||
assert _is_anthropic_fast_model("anthropic/claude-opus-4-6") is True
|
||||
assert _is_anthropic_fast_model("claude-opus-4.6:fast") is True
|
||||
|
||||
# Unsupported per Anthropic API contract — would 400 if we sent speed=fast
|
||||
assert _is_anthropic_fast_model("claude-opus-4-7") is False
|
||||
assert _is_anthropic_fast_model("claude-sonnet-4-6") is False
|
||||
assert _is_anthropic_fast_model("claude-haiku-4-5") is False
|
||||
|
||||
# Non-Claude
|
||||
assert _is_anthropic_fast_model("gpt-5.4") is False
|
||||
assert _is_anthropic_fast_model("") is False
|
||||
|
||||
|
|
@ -320,14 +358,23 @@ class TestAnthropicFastMode(unittest.TestCase):
|
|||
)
|
||||
assert cli_mod.HermesCLI._fast_command_available(stub) is True
|
||||
|
||||
def test_fast_command_exposed_for_anthropic_sonnet(self):
|
||||
"""Sonnet now supports Anthropic Fast Mode — the adapter gates on base_url."""
|
||||
def test_fast_command_hidden_for_anthropic_sonnet(self):
|
||||
"""Sonnet doesn't support fast mode (Opus 4.6 only) — /fast must be hidden."""
|
||||
cli_mod = _import_cli()
|
||||
stub = SimpleNamespace(
|
||||
provider="anthropic", requested_provider="anthropic",
|
||||
model="claude-sonnet-4-6", agent=None,
|
||||
)
|
||||
assert cli_mod.HermesCLI._fast_command_available(stub) is True
|
||||
assert cli_mod.HermesCLI._fast_command_available(stub) is False
|
||||
|
||||
def test_fast_command_hidden_for_anthropic_opus_47(self):
|
||||
"""Opus 4.7 doesn't support fast mode — /fast must be hidden."""
|
||||
cli_mod = _import_cli()
|
||||
stub = SimpleNamespace(
|
||||
provider="anthropic", requested_provider="anthropic",
|
||||
model="claude-opus-4-7", agent=None,
|
||||
)
|
||||
assert cli_mod.HermesCLI._fast_command_available(stub) is False
|
||||
|
||||
def test_fast_command_hidden_for_non_claude_non_openai(self):
|
||||
"""Non-Claude, non-OpenAI models should not expose /fast."""
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue