diff --git a/agent/anthropic_adapter.py b/agent/anthropic_adapter.py index 8c468e8686..bb1b33fcc8 100644 --- a/agent/anthropic_adapter.py +++ b/agent/anthropic_adapter.py @@ -76,6 +76,7 @@ _ADAPTIVE_THINKING_SUBSTRINGS = ("4-6", "4.6", "4-7", "4.7") # Models where temperature/top_p/top_k return 400 if set to non-default values. # This is the Opus 4.7 contract; future 4.x+ models are expected to follow it. _NO_SAMPLING_PARAMS_SUBSTRINGS = ("4-7", "4.7") +_FAST_MODE_SUPPORTED_SUBSTRINGS = ("opus-4-6", "opus-4.6") # ── Max output token limits per Anthropic model ─────────────────────── # Source: Anthropic docs + Cline model catalog. Anthropic's API requires @@ -219,6 +220,17 @@ def _forbids_sampling_params(model: str) -> bool: return any(v in model for v in _NO_SAMPLING_PARAMS_SUBSTRINGS) +def _supports_fast_mode(model: str) -> bool: + """Return True for models that support Anthropic Fast Mode (speed=fast). + + Per Anthropic docs, fast mode is currently supported on Opus 4.6 only. + Sending ``speed: "fast"`` to any other Claude model (including Opus 4.7) + returns HTTP 400. This guard prevents silently 400'ing when stale config + or older callers leave fast mode enabled across a model upgrade. + """ + return any(v in model for v in _FAST_MODE_SUPPORTED_SUBSTRINGS) + + # Beta headers for enhanced features (sent with ALL auth types). # As of Opus 4.7 (2026-04-16), the first two are GA on Claude 4.6+ — the # beta headers are still accepted (harmless no-op) but not required. Kept @@ -1932,9 +1944,15 @@ def build_anthropic_kwargs( # ── Fast mode (Opus 4.6 only) ──────────────────────────────────── # Adds extra_body.speed="fast" + the fast-mode beta header for ~2.5x - # output speed. Only for native Anthropic endpoints — third-party - # providers would reject the unknown beta header and speed parameter. - if fast_mode and not _is_third_party_anthropic_endpoint(base_url): + # output speed. 
def _is_anthropic_fast_model(model_id: Optional[str]) -> bool:
    """Tell whether ``model_id`` names a Claude model eligible for Fast Mode.

    Anthropic currently limits fast mode to Claude Opus 4.6. Its docs
    (https://platform.claude.com/docs/en/build-with-claude/fast-mode) state:
    "Fast mode is currently supported on Opus 4.6 only. Sending speed: fast
    with an unsupported model returns an error." Opus 4.7 explicitly rejects
    the ``speed`` parameter with HTTP 400.

    The vendor prefix is removed and anything after the first ``:`` is
    ignored before matching, so ``None`` and ``""`` both yield False.
    """
    stripped = _strip_vendor_prefix(str(model_id or ""))
    # Keep only the part before the first colon (e.g. drop a ":fast" suffix).
    family, _, _ = stripped.partition(":")
    is_claude = family.startswith("claude-")
    # Among Claude models, only the Opus 4.6 spellings qualify at present.
    return is_claude and any(tag in family for tag in ("opus-4-6", "opus-4.6"))
def test_all_anthropic_models_supported(self):
    """Check the Anthropic fast-mode allow-list: Opus 4.6 and nothing else.

    Anthropic's docs limit fast mode to Opus 4.6; speed=fast sent to
    Opus 4.7, Sonnet, or Haiku returns HTTP 400. An earlier version of this
    test asserted that every Claude variant was supported, which encoded
    the bug instead of the API contract.
    """
    from hermes_cli.models import model_supports_fast_mode

    # Every spelling of Opus 4.6, bare and vendor-prefixed.
    opus_46_ids = (
        "claude-opus-4-6",
        "claude-opus-4.6",
        "anthropic/claude-opus-4-6",
        "anthropic/claude-opus-4.6",
    )
    # Claude models the API rejects for fast mode: Opus 4.7, Sonnet, Haiku.
    rejected_ids = (
        "claude-opus-4-7",
        "claude-sonnet-4-6",
        "claude-sonnet-4.6",
        "claude-sonnet-4",
        "claude-haiku-4-5",
        "claude-3-5-haiku",
    )

    for candidate in opus_46_ids:
        assert model_supports_fast_mode(candidate), f"{candidate} should support fast mode"

    for candidate in rejected_ids:
        assert not model_supports_fast_mode(candidate), (
            f"{candidate} should NOT support fast mode — Anthropic restricts "
            f"speed=fast to Opus 4.6"
        )
def test_anthropic_non_opus46_models_excluded(self):
    """Claude models other than Opus 4.6 must report no fast-mode support.

    Per https://platform.claude.com/docs/en/build-with-claude/fast-mode,
    sending speed=fast to Opus 4.7, Sonnet, or Haiku returns HTTP 400.
    """
    from hermes_cli.models import model_supports_fast_mode

    excluded_ids = (
        "claude-sonnet-4-6",
        "claude-sonnet-4.6",
        "claude-haiku-4-5",
        "claude-opus-4-7",
        "anthropic/claude-sonnet-4.6",
        "anthropic/claude-opus-4-7",
    )
    for model_id in excluded_ids:
        # Identity check: the predicate must return the bool False itself.
        assert model_supports_fast_mode(model_id) is False
def test_is_anthropic_fast_model(self):
    """Only Opus 4.6 spellings count as Anthropic fast-mode models."""
    from hermes_cli.models import _is_anthropic_fast_model

    expectations = (
        # Opus 4.6 in any form is supported.
        ("claude-opus-4-6", True),
        ("claude-opus-4.6", True),
        ("anthropic/claude-opus-4-6", True),
        ("claude-opus-4.6:fast", True),
        # Other Claude models would 400 if speed=fast were sent.
        ("claude-opus-4-7", False),
        ("claude-sonnet-4-6", False),
        ("claude-haiku-4-5", False),
        # Non-Claude and empty identifiers.
        ("gpt-5.4", False),
        ("", False),
    )
    for model_id, eligible in expectations:
        assert _is_anthropic_fast_model(model_id) is eligible
def test_fast_command_hidden_for_anthropic_opus_47(self):
    """With Opus 4.7 selected, the /fast command must not be offered."""
    cli_module = _import_cli()
    # Minimal stand-in carrying the attributes set on the real CLI object
    # for this scenario; no agent is attached.
    fake_cli = SimpleNamespace(
        provider="anthropic",
        requested_provider="anthropic",
        model="claude-opus-4-7",
        agent=None,
    )
    available = cli_module.HermesCLI._fast_command_available(fake_cli)
    assert available is False