diff --git a/agent/anthropic_adapter.py b/agent/anthropic_adapter.py
index 59e7622fbb..3ed34517e1 100644
--- a/agent/anthropic_adapter.py
+++ b/agent/anthropic_adapter.py
@@ -103,6 +103,11 @@ _COMMON_BETAS = [
 # fall back to the provider's default response path.
 _TOOL_STREAMING_BETA = "fine-grained-tool-streaming-2025-05-14"
 
+# Fast mode beta — enables the ``speed: "fast"`` request parameter for
+# significantly higher output token throughput on Opus 4.6 (~2.5x).
+# See https://platform.claude.com/docs/en/build-with-claude/fast-mode
+_FAST_MODE_BETA = "fast-mode-2026-02-01"
+
 # Additional beta headers required for OAuth/subscription auth.
 # Matches what Claude Code (and pi-ai / OpenCode) send.
 _OAUTH_ONLY_BETAS = [
@@ -1256,6 +1261,7 @@ def build_anthropic_kwargs(
     preserve_dots: bool = False,
     context_length: Optional[int] = None,
     base_url: str | None = None,
+    fast_mode: bool = False,
 ) -> Dict[str, Any]:
     """Build kwargs for anthropic.messages.create().
 
@@ -1289,6 +1295,10 @@ def build_anthropic_kwargs(
 
     When *base_url* points to a third-party Anthropic-compatible endpoint,
     thinking block signatures are stripped (they are Anthropic-proprietary).
+
+    When *fast_mode* is True, adds ``speed: "fast"`` and the fast-mode beta
+    header for ~2.5x faster output throughput on Opus 4.6. Currently only
+    supported on native Anthropic endpoints (not third-party compatible ones).
     """
     system, anthropic_messages = convert_messages_to_anthropic(messages, base_url=base_url)
     anthropic_tools = convert_tools_to_anthropic(tools) if tools else []
@@ -1387,6 +1397,20 @@ def build_anthropic_kwargs(
             kwargs["temperature"] = 1
             kwargs["max_tokens"] = max(effective_max_tokens, budget + 4096)
 
+    # ── Fast mode (Opus 4.6 only) ────────────────────────────────────
+    # Adds speed:"fast" + the fast-mode beta header for ~2.5x output speed.
+    # Only for native Anthropic endpoints — third-party providers would
+    # reject the unknown beta header and speed parameter.
+    if fast_mode and not _is_third_party_anthropic_endpoint(base_url):
+        kwargs["speed"] = "fast"
+        # Build extra_headers with ALL applicable betas (the per-request
+        # extra_headers override the client-level anthropic-beta header).
+        betas = list(_common_betas_for_base_url(base_url))
+        if is_oauth:
+            betas.extend(_OAUTH_ONLY_BETAS)
+        betas.append(_FAST_MODE_BETA)
+        kwargs["extra_headers"] = {"anthropic-beta": ",".join(betas)}
+
     return kwargs
 
 
diff --git a/cli.py b/cli.py
index 221976ad25..17fae086e8 100644
--- a/cli.py
+++ b/cli.py
@@ -5697,15 +5697,24 @@ class HermesCLI:
             _cprint(f" {_GOLD}✓ Reasoning effort set to '{arg}' (session only){_RST}")
 
     def _handle_fast_command(self, cmd: str):
-        """Handle /fast — toggle OpenAI Priority Processing (service_tier)."""
+        """Handle /fast — toggle fast mode (OpenAI Priority Processing / Anthropic Fast Mode)."""
         if not self._fast_command_available():
-            _cprint(" (._.) /fast is only available for OpenAI models that support Priority Processing.")
+            _cprint(" (._.) /fast is only available for models that support fast mode (OpenAI Priority Processing or Anthropic Fast Mode).")
             return
 
+        # Determine the branding for the current model
+        try:
+            from hermes_cli.models import _is_anthropic_fast_model
+            agent = getattr(self, "agent", None)
+            model = getattr(agent, "model", None) or getattr(self, "model", None)
+            feature_name = "Anthropic Fast Mode" if _is_anthropic_fast_model(model) else "Priority Processing"
+        except Exception:
+            feature_name = "Fast mode"
+
         parts = cmd.strip().split(maxsplit=1)
         if len(parts) < 2 or parts[1].strip().lower() == "status":
             status = "fast" if self.service_tier == "priority" else "normal"
-            _cprint(f" {_GOLD}Priority Processing: {status}{_RST}")
+            _cprint(f" {_GOLD}{feature_name}: {status}{_RST}")
             _cprint(f" {_DIM}Usage: /fast [normal|fast|status]{_RST}")
             return
 
@@ -5726,9 +5735,9 @@ class HermesCLI:
         self.agent = None  # Force agent re-init with new service-tier config
 
         if save_config_value("agent.service_tier", saved_value):
-            _cprint(f" {_GOLD}✓ Priority Processing set to {label} (saved to config){_RST}")
+            _cprint(f" {_GOLD}✓ {feature_name} set to {label} (saved to config){_RST}")
         else:
-            _cprint(f" {_GOLD}✓ Priority Processing set to {label} (session only){_RST}")
+            _cprint(f" {_GOLD}✓ {feature_name} set to {label} (session only){_RST}")
 
     def _on_reasoning(self, reasoning_text: str):
         """Callback for intermediate reasoning display during tool-call loops."""
diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py
index e0368440ff..e5345912bf 100644
--- a/hermes_cli/commands.py
+++ b/hermes_cli/commands.py
@@ -100,7 +100,7 @@ COMMAND_REGISTRY: list[CommandDef] = [
     CommandDef("reasoning", "Manage reasoning effort and display", "Configuration",
                args_hint="[level|show|hide]",
                subcommands=("none", "minimal", "low", "medium", "high", "xhigh", "show", "hide", "on", "off")),
-    CommandDef("fast", "Toggle OpenAI Priority Processing (Normal/Fast)", "Configuration",
+    CommandDef("fast", "Toggle fast mode — OpenAI Priority Processing / Anthropic Fast Mode (Normal/Fast)", "Configuration",
                cli_only=True, args_hint="[normal|fast|status]",
                subcommands=("normal", "fast", "status", "on", "off")),
     CommandDef("skin", "Show or change the display skin/theme", "Configuration",
diff --git a/hermes_cli/models.py b/hermes_cli/models.py
index 530c1ec6ce..ac73fa2112 100644
--- a/hermes_cli/models.py
+++ b/hermes_cli/models.py
@@ -1036,25 +1036,74 @@ _PRIORITY_PROCESSING_MODELS: frozenset[str] = frozenset({
     "o4-mini",
 })
 
+# Models that support Anthropic Fast Mode (speed="fast").
+# See https://platform.claude.com/docs/en/build-with-claude/fast-mode
+# Currently only Claude Opus 4.6. Both hyphen and dot variants are stored
+# to handle native Anthropic (claude-opus-4-6) and OpenRouter (claude-opus-4.6).
+_ANTHROPIC_FAST_MODE_MODELS: frozenset[str] = frozenset({
+    "claude-opus-4-6",
+    "claude-opus-4.6",
+})
+
 
-def model_supports_fast_mode(model_id: Optional[str]) -> bool:
-    """Return whether Hermes should expose the /fast (Priority Processing) toggle."""
+def _strip_vendor_prefix(model_id: Optional[str]) -> str:
+    """Normalize a model ID and drop any leading ``vendor/`` prefix.
+
+    The ID is stringified, stripped and lowercased first, so ``None`` yields
+    ``""``. Example: ``'anthropic/claude-opus-4-6'`` -> ``'claude-opus-4-6'``.
+    """
     raw = str(model_id or "").strip().lower()
     if "/" in raw:
         raw = raw.split("/", 1)[1]
-    return raw in _PRIORITY_PROCESSING_MODELS
+    return raw
+
+
+def _anthropic_base_model(model_id: Optional[str]) -> str:
+    """Reduce a model ID to the name matched against Anthropic fast-mode models.
+
+    Drops the vendor prefix, any OpenRouter variant tag after ``:`` (``:fast``,
+    ``:beta``) and a trailing ``-YYYYMMDD`` snapshot date, e.g.
+    ``anthropic/claude-opus-4-6-20260401:beta`` -> ``claude-opus-4-6``.
+    """
+    base = _strip_vendor_prefix(model_id).split(":", 1)[0]
+    # A dated snapshot ends in an 8-digit segment; "4-6" / "4.6" never match.
+    head, sep, tail = base.rpartition("-")
+    if sep and len(tail) == 8 and tail.isascii() and tail.isdigit():
+        base = head
+    return base
+
+
+def model_supports_fast_mode(model_id: Optional[str]) -> bool:
+    """Return whether Hermes should expose the /fast toggle for this model.
+
+    True for OpenAI Priority Processing models and for Anthropic Fast Mode
+    models (see ``_is_anthropic_fast_model``).
+    """
+    if _strip_vendor_prefix(model_id) in _PRIORITY_PROCESSING_MODELS:
+        return True
+    return _is_anthropic_fast_model(model_id)
+
+
+def _is_anthropic_fast_model(model_id: Optional[str]) -> bool:
+    """Return True if the model supports Anthropic's fast mode (speed='fast')."""
+    return _anthropic_base_model(model_id) in _ANTHROPIC_FAST_MODE_MODELS
 
 
 def resolve_fast_mode_overrides(model_id: Optional[str]) -> dict[str, Any] | None:
-    """Return request_overrides for Priority Processing, or None if unsupported.
+    """Return request_overrides for fast/priority mode, or None if unsupported.
 
-    Unlike the previous ``resolve_fast_mode_runtime``, this does NOT force a
-    provider/backend switch. The ``service_tier`` parameter is injected into
-    whatever API path the user is already on (Codex Responses, Chat Completions,
-    or OpenRouter passthrough).
+    Returns provider-appropriate overrides:
+    - OpenAI models: ``{"service_tier": "priority"}`` (Priority Processing)
+    - Anthropic models: ``{"speed": "fast"}`` (Anthropic Fast Mode beta)
+
+    The overrides are injected into the API request kwargs by
+    ``_build_api_kwargs`` in run_agent.py — each API path handles its own
+    keys (service_tier for OpenAI/Codex, speed for Anthropic Messages).
     """
     if not model_supports_fast_mode(model_id):
         return None
+    if _is_anthropic_fast_model(model_id):
+        return {"speed": "fast"}
     return {"service_tier": "priority"}
 
 
diff --git a/run_agent.py b/run_agent.py
index 64c8cbadb3..dd03357c2b 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -5466,6 +5466,7 @@ class AIAgent:
                 preserve_dots=self._anthropic_preserve_dots(),
                 context_length=ctx_len,
                 base_url=getattr(self, "_anthropic_base_url", None),
+                fast_mode=self.request_overrides.get("speed") == "fast",
             )
 
         if self.api_mode == "codex_responses":
diff --git a/tests/cli/test_fast_command.py b/tests/cli/test_fast_command.py
index 907808d32a..d39453c109 100644
--- a/tests/cli/test_fast_command.py
+++ b/tests/cli/test_fast_command.py
@@ -247,6 +247,172 @@ class TestFastModeRouting(unittest.TestCase):
         assert route.get("request_overrides") is None
 
 
+class TestAnthropicFastMode(unittest.TestCase):
+    """Verify Anthropic Fast Mode model support and override resolution."""
+
+    def test_anthropic_opus_supported(self):
+        from hermes_cli.models import model_supports_fast_mode
+
+        # Native Anthropic format (hyphens)
+        assert model_supports_fast_mode("claude-opus-4-6") is True
+        # OpenRouter format (dots)
+        assert model_supports_fast_mode("claude-opus-4.6") is True
+        # With vendor prefix
+        assert model_supports_fast_mode("anthropic/claude-opus-4-6") is True
+        assert model_supports_fast_mode("anthropic/claude-opus-4.6") is True
+
+    def test_anthropic_non_opus_rejected(self):
+        from hermes_cli.models import model_supports_fast_mode
+
+        assert model_supports_fast_mode("claude-sonnet-4-6") is False
+        assert model_supports_fast_mode("claude-sonnet-4.6") is False
+        assert model_supports_fast_mode("claude-haiku-4-5") is False
+        assert model_supports_fast_mode("anthropic/claude-sonnet-4.6") is False
+
+    def test_anthropic_variant_tags_stripped(self):
+        from hermes_cli.models import model_supports_fast_mode
+
+        # OpenRouter variant tags after colon should be stripped
+        assert model_supports_fast_mode("claude-opus-4.6:fast") is True
+        assert model_supports_fast_mode("claude-opus-4.6:beta") is True
+
+    def test_anthropic_date_suffix_stripped(self):
+        from hermes_cli.models import _is_anthropic_fast_model, model_supports_fast_mode
+
+        # Dated snapshot IDs map onto their base model
+        assert model_supports_fast_mode("claude-opus-4-6-20260401") is True
+        assert _is_anthropic_fast_model("anthropic/claude-opus-4-6-20260401") is True
+        # A date suffix must not make an unsupported model match
+        assert model_supports_fast_mode("claude-sonnet-4-6-20260401") is False
+
+    def test_resolve_overrides_returns_speed_for_anthropic(self):
+        from hermes_cli.models import resolve_fast_mode_overrides
+
+        result = resolve_fast_mode_overrides("claude-opus-4-6")
+        assert result == {"speed": "fast"}
+
+        result = resolve_fast_mode_overrides("anthropic/claude-opus-4.6")
+        assert result == {"speed": "fast"}
+
+    def test_resolve_overrides_returns_service_tier_for_openai(self):
+        """OpenAI models should still get service_tier, not speed."""
+        from hermes_cli.models import resolve_fast_mode_overrides
+
+        result = resolve_fast_mode_overrides("gpt-5.4")
+        assert result == {"service_tier": "priority"}
+
+    def test_is_anthropic_fast_model(self):
+        from hermes_cli.models import _is_anthropic_fast_model
+
+        assert _is_anthropic_fast_model("claude-opus-4-6") is True
+        assert _is_anthropic_fast_model("claude-opus-4.6") is True
+        assert _is_anthropic_fast_model("anthropic/claude-opus-4-6") is True
+        assert _is_anthropic_fast_model("gpt-5.4") is False
+        assert _is_anthropic_fast_model("claude-sonnet-4-6") is False
+
+    def test_fast_command_exposed_for_anthropic_model(self):
+        cli_mod = _import_cli()
+        stub = SimpleNamespace(
+            provider="anthropic", requested_provider="anthropic",
+            model="claude-opus-4-6", agent=None,
+        )
+        assert cli_mod.HermesCLI._fast_command_available(stub) is True
+
+    def test_fast_command_hidden_for_anthropic_sonnet(self):
+        cli_mod = _import_cli()
+        stub = SimpleNamespace(
+            provider="anthropic", requested_provider="anthropic",
+            model="claude-sonnet-4-6", agent=None,
+        )
+        assert cli_mod.HermesCLI._fast_command_available(stub) is False
+
+    def test_turn_route_injects_speed_for_anthropic(self):
+        """Anthropic models should get speed:'fast' override, not service_tier."""
+        cli_mod = _import_cli()
+        stub = SimpleNamespace(
+            model="claude-opus-4-6",
+            api_key="sk-ant-test",
+            base_url="https://api.anthropic.com",
+            provider="anthropic",
+            api_mode="anthropic_messages",
+            acp_command=None,
+            acp_args=[],
+            _credential_pool=None,
+            _smart_model_routing={},
+            service_tier="priority",
+        )
+
+        original_runtime = {
+            "api_key": "***",
+            "base_url": "https://api.anthropic.com",
+            "provider": "anthropic",
+            "api_mode": "anthropic_messages",
+            "command": None,
+            "args": [],
+            "credential_pool": None,
+        }
+
+        with patch("agent.smart_model_routing.resolve_turn_route", return_value={
+            "model": "claude-opus-4-6",
+            "runtime": dict(original_runtime),
+            "label": None,
+            "signature": ("claude-opus-4-6", "anthropic", "https://api.anthropic.com", "anthropic_messages", None, ()),
+        }):
+            route = cli_mod.HermesCLI._resolve_turn_agent_config(stub, "hi")
+
+        assert route["runtime"]["provider"] == "anthropic"
+        assert route["request_overrides"] == {"speed": "fast"}
+
+
+class TestAnthropicFastModeAdapter(unittest.TestCase):
+    """Verify build_anthropic_kwargs handles fast_mode parameter."""
+
+    def test_fast_mode_adds_speed_and_beta(self):
+        from agent.anthropic_adapter import build_anthropic_kwargs, _FAST_MODE_BETA
+
+        kwargs = build_anthropic_kwargs(
+            model="claude-opus-4-6",
+            messages=[{"role": "user", "content": [{"type": "text", "text": "hi"}]}],
+            tools=None,
+            max_tokens=None,
+            reasoning_config=None,
+            fast_mode=True,
+        )
+        assert kwargs.get("speed") == "fast"
+        assert "extra_headers" in kwargs
+        assert _FAST_MODE_BETA in kwargs["extra_headers"].get("anthropic-beta", "")
+
+    def test_fast_mode_off_no_speed(self):
+        from agent.anthropic_adapter import build_anthropic_kwargs
+
+        kwargs = build_anthropic_kwargs(
+            model="claude-opus-4-6",
+            messages=[{"role": "user", "content": [{"type": "text", "text": "hi"}]}],
+            tools=None,
+            max_tokens=None,
+            reasoning_config=None,
+            fast_mode=False,
+        )
+        assert "speed" not in kwargs
+        assert "extra_headers" not in kwargs
+
+    def test_fast_mode_skipped_for_third_party_endpoint(self):
+        from agent.anthropic_adapter import build_anthropic_kwargs
+
+        kwargs = build_anthropic_kwargs(
+            model="claude-opus-4-6",
+            messages=[{"role": "user", "content": [{"type": "text", "text": "hi"}]}],
+            tools=None,
+            max_tokens=None,
+            reasoning_config=None,
+            fast_mode=True,
+            base_url="https://api.minimax.io/anthropic/v1",
+        )
+        # Third-party endpoints should NOT get speed or fast-mode beta
+        assert "speed" not in kwargs
+        assert "extra_headers" not in kwargs
+
+
 class TestConfigDefault(unittest.TestCase):
     def test_default_config_has_service_tier(self):
         from hermes_cli.config import DEFAULT_CONFIG