mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
feat: add Anthropic Fast Mode support to /fast command (#7037)
Extends the /fast command to support Anthropic's Fast Mode beta in addition
to OpenAI Priority Processing. When enabled on Claude Opus 4.6, adds
speed:"fast" and the fast-mode-2026-02-01 beta header to API requests for
~2.5x faster output token throughput.
Changes:
- hermes_cli/models.py: Add _ANTHROPIC_FAST_MODE_MODELS registry,
model_supports_fast_mode() now recognizes Claude Opus 4.6,
resolve_fast_mode_overrides() returns {speed: fast} for Anthropic
vs {service_tier: priority} for OpenAI
- agent/anthropic_adapter.py: Add _FAST_MODE_BETA constant,
build_anthropic_kwargs() accepts fast_mode=True which injects
speed:fast + beta header via extra_headers (skipped for third-party
Anthropic-compatible endpoints like MiniMax)
- run_agent.py: Pass fast_mode to build_anthropic_kwargs in the
anthropic_messages path of _build_api_kwargs()
- cli.py: Update _handle_fast_command with provider-aware messaging
(shows 'Anthropic Fast Mode' vs 'Priority Processing')
- hermes_cli/commands.py: Update /fast description to mention both
providers
- tests: 13 new tests covering Anthropic model detection, override
resolution, CLI availability, routing, adapter kwargs, and
third-party endpoint safety
This commit is contained in:
parent
0848a79476
commit
8779a268a7
6 changed files with 237 additions and 14 deletions
|
|
@ -103,6 +103,11 @@ _COMMON_BETAS = [
|
|||
# fall back to the provider's default response path.
|
||||
_TOOL_STREAMING_BETA = "fine-grained-tool-streaming-2025-05-14"
|
||||
|
||||
# Fast mode beta — enables the ``speed: "fast"`` request parameter for
|
||||
# significantly higher output token throughput on Opus 4.6 (~2.5x).
|
||||
# See https://platform.claude.com/docs/en/build-with-claude/fast-mode
|
||||
_FAST_MODE_BETA = "fast-mode-2026-02-01"
|
||||
|
||||
# Additional beta headers required for OAuth/subscription auth.
|
||||
# Matches what Claude Code (and pi-ai / OpenCode) send.
|
||||
_OAUTH_ONLY_BETAS = [
|
||||
|
|
@ -1256,6 +1261,7 @@ def build_anthropic_kwargs(
|
|||
preserve_dots: bool = False,
|
||||
context_length: Optional[int] = None,
|
||||
base_url: str | None = None,
|
||||
fast_mode: bool = False,
|
||||
) -> Dict[str, Any]:
|
||||
"""Build kwargs for anthropic.messages.create().
|
||||
|
||||
|
|
@ -1289,6 +1295,10 @@ def build_anthropic_kwargs(
|
|||
|
||||
When *base_url* points to a third-party Anthropic-compatible endpoint,
|
||||
thinking block signatures are stripped (they are Anthropic-proprietary).
|
||||
|
||||
When *fast_mode* is True, adds ``speed: "fast"`` and the fast-mode beta
|
||||
header for ~2.5x faster output throughput on Opus 4.6. Currently only
|
||||
supported on native Anthropic endpoints (not third-party compatible ones).
|
||||
"""
|
||||
system, anthropic_messages = convert_messages_to_anthropic(messages, base_url=base_url)
|
||||
anthropic_tools = convert_tools_to_anthropic(tools) if tools else []
|
||||
|
|
@ -1387,6 +1397,20 @@ def build_anthropic_kwargs(
|
|||
kwargs["temperature"] = 1
|
||||
kwargs["max_tokens"] = max(effective_max_tokens, budget + 4096)
|
||||
|
||||
# ── Fast mode (Opus 4.6 only) ────────────────────────────────────
|
||||
# Adds speed:"fast" + the fast-mode beta header for ~2.5x output speed.
|
||||
# Only for native Anthropic endpoints — third-party providers would
|
||||
# reject the unknown beta header and speed parameter.
|
||||
if fast_mode and not _is_third_party_anthropic_endpoint(base_url):
|
||||
kwargs["speed"] = "fast"
|
||||
# Build extra_headers with ALL applicable betas (the per-request
|
||||
# extra_headers override the client-level anthropic-beta header).
|
||||
betas = list(_common_betas_for_base_url(base_url))
|
||||
if is_oauth:
|
||||
betas.extend(_OAUTH_ONLY_BETAS)
|
||||
betas.append(_FAST_MODE_BETA)
|
||||
kwargs["extra_headers"] = {"anthropic-beta": ",".join(betas)}
|
||||
|
||||
return kwargs
|
||||
|
||||
|
||||
|
|
|
|||
19
cli.py
19
cli.py
|
|
@ -5697,15 +5697,24 @@ class HermesCLI:
|
|||
_cprint(f" {_GOLD}✓ Reasoning effort set to '{arg}' (session only){_RST}")
|
||||
|
||||
def _handle_fast_command(self, cmd: str):
|
||||
"""Handle /fast — toggle OpenAI Priority Processing (service_tier)."""
|
||||
"""Handle /fast — toggle fast mode (OpenAI Priority Processing / Anthropic Fast Mode)."""
|
||||
if not self._fast_command_available():
|
||||
_cprint(" (._.) /fast is only available for OpenAI models that support Priority Processing.")
|
||||
_cprint(" (._.) /fast is only available for models that support fast mode (OpenAI Priority Processing or Anthropic Fast Mode).")
|
||||
return
|
||||
|
||||
# Determine the branding for the current model
|
||||
try:
|
||||
from hermes_cli.models import _is_anthropic_fast_model
|
||||
agent = getattr(self, "agent", None)
|
||||
model = getattr(agent, "model", None) or getattr(self, "model", None)
|
||||
feature_name = "Anthropic Fast Mode" if _is_anthropic_fast_model(model) else "Priority Processing"
|
||||
except Exception:
|
||||
feature_name = "Fast mode"
|
||||
|
||||
parts = cmd.strip().split(maxsplit=1)
|
||||
if len(parts) < 2 or parts[1].strip().lower() == "status":
|
||||
status = "fast" if self.service_tier == "priority" else "normal"
|
||||
_cprint(f" {_GOLD}Priority Processing: {status}{_RST}")
|
||||
_cprint(f" {_GOLD}{feature_name}: {status}{_RST}")
|
||||
_cprint(f" {_DIM}Usage: /fast [normal|fast|status]{_RST}")
|
||||
return
|
||||
|
||||
|
|
@ -5726,9 +5735,9 @@ class HermesCLI:
|
|||
|
||||
self.agent = None # Force agent re-init with new service-tier config
|
||||
if save_config_value("agent.service_tier", saved_value):
|
||||
_cprint(f" {_GOLD}✓ Priority Processing set to {label} (saved to config){_RST}")
|
||||
_cprint(f" {_GOLD}✓ {feature_name} set to {label} (saved to config){_RST}")
|
||||
else:
|
||||
_cprint(f" {_GOLD}✓ Priority Processing set to {label} (session only){_RST}")
|
||||
_cprint(f" {_GOLD}✓ {feature_name} set to {label} (session only){_RST}")
|
||||
|
||||
def _on_reasoning(self, reasoning_text: str):
|
||||
"""Callback for intermediate reasoning display during tool-call loops."""
|
||||
|
|
|
|||
|
|
@ -100,7 +100,7 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
|||
CommandDef("reasoning", "Manage reasoning effort and display", "Configuration",
|
||||
args_hint="[level|show|hide]",
|
||||
subcommands=("none", "minimal", "low", "medium", "high", "xhigh", "show", "hide", "on", "off")),
|
||||
CommandDef("fast", "Toggle OpenAI Priority Processing (Normal/Fast)", "Configuration",
|
||||
CommandDef("fast", "Toggle fast mode — OpenAI Priority Processing / Anthropic Fast Mode (Normal/Fast)", "Configuration",
|
||||
cli_only=True, args_hint="[normal|fast|status]",
|
||||
subcommands=("normal", "fast", "status", "on", "off")),
|
||||
CommandDef("skin", "Show or change the display skin/theme", "Configuration",
|
||||
|
|
|
|||
|
|
@ -1036,25 +1036,57 @@ _PRIORITY_PROCESSING_MODELS: frozenset[str] = frozenset({
|
|||
"o4-mini",
|
||||
})
|
||||
|
||||
# Models that support Anthropic Fast Mode (speed="fast").
|
||||
# See https://platform.claude.com/docs/en/build-with-claude/fast-mode
|
||||
# Currently only Claude Opus 4.6. Both hyphen and dot variants are stored
|
||||
# to handle native Anthropic (claude-opus-4-6) and OpenRouter (claude-opus-4.6).
|
||||
_ANTHROPIC_FAST_MODE_MODELS: frozenset[str] = frozenset({
|
||||
"claude-opus-4-6",
|
||||
"claude-opus-4.6",
|
||||
})
|
||||
|
||||
def model_supports_fast_mode(model_id: Optional[str]) -> bool:
|
||||
"""Return whether Hermes should expose the /fast (Priority Processing) toggle."""
|
||||
|
||||
def _strip_vendor_prefix(model_id: str) -> str:
|
||||
"""Strip vendor/ prefix from a model ID (e.g. 'anthropic/claude-opus-4-6' -> 'claude-opus-4-6')."""
|
||||
raw = str(model_id or "").strip().lower()
|
||||
if "/" in raw:
|
||||
raw = raw.split("/", 1)[1]
|
||||
return raw in _PRIORITY_PROCESSING_MODELS
|
||||
return raw
|
||||
|
||||
|
||||
def model_supports_fast_mode(model_id: Optional[str]) -> bool:
    """Return whether Hermes should expose the /fast toggle for this model.

    True for OpenAI models listed in ``_PRIORITY_PROCESSING_MODELS``
    (Priority Processing) and for Anthropic models listed in
    ``_ANTHROPIC_FAST_MODE_MODELS`` (Fast Mode, ``speed="fast"``).
    """
    raw = _strip_vendor_prefix(str(model_id or ""))
    if raw in _PRIORITY_PROCESSING_MODELS:
        return True
    # Anthropic fast mode — strip OpenRouter variant tags (:fast, :beta)
    # and date suffixes (e.g. claude-opus-4-6-20260401) before matching.
    base = raw.split(":")[0]
    stem, _, tail = base.rpartition("-")
    if stem and len(tail) == 8 and tail.isdigit():
        # Drop a trailing -YYYYMMDD snapshot suffix so dated Anthropic
        # model IDs still match the undated registry entries.
        base = stem
    return base in _ANTHROPIC_FAST_MODE_MODELS
|
||||
|
||||
|
||||
def _is_anthropic_fast_model(model_id: Optional[str]) -> bool:
    """Return True if *model_id* supports Anthropic's fast mode (speed='fast')."""
    normalized = _strip_vendor_prefix(str(model_id or ""))
    # Drop any OpenRouter variant tag (e.g. ":fast", ":beta") before matching.
    candidate, _, _ = normalized.partition(":")
    return candidate in _ANTHROPIC_FAST_MODE_MODELS
|
||||
|
||||
|
||||
def resolve_fast_mode_overrides(model_id: Optional[str]) -> dict[str, Any] | None:
    """Return request_overrides for fast/priority mode, or None if unsupported.

    Unlike the previous ``resolve_fast_mode_runtime``, this does NOT force a
    provider/backend switch. The override is injected into whatever API path
    the user is already on (Codex Responses, Chat Completions, OpenRouter
    passthrough, or Anthropic Messages).

    Returns provider-appropriate overrides:
    - OpenAI models: ``{"service_tier": "priority"}`` (Priority Processing)
    - Anthropic models: ``{"speed": "fast"}`` (Anthropic Fast Mode beta)

    The overrides are injected into the API request kwargs by
    ``_build_api_kwargs`` in run_agent.py — each API path handles its own
    keys (service_tier for OpenAI/Codex, speed for Anthropic Messages).
    """
    if not model_supports_fast_mode(model_id):
        return None
    if _is_anthropic_fast_model(model_id):
        return {"speed": "fast"}
    return {"service_tier": "priority"}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -5466,6 +5466,7 @@ class AIAgent:
|
|||
preserve_dots=self._anthropic_preserve_dots(),
|
||||
context_length=ctx_len,
|
||||
base_url=getattr(self, "_anthropic_base_url", None),
|
||||
fast_mode=self.request_overrides.get("speed") == "fast",
|
||||
)
|
||||
|
||||
if self.api_mode == "codex_responses":
|
||||
|
|
|
|||
|
|
@ -247,6 +247,163 @@ class TestFastModeRouting(unittest.TestCase):
|
|||
assert route.get("request_overrides") is None
|
||||
|
||||
|
||||
class TestAnthropicFastMode(unittest.TestCase):
|
||||
"""Verify Anthropic Fast Mode model support and override resolution."""
|
||||
|
||||
def test_anthropic_opus_supported(self):
|
||||
from hermes_cli.models import model_supports_fast_mode
|
||||
|
||||
# Native Anthropic format (hyphens)
|
||||
assert model_supports_fast_mode("claude-opus-4-6") is True
|
||||
# OpenRouter format (dots)
|
||||
assert model_supports_fast_mode("claude-opus-4.6") is True
|
||||
# With vendor prefix
|
||||
assert model_supports_fast_mode("anthropic/claude-opus-4-6") is True
|
||||
assert model_supports_fast_mode("anthropic/claude-opus-4.6") is True
|
||||
|
||||
def test_anthropic_non_opus_rejected(self):
|
||||
from hermes_cli.models import model_supports_fast_mode
|
||||
|
||||
assert model_supports_fast_mode("claude-sonnet-4-6") is False
|
||||
assert model_supports_fast_mode("claude-sonnet-4.6") is False
|
||||
assert model_supports_fast_mode("claude-haiku-4-5") is False
|
||||
assert model_supports_fast_mode("anthropic/claude-sonnet-4.6") is False
|
||||
|
||||
def test_anthropic_variant_tags_stripped(self):
|
||||
from hermes_cli.models import model_supports_fast_mode
|
||||
|
||||
# OpenRouter variant tags after colon should be stripped
|
||||
assert model_supports_fast_mode("claude-opus-4.6:fast") is True
|
||||
assert model_supports_fast_mode("claude-opus-4.6:beta") is True
|
||||
|
||||
def test_resolve_overrides_returns_speed_for_anthropic(self):
|
||||
from hermes_cli.models import resolve_fast_mode_overrides
|
||||
|
||||
result = resolve_fast_mode_overrides("claude-opus-4-6")
|
||||
assert result == {"speed": "fast"}
|
||||
|
||||
result = resolve_fast_mode_overrides("anthropic/claude-opus-4.6")
|
||||
assert result == {"speed": "fast"}
|
||||
|
||||
def test_resolve_overrides_returns_service_tier_for_openai(self):
|
||||
"""OpenAI models should still get service_tier, not speed."""
|
||||
from hermes_cli.models import resolve_fast_mode_overrides
|
||||
|
||||
result = resolve_fast_mode_overrides("gpt-5.4")
|
||||
assert result == {"service_tier": "priority"}
|
||||
|
||||
def test_is_anthropic_fast_model(self):
|
||||
from hermes_cli.models import _is_anthropic_fast_model
|
||||
|
||||
assert _is_anthropic_fast_model("claude-opus-4-6") is True
|
||||
assert _is_anthropic_fast_model("claude-opus-4.6") is True
|
||||
assert _is_anthropic_fast_model("anthropic/claude-opus-4-6") is True
|
||||
assert _is_anthropic_fast_model("gpt-5.4") is False
|
||||
assert _is_anthropic_fast_model("claude-sonnet-4-6") is False
|
||||
|
||||
def test_fast_command_exposed_for_anthropic_model(self):
|
||||
cli_mod = _import_cli()
|
||||
stub = SimpleNamespace(
|
||||
provider="anthropic", requested_provider="anthropic",
|
||||
model="claude-opus-4-6", agent=None,
|
||||
)
|
||||
assert cli_mod.HermesCLI._fast_command_available(stub) is True
|
||||
|
||||
def test_fast_command_hidden_for_anthropic_sonnet(self):
|
||||
cli_mod = _import_cli()
|
||||
stub = SimpleNamespace(
|
||||
provider="anthropic", requested_provider="anthropic",
|
||||
model="claude-sonnet-4-6", agent=None,
|
||||
)
|
||||
assert cli_mod.HermesCLI._fast_command_available(stub) is False
|
||||
|
||||
def test_turn_route_injects_speed_for_anthropic(self):
|
||||
"""Anthropic models should get speed:'fast' override, not service_tier."""
|
||||
cli_mod = _import_cli()
|
||||
stub = SimpleNamespace(
|
||||
model="claude-opus-4-6",
|
||||
api_key="sk-ant-test",
|
||||
base_url="https://api.anthropic.com",
|
||||
provider="anthropic",
|
||||
api_mode="anthropic_messages",
|
||||
acp_command=None,
|
||||
acp_args=[],
|
||||
_credential_pool=None,
|
||||
_smart_model_routing={},
|
||||
service_tier="priority",
|
||||
)
|
||||
|
||||
original_runtime = {
|
||||
"api_key": "***",
|
||||
"base_url": "https://api.anthropic.com",
|
||||
"provider": "anthropic",
|
||||
"api_mode": "anthropic_messages",
|
||||
"command": None,
|
||||
"args": [],
|
||||
"credential_pool": None,
|
||||
}
|
||||
|
||||
with patch("agent.smart_model_routing.resolve_turn_route", return_value={
|
||||
"model": "claude-opus-4-6",
|
||||
"runtime": dict(original_runtime),
|
||||
"label": None,
|
||||
"signature": ("claude-opus-4-6", "anthropic", "https://api.anthropic.com", "anthropic_messages", None, ()),
|
||||
}):
|
||||
route = cli_mod.HermesCLI._resolve_turn_agent_config(stub, "hi")
|
||||
|
||||
assert route["runtime"]["provider"] == "anthropic"
|
||||
assert route["request_overrides"] == {"speed": "fast"}
|
||||
|
||||
|
||||
class TestAnthropicFastModeAdapter(unittest.TestCase):
|
||||
"""Verify build_anthropic_kwargs handles fast_mode parameter."""
|
||||
|
||||
def test_fast_mode_adds_speed_and_beta(self):
|
||||
from agent.anthropic_adapter import build_anthropic_kwargs, _FAST_MODE_BETA
|
||||
|
||||
kwargs = build_anthropic_kwargs(
|
||||
model="claude-opus-4-6",
|
||||
messages=[{"role": "user", "content": [{"type": "text", "text": "hi"}]}],
|
||||
tools=None,
|
||||
max_tokens=None,
|
||||
reasoning_config=None,
|
||||
fast_mode=True,
|
||||
)
|
||||
assert kwargs.get("speed") == "fast"
|
||||
assert "extra_headers" in kwargs
|
||||
assert _FAST_MODE_BETA in kwargs["extra_headers"].get("anthropic-beta", "")
|
||||
|
||||
def test_fast_mode_off_no_speed(self):
|
||||
from agent.anthropic_adapter import build_anthropic_kwargs
|
||||
|
||||
kwargs = build_anthropic_kwargs(
|
||||
model="claude-opus-4-6",
|
||||
messages=[{"role": "user", "content": [{"type": "text", "text": "hi"}]}],
|
||||
tools=None,
|
||||
max_tokens=None,
|
||||
reasoning_config=None,
|
||||
fast_mode=False,
|
||||
)
|
||||
assert "speed" not in kwargs
|
||||
assert "extra_headers" not in kwargs
|
||||
|
||||
def test_fast_mode_skipped_for_third_party_endpoint(self):
|
||||
from agent.anthropic_adapter import build_anthropic_kwargs
|
||||
|
||||
kwargs = build_anthropic_kwargs(
|
||||
model="claude-opus-4-6",
|
||||
messages=[{"role": "user", "content": [{"type": "text", "text": "hi"}]}],
|
||||
tools=None,
|
||||
max_tokens=None,
|
||||
reasoning_config=None,
|
||||
fast_mode=True,
|
||||
base_url="https://api.minimax.io/anthropic/v1",
|
||||
)
|
||||
# Third-party endpoints should NOT get speed or fast-mode beta
|
||||
assert "speed" not in kwargs
|
||||
assert "extra_headers" not in kwargs
|
||||
|
||||
|
||||
class TestConfigDefault(unittest.TestCase):
|
||||
def test_default_config_has_service_tier(self):
|
||||
from hermes_cli.config import DEFAULT_CONFIG
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue