feat: add Anthropic Fast Mode support to /fast command (#7037)

Extends the /fast command to support Anthropic's Fast Mode beta in addition
to OpenAI Priority Processing. When enabled on Claude Opus 4.6, adds
speed:"fast" and the fast-mode-2026-02-01 beta header to API requests for
~2.5x faster output token throughput.

Changes:
- hermes_cli/models.py: Add _ANTHROPIC_FAST_MODE_MODELS registry,
  model_supports_fast_mode() now recognizes Claude Opus 4.6,
  resolve_fast_mode_overrides() returns {speed: fast} for Anthropic
  vs {service_tier: priority} for OpenAI
- agent/anthropic_adapter.py: Add _FAST_MODE_BETA constant,
  build_anthropic_kwargs() accepts fast_mode=True which injects
  speed:fast + beta header via extra_headers (skipped for third-party
  Anthropic-compatible endpoints like MiniMax)
- run_agent.py: Pass fast_mode to build_anthropic_kwargs in the
  anthropic_messages path of _build_api_kwargs()
- cli.py: Update _handle_fast_command with provider-aware messaging
  (shows 'Anthropic Fast Mode' vs 'Priority Processing')
- hermes_cli/commands.py: Update /fast description to mention both
  providers
- tests: 13 new tests covering Anthropic model detection, override
  resolution, CLI availability, routing, adapter kwargs, and
  third-party endpoint safety
This commit is contained in:
Teknium 2026-04-10 02:32:15 -07:00 committed by GitHub
parent 0848a79476
commit 8779a268a7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 237 additions and 14 deletions

View file

@ -1036,25 +1036,57 @@ _PRIORITY_PROCESSING_MODELS: frozenset[str] = frozenset({
"o4-mini",
})
# Models that support Anthropic Fast Mode (speed="fast").
# See https://platform.claude.com/docs/en/build-with-claude/fast-mode
# Currently only Claude Opus 4.6. Both hyphen and dot variants are stored
# to handle native Anthropic (claude-opus-4-6) and OpenRouter (claude-opus-4.6).
_ANTHROPIC_FAST_MODE_MODELS: frozenset[str] = frozenset({
"claude-opus-4-6",
"claude-opus-4.6",
})
def model_supports_fast_mode(model_id: Optional[str]) -> bool:
"""Return whether Hermes should expose the /fast (Priority Processing) toggle."""
def _strip_vendor_prefix(model_id: str) -> str:
"""Strip vendor/ prefix from a model ID (e.g. 'anthropic/claude-opus-4-6' -> 'claude-opus-4-6')."""
raw = str(model_id or "").strip().lower()
if "/" in raw:
raw = raw.split("/", 1)[1]
return raw in _PRIORITY_PROCESSING_MODELS
return raw
def model_supports_fast_mode(model_id: Optional[str]) -> bool:
    """Return whether Hermes should expose the /fast toggle for this model.

    True when the (vendor-prefix-stripped) ID is either an OpenAI Priority
    Processing model or an Anthropic Fast Mode model; False otherwise,
    including for ``None`` / empty input.

    Args:
        model_id: Model identifier, possibly ``vendor/``-prefixed; may be None.

    Returns:
        True if the /fast toggle applies to this model.
    """
    raw = _strip_vendor_prefix(str(model_id or ""))
    if raw in _PRIORITY_PROCESSING_MODELS:
        return True
    # Anthropic fast mode — drop OpenRouter variant tags (:fast, :beta)
    # before matching against the registry.
    # NOTE(review): an earlier comment claimed date suffixes
    # (e.g. claude-opus-4-6-20260401) are stripped too, but only the ":"
    # split happens here — dated snapshot IDs will NOT match. Confirm
    # whether date-suffix handling is intended.
    base = raw.split(":")[0]
    return base in _ANTHROPIC_FAST_MODE_MODELS
def _is_anthropic_fast_model(model_id: Optional[str]) -> bool:
    """Return True if the model supports Anthropic's fast mode (speed='fast').

    Normalizes the ID (vendor prefix stripped, lower-cased) and discards any
    OpenRouter variant tag (the part after ``:``) before checking membership
    in ``_ANTHROPIC_FAST_MODE_MODELS``.
    """
    normalized = _strip_vendor_prefix(str(model_id or ""))
    return normalized.split(":", 1)[0] in _ANTHROPIC_FAST_MODE_MODELS
def resolve_fast_mode_overrides(model_id: Optional[str]) -> dict[str, Any] | None:
    """Return request_overrides for fast/priority mode, or None if unsupported.

    Returns provider-appropriate overrides:

    - OpenAI models: ``{"service_tier": "priority"}`` (Priority Processing)
    - Anthropic models: ``{"speed": "fast"}`` (Anthropic Fast Mode beta)

    The overrides are injected into the API request kwargs by
    ``_build_api_kwargs`` in run_agent.py; each API path handles its own
    keys (service_tier for OpenAI/Codex, speed for Anthropic Messages).

    Args:
        model_id: Model identifier, possibly ``vendor/``-prefixed; may be None.

    Returns:
        A single-key override dict for the active provider, or None when the
        model supports neither fast path.
    """
    if not model_supports_fast_mode(model_id):
        return None
    if _is_anthropic_fast_model(model_id):
        return {"speed": "fast"}
    return {"service_tier": "priority"}