# Models that support OpenAI Priority Processing (service_tier="priority").
# See https://openai.com/api-priority-processing/ for the canonical list.
#
# Pattern-based matching — any OpenAI flagship model (gpt-*, o1*, o3*, o4*)
# is assumed to support Priority Processing. service_tier=priority is silently
# ignored by non-OpenAI endpoints (OpenRouter/Copilot/opencode-zen proxies
# strip the field), so false positives are harmless. Codex-series models
# (gpt-5-codex, gpt-5.3-codex, etc.) are excluded — they don't expose the
# service_tier parameter through the Codex Responses API.
_OPENAI_FAST_MODE_PREFIXES: tuple[str, ...] = (
    "gpt-",
    "o1",
    "o3",
    "o4",
)


def _is_openai_fast_model(model_id: Optional[str]) -> bool:
    """Return True if *model_id* is an OpenAI flagship eligible for Priority Processing.

    Accepts ``None``/empty ids and vendor-prefixed ids (e.g. ``"openai/gpt-4o"``);
    the vendor prefix and any OpenRouter-style ``:variant`` tag are stripped
    before matching.
    """
    raw = _strip_vendor_prefix(str(model_id or ""))
    # Drop OpenRouter variant tags (":fast", ":beta") before matching.
    base = raw.split(":", 1)[0]
    if not base:
        return False
    # Exclude Codex-series — they route through the Codex Responses API
    # which doesn't accept service_tier.
    if "codex" in base:
        return False
    # str.startswith accepts a tuple of prefixes — a single C-level call
    # instead of an any() generator over the prefix tuple.
    return base.startswith(_OPENAI_FAST_MODE_PREFIXES)
# Models that support Anthropic Fast Mode (speed="fast").
# See https://platform.claude.com/docs/en/build-with-claude/fast-mode
#
# Pattern-based matching — any claude-* model is eligible. The anthropic
# adapter gates speed=fast on native Anthropic endpoints only (see
# _is_third_party_anthropic_endpoint in agent/anthropic_adapter.py), so
# third-party proxies that would reject the beta header are protected.


def model_supports_fast_mode(model_id: Optional[str]) -> bool:
    """Return whether Hermes should expose the /fast toggle for this model."""
    # Anthropic eligibility is checked first; OpenAI Priority Processing
    # eligibility is the fallback. Both predicates are pure, so the
    # short-circuit ordering only affects which check runs, not the result.
    if _is_anthropic_fast_model(model_id):
        return True
    return _is_openai_fast_model(model_id)


def _is_anthropic_fast_model(model_id: Optional[str]) -> bool:
    """Return True if the model is a Claude model eligible for Anthropic Fast Mode."""
    stripped = _strip_vendor_prefix(str(model_id or ""))
    # Discard any OpenRouter-style ":variant" suffix, then match on the
    # claude- family prefix (covers dated and dotted version variants).
    base, _, _variant = stripped.partition(":")
    return base.startswith("claude-")
Processing — including + # future releases (gpt-5.5, 5.6...) via pattern matching. supported = [ + "gpt-5.5", "gpt-5.5-mini", "gpt-5.4", "gpt-5.4-mini", "gpt-5.2", "gpt-5.1", "gpt-5", "gpt-5-mini", "gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano", "gpt-4o", "gpt-4o-mini", - "o3", "o4-mini", + "o1", "o1-mini", "o3", "o3-mini", "o4-mini", ] for model in supported: assert model_supports_fast_mode(model), f"{model} should support fast mode" + def test_all_anthropic_models_supported(self): + from hermes_cli.models import model_supports_fast_mode + + # All Claude models support Anthropic Fast Mode — Opus, Sonnet, Haiku. + supported = [ + "claude-opus-4-7", "claude-opus-4-6", "claude-opus-4.6", + "claude-sonnet-4-6", "claude-sonnet-4.6", "claude-sonnet-4", + "claude-haiku-4-5", "claude-3-5-haiku", + ] + for model in supported: + assert model_supports_fast_mode(model), f"{model} should support fast mode" + + def test_codex_models_excluded(self): + """Codex models route through Responses API and don't accept service_tier.""" + from hermes_cli.models import model_supports_fast_mode + + for model in ["gpt-5-codex", "gpt-5.2-codex", "gpt-5.3-codex", "gpt-5.1-codex-max"]: + assert not model_supports_fast_mode(model), f"{model} is codex — should not expose /fast" + def test_vendor_prefix_stripped(self): from hermes_cli.models import model_supports_fast_mode @@ -135,8 +156,15 @@ class TestPriorityProcessingModels(unittest.TestCase): def test_non_priority_models_rejected(self): from hermes_cli.models import model_supports_fast_mode + # Codex-series models route through the Codex Responses API and + # don't accept service_tier, so they're excluded. 
assert model_supports_fast_mode("gpt-5.3-codex") is False - assert model_supports_fast_mode("claude-sonnet-4") is False + assert model_supports_fast_mode("gpt-5.2-codex") is False + assert model_supports_fast_mode("gpt-5-codex") is False + # Non-OpenAI, non-Anthropic models + assert model_supports_fast_mode("gemini-3-pro-preview") is False + assert model_supports_fast_mode("kimi-k2-thinking") is False + assert model_supports_fast_mode("deepseek-chat") is False assert model_supports_fast_mode("") is False assert model_supports_fast_mode(None) is False @@ -153,7 +181,8 @@ class TestPriorityProcessingModels(unittest.TestCase): from hermes_cli.models import resolve_fast_mode_overrides assert resolve_fast_mode_overrides("gpt-5.3-codex") is None - assert resolve_fast_mode_overrides("claude-sonnet-4") is None + assert resolve_fast_mode_overrides("gemini-3-pro-preview") is None + assert resolve_fast_mode_overrides("kimi-k2-thinking") is None class TestFastModeRouting(unittest.TestCase): @@ -228,13 +257,26 @@ class TestAnthropicFastMode(unittest.TestCase): assert model_supports_fast_mode("anthropic/claude-opus-4-6") is True assert model_supports_fast_mode("anthropic/claude-opus-4.6") is True - def test_anthropic_non_opus_rejected(self): + def test_anthropic_all_claude_models_supported(self): from hermes_cli.models import model_supports_fast_mode - assert model_supports_fast_mode("claude-sonnet-4-6") is False - assert model_supports_fast_mode("claude-sonnet-4.6") is False - assert model_supports_fast_mode("claude-haiku-4-5") is False - assert model_supports_fast_mode("anthropic/claude-sonnet-4.6") is False + # All Claude models support fast mode — Opus, Sonnet, Haiku. + # The anthropic adapter gates speed=fast on native Anthropic + # endpoints only, so third-party proxies that reject the beta + # are protected downstream (see _is_third_party_anthropic_endpoint). 
+ assert model_supports_fast_mode("claude-sonnet-4-6") is True + assert model_supports_fast_mode("claude-sonnet-4.6") is True + assert model_supports_fast_mode("claude-haiku-4-5") is True + assert model_supports_fast_mode("claude-opus-4-7") is True + assert model_supports_fast_mode("anthropic/claude-sonnet-4.6") is True + + def test_non_claude_models_not_anthropic_fast(self): + """Non-Claude models should not be treated as Anthropic fast-mode.""" + from hermes_cli.models import _is_anthropic_fast_model + + assert _is_anthropic_fast_model("gpt-5.4") is False + assert _is_anthropic_fast_model("gemini-3-pro") is False + assert _is_anthropic_fast_model("kimi-k2-thinking") is False def test_anthropic_variant_tags_stripped(self): from hermes_cli.models import model_supports_fast_mode @@ -264,9 +306,11 @@ class TestAnthropicFastMode(unittest.TestCase): assert _is_anthropic_fast_model("claude-opus-4-6") is True assert _is_anthropic_fast_model("claude-opus-4.6") is True + assert _is_anthropic_fast_model("claude-sonnet-4-6") is True + assert _is_anthropic_fast_model("claude-haiku-4-5") is True assert _is_anthropic_fast_model("anthropic/claude-opus-4-6") is True assert _is_anthropic_fast_model("gpt-5.4") is False - assert _is_anthropic_fast_model("claude-sonnet-4-6") is False + assert _is_anthropic_fast_model("") is False def test_fast_command_exposed_for_anthropic_model(self): cli_mod = _import_cli() @@ -276,12 +320,22 @@ class TestAnthropicFastMode(unittest.TestCase): ) assert cli_mod.HermesCLI._fast_command_available(stub) is True - def test_fast_command_hidden_for_anthropic_sonnet(self): + def test_fast_command_exposed_for_anthropic_sonnet(self): + """Sonnet now supports Anthropic Fast Mode — the adapter gates on base_url.""" cli_mod = _import_cli() stub = SimpleNamespace( provider="anthropic", requested_provider="anthropic", model="claude-sonnet-4-6", agent=None, ) + assert cli_mod.HermesCLI._fast_command_available(stub) is True + + def 
test_fast_command_hidden_for_non_claude_non_openai(self): + """Non-Claude, non-OpenAI models should not expose /fast.""" + cli_mod = _import_cli() + stub = SimpleNamespace( + provider="gemini", requested_provider="gemini", + model="gemini-3-pro-preview", agent=None, + ) assert cli_mod.HermesCLI._fast_command_available(stub) is False def test_turn_route_injects_speed_for_anthropic(self):