feat: expand /fast to all OpenAI Priority Processing models (#6960)

Previously /fast only supported gpt-5.4 and forced a provider switch to
openai-codex. Now supports all 13 models from OpenAI's Priority Processing
pricing table (gpt-5.4, gpt-5.4-mini, gpt-5.2, gpt-5.1, gpt-5, gpt-5-mini,
gpt-4.1, gpt-4.1-mini, gpt-4.1-nano, gpt-4o, gpt-4o-mini, o3, o4-mini).

Key changes:
- Replaced _FAST_MODE_BACKEND_CONFIG with _PRIORITY_PROCESSING_MODELS frozenset
- Removed provider-forcing logic — service_tier is now injected into whatever
  API path the user is already on (Codex Responses, Chat Completions, or
  OpenRouter passthrough)
- Added request_overrides support to chat_completions path in run_agent.py
- Updated messaging from 'Codex inference tier' to 'Priority Processing'
- Expanded test coverage for all supported models
This commit is contained in:
Teknium 2026-04-09 22:06:30 -07:00 committed by GitHub
parent d416a69288
commit 8394b5ddd2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 144 additions and 110 deletions

View file

@ -108,15 +108,52 @@ class TestHandleFastCommand(unittest.TestCase):
self.assertTrue(mock_cprint.called)
class TestFastModeRegistry(unittest.TestCase):
def test_only_gpt_5_4_is_enabled_for_codex(self):
from hermes_cli.models import fast_mode_backend_config
class TestPriorityProcessingModels(unittest.TestCase):
"""Verify the expanded Priority Processing model registry."""
assert fast_mode_backend_config("gpt-5.4") == {
"provider": "openai-codex",
"request_overrides": {"service_tier": "priority"},
}
assert fast_mode_backend_config("gpt-5.3-codex") is None
def test_all_documented_models_supported(self):
    """Every model on OpenAI's Priority Processing pricing table is accepted."""
    from hermes_cli.models import model_supports_fast_mode

    # One entry per row of the published pricing table.
    pricing_table = (
        "gpt-5.4",
        "gpt-5.4-mini",
        "gpt-5.2",
        "gpt-5.1",
        "gpt-5",
        "gpt-5-mini",
        "gpt-4.1",
        "gpt-4.1-mini",
        "gpt-4.1-nano",
        "gpt-4o",
        "gpt-4o-mini",
        "o3",
        "o4-mini",
    )
    for model in pricing_table:
        assert model_supports_fast_mode(model), f"{model} should support fast mode"
def test_vendor_prefix_stripped(self):
    """A leading 'openai/' vendor prefix must not hide an otherwise supported model."""
    from hermes_cli.models import model_supports_fast_mode

    for prefixed_name in ("openai/gpt-5.4", "openai/gpt-4.1", "openai/o3"):
        assert model_supports_fast_mode(prefixed_name) is True
def test_non_priority_models_rejected(self):
    """Models absent from the Priority Processing table — and degenerate inputs — are rejected."""
    from hermes_cli.models import model_supports_fast_mode

    # Includes empty string and None to pin down the degenerate-input contract.
    for rejected in ("gpt-5.3-codex", "claude-sonnet-4", "", None):
        assert model_supports_fast_mode(rejected) is False
def test_resolve_overrides_returns_service_tier(self):
    """Supported models resolve to the priority service-tier request override."""
    from hermes_cli.models import resolve_fast_mode_overrides

    expected = {"service_tier": "priority"}
    assert resolve_fast_mode_overrides("gpt-5.4") == expected
    assert resolve_fast_mode_overrides("gpt-4.1") == expected
def test_resolve_overrides_none_for_unsupported(self):
    """Unsupported models produce no overrides at all (None, not an empty dict)."""
    from hermes_cli.models import resolve_fast_mode_overrides

    for unsupported_model in ("gpt-5.3-codex", "claude-sonnet-4"):
        assert resolve_fast_mode_overrides(unsupported_model) is None
class TestFastModeRouting(unittest.TestCase):
@ -126,7 +163,16 @@ class TestFastModeRouting(unittest.TestCase):
assert cli_mod.HermesCLI._fast_command_available(stub) is True
def test_turn_route_switches_to_model_backend_when_fast_enabled(self):
def test_fast_command_exposed_for_non_codex_models(self):
    """/fast availability depends only on the model, not on the active provider."""
    cli_mod = _import_cli()
    # Two provider/model combinations that previously would have been excluded.
    for provider_name, model_name in (("openai", "gpt-4.1"), ("openrouter", "o3")):
        stub = SimpleNamespace(
            provider=provider_name,
            requested_provider=provider_name,
            model=model_name,
            agent=None,
        )
        assert cli_mod.HermesCLI._fast_command_available(stub) is True
def test_turn_route_injects_overrides_without_provider_switch(self):
"""Fast mode should add request_overrides but NOT change the provider/runtime."""
cli_mod = _import_cli()
stub = SimpleNamespace(
model="gpt-5.4",
@ -141,35 +187,28 @@ class TestFastModeRouting(unittest.TestCase):
service_tier="priority",
)
with (
patch("agent.smart_model_routing.resolve_turn_route", return_value={
"model": "gpt-5.4",
"runtime": {
"api_key": "primary-key",
"base_url": "https://openrouter.ai/api/v1",
"provider": "openrouter",
"api_mode": "chat_completions",
"command": None,
"args": [],
"credential_pool": None,
},
"label": None,
"signature": ("gpt-5.4", "openrouter", "https://openrouter.ai/api/v1", "chat_completions", None, ()),
}),
patch("hermes_cli.runtime_provider.resolve_runtime_provider", return_value={
"provider": "openai-codex",
"api_mode": "codex_responses",
"base_url": "https://chatgpt.com/backend-api/codex",
"api_key": "codex-key",
"command": None,
"args": [],
"credential_pool": None,
}),
):
original_runtime = {
"api_key": "***",
"base_url": "https://openrouter.ai/api/v1",
"provider": "openrouter",
"api_mode": "chat_completions",
"command": None,
"args": [],
"credential_pool": None,
}
with patch("agent.smart_model_routing.resolve_turn_route", return_value={
"model": "gpt-5.4",
"runtime": dict(original_runtime),
"label": None,
"signature": ("gpt-5.4", "openrouter", "https://openrouter.ai/api/v1", "chat_completions", None, ()),
}):
route = cli_mod.HermesCLI._resolve_turn_agent_config(stub, "hi")
assert route["runtime"]["provider"] == "openai-codex"
assert route["runtime"]["api_mode"] == "codex_responses"
# Provider should NOT have changed
assert route["runtime"]["provider"] == "openrouter"
assert route["runtime"]["api_mode"] == "chat_completions"
# But request_overrides should be set
assert route["request_overrides"] == {"service_tier": "priority"}
def test_turn_route_keeps_primary_runtime_when_model_has_no_fast_backend(self):
@ -190,7 +229,7 @@ class TestFastModeRouting(unittest.TestCase):
primary_route = {
"model": "gpt-5.3-codex",
"runtime": {
"api_key": "primary-key",
"api_key": "***",
"base_url": "https://openrouter.ai/api/v1",
"provider": "openrouter",
"api_mode": "chat_completions",