mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
feat: add Codex fast mode toggle (/fast command)
Add /fast slash command to toggle OpenAI Codex service_tier between
normal and priority ('fast') inference. Only exposed for models
registered in _FAST_MODE_BACKEND_CONFIG (currently gpt-5.4).
- Registry-based backend config for extensibility
- Dynamic command visibility (hidden from help/autocomplete for
non-supported models) via command_filter on SlashCommandCompleter
- service_tier flows through request_overrides from route resolution
- Omit max_output_tokens for Codex backend (rejects it)
- Persists to config.yaml under agent.service_tier
Salvage cleanup: removed simple_term_menu/input() menu (banned),
bare /fast now shows status like /reasoning. Removed redundant
override resolution in _build_api_kwargs — single source of truth
via request_overrides from route.
Co-authored-by: Hermes Agent <hermes@nousresearch.com>
This commit is contained in:
parent
4caa635803
commit
d416a69288
9 changed files with 473 additions and 5 deletions
111
cli.py
111
cli.py
|
|
@ -120,6 +120,18 @@ def _parse_reasoning_config(effort: str) -> dict | None:
|
|||
return result
|
||||
|
||||
|
||||
def _parse_service_tier_config(raw: str) -> str | None:
|
||||
"""Parse a persisted service-tier preference into a Responses API value."""
|
||||
value = str(raw or "").strip().lower()
|
||||
if not value or value in {"normal", "default", "standard", "off", "none"}:
|
||||
return None
|
||||
if value in {"fast", "priority", "on"}:
|
||||
return "priority"
|
||||
logger.warning("Unknown service_tier '%s', ignoring", raw)
|
||||
return None
|
||||
|
||||
|
||||
|
||||
def _get_chrome_debug_candidates(system: str) -> list[str]:
|
||||
"""Return likely browser executables for local CDP auto-launch."""
|
||||
candidates: list[str] = []
|
||||
|
|
@ -239,6 +251,7 @@ def load_cli_config() -> Dict[str, Any]:
|
|||
"system_prompt": "",
|
||||
"prefill_messages_file": "",
|
||||
"reasoning_effort": "",
|
||||
"service_tier": "",
|
||||
"personalities": {
|
||||
"helpful": "You are a helpful, friendly AI assistant.",
|
||||
"concise": "You are a concise assistant. Keep responses brief and to the point.",
|
||||
|
|
@ -1634,6 +1647,9 @@ class HermesCLI:
|
|||
self.reasoning_config = _parse_reasoning_config(
|
||||
CLI_CONFIG["agent"].get("reasoning_effort", "")
|
||||
)
|
||||
self.service_tier = _parse_service_tier_config(
|
||||
CLI_CONFIG["agent"].get("service_tier", "")
|
||||
)
|
||||
|
||||
# OpenRouter provider routing preferences
|
||||
pr = CLI_CONFIG.get("provider_routing", {}) or {}
|
||||
|
|
@ -2556,8 +2572,9 @@ class HermesCLI:
|
|||
def _resolve_turn_agent_config(self, user_message: str) -> dict:
|
||||
"""Resolve model/runtime overrides for a single user turn."""
|
||||
from agent.smart_model_routing import resolve_turn_route
|
||||
from hermes_cli.models import resolve_fast_mode_runtime
|
||||
|
||||
return resolve_turn_route(
|
||||
route = resolve_turn_route(
|
||||
user_message,
|
||||
self._smart_model_routing,
|
||||
{
|
||||
|
|
@ -2572,7 +2589,36 @@ class HermesCLI:
|
|||
},
|
||||
)
|
||||
|
||||
def _init_agent(self, *, model_override: str = None, runtime_override: dict = None, route_label: str = None) -> bool:
|
||||
service_tier = getattr(self, "service_tier", None)
|
||||
if not service_tier:
|
||||
route["request_overrides"] = None
|
||||
return route
|
||||
|
||||
try:
|
||||
fast_runtime = resolve_fast_mode_runtime(route.get("model"))
|
||||
except Exception:
|
||||
route["request_overrides"] = None
|
||||
return route
|
||||
if not fast_runtime:
|
||||
route["request_overrides"] = None
|
||||
return route
|
||||
|
||||
runtime = fast_runtime["runtime"]
|
||||
route["runtime"] = runtime
|
||||
route["request_overrides"] = fast_runtime["request_overrides"]
|
||||
route["label"] = f"fast route → {route.get('model')} ({runtime.get('provider')})"
|
||||
route["signature"] = (
|
||||
route.get("model"),
|
||||
runtime.get("provider"),
|
||||
runtime.get("base_url"),
|
||||
runtime.get("api_mode"),
|
||||
runtime.get("command"),
|
||||
tuple(runtime.get("args") or ()),
|
||||
json.dumps(route["request_overrides"], sort_keys=True),
|
||||
)
|
||||
return route
|
||||
|
||||
def _init_agent(self, *, model_override: str = None, runtime_override: dict = None, route_label: str = None, request_overrides: dict | None = None) -> bool:
|
||||
"""
|
||||
Initialize the agent on first use.
|
||||
When resuming a session, restores conversation history from SQLite.
|
||||
|
|
@ -2659,6 +2705,8 @@ class HermesCLI:
|
|||
ephemeral_system_prompt=self.system_prompt if self.system_prompt else None,
|
||||
prefill_messages=self.prefill_messages or None,
|
||||
reasoning_config=self.reasoning_config,
|
||||
service_tier=self.service_tier,
|
||||
request_overrides=request_overrides,
|
||||
providers_allowed=self._providers_only,
|
||||
providers_ignored=self._providers_ignore,
|
||||
providers_order=self._providers_order,
|
||||
|
|
@ -3316,6 +3364,20 @@ class HermesCLI:
|
|||
f"{toolsets_info}{provider_info}"
|
||||
)
|
||||
|
||||
def _fast_command_available(self) -> bool:
|
||||
try:
|
||||
from hermes_cli.models import model_supports_fast_mode
|
||||
except Exception:
|
||||
return False
|
||||
agent = getattr(self, "agent", None)
|
||||
model = getattr(agent, "model", None) or getattr(self, "model", None)
|
||||
return model_supports_fast_mode(model)
|
||||
|
||||
def _command_available(self, slash_command: str) -> bool:
|
||||
if slash_command == "/fast":
|
||||
return self._fast_command_available()
|
||||
return True
|
||||
|
||||
def show_help(self):
|
||||
"""Display help information with categorized commands."""
|
||||
from hermes_cli.commands import COMMANDS_BY_CATEGORY
|
||||
|
|
@ -3336,6 +3398,8 @@ class HermesCLI:
|
|||
for category, commands in COMMANDS_BY_CATEGORY.items():
|
||||
_cprint(f"\n {_BOLD}── {category} ──{_RST}")
|
||||
for cmd, desc in commands.items():
|
||||
if not self._command_available(cmd):
|
||||
continue
|
||||
ChatConsole().print(f" [bold {_accent_hex()}]{cmd:<15}[/] [dim]-[/] {_escape(desc)}")
|
||||
|
||||
if _skill_commands:
|
||||
|
|
@ -4788,6 +4852,8 @@ class HermesCLI:
|
|||
self._toggle_yolo()
|
||||
elif canonical == "reasoning":
|
||||
self._handle_reasoning_command(cmd_original)
|
||||
elif canonical == "fast":
|
||||
self._handle_fast_command(cmd_original)
|
||||
elif canonical == "compress":
|
||||
self._manual_compress()
|
||||
elif canonical == "usage":
|
||||
|
|
@ -5027,6 +5093,8 @@ class HermesCLI:
|
|||
platform="cli",
|
||||
session_db=self._session_db,
|
||||
reasoning_config=self.reasoning_config,
|
||||
service_tier=self.service_tier,
|
||||
request_overrides=turn_route.get("request_overrides"),
|
||||
providers_allowed=self._providers_only,
|
||||
providers_ignored=self._providers_ignore,
|
||||
providers_order=self._providers_order,
|
||||
|
|
@ -5162,6 +5230,8 @@ class HermesCLI:
|
|||
session_id=task_id,
|
||||
platform="cli",
|
||||
reasoning_config=self.reasoning_config,
|
||||
service_tier=self.service_tier,
|
||||
request_overrides=turn_route.get("request_overrides"),
|
||||
providers_allowed=self._providers_only,
|
||||
providers_ignored=self._providers_ignore,
|
||||
providers_order=self._providers_order,
|
||||
|
|
@ -5591,6 +5661,40 @@ class HermesCLI:
|
|||
else:
|
||||
_cprint(f" {_GOLD}✓ Reasoning effort set to '{arg}' (session only){_RST}")
|
||||
|
||||
def _handle_fast_command(self, cmd: str):
|
||||
"""Handle /fast — choose the Codex Responses service tier."""
|
||||
if not self._fast_command_available():
|
||||
_cprint(" (._.) /fast is only available for models that explicitly expose a fast backend.")
|
||||
return
|
||||
|
||||
parts = cmd.strip().split(maxsplit=1)
|
||||
if len(parts) < 2 or parts[1].strip().lower() == "status":
|
||||
status = "fast" if self.service_tier == "priority" else "normal"
|
||||
_cprint(f" {_GOLD}Codex inference tier: {status}{_RST}")
|
||||
_cprint(f" {_DIM}Usage: /fast [normal|fast|status]{_RST}")
|
||||
return
|
||||
|
||||
arg = parts[1].strip().lower()
|
||||
|
||||
if arg in {"fast", "on"}:
|
||||
self.service_tier = "priority"
|
||||
saved_value = "fast"
|
||||
label = "FAST"
|
||||
elif arg in {"normal", "off"}:
|
||||
self.service_tier = None
|
||||
saved_value = "normal"
|
||||
label = "NORMAL"
|
||||
else:
|
||||
_cprint(f" {_DIM}(._.) Unknown argument: {arg}{_RST}")
|
||||
_cprint(f" {_DIM}Usage: /fast [normal|fast|status]{_RST}")
|
||||
return
|
||||
|
||||
self.agent = None # Force agent re-init with new service-tier config
|
||||
if save_config_value("agent.service_tier", saved_value):
|
||||
_cprint(f" {_GOLD}✓ Codex inference tier set to {label} (saved to config){_RST}")
|
||||
else:
|
||||
_cprint(f" {_GOLD}✓ Codex inference tier set to {label} (session only){_RST}")
|
||||
|
||||
def _on_reasoning(self, reasoning_text: str):
|
||||
"""Callback for intermediate reasoning display during tool-call loops."""
|
||||
if not reasoning_text:
|
||||
|
|
@ -6749,6 +6853,7 @@ class HermesCLI:
|
|||
model_override=turn_route["model"],
|
||||
runtime_override=turn_route["runtime"],
|
||||
route_label=turn_route["label"],
|
||||
request_overrides=turn_route.get("request_overrides"),
|
||||
):
|
||||
return None
|
||||
|
||||
|
|
@ -7931,6 +8036,7 @@ class HermesCLI:
|
|||
|
||||
_completer = SlashCommandCompleter(
|
||||
skill_commands_provider=lambda: _skill_commands,
|
||||
command_filter=cli_ref._command_available,
|
||||
)
|
||||
input_area = TextArea(
|
||||
height=Dimension(min=1, max=8, preferred=1),
|
||||
|
|
@ -9009,6 +9115,7 @@ def main(
|
|||
model_override=turn_route["model"],
|
||||
runtime_override=turn_route["runtime"],
|
||||
route_label=turn_route["label"],
|
||||
request_overrides=turn_route.get("request_overrides"),
|
||||
):
|
||||
cli.agent.quiet_mode = True
|
||||
cli.agent.suppress_status_output = True
|
||||
|
|
|
|||
|
|
@ -100,6 +100,9 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
|||
CommandDef("reasoning", "Manage reasoning effort and display", "Configuration",
|
||||
args_hint="[level|show|hide]",
|
||||
subcommands=("none", "minimal", "low", "medium", "high", "xhigh", "show", "hide", "on", "off")),
|
||||
CommandDef("fast", "Choose Codex inference tier (Normal/Fast)", "Configuration",
|
||||
cli_only=True, args_hint="[normal|fast|status]",
|
||||
subcommands=("normal", "fast", "status", "on", "off")),
|
||||
CommandDef("skin", "Show or change the display skin/theme", "Configuration",
|
||||
cli_only=True, args_hint="[name]"),
|
||||
CommandDef("voice", "Toggle voice mode", "Configuration",
|
||||
|
|
@ -639,8 +642,18 @@ class SlashCommandCompleter(Completer):
|
|||
    def __init__(
        self,
        skill_commands_provider: Callable[[], Mapping[str, dict[str, Any]]] | None = None,
        command_filter: Callable[[str], bool] | None = None,
    ) -> None:
        """Create the completer.

        skill_commands_provider: callable returning the dynamic skill-command
            mapping (None = no skill commands).
        command_filter: predicate deciding whether a slash command may be
            offered in completions (None = all commands allowed).
        """
        self._skill_commands_provider = skill_commands_provider
        self._command_filter = command_filter
|
||||
|
||||
def _command_allowed(self, slash_command: str) -> bool:
|
||||
if self._command_filter is None:
|
||||
return True
|
||||
try:
|
||||
return bool(self._command_filter(slash_command))
|
||||
except Exception:
|
||||
return True
|
||||
|
||||
def _iter_skill_commands(self) -> Mapping[str, dict[str, Any]]:
|
||||
if self._skill_commands_provider is None:
|
||||
|
|
@ -918,7 +931,7 @@ class SlashCommandCompleter(Completer):
|
|||
return
|
||||
|
||||
# Static subcommand completions
|
||||
if " " not in sub_text and base_cmd in SUBCOMMANDS:
|
||||
if " " not in sub_text and base_cmd in SUBCOMMANDS and self._command_allowed(base_cmd):
|
||||
for sub in SUBCOMMANDS[base_cmd]:
|
||||
if sub.startswith(sub_lower) and sub != sub_lower:
|
||||
yield Completion(
|
||||
|
|
@ -931,6 +944,8 @@ class SlashCommandCompleter(Completer):
|
|||
word = text[1:]
|
||||
|
||||
for cmd, desc in COMMANDS.items():
|
||||
if not self._command_allowed(cmd):
|
||||
continue
|
||||
cmd_name = cmd[1:]
|
||||
if cmd_name.startswith(word):
|
||||
yield Completion(
|
||||
|
|
@ -989,6 +1004,8 @@ class SlashCommandAutoSuggest(AutoSuggest):
|
|||
# Still typing the command name: /upd → suggest "ate"
|
||||
word = text[1:].lower()
|
||||
for cmd in COMMANDS:
|
||||
if self._completer is not None and not self._completer._command_allowed(cmd):
|
||||
continue
|
||||
cmd_name = cmd[1:] # strip leading /
|
||||
if cmd_name.startswith(word) and cmd_name != word:
|
||||
return Suggestion(cmd_name[len(word):])
|
||||
|
|
@ -999,6 +1016,8 @@ class SlashCommandAutoSuggest(AutoSuggest):
|
|||
sub_lower = sub_text.lower()
|
||||
|
||||
# Static subcommands
|
||||
if self._completer is not None and not self._completer._command_allowed(base_cmd):
|
||||
return None
|
||||
if base_cmd in SUBCOMMANDS and SUBCOMMANDS[base_cmd]:
|
||||
if " " not in sub_text:
|
||||
for sub in SUBCOMMANDS[base_cmd]:
|
||||
|
|
|
|||
|
|
@ -255,6 +255,7 @@ DEFAULT_CONFIG = {
|
|||
# tools or receiving API responses. Only fires when the agent has
|
||||
# been completely idle for this duration. 0 = unlimited.
|
||||
"gateway_timeout": 1800,
|
||||
"service_tier": "",
|
||||
# Tool-use enforcement: injects system prompt guidance that tells the
|
||||
# model to actually call tools instead of describing intended actions.
|
||||
# Values: "auto" (default — applies to gpt/codex models), true/false
|
||||
|
|
|
|||
|
|
@ -1017,6 +1017,60 @@ def provider_label(provider: Optional[str]) -> str:
|
|||
return _PROVIDER_LABELS.get(normalized, original or "OpenRouter")
|
||||
|
||||
|
||||
_FAST_MODE_BACKEND_CONFIG: dict[str, dict[str, Any]] = {
|
||||
"gpt-5.4": {
|
||||
"provider": "openai-codex",
|
||||
"request_overrides": {"service_tier": "priority"},
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def fast_mode_backend_config(model_id: Optional[str]) -> dict[str, Any] | None:
|
||||
"""Return backend config for models that expose Fast mode.
|
||||
|
||||
To expose Fast mode for a new model, add its normalized model slug to
|
||||
``_FAST_MODE_BACKEND_CONFIG`` along with the backend runtime selection and
|
||||
backend-specific request overrides Hermes should apply.
|
||||
"""
|
||||
raw = str(model_id or "").strip().lower()
|
||||
if "/" in raw:
|
||||
raw = raw.split("/", 1)[1]
|
||||
config = _FAST_MODE_BACKEND_CONFIG.get(raw)
|
||||
return dict(config) if config else None
|
||||
|
||||
|
||||
def model_supports_fast_mode(model_id: Optional[str]) -> bool:
|
||||
"""Return whether Hermes should expose Fast mode for the active model."""
|
||||
return fast_mode_backend_config(model_id) is not None
|
||||
|
||||
|
||||
def resolve_fast_mode_runtime(model_id: Optional[str]) -> dict[str, Any] | None:
    """Resolve runtime selection and request overrides for a fast-mode model.

    Returns None when the model has no registered fast backend, otherwise a
    dict with "runtime" (provider/endpoint selection) and "request_overrides"
    (extra per-request kwargs such as service_tier).
    """
    backend_cfg = fast_mode_backend_config(model_id)
    if not backend_cfg:
        return None

    # Deferred import: the runtime provider is only needed once we know the
    # model actually has a fast backend.
    from hermes_cli.runtime_provider import resolve_runtime_provider

    resolved = resolve_runtime_provider(
        requested=backend_cfg.get("provider"),
        explicit_base_url=backend_cfg.get("base_url"),
        explicit_api_key=backend_cfg.get("api_key"),
    )
    runtime: dict[str, Any] = {
        key: resolved.get(key)
        for key in ("api_key", "base_url", "provider", "api_mode", "command")
    }
    runtime["args"] = list(resolved.get("args") or [])
    runtime["credential_pool"] = resolved.get("credential_pool")
    return {
        "runtime": runtime,
        "request_overrides": dict(backend_cfg.get("request_overrides") or {}),
    }
|
||||
|
||||
|
||||
def _resolve_copilot_catalog_api_key() -> str:
|
||||
"""Best-effort GitHub token for fetching the Copilot model catalog."""
|
||||
try:
|
||||
|
|
|
|||
18
run_agent.py
18
run_agent.py
|
|
@ -500,6 +500,8 @@ class AIAgent:
|
|||
status_callback: callable = None,
|
||||
max_tokens: int = None,
|
||||
reasoning_config: Dict[str, Any] = None,
|
||||
service_tier: str = None,
|
||||
request_overrides: Dict[str, Any] = None,
|
||||
prefill_messages: List[Dict[str, Any]] = None,
|
||||
platform: str = None,
|
||||
user_id: str = None,
|
||||
|
|
@ -662,6 +664,8 @@ class AIAgent:
|
|||
# Model response configuration
|
||||
self.max_tokens = max_tokens # None = use model default
|
||||
self.reasoning_config = reasoning_config # None = use default (medium for OpenRouter)
|
||||
self.service_tier = service_tier
|
||||
self.request_overrides = dict(request_overrides or {})
|
||||
self.prefill_messages = prefill_messages or [] # Prefilled conversation turns
|
||||
|
||||
# Anthropic prompt caching: auto-enabled for Claude models via OpenRouter.
|
||||
|
|
@ -3343,7 +3347,7 @@ class AIAgent:
|
|||
allowed_keys = {
|
||||
"model", "instructions", "input", "tools", "store",
|
||||
"reasoning", "include", "max_output_tokens", "temperature",
|
||||
"tool_choice", "parallel_tool_calls", "prompt_cache_key",
|
||||
"tool_choice", "parallel_tool_calls", "prompt_cache_key", "service_tier",
|
||||
}
|
||||
normalized: Dict[str, Any] = {
|
||||
"model": model,
|
||||
|
|
@ -3361,6 +3365,9 @@ class AIAgent:
|
|||
include = api_kwargs.get("include")
|
||||
if isinstance(include, list):
|
||||
normalized["include"] = include
|
||||
service_tier = api_kwargs.get("service_tier")
|
||||
if isinstance(service_tier, str) and service_tier.strip():
|
||||
normalized["service_tier"] = service_tier.strip()
|
||||
|
||||
# Pass through max_output_tokens and temperature
|
||||
max_output_tokens = api_kwargs.get("max_output_tokens")
|
||||
|
|
@ -5464,6 +5471,10 @@ class AIAgent:
|
|||
"models.github.ai" in self.base_url.lower()
|
||||
or "api.githubcopilot.com" in self.base_url.lower()
|
||||
)
|
||||
is_codex_backend = (
|
||||
self.provider == "openai-codex"
|
||||
or "chatgpt.com/backend-api/codex" in self.base_url.lower()
|
||||
)
|
||||
|
||||
# Resolve reasoning effort: config > default (medium)
|
||||
reasoning_effort = "medium"
|
||||
|
|
@ -5501,7 +5512,10 @@ class AIAgent:
|
|||
elif not is_github_responses:
|
||||
kwargs["include"] = []
|
||||
|
||||
if self.max_tokens is not None:
|
||||
if self.request_overrides:
|
||||
kwargs.update(self.request_overrides)
|
||||
|
||||
if self.max_tokens is not None and not is_codex_backend:
|
||||
kwargs["max_output_tokens"] = self.max_tokens
|
||||
|
||||
return kwargs
|
||||
|
|
|
|||
217
tests/cli/test_fast_command.py
Normal file
217
tests/cli/test_fast_command.py
Normal file
|
|
@ -0,0 +1,217 @@
|
|||
"""Tests for the /fast CLI command and service-tier config handling."""
|
||||
|
||||
import unittest
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
|
||||
def _import_cli():
    """Import and return the ``cli`` module for testing.

    If hermes_cli.config lacks ``save_env_value_secure``, a minimal stub is
    installed first (presumably ``cli`` references it at import time —
    the stub mirrors the expected return shape).
    """
    import hermes_cli.config as config_mod

    if not hasattr(config_mod, "save_env_value_secure"):
        def _stub_save_env_value_secure(key, value):
            return {"success": True, "stored_as": key, "validated": False}

        config_mod.save_env_value_secure = _stub_save_env_value_secure

    import cli as cli_mod
    return cli_mod
|
||||
|
||||
|
||||
class TestParseServiceTierConfig(unittest.TestCase):
    """Tests for cli._parse_service_tier_config normalization."""

    def _parse(self, raw):
        # Import lazily so the stubs installed by _import_cli are in place.
        cli_mod = _import_cli()
        return cli_mod._parse_service_tier_config(raw)

    def test_fast_maps_to_priority(self):
        self.assertEqual(self._parse("fast"), "priority")
        self.assertEqual(self._parse("priority"), "priority")

    def test_normal_disables_service_tier(self):
        self.assertIsNone(self._parse("normal"))
        self.assertIsNone(self._parse("off"))
        self.assertIsNone(self._parse(""))
|
||||
|
||||
|
||||
class TestHandleFastCommand(unittest.TestCase):
    """Behavioral tests for HermesCLI._handle_fast_command.

    The handler is invoked as an unbound method with a SimpleNamespace
    standing in for the CLI instance, so no real agent is constructed.
    """

    def _make_cli(self, service_tier=None):
        # Stub CLI whose model supports fast mode.
        return SimpleNamespace(
            service_tier=service_tier,
            provider="openai-codex",
            requested_provider="openai-codex",
            model="gpt-5.4",
            _fast_command_available=lambda: True,
            agent=MagicMock(),
        )

    def test_no_args_shows_status(self):
        cli_mod = _import_cli()
        stub = self._make_cli(service_tier=None)
        with (
            patch.object(cli_mod, "_cprint") as mock_cprint,
            patch.object(cli_mod, "save_config_value") as mock_save,
        ):
            cli_mod.HermesCLI._handle_fast_command(stub, "/fast")

        # Bare /fast shows status, does not change config
        mock_save.assert_not_called()
        # Should have printed the status line
        printed = " ".join(str(c) for c in mock_cprint.call_args_list)
        self.assertIn("normal", printed)

    def test_no_args_shows_fast_when_enabled(self):
        # With service_tier already "priority", status must read "fast".
        cli_mod = _import_cli()
        stub = self._make_cli(service_tier="priority")
        with (
            patch.object(cli_mod, "_cprint") as mock_cprint,
            patch.object(cli_mod, "save_config_value") as mock_save,
        ):
            cli_mod.HermesCLI._handle_fast_command(stub, "/fast")

        mock_save.assert_not_called()
        printed = " ".join(str(c) for c in mock_cprint.call_args_list)
        self.assertIn("fast", printed)

    def test_normal_argument_clears_service_tier(self):
        # "/fast normal" clears the tier, persists "normal", and drops the
        # cached agent so the next turn re-initializes it.
        cli_mod = _import_cli()
        stub = self._make_cli(service_tier="priority")
        with (
            patch.object(cli_mod, "_cprint"),
            patch.object(cli_mod, "save_config_value", return_value=True) as mock_save,
        ):
            cli_mod.HermesCLI._handle_fast_command(stub, "/fast normal")

        mock_save.assert_called_once_with("agent.service_tier", "normal")
        self.assertIsNone(stub.service_tier)
        self.assertIsNone(stub.agent)

    def test_unsupported_model_does_not_expose_fast(self):
        # A model without a fast backend gets an explanatory message and no
        # config change.
        cli_mod = _import_cli()
        stub = SimpleNamespace(
            service_tier=None,
            provider="openai-codex",
            requested_provider="openai-codex",
            model="gpt-5.3-codex",
            _fast_command_available=lambda: False,
            agent=MagicMock(),
        )

        with (
            patch.object(cli_mod, "_cprint") as mock_cprint,
            patch.object(cli_mod, "save_config_value") as mock_save,
        ):
            cli_mod.HermesCLI._handle_fast_command(stub, "/fast")

        mock_save.assert_not_called()
        self.assertTrue(mock_cprint.called)
|
||||
|
||||
|
||||
class TestFastModeRegistry(unittest.TestCase):
    """Tests for the fast-mode backend registry in hermes_cli.models."""

    def test_only_gpt_5_4_is_enabled_for_codex(self):
        from hermes_cli.models import fast_mode_backend_config

        assert fast_mode_backend_config("gpt-5.4") == {
            "provider": "openai-codex",
            "request_overrides": {"service_tier": "priority"},
        }
        assert fast_mode_backend_config("gpt-5.3-codex") is None
|
||||
|
||||
|
||||
class TestFastModeRouting(unittest.TestCase):
    """Tests that per-turn route resolution applies the fast backend."""

    def test_fast_command_exposed_for_model_even_when_provider_is_auto(self):
        # Availability keys on the model slug, not the configured provider.
        cli_mod = _import_cli()
        stub = SimpleNamespace(provider="auto", requested_provider="auto", model="gpt-5.4", agent=None)

        assert cli_mod.HermesCLI._fast_command_available(stub) is True

    def test_turn_route_switches_to_model_backend_when_fast_enabled(self):
        # With service_tier="priority" and a registered fast backend, the
        # resolved route must swap to the codex runtime and carry overrides.
        cli_mod = _import_cli()
        stub = SimpleNamespace(
            model="gpt-5.4",
            api_key="primary-key",
            base_url="https://openrouter.ai/api/v1",
            provider="openrouter",
            api_mode="chat_completions",
            acp_command=None,
            acp_args=[],
            _credential_pool=None,
            _smart_model_routing={},
            service_tier="priority",
        )

        with (
            patch("agent.smart_model_routing.resolve_turn_route", return_value={
                "model": "gpt-5.4",
                "runtime": {
                    "api_key": "primary-key",
                    "base_url": "https://openrouter.ai/api/v1",
                    "provider": "openrouter",
                    "api_mode": "chat_completions",
                    "command": None,
                    "args": [],
                    "credential_pool": None,
                },
                "label": None,
                "signature": ("gpt-5.4", "openrouter", "https://openrouter.ai/api/v1", "chat_completions", None, ()),
            }),
            patch("hermes_cli.runtime_provider.resolve_runtime_provider", return_value={
                "provider": "openai-codex",
                "api_mode": "codex_responses",
                "base_url": "https://chatgpt.com/backend-api/codex",
                "api_key": "codex-key",
                "command": None,
                "args": [],
                "credential_pool": None,
            }),
        ):
            route = cli_mod.HermesCLI._resolve_turn_agent_config(stub, "hi")

        assert route["runtime"]["provider"] == "openai-codex"
        assert route["runtime"]["api_mode"] == "codex_responses"
        assert route["request_overrides"] == {"service_tier": "priority"}

    def test_turn_route_keeps_primary_runtime_when_model_has_no_fast_backend(self):
        # A model outside the fast-mode registry keeps its primary runtime
        # and gets no request overrides even when service_tier is set.
        cli_mod = _import_cli()
        stub = SimpleNamespace(
            model="gpt-5.3-codex",
            api_key="primary-key",
            base_url="https://openrouter.ai/api/v1",
            provider="openrouter",
            api_mode="chat_completions",
            acp_command=None,
            acp_args=[],
            _credential_pool=None,
            _smart_model_routing={},
            service_tier="priority",
        )

        primary_route = {
            "model": "gpt-5.3-codex",
            "runtime": {
                "api_key": "primary-key",
                "base_url": "https://openrouter.ai/api/v1",
                "provider": "openrouter",
                "api_mode": "chat_completions",
                "command": None,
                "args": [],
                "credential_pool": None,
            },
            "label": None,
            "signature": ("gpt-5.3-codex", "openrouter", "https://openrouter.ai/api/v1", "chat_completions", None, ()),
        }
        with patch("agent.smart_model_routing.resolve_turn_route", return_value=primary_route):
            route = cli_mod.HermesCLI._resolve_turn_agent_config(stub, "hi")

        assert route["runtime"]["provider"] == "openrouter"
        assert route.get("request_overrides") is None
|
||||
|
||||
|
||||
class TestConfigDefault(unittest.TestCase):
    """Tests that the default config exposes agent.service_tier."""

    def test_default_config_has_service_tier(self):
        from hermes_cli.config import DEFAULT_CONFIG

        agent = DEFAULT_CONFIG.get("agent", {})
        self.assertIn("service_tier", agent)
        # Empty string = "no preference" per _parse_service_tier_config.
        self.assertEqual(agent["service_tier"], "")
|
||||
|
|
@ -446,6 +446,13 @@ class TestSubcommands:
|
|||
assert "show" in subs
|
||||
assert "hide" in subs
|
||||
|
||||
    def test_fast_has_subcommands(self):
        # /fast registers static subcommands for completion.
        assert "/fast" in SUBCOMMANDS
        subs = SUBCOMMANDS["/fast"]
        assert "fast" in subs
        assert "normal" in subs
        assert "status" in subs
||||
|
||||
def test_voice_has_subcommands(self):
|
||||
assert "/voice" in SUBCOMMANDS
|
||||
assert "on" in SUBCOMMANDS["/voice"]
|
||||
|
|
@ -474,6 +481,20 @@ class TestSubcommandCompletion:
|
|||
assert "high" in texts
|
||||
assert "show" in texts
|
||||
|
||||
    def test_fast_subcommand_completion_after_space(self):
        # "/fast " should offer the static subcommands.
        completions = _completions(SlashCommandCompleter(), "/fast ")
        texts = {c.text for c in completions}
        assert "fast" in texts
        assert "normal" in texts
||||
|
||||
    def test_fast_command_filtered_out_when_unavailable(self):
        # A command_filter rejecting /fast hides it from name completion.
        completions = _completions(
            SlashCommandCompleter(command_filter=lambda cmd: cmd != "/fast"),
            "/fa",
        )
        texts = {c.text for c in completions}
        assert "fast" not in texts
|
||||
|
||||
def test_subcommand_prefix_filters(self):
|
||||
"""Typing '/reasoning sh' should only show 'show'."""
|
||||
completions = _completions(SlashCommandCompleter(), "/reasoning sh")
|
||||
|
|
@ -527,6 +548,13 @@ class TestGhostText:
|
|||
"""/reasoning sh → 'ow'"""
|
||||
assert _suggestion("/reasoning sh") == "ow"
|
||||
|
||||
    def test_fast_subcommand_suggestion(self):
        # Ghost text completes "/fast f" with "ast".
        assert _suggestion("/fast f") == "ast"
||||
|
||||
    def test_fast_subcommand_suggestion_hidden_when_filtered(self):
        # No ghost text for a command the filter hides.
        completer = SlashCommandCompleter(command_filter=lambda cmd: cmd != "/fast")
        assert _suggestion("/fa", completer=completer) is None
|
||||
|
||||
def test_no_suggestion_for_non_slash(self):
|
||||
assert _suggestion("hello") is None
|
||||
|
||||
|
|
|
|||
|
|
@ -356,6 +356,25 @@ class TestBuildApiKwargsCodex:
|
|||
assert "reasoning" in kwargs
|
||||
assert kwargs["reasoning"]["effort"] == "medium"
|
||||
|
||||
    def test_includes_service_tier_via_request_overrides(self, monkeypatch):
        # request_overrides must flow into the Responses API kwargs.
        agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
                            base_url="https://chatgpt.com/backend-api/codex")
        agent.model = "gpt-5.4"
        agent.service_tier = "priority"
        agent.request_overrides = {"service_tier": "priority"}
        messages = [{"role": "user", "content": "hi"}]
        kwargs = agent._build_api_kwargs(messages)
        assert kwargs["service_tier"] == "priority"
|
||||
|
||||
    def test_omits_max_output_tokens_for_codex_backend(self, monkeypatch):
        # The Codex backend rejects max_output_tokens, so it must be dropped.
        agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
                            base_url="https://chatgpt.com/backend-api/codex")
        agent.model = "gpt-5.4"
        agent.max_tokens = 20
        messages = [{"role": "user", "content": "hi"}]
        kwargs = agent._build_api_kwargs(messages)
        assert "max_output_tokens" not in kwargs
|
||||
|
||||
def test_includes_encrypted_content_in_include(self, monkeypatch):
|
||||
agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
|
||||
base_url="https://chatgpt.com/backend-api/codex")
|
||||
|
|
|
|||
|
|
@ -648,6 +648,15 @@ def test_preflight_codex_api_kwargs_allows_reasoning_and_temperature(monkeypatch
|
|||
assert result["max_output_tokens"] == 4096
|
||||
|
||||
|
||||
def test_preflight_codex_api_kwargs_allows_service_tier(monkeypatch):
    # service_tier is on the Codex preflight allow-list and must survive
    # request normalization.
    agent = _build_agent(monkeypatch)
    kwargs = _codex_request_kwargs()
    kwargs["service_tier"] = "priority"

    result = agent._preflight_codex_api_kwargs(kwargs)
    assert result["service_tier"] == "priority"
|
||||
|
||||
|
||||
def test_run_conversation_codex_replay_payload_keeps_call_id(monkeypatch):
|
||||
agent = _build_agent(monkeypatch)
|
||||
responses = [_codex_tool_call_response(), _codex_message_response("done")]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue