diff --git a/cli.py b/cli.py index b93fde77a..015e5bde7 100644 --- a/cli.py +++ b/cli.py @@ -120,6 +120,18 @@ def _parse_reasoning_config(effort: str) -> dict | None: return result +def _parse_service_tier_config(raw: str) -> str | None: + """Parse a persisted service-tier preference into a Responses API value.""" + value = str(raw or "").strip().lower() + if not value or value in {"normal", "default", "standard", "off", "none"}: + return None + if value in {"fast", "priority", "on"}: + return "priority" + logger.warning("Unknown service_tier '%s', ignoring", raw) + return None + + + def _get_chrome_debug_candidates(system: str) -> list[str]: """Return likely browser executables for local CDP auto-launch.""" candidates: list[str] = [] @@ -239,6 +251,7 @@ def load_cli_config() -> Dict[str, Any]: "system_prompt": "", "prefill_messages_file": "", "reasoning_effort": "", + "service_tier": "", "personalities": { "helpful": "You are a helpful, friendly AI assistant.", "concise": "You are a concise assistant. Keep responses brief and to the point.", @@ -1634,6 +1647,9 @@ class HermesCLI: self.reasoning_config = _parse_reasoning_config( CLI_CONFIG["agent"].get("reasoning_effort", "") ) + self.service_tier = _parse_service_tier_config( + CLI_CONFIG["agent"].get("service_tier", "") + ) # OpenRouter provider routing preferences pr = CLI_CONFIG.get("provider_routing", {}) or {} @@ -2556,8 +2572,9 @@ class HermesCLI: def _resolve_turn_agent_config(self, user_message: str) -> dict: """Resolve model/runtime overrides for a single user turn.""" from agent.smart_model_routing import resolve_turn_route + from hermes_cli.models import resolve_fast_mode_runtime - return resolve_turn_route( + route = resolve_turn_route( user_message, self._smart_model_routing, { @@ -2572,7 +2589,36 @@ class HermesCLI: }, ) - def _init_agent(self, *, model_override: str = None, runtime_override: dict = None, route_label: str = None) -> bool: + service_tier = getattr(self, "service_tier", None) + if not service_tier: + route["request_overrides"] = None + return route + + try: + fast_runtime = resolve_fast_mode_runtime(route.get("model")) + except Exception: + route["request_overrides"] = None + return route + if not fast_runtime: + route["request_overrides"] = None + return route + + runtime = fast_runtime["runtime"] + route["runtime"] = runtime + route["request_overrides"] = fast_runtime["request_overrides"] + route["label"] = f"fast route → {route.get('model')} ({runtime.get('provider')})" + route["signature"] = ( + route.get("model"), + runtime.get("provider"), + runtime.get("base_url"), + runtime.get("api_mode"), + runtime.get("command"), + tuple(runtime.get("args") or ()), + json.dumps(route["request_overrides"], sort_keys=True), + ) + return route + + def _init_agent(self, *, model_override: str = None, runtime_override: dict = None, route_label: str = None, request_overrides: dict | None = None) -> bool: """ Initialize the agent on first use. When resuming a session, restores conversation history from SQLite. @@ -2659,6 +2705,8 @@ class HermesCLI: ephemeral_system_prompt=self.system_prompt if self.system_prompt else None, prefill_messages=self.prefill_messages or None, reasoning_config=self.reasoning_config, + service_tier=self.service_tier, + request_overrides=request_overrides, providers_allowed=self._providers_only, providers_ignored=self._providers_ignore, providers_order=self._providers_order, @@ -3316,6 +3364,20 @@ class HermesCLI: f"{toolsets_info}{provider_info}" ) + def _fast_command_available(self) -> bool: + try: + from hermes_cli.models import model_supports_fast_mode + except Exception: + return False + agent = getattr(self, "agent", None) + model = getattr(agent, "model", None) or getattr(self, "model", None) + return model_supports_fast_mode(model) + + def _command_available(self, slash_command: str) -> bool: + if slash_command == "/fast": + return self._fast_command_available() + return True + def show_help(self): """Display help information with categorized commands.""" from hermes_cli.commands import COMMANDS_BY_CATEGORY @@ -3336,6 +3398,8 @@ class HermesCLI: for category, commands in COMMANDS_BY_CATEGORY.items(): _cprint(f"\n {_BOLD}── {category} ──{_RST}") for cmd, desc in commands.items(): + if not self._command_available(cmd): + continue ChatConsole().print(f" [bold {_accent_hex()}]{cmd:<15}[/] [dim]-[/] {_escape(desc)}") if _skill_commands: @@ -4788,6 +4852,8 @@ class HermesCLI: self._toggle_yolo() elif canonical == "reasoning": self._handle_reasoning_command(cmd_original) + elif canonical == "fast": + self._handle_fast_command(cmd_original) elif canonical == "compress": self._manual_compress() elif canonical == "usage": @@ -5027,6 +5093,8 @@ class HermesCLI: platform="cli", session_db=self._session_db, reasoning_config=self.reasoning_config, + service_tier=self.service_tier, + request_overrides=turn_route.get("request_overrides"), providers_allowed=self._providers_only, providers_ignored=self._providers_ignore, providers_order=self._providers_order, @@ -5162,6 +5230,8 @@ class HermesCLI: session_id=task_id, platform="cli", reasoning_config=self.reasoning_config, + service_tier=self.service_tier, + request_overrides=turn_route.get("request_overrides"), providers_allowed=self._providers_only, providers_ignored=self._providers_ignore, providers_order=self._providers_order, @@ -5591,6 +5661,40 @@ class HermesCLI: else: _cprint(f" {_GOLD}✓ Reasoning effort set to '{arg}' (session only){_RST}") + def _handle_fast_command(self, cmd: str): + """Handle /fast — choose the Codex Responses service tier.""" + if not self._fast_command_available(): + _cprint(" (._.) /fast is only available for models that explicitly expose a fast backend.") + return + + parts = cmd.strip().split(maxsplit=1) + if len(parts) < 2 or parts[1].strip().lower() == "status": + status = "fast" if self.service_tier == "priority" else "normal" + _cprint(f" {_GOLD}Codex inference tier: {status}{_RST}") + _cprint(f" {_DIM}Usage: /fast [normal|fast|status]{_RST}") + return + + arg = parts[1].strip().lower() + + if arg in {"fast", "on"}: + self.service_tier = "priority" + saved_value = "fast" + label = "FAST" + elif arg in {"normal", "off"}: + self.service_tier = None + saved_value = "normal" + label = "NORMAL" + else: + _cprint(f" {_DIM}(._.) Unknown argument: {arg}{_RST}") + _cprint(f" {_DIM}Usage: /fast [normal|fast|status]{_RST}") + return + + self.agent = None # Force agent re-init with new service-tier config + if save_config_value("agent.service_tier", saved_value): + _cprint(f" {_GOLD}✓ Codex inference tier set to {label} (saved to config){_RST}") + else: + _cprint(f" {_GOLD}✓ Codex inference tier set to {label} (session only){_RST}") + def _on_reasoning(self, reasoning_text: str): """Callback for intermediate reasoning display during tool-call loops.""" if not reasoning_text: @@ -6749,6 +6853,7 @@ class HermesCLI: model_override=turn_route["model"], runtime_override=turn_route["runtime"], route_label=turn_route["label"], + request_overrides=turn_route.get("request_overrides"), ): return None @@ -7931,6 +8036,7 @@ class HermesCLI: _completer = SlashCommandCompleter( skill_commands_provider=lambda: _skill_commands, + command_filter=cli_ref._command_available, ) input_area = TextArea( height=Dimension(min=1, max=8, preferred=1), @@ -9009,6 +9115,7 @@ def main( model_override=turn_route["model"], runtime_override=turn_route["runtime"], route_label=turn_route["label"], + request_overrides=turn_route.get("request_overrides"), ): cli.agent.quiet_mode = True cli.agent.suppress_status_output = True diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py index 9f26b4bb0..9260a6c6f 100644 --- a/hermes_cli/commands.py +++ b/hermes_cli/commands.py @@ -100,6 +100,9 @@ COMMAND_REGISTRY: list[CommandDef] = [ CommandDef("reasoning", "Manage reasoning effort and display", "Configuration", args_hint="[level|show|hide]", subcommands=("none", "minimal", "low", "medium", "high", "xhigh", "show", "hide", "on", "off")), + CommandDef("fast", "Choose Codex inference tier (Normal/Fast)", "Configuration", + cli_only=True, args_hint="[normal|fast|status]", + subcommands=("normal", "fast", "status", "on", "off")), CommandDef("skin", "Show or change the display skin/theme", "Configuration", cli_only=True, args_hint="[name]"), CommandDef("voice", "Toggle voice mode", "Configuration", @@ -639,8 +642,18 @@ class SlashCommandCompleter(Completer): def __init__( self, skill_commands_provider: Callable[[], Mapping[str, dict[str, Any]]] | None = None, + command_filter: Callable[[str], bool] | None = None, ) -> None: self._skill_commands_provider = skill_commands_provider + self._command_filter = command_filter + + def _command_allowed(self, slash_command: str) -> bool: + if self._command_filter is None: + return True + try: + return bool(self._command_filter(slash_command)) + except Exception: + return True def _iter_skill_commands(self) -> Mapping[str, dict[str, Any]]: if self._skill_commands_provider is None: @@ -918,7 +931,7 @@ class SlashCommandCompleter(Completer): return # Static subcommand completions - if " " not in sub_text and base_cmd in SUBCOMMANDS: + if " " not in sub_text and base_cmd in SUBCOMMANDS and self._command_allowed(base_cmd): for sub in SUBCOMMANDS[base_cmd]: if sub.startswith(sub_lower) and sub != sub_lower: yield Completion( @@ -931,6 +944,8 @@ class SlashCommandCompleter(Completer): word = text[1:] for cmd, desc in COMMANDS.items(): + if not self._command_allowed(cmd): + continue cmd_name = cmd[1:] if cmd_name.startswith(word): yield Completion( @@ -989,6 +1004,8 @@ class SlashCommandAutoSuggest(AutoSuggest): # Still typing the command name: /upd → suggest "ate" word = text[1:].lower() for cmd in COMMANDS: + if self._completer is not None and not self._completer._command_allowed(cmd): + continue cmd_name = cmd[1:] # strip leading / if cmd_name.startswith(word) and cmd_name != word: return Suggestion(cmd_name[len(word):]) @@ -999,6 +1016,8 @@ class SlashCommandAutoSuggest(AutoSuggest): sub_lower = sub_text.lower() # Static subcommands + if self._completer is not None and not self._completer._command_allowed(base_cmd): + return None if base_cmd in SUBCOMMANDS and SUBCOMMANDS[base_cmd]: if " " not in sub_text: for sub in SUBCOMMANDS[base_cmd]: diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 6ae094e3f..3b4eee14e 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -255,6 +255,7 @@ DEFAULT_CONFIG = { # tools or receiving API responses. Only fires when the agent has # been completely idle for this duration. 0 = unlimited. "gateway_timeout": 1800, + "service_tier": "", # Tool-use enforcement: injects system prompt guidance that tells the # model to actually call tools instead of describing intended actions. # Values: "auto" (default — applies to gpt/codex models), true/false diff --git a/hermes_cli/models.py b/hermes_cli/models.py index b55249a70..b5485ab89 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -1017,6 +1017,60 @@ def provider_label(provider: Optional[str]) -> str: return _PROVIDER_LABELS.get(normalized, original or "OpenRouter") +_FAST_MODE_BACKEND_CONFIG: dict[str, dict[str, Any]] = { + "gpt-5.4": { + "provider": "openai-codex", + "request_overrides": {"service_tier": "priority"}, + }, +} + + +def fast_mode_backend_config(model_id: Optional[str]) -> dict[str, Any] | None: + """Return backend config for models that expose Fast mode. + + To expose Fast mode for a new model, add its normalized model slug to + ``_FAST_MODE_BACKEND_CONFIG`` along with the backend runtime selection and + backend-specific request overrides Hermes should apply. + """ + raw = str(model_id or "").strip().lower() + if "/" in raw: + raw = raw.split("/", 1)[1] + config = _FAST_MODE_BACKEND_CONFIG.get(raw) + return dict(config) if config else None + + +def model_supports_fast_mode(model_id: Optional[str]) -> bool: + """Return whether Hermes should expose Fast mode for the active model.""" + return fast_mode_backend_config(model_id) is not None + + +def resolve_fast_mode_runtime(model_id: Optional[str]) -> dict[str, Any] | None: + """Resolve runtime selection and request overrides for a fast-mode model.""" + cfg = fast_mode_backend_config(model_id) + if not cfg: + return None + + from hermes_cli.runtime_provider import resolve_runtime_provider + + runtime = resolve_runtime_provider( + requested=cfg.get("provider"), + explicit_base_url=cfg.get("base_url"), + explicit_api_key=cfg.get("api_key"), + ) + return { + "runtime": { + "api_key": runtime.get("api_key"), + "base_url": runtime.get("base_url"), + "provider": runtime.get("provider"), + "api_mode": runtime.get("api_mode"), + "command": runtime.get("command"), + "args": list(runtime.get("args") or []), + "credential_pool": runtime.get("credential_pool"), + }, + "request_overrides": dict(cfg.get("request_overrides") or {}), + } + + def _resolve_copilot_catalog_api_key() -> str: """Best-effort GitHub token for fetching the Copilot model catalog.""" try: diff --git a/run_agent.py b/run_agent.py index f4367fe7d..bee98ed00 100644 --- a/run_agent.py +++ b/run_agent.py @@ -500,6 +500,8 @@ class AIAgent: status_callback: callable = None, max_tokens: int = None, reasoning_config: Dict[str, Any] = None, + service_tier: str = None, + request_overrides: Dict[str, Any] = None, prefill_messages: List[Dict[str, Any]] = None, platform: str = None, user_id: str = None, @@ -662,6 +664,8 @@ class AIAgent: # Model response configuration self.max_tokens = max_tokens # None = use model default self.reasoning_config = reasoning_config # None = use default (medium for OpenRouter) + self.service_tier = service_tier + self.request_overrides = dict(request_overrides or {}) self.prefill_messages = prefill_messages or [] # Prefilled conversation turns # Anthropic prompt caching: auto-enabled for Claude models via OpenRouter. @@ -3343,7 +3347,7 @@ class AIAgent: allowed_keys = { "model", "instructions", "input", "tools", "store", "reasoning", "include", "max_output_tokens", "temperature", - "tool_choice", "parallel_tool_calls", "prompt_cache_key", + "tool_choice", "parallel_tool_calls", "prompt_cache_key", "service_tier", } normalized: Dict[str, Any] = { "model": model, @@ -3361,6 +3365,9 @@ class AIAgent: include = api_kwargs.get("include") if isinstance(include, list): normalized["include"] = include + service_tier = api_kwargs.get("service_tier") + if isinstance(service_tier, str) and service_tier.strip(): + normalized["service_tier"] = service_tier.strip() # Pass through max_output_tokens and temperature max_output_tokens = api_kwargs.get("max_output_tokens") @@ -5464,6 +5471,10 @@ class AIAgent: "models.github.ai" in self.base_url.lower() or "api.githubcopilot.com" in self.base_url.lower() ) + is_codex_backend = ( + self.provider == "openai-codex" + or "chatgpt.com/backend-api/codex" in self.base_url.lower() + ) # Resolve reasoning effort: config > default (medium) reasoning_effort = "medium" @@ -5501,7 +5512,10 @@ class AIAgent: elif not is_github_responses: kwargs["include"] = [] - if self.max_tokens is not None: + if self.request_overrides: + kwargs.update(self.request_overrides) + + if self.max_tokens is not None and not is_codex_backend: kwargs["max_output_tokens"] = self.max_tokens return kwargs diff --git a/tests/cli/test_fast_command.py b/tests/cli/test_fast_command.py new file mode 100644 index 000000000..0305bf599 --- /dev/null +++ b/tests/cli/test_fast_command.py @@ -0,0 +1,217 @@ +"""Tests for the /fast CLI command and service-tier config handling.""" + +import unittest +from types import SimpleNamespace +from unittest.mock import MagicMock, patch + + +def _import_cli(): + import hermes_cli.config as config_mod + + if not hasattr(config_mod, "save_env_value_secure"): + config_mod.save_env_value_secure = lambda key, value: { + "success": True, + "stored_as": key, + "validated": False, + } + + import cli as cli_mod + + return cli_mod + + +class TestParseServiceTierConfig(unittest.TestCase): + def _parse(self, raw): + cli_mod = _import_cli() + return cli_mod._parse_service_tier_config(raw) + + def test_fast_maps_to_priority(self): + self.assertEqual(self._parse("fast"), "priority") + self.assertEqual(self._parse("priority"), "priority") + + def test_normal_disables_service_tier(self): + self.assertIsNone(self._parse("normal")) + self.assertIsNone(self._parse("off")) + self.assertIsNone(self._parse("")) + + +class TestHandleFastCommand(unittest.TestCase): + def _make_cli(self, service_tier=None): + return SimpleNamespace( + service_tier=service_tier, + provider="openai-codex", + requested_provider="openai-codex", + model="gpt-5.4", + _fast_command_available=lambda: True, + agent=MagicMock(), + ) + + def test_no_args_shows_status(self): + cli_mod = _import_cli() + stub = self._make_cli(service_tier=None) + with ( + patch.object(cli_mod, "_cprint") as mock_cprint, + patch.object(cli_mod, "save_config_value") as mock_save, + ): + cli_mod.HermesCLI._handle_fast_command(stub, "/fast") + + # Bare /fast shows status, does not change config + mock_save.assert_not_called() + # Should have printed the status line + printed = " ".join(str(c) for c in mock_cprint.call_args_list) + self.assertIn("normal", printed) + + def test_no_args_shows_fast_when_enabled(self): + cli_mod = _import_cli() + stub = self._make_cli(service_tier="priority") + with ( + patch.object(cli_mod, "_cprint") as mock_cprint, + patch.object(cli_mod, "save_config_value") as mock_save, + ): + cli_mod.HermesCLI._handle_fast_command(stub, "/fast") + + mock_save.assert_not_called() + printed = " ".join(str(c) for c in mock_cprint.call_args_list) + self.assertIn("fast", printed) + + def test_normal_argument_clears_service_tier(self): + cli_mod = _import_cli() + stub = self._make_cli(service_tier="priority") + with ( + patch.object(cli_mod, "_cprint"), + patch.object(cli_mod, "save_config_value", return_value=True) as mock_save, + ): + cli_mod.HermesCLI._handle_fast_command(stub, "/fast normal") + + mock_save.assert_called_once_with("agent.service_tier", "normal") + self.assertIsNone(stub.service_tier) + self.assertIsNone(stub.agent) + + def test_unsupported_model_does_not_expose_fast(self): + cli_mod = _import_cli() + stub = SimpleNamespace( + service_tier=None, + provider="openai-codex", + requested_provider="openai-codex", + model="gpt-5.3-codex", + _fast_command_available=lambda: False, + agent=MagicMock(), + ) + + with ( + patch.object(cli_mod, "_cprint") as mock_cprint, + patch.object(cli_mod, "save_config_value") as mock_save, + ): + cli_mod.HermesCLI._handle_fast_command(stub, "/fast") + + mock_save.assert_not_called() + self.assertTrue(mock_cprint.called) + + +class TestFastModeRegistry(unittest.TestCase): + def test_only_gpt_5_4_is_enabled_for_codex(self): + from hermes_cli.models import fast_mode_backend_config + + assert fast_mode_backend_config("gpt-5.4") == { + "provider": "openai-codex", + "request_overrides": {"service_tier": "priority"}, + } + assert fast_mode_backend_config("gpt-5.3-codex") is None + + +class TestFastModeRouting(unittest.TestCase): + def test_fast_command_exposed_for_model_even_when_provider_is_auto(self): + cli_mod = _import_cli() + stub = SimpleNamespace(provider="auto", requested_provider="auto", model="gpt-5.4", agent=None) + + assert cli_mod.HermesCLI._fast_command_available(stub) is True + + def test_turn_route_switches_to_model_backend_when_fast_enabled(self): + cli_mod = _import_cli() + stub = SimpleNamespace( + model="gpt-5.4", + api_key="primary-key", + base_url="https://openrouter.ai/api/v1", + provider="openrouter", + api_mode="chat_completions", + acp_command=None, + acp_args=[], + _credential_pool=None, + _smart_model_routing={}, + service_tier="priority", + ) + + with ( + patch("agent.smart_model_routing.resolve_turn_route", return_value={ + "model": "gpt-5.4", + "runtime": { + "api_key": "primary-key", + "base_url": "https://openrouter.ai/api/v1", + "provider": "openrouter", + "api_mode": "chat_completions", + "command": None, + "args": [], + "credential_pool": None, + }, + "label": None, + "signature": ("gpt-5.4", "openrouter", "https://openrouter.ai/api/v1", "chat_completions", None, ()), + }), + patch("hermes_cli.runtime_provider.resolve_runtime_provider", return_value={ + "provider": "openai-codex", + "api_mode": "codex_responses", + "base_url": "https://chatgpt.com/backend-api/codex", + "api_key": "codex-key", + "command": None, + "args": [], + "credential_pool": None, + }), + ): + route = cli_mod.HermesCLI._resolve_turn_agent_config(stub, "hi") + + assert route["runtime"]["provider"] == "openai-codex" + assert route["runtime"]["api_mode"] == "codex_responses" + assert route["request_overrides"] == {"service_tier": "priority"} + + def test_turn_route_keeps_primary_runtime_when_model_has_no_fast_backend(self): + cli_mod = _import_cli() + stub = SimpleNamespace( + model="gpt-5.3-codex", + api_key="primary-key", + base_url="https://openrouter.ai/api/v1", + provider="openrouter", + api_mode="chat_completions", + acp_command=None, + acp_args=[], + _credential_pool=None, + _smart_model_routing={}, + service_tier="priority", + ) + + primary_route = { + "model": "gpt-5.3-codex", + "runtime": { + "api_key": "primary-key", + "base_url": "https://openrouter.ai/api/v1", + "provider": "openrouter", + "api_mode": "chat_completions", + "command": None, + "args": [], + "credential_pool": None, + }, + "label": None, + "signature": ("gpt-5.3-codex", "openrouter", "https://openrouter.ai/api/v1", "chat_completions", None, ()), + } + with patch("agent.smart_model_routing.resolve_turn_route", return_value=primary_route): + route = cli_mod.HermesCLI._resolve_turn_agent_config(stub, "hi") + + assert route["runtime"]["provider"] == "openrouter" + assert route.get("request_overrides") is None + + +class TestConfigDefault(unittest.TestCase): + def test_default_config_has_service_tier(self): + from hermes_cli.config import DEFAULT_CONFIG + + agent = DEFAULT_CONFIG.get("agent", {}) + self.assertIn("service_tier", agent) + self.assertEqual(agent["service_tier"], "") diff --git a/tests/hermes_cli/test_commands.py b/tests/hermes_cli/test_commands.py index 29996fe18..30c2f22c2 100644 --- a/tests/hermes_cli/test_commands.py +++ b/tests/hermes_cli/test_commands.py @@ -446,6 +446,13 @@ class TestSubcommands: assert "show" in subs assert "hide" in subs + def test_fast_has_subcommands(self): + assert "/fast" in SUBCOMMANDS + subs = SUBCOMMANDS["/fast"] + assert "fast" in subs + assert "normal" in subs + assert "status" in subs + def test_voice_has_subcommands(self): assert "/voice" in SUBCOMMANDS assert "on" in SUBCOMMANDS["/voice"] @@ -474,6 +481,20 @@ class TestSubcommandCompletion: assert "high" in texts assert "show" in texts + def test_fast_subcommand_completion_after_space(self): + completions = _completions(SlashCommandCompleter(), "/fast ") + texts = {c.text for c in completions} + assert "fast" in texts + assert "normal" in texts + + def test_fast_command_filtered_out_when_unavailable(self): + completions = _completions( + SlashCommandCompleter(command_filter=lambda cmd: cmd != "/fast"), + "/fa", + ) + texts = {c.text for c in completions} + assert "fast" not in texts + def test_subcommand_prefix_filters(self): """Typing '/reasoning sh' should only show 'show'.""" completions = _completions(SlashCommandCompleter(), "/reasoning sh") @@ -527,6 +548,13 @@ class TestGhostText: """/reasoning sh → 'ow'""" assert _suggestion("/reasoning sh") == "ow" + def test_fast_subcommand_suggestion(self): + assert _suggestion("/fast f") == "ast" + + def test_fast_subcommand_suggestion_hidden_when_filtered(self): + completer = SlashCommandCompleter(command_filter=lambda cmd: cmd != "/fast") + assert _suggestion("/fa", completer=completer) is None + def test_no_suggestion_for_non_slash(self): assert _suggestion("hello") is None diff --git a/tests/run_agent/test_provider_parity.py b/tests/run_agent/test_provider_parity.py index 0029376ab..094852530 100644 --- a/tests/run_agent/test_provider_parity.py +++ b/tests/run_agent/test_provider_parity.py @@ -356,6 +356,25 @@ class TestBuildApiKwargsCodex: assert "reasoning" in kwargs assert kwargs["reasoning"]["effort"] == "medium" + def test_includes_service_tier_via_request_overrides(self, monkeypatch): + agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", + base_url="https://chatgpt.com/backend-api/codex") + agent.model = "gpt-5.4" + agent.service_tier = "priority" + agent.request_overrides = {"service_tier": "priority"} + messages = [{"role": "user", "content": "hi"}] + kwargs = agent._build_api_kwargs(messages) + assert kwargs["service_tier"] == "priority" + + def test_omits_max_output_tokens_for_codex_backend(self, monkeypatch): + agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", + base_url="https://chatgpt.com/backend-api/codex") + agent.model = "gpt-5.4" + agent.max_tokens = 20 + messages = [{"role": "user", "content": "hi"}] + kwargs = agent._build_api_kwargs(messages) + assert "max_output_tokens" not in kwargs + def test_includes_encrypted_content_in_include(self, monkeypatch): agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", base_url="https://chatgpt.com/backend-api/codex") diff --git a/tests/run_agent/test_run_agent_codex_responses.py b/tests/run_agent/test_run_agent_codex_responses.py index ea703ffbb..635c75fcf 100644 --- a/tests/run_agent/test_run_agent_codex_responses.py +++ b/tests/run_agent/test_run_agent_codex_responses.py @@ -648,6 +648,15 @@ def test_preflight_codex_api_kwargs_allows_reasoning_and_temperature(monkeypatch assert result["max_output_tokens"] == 4096 +def test_preflight_codex_api_kwargs_allows_service_tier(monkeypatch): + agent = _build_agent(monkeypatch) + kwargs = _codex_request_kwargs() + kwargs["service_tier"] = "priority" + + result = agent._preflight_codex_api_kwargs(kwargs) + assert result["service_tier"] == "priority" + + def test_run_conversation_codex_replay_payload_keeps_call_id(monkeypatch): agent = _build_agent(monkeypatch) responses = [_codex_tool_call_response(), _codex_message_response("done")]