feat: add Codex fast mode toggle (/fast command)

Add /fast slash command to toggle OpenAI Codex service_tier between
normal and priority ('fast') inference. Only exposed for models
registered in _FAST_MODE_BACKEND_CONFIG (currently gpt-5.4).

- Registry-based backend config for extensibility
- Dynamic command visibility (hidden from help/autocomplete for
  non-supported models) via command_filter on SlashCommandCompleter
- service_tier flows through request_overrides from route resolution
- Omit max_output_tokens for Codex backend (rejects it)
- Persists to config.yaml under agent.service_tier

Salvage cleanup: removed simple_term_menu/input() menu (banned),
bare /fast now shows status like /reasoning. Removed redundant
override resolution in _build_api_kwargs — single source of truth
via request_overrides from route.

Co-authored-by: Hermes Agent <hermes@nousresearch.com>
This commit is contained in:
g-guthrie 2026-04-09 18:10:57 -07:00 committed by Teknium
parent 4caa635803
commit d416a69288
9 changed files with 473 additions and 5 deletions

111
cli.py
View file

@ -120,6 +120,18 @@ def _parse_reasoning_config(effort: str) -> dict | None:
return result
def _parse_service_tier_config(raw: str) -> str | None:
"""Parse a persisted service-tier preference into a Responses API value."""
value = str(raw or "").strip().lower()
if not value or value in {"normal", "default", "standard", "off", "none"}:
return None
if value in {"fast", "priority", "on"}:
return "priority"
logger.warning("Unknown service_tier '%s', ignoring", raw)
return None
def _get_chrome_debug_candidates(system: str) -> list[str]:
"""Return likely browser executables for local CDP auto-launch."""
candidates: list[str] = []
@ -239,6 +251,7 @@ def load_cli_config() -> Dict[str, Any]:
"system_prompt": "",
"prefill_messages_file": "",
"reasoning_effort": "",
"service_tier": "",
"personalities": {
"helpful": "You are a helpful, friendly AI assistant.",
"concise": "You are a concise assistant. Keep responses brief and to the point.",
@ -1634,6 +1647,9 @@ class HermesCLI:
self.reasoning_config = _parse_reasoning_config(
CLI_CONFIG["agent"].get("reasoning_effort", "")
)
self.service_tier = _parse_service_tier_config(
CLI_CONFIG["agent"].get("service_tier", "")
)
# OpenRouter provider routing preferences
pr = CLI_CONFIG.get("provider_routing", {}) or {}
@ -2556,8 +2572,9 @@ class HermesCLI:
def _resolve_turn_agent_config(self, user_message: str) -> dict:
"""Resolve model/runtime overrides for a single user turn."""
from agent.smart_model_routing import resolve_turn_route
from hermes_cli.models import resolve_fast_mode_runtime
return resolve_turn_route(
route = resolve_turn_route(
user_message,
self._smart_model_routing,
{
@ -2572,7 +2589,36 @@ class HermesCLI:
},
)
def _init_agent(self, *, model_override: str = None, runtime_override: dict = None, route_label: str = None) -> bool:
service_tier = getattr(self, "service_tier", None)
if not service_tier:
route["request_overrides"] = None
return route
try:
fast_runtime = resolve_fast_mode_runtime(route.get("model"))
except Exception:
route["request_overrides"] = None
return route
if not fast_runtime:
route["request_overrides"] = None
return route
runtime = fast_runtime["runtime"]
route["runtime"] = runtime
route["request_overrides"] = fast_runtime["request_overrides"]
route["label"] = f"fast route → {route.get('model')} ({runtime.get('provider')})"
route["signature"] = (
route.get("model"),
runtime.get("provider"),
runtime.get("base_url"),
runtime.get("api_mode"),
runtime.get("command"),
tuple(runtime.get("args") or ()),
json.dumps(route["request_overrides"], sort_keys=True),
)
return route
def _init_agent(self, *, model_override: str = None, runtime_override: dict = None, route_label: str = None, request_overrides: dict | None = None) -> bool:
"""
Initialize the agent on first use.
When resuming a session, restores conversation history from SQLite.
@ -2659,6 +2705,8 @@ class HermesCLI:
ephemeral_system_prompt=self.system_prompt if self.system_prompt else None,
prefill_messages=self.prefill_messages or None,
reasoning_config=self.reasoning_config,
service_tier=self.service_tier,
request_overrides=request_overrides,
providers_allowed=self._providers_only,
providers_ignored=self._providers_ignore,
providers_order=self._providers_order,
@ -3316,6 +3364,20 @@ class HermesCLI:
f"{toolsets_info}{provider_info}"
)
def _fast_command_available(self) -> bool:
try:
from hermes_cli.models import model_supports_fast_mode
except Exception:
return False
agent = getattr(self, "agent", None)
model = getattr(agent, "model", None) or getattr(self, "model", None)
return model_supports_fast_mode(model)
def _command_available(self, slash_command: str) -> bool:
if slash_command == "/fast":
return self._fast_command_available()
return True
def show_help(self):
"""Display help information with categorized commands."""
from hermes_cli.commands import COMMANDS_BY_CATEGORY
@ -3336,6 +3398,8 @@ class HermesCLI:
for category, commands in COMMANDS_BY_CATEGORY.items():
_cprint(f"\n {_BOLD}── {category} ──{_RST}")
for cmd, desc in commands.items():
if not self._command_available(cmd):
continue
ChatConsole().print(f" [bold {_accent_hex()}]{cmd:<15}[/] [dim]-[/] {_escape(desc)}")
if _skill_commands:
@ -4788,6 +4852,8 @@ class HermesCLI:
self._toggle_yolo()
elif canonical == "reasoning":
self._handle_reasoning_command(cmd_original)
elif canonical == "fast":
self._handle_fast_command(cmd_original)
elif canonical == "compress":
self._manual_compress()
elif canonical == "usage":
@ -5027,6 +5093,8 @@ class HermesCLI:
platform="cli",
session_db=self._session_db,
reasoning_config=self.reasoning_config,
service_tier=self.service_tier,
request_overrides=turn_route.get("request_overrides"),
providers_allowed=self._providers_only,
providers_ignored=self._providers_ignore,
providers_order=self._providers_order,
@ -5162,6 +5230,8 @@ class HermesCLI:
session_id=task_id,
platform="cli",
reasoning_config=self.reasoning_config,
service_tier=self.service_tier,
request_overrides=turn_route.get("request_overrides"),
providers_allowed=self._providers_only,
providers_ignored=self._providers_ignore,
providers_order=self._providers_order,
@ -5591,6 +5661,40 @@ class HermesCLI:
else:
_cprint(f" {_GOLD}✓ Reasoning effort set to '{arg}' (session only){_RST}")
def _handle_fast_command(self, cmd: str):
    """Handle /fast — choose the Codex Responses service tier.

    Bare "/fast" (or "/fast status") prints the current tier and usage;
    "fast"/"on" selects priority inference while "normal"/"off" restores
    the default. A change drops the cached agent so the next turn picks up
    the new tier, and is persisted under agent.service_tier when possible.
    """
    if not self._fast_command_available():
        _cprint(" (._.) /fast is only available for models that explicitly expose a fast backend.")
        return

    tokens = cmd.strip().split(maxsplit=1)
    argument = tokens[1].strip().lower() if len(tokens) > 1 else "status"
    if argument == "status":
        status = "fast" if self.service_tier == "priority" else "normal"
        _cprint(f" {_GOLD}Codex inference tier: {status}{_RST}")
        _cprint(f" {_DIM}Usage: /fast [normal|fast|status]{_RST}")
        return

    # Accepted aliases -> (service_tier value, persisted value, display label).
    transitions = {
        "fast": ("priority", "fast", "FAST"),
        "on": ("priority", "fast", "FAST"),
        "normal": (None, "normal", "NORMAL"),
        "off": (None, "normal", "NORMAL"),
    }
    if argument not in transitions:
        _cprint(f" {_DIM}(._.) Unknown argument: {argument}{_RST}")
        _cprint(f" {_DIM}Usage: /fast [normal|fast|status]{_RST}")
        return

    self.service_tier, saved_value, label = transitions[argument]
    self.agent = None  # Force agent re-init with new service-tier config
    if save_config_value("agent.service_tier", saved_value):
        _cprint(f" {_GOLD}✓ Codex inference tier set to {label} (saved to config){_RST}")
    else:
        _cprint(f" {_GOLD}✓ Codex inference tier set to {label} (session only){_RST}")
def _on_reasoning(self, reasoning_text: str):
"""Callback for intermediate reasoning display during tool-call loops."""
if not reasoning_text:
@ -6749,6 +6853,7 @@ class HermesCLI:
model_override=turn_route["model"],
runtime_override=turn_route["runtime"],
route_label=turn_route["label"],
request_overrides=turn_route.get("request_overrides"),
):
return None
@ -7931,6 +8036,7 @@ class HermesCLI:
_completer = SlashCommandCompleter(
skill_commands_provider=lambda: _skill_commands,
command_filter=cli_ref._command_available,
)
input_area = TextArea(
height=Dimension(min=1, max=8, preferred=1),
@ -9009,6 +9115,7 @@ def main(
model_override=turn_route["model"],
runtime_override=turn_route["runtime"],
route_label=turn_route["label"],
request_overrides=turn_route.get("request_overrides"),
):
cli.agent.quiet_mode = True
cli.agent.suppress_status_output = True

View file

@ -100,6 +100,9 @@ COMMAND_REGISTRY: list[CommandDef] = [
CommandDef("reasoning", "Manage reasoning effort and display", "Configuration",
args_hint="[level|show|hide]",
subcommands=("none", "minimal", "low", "medium", "high", "xhigh", "show", "hide", "on", "off")),
CommandDef("fast", "Choose Codex inference tier (Normal/Fast)", "Configuration",
cli_only=True, args_hint="[normal|fast|status]",
subcommands=("normal", "fast", "status", "on", "off")),
CommandDef("skin", "Show or change the display skin/theme", "Configuration",
cli_only=True, args_hint="[name]"),
CommandDef("voice", "Toggle voice mode", "Configuration",
@ -639,8 +642,18 @@ class SlashCommandCompleter(Completer):
def __init__(
self,
skill_commands_provider: Callable[[], Mapping[str, dict[str, Any]]] | None = None,
command_filter: Callable[[str], bool] | None = None,
) -> None:
# skill_commands_provider: lazy source of dynamic /skill commands,
# queried at completion time rather than at construction.
# command_filter: optional predicate deciding per-command visibility
# (e.g. hiding /fast for models without a registered fast backend).
self._skill_commands_provider = skill_commands_provider
self._command_filter = command_filter
def _command_allowed(self, slash_command: str) -> bool:
if self._command_filter is None:
return True
try:
return bool(self._command_filter(slash_command))
except Exception:
return True
def _iter_skill_commands(self) -> Mapping[str, dict[str, Any]]:
if self._skill_commands_provider is None:
@ -918,7 +931,7 @@ class SlashCommandCompleter(Completer):
return
# Static subcommand completions
if " " not in sub_text and base_cmd in SUBCOMMANDS:
if " " not in sub_text and base_cmd in SUBCOMMANDS and self._command_allowed(base_cmd):
for sub in SUBCOMMANDS[base_cmd]:
if sub.startswith(sub_lower) and sub != sub_lower:
yield Completion(
@ -931,6 +944,8 @@ class SlashCommandCompleter(Completer):
word = text[1:]
for cmd, desc in COMMANDS.items():
if not self._command_allowed(cmd):
continue
cmd_name = cmd[1:]
if cmd_name.startswith(word):
yield Completion(
@ -989,6 +1004,8 @@ class SlashCommandAutoSuggest(AutoSuggest):
# Still typing the command name: /upd → suggest "ate"
word = text[1:].lower()
for cmd in COMMANDS:
if self._completer is not None and not self._completer._command_allowed(cmd):
continue
cmd_name = cmd[1:] # strip leading /
if cmd_name.startswith(word) and cmd_name != word:
return Suggestion(cmd_name[len(word):])
@ -999,6 +1016,8 @@ class SlashCommandAutoSuggest(AutoSuggest):
sub_lower = sub_text.lower()
# Static subcommands
if self._completer is not None and not self._completer._command_allowed(base_cmd):
return None
if base_cmd in SUBCOMMANDS and SUBCOMMANDS[base_cmd]:
if " " not in sub_text:
for sub in SUBCOMMANDS[base_cmd]:

View file

@ -255,6 +255,7 @@ DEFAULT_CONFIG = {
# tools or receiving API responses. Only fires when the agent has
# been completely idle for this duration. 0 = unlimited.
"gateway_timeout": 1800,
"service_tier": "",
# Tool-use enforcement: injects system prompt guidance that tells the
# model to actually call tools instead of describing intended actions.
# Values: "auto" (default — applies to gpt/codex models), true/false

View file

@ -1017,6 +1017,60 @@ def provider_label(provider: Optional[str]) -> str:
return _PROVIDER_LABELS.get(normalized, original or "OpenRouter")
_FAST_MODE_BACKEND_CONFIG: dict[str, dict[str, Any]] = {
"gpt-5.4": {
"provider": "openai-codex",
"request_overrides": {"service_tier": "priority"},
},
}
def fast_mode_backend_config(model_id: Optional[str]) -> dict[str, Any] | None:
"""Return backend config for models that expose Fast mode.
To expose Fast mode for a new model, add its normalized model slug to
``_FAST_MODE_BACKEND_CONFIG`` along with the backend runtime selection and
backend-specific request overrides Hermes should apply.
"""
raw = str(model_id or "").strip().lower()
if "/" in raw:
raw = raw.split("/", 1)[1]
config = _FAST_MODE_BACKEND_CONFIG.get(raw)
return dict(config) if config else None
def model_supports_fast_mode(model_id: Optional[str]) -> bool:
"""Return whether Hermes should expose Fast mode for the active model."""
return fast_mode_backend_config(model_id) is not None
def resolve_fast_mode_runtime(model_id: Optional[str]) -> dict[str, Any] | None:
    """Resolve runtime selection and request overrides for a fast-mode model.

    Returns None when the model has no registered fast backend; otherwise a
    dict with the resolved "runtime" selection (provider, base_url, api key,
    api mode, ACP command/args, credential pool) and the backend-specific
    "request_overrides" to merge into API calls.
    """
    cfg = fast_mode_backend_config(model_id)
    if not cfg:
        return None

    # Function-scope import (NOTE(review): presumably to avoid an import
    # cycle at CLI startup — confirm before hoisting to module level).
    from hermes_cli.runtime_provider import resolve_runtime_provider

    selection = resolve_runtime_provider(
        requested=cfg.get("provider"),
        explicit_base_url=cfg.get("base_url"),
        explicit_api_key=cfg.get("api_key"),
    )
    runtime = {
        key: selection.get(key)
        for key in ("api_key", "base_url", "provider", "api_mode", "command")
    }
    runtime["args"] = list(selection.get("args") or [])
    runtime["credential_pool"] = selection.get("credential_pool")
    return {
        "runtime": runtime,
        "request_overrides": dict(cfg.get("request_overrides") or {}),
    }
def _resolve_copilot_catalog_api_key() -> str:
"""Best-effort GitHub token for fetching the Copilot model catalog."""
try:

View file

@ -500,6 +500,8 @@ class AIAgent:
status_callback: callable = None,
max_tokens: int = None,
reasoning_config: Dict[str, Any] = None,
service_tier: str = None,
request_overrides: Dict[str, Any] = None,
prefill_messages: List[Dict[str, Any]] = None,
platform: str = None,
user_id: str = None,
@ -662,6 +664,8 @@ class AIAgent:
# Model response configuration
self.max_tokens = max_tokens # None = use model default
self.reasoning_config = reasoning_config # None = use default (medium for OpenRouter)
self.service_tier = service_tier
self.request_overrides = dict(request_overrides or {})
self.prefill_messages = prefill_messages or [] # Prefilled conversation turns
# Anthropic prompt caching: auto-enabled for Claude models via OpenRouter.
@ -3343,7 +3347,7 @@ class AIAgent:
allowed_keys = {
"model", "instructions", "input", "tools", "store",
"reasoning", "include", "max_output_tokens", "temperature",
"tool_choice", "parallel_tool_calls", "prompt_cache_key",
"tool_choice", "parallel_tool_calls", "prompt_cache_key", "service_tier",
}
normalized: Dict[str, Any] = {
"model": model,
@ -3361,6 +3365,9 @@ class AIAgent:
include = api_kwargs.get("include")
if isinstance(include, list):
normalized["include"] = include
service_tier = api_kwargs.get("service_tier")
if isinstance(service_tier, str) and service_tier.strip():
normalized["service_tier"] = service_tier.strip()
# Pass through max_output_tokens and temperature
max_output_tokens = api_kwargs.get("max_output_tokens")
@ -5464,6 +5471,10 @@ class AIAgent:
"models.github.ai" in self.base_url.lower()
or "api.githubcopilot.com" in self.base_url.lower()
)
is_codex_backend = (
self.provider == "openai-codex"
or "chatgpt.com/backend-api/codex" in self.base_url.lower()
)
# Resolve reasoning effort: config > default (medium)
reasoning_effort = "medium"
@ -5501,7 +5512,10 @@ class AIAgent:
elif not is_github_responses:
kwargs["include"] = []
if self.max_tokens is not None:
if self.request_overrides:
kwargs.update(self.request_overrides)
if self.max_tokens is not None and not is_codex_backend:
kwargs["max_output_tokens"] = self.max_tokens
return kwargs

View file

@ -0,0 +1,217 @@
"""Tests for the /fast CLI command and service-tier config handling."""
import unittest
from types import SimpleNamespace
from unittest.mock import MagicMock, patch
def _import_cli():
    """Import and return the cli module, stubbing save_env_value_secure.

    Older hermes_cli.config builds lack save_env_value_secure; installing a
    benign stand-in lets cli.py import cleanly under test.
    """
    import hermes_cli.config as config_mod

    if not hasattr(config_mod, "save_env_value_secure"):
        def _stub_save(key, value):
            # Mirror the real helper's success payload without touching disk.
            return {"success": True, "stored_as": key, "validated": False}

        config_mod.save_env_value_secure = _stub_save
    import cli as cli_mod
    return cli_mod
class TestParseServiceTierConfig(unittest.TestCase):
    """Unit tests for cli._parse_service_tier_config alias handling."""

    def _parse(self, raw):
        # Resolve the function lazily so cli import side effects stay in-test.
        return _import_cli()._parse_service_tier_config(raw)

    def test_fast_maps_to_priority(self):
        for alias in ("fast", "priority"):
            self.assertEqual(self._parse(alias), "priority")

    def test_normal_disables_service_tier(self):
        for alias in ("normal", "off", ""):
            self.assertIsNone(self._parse(alias))
# Behavioral tests for HermesCLI._handle_fast_command, driven through a
# SimpleNamespace stub so no real CLI or agent is ever constructed.
class TestHandleFastCommand(unittest.TestCase):
# Minimal stand-in exposing only the attributes the handler touches.
def _make_cli(self, service_tier=None):
return SimpleNamespace(
service_tier=service_tier,
provider="openai-codex",
requested_provider="openai-codex",
model="gpt-5.4",
_fast_command_available=lambda: True,
agent=MagicMock(),
)
def test_no_args_shows_status(self):
cli_mod = _import_cli()
stub = self._make_cli(service_tier=None)
# Patch module-level output/persistence helpers so we can observe calls.
with (
patch.object(cli_mod, "_cprint") as mock_cprint,
patch.object(cli_mod, "save_config_value") as mock_save,
):
cli_mod.HermesCLI._handle_fast_command(stub, "/fast")
# Bare /fast shows status, does not change config
mock_save.assert_not_called()
# Should have printed the status line
printed = " ".join(str(c) for c in mock_cprint.call_args_list)
self.assertIn("normal", printed)
def test_no_args_shows_fast_when_enabled(self):
cli_mod = _import_cli()
stub = self._make_cli(service_tier="priority")
with (
patch.object(cli_mod, "_cprint") as mock_cprint,
patch.object(cli_mod, "save_config_value") as mock_save,
):
cli_mod.HermesCLI._handle_fast_command(stub, "/fast")
# Status-only invocation must not persist anything.
mock_save.assert_not_called()
printed = " ".join(str(c) for c in mock_cprint.call_args_list)
self.assertIn("fast", printed)
def test_normal_argument_clears_service_tier(self):
cli_mod = _import_cli()
stub = self._make_cli(service_tier="priority")
with (
patch.object(cli_mod, "_cprint"),
patch.object(cli_mod, "save_config_value", return_value=True) as mock_save,
):
cli_mod.HermesCLI._handle_fast_command(stub, "/fast normal")
# Persists "normal", clears the in-memory tier, and drops the cached
# agent so the next turn re-initializes with the new tier.
mock_save.assert_called_once_with("agent.service_tier", "normal")
self.assertIsNone(stub.service_tier)
self.assertIsNone(stub.agent)
def test_unsupported_model_does_not_expose_fast(self):
# _fast_command_available=False simulates a model with no fast backend.
stub = SimpleNamespace(
service_tier=None,
provider="openai-codex",
requested_provider="openai-codex",
model="gpt-5.3-codex",
_fast_command_available=lambda: False,
agent=MagicMock(),
)
with (
patch.object(cli_mod, "_cprint") as mock_cprint,
patch.object(cli_mod, "save_config_value") as mock_save,
):
cli_mod.HermesCLI._handle_fast_command(stub, "/fast")
# Handler should refuse: print a notice, persist nothing.
mock_save.assert_not_called()
self.assertTrue(mock_cprint.called)
class TestFastModeRegistry(unittest.TestCase):
    """The fast-mode registry should expose exactly the registered models."""

    def test_only_gpt_5_4_is_enabled_for_codex(self):
        from hermes_cli.models import fast_mode_backend_config

        expected = {
            "provider": "openai-codex",
            "request_overrides": {"service_tier": "priority"},
        }
        assert fast_mode_backend_config("gpt-5.4") == expected
        assert fast_mode_backend_config("gpt-5.3-codex") is None
# End-to-end routing tests: when the user enables Fast mode, a turn's route
# should be rewritten to the model's registered fast backend, and left
# untouched for models without one.
class TestFastModeRouting(unittest.TestCase):
def test_fast_command_exposed_for_model_even_when_provider_is_auto(self):
cli_mod = _import_cli()
# Availability is keyed on the model slug, not the configured provider.
stub = SimpleNamespace(provider="auto", requested_provider="auto", model="gpt-5.4", agent=None)
assert cli_mod.HermesCLI._fast_command_available(stub) is True
def test_turn_route_switches_to_model_backend_when_fast_enabled(self):
cli_mod = _import_cli()
# CLI state: primary runtime is OpenRouter, but the user has enabled
# the priority tier (service_tier="priority").
stub = SimpleNamespace(
model="gpt-5.4",
api_key="primary-key",
base_url="https://openrouter.ai/api/v1",
provider="openrouter",
api_mode="chat_completions",
acp_command=None,
acp_args=[],
_credential_pool=None,
_smart_model_routing={},
service_tier="priority",
)
with (
patch("agent.smart_model_routing.resolve_turn_route", return_value={
"model": "gpt-5.4",
"runtime": {
"api_key": "primary-key",
"base_url": "https://openrouter.ai/api/v1",
"provider": "openrouter",
"api_mode": "chat_completions",
"command": None,
"args": [],
"credential_pool": None,
},
"label": None,
"signature": ("gpt-5.4", "openrouter", "https://openrouter.ai/api/v1", "chat_completions", None, ()),
}),
patch("hermes_cli.runtime_provider.resolve_runtime_provider", return_value={
"provider": "openai-codex",
"api_mode": "codex_responses",
"base_url": "https://chatgpt.com/backend-api/codex",
"api_key": "codex-key",
"command": None,
"args": [],
"credential_pool": None,
}),
):
route = cli_mod.HermesCLI._resolve_turn_agent_config(stub, "hi")
# Route must be rewritten onto the Codex backend with the priority
# service_tier carried via request_overrides.
assert route["runtime"]["provider"] == "openai-codex"
assert route["runtime"]["api_mode"] == "codex_responses"
assert route["request_overrides"] == {"service_tier": "priority"}
def test_turn_route_keeps_primary_runtime_when_model_has_no_fast_backend(self):
cli_mod = _import_cli()
# Same priority preference, but gpt-5.3-codex has no fast backend
# registered, so the primary route must pass through unchanged.
stub = SimpleNamespace(
model="gpt-5.3-codex",
api_key="primary-key",
base_url="https://openrouter.ai/api/v1",
provider="openrouter",
api_mode="chat_completions",
acp_command=None,
acp_args=[],
_credential_pool=None,
_smart_model_routing={},
service_tier="priority",
)
primary_route = {
"model": "gpt-5.3-codex",
"runtime": {
"api_key": "primary-key",
"base_url": "https://openrouter.ai/api/v1",
"provider": "openrouter",
"api_mode": "chat_completions",
"command": None,
"args": [],
"credential_pool": None,
},
"label": None,
"signature": ("gpt-5.3-codex", "openrouter", "https://openrouter.ai/api/v1", "chat_completions", None, ()),
}
with patch("agent.smart_model_routing.resolve_turn_route", return_value=primary_route):
route = cli_mod.HermesCLI._resolve_turn_agent_config(stub, "hi")
assert route["runtime"]["provider"] == "openrouter"
assert route.get("request_overrides") is None
class TestConfigDefault(unittest.TestCase):
    """DEFAULT_CONFIG must ship an (empty) agent.service_tier entry."""

    def test_default_config_has_service_tier(self):
        from hermes_cli.config import DEFAULT_CONFIG

        agent_section = DEFAULT_CONFIG.get("agent", {})
        self.assertIn("service_tier", agent_section)
        self.assertEqual(agent_section["service_tier"], "")

View file

@ -446,6 +446,13 @@ class TestSubcommands:
assert "show" in subs
assert "hide" in subs
def test_fast_has_subcommands(self):
    """/fast must register its static subcommands for completion."""
    assert "/fast" in SUBCOMMANDS
    for sub in ("fast", "normal", "status"):
        assert sub in SUBCOMMANDS["/fast"]
def test_voice_has_subcommands(self):
assert "/voice" in SUBCOMMANDS
assert "on" in SUBCOMMANDS["/voice"]
@ -474,6 +481,20 @@ class TestSubcommandCompletion:
assert "high" in texts
assert "show" in texts
def test_fast_subcommand_completion_after_space(self):
    """Typing '/fast ' should offer the tier subcommands."""
    texts = {c.text for c in _completions(SlashCommandCompleter(), "/fast ")}
    assert {"fast", "normal"} <= texts
def test_fast_command_filtered_out_when_unavailable(self):
    """A command_filter rejecting /fast hides it from name completion."""
    completer = SlashCommandCompleter(command_filter=lambda cmd: cmd != "/fast")
    texts = {c.text for c in _completions(completer, "/fa")}
    assert "fast" not in texts
def test_subcommand_prefix_filters(self):
"""Typing '/reasoning sh' should only show 'show'."""
completions = _completions(SlashCommandCompleter(), "/reasoning sh")
@ -527,6 +548,13 @@ class TestGhostText:
"""/reasoning sh → 'ow'"""
assert _suggestion("/reasoning sh") == "ow"
def test_fast_subcommand_suggestion(self):
    """Ghost text for '/fast f' completes the subcommand: 'ast'."""
    assert _suggestion("/fast f") == "ast"
def test_fast_subcommand_suggestion_hidden_when_filtered(self):
    """No ghost text for '/fa' when the filter hides /fast."""
    filtered = SlashCommandCompleter(command_filter=lambda cmd: cmd != "/fast")
    assert _suggestion("/fa", completer=filtered) is None
def test_no_suggestion_for_non_slash(self):
assert _suggestion("hello") is None

View file

@ -356,6 +356,25 @@ class TestBuildApiKwargsCodex:
assert "reasoning" in kwargs
assert kwargs["reasoning"]["effort"] == "medium"
def test_includes_service_tier_via_request_overrides(self, monkeypatch):
    """request_overrides carrying service_tier must surface in API kwargs."""
    agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
                        base_url="https://chatgpt.com/backend-api/codex")
    agent.model = "gpt-5.4"
    agent.service_tier = "priority"
    agent.request_overrides = {"service_tier": "priority"}
    kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
    assert kwargs["service_tier"] == "priority"
def test_omits_max_output_tokens_for_codex_backend(self, monkeypatch):
    """The Codex backend rejects max_output_tokens, so it must be dropped."""
    agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
                        base_url="https://chatgpt.com/backend-api/codex")
    agent.model = "gpt-5.4"
    agent.max_tokens = 20
    kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
    assert "max_output_tokens" not in kwargs
def test_includes_encrypted_content_in_include(self, monkeypatch):
agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
base_url="https://chatgpt.com/backend-api/codex")

View file

@ -648,6 +648,15 @@ def test_preflight_codex_api_kwargs_allows_reasoning_and_temperature(monkeypatch
assert result["max_output_tokens"] == 4096
def test_preflight_codex_api_kwargs_allows_service_tier(monkeypatch):
    """service_tier must survive the Codex kwargs preflight filter."""
    agent = _build_agent(monkeypatch)
    kwargs = _codex_request_kwargs()
    kwargs["service_tier"] = "priority"
    filtered = agent._preflight_codex_api_kwargs(kwargs)
    assert filtered["service_tier"] == "priority"
def test_run_conversation_codex_replay_payload_keeps_call_id(monkeypatch):
agent = _build_agent(monkeypatch)
responses = [_codex_tool_call_response(), _codex_message_response("done")]