mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
feat: add Codex fast mode toggle (/fast command)
Add /fast slash command to toggle OpenAI Codex service_tier between
normal and priority ('fast') inference. Only exposed for models
registered in _FAST_MODE_BACKEND_CONFIG (currently gpt-5.4).
- Registry-based backend config for extensibility
- Dynamic command visibility (hidden from help/autocomplete for
non-supported models) via command_filter on SlashCommandCompleter
- service_tier flows through request_overrides from route resolution
- Omit max_output_tokens for Codex backend (rejects it)
- Persists to config.yaml under agent.service_tier
Salvage cleanup: removed simple_term_menu/input() menu (banned),
bare /fast now shows status like /reasoning. Removed redundant
override resolution in _build_api_kwargs — single source of truth
via request_overrides from route.
Co-authored-by: Hermes Agent <hermes@nousresearch.com>
This commit is contained in:
parent
4caa635803
commit
d416a69288
9 changed files with 473 additions and 5 deletions
111
cli.py
111
cli.py
|
|
@ -120,6 +120,18 @@ def _parse_reasoning_config(effort: str) -> dict | None:
|
|||
return result
|
||||
|
||||
|
||||
def _parse_service_tier_config(raw: str) -> str | None:
|
||||
"""Parse a persisted service-tier preference into a Responses API value."""
|
||||
value = str(raw or "").strip().lower()
|
||||
if not value or value in {"normal", "default", "standard", "off", "none"}:
|
||||
return None
|
||||
if value in {"fast", "priority", "on"}:
|
||||
return "priority"
|
||||
logger.warning("Unknown service_tier '%s', ignoring", raw)
|
||||
return None
|
||||
|
||||
|
||||
|
||||
def _get_chrome_debug_candidates(system: str) -> list[str]:
|
||||
"""Return likely browser executables for local CDP auto-launch."""
|
||||
candidates: list[str] = []
|
||||
|
|
@ -239,6 +251,7 @@ def load_cli_config() -> Dict[str, Any]:
|
|||
"system_prompt": "",
|
||||
"prefill_messages_file": "",
|
||||
"reasoning_effort": "",
|
||||
"service_tier": "",
|
||||
"personalities": {
|
||||
"helpful": "You are a helpful, friendly AI assistant.",
|
||||
"concise": "You are a concise assistant. Keep responses brief and to the point.",
|
||||
|
|
@ -1634,6 +1647,9 @@ class HermesCLI:
|
|||
self.reasoning_config = _parse_reasoning_config(
|
||||
CLI_CONFIG["agent"].get("reasoning_effort", "")
|
||||
)
|
||||
self.service_tier = _parse_service_tier_config(
|
||||
CLI_CONFIG["agent"].get("service_tier", "")
|
||||
)
|
||||
|
||||
# OpenRouter provider routing preferences
|
||||
pr = CLI_CONFIG.get("provider_routing", {}) or {}
|
||||
|
|
@ -2556,8 +2572,9 @@ class HermesCLI:
|
|||
def _resolve_turn_agent_config(self, user_message: str) -> dict:
|
||||
"""Resolve model/runtime overrides for a single user turn."""
|
||||
from agent.smart_model_routing import resolve_turn_route
|
||||
from hermes_cli.models import resolve_fast_mode_runtime
|
||||
|
||||
return resolve_turn_route(
|
||||
route = resolve_turn_route(
|
||||
user_message,
|
||||
self._smart_model_routing,
|
||||
{
|
||||
|
|
@ -2572,7 +2589,36 @@ class HermesCLI:
|
|||
},
|
||||
)
|
||||
|
||||
def _init_agent(self, *, model_override: str = None, runtime_override: dict = None, route_label: str = None) -> bool:
|
||||
service_tier = getattr(self, "service_tier", None)
|
||||
if not service_tier:
|
||||
route["request_overrides"] = None
|
||||
return route
|
||||
|
||||
try:
|
||||
fast_runtime = resolve_fast_mode_runtime(route.get("model"))
|
||||
except Exception:
|
||||
route["request_overrides"] = None
|
||||
return route
|
||||
if not fast_runtime:
|
||||
route["request_overrides"] = None
|
||||
return route
|
||||
|
||||
runtime = fast_runtime["runtime"]
|
||||
route["runtime"] = runtime
|
||||
route["request_overrides"] = fast_runtime["request_overrides"]
|
||||
route["label"] = f"fast route → {route.get('model')} ({runtime.get('provider')})"
|
||||
route["signature"] = (
|
||||
route.get("model"),
|
||||
runtime.get("provider"),
|
||||
runtime.get("base_url"),
|
||||
runtime.get("api_mode"),
|
||||
runtime.get("command"),
|
||||
tuple(runtime.get("args") or ()),
|
||||
json.dumps(route["request_overrides"], sort_keys=True),
|
||||
)
|
||||
return route
|
||||
|
||||
def _init_agent(self, *, model_override: str = None, runtime_override: dict = None, route_label: str = None, request_overrides: dict | None = None) -> bool:
|
||||
"""
|
||||
Initialize the agent on first use.
|
||||
When resuming a session, restores conversation history from SQLite.
|
||||
|
|
@ -2659,6 +2705,8 @@ class HermesCLI:
|
|||
ephemeral_system_prompt=self.system_prompt if self.system_prompt else None,
|
||||
prefill_messages=self.prefill_messages or None,
|
||||
reasoning_config=self.reasoning_config,
|
||||
service_tier=self.service_tier,
|
||||
request_overrides=request_overrides,
|
||||
providers_allowed=self._providers_only,
|
||||
providers_ignored=self._providers_ignore,
|
||||
providers_order=self._providers_order,
|
||||
|
|
@ -3316,6 +3364,20 @@ class HermesCLI:
|
|||
f"{toolsets_info}{provider_info}"
|
||||
)
|
||||
|
||||
def _fast_command_available(self) -> bool:
|
||||
try:
|
||||
from hermes_cli.models import model_supports_fast_mode
|
||||
except Exception:
|
||||
return False
|
||||
agent = getattr(self, "agent", None)
|
||||
model = getattr(agent, "model", None) or getattr(self, "model", None)
|
||||
return model_supports_fast_mode(model)
|
||||
|
||||
def _command_available(self, slash_command: str) -> bool:
|
||||
if slash_command == "/fast":
|
||||
return self._fast_command_available()
|
||||
return True
|
||||
|
||||
def show_help(self):
|
||||
"""Display help information with categorized commands."""
|
||||
from hermes_cli.commands import COMMANDS_BY_CATEGORY
|
||||
|
|
@ -3336,6 +3398,8 @@ class HermesCLI:
|
|||
for category, commands in COMMANDS_BY_CATEGORY.items():
|
||||
_cprint(f"\n {_BOLD}── {category} ──{_RST}")
|
||||
for cmd, desc in commands.items():
|
||||
if not self._command_available(cmd):
|
||||
continue
|
||||
ChatConsole().print(f" [bold {_accent_hex()}]{cmd:<15}[/] [dim]-[/] {_escape(desc)}")
|
||||
|
||||
if _skill_commands:
|
||||
|
|
@ -4788,6 +4852,8 @@ class HermesCLI:
|
|||
self._toggle_yolo()
|
||||
elif canonical == "reasoning":
|
||||
self._handle_reasoning_command(cmd_original)
|
||||
elif canonical == "fast":
|
||||
self._handle_fast_command(cmd_original)
|
||||
elif canonical == "compress":
|
||||
self._manual_compress()
|
||||
elif canonical == "usage":
|
||||
|
|
@ -5027,6 +5093,8 @@ class HermesCLI:
|
|||
platform="cli",
|
||||
session_db=self._session_db,
|
||||
reasoning_config=self.reasoning_config,
|
||||
service_tier=self.service_tier,
|
||||
request_overrides=turn_route.get("request_overrides"),
|
||||
providers_allowed=self._providers_only,
|
||||
providers_ignored=self._providers_ignore,
|
||||
providers_order=self._providers_order,
|
||||
|
|
@ -5162,6 +5230,8 @@ class HermesCLI:
|
|||
session_id=task_id,
|
||||
platform="cli",
|
||||
reasoning_config=self.reasoning_config,
|
||||
service_tier=self.service_tier,
|
||||
request_overrides=turn_route.get("request_overrides"),
|
||||
providers_allowed=self._providers_only,
|
||||
providers_ignored=self._providers_ignore,
|
||||
providers_order=self._providers_order,
|
||||
|
|
@ -5591,6 +5661,40 @@ class HermesCLI:
|
|||
else:
|
||||
_cprint(f" {_GOLD}✓ Reasoning effort set to '{arg}' (session only){_RST}")
|
||||
|
||||
def _handle_fast_command(self, cmd: str):
    """Handle /fast — choose the Codex Responses service tier.

    Bare ``/fast`` (or ``/fast status``) prints the current tier.  An
    explicit argument switches tiers, drops the cached agent so the next
    turn re-initializes with the new tier, and persists the choice to
    config.yaml under ``agent.service_tier``.
    """
    if not self._fast_command_available():
        _cprint(" (._.) /fast is only available for models that explicitly expose a fast backend.")
        return

    parts = cmd.strip().split(maxsplit=1)
    if len(parts) < 2 or parts[1].strip().lower() == "status":
        # Bare /fast mirrors /reasoning: show status instead of a menu.
        status = "fast" if self.service_tier == "priority" else "normal"
        _cprint(f" {_GOLD}Codex inference tier: {status}{_RST}")
        _cprint(f" {_DIM}Usage: /fast [normal|fast|status]{_RST}")
        return

    arg = parts[1].strip().lower()

    # Accept the same vocabulary as _parse_service_tier_config so any value
    # the config file accepts also works interactively (e.g. "priority").
    if arg in {"fast", "priority", "on"}:
        self.service_tier = "priority"
        saved_value = "fast"
        label = "FAST"
    elif arg in {"normal", "default", "standard", "off", "none"}:
        self.service_tier = None
        saved_value = "normal"
        label = "NORMAL"
    else:
        _cprint(f" {_DIM}(._.) Unknown argument: {arg}{_RST}")
        _cprint(f" {_DIM}Usage: /fast [normal|fast|status]{_RST}")
        return

    self.agent = None  # Force agent re-init with new service-tier config
    if save_config_value("agent.service_tier", saved_value):
        _cprint(f" {_GOLD}✓ Codex inference tier set to {label} (saved to config){_RST}")
    else:
        _cprint(f" {_GOLD}✓ Codex inference tier set to {label} (session only){_RST}")
|
||||
|
||||
def _on_reasoning(self, reasoning_text: str):
|
||||
"""Callback for intermediate reasoning display during tool-call loops."""
|
||||
if not reasoning_text:
|
||||
|
|
@ -6749,6 +6853,7 @@ class HermesCLI:
|
|||
model_override=turn_route["model"],
|
||||
runtime_override=turn_route["runtime"],
|
||||
route_label=turn_route["label"],
|
||||
request_overrides=turn_route.get("request_overrides"),
|
||||
):
|
||||
return None
|
||||
|
||||
|
|
@ -7931,6 +8036,7 @@ class HermesCLI:
|
|||
|
||||
_completer = SlashCommandCompleter(
|
||||
skill_commands_provider=lambda: _skill_commands,
|
||||
command_filter=cli_ref._command_available,
|
||||
)
|
||||
input_area = TextArea(
|
||||
height=Dimension(min=1, max=8, preferred=1),
|
||||
|
|
@ -9009,6 +9115,7 @@ def main(
|
|||
model_override=turn_route["model"],
|
||||
runtime_override=turn_route["runtime"],
|
||||
route_label=turn_route["label"],
|
||||
request_overrides=turn_route.get("request_overrides"),
|
||||
):
|
||||
cli.agent.quiet_mode = True
|
||||
cli.agent.suppress_status_output = True
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue