feat: add Codex fast mode toggle (/fast command)

Add /fast slash command to toggle OpenAI Codex service_tier between
normal and priority ('fast') inference. Only exposed for models
registered in _FAST_MODE_BACKEND_CONFIG (currently gpt-5.4).

- Registry-based backend config for extensibility
- Dynamic command visibility (hidden from help/autocomplete for
  non-supported models) via command_filter on SlashCommandCompleter
- service_tier flows through request_overrides from route resolution
- Omit max_output_tokens for the Codex backend (which rejects it)
- Persists to config.yaml under agent.service_tier

Salvage cleanup: removed the simple_term_menu/input() menu (banned);
bare /fast now shows the current status, like /reasoning. Removed the
redundant override resolution in _build_api_kwargs — request_overrides
from the route is the single source of truth.

Co-authored-by: Hermes Agent <hermes@nousresearch.com>
This commit is contained in:
g-guthrie 2026-04-09 18:10:57 -07:00 committed by Teknium
parent 4caa635803
commit d416a69288
9 changed files with 473 additions and 5 deletions

View file

@ -1017,6 +1017,60 @@ def provider_label(provider: Optional[str]) -> str:
return _PROVIDER_LABELS.get(normalized, original or "OpenRouter")
_FAST_MODE_BACKEND_CONFIG: dict[str, dict[str, Any]] = {
"gpt-5.4": {
"provider": "openai-codex",
"request_overrides": {"service_tier": "priority"},
},
}
def fast_mode_backend_config(model_id: Optional[str]) -> dict[str, Any] | None:
"""Return backend config for models that expose Fast mode.
To expose Fast mode for a new model, add its normalized model slug to
``_FAST_MODE_BACKEND_CONFIG`` along with the backend runtime selection and
backend-specific request overrides Hermes should apply.
"""
raw = str(model_id or "").strip().lower()
if "/" in raw:
raw = raw.split("/", 1)[1]
config = _FAST_MODE_BACKEND_CONFIG.get(raw)
return dict(config) if config else None
def model_supports_fast_mode(model_id: Optional[str]) -> bool:
    """Tell whether Fast mode should be exposed for the given model.

    A model supports Fast mode exactly when ``fast_mode_backend_config``
    yields a config for it (i.e. does not return ``None``).
    """
    backend_cfg = fast_mode_backend_config(model_id)
    return backend_cfg is not None
def resolve_fast_mode_runtime(model_id: Optional[str]) -> dict[str, Any] | None:
    """Build the runtime selection and request overrides for a fast-mode model.

    Args:
        model_id: Model identifier, looked up via ``fast_mode_backend_config``.

    Returns:
        ``None`` when the model has no fast-mode config. Otherwise a dict with
        two keys: ``"runtime"`` (selected fields of the value returned by
        ``resolve_runtime_provider``) and ``"request_overrides"`` (a copy of
        the config's overrides, or an empty dict when there are none).
    """
    backend_cfg = fast_mode_backend_config(model_id)
    if not backend_cfg:
        return None

    # Imported lazily, and only once a fast-mode config is known to exist.
    from hermes_cli.runtime_provider import resolve_runtime_provider

    resolved = resolve_runtime_provider(
        requested=backend_cfg.get("provider"),
        explicit_base_url=backend_cfg.get("base_url"),
        explicit_api_key=backend_cfg.get("api_key"),
    )

    # Fields carried over from the resolved provider as-is.
    passthrough_keys = ("api_key", "base_url", "provider", "api_mode", "command")
    runtime = {key: resolved.get(key) for key in passthrough_keys}
    # ``args`` is always handed back as a fresh list, even if missing or None.
    runtime["args"] = list(resolved.get("args") or [])
    runtime["credential_pool"] = resolved.get("credential_pool")

    overrides = backend_cfg.get("request_overrides") or {}
    return {
        "runtime": runtime,
        "request_overrides": dict(overrides),
    }
def _resolve_copilot_catalog_api_key() -> str:
"""Best-effort GitHub token for fetching the Copilot model catalog."""
try: