feat: add Codex fast mode toggle (/fast command)

Add /fast slash command to toggle OpenAI Codex service_tier between normal and priority ('fast') inference. Only exposed for models registered in _FAST_MODE_BACKEND_CONFIG (currently gpt-5.4).

- Registry-based backend config for extensibility
- Dynamic command visibility (hidden from help/autocomplete for non-supported models) via command_filter on SlashCommandCompleter
- service_tier flows through request_overrides from route resolution
- Omit max_output_tokens for Codex backend (rejects it)
- Persists to config.yaml under agent.service_tier

Salvage cleanup: removed simple_term_menu/input() menu (banned), bare /fast now shows status like /reasoning. Removed redundant override resolution in _build_api_kwargs — single source of truth via request_overrides from route.

Co-authored-by: Hermes Agent <hermes@nousresearch.com>
2026-04-25 00:51:20 +00:00 · 2026-04-09 18:10:57 -07:00 · 2026-04-09 18:10:57 -07:00 · d416a69288
commit d416a69288
parent 4caa635803
9 changed files with 473 additions and 5 deletions
--- a/hermes_cli/models.py
+++ b/hermes_cli/models.py
@@ -1017,6 +1017,60 @@ def provider_label(provider: Optional[str]) -> str:
    return _PROVIDER_LABELS.get(normalized, original or "OpenRouter")


+_FAST_MODE_BACKEND_CONFIG: dict[str, dict[str, Any]] = {  # normalized model slug -> backend config
+    "gpt-5.4": {
+        "provider": "openai-codex",  # handed to resolve_runtime_provider as `requested`
+        "request_overrides": {"service_tier": "priority"},  # copied into the resolved result
+    },
+}
+
+
+def fast_mode_backend_config(model_id: Optional[str]) -> dict[str, Any] | None:
+    """Return a copy of the backend config for a model that exposes Fast mode, else None.
+
+    To expose Fast mode for a new model, add its normalized model slug to
+    ``_FAST_MODE_BACKEND_CONFIG`` along with the backend runtime selection and
+    backend-specific request overrides Hermes should apply.
+    """
+    raw = str(model_id or "").strip().lower()  # None/empty input becomes ""
+    if "/" in raw:
+        raw = raw.split("/", 1)[1]  # keep only what follows the first "/"
+    config = _FAST_MODE_BACKEND_CONFIG.get(raw)
+    return dict(config) if config else None  # shallow copy; nested dicts stay shared
+
+
+def model_supports_fast_mode(model_id: Optional[str]) -> bool:
+    """Return whether Hermes should expose Fast mode for the active model."""
+    return fast_mode_backend_config(model_id) is not None  # i.e. a non-empty registry entry exists
+
+
+def resolve_fast_mode_runtime(model_id: Optional[str]) -> dict[str, Any] | None:
+    """Resolve runtime selection and request overrides for a fast-mode model."""
+    cfg = fast_mode_backend_config(model_id)
+    if not cfg:
+        return None  # model has no Fast mode config
+
+    from hermes_cli.runtime_provider import resolve_runtime_provider  # function-scope import; presumably avoids an import cycle (TODO confirm)
+
+    runtime = resolve_runtime_provider(
+        requested=cfg.get("provider"),
+        explicit_base_url=cfg.get("base_url"),  # None unless the registry entry sets it
+        explicit_api_key=cfg.get("api_key"),  # None unless the registry entry sets it
+    )
+    return {
+        "runtime": {
+            "api_key": runtime.get("api_key"),
+            "base_url": runtime.get("base_url"),
+            "provider": runtime.get("provider"),
+            "api_mode": runtime.get("api_mode"),
+            "command": runtime.get("command"),
+            "args": list(runtime.get("args") or []),  # always a fresh list, even if args is missing/None
+            "credential_pool": runtime.get("credential_pool"),
+        },
+        "request_overrides": dict(cfg.get("request_overrides") or {}),  # fresh dict, not the registry's own
+    }
+
+
 def _resolve_copilot_catalog_api_key() -> str:
    """Best-effort GitHub token for fetching the Copilot model catalog."""
    try: