mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
feat: add Codex fast mode toggle (/fast command)
Add /fast slash command to toggle OpenAI Codex service_tier between
normal and priority ('fast') inference. Only exposed for models
registered in _FAST_MODE_BACKEND_CONFIG (currently gpt-5.4).
- Registry-based backend config for extensibility
- Dynamic command visibility (hidden from help/autocomplete for
non-supported models) via command_filter on SlashCommandCompleter
- service_tier flows through request_overrides from route resolution
- Omit max_output_tokens for the Codex backend, which rejects that parameter
- Persists to config.yaml under agent.service_tier
Salvage cleanup: removed the simple_term_menu/input() menu (banned);
bare /fast now shows the current status, like /reasoning. Removed the
redundant override resolution in _build_api_kwargs — request_overrides
from the route is now the single source of truth.
Co-authored-by: Hermes Agent <hermes@nousresearch.com>
This commit is contained in:
parent
4caa635803
commit
d416a69288
9 changed files with 473 additions and 5 deletions
|
|
@ -1017,6 +1017,60 @@ def provider_label(provider: Optional[str]) -> str:
|
|||
return _PROVIDER_LABELS.get(normalized, original or "OpenRouter")
|
||||
|
||||
|
||||
_FAST_MODE_BACKEND_CONFIG: dict[str, dict[str, Any]] = {
|
||||
"gpt-5.4": {
|
||||
"provider": "openai-codex",
|
||||
"request_overrides": {"service_tier": "priority"},
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def fast_mode_backend_config(model_id: Optional[str]) -> dict[str, Any] | None:
|
||||
"""Return backend config for models that expose Fast mode.
|
||||
|
||||
To expose Fast mode for a new model, add its normalized model slug to
|
||||
``_FAST_MODE_BACKEND_CONFIG`` along with the backend runtime selection and
|
||||
backend-specific request overrides Hermes should apply.
|
||||
"""
|
||||
raw = str(model_id or "").strip().lower()
|
||||
if "/" in raw:
|
||||
raw = raw.split("/", 1)[1]
|
||||
config = _FAST_MODE_BACKEND_CONFIG.get(raw)
|
||||
return dict(config) if config else None
|
||||
|
||||
|
||||
def model_supports_fast_mode(model_id: Optional[str]) -> bool:
    """Report whether Hermes should expose Fast mode for the active model.

    True exactly when ``fast_mode_backend_config`` yields a config for
    *model_id*; False otherwise (including ``None`` or unknown models).
    """
    backend_cfg = fast_mode_backend_config(model_id)
    return backend_cfg is not None
|
||||
|
||||
|
||||
def resolve_fast_mode_runtime(model_id: Optional[str]) -> dict[str, Any] | None:
    """Build the runtime selection and request overrides for a fast-mode model.

    Returns ``None`` when *model_id* has no Fast mode backend config.
    Otherwise returns a dict with two keys:

    * ``"runtime"`` -- fields read from ``resolve_runtime_provider``'s result
      (``api_key``, ``base_url``, ``provider``, ``api_mode``, ``command``,
      ``args``, ``credential_pool``); ``args`` is always a fresh list.
    * ``"request_overrides"`` -- a copy of the config's request overrides
      (empty dict when the config defines none).
    """
    backend = fast_mode_backend_config(model_id)
    if not backend:
        return None

    # Function-scope import (presumably to avoid an import cycle -- confirm).
    from hermes_cli.runtime_provider import resolve_runtime_provider

    resolved = resolve_runtime_provider(
        requested=backend.get("provider"),
        explicit_base_url=backend.get("base_url"),
        explicit_api_key=backend.get("api_key"),
    )

    # Fields forwarded verbatim from the resolved runtime, in output order.
    passthrough = ("api_key", "base_url", "provider", "api_mode", "command")
    runtime_view = {field: resolved.get(field) for field in passthrough}
    # Copy ``args`` so the returned list is never shared; a missing or empty
    # value becomes [].
    runtime_view["args"] = list(resolved.get("args") or [])
    runtime_view["credential_pool"] = resolved.get("credential_pool")

    overrides = dict(backend.get("request_overrides") or {})
    return {"runtime": runtime_view, "request_overrides": overrides}
|
||||
|
||||
|
||||
def _resolve_copilot_catalog_api_key() -> str:
|
||||
"""Best-effort GitHub token for fetching the Copilot model catalog."""
|
||||
try:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue