diff --git a/hermes_cli/web_server.py b/hermes_cli/web_server.py index c9f8f6bf1b..1cbd466ec6 100644 --- a/hermes_cli/web_server.py +++ b/hermes_cli/web_server.py @@ -23,7 +23,7 @@ import time import urllib.parse import urllib.request from pathlib import Path -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List, Optional, Tuple import yaml @@ -445,6 +445,20 @@ class EnvVarReveal(BaseModel): key: str +class ModelAssignment(BaseModel): + """Payload for POST /api/model/set — assign a provider/model to a slot. + + scope="main" → writes model.provider + model.default + scope="auxiliary" → writes auxiliary..provider + auxiliary..model + scope="auxiliary" with task="" → applied to every auxiliary.* slot + scope="auxiliary" with task="__reset__" → resets every slot to provider="auto" + """ + scope: str + provider: str + model: str + task: str = "" + + _GATEWAY_HEALTH_URL = os.getenv("GATEWAY_HEALTH_URL") try: _GATEWAY_HEALTH_TIMEOUT = float(os.getenv("GATEWAY_HEALTH_TIMEOUT", "3")) @@ -921,6 +935,206 @@ def get_model_info(): return dict(_EMPTY_MODEL_INFO) +# --------------------------------------------------------------------------- +# Model assignment — pick provider+model for main slot or auxiliary slots. +# Mirrors the model.options JSON-RPC from tui_gateway but uses REST so the +# Models page (which has no chat PTY open) can drive it. +# --------------------------------------------------------------------------- + +# Canonical auxiliary task slots. Keep in sync with DEFAULT_CONFIG["auxiliary"] +# in hermes_cli/config.py — listed here for deterministic ordering in the UI. +_AUX_TASK_SLOTS: Tuple[str, ...] = ( + "vision", + "web_extract", + "compression", + "session_search", + "skills_hub", + "approval", + "mcp", + "title_generation", +) + + +@app.get("/api/model/options") +def get_model_options(): + """Return authenticated providers + their curated model lists. 
+ + REST equivalent of the ``model.options`` JSON-RPC on tui_gateway, so the + dashboard Models page can render the picker without a live chat session. + The response shape matches ``model.options`` 1:1 so ``ModelPickerDialog`` + can share the same types. + """ + try: + from hermes_cli.model_switch import list_authenticated_providers + + cfg = load_config() + model_cfg = cfg.get("model", {}) + if isinstance(model_cfg, dict): + current_model = model_cfg.get("default", model_cfg.get("name", "")) or "" + current_provider = model_cfg.get("provider", "") or "" + current_base_url = model_cfg.get("base_url", "") or "" + else: + current_model = str(model_cfg) if model_cfg else "" + current_provider = "" + current_base_url = "" + + user_providers = cfg.get("providers") if isinstance(cfg.get("providers"), dict) else {} + custom_providers = ( + cfg.get("custom_providers") + if isinstance(cfg.get("custom_providers"), list) + else [] + ) + + providers = list_authenticated_providers( + current_provider=current_provider, + current_base_url=current_base_url, + current_model=current_model, + user_providers=user_providers, + custom_providers=custom_providers, + max_models=50, + ) + return { + "providers": providers, + "model": current_model, + "provider": current_provider, + } + except Exception: + _log.exception("GET /api/model/options failed") + raise HTTPException(status_code=500, detail="Failed to list model options") + + +@app.get("/api/model/auxiliary") +def get_auxiliary_models(): + """Return current auxiliary task assignments. + + Shape: + { + "tasks": [ + {"task": "vision", "provider": "auto", "model": "", "base_url": ""}, + ... 
+ ], + "main": {"provider": "openrouter", "model": "anthropic/claude-opus-4.7"}, + } + """ + try: + cfg = load_config() + aux_cfg = cfg.get("auxiliary", {}) + if not isinstance(aux_cfg, dict): + aux_cfg = {} + + tasks = [] + for slot in _AUX_TASK_SLOTS: + slot_cfg = aux_cfg.get(slot, {}) if isinstance(aux_cfg.get(slot), dict) else {} + tasks.append({ + "task": slot, + "provider": str(slot_cfg.get("provider", "auto") or "auto"), + "model": str(slot_cfg.get("model", "") or ""), + "base_url": str(slot_cfg.get("base_url", "") or ""), + }) + + model_cfg = cfg.get("model", {}) + if isinstance(model_cfg, dict): + main = { + "provider": str(model_cfg.get("provider", "") or ""), + "model": str(model_cfg.get("default", model_cfg.get("name", "")) or ""), + } + else: + main = {"provider": "", "model": str(model_cfg) if model_cfg else ""} + + return {"tasks": tasks, "main": main} + except Exception: + _log.exception("GET /api/model/auxiliary failed") + raise HTTPException(status_code=500, detail="Failed to read auxiliary config") + + +@app.post("/api/model/set") +async def set_model_assignment(body: ModelAssignment): + """Assign a model to the main slot or an auxiliary task slot. + + Writes to ``~/.hermes/config.yaml`` — applies to **new** sessions only. + The currently running chat PTY (if any) is not affected; use the + ``/model`` slash command inside a chat to hot-swap that specific session. 
+ """ + scope = (body.scope or "").strip().lower() + provider = (body.provider or "").strip() + model = (body.model or "").strip() + task = (body.task or "").strip().lower() + + if scope not in ("main", "auxiliary"): + raise HTTPException(status_code=400, detail="scope must be 'main' or 'auxiliary'") + + try: + cfg = load_config() + + if scope == "main": + if not provider or not model: + raise HTTPException(status_code=400, detail="provider and model required for main") + model_cfg = cfg.get("model", {}) + if not isinstance(model_cfg, dict): + model_cfg = {} + model_cfg["provider"] = provider + model_cfg["default"] = model + # Clear stale base_url so the resolver picks the provider's own default. + if "base_url" in model_cfg and model_cfg.get("base_url"): + model_cfg["base_url"] = "" + # Also clear hardcoded context_length override — new model may have + # a different context window. + if "context_length" in model_cfg: + model_cfg.pop("context_length", None) + cfg["model"] = model_cfg + save_config(cfg) + return {"ok": True, "scope": "main", "provider": provider, "model": model} + + # scope == "auxiliary" + aux = cfg.get("auxiliary") + if not isinstance(aux, dict): + aux = {} + + if task == "__reset__": + # Reset every slot to provider="auto", model="" — keeps other fields intact. 
+ for slot in _AUX_TASK_SLOTS: + slot_cfg = aux.get(slot) + if not isinstance(slot_cfg, dict): + slot_cfg = {} + slot_cfg["provider"] = "auto" + slot_cfg["model"] = "" + aux[slot] = slot_cfg + cfg["auxiliary"] = aux + save_config(cfg) + return {"ok": True, "scope": "auxiliary", "reset": True} + + if not provider: + raise HTTPException(status_code=400, detail="provider required for auxiliary") + + targets = [task] if task else list(_AUX_TASK_SLOTS) + for slot in targets: + if slot not in _AUX_TASK_SLOTS: + raise HTTPException(status_code=400, detail=f"unknown auxiliary task: {slot}") + slot_cfg = aux.get(slot) + if not isinstance(slot_cfg, dict): + slot_cfg = {} + slot_cfg["provider"] = provider + slot_cfg["model"] = model + aux[slot] = slot_cfg + + cfg["auxiliary"] = aux + save_config(cfg) + return { + "ok": True, + "scope": "auxiliary", + "tasks": targets, + "provider": provider, + "model": model, + } + except HTTPException: + raise + except Exception: + _log.exception("POST /api/model/set failed") + raise HTTPException(status_code=500, detail="Failed to save model assignment") + + + + def _denormalize_config_from_web(config: Dict[str, Any]) -> Dict[str, Any]: """Reverse _normalize_config_for_web before saving. diff --git a/web/src/components/ModelPickerDialog.tsx b/web/src/components/ModelPickerDialog.tsx index 66987d2ef9..0cc195ecaa 100644 --- a/web/src/components/ModelPickerDialog.tsx +++ b/web/src/components/ModelPickerDialog.tsx @@ -11,9 +11,18 @@ import { useEffect, useMemo, useRef, useState } from "react"; * Stage 1: pick provider (authenticated providers only) * Stage 2: pick model within that provider * - * On confirm, emits `/model --provider [--global]` through - * the parent callback so ChatPage can dispatch it via the existing slash - * pipeline. That keeps persistence + actual switch logic in one place. + * Two invocation modes: + * + * 1. Chat-session mode (ChatSidebar) — pass `gw` + `sessionId`. 
The picker + * loads options via `model.options` JSON-RPC and emits the result as a + * slash command string (`/model --provider [--global]`) + * through `onSubmit`, which the ChatPage pipes to `slashExec`. + * + * 2. Standalone mode (ModelsPage, Config settings) — pass a `loader` and + * `onApply`. The picker fetches options via the REST endpoint and calls + * `onApply(provider, model, persistGlobal)` instead of emitting a slash + * command. This lets the Models page reuse the same UI without + * requiring an open chat PTY. */ interface ModelOptionProvider { @@ -32,14 +41,38 @@ interface ModelOptionsResponse { } interface Props { - gw: GatewayClient; - sessionId: string; + /** Chat-mode: when present, picker emits a slash command via onSubmit. */ + gw?: GatewayClient; + sessionId?: string; + onSubmit?(slashCommand: string): void; + + /** Standalone-mode: when present (and onSubmit absent), picker calls onApply. */ + loader?(): Promise; + onApply?(args: { + provider: string; + model: string; + persistGlobal: boolean; + }): Promise | void; + onClose(): void; - /** Parent runs the resulting slash command through slashExec. */ - onSubmit(slashCommand: string): void; + title?: string; + /** If true, hides "Persist globally" checkbox — always saves to config.yaml. 
*/ + alwaysGlobal?: boolean; } -export function ModelPickerDialog({ gw, sessionId, onClose, onSubmit }: Props) { +export function ModelPickerDialog(props: Props) { + const { + gw, + sessionId, + onSubmit, + loader, + onApply, + onClose, + title = "Switch Model", + alwaysGlobal = false, + } = props; + const standalone = !!loader && !!onApply; + const [providers, setProviders] = useState([]); const [currentModel, setCurrentModel] = useState(""); const [currentProviderSlug, setCurrentProviderSlug] = useState(""); @@ -48,17 +81,22 @@ export function ModelPickerDialog({ gw, sessionId, onClose, onSubmit }: Props) { const [selectedSlug, setSelectedSlug] = useState(""); const [selectedModel, setSelectedModel] = useState(""); const [query, setQuery] = useState(""); - const [persistGlobal, setPersistGlobal] = useState(false); + const [persistGlobal, setPersistGlobal] = useState(alwaysGlobal); + const [applying, setApplying] = useState(false); const closedRef = useRef(false); // Load providers + models on open. useEffect(() => { closedRef.current = false; - gw.request( - "model.options", - sessionId ? { session_id: sessionId } : {}, - ) + const promise = standalone + ? (loader as () => Promise)() + : (gw as GatewayClient).request( + "model.options", + sessionId ? { session_id: sessionId } : {}, + ); + + promise .then((r) => { if (closedRef.current) return; const next = r?.providers ?? []; @@ -80,7 +118,9 @@ export function ModelPickerDialog({ gw, sessionId, onClose, onSubmit }: Props) { return () => { closedRef.current = true; }; - }, [gw, sessionId]); + // Deliberately omit props from deps — stable for the dialog's lifetime. + // eslint-disable-next-line react-hooks/exhaustive-deps + }, []); // Esc closes. 
useEffect(() => { @@ -125,15 +165,31 @@ export function ModelPickerDialog({ gw, sessionId, onClose, onSubmit }: Props) { [models, needle], ); - const canConfirm = !!selectedProvider && !!selectedModel; + const canConfirm = !!selectedProvider && !!selectedModel && !applying; - const confirm = () => { - if (!canConfirm) return; - const global = persistGlobal ? " --global" : ""; - onSubmit( - `/model ${selectedModel} --provider ${selectedProvider.slug}${global}`, - ); - onClose(); + const confirm = async () => { + if (!canConfirm || !selectedProvider) return; + if (standalone && onApply) { + setApplying(true); + try { + await onApply({ + provider: selectedProvider.slug, + model: selectedModel, + persistGlobal, + }); + onClose(); + } catch (e) { + setError(e instanceof Error ? e.message : String(e)); + } finally { + setApplying(false); + } + } else if (onSubmit) { + const global = persistGlobal ? " --global" : ""; + onSubmit( + `/model ${selectedModel} --provider ${selectedProvider.slug}${global}`, + ); + onClose(); + } }; return ( @@ -160,7 +216,7 @@ export function ModelPickerDialog({ gw, sessionId, onClose, onSubmit }: Props) { id="model-picker-title" className="font-display text-base tracking-wider uppercase" > - Switch Model + {title}

current: {currentModel || "(unknown)"} @@ -212,22 +268,28 @@ export function ModelPickerDialog({ gw, sessionId, onClose, onSubmit }: Props) {

- + {alwaysGlobal ? ( + + Saves to config.yaml — applies to new sessions. + + ) : ( + + )}
-
diff --git a/web/src/lib/api.ts b/web/src/lib/api.ts index 4d101a3d90..ef8e3ae10d 100644 --- a/web/src/lib/api.ts +++ b/web/src/lib/api.ts @@ -69,6 +69,14 @@ export const api = { getDefaults: () => fetchJSON>("/api/config/defaults"), getSchema: () => fetchJSON<{ fields: Record; category_order: string[] }>("/api/config/schema"), getModelInfo: () => fetchJSON("/api/model/info"), + getModelOptions: () => fetchJSON("/api/model/options"), + getAuxiliaryModels: () => fetchJSON("/api/model/auxiliary"), + setModelAssignment: (body: ModelAssignmentRequest) => + fetchJSON("/api/model/set", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify(body), + }), saveConfig: (config: Record) => fetchJSON<{ ok: boolean }>("/api/config", { method: "PUT", @@ -473,6 +481,54 @@ export interface ModelInfoResponse { }; } +// ── Model options / assignment types ────────────────────────────────── + +export interface ModelOptionProvider { + name: string; + slug: string; + models?: string[]; + total_models?: number; + is_current?: boolean; + is_user_defined?: boolean; + source?: string; + warning?: string; +} + +export interface ModelOptionsResponse { + model?: string; + provider?: string; + providers?: ModelOptionProvider[]; +} + +export interface AuxiliaryTaskAssignment { + task: string; + provider: string; + model: string; + base_url: string; +} + +export interface AuxiliaryModelsResponse { + tasks: AuxiliaryTaskAssignment[]; + main: { provider: string; model: string }; +} + +export interface ModelAssignmentRequest { + scope: "main" | "auxiliary"; + provider: string; + model: string; + /** For auxiliary: task slot name, "" for all, "__reset__" to reset all. 
*/ + task?: string; +} + +export interface ModelAssignmentResponse { + ok: boolean; + scope?: string; + provider?: string; + model?: string; + tasks?: string[]; + reset?: boolean; +} + // ── OAuth provider types ──────────────────────────────────────────────── export interface OAuthProviderStatus { diff --git a/web/src/pages/ModelsPage.tsx b/web/src/pages/ModelsPage.tsx index a4f7864054..97ff1d0695 100644 --- a/web/src/pages/ModelsPage.tsx +++ b/web/src/pages/ModelsPage.tsx @@ -1,15 +1,23 @@ import { useCallback, useEffect, useLayoutEffect, useState } from "react"; import { Brain, + ChevronDown, Cpu, DollarSign, Eye, RefreshCw, + Settings2, + Star, Wrench, Zap, } from "lucide-react"; import { api } from "@/lib/api"; -import type { ModelsAnalyticsModelEntry, ModelsAnalyticsResponse } from "@/lib/api"; +import type { + AuxiliaryModelsResponse, + AuxiliaryTaskAssignment, + ModelsAnalyticsModelEntry, + ModelsAnalyticsResponse, +} from "@/lib/api"; import { timeAgo } from "@/lib/utils"; import { formatTokenCount } from "@/lib/format"; import { Button, Spinner, Stats } from "@nous-research/ui"; @@ -18,6 +26,7 @@ import { Badge } from "@nous-research/ui"; import { usePageHeader } from "@/contexts/usePageHeader"; import { useI18n } from "@/i18n"; import { PluginSlot } from "@/plugins"; +import { ModelPickerDialog } from "@/components/ModelPickerDialog"; const PERIODS = [ { label: "7d", days: 7 }, @@ -25,6 +34,18 @@ const PERIODS = [ { label: "90d", days: 90 }, ] as const; +// Must match _AUX_TASK_SLOTS in hermes_cli/web_server.py. 
+const AUX_TASKS: readonly { key: string; label: string; hint: string }[] = [ + { key: "vision", label: "Vision", hint: "Image analysis" }, + { key: "web_extract", label: "Web Extract", hint: "Page summarization" }, + { key: "compression", label: "Compression", hint: "Context compaction" }, + { key: "session_search", label: "Session Search", hint: "Recall queries" }, + { key: "skills_hub", label: "Skills Hub", hint: "Skill search" }, + { key: "approval", label: "Approval", hint: "Smart auto-approve" }, + { key: "mcp", label: "MCP", hint: "MCP tool routing" }, + { key: "title_generation", label: "Title Gen", hint: "Session titles" }, +] as const; + function formatTokens(n: number): string { if (n >= 1_000_000) return `${(n / 1_000_000).toFixed(1)}M`; if (n >= 1_000) return `${(n / 1_000).toFixed(1)}K`; @@ -134,20 +155,168 @@ function CapabilityBadges({ ); } +/* ──────────────────────────────────────────────────────────────────── */ +/* Per-card "Use as" menu */ +/* ──────────────────────────────────────────────────────────────────── */ + +function UseAsMenu({ + provider, + model, + isMain, + mainAuxTask, + onAssigned, +}: { + provider: string; + model: string; + /** True when this card's model+provider match config.yaml's main slot. */ + isMain: boolean; + /** If this model is assigned to a specific aux task, that task's key. */ + mainAuxTask: string | null; + onAssigned(): void; +}) { + const [open, setOpen] = useState(false); + const [busy, setBusy] = useState(false); + const [error, setError] = useState(null); + + const assign = async ( + scope: "main" | "auxiliary", + task: string, + ) => { + if (!provider || !model) { + setError("Missing provider/model"); + return; + } + setBusy(true); + setError(null); + try { + await api.setModelAssignment({ scope, provider, model, task }); + onAssigned(); + setOpen(false); + } catch (e) { + setError(e instanceof Error ? e.message : String(e)); + } finally { + setBusy(false); + } + }; + + // Close on outside click. 
+ useEffect(() => { + if (!open) return; + const onDown = (e: MouseEvent) => { + const target = e.target as HTMLElement | null; + if (target && !target.closest?.("[data-use-as-menu]")) setOpen(false); + }; + window.addEventListener("mousedown", onDown); + return () => window.removeEventListener("mousedown", onDown); + }, [open]); + + return ( +
+ + {open && ( +
+ + +
+ Auxiliary task +
+ + + + {AUX_TASKS.map((t) => ( + + ))} + + {error && ( +
+ {error} +
+ )} +
+ )} +
+ ); +} + +/* ──────────────────────────────────────────────────────────────────── */ +/* ModelCard */ +/* ──────────────────────────────────────────────────────────────────── */ + function ModelCard({ entry, rank, + main, + aux, + onAssigned, }: { entry: ModelsAnalyticsModelEntry; rank: number; + main: { provider: string; model: string } | null; + aux: AuxiliaryTaskAssignment[]; + onAssigned(): void; }) { const { t } = useI18n(); const provider = entry.provider || modelVendor(entry.model); const totalTokens = entry.input_tokens + entry.output_tokens; const caps = entry.capabilities; + const isMain = + !!main && + main.provider === provider && + main.model === entry.model; + + // First aux task currently using this model (if any). + const mainAuxTask = + aux.find( + (a) => a.provider === provider && a.model === entry.model, + )?.task ?? null; + return ( - +
@@ -158,6 +327,16 @@ function ModelCard({ {shortModelName(entry.model)} + {isMain && ( + + main + + )} + {mainAuxTask && ( + + aux · {mainAuxTask} + + )}
{provider && ( @@ -177,13 +356,22 @@ function ModelCard({ )}
-
-
- {formatTokens(totalTokens)} -
-
- {t.models.tokens} +
+
+
+ {formatTokens(totalTokens)} +
+
+ {t.models.tokens} +
+
@@ -246,24 +434,242 @@ function ModelCard({ ); } +/* ──────────────────────────────────────────────────────────────────── */ +/* Model Settings panel (top of page) */ +/* ──────────────────────────────────────────────────────────────────── */ + +type PickerTarget = + | { kind: "main" } + | { kind: "aux"; task: string }; + +function ModelSettingsPanel({ + aux, + refreshKey, + onSaved, +}: { + aux: AuxiliaryModelsResponse | null; + refreshKey: number; + onSaved(): void; +}) { + const [expanded, setExpanded] = useState(false); + const [picker, setPicker] = useState(null); + const [resetBusy, setResetBusy] = useState(false); + + const mainProv = aux?.main.provider ?? ""; + const mainModel = aux?.main.model ?? ""; + + const applyAssignment = async ({ + scope, + task, + provider, + model, + }: { + scope: "main" | "auxiliary"; + task: string; + provider: string; + model: string; + }) => { + await api.setModelAssignment({ scope, task, provider, model }); + onSaved(); + }; + + const resetAllAux = async () => { + if (!window.confirm("Reset every auxiliary task to 'auto'? This overrides any per-task overrides you've set.")) { + return; + } + setResetBusy(true); + try { + await api.setModelAssignment({ + scope: "auxiliary", + task: "__reset__", + provider: "", + model: "", + }); + onSaved(); + } finally { + setResetBusy(false); + } + }; + + return ( + + +
+
+ + Model Settings + + applies to new sessions + +
+ +
+
+ + + {/* Main row */} +
+
+
+ + + Main model + +
+
+ {mainProv || "(unset)"} + {mainProv && mainModel && " · "} + {mainModel || "(unset)"} +
+
+ +
+ + {/* Auxiliary rows */} + {expanded && ( +
+
+
+ Auxiliary tasks +
+ +
+ +

+ Auxiliary tasks handle side-jobs like vision, session search, and + compression. auto means + "use the main model". Override per-task when you want a + cheap/fast model for a specific job. +

+ + {AUX_TASKS.map((t) => { + const cur = aux?.tasks.find((a) => a.task === t.key); + const isAuto = + !cur || cur.provider === "auto" || !cur.provider; + return ( +
+
+
+ {t.label} + + {t.hint} + +
+
+ {isAuto + ? "auto (use main model)" + : `${cur?.provider} · ${cur?.model || "(provider default)"}`} +
+
+ +
+ ); + })} +
+ )} + + {picker && ( + t.key === picker.task)?.label ?? + picker.task + }` + } + onApply={async ({ provider, model }) => { + await applyAssignment({ + scope: picker.kind === "main" ? "main" : "auxiliary", + task: picker.kind === "main" ? "" : picker.task, + provider, + model, + }); + }} + onClose={() => setPicker(null)} + /> + )} +
+
+ ); +} + +/* ──────────────────────────────────────────────────────────────────── */ +/* Page */ +/* ──────────────────────────────────────────────────────────────────── */ + export default function ModelsPage() { const [days, setDays] = useState(30); const [data, setData] = useState(null); + const [aux, setAux] = useState(null); const [loading, setLoading] = useState(true); const [error, setError] = useState(null); + const [saveKey, setSaveKey] = useState(0); const { t } = useI18n(); const { setAfterTitle, setEnd } = usePageHeader(); const load = useCallback(() => { setLoading(true); setError(null); - api - .getModelsAnalytics(days) - .then(setData) + Promise.all([ + api.getModelsAnalytics(days), + api.getAuxiliaryModels().catch(() => null), + ]) + .then(([models, auxData]) => { + setData(models); + setAux(auxData); + }) .catch((err) => setError(String(err))) .finally(() => setLoading(false)); }, [days]); + const onAssigned = useCallback(() => { + // Reload aux state after any assignment change. + api + .getAuxiliaryModels() + .then(setAux) + .catch(() => {}); + setSaveKey((k) => k + 1); + }, []); + useLayoutEffect(() => { const periodLabel = PERIODS.find((p) => p.days === days)?.label ?? `${days}d`; @@ -315,6 +721,13 @@ export default function ModelsPage() { return (
+ + + {loading && !data && (
@@ -369,7 +782,14 @@ export default function ModelsPage() { {data.models.length > 0 ? (
{data.models.map((m, i) => ( - + ))}
) : ( diff --git a/website/docs/user-guide/configuring-models.md b/website/docs/user-guide/configuring-models.md new file mode 100644 index 0000000000..397b89ec89 --- /dev/null +++ b/website/docs/user-guide/configuring-models.md @@ -0,0 +1,207 @@ +--- +sidebar_position: 3 +--- + +# Configuring Models + +Hermes uses two kinds of model slots: + +- **Main model** — what the agent thinks with. Every user message, every tool-call loop, every streamed response goes through this model. +- **Auxiliary models** — smaller side-jobs the agent offloads. Context compression, vision (image analysis), web-page summarization, session search, approval scoring, MCP tool routing, session-title generation, and skill search. Each has its own slot and can be overridden independently. + +This page covers configuring both from the dashboard. If you prefer config files or the CLI, jump to [Alternative methods](#alternative-methods) at the bottom. + +## The Models page + +Open the dashboard and click **Models** in the sidebar. You get two sections: + +1. **Model Settings** — the top panel, where you assign models to slots. +2. **Usage analytics** — ranked cards showing every model that ran a session in the selected period, with token counts, cost, and capability badges. + +![Models page overview](/img/docs/dashboard-models/overview.png) + +The top card is the **Model Settings** panel. The main row always shows what the agent will spin up for new sessions. Click **Change** to open the picker. + +## Setting the main model + +Click **Change** on the Main model row: + +![Model picker dialog](/img/docs/dashboard-models/picker-dialog.png) + +The picker has two columns: + +- **Left** — authenticated providers. Only providers you've set up (API key set, OAuth'd, or defined as a custom endpoint) show up here. If a provider is missing, head to **Keys** and add its credential. +- **Right** — the curated model list for the selected provider. 
These are the agentic models Hermes recommends for that provider, not the raw `/models` dump (which on OpenRouter includes 400+ models including TTS, image generators, and rerankers). + +Type in the filter box to narrow by provider name, slug, or model ID. + +Pick a model, hit **Switch**, and Hermes writes it to `~/.hermes/config.yaml` under the `model` section. **This applies to new sessions only** — any chat tab you already have open keeps running whatever model it started with. To hot-swap the current chat, use the `/model` slash command inside it. + +## Setting auxiliary models + +Click **Show auxiliary** to reveal the eight task slots: + +![Auxiliary panel expanded](/img/docs/dashboard-models/auxiliary-expanded.png) + +Every auxiliary task defaults to `auto` — meaning Hermes uses your main model for that job too. Override a specific task when you want a cheaper or faster model for a side-job. + +### Common override patterns + +| Task | When to override | +|---|---| +| **Title Gen** | Almost always. A $0.10/M flash model writes session titles as well as Opus. Default config sets this to `google/gemini-3-flash-preview` on OpenRouter. | +| **Vision** | When your main model is a coding model without vision (e.g. Kimi, DeepSeek). Point it at `google/gemini-2.5-flash` or `gpt-4o-mini`. | +| **Compression** | When you're burning reasoning tokens on Opus/M2.7 just to summarize context. A fast chat model does the job at 1/50th the cost. | +| **Session Search** | When recall queries fan out — default max_concurrency is 3. A cheap model keeps the bill predictable. | +| **Approval** | For `approval_mode: smart` — a fast/cheap model (haiku, flash, gpt-5-mini) decides whether to auto-approve low-risk commands. Expensive models here are waste. | +| **Web Extract** | When you use `web_extract` heavily. Same logic as compression — summarization doesn't need reasoning. | +| **Skills Hub** | `hermes skills search` uses this. Usually fine at `auto`. 
| +| **MCP** | MCP tool routing. Usually fine at `auto`. | + +### Per-task override + +Click **Change** on any auxiliary row. Same picker opens, same behavior — pick provider + model, hit Switch. The row updates to show `provider · model` instead of `auto (use main model)`. + +### Reset all to auto + +If you've over-tuned and want to start over, click **Reset all to auto** at the top of the auxiliary section. Every slot goes back to using your main model. + +## The "Use as" shortcut + +Every model card on the page has a **Use as** dropdown. This is the fast path — pick a model you see in your analytics, click **Use as**, and assign it to the main slot or any specific auxiliary task in one click: + +![Use as dropdown](/img/docs/dashboard-models/use-as-dropdown.png) + +The dropdown has: + +- **Main model** — same as clicking Change on the main row. +- **All auxiliary tasks** — assigns this model to all 8 aux slots at once. Useful when you just want every side-job on a cheap flash model. +- **Individual task options** — Vision, Web Extract, Compression, etc. The currently-assigned model for each task is marked `current`. + +Cards are badged with `main` or `aux · <task>` when they're currently assigned to something — so you can see at a glance which of your historical models are wired in where. + +## What gets written to `config.yaml` + +When you save via the dashboard, Hermes writes to `~/.hermes/config.yaml`: + +**Main model:** +```yaml +model: + provider: openrouter + default: anthropic/claude-opus-4.7 + base_url: '' # cleared on provider switch + api_mode: chat_completions +``` + +**Auxiliary override (example — vision on gemini-flash):** +```yaml +auxiliary: + vision: + provider: openrouter + model: google/gemini-2.5-flash + base_url: '' + api_key: '' + timeout: 120 + extra_body: {} + download_timeout: 30 +``` + +**Auxiliary on auto (default):** +```yaml +auxiliary: + compression: + provider: auto + model: '' + base_url: '' + # ... 
other fields unchanged +``` + +`provider: auto` with `model: ''` tells Hermes to use the main model for that task. + +## When does it take effect? + +- **CLI** (`hermes chat`): next `hermes chat` invocation. +- **Gateway** (Telegram, Discord, Slack, etc.): next *new* session. Existing sessions keep their model. Restart the gateway (`hermes gateway restart`) if you want to force all sessions to pick up the change. +- **Dashboard chat tab** (`/chat`): next new PTY. The currently-open chat keeps its model — use `/model` inside it to hot-swap. + +Changes never invalidate prompt caches on running sessions. That's deliberate: swapping the main model inside a session requires a cache reset (the system prompt contains model-specific content), and we reserve that for the explicit `/model` slash command inside chat. + +## Troubleshooting + +### "No authenticated providers" in the picker + +Hermes lists a provider only if it has a working credential. Check **Keys** in the sidebar — you should see one of: an API key, a successful OAuth, or a custom endpoint URL. If the provider you want isn't there, run `hermes setup` to wire it up, or go to **Keys** and add the env var. + +### Main model didn't change in my running chat + +Expected. The dashboard writes `config.yaml`, which new sessions read. The currently-open chat is a live agent process — it keeps whatever model it was spawned with. Use `/model <name>` inside the chat to hot-swap that specific session. + +### Auxiliary override "didn't take effect" + +Three things to check: + +1. **Did you start a new session?** Existing chats don't re-read config. +2. **Is `provider` set to something other than `auto`?** If the field shows `auto`, the task is still using your main model. Click **Change** and pick a real provider. +3. **Is the provider authenticated?** If you assigned `minimax` to a task but don't have a MiniMax API key, that task falls back to the openrouter default and logs a warning in `agent.log`. 
+ +### I picked a model but Hermes switched providers on me + +On OpenRouter (or any aggregator), bare model names resolve *within* the aggregator first. So `claude-sonnet-4` on OpenRouter becomes `anthropic/claude-sonnet-4.6`, staying on your OpenRouter auth. But if you typed `claude-sonnet-4` on a native Anthropic auth, it would resolve to `claude-sonnet-4-6`. If you see an unexpected provider switch, check that your current provider is what you expect — the picker always shows the current main model at the top of the dialog. + +## Alternative methods + +### CLI slash command + +Inside any `hermes chat` session: + +``` +/model gpt-5.4 --provider openrouter # session-only +/model gpt-5.4 --provider openrouter --global # also persists to config.yaml +``` + +`--global` does the same thing the dashboard's **Change** button does, plus it switches the running session in-place. + +### `hermes model` subcommand + +```bash +hermes model list # list authenticated providers + models +hermes model set anthropic/claude-opus-4.7 --provider openrouter +``` + +### Direct config edit + +Edit `~/.hermes/config.yaml` and restart whatever reads it. See the [Configuration reference](./configuration.md) for the full schema. + +### REST API + +The dashboard uses three endpoints. 
Useful for scripting: + +```bash +# List authenticated providers + curated model lists +curl -H "X-Hermes-Session-Token: $TOKEN" http://localhost:PORT/api/model/options + +# Read current main + auxiliary assignments +curl -H "X-Hermes-Session-Token: $TOKEN" http://localhost:PORT/api/model/auxiliary + +# Set the main model +curl -X POST -H "Content-Type: application/json" -H "X-Hermes-Session-Token: $TOKEN" \ + -d '{"scope":"main","provider":"openrouter","model":"anthropic/claude-opus-4.7"}' \ + http://localhost:PORT/api/model/set + +# Override a single auxiliary task +curl -X POST -H "Content-Type: application/json" -H "X-Hermes-Session-Token: $TOKEN" \ + -d '{"scope":"auxiliary","task":"vision","provider":"openrouter","model":"google/gemini-2.5-flash"}' \ + http://localhost:PORT/api/model/set + +# Assign one model to every auxiliary task +curl -X POST -H "Content-Type: application/json" -H "X-Hermes-Session-Token: $TOKEN" \ + -d '{"scope":"auxiliary","task":"","provider":"openrouter","model":"google/gemini-2.5-flash"}' \ + http://localhost:PORT/api/model/set + +# Reset all auxiliary tasks to auto +curl -X POST -H "Content-Type: application/json" -H "X-Hermes-Session-Token: $TOKEN" \ + -d '{"scope":"auxiliary","task":"__reset__","provider":"","model":""}' \ + http://localhost:PORT/api/model/set +``` + +The session token is injected into the dashboard HTML at startup and rotates on every server restart. Grab it from the browser devtools (`window.__HERMES_SESSION_TOKEN__`) if you're scripting against a running dashboard. 
diff --git a/website/sidebars.ts b/website/sidebars.ts index 4cd17c1ea5..03093b5037 100644 --- a/website/sidebars.ts +++ b/website/sidebars.ts @@ -23,6 +23,7 @@ const sidebars: SidebarsConfig = { 'user-guide/cli', 'user-guide/tui', 'user-guide/configuration', + 'user-guide/configuring-models', 'user-guide/sessions', 'user-guide/profiles', 'user-guide/git-worktrees', diff --git a/website/static/img/docs/dashboard-models/auxiliary-expanded.png b/website/static/img/docs/dashboard-models/auxiliary-expanded.png new file mode 100644 index 0000000000..81fa043459 Binary files /dev/null and b/website/static/img/docs/dashboard-models/auxiliary-expanded.png differ diff --git a/website/static/img/docs/dashboard-models/overview.png b/website/static/img/docs/dashboard-models/overview.png new file mode 100644 index 0000000000..d64c221d78 Binary files /dev/null and b/website/static/img/docs/dashboard-models/overview.png differ diff --git a/website/static/img/docs/dashboard-models/picker-dialog.png b/website/static/img/docs/dashboard-models/picker-dialog.png new file mode 100644 index 0000000000..4f65af1264 Binary files /dev/null and b/website/static/img/docs/dashboard-models/picker-dialog.png differ diff --git a/website/static/img/docs/dashboard-models/use-as-dropdown.png b/website/static/img/docs/dashboard-models/use-as-dropdown.png new file mode 100644 index 0000000000..ff92961586 Binary files /dev/null and b/website/static/img/docs/dashboard-models/use-as-dropdown.png differ