feat(dashboard): configure main + auxiliary models from Models page (#17802)

Dashboard Models page was analytics-only — no way to pick a model as main
for new sessions or override an auxiliary task slot without hand-editing
config.yaml or running a /model slash command inside a chat.

Changes:
- hermes_cli/web_server.py: three REST endpoints (GET /api/model/options,
  GET /api/model/auxiliary, POST /api/model/set). Reuses
  list_authenticated_providers() from model_switch.py so the REST path
  surfaces the same curated model lists as the TUI-gateway model.options
  JSON-RPC. POST /api/model/set writes model.provider + model.default for
  scope=main, and auxiliary.<task>.{provider,model} for scope=auxiliary
  (with task="" meaning 'all 8 slots' and task="__reset__" resetting them
  to auto).
- web/src/components/ModelPickerDialog.tsx: accepts an optional loader +
  onApply pair so it works without an open chat PTY. ChatSidebar's
  gw-WebSocket path still works unchanged (back-compat).
- web/src/pages/ModelsPage.tsx: Model Settings panel at the top showing
  main model + collapsible list of 8 auxiliary tasks with per-row Change
  buttons and Reset all to auto. Every existing model card gets a
  'Use as' dropdown for one-click assignment to main or any aux slot.
  Cards badged 'main' or 'aux · <task>' when currently assigned.
- website/docs/user-guide/configuring-models.md: new docs page walking
  through both UI paths, aux task override patterns, troubleshooting,
  plus REST/CLI alternatives.
- Screenshots under website/static/img/docs/dashboard-models/.

Applies to new sessions only — running sessions keep their model (use
/model slash command to hot-swap a live session). No prompt-cache
invalidation on existing sessions.
This commit is contained in:
Teknium 2026-04-29 23:53:12 -07:00 committed by GitHub
parent 718e4e2e7e
commit 3c27efbb91
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 1007 additions and 47 deletions

View file

@ -23,7 +23,7 @@ import time
import urllib.parse
import urllib.request
from pathlib import Path
from typing import Any, Dict, List, Optional
from typing import Any, Dict, List, Optional, Tuple
import yaml
@ -445,6 +445,20 @@ class EnvVarReveal(BaseModel):
key: str
class ModelAssignment(BaseModel):
    """Payload for POST /api/model/set — assign a provider/model to a slot.

    How ``scope`` and ``task`` select the config keys that are written:

    * scope="main" writes model.provider + model.default
    * scope="auxiliary" with task="<slot>" writes auxiliary.<slot>.provider
      + auxiliary.<slot>.model
    * scope="auxiliary" with task="" applies the same write to every
      auxiliary.* slot
    * scope="auxiliary" with task="__reset__" resets every slot to
      provider="auto" (and an empty model)
    """

    # "main" or "auxiliary"; the handler strips and lowercases it before use.
    scope: str
    # Provider slug to store; ignored for the "__reset__" task.
    provider: str
    # Model identifier to store; may be empty for auxiliary assignments.
    model: str
    # Auxiliary slot name, "" for all slots, or "__reset__".
    task: str = ""
_GATEWAY_HEALTH_URL = os.getenv("GATEWAY_HEALTH_URL")
try:
_GATEWAY_HEALTH_TIMEOUT = float(os.getenv("GATEWAY_HEALTH_TIMEOUT", "3"))
@ -921,6 +935,206 @@ def get_model_info():
return dict(_EMPTY_MODEL_INFO)
# ---------------------------------------------------------------------------
# Model assignment — pick provider+model for main slot or auxiliary slots.
# Mirrors the model.options JSON-RPC from tui_gateway but uses REST so the
# Models page (which has no chat PTY open) can drive it.
# ---------------------------------------------------------------------------
# Canonical auxiliary task slots. Keep in sync with DEFAULT_CONFIG["auxiliary"]
# in hermes_cli/config.py — listed here for deterministic ordering in the UI.
# GET /api/model/auxiliary reports one entry per slot in this order, and
# POST /api/model/set rejects any auxiliary task name that is not listed here.
# The dashboard keeps a matching list (AUX_TASKS in web/src/pages/ModelsPage.tsx).
_AUX_TASK_SLOTS: Tuple[str, ...] = (
    "vision",
    "web_extract",
    "compression",
    "session_search",
    "skills_hub",
    "approval",
    "mcp",
    "title_generation",
)
@app.get("/api/model/options")
def get_model_options():
    """List authenticated providers together with their curated models.

    This is the REST counterpart of the ``model.options`` JSON-RPC exposed by
    tui_gateway; the dashboard Models page calls it to populate the picker
    when no chat session is open. The payload mirrors ``model.options`` so
    ``ModelPickerDialog`` can reuse its types.

    Returns:
        A dict with ``providers`` (the result of
        ``list_authenticated_providers``), ``model`` (the configured main
        model) and ``provider`` (the configured main provider).

    Raises:
        HTTPException: 500 when anything fails while reading the config or
            listing providers; the underlying error is logged.
    """
    try:
        from hermes_cli.model_switch import list_authenticated_providers

        config = load_config()

        model_section = config.get("model", {})
        if not isinstance(model_section, dict):
            # ``model`` is not a mapping (e.g. a bare value): treat it as the
            # model name and leave provider/base_url empty.
            active_model = str(model_section) if model_section else ""
            active_provider = ""
            active_base_url = ""
        else:
            # ``default`` wins; ``name`` is the fallback key.
            active_model = model_section.get("default", model_section.get("name", "")) or ""
            active_provider = model_section.get("provider", "") or ""
            active_base_url = model_section.get("base_url", "") or ""

        # Ignore malformed sections instead of passing them downstream.
        providers_section = config.get("providers")
        if not isinstance(providers_section, dict):
            providers_section = {}
        custom_section = config.get("custom_providers")
        if not isinstance(custom_section, list):
            custom_section = []

        return {
            "providers": list_authenticated_providers(
                current_provider=active_provider,
                current_base_url=active_base_url,
                current_model=active_model,
                user_providers=providers_section,
                custom_providers=custom_section,
                max_models=50,
            ),
            "model": active_model,
            "provider": active_provider,
        }
    except Exception:
        _log.exception("GET /api/model/options failed")
        raise HTTPException(status_code=500, detail="Failed to list model options")
@app.get("/api/model/auxiliary")
def get_auxiliary_models():
    """Report the configured auxiliary task assignments and the main model.

    Every slot in ``_AUX_TASK_SLOTS`` is reported, in that order. A slot that
    is missing or malformed in the config is reported as ``provider="auto"``
    with empty ``model`` and ``base_url``.

    Shape:
        {
          "tasks": [
            {"task": "vision", "provider": "auto", "model": "", "base_url": ""},
            ...
          ],
          "main": {"provider": "openrouter", "model": "anthropic/claude-opus-4.7"},
        }

    Raises:
        HTTPException: 500 when the config cannot be read; the underlying
            error is logged.
    """
    try:
        config = load_config()

        aux_section = config.get("auxiliary", {})
        if not isinstance(aux_section, dict):
            aux_section = {}

        def _describe(slot):
            # Normalise one slot into the string-only shape the UI expects.
            entry = aux_section.get(slot)
            if not isinstance(entry, dict):
                entry = {}
            return {
                "task": slot,
                "provider": str(entry.get("provider", "auto") or "auto"),
                "model": str(entry.get("model", "") or ""),
                "base_url": str(entry.get("base_url", "") or ""),
            }

        tasks = [_describe(slot) for slot in _AUX_TASK_SLOTS]

        model_section = config.get("model", {})
        if not isinstance(model_section, dict):
            # ``model`` is not a mapping: treat its value as the model name.
            main = {"provider": "", "model": str(model_section) if model_section else ""}
        else:
            main = {
                "provider": str(model_section.get("provider", "") or ""),
                "model": str(model_section.get("default", model_section.get("name", "")) or ""),
            }

        return {"tasks": tasks, "main": main}
    except Exception:
        _log.exception("GET /api/model/auxiliary failed")
        raise HTTPException(status_code=500, detail="Failed to read auxiliary config")
@app.post("/api/model/set")
async def set_model_assignment(body: ModelAssignment):
    """Persist a provider/model choice for the main slot or auxiliary slots.

    The choice is written to ``~/.hermes/config.yaml`` and applies to **new**
    sessions only. A chat PTY that is already running is not affected; use
    the ``/model`` slash command inside that chat to hot-swap it.

    Behaviour by scope:
        * ``main``: requires provider and model; also blanks a non-empty
          ``model.base_url`` and drops ``model.context_length``.
        * ``auxiliary`` with ``task="__reset__"``: every slot gets
          ``provider="auto"`` and ``model=""``; other slot keys are kept.
        * ``auxiliary`` otherwise: requires provider; writes provider and
          model to the named slot, or to every slot when ``task`` is empty.

    Raises:
        HTTPException: 400 for an invalid scope, a missing provider/model, or
            an unknown auxiliary task; 500 for any other failure (logged).
    """
    scope = (body.scope or "").strip().lower()
    provider = (body.provider or "").strip()
    model = (body.model or "").strip()
    task = (body.task or "").strip().lower()

    if scope not in ("main", "auxiliary"):
        raise HTTPException(status_code=400, detail="scope must be 'main' or 'auxiliary'")

    def _slot_entry(section, slot):
        # Reuse the slot's existing mapping so unrelated keys survive.
        current = section.get(slot)
        return current if isinstance(current, dict) else {}

    try:
        config = load_config()

        if scope == "main":
            if not (provider and model):
                raise HTTPException(status_code=400, detail="provider and model required for main")

            main_section = config.get("model", {})
            if not isinstance(main_section, dict):
                main_section = {}
            main_section["provider"] = provider
            main_section["default"] = model
            # Blank a stale base_url so the resolver picks the provider's own default.
            if main_section.get("base_url"):
                main_section["base_url"] = ""
            # Drop any hardcoded context_length override — the new model may
            # have a different context window.
            main_section.pop("context_length", None)

            config["model"] = main_section
            save_config(config)
            return {"ok": True, "scope": "main", "provider": provider, "model": model}

        # scope == "auxiliary" from here on.
        aux_section = config.get("auxiliary")
        if not isinstance(aux_section, dict):
            aux_section = {}

        if task == "__reset__":
            for slot in _AUX_TASK_SLOTS:
                entry = _slot_entry(aux_section, slot)
                entry.update(provider="auto", model="")
                aux_section[slot] = entry
            config["auxiliary"] = aux_section
            save_config(config)
            return {"ok": True, "scope": "auxiliary", "reset": True}

        if not provider:
            raise HTTPException(status_code=400, detail="provider required for auxiliary")
        # Only a caller-supplied task can be unknown; the "all slots" list is
        # built from _AUX_TASK_SLOTS itself.
        if task and task not in _AUX_TASK_SLOTS:
            raise HTTPException(status_code=400, detail=f"unknown auxiliary task: {task}")

        targets = [task] if task else list(_AUX_TASK_SLOTS)
        for slot in targets:
            entry = _slot_entry(aux_section, slot)
            entry.update(provider=provider, model=model)
            aux_section[slot] = entry

        config["auxiliary"] = aux_section
        save_config(config)
        return {
            "ok": True,
            "scope": "auxiliary",
            "tasks": targets,
            "provider": provider,
            "model": model,
        }
    except HTTPException:
        # Deliberate 4xx responses must not be turned into a 500 below.
        raise
    except Exception:
        _log.exception("POST /api/model/set failed")
        raise HTTPException(status_code=500, detail="Failed to save model assignment")
def _denormalize_config_from_web(config: Dict[str, Any]) -> Dict[str, Any]:
"""Reverse _normalize_config_for_web before saving.

View file

@ -11,9 +11,18 @@ import { useEffect, useMemo, useRef, useState } from "react";
* Stage 1: pick provider (authenticated providers only)
* Stage 2: pick model within that provider
*
* On confirm, emits `/model <model> --provider <slug> [--global]` through
* the parent callback so ChatPage can dispatch it via the existing slash
* pipeline. That keeps persistence + actual switch logic in one place.
* Two invocation modes:
*
* 1. Chat-session mode (ChatSidebar): pass `gw` + `sessionId`. The picker
* loads options via `model.options` JSON-RPC and emits the result as a
* slash command string (`/model <model> --provider <slug> [--global]`)
* through `onSubmit`, which the ChatPage pipes to `slashExec`.
*
* 2. Standalone mode (ModelsPage, Config settings): pass a `loader` and
* `onApply`. The picker fetches options via the REST endpoint and calls
* `onApply(provider, model, persistGlobal)` instead of emitting a slash
* command. This lets the Models page reuse the same UI without
* requiring an open chat PTY.
*/
interface ModelOptionProvider {
@ -32,14 +41,38 @@ interface ModelOptionsResponse {
}
interface Props {
gw: GatewayClient;
sessionId: string;
/** Chat-mode: when present, picker emits a slash command via onSubmit. */
gw?: GatewayClient;
sessionId?: string;
onSubmit?(slashCommand: string): void;
/** Standalone-mode: when present (and onSubmit absent), picker calls onApply. */
loader?(): Promise<ModelOptionsResponse>;
onApply?(args: {
provider: string;
model: string;
persistGlobal: boolean;
}): Promise<void> | void;
onClose(): void;
/** Parent runs the resulting slash command through slashExec. */
onSubmit(slashCommand: string): void;
title?: string;
/** If true, hides "Persist globally" checkbox — always saves to config.yaml. */
alwaysGlobal?: boolean;
}
export function ModelPickerDialog({ gw, sessionId, onClose, onSubmit }: Props) {
export function ModelPickerDialog(props: Props) {
const {
gw,
sessionId,
onSubmit,
loader,
onApply,
onClose,
title = "Switch Model",
alwaysGlobal = false,
} = props;
const standalone = !!loader && !!onApply;
const [providers, setProviders] = useState<ModelOptionProvider[]>([]);
const [currentModel, setCurrentModel] = useState("");
const [currentProviderSlug, setCurrentProviderSlug] = useState("");
@ -48,17 +81,22 @@ export function ModelPickerDialog({ gw, sessionId, onClose, onSubmit }: Props) {
const [selectedSlug, setSelectedSlug] = useState("");
const [selectedModel, setSelectedModel] = useState("");
const [query, setQuery] = useState("");
const [persistGlobal, setPersistGlobal] = useState(false);
const [persistGlobal, setPersistGlobal] = useState(alwaysGlobal);
const [applying, setApplying] = useState(false);
const closedRef = useRef(false);
// Load providers + models on open.
useEffect(() => {
closedRef.current = false;
gw.request<ModelOptionsResponse>(
"model.options",
sessionId ? { session_id: sessionId } : {},
)
const promise = standalone
? (loader as () => Promise<ModelOptionsResponse>)()
: (gw as GatewayClient).request<ModelOptionsResponse>(
"model.options",
sessionId ? { session_id: sessionId } : {},
);
promise
.then((r) => {
if (closedRef.current) return;
const next = r?.providers ?? [];
@ -80,7 +118,9 @@ export function ModelPickerDialog({ gw, sessionId, onClose, onSubmit }: Props) {
return () => {
closedRef.current = true;
};
}, [gw, sessionId]);
// Deliberately omit props from deps — stable for the dialog's lifetime.
// eslint-disable-next-line react-hooks/exhaustive-deps
}, []);
// Esc closes.
useEffect(() => {
@ -125,15 +165,31 @@ export function ModelPickerDialog({ gw, sessionId, onClose, onSubmit }: Props) {
[models, needle],
);
const canConfirm = !!selectedProvider && !!selectedModel;
const canConfirm = !!selectedProvider && !!selectedModel && !applying;
const confirm = () => {
if (!canConfirm) return;
const global = persistGlobal ? " --global" : "";
onSubmit(
`/model ${selectedModel} --provider ${selectedProvider.slug}${global}`,
);
onClose();
const confirm = async () => {
if (!canConfirm || !selectedProvider) return;
if (standalone && onApply) {
setApplying(true);
try {
await onApply({
provider: selectedProvider.slug,
model: selectedModel,
persistGlobal,
});
onClose();
} catch (e) {
setError(e instanceof Error ? e.message : String(e));
} finally {
setApplying(false);
}
} else if (onSubmit) {
const global = persistGlobal ? " --global" : "";
onSubmit(
`/model ${selectedModel} --provider ${selectedProvider.slug}${global}`,
);
onClose();
}
};
return (
@ -160,7 +216,7 @@ export function ModelPickerDialog({ gw, sessionId, onClose, onSubmit }: Props) {
id="model-picker-title"
className="font-display text-base tracking-wider uppercase"
>
Switch Model
{title}
</h2>
<p className="text-xs text-muted-foreground mt-1 font-mono">
current: {currentModel || "(unknown)"}
@ -212,22 +268,28 @@ export function ModelPickerDialog({ gw, sessionId, onClose, onSubmit }: Props) {
</div>
<footer className="border-t border-border p-3 flex items-center justify-between gap-3 flex-wrap">
<label className="flex items-center gap-2 text-xs text-muted-foreground cursor-pointer select-none">
<input
type="checkbox"
checked={persistGlobal}
onChange={(e) => setPersistGlobal(e.target.checked)}
className="cursor-pointer"
/>
Persist globally (otherwise this session only)
</label>
{alwaysGlobal ? (
<span className="text-xs text-muted-foreground">
Saves to config.yaml applies to new sessions.
</span>
) : (
<label className="flex items-center gap-2 text-xs text-muted-foreground cursor-pointer select-none">
<input
type="checkbox"
checked={persistGlobal}
onChange={(e) => setPersistGlobal(e.target.checked)}
className="cursor-pointer"
/>
Persist globally (otherwise this session only)
</label>
)}
<div className="flex items-center gap-2 ml-auto">
<Button outlined onClick={onClose}>
<Button outlined onClick={onClose} disabled={applying}>
Cancel
</Button>
<Button onClick={confirm} disabled={!canConfirm}>
Switch
{applying ? <Spinner /> : "Switch"}
</Button>
</div>
</footer>

View file

@ -69,6 +69,14 @@ export const api = {
getDefaults: () => fetchJSON<Record<string, unknown>>("/api/config/defaults"),
getSchema: () => fetchJSON<{ fields: Record<string, unknown>; category_order: string[] }>("/api/config/schema"),
getModelInfo: () => fetchJSON<ModelInfoResponse>("/api/model/info"),
getModelOptions: () => fetchJSON<ModelOptionsResponse>("/api/model/options"),
getAuxiliaryModels: () => fetchJSON<AuxiliaryModelsResponse>("/api/model/auxiliary"),
setModelAssignment: (body: ModelAssignmentRequest) =>
fetchJSON<ModelAssignmentResponse>("/api/model/set", {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify(body),
}),
saveConfig: (config: Record<string, unknown>) =>
fetchJSON<{ ok: boolean }>("/api/config", {
method: "PUT",
@ -473,6 +481,54 @@ export interface ModelInfoResponse {
};
}
// ── Model options / assignment types ──────────────────────────────────
/**
 * One provider entry inside a ModelOptionsResponse.
 *
 * `slug` is the identifier the model picker sends back as `provider` when a
 * model is assigned. The remaining fields are produced by the backend's
 * provider listing — NOTE(review): their exact semantics are not visible
 * from this file; confirm against `list_authenticated_providers`.
 */
export interface ModelOptionProvider {
  name: string;
  slug: string;
  models?: string[];
  total_models?: number;
  is_current?: boolean;
  is_user_defined?: boolean;
  source?: string;
  warning?: string;
}
/**
 * Response of GET /api/model/options.
 *
 * `model` and `provider` are the main model and provider currently stored in
 * the config; `providers` is the list of authenticated providers offered in
 * the picker.
 */
export interface ModelOptionsResponse {
  model?: string;
  provider?: string;
  providers?: ModelOptionProvider[];
}
/**
 * Current assignment of one auxiliary task slot, as reported by
 * GET /api/model/auxiliary. An unconfigured slot is reported with
 * `provider: "auto"` and empty `model` / `base_url`.
 */
export interface AuxiliaryTaskAssignment {
  task: string;
  provider: string;
  model: string;
  base_url: string;
}
/**
 * Response of GET /api/model/auxiliary: one entry per auxiliary task slot
 * plus the configured main provider/model (empty strings when unset).
 */
export interface AuxiliaryModelsResponse {
  tasks: AuxiliaryTaskAssignment[];
  main: { provider: string; model: string };
}
/**
 * Body of POST /api/model/set.
 *
 * `scope: "main"` requires both `provider` and `model`. `scope: "auxiliary"`
 * requires `provider` unless `task` is "__reset__", in which case
 * `provider` and `model` are ignored by the server.
 */
export interface ModelAssignmentRequest {
  scope: "main" | "auxiliary";
  provider: string;
  model: string;
  /** For auxiliary: task slot name, "" for all, "__reset__" to reset all. */
  task?: string;
}
/**
 * Response of POST /api/model/set.
 *
 * Main assignments echo `scope`, `provider` and `model`. Auxiliary
 * assignments additionally list the affected slots in `tasks`. A reset
 * returns `scope` and `reset: true` only.
 */
export interface ModelAssignmentResponse {
  ok: boolean;
  scope?: string;
  provider?: string;
  model?: string;
  tasks?: string[];
  reset?: boolean;
}
// ── OAuth provider types ────────────────────────────────────────────────
export interface OAuthProviderStatus {

View file

@ -1,15 +1,23 @@
import { useCallback, useEffect, useLayoutEffect, useState } from "react";
import {
Brain,
ChevronDown,
Cpu,
DollarSign,
Eye,
RefreshCw,
Settings2,
Star,
Wrench,
Zap,
} from "lucide-react";
import { api } from "@/lib/api";
import type { ModelsAnalyticsModelEntry, ModelsAnalyticsResponse } from "@/lib/api";
import type {
AuxiliaryModelsResponse,
AuxiliaryTaskAssignment,
ModelsAnalyticsModelEntry,
ModelsAnalyticsResponse,
} from "@/lib/api";
import { timeAgo } from "@/lib/utils";
import { formatTokenCount } from "@/lib/format";
import { Button, Spinner, Stats } from "@nous-research/ui";
@ -18,6 +26,7 @@ import { Badge } from "@nous-research/ui";
import { usePageHeader } from "@/contexts/usePageHeader";
import { useI18n } from "@/i18n";
import { PluginSlot } from "@/plugins";
import { ModelPickerDialog } from "@/components/ModelPickerDialog";
const PERIODS = [
{ label: "7d", days: 7 },
@ -25,6 +34,18 @@ const PERIODS = [
{ label: "90d", days: 90 },
] as const;
// Must match _AUX_TASK_SLOTS in hermes_cli/web_server.py.
// `key` is the slot name sent as `task` to the model-assignment API; `label`
// and `hint` are display-only text used by the "Use as" menu and the Model
// Settings panel.
const AUX_TASKS: readonly { key: string; label: string; hint: string }[] = [
  { key: "vision", label: "Vision", hint: "Image analysis" },
  { key: "web_extract", label: "Web Extract", hint: "Page summarization" },
  { key: "compression", label: "Compression", hint: "Context compaction" },
  { key: "session_search", label: "Session Search", hint: "Recall queries" },
  { key: "skills_hub", label: "Skills Hub", hint: "Skill search" },
  { key: "approval", label: "Approval", hint: "Smart auto-approve" },
  { key: "mcp", label: "MCP", hint: "MCP tool routing" },
  { key: "title_generation", label: "Title Gen", hint: "Session titles" },
] as const;
function formatTokens(n: number): string {
if (n >= 1_000_000) return `${(n / 1_000_000).toFixed(1)}M`;
if (n >= 1_000) return `${(n / 1_000).toFixed(1)}K`;
@ -134,20 +155,168 @@ function CapabilityBadges({
);
}
/* ──────────────────────────────────────────────────────────────────── */
/* Per-card "Use as" menu */
/* ──────────────────────────────────────────────────────────────────── */
/**
 * Dropdown shown on a model card that assigns the card's provider/model to
 * the main slot or to auxiliary task slots through `api.setModelAssignment`.
 *
 * "All auxiliary tasks" sends an empty `task`; a single task row sends that
 * task's key. After a successful save `onAssigned` is invoked and the menu
 * closes; on failure the menu stays open and shows the error message.
 */
function UseAsMenu({
  provider,
  model,
  isMain,
  mainAuxTask,
  onAssigned,
}: {
  provider: string;
  model: string;
  /** True when this card's model+provider match config.yaml's main slot. */
  isMain: boolean;
  /** If this model is assigned to a specific aux task, that task's key. */
  mainAuxTask: string | null;
  onAssigned(): void;
}) {
  const [open, setOpen] = useState(false);
  const [busy, setBusy] = useState(false);
  const [error, setError] = useState<string | null>(null);

  // Save one assignment. `task` is "" for the main slot and for "all
  // auxiliary tasks"; otherwise it is an AUX_TASKS key.
  const assign = async (
    scope: "main" | "auxiliary",
    task: string,
  ) => {
    // Refuse to send an assignment the backend would reject for scope=main.
    if (!provider || !model) {
      setError("Missing provider/model");
      return;
    }
    setBusy(true);
    setError(null);
    try {
      await api.setModelAssignment({ scope, provider, model, task });
      onAssigned();
      setOpen(false);
    } catch (e) {
      // Keep the menu open so the message rendered below is visible.
      setError(e instanceof Error ? e.message : String(e));
    } finally {
      setBusy(false);
    }
  };

  // Close on outside click.
  // The listener is only installed while the menu is open, and clicks inside
  // any element marked `data-use-as-menu` are ignored.
  useEffect(() => {
    if (!open) return;
    const onDown = (e: MouseEvent) => {
      const target = e.target as HTMLElement | null;
      if (target && !target.closest?.("[data-use-as-menu]")) setOpen(false);
    };
    window.addEventListener("mousedown", onDown);
    return () => window.removeEventListener("mousedown", onDown);
  }, [open]);

  return (
    <div className="relative" data-use-as-menu>
      <Button
        size="sm"
        outlined
        onClick={() => setOpen((v) => !v)}
        disabled={busy}
        className="text-[10px] h-6 px-2"
        prefix={busy ? <Spinner /> : null}
      >
        Use as <ChevronDown className="h-3 w-3" />
      </Button>
      {open && (
        <div className="absolute right-0 top-full mt-1 z-50 min-w-[220px] border border-border bg-card shadow-lg">
          <button
            type="button"
            onClick={() => assign("main", "")}
            disabled={busy}
            className="flex w-full items-center justify-between px-3 py-2 text-xs hover:bg-muted/50 disabled:opacity-40"
          >
            <span className="flex items-center gap-2">
              <Star className="h-3 w-3" />
              Main model
            </span>
            {isMain && (
              <span className="text-[9px] uppercase tracking-wider text-primary/80">
                current
              </span>
            )}
          </button>
          <div className="border-t border-border/50 px-3 py-1.5 text-[9px] uppercase tracking-wider text-muted-foreground">
            Auxiliary task
          </div>
          <button
            type="button"
            onClick={() => assign("auxiliary", "")}
            disabled={busy}
            className="flex w-full items-center justify-between px-3 py-1.5 text-xs hover:bg-muted/50 disabled:opacity-40"
          >
            <span>All auxiliary tasks</span>
          </button>
          {AUX_TASKS.map((t) => (
            <button
              key={t.key}
              type="button"
              onClick={() => assign("auxiliary", t.key)}
              disabled={busy}
              className="flex w-full items-center justify-between px-3 py-1.5 text-xs hover:bg-muted/50 disabled:opacity-40"
            >
              <span>{t.label}</span>
              {mainAuxTask === t.key && (
                <span className="text-[9px] uppercase tracking-wider text-primary/80">
                  current
                </span>
              )}
            </button>
          ))}
          {error && (
            <div className="px-3 py-2 text-[10px] text-destructive border-t border-border/50">
              {error}
            </div>
          )}
        </div>
      )}
    </div>
  );
}
/* ──────────────────────────────────────────────────────────────────── */
/* ModelCard */
/* ──────────────────────────────────────────────────────────────────── */
function ModelCard({
entry,
rank,
main,
aux,
onAssigned,
}: {
entry: ModelsAnalyticsModelEntry;
rank: number;
main: { provider: string; model: string } | null;
aux: AuxiliaryTaskAssignment[];
onAssigned(): void;
}) {
const { t } = useI18n();
const provider = entry.provider || modelVendor(entry.model);
const totalTokens = entry.input_tokens + entry.output_tokens;
const caps = entry.capabilities;
const isMain =
!!main &&
main.provider === provider &&
main.model === entry.model;
// First aux task currently using this model (if any).
const mainAuxTask =
aux.find(
(a) => a.provider === provider && a.model === entry.model,
)?.task ?? null;
return (
<Card>
<Card className={isMain ? "ring-1 ring-primary/40" : undefined}>
<CardHeader className="pb-3">
<div className="flex items-start justify-between gap-2">
<div className="min-w-0 flex-1">
@ -158,6 +327,16 @@ function ModelCard({
<CardTitle className="text-sm font-mono-ui truncate">
{shortModelName(entry.model)}
</CardTitle>
{isMain && (
<span className="inline-flex items-center gap-0.5 bg-primary/15 px-1.5 py-0.5 text-[9px] font-medium uppercase tracking-wider text-primary">
<Star className="h-2.5 w-2.5" /> main
</span>
)}
{mainAuxTask && (
<span className="inline-flex items-center bg-purple-500/10 px-1.5 py-0.5 text-[9px] font-medium uppercase tracking-wider text-purple-600 dark:text-purple-400">
aux · {mainAuxTask}
</span>
)}
</div>
<div className="flex items-center gap-2 mt-1">
{provider && (
@ -177,13 +356,22 @@ function ModelCard({
)}
</div>
</div>
<div className="text-right shrink-0">
<div className="text-xs font-mono font-semibold">
{formatTokens(totalTokens)}
</div>
<div className="text-[10px] text-muted-foreground">
{t.models.tokens}
<div className="flex flex-col items-end gap-1 shrink-0">
<div className="text-right">
<div className="text-xs font-mono font-semibold">
{formatTokens(totalTokens)}
</div>
<div className="text-[10px] text-muted-foreground">
{t.models.tokens}
</div>
</div>
<UseAsMenu
provider={provider}
model={entry.model}
isMain={isMain}
mainAuxTask={mainAuxTask}
onAssigned={onAssigned}
/>
</div>
</div>
</CardHeader>
@ -246,24 +434,242 @@ function ModelCard({
);
}
/* ──────────────────────────────────────────────────────────────────── */
/* Model Settings panel (top of page) */
/* ──────────────────────────────────────────────────────────────────── */
// Which slot the open ModelPickerDialog will write to: the main model, or
// one auxiliary task identified by its AUX_TASKS key.
type PickerTarget =
  | { kind: "main" }
  | { kind: "aux"; task: string };
/**
 * Panel at the top of the Models page showing the main model and, when
 * expanded, one row per auxiliary task, each with a "Change" button that
 * opens ModelPickerDialog in standalone mode.
 *
 * Props:
 * - `aux`: current assignments from GET /api/model/auxiliary, or null when
 *   they have not been loaded (rows then render as unset / auto).
 * - `refreshKey`: included in the picker's React key so the dialog is
 *   remounted when the parent bumps it after a save.
 * - `onSaved`: invoked after every successful assignment or reset.
 *
 * A failed "Reset all to auto" is shown inline instead of being dropped as
 * an unhandled promise rejection.
 */
function ModelSettingsPanel({
  aux,
  refreshKey,
  onSaved,
}: {
  aux: AuxiliaryModelsResponse | null;
  refreshKey: number;
  onSaved(): void;
}) {
  const [expanded, setExpanded] = useState(false);
  const [picker, setPicker] = useState<PickerTarget | null>(null);
  const [resetBusy, setResetBusy] = useState(false);
  const [resetError, setResetError] = useState<string | null>(null);

  const mainProv = aux?.main.provider ?? "";
  const mainModel = aux?.main.model ?? "";

  // Errors propagate to the caller: ModelPickerDialog catches a rejected
  // onApply and displays the message itself.
  const applyAssignment = async ({
    scope,
    task,
    provider,
    model,
  }: {
    scope: "main" | "auxiliary";
    task: string;
    provider: string;
    model: string;
  }) => {
    await api.setModelAssignment({ scope, task, provider, model });
    onSaved();
  };

  const resetAllAux = async () => {
    if (!window.confirm("Reset every auxiliary task to 'auto'? This overrides any per-task overrides you've set.")) {
      return;
    }
    setResetBusy(true);
    setResetError(null);
    try {
      await api.setModelAssignment({
        scope: "auxiliary",
        task: "__reset__",
        provider: "",
        model: "",
      });
      onSaved();
    } catch (e) {
      // This runs from a click handler, so nothing upstream would catch the
      // rejection; surface it to the user here.
      setResetError(e instanceof Error ? e.message : String(e));
    } finally {
      setResetBusy(false);
    }
  };

  return (
    <Card>
      <CardHeader className="pb-3">
        <div className="flex items-center justify-between gap-3 flex-wrap">
          <div className="flex items-center gap-2">
            <Settings2 className="h-4 w-4 text-muted-foreground" />
            <CardTitle className="text-sm">Model Settings</CardTitle>
            <span className="text-[10px] text-muted-foreground">
              applies to new sessions
            </span>
          </div>
          <Button
            size="sm"
            outlined
            onClick={() => setExpanded((v) => !v)}
            className="text-xs"
          >
            {expanded ? "Hide auxiliary" : "Show auxiliary"}
            <ChevronDown
              className={`h-3 w-3 transition-transform ${expanded ? "rotate-180" : ""}`}
            />
          </Button>
        </div>
      </CardHeader>
      <CardContent className="space-y-3 pt-0">
        {/* Main row */}
        <div className="flex items-center justify-between gap-3 bg-muted/20 border border-border/50 px-3 py-2">
          <div className="min-w-0 flex-1">
            <div className="flex items-center gap-2 mb-0.5">
              <Star className="h-3 w-3 text-primary" />
              <span className="text-xs font-medium uppercase tracking-wider">
                Main model
              </span>
            </div>
            <div className="text-xs font-mono text-muted-foreground truncate">
              {mainProv || "(unset)"}
              {mainProv && mainModel && " · "}
              {mainModel || "(unset)"}
            </div>
          </div>
          <Button
            size="sm"
            onClick={() => setPicker({ kind: "main" })}
            className="text-xs"
          >
            Change
          </Button>
        </div>
        {/* Auxiliary rows */}
        {expanded && (
          <div className="space-y-1 border-t border-border/50 pt-3">
            <div className="flex items-center justify-between pb-1">
              <div className="text-[10px] uppercase tracking-wider text-muted-foreground">
                Auxiliary tasks
              </div>
              <Button
                size="sm"
                outlined
                onClick={resetAllAux}
                disabled={resetBusy}
                className="text-[10px] h-6"
                prefix={resetBusy ? <Spinner /> : null}
              >
                Reset all to auto
              </Button>
            </div>
            {resetError && (
              <p role="alert" className="text-[10px] text-destructive pb-2">
                Reset failed: {resetError}
              </p>
            )}
            <p className="text-[10px] text-muted-foreground/80 pb-2">
              Auxiliary tasks handle side-jobs like vision, session search, and
              compression. <span className="font-mono">auto</span> means
              &quot;use the main model&quot;. Override per-task when you want a
              cheap/fast model for a specific job.
            </p>
            {AUX_TASKS.map((t) => {
              const cur = aux?.tasks.find((a) => a.task === t.key);
              // A missing entry or an empty provider is displayed as auto.
              const isAuto =
                !cur || cur.provider === "auto" || !cur.provider;
              return (
                <div
                  key={t.key}
                  className="flex items-center justify-between gap-3 px-3 py-1.5 border border-border/30 bg-card/50 hover:bg-muted/20 transition-colors"
                >
                  <div className="min-w-0 flex-1">
                    <div className="flex items-baseline gap-2">
                      <span className="text-xs font-medium">{t.label}</span>
                      <span className="text-[10px] text-muted-foreground/60">
                        {t.hint}
                      </span>
                    </div>
                    <div className="text-[10px] font-mono text-muted-foreground truncate">
                      {isAuto
                        ? "auto (use main model)"
                        : `${cur?.provider} · ${cur?.model || "(provider default)"}`}
                    </div>
                  </div>
                  <Button
                    size="sm"
                    outlined
                    onClick={() => setPicker({ kind: "aux", task: t.key })}
                    className="text-[10px] h-6"
                  >
                    Change
                  </Button>
                </div>
              );
            })}
          </div>
        )}
        {picker && (
          <ModelPickerDialog
            key={`picker-${refreshKey}`}
            loader={api.getModelOptions}
            alwaysGlobal
            title={
              picker.kind === "main"
                ? "Set Main Model"
                : `Set Auxiliary: ${
                    AUX_TASKS.find((t) => t.key === picker.task)?.label ??
                    picker.task
                  }`
            }
            onApply={async ({ provider, model }) => {
              await applyAssignment({
                scope: picker.kind === "main" ? "main" : "auxiliary",
                task: picker.kind === "main" ? "" : picker.task,
                provider,
                model,
              });
            }}
            onClose={() => setPicker(null)}
          />
        )}
      </CardContent>
    </Card>
  );
}
/* ──────────────────────────────────────────────────────────────────── */
/* Page */
/* ──────────────────────────────────────────────────────────────────── */
export default function ModelsPage() {
const [days, setDays] = useState(30);
const [data, setData] = useState<ModelsAnalyticsResponse | null>(null);
const [aux, setAux] = useState<AuxiliaryModelsResponse | null>(null);
const [loading, setLoading] = useState(true);
const [error, setError] = useState<string | null>(null);
const [saveKey, setSaveKey] = useState(0);
const { t } = useI18n();
const { setAfterTitle, setEnd } = usePageHeader();
const load = useCallback(() => {
setLoading(true);
setError(null);
api
.getModelsAnalytics(days)
.then(setData)
Promise.all([
api.getModelsAnalytics(days),
api.getAuxiliaryModels().catch(() => null),
])
.then(([models, auxData]) => {
setData(models);
setAux(auxData);
})
.catch((err) => setError(String(err)))
.finally(() => setLoading(false));
}, [days]);
const onAssigned = useCallback(() => {
// Reload aux state after any assignment change.
api
.getAuxiliaryModels()
.then(setAux)
.catch(() => {});
setSaveKey((k) => k + 1);
}, []);
useLayoutEffect(() => {
const periodLabel =
PERIODS.find((p) => p.days === days)?.label ?? `${days}d`;
@ -315,6 +721,13 @@ export default function ModelsPage() {
return (
<div className="flex flex-col gap-6">
<PluginSlot name="models:top" />
<ModelSettingsPanel
aux={aux}
refreshKey={saveKey}
onSaved={onAssigned}
/>
{loading && !data && (
<div className="flex items-center justify-center py-24">
<Spinner className="text-2xl text-primary" />
@ -369,7 +782,14 @@ export default function ModelsPage() {
{data.models.length > 0 ? (
<div className="grid gap-4 md:grid-cols-2 xl:grid-cols-3">
{data.models.map((m, i) => (
<ModelCard key={`${m.model}:${m.provider}`} entry={m} rank={i + 1} />
<ModelCard
key={`${m.model}:${m.provider}`}
entry={m}
rank={i + 1}
main={aux?.main ?? null}
aux={aux?.tasks ?? []}
onAssigned={onAssigned}
/>
))}
</div>
) : (

View file

@ -0,0 +1,207 @@
---
sidebar_position: 3
---
# Configuring Models
Hermes uses two kinds of model slots:
- **Main model** — what the agent thinks with. Every user message, every tool-call loop, every streamed response goes through this model.
- **Auxiliary models** — smaller side-jobs the agent offloads. Context compression, vision (image analysis), web-page summarization, session search, approval scoring, MCP tool routing, session-title generation, and skill search. Each has its own slot and can be overridden independently.
This page covers configuring both from the dashboard. If you prefer config files or the CLI, jump to [Alternative methods](#alternative-methods) at the bottom.
## The Models page
Open the dashboard and click **Models** in the sidebar. You get two sections:
1. **Model Settings** — the top panel, where you assign models to slots.
2. **Usage analytics** — ranked cards showing every model that ran a session in the selected period, with token counts, cost, and capability badges.
![Models page overview](/img/docs/dashboard-models/overview.png)
The top card is the **Model Settings** panel. The main row always shows what the agent will spin up for new sessions. Click **Change** to open the picker.
## Setting the main model
Click **Change** on the Main model row:
![Model picker dialog](/img/docs/dashboard-models/picker-dialog.png)
The picker has two columns:
- **Left** — authenticated providers. Only providers you've set up (API key set, OAuth'd, or defined as a custom endpoint) show up here. If a provider is missing, head to **Keys** and add its credential.
- **Right** — the curated model list for the selected provider. These are the agentic models Hermes recommends for that provider, not the raw `/models` dump (which on OpenRouter lists 400+ models, including TTS, image generators, and rerankers).
Type in the filter box to narrow by provider name, slug, or model ID.
Pick a model, hit **Switch**, and Hermes writes it to `~/.hermes/config.yaml` under the `model` section. **This applies to new sessions only** — any chat tab you already have open keeps running whatever model it started with. To hot-swap the current chat, use the `/model` slash command inside it.
## Setting auxiliary models
Click **Show auxiliary** to reveal the eight task slots:
![Auxiliary panel expanded](/img/docs/dashboard-models/auxiliary-expanded.png)
Every auxiliary task defaults to `auto` — meaning Hermes uses your main model for that job too. Override a specific task when you want a cheaper or faster model for a side-job.
### Common override patterns
| Task | When to override |
|---|---|
| **Title Gen** | Almost always. A $0.10/M flash model writes session titles as well as Opus. Default config sets this to `google/gemini-3-flash-preview` on OpenRouter. |
| **Vision** | When your main model is a coding model without vision (e.g. Kimi, DeepSeek). Point it at `google/gemini-2.5-flash` or `gpt-4o-mini`. |
| **Compression** | When you're burning reasoning tokens on Opus/M2.7 just to summarize context. A fast chat model does the job at 1/50th the cost. |
| **Session Search** | When recall queries fan out — the default `max_concurrency` is 3. A cheap model keeps the bill predictable. |
| **Approval** | For `approval_mode: smart` — a fast/cheap model (haiku, flash, gpt-5-mini) decides whether to auto-approve low-risk commands. Expensive models here are a waste. |
| **Web Extract** | When you use `web_extract` heavily. Same logic as compression — summarization doesn't need reasoning. |
| **Skills Hub** | `hermes skills search` uses this. Usually fine at `auto`. |
| **MCP** | MCP tool routing. Usually fine at `auto`. |
### Per-task override
Click **Change** on any auxiliary row. Same picker opens, same behavior — pick provider + model, hit Switch. The row updates to show `provider · model` instead of `auto (use main model)`.
### Reset all to auto
If you've over-tuned and want to start over, click **Reset all to auto** at the top of the auxiliary section. Every slot goes back to using your main model.
## The "Use as" shortcut
Every model card on the page has a **Use as** dropdown. This is the fast path — pick a model you see in your analytics, click **Use as**, and assign it to the main slot or any specific auxiliary task in one click:
![Use as dropdown](/img/docs/dashboard-models/use-as-dropdown.png)
The dropdown has:
- **Main model** — same as clicking Change on the main row.
- **All auxiliary tasks** — assigns this model to all 8 aux slots at once. Useful when you just want every side-job on a cheap flash model.
- **Individual task options** — Vision, Web Extract, Compression, etc. The currently-assigned model for each task is marked `current`.
Cards are badged with `main` or `aux · <task>` when they're currently assigned to something — so you can see at a glance which of your historical models are wired in where.
## What gets written to `config.yaml`
When you save via the dashboard, Hermes writes to `~/.hermes/config.yaml`:
**Main model:**
```yaml
model:
  provider: openrouter
  default: anthropic/claude-opus-4.7
  base_url: '' # cleared on provider switch
  api_mode: chat_completions
```
**Auxiliary override (example — vision on gemini-flash):**
```yaml
auxiliary:
  vision:
    provider: openrouter
    model: google/gemini-2.5-flash
    base_url: ''
    api_key: ''
    timeout: 120
    extra_body: {}
    download_timeout: 30
```
**Auxiliary on auto (default):**
```yaml
auxiliary:
  compression:
    provider: auto
    model: ''
    base_url: ''
    # ... other fields unchanged
```
`provider: auto` with `model: ''` tells Hermes to use the main model for that task.
## When does it take effect?
- **CLI** (`hermes chat`): next `hermes chat` invocation.
- **Gateway** (Telegram, Discord, Slack, etc.): next *new* session. Existing sessions keep their model. Restart the gateway (`hermes gateway restart`) if you want to force all sessions to pick up the change.
- **Dashboard chat tab** (`/chat`): next new PTY. The currently-open chat keeps its model — use `/model` inside it to hot-swap.
Changes never invalidate prompt caches on running sessions. That's deliberate: swapping the main model inside a session requires a cache reset (the system prompt contains model-specific content), and we reserve that for the explicit `/model` slash command inside chat.
## Troubleshooting
### "No authenticated providers" in the picker
Hermes lists a provider only if it has a working credential. Check **Keys** in the sidebar — you should see one of: an API key, a successful OAuth, or a custom endpoint URL. If the provider you want isn't there, run `hermes setup` to wire it up, or go to **Keys** and add the env var.
### Main model didn't change in my running chat
Expected. The dashboard writes `config.yaml`, which new sessions read. The currently-open chat is a live agent process — it keeps whatever model it was spawned with. Use `/model <name>` inside the chat to hot-swap that specific session.
### Auxiliary override "didn't take effect"
Three things to check:
1. **Did you start a new session?** Existing chats don't re-read config.
2. **Is `provider` set to something other than `auto`?** If the field shows `auto`, the task is still using your main model. Click **Change** and pick a real provider.
3. **Is the provider authenticated?** If you assigned `minimax` to a task but don't have a MiniMax API key, that task falls back to the OpenRouter default and logs a warning in `agent.log`.
### I picked a model but Hermes switched providers on me
On OpenRouter (or any aggregator), bare model names resolve *within* the aggregator first. So `claude-sonnet-4` on OpenRouter becomes `anthropic/claude-sonnet-4.6`, staying on your OpenRouter auth. But if you typed `claude-sonnet-4` on a native Anthropic auth, it would resolve to `claude-sonnet-4-6` and stay on Anthropic. If you see an unexpected provider switch, check that your current provider is what you expect — the picker always shows the current main model at the top of the dialog.
## Alternative methods
### CLI slash command
Inside any `hermes chat` session:
```
/model gpt-5.4 --provider openrouter # session-only
/model gpt-5.4 --provider openrouter --global # also persists to config.yaml
```
`--global` does the same thing the dashboard's **Change** button does, plus it switches the running session in-place.
### `hermes model` subcommand
```bash
hermes model list # list authenticated providers + models
hermes model set anthropic/claude-opus-4.7 --provider openrouter
```
### Direct config edit
Edit `~/.hermes/config.yaml` and restart whatever reads it. See the [Configuration reference](./configuration.md) for the full schema.
### REST API
The dashboard uses three endpoints. Useful for scripting:
```bash
# List authenticated providers + curated model lists
curl -H "X-Hermes-Session-Token: $TOKEN" http://localhost:PORT/api/model/options
# Read current main + auxiliary assignments
curl -H "X-Hermes-Session-Token: $TOKEN" http://localhost:PORT/api/model/auxiliary
# Set the main model
curl -X POST -H "Content-Type: application/json" -H "X-Hermes-Session-Token: $TOKEN" \
-d '{"scope":"main","provider":"openrouter","model":"anthropic/claude-opus-4.7"}' \
http://localhost:PORT/api/model/set
# Override a single auxiliary task
curl -X POST -H "Content-Type: application/json" -H "X-Hermes-Session-Token: $TOKEN" \
-d '{"scope":"auxiliary","task":"vision","provider":"openrouter","model":"google/gemini-2.5-flash"}' \
http://localhost:PORT/api/model/set
# Assign one model to every auxiliary task
curl -X POST -H "Content-Type: application/json" -H "X-Hermes-Session-Token: $TOKEN" \
-d '{"scope":"auxiliary","task":"","provider":"openrouter","model":"google/gemini-2.5-flash"}' \
http://localhost:PORT/api/model/set
# Reset all auxiliary tasks to auto
curl -X POST -H "Content-Type: application/json" -H "X-Hermes-Session-Token: $TOKEN" \
-d '{"scope":"auxiliary","task":"__reset__","provider":"","model":""}' \
http://localhost:PORT/api/model/set
```
The session token is injected into the dashboard HTML at startup and rotates on every server restart. Grab it from the browser devtools (`window.__HERMES_SESSION_TOKEN__`) if you're scripting against a running dashboard.

View file

@ -23,6 +23,7 @@ const sidebars: SidebarsConfig = {
'user-guide/cli',
'user-guide/tui',
'user-guide/configuration',
'user-guide/configuring-models',
'user-guide/sessions',
'user-guide/profiles',
'user-guide/git-worktrees',

Binary file not shown.

After

Width:  |  Height:  |  Size: 593 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 665 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 216 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 649 KiB