mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-19 10:02:16 +00:00
feat(dashboard): configure main + auxiliary models from Models page (#17802)
Dashboard Models page was analytics-only — no way to pick a model as main
for new sessions or override an auxiliary task slot without hand-editing
config.yaml or running a /model slash command inside a chat.
Changes:
- hermes_cli/web_server.py: three REST endpoints (GET /api/model/options,
GET /api/model/auxiliary, POST /api/model/set). Reuses
list_authenticated_providers() from model_switch.py so the REST path
surfaces the same curated model lists as the TUI-gateway model.options
JSON-RPC. POST /api/model/set writes model.provider + model.default for
scope=main, and auxiliary.<task>.{provider,model} for scope=auxiliary
(with task="" meaning 'all 8 slots' and task="__reset__" resetting them
to auto).
- web/src/components/ModelPickerDialog.tsx: accepts an optional loader +
onApply pair so it works without an open chat PTY. ChatSidebar's
gw-WebSocket path still works unchanged (back-compat).
- web/src/pages/ModelsPage.tsx: Model Settings panel at the top showing
main model + collapsible list of 8 auxiliary tasks with per-row Change
buttons and Reset all to auto. Every existing model card gets a
'Use as' dropdown for one-click assignment to main or any aux slot.
Cards badged 'main' or 'aux · <task>' when currently assigned.
- website/docs/user-guide/configuring-models.md: new docs page walking
through both UI paths, aux task override patterns, troubleshooting,
plus REST/CLI alternatives.
- Screenshots under website/static/img/docs/dashboard-models/.
Applies to new sessions only — running sessions keep their model (use
/model slash command to hot-swap a live session). No prompt-cache
invalidation on existing sessions.
This commit is contained in:
parent
718e4e2e7e
commit
3c27efbb91
10 changed files with 1007 additions and 47 deletions
|
|
@ -23,7 +23,7 @@ import time
|
|||
import urllib.parse
|
||||
import urllib.request
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
import yaml
|
||||
|
||||
|
|
@ -445,6 +445,20 @@ class EnvVarReveal(BaseModel):
|
|||
key: str
|
||||
|
||||
|
||||
class ModelAssignment(BaseModel):
|
||||
"""Payload for POST /api/model/set — assign a provider/model to a slot.
|
||||
|
||||
scope="main" → writes model.provider + model.default
|
||||
scope="auxiliary" → writes auxiliary.<task>.provider + auxiliary.<task>.model
|
||||
scope="auxiliary" with task="" → applied to every auxiliary.* slot
|
||||
scope="auxiliary" with task="__reset__" → resets every slot to provider="auto"
|
||||
"""
|
||||
scope: str
|
||||
provider: str
|
||||
model: str
|
||||
task: str = ""
|
||||
|
||||
|
||||
_GATEWAY_HEALTH_URL = os.getenv("GATEWAY_HEALTH_URL")
|
||||
try:
|
||||
_GATEWAY_HEALTH_TIMEOUT = float(os.getenv("GATEWAY_HEALTH_TIMEOUT", "3"))
|
||||
|
|
@ -921,6 +935,206 @@ def get_model_info():
|
|||
return dict(_EMPTY_MODEL_INFO)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Model assignment — pick provider+model for main slot or auxiliary slots.
|
||||
# Mirrors the model.options JSON-RPC from tui_gateway but uses REST so the
|
||||
# Models page (which has no chat PTY open) can drive it.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Canonical auxiliary task slots. Keep in sync with DEFAULT_CONFIG["auxiliary"]
|
||||
# in hermes_cli/config.py — listed here for deterministic ordering in the UI.
|
||||
_AUX_TASK_SLOTS: Tuple[str, ...] = (
|
||||
"vision",
|
||||
"web_extract",
|
||||
"compression",
|
||||
"session_search",
|
||||
"skills_hub",
|
||||
"approval",
|
||||
"mcp",
|
||||
"title_generation",
|
||||
)
|
||||
|
||||
|
||||
@app.get("/api/model/options")
|
||||
def get_model_options():
|
||||
"""Return authenticated providers + their curated model lists.
|
||||
|
||||
REST equivalent of the ``model.options`` JSON-RPC on tui_gateway, so the
|
||||
dashboard Models page can render the picker without a live chat session.
|
||||
The response shape matches ``model.options`` 1:1 so ``ModelPickerDialog``
|
||||
can share the same types.
|
||||
"""
|
||||
try:
|
||||
from hermes_cli.model_switch import list_authenticated_providers
|
||||
|
||||
cfg = load_config()
|
||||
model_cfg = cfg.get("model", {})
|
||||
if isinstance(model_cfg, dict):
|
||||
current_model = model_cfg.get("default", model_cfg.get("name", "")) or ""
|
||||
current_provider = model_cfg.get("provider", "") or ""
|
||||
current_base_url = model_cfg.get("base_url", "") or ""
|
||||
else:
|
||||
current_model = str(model_cfg) if model_cfg else ""
|
||||
current_provider = ""
|
||||
current_base_url = ""
|
||||
|
||||
user_providers = cfg.get("providers") if isinstance(cfg.get("providers"), dict) else {}
|
||||
custom_providers = (
|
||||
cfg.get("custom_providers")
|
||||
if isinstance(cfg.get("custom_providers"), list)
|
||||
else []
|
||||
)
|
||||
|
||||
providers = list_authenticated_providers(
|
||||
current_provider=current_provider,
|
||||
current_base_url=current_base_url,
|
||||
current_model=current_model,
|
||||
user_providers=user_providers,
|
||||
custom_providers=custom_providers,
|
||||
max_models=50,
|
||||
)
|
||||
return {
|
||||
"providers": providers,
|
||||
"model": current_model,
|
||||
"provider": current_provider,
|
||||
}
|
||||
except Exception:
|
||||
_log.exception("GET /api/model/options failed")
|
||||
raise HTTPException(status_code=500, detail="Failed to list model options")
|
||||
|
||||
|
||||
@app.get("/api/model/auxiliary")
|
||||
def get_auxiliary_models():
|
||||
"""Return current auxiliary task assignments.
|
||||
|
||||
Shape:
|
||||
{
|
||||
"tasks": [
|
||||
{"task": "vision", "provider": "auto", "model": "", "base_url": ""},
|
||||
...
|
||||
],
|
||||
"main": {"provider": "openrouter", "model": "anthropic/claude-opus-4.7"},
|
||||
}
|
||||
"""
|
||||
try:
|
||||
cfg = load_config()
|
||||
aux_cfg = cfg.get("auxiliary", {})
|
||||
if not isinstance(aux_cfg, dict):
|
||||
aux_cfg = {}
|
||||
|
||||
tasks = []
|
||||
for slot in _AUX_TASK_SLOTS:
|
||||
slot_cfg = aux_cfg.get(slot, {}) if isinstance(aux_cfg.get(slot), dict) else {}
|
||||
tasks.append({
|
||||
"task": slot,
|
||||
"provider": str(slot_cfg.get("provider", "auto") or "auto"),
|
||||
"model": str(slot_cfg.get("model", "") or ""),
|
||||
"base_url": str(slot_cfg.get("base_url", "") or ""),
|
||||
})
|
||||
|
||||
model_cfg = cfg.get("model", {})
|
||||
if isinstance(model_cfg, dict):
|
||||
main = {
|
||||
"provider": str(model_cfg.get("provider", "") or ""),
|
||||
"model": str(model_cfg.get("default", model_cfg.get("name", "")) or ""),
|
||||
}
|
||||
else:
|
||||
main = {"provider": "", "model": str(model_cfg) if model_cfg else ""}
|
||||
|
||||
return {"tasks": tasks, "main": main}
|
||||
except Exception:
|
||||
_log.exception("GET /api/model/auxiliary failed")
|
||||
raise HTTPException(status_code=500, detail="Failed to read auxiliary config")
|
||||
|
||||
|
||||
@app.post("/api/model/set")
|
||||
async def set_model_assignment(body: ModelAssignment):
|
||||
"""Assign a model to the main slot or an auxiliary task slot.
|
||||
|
||||
Writes to ``~/.hermes/config.yaml`` — applies to **new** sessions only.
|
||||
The currently running chat PTY (if any) is not affected; use the
|
||||
``/model`` slash command inside a chat to hot-swap that specific session.
|
||||
"""
|
||||
scope = (body.scope or "").strip().lower()
|
||||
provider = (body.provider or "").strip()
|
||||
model = (body.model or "").strip()
|
||||
task = (body.task or "").strip().lower()
|
||||
|
||||
if scope not in ("main", "auxiliary"):
|
||||
raise HTTPException(status_code=400, detail="scope must be 'main' or 'auxiliary'")
|
||||
|
||||
try:
|
||||
cfg = load_config()
|
||||
|
||||
if scope == "main":
|
||||
if not provider or not model:
|
||||
raise HTTPException(status_code=400, detail="provider and model required for main")
|
||||
model_cfg = cfg.get("model", {})
|
||||
if not isinstance(model_cfg, dict):
|
||||
model_cfg = {}
|
||||
model_cfg["provider"] = provider
|
||||
model_cfg["default"] = model
|
||||
# Clear stale base_url so the resolver picks the provider's own default.
|
||||
if "base_url" in model_cfg and model_cfg.get("base_url"):
|
||||
model_cfg["base_url"] = ""
|
||||
# Also clear hardcoded context_length override — new model may have
|
||||
# a different context window.
|
||||
if "context_length" in model_cfg:
|
||||
model_cfg.pop("context_length", None)
|
||||
cfg["model"] = model_cfg
|
||||
save_config(cfg)
|
||||
return {"ok": True, "scope": "main", "provider": provider, "model": model}
|
||||
|
||||
# scope == "auxiliary"
|
||||
aux = cfg.get("auxiliary")
|
||||
if not isinstance(aux, dict):
|
||||
aux = {}
|
||||
|
||||
if task == "__reset__":
|
||||
# Reset every slot to provider="auto", model="" — keeps other fields intact.
|
||||
for slot in _AUX_TASK_SLOTS:
|
||||
slot_cfg = aux.get(slot)
|
||||
if not isinstance(slot_cfg, dict):
|
||||
slot_cfg = {}
|
||||
slot_cfg["provider"] = "auto"
|
||||
slot_cfg["model"] = ""
|
||||
aux[slot] = slot_cfg
|
||||
cfg["auxiliary"] = aux
|
||||
save_config(cfg)
|
||||
return {"ok": True, "scope": "auxiliary", "reset": True}
|
||||
|
||||
if not provider:
|
||||
raise HTTPException(status_code=400, detail="provider required for auxiliary")
|
||||
|
||||
targets = [task] if task else list(_AUX_TASK_SLOTS)
|
||||
for slot in targets:
|
||||
if slot not in _AUX_TASK_SLOTS:
|
||||
raise HTTPException(status_code=400, detail=f"unknown auxiliary task: {slot}")
|
||||
slot_cfg = aux.get(slot)
|
||||
if not isinstance(slot_cfg, dict):
|
||||
slot_cfg = {}
|
||||
slot_cfg["provider"] = provider
|
||||
slot_cfg["model"] = model
|
||||
aux[slot] = slot_cfg
|
||||
|
||||
cfg["auxiliary"] = aux
|
||||
save_config(cfg)
|
||||
return {
|
||||
"ok": True,
|
||||
"scope": "auxiliary",
|
||||
"tasks": targets,
|
||||
"provider": provider,
|
||||
"model": model,
|
||||
}
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception:
|
||||
_log.exception("POST /api/model/set failed")
|
||||
raise HTTPException(status_code=500, detail="Failed to save model assignment")
|
||||
|
||||
|
||||
|
||||
|
||||
def _denormalize_config_from_web(config: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Reverse _normalize_config_for_web before saving.
|
||||
|
||||
|
|
|
|||
|
|
@ -11,9 +11,18 @@ import { useEffect, useMemo, useRef, useState } from "react";
|
|||
* Stage 1: pick provider (authenticated providers only)
|
||||
* Stage 2: pick model within that provider
|
||||
*
|
||||
* On confirm, emits `/model <model> --provider <slug> [--global]` through
|
||||
* the parent callback so ChatPage can dispatch it via the existing slash
|
||||
* pipeline. That keeps persistence + actual switch logic in one place.
|
||||
* Two invocation modes:
|
||||
*
|
||||
* 1. Chat-session mode (ChatSidebar) — pass `gw` + `sessionId`. The picker
|
||||
* loads options via `model.options` JSON-RPC and emits the result as a
|
||||
* slash command string (`/model <model> --provider <slug> [--global]`)
|
||||
* through `onSubmit`, which the ChatPage pipes to `slashExec`.
|
||||
*
|
||||
* 2. Standalone mode (ModelsPage, Config settings) — pass a `loader` and
|
||||
* `onApply`. The picker fetches options via the REST endpoint and calls
|
||||
* `onApply(provider, model, persistGlobal)` instead of emitting a slash
|
||||
* command. This lets the Models page reuse the same UI without
|
||||
* requiring an open chat PTY.
|
||||
*/
|
||||
|
||||
interface ModelOptionProvider {
|
||||
|
|
@ -32,14 +41,38 @@ interface ModelOptionsResponse {
|
|||
}
|
||||
|
||||
interface Props {
|
||||
gw: GatewayClient;
|
||||
sessionId: string;
|
||||
/** Chat-mode: when present, picker emits a slash command via onSubmit. */
|
||||
gw?: GatewayClient;
|
||||
sessionId?: string;
|
||||
onSubmit?(slashCommand: string): void;
|
||||
|
||||
/** Standalone-mode: when present (and onSubmit absent), picker calls onApply. */
|
||||
loader?(): Promise<ModelOptionsResponse>;
|
||||
onApply?(args: {
|
||||
provider: string;
|
||||
model: string;
|
||||
persistGlobal: boolean;
|
||||
}): Promise<void> | void;
|
||||
|
||||
onClose(): void;
|
||||
/** Parent runs the resulting slash command through slashExec. */
|
||||
onSubmit(slashCommand: string): void;
|
||||
title?: string;
|
||||
/** If true, hides "Persist globally" checkbox — always saves to config.yaml. */
|
||||
alwaysGlobal?: boolean;
|
||||
}
|
||||
|
||||
export function ModelPickerDialog({ gw, sessionId, onClose, onSubmit }: Props) {
|
||||
export function ModelPickerDialog(props: Props) {
|
||||
const {
|
||||
gw,
|
||||
sessionId,
|
||||
onSubmit,
|
||||
loader,
|
||||
onApply,
|
||||
onClose,
|
||||
title = "Switch Model",
|
||||
alwaysGlobal = false,
|
||||
} = props;
|
||||
const standalone = !!loader && !!onApply;
|
||||
|
||||
const [providers, setProviders] = useState<ModelOptionProvider[]>([]);
|
||||
const [currentModel, setCurrentModel] = useState("");
|
||||
const [currentProviderSlug, setCurrentProviderSlug] = useState("");
|
||||
|
|
@ -48,17 +81,22 @@ export function ModelPickerDialog({ gw, sessionId, onClose, onSubmit }: Props) {
|
|||
const [selectedSlug, setSelectedSlug] = useState("");
|
||||
const [selectedModel, setSelectedModel] = useState("");
|
||||
const [query, setQuery] = useState("");
|
||||
const [persistGlobal, setPersistGlobal] = useState(false);
|
||||
const [persistGlobal, setPersistGlobal] = useState(alwaysGlobal);
|
||||
const [applying, setApplying] = useState(false);
|
||||
const closedRef = useRef(false);
|
||||
|
||||
// Load providers + models on open.
|
||||
useEffect(() => {
|
||||
closedRef.current = false;
|
||||
|
||||
gw.request<ModelOptionsResponse>(
|
||||
"model.options",
|
||||
sessionId ? { session_id: sessionId } : {},
|
||||
)
|
||||
const promise = standalone
|
||||
? (loader as () => Promise<ModelOptionsResponse>)()
|
||||
: (gw as GatewayClient).request<ModelOptionsResponse>(
|
||||
"model.options",
|
||||
sessionId ? { session_id: sessionId } : {},
|
||||
);
|
||||
|
||||
promise
|
||||
.then((r) => {
|
||||
if (closedRef.current) return;
|
||||
const next = r?.providers ?? [];
|
||||
|
|
@ -80,7 +118,9 @@ export function ModelPickerDialog({ gw, sessionId, onClose, onSubmit }: Props) {
|
|||
return () => {
|
||||
closedRef.current = true;
|
||||
};
|
||||
}, [gw, sessionId]);
|
||||
// Deliberately omit props from deps — stable for the dialog's lifetime.
|
||||
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||
}, []);
|
||||
|
||||
// Esc closes.
|
||||
useEffect(() => {
|
||||
|
|
@ -125,15 +165,31 @@ export function ModelPickerDialog({ gw, sessionId, onClose, onSubmit }: Props) {
|
|||
[models, needle],
|
||||
);
|
||||
|
||||
const canConfirm = !!selectedProvider && !!selectedModel;
|
||||
const canConfirm = !!selectedProvider && !!selectedModel && !applying;
|
||||
|
||||
const confirm = () => {
|
||||
if (!canConfirm) return;
|
||||
const global = persistGlobal ? " --global" : "";
|
||||
onSubmit(
|
||||
`/model ${selectedModel} --provider ${selectedProvider.slug}${global}`,
|
||||
);
|
||||
onClose();
|
||||
const confirm = async () => {
|
||||
if (!canConfirm || !selectedProvider) return;
|
||||
if (standalone && onApply) {
|
||||
setApplying(true);
|
||||
try {
|
||||
await onApply({
|
||||
provider: selectedProvider.slug,
|
||||
model: selectedModel,
|
||||
persistGlobal,
|
||||
});
|
||||
onClose();
|
||||
} catch (e) {
|
||||
setError(e instanceof Error ? e.message : String(e));
|
||||
} finally {
|
||||
setApplying(false);
|
||||
}
|
||||
} else if (onSubmit) {
|
||||
const global = persistGlobal ? " --global" : "";
|
||||
onSubmit(
|
||||
`/model ${selectedModel} --provider ${selectedProvider.slug}${global}`,
|
||||
);
|
||||
onClose();
|
||||
}
|
||||
};
|
||||
|
||||
return (
|
||||
|
|
@ -160,7 +216,7 @@ export function ModelPickerDialog({ gw, sessionId, onClose, onSubmit }: Props) {
|
|||
id="model-picker-title"
|
||||
className="font-display text-base tracking-wider uppercase"
|
||||
>
|
||||
Switch Model
|
||||
{title}
|
||||
</h2>
|
||||
<p className="text-xs text-muted-foreground mt-1 font-mono">
|
||||
current: {currentModel || "(unknown)"}
|
||||
|
|
@ -212,22 +268,28 @@ export function ModelPickerDialog({ gw, sessionId, onClose, onSubmit }: Props) {
|
|||
</div>
|
||||
|
||||
<footer className="border-t border-border p-3 flex items-center justify-between gap-3 flex-wrap">
|
||||
<label className="flex items-center gap-2 text-xs text-muted-foreground cursor-pointer select-none">
|
||||
<input
|
||||
type="checkbox"
|
||||
checked={persistGlobal}
|
||||
onChange={(e) => setPersistGlobal(e.target.checked)}
|
||||
className="cursor-pointer"
|
||||
/>
|
||||
Persist globally (otherwise this session only)
|
||||
</label>
|
||||
{alwaysGlobal ? (
|
||||
<span className="text-xs text-muted-foreground">
|
||||
Saves to config.yaml — applies to new sessions.
|
||||
</span>
|
||||
) : (
|
||||
<label className="flex items-center gap-2 text-xs text-muted-foreground cursor-pointer select-none">
|
||||
<input
|
||||
type="checkbox"
|
||||
checked={persistGlobal}
|
||||
onChange={(e) => setPersistGlobal(e.target.checked)}
|
||||
className="cursor-pointer"
|
||||
/>
|
||||
Persist globally (otherwise this session only)
|
||||
</label>
|
||||
)}
|
||||
|
||||
<div className="flex items-center gap-2 ml-auto">
|
||||
<Button outlined onClick={onClose}>
|
||||
<Button outlined onClick={onClose} disabled={applying}>
|
||||
Cancel
|
||||
</Button>
|
||||
<Button onClick={confirm} disabled={!canConfirm}>
|
||||
Switch
|
||||
{applying ? <Spinner /> : "Switch"}
|
||||
</Button>
|
||||
</div>
|
||||
</footer>
|
||||
|
|
|
|||
|
|
@ -69,6 +69,14 @@ export const api = {
|
|||
getDefaults: () => fetchJSON<Record<string, unknown>>("/api/config/defaults"),
|
||||
getSchema: () => fetchJSON<{ fields: Record<string, unknown>; category_order: string[] }>("/api/config/schema"),
|
||||
getModelInfo: () => fetchJSON<ModelInfoResponse>("/api/model/info"),
|
||||
getModelOptions: () => fetchJSON<ModelOptionsResponse>("/api/model/options"),
|
||||
getAuxiliaryModels: () => fetchJSON<AuxiliaryModelsResponse>("/api/model/auxiliary"),
|
||||
setModelAssignment: (body: ModelAssignmentRequest) =>
|
||||
fetchJSON<ModelAssignmentResponse>("/api/model/set", {
|
||||
method: "POST",
|
||||
headers: { "Content-Type": "application/json" },
|
||||
body: JSON.stringify(body),
|
||||
}),
|
||||
saveConfig: (config: Record<string, unknown>) =>
|
||||
fetchJSON<{ ok: boolean }>("/api/config", {
|
||||
method: "PUT",
|
||||
|
|
@ -473,6 +481,54 @@ export interface ModelInfoResponse {
|
|||
};
|
||||
}
|
||||
|
||||
// ── Model options / assignment types ──────────────────────────────────
|
||||
|
||||
export interface ModelOptionProvider {
|
||||
name: string;
|
||||
slug: string;
|
||||
models?: string[];
|
||||
total_models?: number;
|
||||
is_current?: boolean;
|
||||
is_user_defined?: boolean;
|
||||
source?: string;
|
||||
warning?: string;
|
||||
}
|
||||
|
||||
export interface ModelOptionsResponse {
|
||||
model?: string;
|
||||
provider?: string;
|
||||
providers?: ModelOptionProvider[];
|
||||
}
|
||||
|
||||
export interface AuxiliaryTaskAssignment {
|
||||
task: string;
|
||||
provider: string;
|
||||
model: string;
|
||||
base_url: string;
|
||||
}
|
||||
|
||||
export interface AuxiliaryModelsResponse {
|
||||
tasks: AuxiliaryTaskAssignment[];
|
||||
main: { provider: string; model: string };
|
||||
}
|
||||
|
||||
export interface ModelAssignmentRequest {
|
||||
scope: "main" | "auxiliary";
|
||||
provider: string;
|
||||
model: string;
|
||||
/** For auxiliary: task slot name, "" for all, "__reset__" to reset all. */
|
||||
task?: string;
|
||||
}
|
||||
|
||||
export interface ModelAssignmentResponse {
|
||||
ok: boolean;
|
||||
scope?: string;
|
||||
provider?: string;
|
||||
model?: string;
|
||||
tasks?: string[];
|
||||
reset?: boolean;
|
||||
}
|
||||
|
||||
// ── OAuth provider types ────────────────────────────────────────────────
|
||||
|
||||
export interface OAuthProviderStatus {
|
||||
|
|
|
|||
|
|
@ -1,15 +1,23 @@
|
|||
import { useCallback, useEffect, useLayoutEffect, useState } from "react";
|
||||
import {
|
||||
Brain,
|
||||
ChevronDown,
|
||||
Cpu,
|
||||
DollarSign,
|
||||
Eye,
|
||||
RefreshCw,
|
||||
Settings2,
|
||||
Star,
|
||||
Wrench,
|
||||
Zap,
|
||||
} from "lucide-react";
|
||||
import { api } from "@/lib/api";
|
||||
import type { ModelsAnalyticsModelEntry, ModelsAnalyticsResponse } from "@/lib/api";
|
||||
import type {
|
||||
AuxiliaryModelsResponse,
|
||||
AuxiliaryTaskAssignment,
|
||||
ModelsAnalyticsModelEntry,
|
||||
ModelsAnalyticsResponse,
|
||||
} from "@/lib/api";
|
||||
import { timeAgo } from "@/lib/utils";
|
||||
import { formatTokenCount } from "@/lib/format";
|
||||
import { Button, Spinner, Stats } from "@nous-research/ui";
|
||||
|
|
@ -18,6 +26,7 @@ import { Badge } from "@nous-research/ui";
|
|||
import { usePageHeader } from "@/contexts/usePageHeader";
|
||||
import { useI18n } from "@/i18n";
|
||||
import { PluginSlot } from "@/plugins";
|
||||
import { ModelPickerDialog } from "@/components/ModelPickerDialog";
|
||||
|
||||
const PERIODS = [
|
||||
{ label: "7d", days: 7 },
|
||||
|
|
@ -25,6 +34,18 @@ const PERIODS = [
|
|||
{ label: "90d", days: 90 },
|
||||
] as const;
|
||||
|
||||
// Must match _AUX_TASK_SLOTS in hermes_cli/web_server.py.
|
||||
const AUX_TASKS: readonly { key: string; label: string; hint: string }[] = [
|
||||
{ key: "vision", label: "Vision", hint: "Image analysis" },
|
||||
{ key: "web_extract", label: "Web Extract", hint: "Page summarization" },
|
||||
{ key: "compression", label: "Compression", hint: "Context compaction" },
|
||||
{ key: "session_search", label: "Session Search", hint: "Recall queries" },
|
||||
{ key: "skills_hub", label: "Skills Hub", hint: "Skill search" },
|
||||
{ key: "approval", label: "Approval", hint: "Smart auto-approve" },
|
||||
{ key: "mcp", label: "MCP", hint: "MCP tool routing" },
|
||||
{ key: "title_generation", label: "Title Gen", hint: "Session titles" },
|
||||
] as const;
|
||||
|
||||
function formatTokens(n: number): string {
|
||||
if (n >= 1_000_000) return `${(n / 1_000_000).toFixed(1)}M`;
|
||||
if (n >= 1_000) return `${(n / 1_000).toFixed(1)}K`;
|
||||
|
|
@ -134,20 +155,168 @@ function CapabilityBadges({
|
|||
);
|
||||
}
|
||||
|
||||
/* ──────────────────────────────────────────────────────────────────── */
|
||||
/* Per-card "Use as" menu */
|
||||
/* ──────────────────────────────────────────────────────────────────── */
|
||||
|
||||
function UseAsMenu({
|
||||
provider,
|
||||
model,
|
||||
isMain,
|
||||
mainAuxTask,
|
||||
onAssigned,
|
||||
}: {
|
||||
provider: string;
|
||||
model: string;
|
||||
/** True when this card's model+provider match config.yaml's main slot. */
|
||||
isMain: boolean;
|
||||
/** If this model is assigned to a specific aux task, that task's key. */
|
||||
mainAuxTask: string | null;
|
||||
onAssigned(): void;
|
||||
}) {
|
||||
const [open, setOpen] = useState(false);
|
||||
const [busy, setBusy] = useState(false);
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
|
||||
const assign = async (
|
||||
scope: "main" | "auxiliary",
|
||||
task: string,
|
||||
) => {
|
||||
if (!provider || !model) {
|
||||
setError("Missing provider/model");
|
||||
return;
|
||||
}
|
||||
setBusy(true);
|
||||
setError(null);
|
||||
try {
|
||||
await api.setModelAssignment({ scope, provider, model, task });
|
||||
onAssigned();
|
||||
setOpen(false);
|
||||
} catch (e) {
|
||||
setError(e instanceof Error ? e.message : String(e));
|
||||
} finally {
|
||||
setBusy(false);
|
||||
}
|
||||
};
|
||||
|
||||
// Close on outside click.
|
||||
useEffect(() => {
|
||||
if (!open) return;
|
||||
const onDown = (e: MouseEvent) => {
|
||||
const target = e.target as HTMLElement | null;
|
||||
if (target && !target.closest?.("[data-use-as-menu]")) setOpen(false);
|
||||
};
|
||||
window.addEventListener("mousedown", onDown);
|
||||
return () => window.removeEventListener("mousedown", onDown);
|
||||
}, [open]);
|
||||
|
||||
return (
|
||||
<div className="relative" data-use-as-menu>
|
||||
<Button
|
||||
size="sm"
|
||||
outlined
|
||||
onClick={() => setOpen((v) => !v)}
|
||||
disabled={busy}
|
||||
className="text-[10px] h-6 px-2"
|
||||
prefix={busy ? <Spinner /> : null}
|
||||
>
|
||||
Use as <ChevronDown className="h-3 w-3" />
|
||||
</Button>
|
||||
{open && (
|
||||
<div className="absolute right-0 top-full mt-1 z-50 min-w-[220px] border border-border bg-card shadow-lg">
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => assign("main", "")}
|
||||
disabled={busy}
|
||||
className="flex w-full items-center justify-between px-3 py-2 text-xs hover:bg-muted/50 disabled:opacity-40"
|
||||
>
|
||||
<span className="flex items-center gap-2">
|
||||
<Star className="h-3 w-3" />
|
||||
Main model
|
||||
</span>
|
||||
{isMain && (
|
||||
<span className="text-[9px] uppercase tracking-wider text-primary/80">
|
||||
current
|
||||
</span>
|
||||
)}
|
||||
</button>
|
||||
|
||||
<div className="border-t border-border/50 px-3 py-1.5 text-[9px] uppercase tracking-wider text-muted-foreground">
|
||||
Auxiliary task
|
||||
</div>
|
||||
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => assign("auxiliary", "")}
|
||||
disabled={busy}
|
||||
className="flex w-full items-center justify-between px-3 py-1.5 text-xs hover:bg-muted/50 disabled:opacity-40"
|
||||
>
|
||||
<span>All auxiliary tasks</span>
|
||||
</button>
|
||||
|
||||
{AUX_TASKS.map((t) => (
|
||||
<button
|
||||
key={t.key}
|
||||
type="button"
|
||||
onClick={() => assign("auxiliary", t.key)}
|
||||
disabled={busy}
|
||||
className="flex w-full items-center justify-between px-3 py-1.5 text-xs hover:bg-muted/50 disabled:opacity-40"
|
||||
>
|
||||
<span>{t.label}</span>
|
||||
{mainAuxTask === t.key && (
|
||||
<span className="text-[9px] uppercase tracking-wider text-primary/80">
|
||||
current
|
||||
</span>
|
||||
)}
|
||||
</button>
|
||||
))}
|
||||
|
||||
{error && (
|
||||
<div className="px-3 py-2 text-[10px] text-destructive border-t border-border/50">
|
||||
{error}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
/* ──────────────────────────────────────────────────────────────────── */
|
||||
/* ModelCard */
|
||||
/* ──────────────────────────────────────────────────────────────────── */
|
||||
|
||||
function ModelCard({
|
||||
entry,
|
||||
rank,
|
||||
main,
|
||||
aux,
|
||||
onAssigned,
|
||||
}: {
|
||||
entry: ModelsAnalyticsModelEntry;
|
||||
rank: number;
|
||||
main: { provider: string; model: string } | null;
|
||||
aux: AuxiliaryTaskAssignment[];
|
||||
onAssigned(): void;
|
||||
}) {
|
||||
const { t } = useI18n();
|
||||
const provider = entry.provider || modelVendor(entry.model);
|
||||
const totalTokens = entry.input_tokens + entry.output_tokens;
|
||||
const caps = entry.capabilities;
|
||||
|
||||
const isMain =
|
||||
!!main &&
|
||||
main.provider === provider &&
|
||||
main.model === entry.model;
|
||||
|
||||
// First aux task currently using this model (if any).
|
||||
const mainAuxTask =
|
||||
aux.find(
|
||||
(a) => a.provider === provider && a.model === entry.model,
|
||||
)?.task ?? null;
|
||||
|
||||
return (
|
||||
<Card>
|
||||
<Card className={isMain ? "ring-1 ring-primary/40" : undefined}>
|
||||
<CardHeader className="pb-3">
|
||||
<div className="flex items-start justify-between gap-2">
|
||||
<div className="min-w-0 flex-1">
|
||||
|
|
@ -158,6 +327,16 @@ function ModelCard({
|
|||
<CardTitle className="text-sm font-mono-ui truncate">
|
||||
{shortModelName(entry.model)}
|
||||
</CardTitle>
|
||||
{isMain && (
|
||||
<span className="inline-flex items-center gap-0.5 bg-primary/15 px-1.5 py-0.5 text-[9px] font-medium uppercase tracking-wider text-primary">
|
||||
<Star className="h-2.5 w-2.5" /> main
|
||||
</span>
|
||||
)}
|
||||
{mainAuxTask && (
|
||||
<span className="inline-flex items-center bg-purple-500/10 px-1.5 py-0.5 text-[9px] font-medium uppercase tracking-wider text-purple-600 dark:text-purple-400">
|
||||
aux · {mainAuxTask}
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
<div className="flex items-center gap-2 mt-1">
|
||||
{provider && (
|
||||
|
|
@ -177,13 +356,22 @@ function ModelCard({
|
|||
)}
|
||||
</div>
|
||||
</div>
|
||||
<div className="text-right shrink-0">
|
||||
<div className="text-xs font-mono font-semibold">
|
||||
{formatTokens(totalTokens)}
|
||||
</div>
|
||||
<div className="text-[10px] text-muted-foreground">
|
||||
{t.models.tokens}
|
||||
<div className="flex flex-col items-end gap-1 shrink-0">
|
||||
<div className="text-right">
|
||||
<div className="text-xs font-mono font-semibold">
|
||||
{formatTokens(totalTokens)}
|
||||
</div>
|
||||
<div className="text-[10px] text-muted-foreground">
|
||||
{t.models.tokens}
|
||||
</div>
|
||||
</div>
|
||||
<UseAsMenu
|
||||
provider={provider}
|
||||
model={entry.model}
|
||||
isMain={isMain}
|
||||
mainAuxTask={mainAuxTask}
|
||||
onAssigned={onAssigned}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
</CardHeader>
|
||||
|
|
@ -246,24 +434,242 @@ function ModelCard({
|
|||
);
|
||||
}
|
||||
|
||||
/* ──────────────────────────────────────────────────────────────────── */
|
||||
/* Model Settings panel (top of page) */
|
||||
/* ──────────────────────────────────────────────────────────────────── */
|
||||
|
||||
type PickerTarget =
|
||||
| { kind: "main" }
|
||||
| { kind: "aux"; task: string };
|
||||
|
||||
function ModelSettingsPanel({
|
||||
aux,
|
||||
refreshKey,
|
||||
onSaved,
|
||||
}: {
|
||||
aux: AuxiliaryModelsResponse | null;
|
||||
refreshKey: number;
|
||||
onSaved(): void;
|
||||
}) {
|
||||
const [expanded, setExpanded] = useState(false);
|
||||
const [picker, setPicker] = useState<PickerTarget | null>(null);
|
||||
const [resetBusy, setResetBusy] = useState(false);
|
||||
|
||||
const mainProv = aux?.main.provider ?? "";
|
||||
const mainModel = aux?.main.model ?? "";
|
||||
|
||||
const applyAssignment = async ({
|
||||
scope,
|
||||
task,
|
||||
provider,
|
||||
model,
|
||||
}: {
|
||||
scope: "main" | "auxiliary";
|
||||
task: string;
|
||||
provider: string;
|
||||
model: string;
|
||||
}) => {
|
||||
await api.setModelAssignment({ scope, task, provider, model });
|
||||
onSaved();
|
||||
};
|
||||
|
||||
const resetAllAux = async () => {
|
||||
if (!window.confirm("Reset every auxiliary task to 'auto'? This overrides any per-task overrides you've set.")) {
|
||||
return;
|
||||
}
|
||||
setResetBusy(true);
|
||||
try {
|
||||
await api.setModelAssignment({
|
||||
scope: "auxiliary",
|
||||
task: "__reset__",
|
||||
provider: "",
|
||||
model: "",
|
||||
});
|
||||
onSaved();
|
||||
} finally {
|
||||
setResetBusy(false);
|
||||
}
|
||||
};
|
||||
|
||||
return (
|
||||
<Card>
|
||||
<CardHeader className="pb-3">
|
||||
<div className="flex items-center justify-between gap-3 flex-wrap">
|
||||
<div className="flex items-center gap-2">
|
||||
<Settings2 className="h-4 w-4 text-muted-foreground" />
|
||||
<CardTitle className="text-sm">Model Settings</CardTitle>
|
||||
<span className="text-[10px] text-muted-foreground">
|
||||
applies to new sessions
|
||||
</span>
|
||||
</div>
|
||||
<Button
|
||||
size="sm"
|
||||
outlined
|
||||
onClick={() => setExpanded((v) => !v)}
|
||||
className="text-xs"
|
||||
>
|
||||
{expanded ? "Hide auxiliary" : "Show auxiliary"}
|
||||
<ChevronDown
|
||||
className={`h-3 w-3 transition-transform ${expanded ? "rotate-180" : ""}`}
|
||||
/>
|
||||
</Button>
|
||||
</div>
|
||||
</CardHeader>
|
||||
|
||||
<CardContent className="space-y-3 pt-0">
|
||||
{/* Main row */}
|
||||
<div className="flex items-center justify-between gap-3 bg-muted/20 border border-border/50 px-3 py-2">
|
||||
<div className="min-w-0 flex-1">
|
||||
<div className="flex items-center gap-2 mb-0.5">
|
||||
<Star className="h-3 w-3 text-primary" />
|
||||
<span className="text-xs font-medium uppercase tracking-wider">
|
||||
Main model
|
||||
</span>
|
||||
</div>
|
||||
<div className="text-xs font-mono text-muted-foreground truncate">
|
||||
{mainProv || "(unset)"}
|
||||
{mainProv && mainModel && " · "}
|
||||
{mainModel || "(unset)"}
|
||||
</div>
|
||||
</div>
|
||||
<Button
|
||||
size="sm"
|
||||
onClick={() => setPicker({ kind: "main" })}
|
||||
className="text-xs"
|
||||
>
|
||||
Change
|
||||
</Button>
|
||||
</div>
|
||||
|
||||
{/* Auxiliary rows */}
|
||||
{expanded && (
|
||||
<div className="space-y-1 border-t border-border/50 pt-3">
|
||||
<div className="flex items-center justify-between pb-1">
|
||||
<div className="text-[10px] uppercase tracking-wider text-muted-foreground">
|
||||
Auxiliary tasks
|
||||
</div>
|
||||
<Button
|
||||
size="sm"
|
||||
outlined
|
||||
onClick={resetAllAux}
|
||||
disabled={resetBusy}
|
||||
className="text-[10px] h-6"
|
||||
prefix={resetBusy ? <Spinner /> : null}
|
||||
>
|
||||
Reset all to auto
|
||||
</Button>
|
||||
</div>
|
||||
|
||||
<p className="text-[10px] text-muted-foreground/80 pb-2">
|
||||
Auxiliary tasks handle side-jobs like vision, session search, and
|
||||
compression. <span className="font-mono">auto</span> means
|
||||
"use the main model". Override per-task when you want a
|
||||
cheap/fast model for a specific job.
|
||||
</p>
|
||||
|
||||
{AUX_TASKS.map((t) => {
|
||||
const cur = aux?.tasks.find((a) => a.task === t.key);
|
||||
const isAuto =
|
||||
!cur || cur.provider === "auto" || !cur.provider;
|
||||
return (
|
||||
<div
|
||||
key={t.key}
|
||||
className="flex items-center justify-between gap-3 px-3 py-1.5 border border-border/30 bg-card/50 hover:bg-muted/20 transition-colors"
|
||||
>
|
||||
<div className="min-w-0 flex-1">
|
||||
<div className="flex items-baseline gap-2">
|
||||
<span className="text-xs font-medium">{t.label}</span>
|
||||
<span className="text-[10px] text-muted-foreground/60">
|
||||
{t.hint}
|
||||
</span>
|
||||
</div>
|
||||
<div className="text-[10px] font-mono text-muted-foreground truncate">
|
||||
{isAuto
|
||||
? "auto (use main model)"
|
||||
: `${cur?.provider} · ${cur?.model || "(provider default)"}`}
|
||||
</div>
|
||||
</div>
|
||||
<Button
|
||||
size="sm"
|
||||
outlined
|
||||
onClick={() => setPicker({ kind: "aux", task: t.key })}
|
||||
className="text-[10px] h-6"
|
||||
>
|
||||
Change
|
||||
</Button>
|
||||
</div>
|
||||
);
|
||||
})}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{picker && (
|
||||
<ModelPickerDialog
|
||||
key={`picker-${refreshKey}`}
|
||||
loader={api.getModelOptions}
|
||||
alwaysGlobal
|
||||
title={
|
||||
picker.kind === "main"
|
||||
? "Set Main Model"
|
||||
: `Set Auxiliary: ${
|
||||
AUX_TASKS.find((t) => t.key === picker.task)?.label ??
|
||||
picker.task
|
||||
}`
|
||||
}
|
||||
onApply={async ({ provider, model }) => {
|
||||
await applyAssignment({
|
||||
scope: picker.kind === "main" ? "main" : "auxiliary",
|
||||
task: picker.kind === "main" ? "" : picker.task,
|
||||
provider,
|
||||
model,
|
||||
});
|
||||
}}
|
||||
onClose={() => setPicker(null)}
|
||||
/>
|
||||
)}
|
||||
</CardContent>
|
||||
</Card>
|
||||
);
|
||||
}
|
||||
|
||||
/* ──────────────────────────────────────────────────────────────────── */
|
||||
/* Page */
|
||||
/* ──────────────────────────────────────────────────────────────────── */
|
||||
|
||||
export default function ModelsPage() {
|
||||
const [days, setDays] = useState(30);
|
||||
const [data, setData] = useState<ModelsAnalyticsResponse | null>(null);
|
||||
const [aux, setAux] = useState<AuxiliaryModelsResponse | null>(null);
|
||||
const [loading, setLoading] = useState(true);
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
const [saveKey, setSaveKey] = useState(0);
|
||||
const { t } = useI18n();
|
||||
const { setAfterTitle, setEnd } = usePageHeader();
|
||||
|
||||
const load = useCallback(() => {
|
||||
setLoading(true);
|
||||
setError(null);
|
||||
api
|
||||
.getModelsAnalytics(days)
|
||||
.then(setData)
|
||||
Promise.all([
|
||||
api.getModelsAnalytics(days),
|
||||
api.getAuxiliaryModels().catch(() => null),
|
||||
])
|
||||
.then(([models, auxData]) => {
|
||||
setData(models);
|
||||
setAux(auxData);
|
||||
})
|
||||
.catch((err) => setError(String(err)))
|
||||
.finally(() => setLoading(false));
|
||||
}, [days]);
|
||||
|
||||
const onAssigned = useCallback(() => {
|
||||
// Reload aux state after any assignment change.
|
||||
api
|
||||
.getAuxiliaryModels()
|
||||
.then(setAux)
|
||||
.catch(() => {});
|
||||
setSaveKey((k) => k + 1);
|
||||
}, []);
|
||||
|
||||
useLayoutEffect(() => {
|
||||
const periodLabel =
|
||||
PERIODS.find((p) => p.days === days)?.label ?? `${days}d`;
|
||||
|
|
@ -315,6 +721,13 @@ export default function ModelsPage() {
|
|||
return (
|
||||
<div className="flex flex-col gap-6">
|
||||
<PluginSlot name="models:top" />
|
||||
|
||||
<ModelSettingsPanel
|
||||
aux={aux}
|
||||
refreshKey={saveKey}
|
||||
onSaved={onAssigned}
|
||||
/>
|
||||
|
||||
{loading && !data && (
|
||||
<div className="flex items-center justify-center py-24">
|
||||
<Spinner className="text-2xl text-primary" />
|
||||
|
|
@ -369,7 +782,14 @@ export default function ModelsPage() {
|
|||
{data.models.length > 0 ? (
|
||||
<div className="grid gap-4 md:grid-cols-2 xl:grid-cols-3">
|
||||
{data.models.map((m, i) => (
|
||||
<ModelCard key={`${m.model}:${m.provider}`} entry={m} rank={i + 1} />
|
||||
<ModelCard
|
||||
key={`${m.model}:${m.provider}`}
|
||||
entry={m}
|
||||
rank={i + 1}
|
||||
main={aux?.main ?? null}
|
||||
aux={aux?.tasks ?? []}
|
||||
onAssigned={onAssigned}
|
||||
/>
|
||||
))}
|
||||
</div>
|
||||
) : (
|
||||
|
|
|
|||
207
website/docs/user-guide/configuring-models.md
Normal file
207
website/docs/user-guide/configuring-models.md
Normal file
|
|
@ -0,0 +1,207 @@
|
|||
---
|
||||
sidebar_position: 3
|
||||
---
|
||||
|
||||
# Configuring Models
|
||||
|
||||
Hermes uses two kinds of model slots:
|
||||
|
||||
- **Main model** — what the agent thinks with. Every user message, every tool-call loop, every streamed response goes through this model.
|
||||
- **Auxiliary models** — smaller side-jobs the agent offloads. Context compression, vision (image analysis), web-page summarization, session search, approval scoring, MCP tool routing, session-title generation, and skill search. Each has its own slot and can be overridden independently.
|
||||
|
||||
This page covers configuring both from the dashboard. If you prefer config files or the CLI, jump to [Alternative methods](#alternative-methods) at the bottom.
|
||||
|
||||
## The Models page
|
||||
|
||||
Open the dashboard and click **Models** in the sidebar. You get two sections:
|
||||
|
||||
1. **Model Settings** — the top panel, where you assign models to slots.
|
||||
2. **Usage analytics** — ranked cards showing every model that ran a session in the selected period, with token counts, cost, and capability badges.
|
||||
|
||||

|
||||
|
||||
The top card is the **Model Settings** panel. The main row always shows what the agent will spin up for new sessions. Click **Change** to open the picker.
|
||||
|
||||
## Setting the main model
|
||||
|
||||
Click **Change** on the Main model row:
|
||||
|
||||

|
||||
|
||||
The picker has two columns:
|
||||
|
||||
- **Left** — authenticated providers. Only providers you've set up (API key set, OAuth'd, or defined as a custom endpoint) show up here. If a provider is missing, head to **Keys** and add its credential.
|
||||
- **Right** — the curated model list for the selected provider. These are the agentic models Hermes recommends for that provider, not the raw `/models` dump (which on OpenRouter includes 400+ models including TTS, image generators, and rerankers).
|
||||
|
||||
Type in the filter box to narrow by provider name, slug, or model ID.
|
||||
|
||||
Pick a model, hit **Switch**, and Hermes writes it to `~/.hermes/config.yaml` under the `model` section. **This applies to new sessions only** — any chat tab you already have open keeps running whatever model it started with. To hot-swap the current chat, use the `/model` slash command inside it.
|
||||
|
||||
## Setting auxiliary models
|
||||
|
||||
Click **Show auxiliary** to reveal the eight task slots:
|
||||
|
||||

|
||||
|
||||
Every auxiliary task defaults to `auto` — meaning Hermes uses your main model for that job too. Override a specific task when you want a cheaper or faster model for a side-job.
|
||||
|
||||
### Common override patterns
|
||||
|
||||
| Task | When to override |
|
||||
|---|---|
|
||||
| **Title Gen** | Almost always. A $0.10/M flash model writes session titles as well as Opus. Default config sets this to `google/gemini-3-flash-preview` on OpenRouter. |
|
||||
| **Vision** | When your main model is a coding model without vision (e.g. Kimi, DeepSeek). Point it at `google/gemini-2.5-flash` or `gpt-4o-mini`. |
|
||||
| **Compression** | When you're burning reasoning tokens on Opus/M2.7 just to summarize context. A fast chat model does the job at 1/50th the cost. |
|
||||
| **Session Search** | When recall queries fan out — default max_concurrency is 3. A cheap model keeps the bill predictable. |
|
||||
| **Approval** | For `approval_mode: smart` — a fast/cheap model (haiku, flash, gpt-5-mini) decides whether to auto-approve low-risk commands. Expensive models here are waste. |
|
||||
| **Web Extract** | When you use `web_extract` heavily. Same logic as compression — summarization doesn't need reasoning. |
|
||||
| **Skills Hub** | `hermes skills search` uses this. Usually fine at `auto`. |
|
||||
| **MCP** | MCP tool routing. Usually fine at `auto`. |
|
||||
|
||||
### Per-task override
|
||||
|
||||
Click **Change** on any auxiliary row. Same picker opens, same behavior — pick provider + model, hit Switch. The row updates to show `provider · model` instead of `auto (use main model)`.
|
||||
|
||||
### Reset all to auto
|
||||
|
||||
If you've over-tuned and want to start over, click **Reset all to auto** at the top of the auxiliary section. Every slot goes back to using your main model.
|
||||
|
||||
## The "Use as" shortcut
|
||||
|
||||
Every model card on the page has a **Use as** dropdown. This is the fast path — pick a model you see in your analytics, click **Use as**, and assign it to the main slot or any specific auxiliary task in one click:
|
||||
|
||||

|
||||
|
||||
The dropdown has:
|
||||
|
||||
- **Main model** — same as clicking Change on the main row.
|
||||
- **All auxiliary tasks** — assigns this model to all 8 aux slots at once. Useful when you just want every side-job on a cheap flash model.
|
||||
- **Individual task options** — Vision, Web Extract, Compression, etc. The currently-assigned model for each task is marked `current`.
|
||||
|
||||
Cards are badged with `main` or `aux · <task>` when they're currently assigned to something — so you can see at a glance which of your historical models are wired in where.
|
||||
|
||||
## What gets written to `config.yaml`
|
||||
|
||||
When you save via the dashboard, Hermes writes to `~/.hermes/config.yaml`:
|
||||
|
||||
**Main model:**
|
||||
```yaml
|
||||
model:
|
||||
provider: openrouter
|
||||
default: anthropic/claude-opus-4.7
|
||||
base_url: '' # cleared on provider switch
|
||||
api_mode: chat_completions
|
||||
```
|
||||
|
||||
**Auxiliary override (example — vision on gemini-flash):**
|
||||
```yaml
|
||||
auxiliary:
|
||||
vision:
|
||||
provider: openrouter
|
||||
model: google/gemini-2.5-flash
|
||||
base_url: ''
|
||||
api_key: ''
|
||||
timeout: 120
|
||||
extra_body: {}
|
||||
download_timeout: 30
|
||||
```
|
||||
|
||||
**Auxiliary on auto (default):**
|
||||
```yaml
|
||||
auxiliary:
|
||||
compression:
|
||||
provider: auto
|
||||
model: ''
|
||||
base_url: ''
|
||||
# ... other fields unchanged
|
||||
```
|
||||
|
||||
`provider: auto` with `model: ''` tells Hermes to use the main model for that task.
|
||||
|
||||
## When does it take effect?
|
||||
|
||||
- **CLI** (`hermes chat`): next `hermes chat` invocation.
|
||||
- **Gateway** (Telegram, Discord, Slack, etc.): next *new* session. Existing sessions keep their model. Restart the gateway (`hermes gateway restart`) if you want to force all sessions to pick up the change.
|
||||
- **Dashboard chat tab** (`/chat`): next new PTY. The currently-open chat keeps its model — use `/model` inside it to hot-swap.
|
||||
|
||||
Changes never invalidate prompt caches on running sessions. That's deliberate: swapping the main model inside a session requires a cache reset (the system prompt contains model-specific content), and we reserve that for the explicit `/model` slash command inside chat.
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### "No authenticated providers" in the picker
|
||||
|
||||
Hermes lists a provider only if it has a working credential. Check **Keys** in the sidebar — you should see one of: an API key, a successful OAuth, or a custom endpoint URL. If the provider you want isn't there, run `hermes setup` to wire it up, or go to **Keys** and add the env var.
|
||||
|
||||
### Main model didn't change in my running chat
|
||||
|
||||
Expected. The dashboard writes `config.yaml`, which new sessions read. The currently-open chat is a live agent process — it keeps whatever model it was spawned with. Use `/model <name>` inside the chat to hot-swap that specific session.
|
||||
|
||||
### Auxiliary override "didn't take effect"
|
||||
|
||||
Three things to check:
|
||||
|
||||
1. **Did you start a new session?** Existing chats don't re-read config.
|
||||
2. **Is `provider` set to something other than `auto`?** If the field shows `auto`, the task is still using your main model. Click **Change** and pick a real provider.
|
||||
3. **Is the provider authenticated?** If you assigned `minimax` to a task but don't have a MiniMax API key, that task falls back to the openrouter default and logs a warning in `agent.log`.
|
||||
|
||||
### I picked a model but Hermes switched providers on me
|
||||
|
||||
On OpenRouter (or any aggregator), bare model names resolve *within* the aggregator first. So `claude-sonnet-4` on OpenRouter becomes `anthropic/claude-sonnet-4.6`, staying on your OpenRouter auth. But if you typed `claude-sonnet-4` on a native Anthropic auth, it would stay as `claude-sonnet-4-6`. If you see an unexpected provider switch, check that your current provider is what you expect — the picker always shows the current main at the top of the dialog.
|
||||
|
||||
## Alternative methods
|
||||
|
||||
### CLI slash command
|
||||
|
||||
Inside any `hermes chat` session:
|
||||
|
||||
```
|
||||
/model gpt-5.4 --provider openrouter # session-only
|
||||
/model gpt-5.4 --provider openrouter --global # also persists to config.yaml
|
||||
```
|
||||
|
||||
`--global` does the same thing the dashboard's **Change** button does, plus it switches the running session in-place.
|
||||
|
||||
### `hermes model` subcommand
|
||||
|
||||
```bash
|
||||
hermes model list # list authenticated providers + models
|
||||
hermes model set anthropic/claude-opus-4.7 --provider openrouter
|
||||
```
|
||||
|
||||
### Direct config edit
|
||||
|
||||
Edit `~/.hermes/config.yaml` and restart whatever reads it. See the [Configuration reference](./configuration.md) for the full schema.
|
||||
|
||||
### REST API
|
||||
|
||||
The dashboard uses three endpoints. Useful for scripting:
|
||||
|
||||
```bash
|
||||
# List authenticated providers + curated model lists
|
||||
curl -H "X-Hermes-Session-Token: $TOKEN" http://localhost:PORT/api/model/options
|
||||
|
||||
# Read current main + auxiliary assignments
|
||||
curl -H "X-Hermes-Session-Token: $TOKEN" http://localhost:PORT/api/model/auxiliary
|
||||
|
||||
# Set the main model
|
||||
curl -X POST -H "Content-Type: application/json" -H "X-Hermes-Session-Token: $TOKEN" \
|
||||
-d '{"scope":"main","provider":"openrouter","model":"anthropic/claude-opus-4.7"}' \
|
||||
http://localhost:PORT/api/model/set
|
||||
|
||||
# Override a single auxiliary task
|
||||
curl -X POST -H "Content-Type: application/json" -H "X-Hermes-Session-Token: $TOKEN" \
|
||||
-d '{"scope":"auxiliary","task":"vision","provider":"openrouter","model":"google/gemini-2.5-flash"}' \
|
||||
http://localhost:PORT/api/model/set
|
||||
|
||||
# Assign one model to every auxiliary task
|
||||
curl -X POST -H "Content-Type: application/json" -H "X-Hermes-Session-Token: $TOKEN" \
|
||||
-d '{"scope":"auxiliary","task":"","provider":"openrouter","model":"google/gemini-2.5-flash"}' \
|
||||
http://localhost:PORT/api/model/set
|
||||
|
||||
# Reset all auxiliary tasks to auto
|
||||
curl -X POST -H "Content-Type: application/json" -H "X-Hermes-Session-Token: $TOKEN" \
|
||||
-d '{"scope":"auxiliary","task":"__reset__","provider":"","model":""}' \
|
||||
http://localhost:PORT/api/model/set
|
||||
```
|
||||
|
||||
The session token is injected into the dashboard HTML at startup and rotates on every server restart. Grab it from the browser devtools (`window.__HERMES_SESSION_TOKEN__`) if you're scripting against a running dashboard.
|
||||
|
|
@ -23,6 +23,7 @@ const sidebars: SidebarsConfig = {
|
|||
'user-guide/cli',
|
||||
'user-guide/tui',
|
||||
'user-guide/configuration',
|
||||
'user-guide/configuring-models',
|
||||
'user-guide/sessions',
|
||||
'user-guide/profiles',
|
||||
'user-guide/git-worktrees',
|
||||
|
|
|
|||
BIN
website/static/img/docs/dashboard-models/auxiliary-expanded.png
Normal file
BIN
website/static/img/docs/dashboard-models/auxiliary-expanded.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 593 KiB |
BIN
website/static/img/docs/dashboard-models/overview.png
Normal file
BIN
website/static/img/docs/dashboard-models/overview.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 665 KiB |
BIN
website/static/img/docs/dashboard-models/picker-dialog.png
Normal file
BIN
website/static/img/docs/dashboard-models/picker-dialog.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 216 KiB |
BIN
website/static/img/docs/dashboard-models/use-as-dropdown.png
Normal file
BIN
website/static/img/docs/dashboard-models/use-as-dropdown.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 649 KiB |
Loading…
Add table
Add a link
Reference in a new issue