feat(dashboard): configure main + auxiliary models from Models page (#17802)

Dashboard Models page was analytics-only — no way to pick a model as main for new sessions or override an auxiliary task slot without hand-editing config.yaml or running a /model slash command inside a chat.

Changes:
- hermes_cli/web_server.py: three REST endpoints (GET /api/model/options, GET /api/model/auxiliary, POST /api/model/set). Reuses list_authenticated_providers() from model_switch.py so the REST path surfaces the same curated model lists as the TUI-gateway model.options JSON-RPC. POST /api/model/set writes model.provider + model.default for scope=main, and auxiliary.<task>.{provider,model} for scope=auxiliary (with task="" meaning 'all 8 slots' and task="__reset__" resetting them to auto).
- web/src/components/ModelPickerDialog.tsx: accepts an optional loader + onApply pair so it works without an open chat PTY. ChatSidebar's gw-WebSocket path still works unchanged (back-compat).
- web/src/pages/ModelsPage.tsx: Model Settings panel at the top showing main model + collapsible list of 8 auxiliary tasks with per-row Change buttons and Reset all to auto. Every existing model card gets a 'Use as' dropdown for one-click assignment to main or any aux slot. Cards badged 'main' or 'aux · <task>' when currently assigned.
- website/docs/user-guide/configuring-models.md: new docs page walking through both UI paths, aux task override patterns, troubleshooting, plus REST/CLI alternatives.
- Screenshots under website/static/img/docs/dashboard-models/.

Applies to new sessions only — running sessions keep their model (use /model slash command to hot-swap a live session). No prompt-cache invalidation on existing sessions.
2026-06-19 10:02:16 +00:00 · 2026-04-29 23:53:12 -07:00 · 2026-04-29 23:53:12 -07:00 · 3c27efbb91
commit 3c27efbb91
parent 718e4e2e7e
10 changed files with 1007 additions and 47 deletions
--- a/hermes_cli/web_server.py
+++ b/hermes_cli/web_server.py
@ -23,7 +23,7 @@ import time
 import urllib.parse
 import urllib.request
 from pathlib import Path
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Tuple

 import yaml

@ -445,6 +445,20 @@ class EnvVarReveal(BaseModel):
    key: str


+class ModelAssignment(BaseModel):
+    """Payload for POST /api/model/set — assign a provider/model to a slot.
+
+    scope="main"        → writes model.provider + model.default
+    scope="auxiliary"   → writes auxiliary.<task>.provider + auxiliary.<task>.model
+    scope="auxiliary" with task=""  → applied to every auxiliary.* slot
+    scope="auxiliary" with task="__reset__"  → resets every slot to provider="auto"
+    """
+    scope: str
+    provider: str
+    model: str
+    task: str = ""
+
+
 _GATEWAY_HEALTH_URL = os.getenv("GATEWAY_HEALTH_URL")
 try:
    _GATEWAY_HEALTH_TIMEOUT = float(os.getenv("GATEWAY_HEALTH_TIMEOUT", "3"))
@ -921,6 +935,206 @@ def get_model_info():
        return dict(_EMPTY_MODEL_INFO)


+# ---------------------------------------------------------------------------
+# Model assignment — pick provider+model for main slot or auxiliary slots.
+# Mirrors the model.options JSON-RPC from tui_gateway but uses REST so the
+# Models page (which has no chat PTY open) can drive it.
+# ---------------------------------------------------------------------------
+
+# Canonical auxiliary task slots, listed here for deterministic ordering in
+# the UI. Keep in sync with DEFAULT_CONFIG["auxiliary"] in hermes_cli/config.py
+# and with AUX_TASKS in web/src/pages/ModelsPage.tsx.
+_AUX_TASK_SLOTS: Tuple[str, ...] = (
+    "vision",
+    "web_extract",
+    "compression",
+    "session_search",
+    "skills_hub",
+    "approval",
+    "mcp",
+    "title_generation",
+)
+
+
+@app.get("/api/model/options")
+def get_model_options():
+    """Return authenticated providers + their curated model lists.
+
+    REST equivalent of the ``model.options`` JSON-RPC on tui_gateway, so the
+    dashboard Models page can render the picker without a live chat session.
+    The response shape matches ``model.options`` 1:1 so ``ModelPickerDialog``
+    can share the same types.
+    """
+    try:
+        from hermes_cli.model_switch import list_authenticated_providers
+
+        cfg = load_config()
+        model_cfg = cfg.get("model", {})
+        if isinstance(model_cfg, dict):
+            current_model = model_cfg.get("default", model_cfg.get("name", "")) or ""
+            current_provider = model_cfg.get("provider", "") or ""
+            current_base_url = model_cfg.get("base_url", "") or ""
+        else:
+            current_model = str(model_cfg) if model_cfg else ""
+            current_provider = ""
+            current_base_url = ""
+
+        user_providers = cfg.get("providers") if isinstance(cfg.get("providers"), dict) else {}
+        custom_providers = (
+            cfg.get("custom_providers")
+            if isinstance(cfg.get("custom_providers"), list)
+            else []
+        )
+
+        providers = list_authenticated_providers(
+            current_provider=current_provider,
+            current_base_url=current_base_url,
+            current_model=current_model,
+            user_providers=user_providers,
+            custom_providers=custom_providers,
+            max_models=50,
+        )
+        return {
+            "providers": providers,
+            "model": current_model,
+            "provider": current_provider,
+        }
+    except Exception:
+        _log.exception("GET /api/model/options failed")
+        raise HTTPException(status_code=500, detail="Failed to list model options")
+
+
+@app.get("/api/model/auxiliary")
+def get_auxiliary_models():
+    """Return current auxiliary task assignments.
+
+    Shape:
+      {
+        "tasks": [
+          {"task": "vision", "provider": "auto", "model": "", "base_url": ""},
+          ...
+        ],
+        "main": {"provider": "openrouter", "model": "anthropic/claude-opus-4.7"},
+      }
+    """
+    try:
+        cfg = load_config()
+        aux_cfg = cfg.get("auxiliary", {})
+        if not isinstance(aux_cfg, dict):
+            aux_cfg = {}
+
+        tasks = []
+        for slot in _AUX_TASK_SLOTS:
+            slot_cfg = aux_cfg.get(slot, {}) if isinstance(aux_cfg.get(slot), dict) else {}
+            tasks.append({
+                "task": slot,
+                "provider": str(slot_cfg.get("provider", "auto") or "auto"),
+                "model": str(slot_cfg.get("model", "") or ""),
+                "base_url": str(slot_cfg.get("base_url", "") or ""),
+            })
+
+        model_cfg = cfg.get("model", {})
+        if isinstance(model_cfg, dict):
+            main = {
+                "provider": str(model_cfg.get("provider", "") or ""),
+                "model": str(model_cfg.get("default", model_cfg.get("name", "")) or ""),
+            }
+        else:
+            main = {"provider": "", "model": str(model_cfg) if model_cfg else ""}
+
+        return {"tasks": tasks, "main": main}
+    except Exception:
+        _log.exception("GET /api/model/auxiliary failed")
+        raise HTTPException(status_code=500, detail="Failed to read auxiliary config")
+
+
+@app.post("/api/model/set")
+async def set_model_assignment(body: ModelAssignment):
+    """Assign a model to the main slot or an auxiliary task slot.
+
+    Writes to ``~/.hermes/config.yaml`` — applies to **new** sessions only.
+    The currently running chat PTY (if any) is not affected; use the
+    ``/model`` slash command inside a chat to hot-swap that specific session.
+    """
+    scope = (body.scope or "").strip().lower()
+    provider = (body.provider or "").strip()
+    model = (body.model or "").strip()
+    task = (body.task or "").strip().lower()
+
+    if scope not in ("main", "auxiliary"):
+        raise HTTPException(status_code=400, detail="scope must be 'main' or 'auxiliary'")
+
+    try:
+        cfg = load_config()
+
+        if scope == "main":
+            if not provider or not model:
+                raise HTTPException(status_code=400, detail="provider and model required for main")
+            model_cfg = cfg.get("model", {})
+            if not isinstance(model_cfg, dict):
+                model_cfg = {}
+            model_cfg["provider"] = provider
+            model_cfg["default"] = model
+            # Clear stale base_url so the resolver picks the provider's own default.
+            if "base_url" in model_cfg and model_cfg.get("base_url"):
+                model_cfg["base_url"] = ""
+            # Also clear hardcoded context_length override — new model may have
+            # a different context window.
+            if "context_length" in model_cfg:
+                model_cfg.pop("context_length", None)
+            cfg["model"] = model_cfg
+            save_config(cfg)
+            return {"ok": True, "scope": "main", "provider": provider, "model": model}
+
+        # scope == "auxiliary"
+        aux = cfg.get("auxiliary")
+        if not isinstance(aux, dict):
+            aux = {}
+
+        if task == "__reset__":
+            # Reset every slot to provider="auto", model="" — keeps other fields intact.
+            for slot in _AUX_TASK_SLOTS:
+                slot_cfg = aux.get(slot)
+                if not isinstance(slot_cfg, dict):
+                    slot_cfg = {}
+                slot_cfg["provider"] = "auto"
+                slot_cfg["model"] = ""
+                aux[slot] = slot_cfg
+            cfg["auxiliary"] = aux
+            save_config(cfg)
+            return {"ok": True, "scope": "auxiliary", "reset": True}
+
+        if not provider:
+            raise HTTPException(status_code=400, detail="provider required for auxiliary")
+
+        targets = [task] if task else list(_AUX_TASK_SLOTS)
+        for slot in targets:
+            if slot not in _AUX_TASK_SLOTS:
+                raise HTTPException(status_code=400, detail=f"unknown auxiliary task: {slot}")
+            slot_cfg = aux.get(slot)
+            if not isinstance(slot_cfg, dict):
+                slot_cfg = {}
+            slot_cfg["provider"] = provider
+            slot_cfg["model"] = model
+            aux[slot] = slot_cfg
+
+        cfg["auxiliary"] = aux
+        save_config(cfg)
+        return {
+            "ok": True,
+            "scope": "auxiliary",
+            "tasks": targets,
+            "provider": provider,
+            "model": model,
+        }
+    except HTTPException:
+        raise
+    except Exception:
+        _log.exception("POST /api/model/set failed")
+        raise HTTPException(status_code=500, detail="Failed to save model assignment")
+
+
+
+
 def _denormalize_config_from_web(config: Dict[str, Any]) -> Dict[str, Any]:
    """Reverse _normalize_config_for_web before saving.

--- a/web/src/components/ModelPickerDialog.tsx
+++ b/web/src/components/ModelPickerDialog.tsx
@ -11,9 +11,18 @@ import { useEffect, useMemo, useRef, useState } from "react";
 *   Stage 1: pick provider (authenticated providers only)
 *   Stage 2: pick model within that provider
 *
- * On confirm, emits `/model <model> --provider <slug> [--global]` through
- * the parent callback so ChatPage can dispatch it via the existing slash
- * pipeline. That keeps persistence + actual switch logic in one place.
+ * Two invocation modes:
+ *
+ * 1. Chat-session mode (ChatSidebar) — pass `gw` + `sessionId`. The picker
+ *    loads options via `model.options` JSON-RPC and emits the result as a
+ *    slash command string (`/model <model> --provider <slug> [--global]`)
+ *    through `onSubmit`, which the ChatPage pipes to `slashExec`.
+ *
+ * 2. Standalone mode (ModelsPage, Config settings) — pass a `loader` and
+ *    `onApply`. The picker fetches options via the REST endpoint and calls
+ *    `onApply(provider, model, persistGlobal)` instead of emitting a slash
+ *    command.  This lets the Models page reuse the same UI without
+ *    requiring an open chat PTY.
 */

 interface ModelOptionProvider {
@ -32,14 +41,38 @@ interface ModelOptionsResponse {
 }

 interface Props {
-  gw: GatewayClient;
-  sessionId: string;
+  /** Chat-mode: when present, picker emits a slash command via onSubmit. */
+  gw?: GatewayClient;
+  sessionId?: string;
+  onSubmit?(slashCommand: string): void;
+
+  /** Standalone-mode: when present (and onSubmit absent), picker calls onApply. */
+  loader?(): Promise<ModelOptionsResponse>;
+  onApply?(args: {
+    provider: string;
+    model: string;
+    persistGlobal: boolean;
+  }): Promise<void> | void;
+
  onClose(): void;
-  /** Parent runs the resulting slash command through slashExec. */
-  onSubmit(slashCommand: string): void;
+  title?: string;
+  /** If true, hides "Persist globally" checkbox — always saves to config.yaml. */
+  alwaysGlobal?: boolean;
 }

-export function ModelPickerDialog({ gw, sessionId, onClose, onSubmit }: Props) {
+export function ModelPickerDialog(props: Props) {
+  const {
+    gw,
+    sessionId,
+    onSubmit,
+    loader,
+    onApply,
+    onClose,
+    title = "Switch Model",
+    alwaysGlobal = false,
+  } = props;
+  const standalone = !!loader && !!onApply;
+
  const [providers, setProviders] = useState<ModelOptionProvider[]>([]);
  const [currentModel, setCurrentModel] = useState("");
  const [currentProviderSlug, setCurrentProviderSlug] = useState("");
@ -48,17 +81,22 @@ export function ModelPickerDialog({ gw, sessionId, onClose, onSubmit }: Props) {
  const [selectedSlug, setSelectedSlug] = useState("");
  const [selectedModel, setSelectedModel] = useState("");
  const [query, setQuery] = useState("");
-  const [persistGlobal, setPersistGlobal] = useState(false);
+  const [persistGlobal, setPersistGlobal] = useState(alwaysGlobal);
+  const [applying, setApplying] = useState(false);
  const closedRef = useRef(false);

  // Load providers + models on open.
  useEffect(() => {
    closedRef.current = false;

-    gw.request<ModelOptionsResponse>(
-      "model.options",
-      sessionId ? { session_id: sessionId } : {},
-    )
+    const promise = standalone
+      ? (loader as () => Promise<ModelOptionsResponse>)()
+      : (gw as GatewayClient).request<ModelOptionsResponse>(
+          "model.options",
+          sessionId ? { session_id: sessionId } : {},
+        );
+
+    promise
      .then((r) => {
        if (closedRef.current) return;
        const next = r?.providers ?? [];
@ -80,7 +118,9 @@ export function ModelPickerDialog({ gw, sessionId, onClose, onSubmit }: Props) {
    return () => {
      closedRef.current = true;
    };
-  }, [gw, sessionId]);
+    // Deliberately omit props from deps — stable for the dialog's lifetime.
+    // eslint-disable-next-line react-hooks/exhaustive-deps
+  }, []);

  // Esc closes.
  useEffect(() => {
@ -125,15 +165,31 @@ export function ModelPickerDialog({ gw, sessionId, onClose, onSubmit }: Props) {
    [models, needle],
  );

-  const canConfirm = !!selectedProvider && !!selectedModel;
+  const canConfirm = !!selectedProvider && !!selectedModel && !applying;

-  const confirm = () => {
-    if (!canConfirm) return;
-    const global = persistGlobal ? " --global" : "";
-    onSubmit(
-      `/model ${selectedModel} --provider ${selectedProvider.slug}${global}`,
-    );
-    onClose();
+  const confirm = async () => {
+    if (!canConfirm || !selectedProvider) return;
+    if (standalone && onApply) {
+      setApplying(true);
+      try {
+        await onApply({
+          provider: selectedProvider.slug,
+          model: selectedModel,
+          persistGlobal,
+        });
+        onClose();
+      } catch (e) {
+        setError(e instanceof Error ? e.message : String(e));
+      } finally {
+        setApplying(false);
+      }
+    } else if (onSubmit) {
+      const global = persistGlobal ? " --global" : "";
+      onSubmit(
+        `/model ${selectedModel} --provider ${selectedProvider.slug}${global}`,
+      );
+      onClose();
+    }
  };

  return (
@ -160,7 +216,7 @@ export function ModelPickerDialog({ gw, sessionId, onClose, onSubmit }: Props) {
            id="model-picker-title"
            className="font-display text-base tracking-wider uppercase"
          >
-            Switch Model
+            {title}
          </h2>
          <p className="text-xs text-muted-foreground mt-1 font-mono">
            current: {currentModel || "(unknown)"}
@ -212,22 +268,28 @@ export function ModelPickerDialog({ gw, sessionId, onClose, onSubmit }: Props) {
        </div>

        <footer className="border-t border-border p-3 flex items-center justify-between gap-3 flex-wrap">
-          <label className="flex items-center gap-2 text-xs text-muted-foreground cursor-pointer select-none">
-            <input
-              type="checkbox"
-              checked={persistGlobal}
-              onChange={(e) => setPersistGlobal(e.target.checked)}
-              className="cursor-pointer"
-            />
-            Persist globally (otherwise this session only)
-          </label>
+          {alwaysGlobal ? (
+            <span className="text-xs text-muted-foreground">
+              Saves to config.yaml — applies to new sessions.
+            </span>
+          ) : (
+            <label className="flex items-center gap-2 text-xs text-muted-foreground cursor-pointer select-none">
+              <input
+                type="checkbox"
+                checked={persistGlobal}
+                onChange={(e) => setPersistGlobal(e.target.checked)}
+                className="cursor-pointer"
+              />
+              Persist globally (otherwise this session only)
+            </label>
+          )}

          <div className="flex items-center gap-2 ml-auto">
-            <Button outlined onClick={onClose}>
+            <Button outlined onClick={onClose} disabled={applying}>
              Cancel
            </Button>
            <Button onClick={confirm} disabled={!canConfirm}>
-              Switch
+              {applying ? <Spinner /> : "Switch"}
            </Button>
          </div>
        </footer>
--- a/web/src/lib/api.ts
+++ b/web/src/lib/api.ts
@ -69,6 +69,14 @@ export const api = {
  getDefaults: () => fetchJSON<Record<string, unknown>>("/api/config/defaults"),
  getSchema: () => fetchJSON<{ fields: Record<string, unknown>; category_order: string[] }>("/api/config/schema"),
  getModelInfo: () => fetchJSON<ModelInfoResponse>("/api/model/info"),
+  getModelOptions: () => fetchJSON<ModelOptionsResponse>("/api/model/options"),
+  getAuxiliaryModels: () => fetchJSON<AuxiliaryModelsResponse>("/api/model/auxiliary"),
+  setModelAssignment: (body: ModelAssignmentRequest) =>
+    fetchJSON<ModelAssignmentResponse>("/api/model/set", {
+      method: "POST",
+      headers: { "Content-Type": "application/json" },
+      body: JSON.stringify(body),
+    }),
  saveConfig: (config: Record<string, unknown>) =>
    fetchJSON<{ ok: boolean }>("/api/config", {
      method: "PUT",
@ -473,6 +481,54 @@ export interface ModelInfoResponse {
  };
 }

+// ── Model options / assignment types ──────────────────────────────────
+
+export interface ModelOptionProvider { // one entry of ModelOptionsResponse.providers
+  name: string;
+  slug: string; // NOTE(review): presumably the id sent back as `provider` when assigning — confirm
+  models?: string[]; // NOTE(review): GET /api/model/options asks for max_models=50, so this may be shorter than total_models — confirm
+  total_models?: number;
+  is_current?: boolean;
+  is_user_defined?: boolean;
+  source?: string;
+  warning?: string;
+}
+
+export interface ModelOptionsResponse { // body of GET /api/model/options
+  model?: string; // main model currently in the config ("" when unset)
+  provider?: string; // main provider currently in the config ("" when unset)
+  providers?: ModelOptionProvider[];
+}
+
+export interface AuxiliaryTaskAssignment { // one entry of "tasks" in GET /api/model/auxiliary
+  task: string; // slot name, e.g. "vision"
+  provider: string; // the server reports "auto" when no provider is configured
+  model: string; // "" when unset
+  base_url: string; // "" when unset
+}
+
+export interface AuxiliaryModelsResponse { // body of GET /api/model/auxiliary
+  tasks: AuxiliaryTaskAssignment[]; // one entry per auxiliary slot, in the server's slot order
+  main: { provider: string; model: string }; // current main-slot assignment ("" when unset)
+}
+
+export interface ModelAssignmentRequest { // body of POST /api/model/set
+  scope: "main" | "auxiliary"; // "main" writes model.*; "auxiliary" writes auxiliary.<task>.*
+  provider: string; // the server answers 400 when empty, except for an auxiliary "__reset__" request
+  model: string; // must be non-empty for scope "main"
+  /** For auxiliary: task slot name, "" for all, "__reset__" to reset all. */
+  task?: string;
+}
+
+export interface ModelAssignmentResponse { // body returned by POST /api/model/set
+  ok: boolean;
+  scope?: string; // echoes the scope that was applied
+  provider?: string; // absent in the reset response
+  model?: string; // absent in the reset response
+  tasks?: string[]; // auxiliary assignment only: the slots that were written
+  reset?: boolean; // true only in the response to a "__reset__" request
+}
+
 // ── OAuth provider types ────────────────────────────────────────────────

 export interface OAuthProviderStatus {
--- a/web/src/pages/ModelsPage.tsx
+++ b/web/src/pages/ModelsPage.tsx
@ -1,15 +1,23 @@
 import { useCallback, useEffect, useLayoutEffect, useState } from "react";
 import {
  Brain,
+  ChevronDown,
  Cpu,
  DollarSign,
  Eye,
  RefreshCw,
+  Settings2,
+  Star,
  Wrench,
  Zap,
 } from "lucide-react";
 import { api } from "@/lib/api";
-import type { ModelsAnalyticsModelEntry, ModelsAnalyticsResponse } from "@/lib/api";
+import type {
+  AuxiliaryModelsResponse,
+  AuxiliaryTaskAssignment,
+  ModelsAnalyticsModelEntry,
+  ModelsAnalyticsResponse,
+} from "@/lib/api";
 import { timeAgo } from "@/lib/utils";
 import { formatTokenCount } from "@/lib/format";
 import { Button, Spinner, Stats } from "@nous-research/ui";
@ -18,6 +26,7 @@ import { Badge } from "@nous-research/ui";
 import { usePageHeader } from "@/contexts/usePageHeader";
 import { useI18n } from "@/i18n";
 import { PluginSlot } from "@/plugins";
+import { ModelPickerDialog } from "@/components/ModelPickerDialog";

 const PERIODS = [
  { label: "7d", days: 7 },
@ -25,6 +34,18 @@ const PERIODS = [
  { label: "90d", days: 90 },
 ] as const;

+// Must match _AUX_TASK_SLOTS in hermes_cli/web_server.py.
+const AUX_TASKS: readonly { key: string; label: string; hint: string }[] = [
+  { key: "vision", label: "Vision", hint: "Image analysis" },
+  { key: "web_extract", label: "Web Extract", hint: "Page summarization" },
+  { key: "compression", label: "Compression", hint: "Context compaction" },
+  { key: "session_search", label: "Session Search", hint: "Recall queries" },
+  { key: "skills_hub", label: "Skills Hub", hint: "Skill search" },
+  { key: "approval", label: "Approval", hint: "Smart auto-approve" },
+  { key: "mcp", label: "MCP", hint: "MCP tool routing" },
+  { key: "title_generation", label: "Title Gen", hint: "Session titles" },
+] as const;
+
 function formatTokens(n: number): string {
  if (n >= 1_000_000) return `${(n / 1_000_000).toFixed(1)}M`;
  if (n >= 1_000) return `${(n / 1_000).toFixed(1)}K`;
@ -134,20 +155,168 @@ function CapabilityBadges({
  );
 }

+/* ──────────────────────────────────────────────────────────────────── */
+/*  Per-card "Use as" menu                                              */
+/* ──────────────────────────────────────────────────────────────────── */
+
+/**
+ * Per-card dropdown that assigns the card's provider/model to the main slot,
+ * to every auxiliary task, or to one auxiliary task via POST /api/model/set.
+ *
+ * Calls `onAssigned` after a successful save so the parent can refresh, and
+ * shows the error message inside the open menu when the request fails.
+ */
+function UseAsMenu({
+  provider,
+  model,
+  isMain,
+  mainAuxTask,
+  onAssigned,
+}: {
+  provider: string;
+  model: string;
+  /** True when this card's model+provider match config.yaml's main slot. */
+  isMain: boolean;
+  /** If this model is assigned to a specific aux task, that task's key. */
+  mainAuxTask: string | null;
+  onAssigned(): void;
+}) {
+  const [open, setOpen] = useState(false);
+  const [busy, setBusy] = useState(false);
+  const [error, setError] = useState<string | null>(null);
+  // Root element of THIS menu, captured through a callback ref. The
+  // outside-click check must be scoped to this instance: every card renders
+  // its own UseAsMenu, so matching on the shared data attribute would treat
+  // a click inside another card's menu as "inside" and leave this one open.
+  const [root, setRoot] = useState<HTMLDivElement | null>(null);
+
+  const assign = async (
+    scope: "main" | "auxiliary",
+    task: string,
+  ) => {
+    if (!provider || !model) {
+      setError("Missing provider/model");
+      return;
+    }
+    setBusy(true);
+    setError(null);
+    try {
+      await api.setModelAssignment({ scope, provider, model, task });
+      onAssigned();
+      setOpen(false);
+    } catch (e) {
+      setError(e instanceof Error ? e.message : String(e));
+    } finally {
+      setBusy(false);
+    }
+  };
+
+  // Close on a mousedown anywhere outside this menu's own root element.
+  useEffect(() => {
+    if (!open || !root) return;
+    const onDown = (e: MouseEvent) => {
+      const target = e.target;
+      if (target instanceof Node && root.contains(target)) return;
+      setOpen(false);
+    };
+    window.addEventListener("mousedown", onDown);
+    return () => window.removeEventListener("mousedown", onDown);
+  }, [open, root]);
+
+  return (
+    <div className="relative" data-use-as-menu ref={setRoot}>
+      <Button
+        size="sm"
+        outlined
+        onClick={() => setOpen((v) => !v)}
+        disabled={busy}
+        className="text-[10px] h-6 px-2"
+        prefix={busy ? <Spinner /> : null}
+      >
+        Use as <ChevronDown className="h-3 w-3" />
+      </Button>
+      {open && (
+        <div className="absolute right-0 top-full mt-1 z-50 min-w-[220px] border border-border bg-card shadow-lg">
+          <button
+            type="button"
+            onClick={() => assign("main", "")}
+            disabled={busy}
+            className="flex w-full items-center justify-between px-3 py-2 text-xs hover:bg-muted/50 disabled:opacity-40"
+          >
+            <span className="flex items-center gap-2">
+              <Star className="h-3 w-3" />
+              Main model
+            </span>
+            {isMain && (
+              <span className="text-[9px] uppercase tracking-wider text-primary/80">
+                current
+              </span>
+            )}
+          </button>
+
+          <div className="border-t border-border/50 px-3 py-1.5 text-[9px] uppercase tracking-wider text-muted-foreground">
+            Auxiliary task
+          </div>
+
+          {/* An empty task name asks the server to write every auxiliary slot. */}
+          <button
+            type="button"
+            onClick={() => assign("auxiliary", "")}
+            disabled={busy}
+            className="flex w-full items-center justify-between px-3 py-1.5 text-xs hover:bg-muted/50 disabled:opacity-40"
+          >
+            <span>All auxiliary tasks</span>
+          </button>
+
+          {AUX_TASKS.map((t) => (
+            <button
+              key={t.key}
+              type="button"
+              onClick={() => assign("auxiliary", t.key)}
+              disabled={busy}
+              className="flex w-full items-center justify-between px-3 py-1.5 text-xs hover:bg-muted/50 disabled:opacity-40"
+            >
+              <span>{t.label}</span>
+              {mainAuxTask === t.key && (
+                <span className="text-[9px] uppercase tracking-wider text-primary/80">
+                  current
+                </span>
+              )}
+            </button>
+          ))}
+
+          {error && (
+            <div className="px-3 py-2 text-[10px] text-destructive border-t border-border/50">
+              {error}
+            </div>
+          )}
+        </div>
+      )}
+    </div>
+  );
+}
+
+/* ──────────────────────────────────────────────────────────────────── */
+/*  ModelCard                                                           */
+/* ──────────────────────────────────────────────────────────────────── */
+
 function ModelCard({
  entry,
  rank,
+  main,
+  aux,
+  onAssigned,
 }: {
  entry: ModelsAnalyticsModelEntry;
  rank: number;
+  main: { provider: string; model: string } | null;
+  aux: AuxiliaryTaskAssignment[];
+  onAssigned(): void;
 }) {
  const { t } = useI18n();
  const provider = entry.provider || modelVendor(entry.model);
  const totalTokens = entry.input_tokens + entry.output_tokens;
  const caps = entry.capabilities;

+  const isMain =
+    !!main &&
+    main.provider === provider &&
+    main.model === entry.model;
+
+  // First aux task currently using this model (if any).
+  const mainAuxTask =
+    aux.find(
+      (a) => a.provider === provider && a.model === entry.model,
+    )?.task ?? null;
+
  return (
-    <Card>
+    <Card className={isMain ? "ring-1 ring-primary/40" : undefined}>
      <CardHeader className="pb-3">
        <div className="flex items-start justify-between gap-2">
          <div className="min-w-0 flex-1">
@ -158,6 +327,16 @@ function ModelCard({
              <CardTitle className="text-sm font-mono-ui truncate">
                {shortModelName(entry.model)}
              </CardTitle>
+              {isMain && (
+                <span className="inline-flex items-center gap-0.5 bg-primary/15 px-1.5 py-0.5 text-[9px] font-medium uppercase tracking-wider text-primary">
+                  <Star className="h-2.5 w-2.5" /> main
+                </span>
+              )}
+              {mainAuxTask && (
+                <span className="inline-flex items-center bg-purple-500/10 px-1.5 py-0.5 text-[9px] font-medium uppercase tracking-wider text-purple-600 dark:text-purple-400">
+                  aux · {mainAuxTask}
+                </span>
+              )}
            </div>
            <div className="flex items-center gap-2 mt-1">
              {provider && (
@ -177,13 +356,22 @@ function ModelCard({
              )}
            </div>
          </div>
-          <div className="text-right shrink-0">
-            <div className="text-xs font-mono font-semibold">
-              {formatTokens(totalTokens)}
-            </div>
-            <div className="text-[10px] text-muted-foreground">
-              {t.models.tokens}
+          <div className="flex flex-col items-end gap-1 shrink-0">
+            <div className="text-right">
+              <div className="text-xs font-mono font-semibold">
+                {formatTokens(totalTokens)}
+              </div>
+              <div className="text-[10px] text-muted-foreground">
+                {t.models.tokens}
+              </div>
            </div>
+            <UseAsMenu
+              provider={provider}
+              model={entry.model}
+              isMain={isMain}
+              mainAuxTask={mainAuxTask}
+              onAssigned={onAssigned}
+            />
          </div>
        </div>
      </CardHeader>
@ -246,24 +434,242 @@ function ModelCard({
  );
 }

+/* ──────────────────────────────────────────────────────────────────── */
+/*  Model Settings panel (top of page)                                  */
+/* ──────────────────────────────────────────────────────────────────── */
+
+// Which slot the model picker dialog is currently editing: the main model,
+// or one auxiliary task identified by its task key.
+type PickerTarget =
+  | { kind: "main" }
+  | { kind: "aux"; task: string };
+
+/**
+ * Settings card rendered at the top of the Models page.
+ *
+ * Shows the main model with a Change button and, when expanded, one row per
+ * entry in AUX_TASKS with its own Change button plus a "Reset all to auto"
+ * action. Choosing a model in ModelPickerDialog calls
+ * api.setModelAssignment and then onSaved().
+ *
+ * Props:
+ *  - aux: current main + auxiliary assignments, or null when not loaded;
+ *    with null the main row shows "(unset)" and every aux row shows auto.
+ *  - refreshKey: folded into the picker's React key, so a new value
+ *    remounts the dialog.
+ *  - onSaved: called after every successful assignment or reset.
+ */
+function ModelSettingsPanel({
+  aux,
+  refreshKey,
+  onSaved,
+}: {
+  aux: AuxiliaryModelsResponse | null;
+  refreshKey: number;
+  onSaved(): void;
+}) {
+  // Whether the auxiliary task rows are shown.
+  const [expanded, setExpanded] = useState(false);
+  // Slot the picker dialog is editing; null means the dialog is closed.
+  const [picker, setPicker] = useState<PickerTarget | null>(null);
+  // True while the "Reset all to auto" request is in flight.
+  const [resetBusy, setResetBusy] = useState(false);
+
+  // Fall back to "" while aux is null so the main row renders "(unset)".
+  const mainProv = aux?.main.provider ?? "";
+  const mainModel = aux?.main.model ?? "";
+
+  // Persist one assignment through the REST client, then notify the parent.
+  // A rejected request propagates to the caller and skips onSaved().
+  const applyAssignment = async ({
+    scope,
+    task,
+    provider,
+    model,
+  }: {
+    scope: "main" | "auxiliary";
+    task: string;
+    provider: string;
+    model: string;
+  }) => {
+    await api.setModelAssignment({ scope, task, provider, model });
+    onSaved();
+  };
+
+  // Ask for confirmation, then send task "__reset__" with an empty
+  // provider/model to reset the auxiliary slots.
+  // NOTE(review): there is no catch here, so a failed request rejects the
+  // promise handed back to onClick without any user-visible error — confirm
+  // this is acceptable.
+  const resetAllAux = async () => {
+    if (!window.confirm("Reset every auxiliary task to 'auto'? This overrides any per-task overrides you've set.")) {
+      return;
+    }
+    setResetBusy(true);
+    try {
+      await api.setModelAssignment({
+        scope: "auxiliary",
+        task: "__reset__",
+        provider: "",
+        model: "",
+      });
+      onSaved();
+    } finally {
+      // Re-enable the button whether the request succeeded or failed.
+      setResetBusy(false);
+    }
+  };
+
+  return (
+    <Card>
+      <CardHeader className="pb-3">
+        <div className="flex items-center justify-between gap-3 flex-wrap">
+          <div className="flex items-center gap-2">
+            <Settings2 className="h-4 w-4 text-muted-foreground" />
+            <CardTitle className="text-sm">Model Settings</CardTitle>
+            <span className="text-[10px] text-muted-foreground">
+              applies to new sessions
+            </span>
+          </div>
+          <Button
+            size="sm"
+            outlined
+            onClick={() => setExpanded((v) => !v)}
+            className="text-xs"
+          >
+            {expanded ? "Hide auxiliary" : "Show auxiliary"}
+            <ChevronDown
+              className={`h-3 w-3 transition-transform ${expanded ? "rotate-180" : ""}`}
+            />
+          </Button>
+        </div>
+      </CardHeader>
+
+      <CardContent className="space-y-3 pt-0">
+        {/* Main row */}
+        <div className="flex items-center justify-between gap-3 bg-muted/20 border border-border/50 px-3 py-2">
+          <div className="min-w-0 flex-1">
+            <div className="flex items-center gap-2 mb-0.5">
+              <Star className="h-3 w-3 text-primary" />
+              <span className="text-xs font-medium uppercase tracking-wider">
+                Main model
+              </span>
+            </div>
+            {/* NOTE(review): the " · " separator is only emitted when both values are set, so an empty value renders flush against its neighbour, e.g. "(unset)(unset)". */}
+            <div className="text-xs font-mono text-muted-foreground truncate">
+              {mainProv || "(unset)"}
+              {mainProv && mainModel && " · "}
+              {mainModel || "(unset)"}
+            </div>
+          </div>
+          <Button
+            size="sm"
+            onClick={() => setPicker({ kind: "main" })}
+            className="text-xs"
+          >
+            Change
+          </Button>
+        </div>
+
+        {/* Auxiliary rows */}
+        {expanded && (
+          <div className="space-y-1 border-t border-border/50 pt-3">
+            <div className="flex items-center justify-between pb-1">
+              <div className="text-[10px] uppercase tracking-wider text-muted-foreground">
+                Auxiliary tasks
+              </div>
+              <Button
+                size="sm"
+                outlined
+                onClick={resetAllAux}
+                disabled={resetBusy}
+                className="text-[10px] h-6"
+                prefix={resetBusy ? <Spinner /> : null}
+              >
+                Reset all to auto
+              </Button>
+            </div>
+
+            <p className="text-[10px] text-muted-foreground/80 pb-2">
+              Auxiliary tasks handle side-jobs like vision, session search, and
+              compression. <span className="font-mono">auto</span> means
+              &quot;use the main model&quot;. Override per-task when you want a
+              cheap/fast model for a specific job.
+            </p>
+
+            {AUX_TASKS.map((t) => {
+              // Look up this task's saved assignment; a task counts as auto
+              // when it has no entry, an empty provider, or provider "auto".
+              const cur = aux?.tasks.find((a) => a.task === t.key);
+              const isAuto =
+                !cur || cur.provider === "auto" || !cur.provider;
+              return (
+                <div
+                  key={t.key}
+                  className="flex items-center justify-between gap-3 px-3 py-1.5 border border-border/30 bg-card/50 hover:bg-muted/20 transition-colors"
+                >
+                  <div className="min-w-0 flex-1">
+                    <div className="flex items-baseline gap-2">
+                      <span className="text-xs font-medium">{t.label}</span>
+                      <span className="text-[10px] text-muted-foreground/60">
+                        {t.hint}
+                      </span>
+                    </div>
+                    <div className="text-[10px] font-mono text-muted-foreground truncate">
+                      {isAuto
+                        ? "auto (use main model)"
+                        : `${cur?.provider} · ${cur?.model || "(provider default)"}`}
+                    </div>
+                  </div>
+                  <Button
+                    size="sm"
+                    outlined
+                    onClick={() => setPicker({ kind: "aux", task: t.key })}
+                    className="text-[10px] h-6"
+                  >
+                    Change
+                  </Button>
+                </div>
+              );
+            })}
+          </div>
+        )}
+
+        {/* Picker dialog: mounted only while a target slot is selected. */}
+        {picker && (
+          <ModelPickerDialog
+            key={`picker-${refreshKey}`}
+            loader={api.getModelOptions}
+            alwaysGlobal
+            title={
+              picker.kind === "main"
+                ? "Set Main Model"
+                : `Set Auxiliary: ${
+                    AUX_TASKS.find((t) => t.key === picker.task)?.label ??
+                    picker.task
+                  }`
+            }
+            onApply={async ({ provider, model }) => {
+              // The main slot sends an empty task; aux slots send their key.
+              await applyAssignment({
+                scope: picker.kind === "main" ? "main" : "auxiliary",
+                task: picker.kind === "main" ? "" : picker.task,
+                provider,
+                model,
+              });
+            }}
+            onClose={() => setPicker(null)}
+          />
+        )}
+      </CardContent>
+    </Card>
+  );
+}
+
+/* ──────────────────────────────────────────────────────────────────── */
+/*  Page                                                                */
+/* ──────────────────────────────────────────────────────────────────── */
+
 export default function ModelsPage() {
  const [days, setDays] = useState(30);
  const [data, setData] = useState<ModelsAnalyticsResponse | null>(null);
+  const [aux, setAux] = useState<AuxiliaryModelsResponse | null>(null);
  const [loading, setLoading] = useState(true);
  const [error, setError] = useState<string | null>(null);
+  const [saveKey, setSaveKey] = useState(0);
  const { t } = useI18n();
  const { setAfterTitle, setEnd } = usePageHeader();

  const load = useCallback(() => {
    setLoading(true);
    setError(null);
-    api
-      .getModelsAnalytics(days)
-      .then(setData)
+    Promise.all([
+      api.getModelsAnalytics(days),
+      api.getAuxiliaryModels().catch(() => null),
+    ])
+      .then(([models, auxData]) => {
+        setData(models);
+        setAux(auxData);
+      })
      .catch((err) => setError(String(err)))
      .finally(() => setLoading(false));
  }, [days]);

+  const onAssigned = useCallback(() => {
+    // Reload aux state after any assignment change.
+    api
+      .getAuxiliaryModels()
+      .then(setAux)
+      .catch(() => {});
+    setSaveKey((k) => k + 1);
+  }, []);
+
  useLayoutEffect(() => {
    const periodLabel =
      PERIODS.find((p) => p.days === days)?.label ?? `${days}d`;
@ -315,6 +721,13 @@ export default function ModelsPage() {
  return (
    <div className="flex flex-col gap-6">
      <PluginSlot name="models:top" />
+
+      <ModelSettingsPanel
+        aux={aux}
+        refreshKey={saveKey}
+        onSaved={onAssigned}
+      />
+
      {loading && !data && (
        <div className="flex items-center justify-center py-24">
          <Spinner className="text-2xl text-primary" />
@ -369,7 +782,14 @@ export default function ModelsPage() {
          {data.models.length > 0 ? (
            <div className="grid gap-4 md:grid-cols-2 xl:grid-cols-3">
              {data.models.map((m, i) => (
-                <ModelCard key={`${m.model}:${m.provider}`} entry={m} rank={i + 1} />
+                <ModelCard
+                  key={`${m.model}:${m.provider}`}
+                  entry={m}
+                  rank={i + 1}
+                  main={aux?.main ?? null}
+                  aux={aux?.tasks ?? []}
+                  onAssigned={onAssigned}
+                />
              ))}
            </div>
          ) : (
--- a/website/docs/user-guide/configuring-models.md
+++ b/website/docs/user-guide/configuring-models.md
@ -0,0 +1,207 @@
+---
+sidebar_position: 3
+---
+
+# Configuring Models
+
+Hermes uses two kinds of model slots:
+
+- **Main model** — what the agent thinks with. Every user message, every tool-call loop, every streamed response goes through this model.
+- **Auxiliary models** — the models that handle smaller side-jobs the agent offloads: context compression, vision (image analysis), web-page summarization, session search, approval scoring, MCP tool routing, session-title generation, and skill search. Each task has its own slot and can be overridden independently.
+
+This page covers configuring both from the dashboard. If you prefer config files or the CLI, jump to [Alternative methods](#alternative-methods) at the bottom.
+
+## The Models page
+
+Open the dashboard and click **Models** in the sidebar. You get two sections:
+
+1. **Model Settings** — the top panel, where you assign models to slots.
+2. **Usage analytics** — ranked cards showing every model that ran a session in the selected period, with token counts, cost, and capability badges.
+
+![Models page overview](/img/docs/dashboard-models/overview.png)
+
+The top card is the **Model Settings** panel. The main row always shows what the agent will spin up for new sessions. Click **Change** to open the picker.
+
+## Setting the main model
+
+Click **Change** on the Main model row:
+
+![Model picker dialog](/img/docs/dashboard-models/picker-dialog.png)
+
+The picker has two columns:
+
+- **Left** — authenticated providers. Only providers you've set up (API key set, OAuth'd, or defined as a custom endpoint) show up here. If a provider is missing, head to **Keys** and add its credential.
+- **Right** — the curated model list for the selected provider. These are the agentic models Hermes recommends for that provider, not the raw `/models` dump (which on OpenRouter lists 400+ models, including TTS, image generators, and rerankers).
+
+Type in the filter box to narrow by provider name, slug, or model ID.
+
+Pick a model, hit **Switch**, and Hermes writes it to `~/.hermes/config.yaml` under the `model` section. **This applies to new sessions only** — any chat tab you already have open keeps running whatever model it started with. To hot-swap the current chat, use the `/model` slash command inside it.
+
+## Setting auxiliary models
+
+Click **Show auxiliary** to reveal the eight task slots:
+
+![Auxiliary panel expanded](/img/docs/dashboard-models/auxiliary-expanded.png)
+
+Every auxiliary task defaults to `auto` — meaning Hermes uses your main model for that job too. Override a specific task when you want a cheaper or faster model for a side-job.
+
+### Common override patterns
+
+| Task | When to override |
+|---|---|
+| **Title Gen** | Almost always. A $0.10/M flash model writes session titles as well as Opus. Default config sets this to `google/gemini-3-flash-preview` on OpenRouter. |
+| **Vision** | When your main model is a coding model without vision (e.g. Kimi, DeepSeek). Point it at `google/gemini-2.5-flash` or `gpt-4o-mini`. |
+| **Compression** | When you're burning reasoning tokens on Opus/M2.7 just to summarize context. A fast chat model does the job at 1/50th the cost. |
+| **Session Search** | When recall queries fan out (the default `max_concurrency` is 3). A cheap model keeps the bill predictable. |
+| **Approval** | For `approval_mode: smart` — a fast/cheap model (haiku, flash, gpt-5-mini) decides whether to auto-approve low-risk commands. Expensive models here are a waste. |
+| **Web Extract** | When you use `web_extract` heavily. Same logic as compression — summarization doesn't need reasoning. |
+| **Skills Hub** | `hermes skills search` uses this. Usually fine at `auto`. |
+| **MCP** | MCP tool routing. Usually fine at `auto`. |
+
+### Per-task override
+
+Click **Change** on any auxiliary row. Same picker opens, same behavior — pick provider + model, hit Switch. The row updates to show `provider · model` instead of `auto (use main model)`.
+
+### Reset all to auto
+
+If you've over-tuned and want to start over, click **Reset all to auto** at the top of the auxiliary section. Every slot goes back to using your main model.
+
+## The "Use as" shortcut
+
+Every model card on the page has a **Use as** dropdown. This is the fast path — pick a model you see in your analytics, click **Use as**, and assign it to the main slot or any specific auxiliary task in one click:
+
+![Use as dropdown](/img/docs/dashboard-models/use-as-dropdown.png)
+
+The dropdown has:
+
+- **Main model** — same as clicking Change on the main row.
+- **All auxiliary tasks** — assigns this model to all 8 aux slots at once. Useful when you just want every side-job on a cheap flash model.
+- **Individual task options** — Vision, Web Extract, Compression, etc. The currently-assigned model for each task is marked `current`.
+
+Cards are badged with `main` or `aux · <task>` when they're currently assigned to something — so you can see at a glance which of your historical models are wired in where.
+
+## What gets written to `config.yaml`
+
+When you save via the dashboard, Hermes writes to `~/.hermes/config.yaml`:
+
+**Main model:**
+```yaml
+model:
+  provider: openrouter
+  default: anthropic/claude-opus-4.7
+  base_url: ''        # cleared on provider switch
+  api_mode: chat_completions
+```
+
+**Auxiliary override (example — vision on gemini-flash):**
+```yaml
+auxiliary:
+  vision:
+    provider: openrouter
+    model: google/gemini-2.5-flash
+    base_url: ''
+    api_key: ''
+    timeout: 120
+    extra_body: {}
+    download_timeout: 30
+```
+
+**Auxiliary on auto (default):**
+```yaml
+auxiliary:
+  compression:
+    provider: auto
+    model: ''
+    base_url: ''
+    # ... other fields unchanged
+```
+
+`provider: auto` with `model: ''` tells Hermes to use the main model for that task.
+
+## When does it take effect?
+
+- **CLI** (`hermes chat`): next `hermes chat` invocation.
+- **Gateway** (Telegram, Discord, Slack, etc.): next *new* session. Existing sessions keep their model. Restart the gateway (`hermes gateway restart`) if you want to force all sessions to pick up the change.
+- **Dashboard chat tab** (`/chat`): next new PTY. The currently-open chat keeps its model — use `/model` inside it to hot-swap.
+
+Changes never invalidate prompt caches on running sessions. That's deliberate: swapping the main model inside a session requires a cache reset (the system prompt contains model-specific content), and we reserve that for the explicit `/model` slash command inside chat.
+
+## Troubleshooting
+
+### "No authenticated providers" in the picker
+
+Hermes lists a provider only if it has a working credential. Check **Keys** in the sidebar — you should see one of: an API key, a successful OAuth, or a custom endpoint URL. If the provider you want isn't there, run `hermes setup` to wire it up, or go to **Keys** and add the env var.
+
+### Main model didn't change in my running chat
+
+Expected. The dashboard writes `config.yaml`, which new sessions read. The currently-open chat is a live agent process — it keeps whatever model it was spawned with. Use `/model <name>` inside the chat to hot-swap that specific session.
+
+### Auxiliary override "didn't take effect"
+
+Three things to check:
+
+1. **Did you start a new session?** Existing chats don't re-read config.
+2. **Is `provider` set to something other than `auto`?** If the field shows `auto`, the task is still using your main model. Click **Change** and pick a real provider.
+3. **Is the provider authenticated?** If you assigned `minimax` to a task but don't have a MiniMax API key, that task falls back to the OpenRouter default and logs a warning in `agent.log`.
+
+### I picked a model but Hermes switched providers on me
+
+On OpenRouter (or any aggregator), bare model names resolve *within* the aggregator first. So `claude-sonnet-4` on OpenRouter becomes `anthropic/claude-sonnet-4.6`, staying on your OpenRouter auth. The same `claude-sonnet-4` typed on a native Anthropic auth resolves to `claude-sonnet-4-6` and stays on Anthropic. If you see an unexpected provider switch, check that your current provider is what you expect — the picker always shows the current main at the top of the dialog.
+
+## Alternative methods
+
+### CLI slash command
+
+Inside any `hermes chat` session:
+
+```
+/model gpt-5.4 --provider openrouter             # session-only
+/model gpt-5.4 --provider openrouter --global    # also persists to config.yaml
+```
+
+`--global` does the same thing the dashboard's **Change** button does, plus it switches the running session in-place.
+
+### `hermes model` subcommand
+
+```bash
+hermes model list                   # list authenticated providers + models
+hermes model set anthropic/claude-opus-4.7 --provider openrouter
+```
+
+### Direct config edit
+
+Edit `~/.hermes/config.yaml` and restart whatever reads it. See the [Configuration reference](./configuration.md) for the full schema.
+
+### REST API
+
+The dashboard uses three endpoints. Useful for scripting:
+
+```bash
+# List authenticated providers + curated model lists
+curl -H "X-Hermes-Session-Token: $TOKEN" http://localhost:PORT/api/model/options
+
+# Read current main + auxiliary assignments
+curl -H "X-Hermes-Session-Token: $TOKEN" http://localhost:PORT/api/model/auxiliary
+
+# Set the main model
+curl -X POST -H "Content-Type: application/json" -H "X-Hermes-Session-Token: $TOKEN" \
+  -d '{"scope":"main","provider":"openrouter","model":"anthropic/claude-opus-4.7"}' \
+  http://localhost:PORT/api/model/set
+
+# Override a single auxiliary task
+curl -X POST -H "Content-Type: application/json" -H "X-Hermes-Session-Token: $TOKEN" \
+  -d '{"scope":"auxiliary","task":"vision","provider":"openrouter","model":"google/gemini-2.5-flash"}' \
+  http://localhost:PORT/api/model/set
+
+# Assign one model to every auxiliary task
+curl -X POST -H "Content-Type: application/json" -H "X-Hermes-Session-Token: $TOKEN" \
+  -d '{"scope":"auxiliary","task":"","provider":"openrouter","model":"google/gemini-2.5-flash"}' \
+  http://localhost:PORT/api/model/set
+
+# Reset all auxiliary tasks to auto
+curl -X POST -H "Content-Type: application/json" -H "X-Hermes-Session-Token: $TOKEN" \
+  -d '{"scope":"auxiliary","task":"__reset__","provider":"","model":""}' \
+  http://localhost:PORT/api/model/set
+```
+
+The session token is injected into the dashboard HTML at startup and rotates on every server restart. Grab it from the browser devtools (`window.__HERMES_SESSION_TOKEN__`) if you're scripting against a running dashboard.
--- a/website/sidebars.ts
+++ b/website/sidebars.ts
@ -23,6 +23,7 @@ const sidebars: SidebarsConfig = {
        'user-guide/cli',
        'user-guide/tui',
        'user-guide/configuration',
+        'user-guide/configuring-models',
        'user-guide/sessions',
        'user-guide/profiles',
        'user-guide/git-worktrees',
--- a/website/static/img/docs/dashboard-models/auxiliary-expanded.png
+++ b/website/static/img/docs/dashboard-models/auxiliary-expanded.png
--- a/website/static/img/docs/dashboard-models/overview.png
+++ b/website/static/img/docs/dashboard-models/overview.png
--- a/website/static/img/docs/dashboard-models/picker-dialog.png
+++ b/website/static/img/docs/dashboard-models/picker-dialog.png
--- a/website/static/img/docs/dashboard-models/use-as-dropdown.png
+++ b/website/static/img/docs/dashboard-models/use-as-dropdown.png