feat(desktop): warn when main-model switch leaves auxiliary tasks pinned to another provider (#40286)

Switching the main model never touches auxiliary slot pins (they're
independent, sticky per-task overrides). A user who switches main away
from a now-unpaid provider keeps hitting HTTP 402 (Payment Required)
errors on every background aux call until they manually reset those
pins — silently, with no UI signal.

- /api/model/set scope:'main' now returns stale_aux: slots still pinned
  to a provider different from the new main (additive field).
- Desktop Model Settings shows a switch-time notice after Apply AND a
  persistent banner when any loaded aux slot mismatches the main provider,
  both wired to the existing 'Reset all to main' action.
- Never auto-clears pins — a dedicated cheaper aux model is a legitimate
  config; surface-and-offer instead of nuking.
- Fixes a stale pre-existing assertion in the panel test (main model now
  renders via selectors, not a standalone label).
This commit is contained in:
Teknium 2026-06-05 23:35:36 -07:00 committed by GitHub
parent f8a241e105
commit b91aade176
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 217 additions and 3 deletions

View file

@ -41,7 +41,10 @@ describe('ModelSettings', () => {
await renderModelSettings()
await waitFor(() => expect(getGlobalModelInfo).toHaveBeenCalled())
expect(screen.getByText('nous / hermes-4')).toBeTruthy()
// The current model is loaded into the main-slot selectors (provider name
// + model id), not a standalone label.
expect(await screen.findByText('Nous')).toBeTruthy()
expect(screen.getByText('hermes-4')).toBeTruthy()
})
it('renders the auxiliary task rows', async () => {
@ -67,4 +70,35 @@ describe('ModelSettings', () => {
})
)
})
it('warns when a main switch leaves auxiliary tasks pinned to another provider', async () => {
setModelAssignment.mockResolvedValueOnce({
provider: 'openrouter',
model: 'anthropic/claude-opus-4.7',
gateway_tools: [],
stale_aux: [{ task: 'compression', provider: 'nous', model: 'hermes-4' }]
})
await renderModelSettings()
await waitFor(() => expect(getGlobalModelInfo).toHaveBeenCalled())
const applyButton = await screen.findByRole('button', { name: 'Apply' })
fireEvent.click(applyButton)
// The switch-time notice names the pinned provider and offers a reset.
expect(await screen.findByText(/still run on/)).toBeTruthy()
expect(screen.getByText('nous')).toBeTruthy()
})
it('shows a persistent banner when a loaded aux slot mismatches the main provider', async () => {
getAuxiliaryModels.mockResolvedValueOnce({
main: { provider: 'nous', model: 'hermes-4' },
tasks: [{ task: 'curator', provider: 'openrouter', model: 'anthropic/claude-opus-4.7', base_url: '' }]
})
await renderModelSettings()
// Banner present on load, no switch required.
expect(await screen.findByText(/still run on/)).toBeTruthy()
})
})

View file

@ -3,8 +3,8 @@ import { useCallback, useEffect, useMemo, useState } from 'react'
import { Button } from '@/components/ui/button'
import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from '@/components/ui/select'
import { getAuxiliaryModels, getGlobalModelInfo, getGlobalModelOptions, setModelAssignment } from '@/hermes'
import type { AuxiliaryModelsResponse, ModelOptionProvider } from '@/hermes'
import { Cpu, Loader2 } from '@/lib/icons'
import type { AuxiliaryModelsResponse, ModelOptionProvider, StaleAuxAssignment } from '@/hermes'
import { AlertTriangle, Cpu, Loader2 } from '@/lib/icons'
import { cn } from '@/lib/utils'
import { CONTROL_TEXT } from './constants'
@ -32,6 +32,47 @@ const AUX_TASKS: readonly AuxTaskMeta[] = [
const NO_PROVIDERS: readonly ModelOptionProvider[] = [{ name: '—', slug: '', models: [] }]
const AUX_TASK_LABELS: Record<string, string> = Object.fromEntries(
AUX_TASKS.map(meta => [meta.key, meta.label])
)
/**
 * Resolves the display label for an auxiliary task key.
 *
 * Only the label table's own entries are consulted: a plain-object lookup
 * would also resolve inherited members (e.g. `toString`, `constructor`) and
 * hand back a function instead of a label for such keys.
 *
 * @param key - Auxiliary task key.
 * @returns The configured label, or `key` itself when no label is registered.
 */
function taskLabel(key: string): string {
  const label = Object.prototype.hasOwnProperty.call(AUX_TASK_LABELS, key) ? AUX_TASK_LABELS[key] : undefined

  return label ?? key
}
/** Props for the stale-auxiliary-pin notice. */
interface StaleAuxWarningProps {
  /** While true, the "Reset all to main" button is disabled. */
  applying: boolean
  /** Invoked when the user clicks "Reset all to main". */
  onReset: () => void
  /** Auxiliary slots to report; an empty list renders nothing. */
  slots: readonly StaleAuxAssignment[]
}
/**
 * Shared notice: auxiliary tasks still pinned to a provider that isn't the
 * current main. Surfaces the silent credit-burn path (e.g. aux pinned to a
 * $0-balance provider after switching main away from it) and offers the
 * existing one-click reset rather than auto-clearing legitimate pins.
 *
 * Renders nothing when `slots` is empty. When every slot shares one provider
 * that provider is named; otherwise the notice says "other providers".
 */
function StaleAuxWarning({ applying, onReset, slots }: StaleAuxWarningProps) {
  const [first] = slots

  if (!first) {
    return null
  }

  // Provider slugs are compared case-insensitively, the same way stale slots
  // are detected, so "Nous" and "nous" count as one provider. The first
  // slot's original spelling is what gets displayed.
  const provider = first.provider
  const providerKey = provider.toLowerCase()
  const allSameProvider = slots.every(slot => slot.provider.toLowerCase() === providerKey)
  const names = slots.map(slot => taskLabel(slot.task)).join(', ')

  return (
    <div className="flex flex-wrap items-center gap-2 rounded-md border border-amber-500/40 bg-amber-500/10 px-3 py-2 text-xs text-amber-200">
      <AlertTriangle className="size-3.5 shrink-0" />
      <span className="grow">
        {slots.length} auxiliary task{slots.length === 1 ? '' : 's'} ({names}) still run on{' '}
        <span className="font-mono">{allSameProvider ? provider : 'other providers'}</span>, not your main model.
      </span>
      <Button disabled={applying} onClick={onReset} size="sm" variant="textStrong">
        Reset all to main
      </Button>
    </div>
  )
}
interface ModelSettingsProps {
/** Notified after the main model is applied, so live UI stores can sync. */
onMainModelChanged?: (provider: string, model: string) => void
@ -48,6 +89,9 @@ export function ModelSettings({ onMainModelChanged }: ModelSettingsProps) {
const [applying, setApplying] = useState(false)
const [editingAuxTask, setEditingAuxTask] = useState<null | string>(null)
const [auxDraft, setAuxDraft] = useState<{ model: string; provider: string }>({ model: '', provider: '' })
// Aux slots reported stale by the backend immediately after a main-model
// switch (provider differs from the new main). Cleared on next switch/reset.
const [switchStaleAux, setSwitchStaleAux] = useState<StaleAuxAssignment[]>([])
const refresh = useCallback(async () => {
setLoading(true)
@ -88,6 +132,22 @@ export function ModelSettings({ onMainModelChanged }: ModelSettingsProps) {
[auxDraft.provider, providers]
)
// Persistent mismatch: any aux slot pinned to a provider different from the
// current main, regardless of whether the user just switched. Catches the
// "I pinned aux months ago and forgot, now it bills a dead provider" case.
const persistentStaleAux = useMemo<StaleAuxAssignment[]>(() => {
const mainProvider = (mainModel?.provider ?? '').toLowerCase()
if (!mainProvider || !auxiliary) {
return []
}
return auxiliary.tasks
.filter(entry => {
const p = (entry.provider ?? '').toLowerCase()
return p && p !== 'auto' && p !== mainProvider
})
.map(entry => ({ task: entry.task, provider: entry.provider, model: entry.model }))
}, [auxiliary, mainModel])
const applyMainModel = useCallback(async () => {
if (!selectedProvider || !selectedModel) {
return
@ -101,6 +161,7 @@ export function ModelSettings({ onMainModelChanged }: ModelSettingsProps) {
const provider = result.provider || selectedProvider
const model = result.model || selectedModel
setMainModel({ provider, model })
setSwitchStaleAux(result.stale_aux ?? [])
onMainModelChanged?.(provider, model)
await refresh()
} catch (err) {
@ -182,6 +243,7 @@ export function ModelSettings({ onMainModelChanged }: ModelSettingsProps) {
scope: 'auxiliary',
task: '__reset__'
})
setSwitchStaleAux([])
await refresh()
} catch (err) {
setError(err instanceof Error ? err.message : String(err))
@ -235,6 +297,11 @@ export function ModelSettings({ onMainModelChanged }: ModelSettingsProps) {
</Button>
</div>
{error && <div className="mt-2 text-xs text-destructive">{error}</div>}
{switchStaleAux.length > 0 && (
<div className="mt-2">
<StaleAuxWarning applying={applying} onReset={() => void resetAuxiliaryModels()} slots={switchStaleAux} />
</div>
)}
</section>
<section>
@ -252,6 +319,11 @@ export function ModelSettings({ onMainModelChanged }: ModelSettingsProps) {
<p className="mb-2 text-xs text-muted-foreground">
Helper tasks run on the main model by default. Assign a dedicated model to any task to override.
</p>
{switchStaleAux.length === 0 && persistentStaleAux.length > 0 && (
<div className="mb-2.5">
<StaleAuxWarning applying={applying} onReset={() => void resetAuxiliaryModels()} slots={persistentStaleAux} />
</div>
)}
<div className="grid gap-1">
{AUX_TASKS.map(meta => {
const current = auxiliary?.tasks.find(entry => entry.task === meta.key)

View file

@ -94,6 +94,7 @@ export type {
SessionSearchResponse,
SessionSearchResult,
SkillInfo,
StaleAuxAssignment,
StatusResponse,
ToolsetConfig,
ToolsetInfo

View file

@ -606,6 +606,14 @@ export interface ModelAssignmentRequest {
task?: string
}
/** An auxiliary task still pinned to a provider that differs from the
 * newly-selected main provider after a main-model switch. */
export interface StaleAuxAssignment {
  /** Auxiliary task slot key (e.g. 'compression', 'curator'). */
  task: string
  /** Provider the slot is pinned to; never empty or 'auto' for a stale slot. */
  provider: string
  /** Model pinned for the slot; empty string when none is configured. */
  model: string
}
export interface ModelAssignmentResponse {
/** Persisted endpoint URL for custom/local providers (echoed back). */
base_url?: string
@ -618,5 +626,9 @@ export interface ModelAssignmentResponse {
provider?: string
reset?: boolean
scope?: string
/** Auxiliary slots still pinned to a different provider than the new main.
* Switching main never clears aux pins; this lets the UI warn the user
* their helper tasks aren't following the switch. Only set on scope:'main'. */
stale_aux?: StaleAuxAssignment[]
tasks?: string[]
}

View file

@ -2248,6 +2248,36 @@ async def set_model_assignment(body: ModelAssignment):
_log.debug("apply_nous_managed_defaults skipped", exc_info=True)
save_config(cfg)
# Surface auxiliary slots still pinned to a *different* provider than
# the new main one. Switching the main model does NOT touch aux pins
# (they're independent, sticky per-task overrides — see
# auxiliary_client._resolve_auto). A user who switches main away from
# a now-unpaid provider (e.g. nous with $0 balance) keeps paying 402s
# on every background aux call until they reset those pins. We never
# auto-clear them — pinning aux to a cheaper/different model is a
# legitimate config — but we tell the caller so the UI can offer a
# "reset to main" nudge instead of silently burning credits.
new_provider = provider.strip().lower()
stale_aux: list[dict] = []
aux_cfg = cfg.get("auxiliary", {})
if isinstance(aux_cfg, dict):
for slot in _AUX_TASK_SLOTS:
slot_cfg = aux_cfg.get(slot)
if not isinstance(slot_cfg, dict):
continue
slot_provider = str(slot_cfg.get("provider", "") or "").strip()
if (
slot_provider
and slot_provider.lower() not in {"auto", ""}
and slot_provider.lower() != new_provider
):
stale_aux.append({
"task": slot,
"provider": slot_provider,
"model": str(slot_cfg.get("model", "") or ""),
})
return {
"ok": True,
"scope": "main",
@ -2255,6 +2285,7 @@ async def set_model_assignment(body: ModelAssignment):
"model": model,
"base_url": model_cfg.get("base_url", ""),
"gateway_tools": gateway_tools,
"stale_aux": stale_aux,
}
# scope == "auxiliary"

View file

@ -1377,6 +1377,58 @@ class TestWebServerEndpoints:
assert resp.status_code == 200
assert resp.json()["base_url"] == ""
def test_set_model_main_reports_stale_auxiliary_pins(self):
    """A main-provider switch must list aux slots pinned elsewhere.

    Slots pinned to a provider *different* from the new main are reported in
    ``stale_aux`` so the UI can warn that helper tasks are not following the
    switch (the silent credit-burn path). ``auto`` slots are never reported.
    """
    from hermes_cli.config import load_config, save_config

    config = load_config()
    config["model"] = {"provider": "nous", "default": "hermes-4"}
    config["auxiliary"] = {
        # Pinned to nous, the OLD main provider: stale once main moves away.
        "compression": {"provider": "nous", "model": "anthropic/claude-sonnet-4.6"},
        # "auto" follows the main model, so it can never be stale.
        "vision": {"provider": "auto", "model": ""},
        # Pinned to a third provider: stale relative to the new main as well.
        "curator": {"provider": "deepseek", "model": "deepseek-chat"},
    }
    save_config(config)

    payload = {"scope": "main", "provider": "openrouter", "model": "anthropic/claude-opus-4.8"}
    response = self.client.post("/api/model/set", json=payload)
    assert response.status_code == 200

    stale_entries = response.json()["stale_aux"]
    reported_tasks = {item["task"] for item in stale_entries}
    assert reported_tasks == {"compression", "curator"}
    # The auto slot must never appear.
    assert "vision" not in reported_tasks

    # Provider and model are echoed back so the UI can label the slot.
    compression_entry = next(item for item in stale_entries if item["task"] == "compression")
    assert compression_entry["provider"] == "nous"
    assert compression_entry["model"] == "anthropic/claude-sonnet-4.6"
def test_set_model_main_no_stale_when_aux_matches_new_provider(self):
    """Aux slots pinned to the SAME provider as the new main are not stale."""
    from hermes_cli.config import load_config, save_config

    config = load_config()
    config["model"] = {"provider": "nous", "default": "hermes-4"}
    config["auxiliary"] = {
        # Already on the provider the main model is about to switch to.
        "compression": {"provider": "openrouter", "model": "google/gemini-2.5-flash"},
        "vision": {"provider": "auto", "model": ""},
    }
    save_config(config)

    payload = {"scope": "main", "provider": "openrouter", "model": "anthropic/claude-opus-4.8"}
    response = self.client.post("/api/model/set", json=payload)
    assert response.status_code == 200
    assert response.json()["stale_aux"] == []

    # The switch itself must have been persisted to the config.
    persisted_model = load_config().get("model")
    assert persisted_model["provider"] == "openrouter"
    assert persisted_model.get("base_url", "") == ""

View file

@ -1608,6 +1608,14 @@ export interface ModelAssignmentRequest {
task?: string;
}
/** An auxiliary task still pinned to a provider that differs from the
 * newly-selected main provider after a main-model switch. */
export interface StaleAuxAssignment {
  /** Auxiliary task slot key (e.g. 'compression', 'curator'). */
  task: string;
  /** Provider the slot is pinned to; never empty or 'auto' for a stale slot. */
  provider: string;
  /** Model pinned for the slot; empty string when none is configured. */
  model: string;
}
export interface ModelAssignmentResponse {
ok: boolean;
scope?: string;
@ -1615,6 +1623,10 @@ export interface ModelAssignmentResponse {
model?: string;
tasks?: string[];
reset?: boolean;
/** Auxiliary slots still pinned to a different provider than the new main.
* Switching main never clears aux pins; this lets the UI warn the user
* their helper tasks aren't following the switch. Only set on scope:'main'. */
stale_aux?: StaleAuxAssignment[];
}
// ── OAuth provider types ────────────────────────────────────────────────