diff --git a/apps/desktop/src/app/desktop-controller.tsx b/apps/desktop/src/app/desktop-controller.tsx index e071a2a0ce6..45251ceef9b 100644 --- a/apps/desktop/src/app/desktop-controller.tsx +++ b/apps/desktop/src/app/desktop-controller.tsx @@ -711,7 +711,9 @@ export function DesktopController() { } lastGatewayProfileRef.current = activeGatewayProfile - void refreshCurrentModel() + // Force: the new profile has its own default, so reseed even if the composer + // already shows the previous profile's model. + void refreshCurrentModel(true) void refreshActiveProfile() }, [activeGatewayProfile, refreshCurrentModel]) diff --git a/apps/desktop/src/app/session/hooks/use-model-controls.test.tsx b/apps/desktop/src/app/session/hooks/use-model-controls.test.tsx index 612290800e0..f7765de04c5 100644 --- a/apps/desktop/src/app/session/hooks/use-model-controls.test.tsx +++ b/apps/desktop/src/app/session/hooks/use-model-controls.test.tsx @@ -130,7 +130,6 @@ describe('useModelControls', () => { await expect( controls.selectModel({ model: 'claude-sonnet-4.6', - persistGlobal: false, provider: 'anthropic' }) ).resolves.toBe(true) @@ -143,26 +142,57 @@ describe('useModelControls', () => { expect(requestGateway).not.toHaveBeenCalledWith('slash.exec', expect.anything()) }) - it('keeps the global path on setGlobalModel when there is no active session', async () => { - setGlobalModel.mockResolvedValue(undefined) + it('stores a no-session pick as UI state with no gateway or global write', async () => { + const requestGateway = vi.fn() let controls!: Controls render( (controls = value)} - requestGateway={vi.fn()} + requestGateway={requestGateway} /> ) await expect( controls.selectModel({ model: 'claude-sonnet-4.6', - persistGlobal: false, provider: 'anthropic' }) ).resolves.toBe(true) - expect(setGlobalModel).toHaveBeenCalledWith('anthropic', 'claude-sonnet-4.6') + // The pick is plain UI state; session.create ships it later. Nothing touches + // the gateway or the profile default here. 
+ expect($currentModel.get()).toBe('claude-sonnet-4.6') + expect($currentProvider.get()).toBe('anthropic') + expect(requestGateway).not.toHaveBeenCalled() + expect(setGlobalModel).not.toHaveBeenCalled() + }) + + it('seeds an empty composer model from global but never clobbers a pick', async () => { + vi.mocked(getGlobalModelInfo).mockResolvedValue({ model: 'openai/gpt-5.5', provider: 'openai-codex' }) + + const { result } = renderHook(() => + useModelControls({ + activeSessionId: null, + queryClient: new QueryClient(), + requestGateway: vi.fn() + }) + ) + + // Empty → seeds the default. + await result.current.refreshCurrentModel() + expect($currentModel.get()).toBe('openai/gpt-5.5') + + // A user pick must survive the lifecycle refreshes that fire on boot / fresh + // draft / session events. + setCurrentModel('anthropic/claude-sonnet-4.6') + setCurrentProvider('anthropic') + await result.current.refreshCurrentModel() + expect($currentModel.get()).toBe('anthropic/claude-sonnet-4.6') + + // A profile swap forces a reseed to the new profile's default. 
+ await result.current.refreshCurrentModel(true) + expect($currentModel.get()).toBe('openai/gpt-5.5') }) }) diff --git a/apps/desktop/src/app/session/hooks/use-model-controls.ts b/apps/desktop/src/app/session/hooks/use-model-controls.ts index 681eac871a2..50788b1e0be 100644 --- a/apps/desktop/src/app/session/hooks/use-model-controls.ts +++ b/apps/desktop/src/app/session/hooks/use-model-controls.ts @@ -1,7 +1,7 @@ import { type QueryClient } from '@tanstack/react-query' import { useCallback } from 'react' -import { getGlobalModelInfo, setGlobalModel } from '@/hermes' +import { getGlobalModelInfo } from '@/hermes' import { useI18n } from '@/i18n' import { notifyError } from '@/store/notifications' import { @@ -15,7 +15,6 @@ import type { ModelOptionsResponse } from '@/types/hermes' interface ModelSelection { model: string - persistGlobal: boolean provider: string } @@ -28,6 +27,7 @@ interface ModelControlsOptions { export function useModelControls({ activeSessionId, queryClient, requestGateway }: ModelControlsOptions) { const { t } = useI18n() const copy = t.desktop + const updateModelOptionsCache = useCallback( (provider: string, model: string, includeGlobal: boolean) => { const patch = (prev: ModelOptionsResponse | undefined) => ({ ...(prev ?? {}), provider, model }) @@ -41,14 +41,24 @@ export function useModelControls({ activeSessionId, queryClient, requestGateway [activeSessionId, queryClient] ) - const refreshCurrentModel = useCallback(async () => { + // Seed the composer's model state from the profile default. `force` reseeds + // for a profile swap (the new profile has its own default); otherwise this + // only fills an EMPTY selection so a user's pick (plain UI state in + // $currentModel) survives the lifecycle refreshes that fire on boot / fresh + // draft / session events. A live session owns the footer, so skip entirely. 
+ const refreshCurrentModel = useCallback(async (force = false) => { try { + if ($activeSessionId.get()) { + return + } + + if (!force && $currentModel.get()) { + return + } + const result = await getGlobalModelInfo() - // A resumed/live session owns the footer model state. Global config - // refreshes (gateway boot, profile swap, settings save) must not clobber - // the active chat's runtime model/provider in the status bar. - if ($activeSessionId.get()) { + if ($activeSessionId.get() || (!force && $currentModel.get())) { return } @@ -64,12 +74,14 @@ export function useModelControls({ activeSessionId, queryClient, requestGateway } }, []) - // Returns whether the switch succeeded so callers can await it before - // applying follow-up changes (e.g. editing a model's reasoning/fast must land - // on the right active model — bail rather than write to the previous one). + // Returns whether the switch succeeded so callers can await it before applying + // follow-up changes. The composer model is plain UI state: with no live + // session it's just stored (and shipped on the next session.create); with one + // it's scoped to that session via config.set. It NEVER writes the profile + // default — that lives in Settings → Model — so picking a model here can't + // silently mutate global config. const selectModel = useCallback( async (selection: ModelSelection): Promise => { - const includeGlobal = selection.persistGlobal || !activeSessionId // Snapshot for rollback: the switch is applied optimistically, so a // failure must restore the prior model/provider (store + query cache) // rather than leave the UI showing a model the backend never selected. 
@@ -78,42 +90,34 @@ export function useModelControls({ activeSessionId, queryClient, requestGateway setCurrentModel(selection.model) setCurrentProvider(selection.provider) - updateModelOptionsCache(selection.provider, selection.model, includeGlobal) + updateModelOptionsCache(selection.provider, selection.model, !activeSessionId) + + // No live session yet: the pick is pure UI state. session.create reads + // $currentModel/$currentProvider and applies it as that session's override. + if (!activeSessionId) { + return true + } try { - if (activeSessionId) { - await requestGateway('config.set', { - session_id: activeSessionId, - key: 'model', - value: `${selection.model} --provider ${selection.provider}${selection.persistGlobal ? ' --global' : ''}` - }) + await requestGateway('config.set', { + session_id: activeSessionId, + key: 'model', + value: `${selection.model} --provider ${selection.provider}` + }) - if (selection.persistGlobal) { - void refreshCurrentModel() - } - - void queryClient.invalidateQueries({ - queryKey: selection.persistGlobal ? 
['model-options'] : ['model-options', activeSessionId] - }) - - return true - } - - await setGlobalModel(selection.provider, selection.model) - void refreshCurrentModel() - void queryClient.invalidateQueries({ queryKey: ['model-options'] }) + void queryClient.invalidateQueries({ queryKey: ['model-options', activeSessionId] }) return true } catch (err) { setCurrentModel(prevModel) setCurrentProvider(prevProvider) - updateModelOptionsCache(prevProvider, prevModel, includeGlobal) + updateModelOptionsCache(prevProvider, prevModel, !activeSessionId) notifyError(err, copy.modelSwitchFailed) return false } }, - [activeSessionId, copy.modelSwitchFailed, queryClient, refreshCurrentModel, requestGateway, updateModelOptionsCache] + [activeSessionId, copy.modelSwitchFailed, queryClient, requestGateway, updateModelOptionsCache] ) return { refreshCurrentModel, selectModel, updateModelOptionsCache } diff --git a/apps/desktop/src/app/session/hooks/use-session-actions.ts b/apps/desktop/src/app/session/hooks/use-session-actions.ts index 50b6bb0d270..6f7a779e8ea 100644 --- a/apps/desktop/src/app/session/hooks/use-session-actions.ts +++ b/apps/desktop/src/app/session/hooks/use-session-actions.ts @@ -15,6 +15,10 @@ import { requestDesktopOnboarding } from '@/store/onboarding' import { $activeGatewayProfile, $newChatProfile, $profiles, ensureGatewayProfile, normalizeProfileKey } from '@/store/profile' import { $currentCwd, + $currentFastMode, + $currentModel, + $currentProvider, + $currentReasoningEffort, $messages, $sessions, $yoloActive, @@ -407,13 +411,13 @@ export function useSessionActions({ }) setSessionStartedAt(null) setTurnStartedAt(null) - // New chats start in the configured default project dir when set, - // otherwise the sticky last-used workspace (PR #37586). 
- setCurrentModel('') - setCurrentProvider('') - setCurrentReasoningEffort('') + // The composer's model/effort/fast is sticky UI state (persisted in + // localStorage) — a new chat FOLLOWS your last pick instead of snapping + // back to the profile default, so we deliberately don't reset it here. The + // profile default still owns first-run seeding and profile switches (see + // refreshCurrentModel). Only $currentServiceTier (a live-session mirror) + // is cleared. setCurrentServiceTier('') - setCurrentFastMode(false) setYoloActive(false) setCurrentCwd(workspaceCwdForNewSession()) setCurrentBranch('') @@ -443,11 +447,23 @@ export function useSessionActions({ const newChatProfile = $newChatProfile.get() ?? normalizeProfileKey($activeGatewayProfile.get()) await ensureGatewayProfile(newChatProfile) const cwd = $currentCwd.get().trim() || workspaceCwdForNewSession() + // The composer's model/effort/fast is sticky UI state ($currentModel, + // $currentProvider, $currentReasoningEffort, $currentFastMode). Ship it + // with every session.create so the new chat opens on whatever the picker + // shows — applied as per-session overrides, never written to the profile + // default (that lives in Settings → Model). + const uiModel = $currentModel.get().trim() + const uiProvider = $currentProvider.get().trim() + const uiEffort = $currentReasoningEffort.get().trim() + const uiFast = $currentFastMode.get() const created = await requestGateway('session.create', { cols: 96, ...(cwd && { cwd }), - ...(newChatProfile ? { profile: newChatProfile } : {}) + ...(newChatProfile ? { profile: newChatProfile } : {}), + ...(uiModel ? { model: uiModel, ...(uiProvider ? { provider: uiProvider } : {}) } : {}), + ...(uiEffort ? { reasoning_effort: uiEffort } : {}), + ...(uiFast ? { fast: true } : {}) }) const stored = created.stored_session_id ?? 
null diff --git a/apps/desktop/src/app/shell/model-menu-panel.tsx b/apps/desktop/src/app/shell/model-menu-panel.tsx index c0c6936175e..b87b1a030d1 100644 --- a/apps/desktop/src/app/shell/model-menu-panel.tsx +++ b/apps/desktop/src/app/shell/model-menu-panel.tsx @@ -43,7 +43,7 @@ import { ModelEditSubmenu, resolveFastControl } from './model-edit-submenu' interface ModelMenuPanelProps { gateway?: HermesGateway - onSelectModel: (selection: { model: string; persistGlobal: boolean; provider: string }) => Promise | void + onSelectModel: (selection: { model: string; provider: string }) => Promise | void requestGateway: (method: string, params?: Record) => Promise } @@ -95,8 +95,10 @@ export function ModelMenuPanel({ gateway, onSelectModel, requestGateway }: Model [visibleModels, providers] ) - const switchTo = (model: string, provider: string) => - onSelectModel({ model, persistGlobal: !activeSessionId, provider }) + // The composer picker never persists the profile default. With a session it + // scopes the switch to that session; with none it's UI state shipped on the + // next session.create (see selectModel). The default lives in Settings → Model. + const switchTo = (model: string, provider: string) => onSelectModel({ model, provider }) // Selecting a model row restores that model's remembered preset onto the // session (effort/fast), gated by capability. Unset → Hermes defaults. 
diff --git a/apps/desktop/src/components/model-picker.tsx b/apps/desktop/src/components/model-picker.tsx index d65bf7f89a7..be941e23d06 100644 --- a/apps/desktop/src/components/model-picker.tsx +++ b/apps/desktop/src/components/model-picker.tsx @@ -11,7 +11,6 @@ import { startManualOnboarding } from '../store/onboarding' import { InlineNotice } from './notifications' import { Button } from './ui/button' -import { Checkbox } from './ui/checkbox' import { Command, CommandEmpty, CommandGroup, CommandInput, CommandItem, CommandList } from './ui/command' import { Dialog, DialogContent, DialogDescription, DialogFooter, DialogHeader, DialogTitle } from './ui/dialog' import { Skeleton } from './ui/skeleton' @@ -23,7 +22,7 @@ interface ModelPickerDialogProps { sessionId?: string | null currentModel: string currentProvider: string - onSelect: (selection: { provider: string; model: string; persistGlobal: boolean }) => void + onSelect: (selection: { provider: string; model: string }) => void /** * Optional class to apply to DialogContent. Use to override z-index when * stacking the picker on top of another fixed overlay (e.g. the desktop @@ -45,7 +44,6 @@ export function ModelPickerDialog({ }: ModelPickerDialogProps) { const { t } = useI18n() const copy = t.modelPicker - const [persistGlobal, setPersistGlobal] = useState(!sessionId) // Own the search term so we can filter manually. cmdk's built-in // shouldFilter reorders items by its fuzzy-match score (≈alphabetical with // an empty query), which destroys the backend's curated order. We disable @@ -79,11 +77,7 @@ export function ModelPickerDialog({ : null const selectModel = (provider: ModelOptionProvider, model: string) => { - onSelect({ - provider: provider.slug, - model, - persistGlobal: persistGlobal || !sessionId - }) + onSelect({ provider: provider.slug, model }) onOpenChange(false) } @@ -128,24 +122,13 @@ export function ModelPickerDialog({ - - - -
- - -
+ + + diff --git a/apps/desktop/src/i18n/en.ts b/apps/desktop/src/i18n/en.ts index 2710f8273f6..c1fbf90bcb7 100644 --- a/apps/desktop/src/i18n/en.ts +++ b/apps/desktop/src/i18n/en.ts @@ -1503,8 +1503,6 @@ export const en: Translations = { unknown: '(unknown)', search: 'Filter providers and models...', noModels: 'No models found.', - persistGlobalSession: 'Persist globally (otherwise this session only)', - persistGlobal: 'Persist globally', addProvider: 'Add provider', loadFailed: 'Could not load models', noAuthenticatedProviders: 'No authenticated providers.', diff --git a/apps/desktop/src/i18n/ja.ts b/apps/desktop/src/i18n/ja.ts index 4f56ed46b65..f26508e5897 100644 --- a/apps/desktop/src/i18n/ja.ts +++ b/apps/desktop/src/i18n/ja.ts @@ -1637,8 +1637,6 @@ export const ja = defineLocale({ unknown: '(不明)', search: 'プロバイダーとモデルをフィルター...', noModels: 'モデルが見つかりません。', - persistGlobalSession: 'グローバルに保持(それ以外はこのセッションのみ)', - persistGlobal: 'グローバルに保持', addProvider: 'プロバイダーを追加', loadFailed: 'モデルを読み込めませんでした', noAuthenticatedProviders: '認証済みプロバイダーがありません。', diff --git a/apps/desktop/src/i18n/types.ts b/apps/desktop/src/i18n/types.ts index 58d78d4a384..cc76b30d346 100644 --- a/apps/desktop/src/i18n/types.ts +++ b/apps/desktop/src/i18n/types.ts @@ -1145,8 +1145,6 @@ export interface Translations { unknown: string search: string noModels: string - persistGlobalSession: string - persistGlobal: string addProvider: string loadFailed: string noAuthenticatedProviders: string diff --git a/apps/desktop/src/i18n/zh-hant.ts b/apps/desktop/src/i18n/zh-hant.ts index f01c94de738..6f964c071f2 100644 --- a/apps/desktop/src/i18n/zh-hant.ts +++ b/apps/desktop/src/i18n/zh-hant.ts @@ -1581,8 +1581,6 @@ export const zhHant = defineLocale({ unknown: '(未知)', search: '篩選提供方和模型...', noModels: '找不到模型。', - persistGlobalSession: '全域儲存(否則僅限此工作階段)', - persistGlobal: '全域儲存', addProvider: '新增提供方', loadFailed: '無法載入模型', noAuthenticatedProviders: '沒有已驗證的提供方。', diff --git a/apps/desktop/src/i18n/zh.ts 
b/apps/desktop/src/i18n/zh.ts index ea24026a5b1..0387a6be5bc 100644 --- a/apps/desktop/src/i18n/zh.ts +++ b/apps/desktop/src/i18n/zh.ts @@ -1683,8 +1683,6 @@ export const zh: Translations = { unknown: '(未知)', search: '筛选提供方和模型...', noModels: '未找到模型。', - persistGlobalSession: '全局保存 (否则仅当前会话)', - persistGlobal: '全局保存', addProvider: '添加提供方', loadFailed: '无法加载模型', noAuthenticatedProviders: '没有已认证的提供方。', diff --git a/apps/desktop/src/store/session.ts b/apps/desktop/src/store/session.ts index f1e1e2ee617..e40484cfec1 100644 --- a/apps/desktop/src/store/session.ts +++ b/apps/desktop/src/store/session.ts @@ -4,13 +4,23 @@ import { lastVisibleMessageIsUser } from '@/app/chat/thread-loading' import type { ContextSuggestion } from '@/app/types' import type { HermesConnection } from '@/global' import type { ChatMessage } from '@/lib/chat-messages' -import { persistString, storedString } from '@/lib/storage' +import { persistBoolean, persistString, storedBoolean, storedString } from '@/lib/storage' import type { SessionInfo, UsageStats } from '@/types/hermes' type Updater = T | ((current: T) => T) const WORKSPACE_CWD_KEY = 'hermes.desktop.workspace-cwd' +// The composer's model/effort/fast is sticky UI state, NOT the profile default +// (that lives in Settings → Model). Persisting it in localStorage makes a pick +// follow across Cmd+N and app restarts instead of snapping back to the default. +// It's deliberately global (not per-profile): a profile switch force-reseeds to +// that profile's default, while within a profile new chats keep your last pick. 
+const COMPOSER_MODEL_KEY = 'hermes.desktop.composer.model' +const COMPOSER_PROVIDER_KEY = 'hermes.desktop.composer.provider' +const COMPOSER_EFFORT_KEY = 'hermes.desktop.composer.reasoning-effort' +const COMPOSER_FAST_KEY = 'hermes.desktop.composer.fast' + let configuredDefaultProjectDir = '' function workspaceCwdKey(connection: HermesConnection | null = $connection.get()): string { @@ -208,11 +218,11 @@ export const $lastVisibleMessageIsUser = computed($messages, lastVisibleMessageI export const $freshDraftReady = atom(false) export const $busy = atom(false) export const $awaitingResponse = atom(false) -export const $currentModel = atom('') -export const $currentProvider = atom('') -export const $currentReasoningEffort = atom('') +export const $currentModel = atom(storedString(COMPOSER_MODEL_KEY) ?? '') +export const $currentProvider = atom(storedString(COMPOSER_PROVIDER_KEY) ?? '') +export const $currentReasoningEffort = atom(storedString(COMPOSER_EFFORT_KEY) ?? '') export const $currentServiceTier = atom('') -export const $currentFastMode = atom(false) +export const $currentFastMode = atom(storedBoolean(COMPOSER_FAST_KEY, false)) // Effective approval-bypass state mirrored from the gateway (session.info). // Persistence lives in the backend config (approvals.mode), so this is a plain // reflection of the truth the gateway reports rather than its own store. 
@@ -254,11 +264,29 @@ export const setMessages = (next: Updater) => updateAtom($message export const setFreshDraftReady = (next: Updater) => updateAtom($freshDraftReady, next) export const setBusy = (next: Updater) => updateAtom($busy, next) export const setAwaitingResponse = (next: Updater) => updateAtom($awaitingResponse, next) -export const setCurrentModel = (next: Updater) => updateAtom($currentModel, next) -export const setCurrentProvider = (next: Updater) => updateAtom($currentProvider, next) -export const setCurrentReasoningEffort = (next: Updater) => updateAtom($currentReasoningEffort, next) + +export const setCurrentModel = (next: Updater) => { + updateAtom($currentModel, next) + persistString(COMPOSER_MODEL_KEY, $currentModel.get() || null) +} + +export const setCurrentProvider = (next: Updater) => { + updateAtom($currentProvider, next) + persistString(COMPOSER_PROVIDER_KEY, $currentProvider.get() || null) +} + +export const setCurrentReasoningEffort = (next: Updater) => { + updateAtom($currentReasoningEffort, next) + persistString(COMPOSER_EFFORT_KEY, $currentReasoningEffort.get() || null) +} + export const setCurrentServiceTier = (next: Updater) => updateAtom($currentServiceTier, next) -export const setCurrentFastMode = (next: Updater) => updateAtom($currentFastMode, next) + +export const setCurrentFastMode = (next: Updater) => { + updateAtom($currentFastMode, next) + persistBoolean(COMPOSER_FAST_KEY, $currentFastMode.get()) +} + export const setYoloActive = (next: Updater) => updateAtom($yoloActive, next) export const setCurrentCwd = (next: Updater) => { diff --git a/apps/desktop/src/store/updates.test.ts b/apps/desktop/src/store/updates.test.ts index 01f78bc08dc..913e4fb11ee 100644 --- a/apps/desktop/src/store/updates.test.ts +++ b/apps/desktop/src/store/updates.test.ts @@ -5,6 +5,9 @@ import type { DesktopUpdateStatus } from '@/global' const storage = new Map() vi.mock('@/lib/storage', () => ({ + persistBoolean: (key: string, value: boolean) => { + 
storage.set(key, String(value)) + }, persistString: (key: string, value: null | string) => { if (value === null) { storage.delete(key) @@ -12,6 +15,11 @@ vi.mock('@/lib/storage', () => ({ storage.set(key, value) } }, + storedBoolean: (key: string, fallback: boolean) => { + const value = storage.get(key) + + return value === undefined ? fallback : value === 'true' + }, storedString: (key: string) => storage.get(key) ?? null })) diff --git a/tests/test_tui_gateway_server.py b/tests/test_tui_gateway_server.py index 2b37b5788bb..77884c5920e 100644 --- a/tests/test_tui_gateway_server.py +++ b/tests/test_tui_gateway_server.py @@ -1851,8 +1851,10 @@ def test_ensure_session_db_row_persists_explicit_cwd(monkeypatch, tmp_path): created = [] class _FakeDB: - def create_session(self, key, source=None, model=None, cwd=None): - created.append({"key": key, "source": source, "model": model, "cwd": cwd}) + def create_session(self, key, source=None, model=None, model_config=None, cwd=None): + created.append( + {"key": key, "source": source, "model": model, "model_config": model_config, "cwd": cwd} + ) monkeypatch.setattr(server, "_get_db", lambda: _FakeDB()) monkeypatch.setattr(server, "_resolve_model", lambda: "test-model") @@ -1860,7 +1862,7 @@ def test_ensure_session_db_row_persists_explicit_cwd(monkeypatch, tmp_path): server._ensure_session_db_row({"session_key": "k1", "cwd": str(tmp_path), "explicit_cwd": True}) assert created == [ - {"key": "k1", "source": "tui", "model": "test-model", "cwd": str(tmp_path)} + {"key": "k1", "source": "tui", "model": "test-model", "model_config": None, "cwd": str(tmp_path)} ] @@ -1870,15 +1872,74 @@ def test_ensure_session_db_row_defaults_to_no_workspace(monkeypatch, tmp_path): created = [] class _FakeDB: - def create_session(self, key, source=None, model=None, cwd=None): - created.append({"key": key, "source": source, "model": model, "cwd": cwd}) + def create_session(self, key, source=None, model=None, model_config=None, cwd=None): + 
created.append( + {"key": key, "source": source, "model": model, "model_config": model_config, "cwd": cwd} + ) monkeypatch.setattr(server, "_get_db", lambda: _FakeDB()) monkeypatch.setattr(server, "_resolve_model", lambda: "test-model") server._ensure_session_db_row({"session_key": "k1", "cwd": str(tmp_path)}) - assert created == [{"key": "k1", "source": "tui", "model": "test-model", "cwd": None}] + assert created == [ + {"key": "k1", "source": "tui", "model": "test-model", "model_config": None, "cwd": None} + ] + + +def test_ensure_session_db_row_persists_session_model_override(monkeypatch): + """The session's composer pick (model + effort + fast) must own the DB row. + + Regression for the "switched to gpt-5.5, reconnect snapped back to opus" + bug: the row was created with the global default and won the INSERT-OR-IGNORE + race, so resume rebuilt from the global model and silently reverted the + chat. The override model + a model_config carrying provider/reasoning/ + service_tier must be persisted so session.resume restores all three. 
+ """ + created = [] + + class _FakeDB: + def create_session(self, key, source=None, model=None, model_config=None, cwd=None): + created.append( + {"key": key, "model": model, "model_config": model_config, "cwd": cwd} + ) + + monkeypatch.setattr(server, "_get_db", lambda: _FakeDB()) + monkeypatch.setattr(server, "_resolve_model", lambda: "global/default") + + server._ensure_session_db_row( + { + "session_key": "k1", + "model_override": {"model": "openai/gpt-5.5", "provider": "openrouter"}, + "create_reasoning_override": {"effort": "high"}, + "create_service_tier_override": "priority", + } + ) + + assert len(created) == 1 + row = created[0] + assert row["model"] == "openai/gpt-5.5" + assert row["model_config"]["model"] == "openai/gpt-5.5" + assert row["model_config"]["provider"] == "openrouter" + assert row["model_config"]["reasoning_config"] == {"effort": "high"} + assert row["model_config"]["service_tier"] == "priority" + + +def test_ensure_session_db_row_no_override_uses_global(monkeypatch): + """A chat that made no explicit pick falls back to the global model and + writes no model_config (so it tracks the profile default).""" + created = [] + + class _FakeDB: + def create_session(self, key, source=None, model=None, model_config=None, cwd=None): + created.append({"model": model, "model_config": model_config}) + + monkeypatch.setattr(server, "_get_db", lambda: _FakeDB()) + monkeypatch.setattr(server, "_resolve_model", lambda: "global/default") + + server._ensure_session_db_row({"session_key": "k1", "model_override": None}) + + assert created == [{"model": "global/default", "model_config": None}] def test_session_title_clears_pending_after_persist(monkeypatch): @@ -7485,3 +7546,97 @@ def test_reap_idle_sessions_closes_only_evictable(monkeypatch): assert closed == [("stale", "idle_timeout")] finally: server._sessions.clear() + + +def test_session_create_records_ui_model_as_session_override(monkeypatch): + """The desktop composer owns its model as plain UI state and 
ships it on + session.create. The gateway must record it as a PER-SESSION override (built + into the agent), never a global config write — picking a model for a new chat + must not mutate the profile default. + """ + monkeypatch.setattr(server, "_enable_gateway_prompts", lambda: None) + # Don't run the real deferred build in this storage-focused test. + monkeypatch.setattr(server, "_start_agent_build", lambda *a, **k: None) + try: + resp = server._methods["session.create"]( + "r1", + { + "cols": 80, + "model": "claude-sonnet-4.6", + "provider": "anthropic", + "reasoning_effort": "high", + "fast": True, + }, + ) + sid = resp["result"]["session_id"] + sess = server._sessions[sid] + assert sess["model_override"] == {"model": "claude-sonnet-4.6", "provider": "anthropic"} + assert sess["create_reasoning_override"] is not None + assert sess["create_service_tier_override"] == "priority" + # The immediate response reflects the override (not the global default) so + # the client never clobbers its sticky pick before the build lands. + assert resp["result"]["info"]["model"] == "claude-sonnet-4.6" + assert resp["result"]["info"]["provider"] == "anthropic" + + # No knobs → no overrides; the session builds from the profile default. + plain = server._methods["session.create"]("r2", {"cols": 80}) + plain_sess = server._sessions[plain["result"]["session_id"]] + assert plain_sess["model_override"] is None + assert plain_sess["create_reasoning_override"] is None + assert plain_sess["create_service_tier_override"] is None + finally: + server._sessions.clear() + + +def test_start_agent_build_passes_session_model_override(monkeypatch): + """A model staged on the session (e.g. by session.create from the desktop + composer) must reach _make_agent so the first build runs on it directly — + no global config, no build-then-switch. 
+ """ + captured = {} + + class FakeWorker: + def __init__(self, *_a, **_k): + pass + + def close(self): + pass + + def fake_make_agent(sid, key, session_id=None, session_db=None, **kwargs): + captured.update(kwargs) + return types.SimpleNamespace(model="claude-sonnet-4.6") + + monkeypatch.setattr(server, "_set_session_context", lambda target: []) + monkeypatch.setattr(server, "_clear_session_context", lambda tokens: None) + monkeypatch.setattr(server, "_make_agent", fake_make_agent) + monkeypatch.setattr(server, "_SlashWorker", FakeWorker) + monkeypatch.setattr(server, "_attach_worker", lambda *a, **k: None) + monkeypatch.setattr(server, "_wire_callbacks", lambda _sid: None) + monkeypatch.setattr(server, "_emit", lambda *a, **k: None) + monkeypatch.setattr(server, "_session_info", lambda *a, **k: {}) + monkeypatch.setattr(server, "_start_notification_poller", lambda *a, **k: None) + monkeypatch.setattr(server, "_notify_session_boundary", lambda *a, **k: None) + monkeypatch.setattr(server, "_probe_config_health", lambda *_a: None) + + sid = "build-sid" + override = {"model": "claude-sonnet-4.6", "provider": "anthropic"} + reasoning = {"enabled": True, "effort": "high"} + session = { + "agent": None, + "agent_ready": threading.Event(), + "session_key": "k1", + "profile_home": None, + "model_override": override, + "create_reasoning_override": reasoning, + "create_service_tier_override": "priority", + } + server._sessions[sid] = session + try: + server._start_agent_build(sid, session) + assert session["agent_ready"].wait(timeout=3), "agent build did not finish" + assert captured.get("model_override") == override + assert captured.get("reasoning_config_override") == reasoning + assert captured.get("service_tier_override") == "priority" + assert session["agent"].model == "claude-sonnet-4.6" + finally: + server._sessions.clear() diff --git a/tui_gateway/server.py b/tui_gateway/server.py index 4d12a1a417b..d0e52635e7c 100644 --- a/tui_gateway/server.py +++ 
b/tui_gateway/server.py @@ -946,6 +946,15 @@ def _start_agent_build(sid: str, session: dict) -> None: kw = {"session_db": session_db} if resume_sid := current.get("resume_session_id"): kw["session_id"] = resume_sid + # Model/effort/fast the desktop picked for a brand-new chat ride + # in as per-session overrides so the first build uses them + # directly (no global config, no build-then-switch). + if override := current.get("model_override"): + kw["model_override"] = override + if (reasoning := current.get("create_reasoning_override")) is not None: + kw["reasoning_config_override"] = reasoning + if (tier := current.get("create_service_tier_override")) is not None: + kw["service_tier_override"] = tier agent = _make_agent(sid, key, **kw) finally: _clear_session_context(tokens) @@ -1174,11 +1183,38 @@ def _ensure_session_db_row(session: dict) -> None: close_db = False if db is None: return + # The session's own model/effort/fast pick — the composer override shipped on + # session.create, or a restored /model switch — must own the row's model + + # model_config. The agent isn't built yet at first prompt.submit, so derive + # the row from the live override dict; fall back to the global resolved model + # only when this chat made no explicit pick. Writing the global default here + # used to win the INSERT-OR-IGNORE race against the agent's own correct + # lazy-create, so a reconnect/resume rebuilt from the global model and + # silently reverted the chat (e.g. picked gpt-5.5, reconnect snapped back to + # the profile default). model_config carries provider/reasoning/service_tier + # so resume restores effort + fast too, not just the model name. 
+ override = session.get("model_override") + override = override if isinstance(override, dict) else {} + row_model = str(override.get("model") or "").strip() or _resolve_model() + model_config: dict = {} + for src_key, cfg_key in ( + ("model", "model"), + ("provider", "provider"), + ("base_url", "base_url"), + ("api_mode", "api_mode"), + ): + if val := override.get(src_key): + model_config[cfg_key] = str(val) + if (reasoning := session.get("create_reasoning_override")) is not None: + model_config["reasoning_config"] = reasoning + if tier := session.get("create_service_tier_override"): + model_config["service_tier"] = tier try: db.create_session( key, source="tui", - model=_resolve_model(), + model=row_model, + model_config=model_config or None, cwd=_session_cwd(session) if session.get("explicit_cwd") else None, ) except Exception: @@ -3887,6 +3923,29 @@ def _(rid, params: dict) -> dict: profile = (params.get("profile") or "").strip() or None profile_home = _profile_home(profile) + # The desktop composer owns its model/effort/fast as plain UI state and ships + # it on every session.create. Honor each as a PER-SESSION override (built into + # the agent below) — never a global config write, so picking a model/effort + # for a new chat can't mutate the profile default. provider is optional + # (resolved at build). + create_model = str(params.get("model") or "").strip() + session_model_override = ( + {"model": create_model, "provider": str(params.get("provider") or "").strip() or None} + if create_model + else None + ) + create_reasoning_override = None + if effort := str(params.get("reasoning_effort") or "").strip(): + try: + from hermes_constants import parse_reasoning_effort + + create_reasoning_override = parse_reasoning_effort(effort) + except Exception: + create_reasoning_override = None + # Only pin "fast" when explicitly requested; leaving it None lets the build + # fall back to the profile default service tier rather than forcing normal. 
+ create_service_tier_override = "priority" if params.get("fast") else None + ready = threading.Event() now = time.time() lease, limit_message = _claim_active_session_slot(key, live_session_id=sid) @@ -3912,6 +3971,9 @@ def _(rid, params: dict) -> dict: "cwd": resolved_cwd, "inflight_turn": None, "last_active": now, + "model_override": session_model_override, + "create_reasoning_override": create_reasoning_override, + "create_service_tier_override": create_service_tier_override, "pending_title": title or None, "profile_home": str(profile_home) if profile_home is not None else None, "running": False, @@ -3951,7 +4013,20 @@ def _(rid, params: dict) -> dict: "message_count": len(history), "messages": _history_to_messages(history), "info": { - "model": _resolve_model(), + # Reflect the per-session model override (desktop composer pick) + # in the immediate response so the client doesn't briefly clobber + # its sticky pick with the global default before the deferred + # build's session.info lands. + "model": ( + session_model_override.get("model") + if session_model_override + else _resolve_model() + ), + **( + {"provider": session_model_override["provider"]} + if session_model_override and session_model_override.get("provider") + else {} + ), "tools": {}, "skills": {}, "cwd": _sessions[sid]["cwd"], diff --git a/website/docs/user-guide/desktop.md b/website/docs/user-guide/desktop.md index 5f132793f21..87639ce3818 100644 --- a/website/docs/user-guide/desktop.md +++ b/website/docs/user-guide/desktop.md @@ -50,11 +50,18 @@ The center of the app. You get: The bar along the bottom of the chat shows live session state and exposes quick controls without opening Settings: -- **Inline model picker** — switch the model for the active session straight from the status bar. - **Per-session YOLO toggle** — flip YOLO on or off for just this session (matching the TUI). 
YOLO bypasses the dangerous-command approval prompts, so know what you're turning off — see [Security → YOLO Mode](./security.md#yolo-mode). Chatting against a Hermes instance on another machine instead of the bundled local backend? See [Connecting to a remote backend](#connecting-to-a-remote-backend) below — and for the full picture of how the remote-hosted dashboard connection works (the auth gate, the `/api/ws` chat socket, and WebSocket close-code triage), see [Web Dashboard → Connecting Hermes Desktop to a remote backend](./features/web-dashboard.md#connecting-hermes-desktop-to-a-remote-backend). +#### Choosing a model + +The model picker lives in the **composer**, just left of the microphone. Click it to switch the model, reasoning effort, and fast mode from one dropdown. + +- **The composer picker is sticky UI state and never touches your default.** It's remembered locally (per device) and **follows you** across new chats and restarts instead of snapping back to the default — pick a model once and the next `Cmd/Ctrl+N` opens on it. With a live chat, switching models scopes the change to that **current chat**; either way the selection rides along when the session is created/switched and is **never** written to the profile default. (Switching [profiles](#sessions--profiles) reseeds to that profile's own default.) +- **Set the default in Settings → Model.** That "main" model is your **per-profile global default** — it's what new chats, crons, subagents, and auxiliary tasks start from, and Settings → Model is the only place that writes it. Each [profile](#sessions--profiles) keeps its own default. +- **Per-model effort/fast presets.** Each model remembers its own reasoning effort and fast-mode choice in the desktop app, re-applied to the session whenever you pick that model. These presets are a desktop convenience and don't change crons or subagents. 
+ ### File browser Explore and preview the working directory without leaving the app — useful for following along as the agent reads, writes, and edits files. Set the initial project directory with `hermes desktop --cwd <path>` (or the `HERMES_DESKTOP_CWD` environment variable).