From 6de3963e37698b0789bdec5a08761bd85ff4502f Mon Sep 17 00:00:00 2001 From: brooklyn! Date: Wed, 10 Jun 2026 13:16:50 -0500 Subject: [PATCH] fix(desktop): keep model runtime state per session (#43702) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(desktop): keep model runtime state per session (cherry picked from commit f72ee87d99ee38cb7b5badeb9a8af869bb92073a) * fix(desktop): keep footer model state scoped to active session (cherry picked from commit d91942ebd4671ff857b5c8526dbf133f04782ecb) * fix(desktop): restore stored runtime when resuming sessions (cherry picked from commit 32b3793418257617b8da57e26151f079c2620d00) * fix(desktop): persist live runtime changes for resume (cherry picked from commit c58467779436dcef44a80ad55b52664752dc0837) * fix(desktop): persist resumed endpoint runtime * chore(attribution): map pinguarmy's commit email in AUTHOR_MAP The salvaged commits on this branch preserve @pinguarmy's authorship (郝鹏宇 / peterhao@Peters-MacBook-Air.local). Add the mapping so the check-attribution CI gate resolves the email to the GitHub username. 
--------- Co-authored-by: 郝鹏宇 <peterhao@Peters-MacBook-Air.local> --- apps/desktop/src/app/desktop-controller.tsx | 7 + .../app/session/hooks/use-message-stream.ts | 25 ++- .../session/hooks/use-model-controls.test.tsx | 77 ++++++++ .../app/session/hooks/use-model-controls.ts | 15 +- .../app/session/hooks/use-session-actions.ts | 23 ++- .../session/hooks/use-session-state-cache.ts | 21 +- .../src/app/shell/model-menu-panel.tsx | 9 +- apps/desktop/src/app/types.ts | 6 + apps/desktop/src/lib/chat-runtime.ts | 6 + .../src/lib/model-status-label.test.ts | 2 + scripts/release.py | 1 + tests/test_tui_gateway_server.py | 133 ++++++++++++- tui_gateway/server.py | 181 +++++++++++++++++- 13 files changed, 474 insertions(+), 32 deletions(-) create mode 100644 apps/desktop/src/app/session/hooks/use-model-controls.test.tsx diff --git a/apps/desktop/src/app/desktop-controller.tsx b/apps/desktop/src/app/desktop-controller.tsx index 8652a6b833b..ab4f3f0eb0e 100644 --- a/apps/desktop/src/app/desktop-controller.tsx +++ b/apps/desktop/src/app/desktop-controller.tsx @@ -743,6 +743,13 @@ export function DesktopController() { } }, [gatewayState, refreshCronJobs]) + useEffect(() => { + if (gatewayState === 'open' && !activeSessionId && freshDraftReady) { + void refreshCurrentModel() + void refreshHermesConfig() + } + }, [activeSessionId, freshDraftReady, gatewayState, refreshCurrentModel, refreshHermesConfig]) + useRouteResume({ activeSessionId, activeSessionIdRef, diff --git a/apps/desktop/src/app/session/hooks/use-message-stream.ts b/apps/desktop/src/app/session/hooks/use-message-stream.ts index 703941c9367..75ff43b5ee8 100644 --- a/apps/desktop/src/app/session/hooks/use-message-stream.ts +++ b/apps/desktop/src/app/session/hooks/use-message-stream.ts @@ -633,14 +633,21 @@ export function useMessageStream({ const runningChanged = typeof payload?.running === 'boolean' if (apply) { - const runtimeInfo: { branch?: string; cwd?: string } = {} + const runtimeInfo: Partial< + Pick< + ClientSessionState, + 'branch' | 'cwd' | 
'fast' | 'model' | 'provider' | 'reasoningEffort' | 'serviceTier' | 'yolo' + > + > = {} if (modelChanged) { setCurrentModel(payload!.model || '') + runtimeInfo.model = payload!.model || '' } if (providerChanged) { setCurrentProvider(payload!.provider || '') + runtimeInfo.provider = payload!.provider || '' } if (typeof payload?.cwd === 'string') { @@ -653,32 +660,32 @@ export function useMessageStream({ runtimeInfo.branch = payload.branch } - if (sessionId && (runtimeInfo.cwd !== undefined || runtimeInfo.branch !== undefined)) { - updateSessionState(sessionId, state => ({ - ...state, - branch: runtimeInfo.branch ?? state.branch, - cwd: runtimeInfo.cwd ?? state.cwd - })) - } - if (typeof payload?.personality === 'string') { setCurrentPersonality(normalizePersonalityValue(payload.personality)) } if (typeof payload?.reasoning_effort === 'string') { setCurrentReasoningEffort(payload.reasoning_effort) + runtimeInfo.reasoningEffort = payload.reasoning_effort } if (typeof payload?.service_tier === 'string') { setCurrentServiceTier(payload.service_tier) + runtimeInfo.serviceTier = payload.service_tier } if (typeof payload?.fast === 'boolean') { setCurrentFastMode(payload.fast) + runtimeInfo.fast = payload.fast } if (typeof payload?.yolo === 'boolean') { setYoloActive(payload.yolo) + runtimeInfo.yolo = payload.yolo + } + + if (sessionId && Object.keys(runtimeInfo).length > 0) { + updateSessionState(sessionId, state => ({ ...state, ...runtimeInfo })) } if (runningChanged && sessionId) { diff --git a/apps/desktop/src/app/session/hooks/use-model-controls.test.tsx b/apps/desktop/src/app/session/hooks/use-model-controls.test.tsx new file mode 100644 index 00000000000..8f52018982a --- /dev/null +++ b/apps/desktop/src/app/session/hooks/use-model-controls.test.tsx @@ -0,0 +1,77 @@ +import { renderHook } from '@testing-library/react' +import { QueryClient } from '@tanstack/react-query' +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest' + +import { 
getGlobalModelInfo } from '@/hermes' +import { + $activeSessionId, + $currentModel, + $currentProvider, + setCurrentModel, + setCurrentProvider +} from '@/store/session' + +import { useModelControls } from './use-model-controls' + +vi.mock('@/hermes', () => ({ + getGlobalModelInfo: vi.fn(), + setGlobalModel: vi.fn() +})) + +describe('useModelControls.refreshCurrentModel', () => { + beforeEach(() => { + $activeSessionId.set(null) + setCurrentModel('') + setCurrentProvider('') + }) + + afterEach(() => { + vi.restoreAllMocks() + $activeSessionId.set(null) + setCurrentModel('') + setCurrentProvider('') + }) + + it('applies the global model when there is no active runtime session', async () => { + vi.mocked(getGlobalModelInfo).mockResolvedValue({ + model: 'openai/gpt-5.5', + provider: 'openai-codex' + }) + + const { result } = renderHook(() => + useModelControls({ + activeSessionId: null, + queryClient: new QueryClient(), + requestGateway: vi.fn() + }) + ) + + await result.current.refreshCurrentModel() + + expect($currentModel.get()).toBe('openai/gpt-5.5') + expect($currentProvider.get()).toBe('openai-codex') + }) + + it('does not clobber the active session footer state with global model info', async () => { + setCurrentModel('deepseek/deepseek-v4-pro') + setCurrentProvider('deepseek') + $activeSessionId.set('runtime-1') + vi.mocked(getGlobalModelInfo).mockResolvedValue({ + model: 'openai/gpt-5.5', + provider: 'openai-codex' + }) + + const { result } = renderHook(() => + useModelControls({ + activeSessionId: 'runtime-1', + queryClient: new QueryClient(), + requestGateway: vi.fn() + }) + ) + + await result.current.refreshCurrentModel() + + expect($currentModel.get()).toBe('deepseek/deepseek-v4-pro') + expect($currentProvider.get()).toBe('deepseek') + }) +}) diff --git a/apps/desktop/src/app/session/hooks/use-model-controls.ts b/apps/desktop/src/app/session/hooks/use-model-controls.ts index 1a04b19da76..525c8d8385b 100644 --- 
a/apps/desktop/src/app/session/hooks/use-model-controls.ts +++ b/apps/desktop/src/app/session/hooks/use-model-controls.ts @@ -4,7 +4,13 @@ import { useCallback } from 'react' import { getGlobalModelInfo, setGlobalModel } from '@/hermes' import { useI18n } from '@/i18n' import { notifyError } from '@/store/notifications' -import { $currentModel, $currentProvider, setCurrentModel, setCurrentProvider } from '@/store/session' +import { + $activeSessionId, + $currentModel, + $currentProvider, + setCurrentModel, + setCurrentProvider +} from '@/store/session' import type { ModelOptionsResponse } from '@/types/hermes' interface ModelSelection { @@ -39,6 +45,13 @@ export function useModelControls({ activeSessionId, queryClient, requestGateway try { const result = await getGlobalModelInfo() + // A resumed/live session owns the footer model state. Global config + // refreshes (gateway boot, profile swap, settings save) must not clobber + // the active chat's runtime model/provider in the status bar. + if ($activeSessionId.get()) { + return + } + if (typeof result.model === 'string') { setCurrentModel(result.model) } diff --git a/apps/desktop/src/app/session/hooks/use-session-actions.ts b/apps/desktop/src/app/session/hooks/use-session-actions.ts index c3e22ca6b4b..51ee90924ae 100644 --- a/apps/desktop/src/app/session/hooks/use-session-actions.ts +++ b/apps/desktop/src/app/session/hooks/use-session-actions.ts @@ -19,7 +19,6 @@ import { $messages, $sessions, $yoloActive, - getRememberedWorkspaceCwd, workspaceCwdForNewSession, sessionPinId, setActiveSessionId, @@ -211,14 +210,16 @@ function patchSessionWorkspace(sessionId: string, cwd: string | undefined) { setSessions(prev => prev.map(session => (session.id === sessionId ? 
{ ...session, cwd } : session))) } -function applyRuntimeInfo( - info: SessionCreateResponse['info'] | undefined -): Partial> | null { +function applyRuntimeInfo(info: SessionCreateResponse['info'] | undefined): Partial< + Pick +> | null { if (!info) { return null } - const sessionState: Partial> = {} + const sessionState: Partial< + Pick + > = {} reportBackendContract(info.desktop_contract) @@ -228,10 +229,12 @@ function applyRuntimeInfo( if (info.model) { setCurrentModel(info.model) + sessionState.model = info.model } if (info.provider) { setCurrentProvider(info.provider) + sessionState.provider = info.provider } if (info.cwd) { @@ -250,18 +253,22 @@ function applyRuntimeInfo( if (typeof info.reasoning_effort === 'string') { setCurrentReasoningEffort(info.reasoning_effort) + sessionState.reasoningEffort = info.reasoning_effort } if (typeof info.service_tier === 'string') { setCurrentServiceTier(info.service_tier) + sessionState.serviceTier = info.service_tier } if (typeof info.fast === 'boolean') { setCurrentFastMode(info.fast) + sessionState.fast = info.fast } if (typeof info.yolo === 'boolean') { setYoloActive(info.yolo) + sessionState.yolo = info.yolo } if (info.usage) { @@ -314,6 +321,12 @@ export function useSessionActions({ setTurnStartedAt(null) // New chats start in the configured default project dir when set, // otherwise the sticky last-used workspace (PR #37586). 
+ setCurrentModel('') + setCurrentProvider('') + setCurrentReasoningEffort('') + setCurrentServiceTier('') + setCurrentFastMode(false) + setYoloActive(false) setCurrentCwd(workspaceCwdForNewSession()) setCurrentBranch('') clearComposerDraft() diff --git a/apps/desktop/src/app/session/hooks/use-session-state-cache.ts b/apps/desktop/src/app/session/hooks/use-session-state-cache.ts index b343ceb5ce4..72930561bae 100644 --- a/apps/desktop/src/app/session/hooks/use-session-state-cache.ts +++ b/apps/desktop/src/app/session/hooks/use-session-state-cache.ts @@ -5,7 +5,20 @@ import type { ChatMessage } from '@/lib/chat-messages' import { preserveLocalAssistantErrors } from '@/lib/chat-messages' import { createClientSessionState } from '@/lib/chat-runtime' import { setMutableRef } from '@/lib/mutable-ref' -import { $busy, $messages, noteSessionActivity, setSessionAttention, setSessionWorking, setTurnStartedAt } from '@/store/session' +import { + $busy, + $messages, + noteSessionActivity, + setCurrentFastMode, + setCurrentModel, + setCurrentProvider, + setCurrentReasoningEffort, + setCurrentServiceTier, + setSessionAttention, + setSessionWorking, + setTurnStartedAt, + setYoloActive +} from '@/store/session' import type { ClientSessionState } from '../../types' @@ -124,6 +137,12 @@ export function useSessionStateCache({ setMessages(nextMessages) } + setCurrentModel(pending.state.model) + setCurrentProvider(pending.state.provider) + setCurrentReasoningEffort(pending.state.reasoningEffort) + setCurrentServiceTier(pending.state.serviceTier) + setCurrentFastMode(pending.state.fast) + setYoloActive(pending.state.yolo) setBusy(pending.state.busy) setMutableRef(busyRef, pending.state.busy) setAwaitingResponse(pending.state.awaitingResponse) diff --git a/apps/desktop/src/app/shell/model-menu-panel.tsx b/apps/desktop/src/app/shell/model-menu-panel.tsx index 538d2acf522..4fe10abe72f 100644 --- a/apps/desktop/src/app/shell/model-menu-panel.tsx +++ 
b/apps/desktop/src/app/shell/model-menu-panel.tsx @@ -162,8 +162,9 @@ export function ModelMenuPanel({ gateway, onSelectModel, requestGateway }: Model currentFastMode ) - // Grayed text: active row shows live state (Fast + effort); - // others show a fast-capability hint. + // Grayed text is live session state only. Do not label inactive + // rows as "Fast" just because they have a fast-capable sibling: + // that makes an off Fast toggle look like it is already on. const meta = isCurrent ? [ fastControl.kind !== 'none' && fastControl.on ? copy.fast : null, @@ -171,9 +172,7 @@ export function ModelMenuPanel({ gateway, onSelectModel, requestGateway }: Model ] .filter(Boolean) .join(' ') - : caps?.fast || family.fastId - ? copy.fast - : '' + : '' // Every row is a hover-Edit submenu trigger. Activating it // (pointer or keyboard) switches to the family's base model; diff --git a/apps/desktop/src/app/types.ts b/apps/desktop/src/app/types.ts index 14f307eef93..672beb9a089 100644 --- a/apps/desktop/src/app/types.ts +++ b/apps/desktop/src/app/types.ts @@ -103,6 +103,12 @@ export interface ClientSessionState { messages: ChatMessage[] branch: string cwd: string + model: string + provider: string + reasoningEffort: string + serviceTier: string + fast: boolean + yolo: boolean busy: boolean awaitingResponse: boolean streamId: string | null diff --git a/apps/desktop/src/lib/chat-runtime.ts b/apps/desktop/src/lib/chat-runtime.ts index 3246f490d08..68beb83a043 100644 --- a/apps/desktop/src/lib/chat-runtime.ts +++ b/apps/desktop/src/lib/chat-runtime.ts @@ -40,6 +40,12 @@ export function createClientSessionState( messages, branch: '', cwd: '', + model: '', + provider: '', + reasoningEffort: '', + serviceTier: '', + fast: false, + yolo: false, busy: false, awaitingResponse: false, streamId: null, diff --git a/apps/desktop/src/lib/model-status-label.test.ts b/apps/desktop/src/lib/model-status-label.test.ts index 6c0bac9129d..58c03a3f122 100644 --- 
a/apps/desktop/src/lib/model-status-label.test.ts +++ b/apps/desktop/src/lib/model-status-label.test.ts @@ -5,6 +5,8 @@ import { displayModelName, formatModelStatusLabel, reasoningEffortLabel } from ' describe('model-status-label', () => { it('formats display names consistently', () => { expect(displayModelName('anthropic/claude-opus-4.8-fast')).toBe('Opus 4.8') + expect(displayModelName('openai/gpt-5.5-fast')).toBe('GPT-5.5') + expect(displayModelName('deepseek/deepseek-v4-pro-thinking')).toBe('Deepseek V4 Pro') expect(displayModelName('openai/gpt-5.5')).toBe('GPT-5.5') }) diff --git a/scripts/release.py b/scripts/release.py index bd40b25117d..a9d08577b76 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -45,6 +45,7 @@ ACP_REGISTRY_MANIFEST = REPO_ROOT / "acp_registry" / "agent.json" # Auto-extracted from noreply emails + manual overrides AUTHOR_MAP = { + "peterhao@Peters-MacBook-Air.local": "pinguarmy", "barronlroth@gmail.com": "barronlroth", "ondrej.drapalik@gmail.com": "OndrejDrapalik", "tomasz.panek@gmail.com": "tomekpanek", diff --git a/tests/test_tui_gateway_server.py b/tests/test_tui_gateway_server.py index 72dc43564c0..3b95b8dceb8 100644 --- a/tests/test_tui_gateway_server.py +++ b/tests/test_tui_gateway_server.py @@ -878,6 +878,97 @@ def test_session_resume_uses_parent_lineage_for_display(monkeypatch): assert captured["history_calls"] == [("tip", False), ("tip", True)] +def test_session_resume_passes_stored_runtime_to_agent(monkeypatch): + captured = {} + + class FakeDB: + def get_session(self, target): + return { + "id": target, + "model": "gpt-5.4", + "billing_provider": "openai-codex", + "model_config": '{"reasoning_config":{"enabled":true,"effort":"high"},"service_tier":"priority","base_url":"https://custom.example/v1","api_mode":"chat_completions"}', + } + + def reopen_session(self, target): + pass + + def get_messages_as_conversation(self, target, include_ancestors=False): + return [{"role": "user", "content": "hello"}] + + def 
fake_make_agent(sid, key, session_id=None, session_db=None, **kwargs): + captured.update(kwargs) + return types.SimpleNamespace(model="gpt-5.4", provider="openai-codex") + + monkeypatch.setattr(server, "_get_db", lambda: FakeDB()) + monkeypatch.setattr(server, "_enable_gateway_prompts", lambda: None) + monkeypatch.setattr(server, "_set_session_context", lambda target: []) + monkeypatch.setattr(server, "_clear_session_context", lambda tokens: None) + monkeypatch.setattr(server, "_make_agent", fake_make_agent) + monkeypatch.setattr(server, "_session_info", lambda agent, *a: {"model": agent.model, "provider": agent.provider}) + + def fake_init_session(sid, key, agent, history, cols=80): + server._sessions[sid] = {"agent": agent, "session_key": key} + + monkeypatch.setattr(server, "_init_session", fake_init_session) + + resp = server.handle_request( + {"id": "1", "method": "session.resume", "params": {"session_id": "stored-session"}} + ) + + assert resp["result"]["info"] == {"model": "gpt-5.4", "provider": "openai-codex"} + assert captured["model_override"] == { + "model": "gpt-5.4", + "provider": "openai-codex", + "base_url": "https://custom.example/v1", + "api_mode": "chat_completions", + } + assert captured["provider_override"] == "openai-codex" + assert captured["reasoning_config_override"] == {"enabled": True, "effort": "high"} + assert captured["service_tier_override"] == "priority" + runtime_sid = resp["result"]["session_id"] + assert server._sessions[runtime_sid]["model_override"] == captured["model_override"] + + +def test_persist_live_session_runtime_preserves_resume_metadata(monkeypatch): + updates = {} + + class FakeDB: + def get_session(self, session_id): + assert session_id == "stored-session" + return {"model_config": '{"_branched_from":"root"}'} + + def update_session_meta(self, session_id, model_config_json, model=None): + updates["meta"] = (session_id, json.loads(model_config_json), model) + + agent = types.SimpleNamespace( + model="gpt-5.4", + 
provider="openai-codex", + base_url="https://custom.example/v1", + api_mode="chat_completions", + reasoning_config={"enabled": True, "effort": "high"}, + service_tier="priority", + _session_db=FakeDB(), + ) + + server._persist_live_session_runtime({"agent": agent, "session_key": "stored-session"}) + + assert "model" not in updates + assert updates["meta"] == ( + "stored-session", + { + "_branched_from": "root", + "model": "gpt-5.4", + "provider": "openai-codex", + "base_url": "https://custom.example/v1", + "api_mode": "chat_completions", + "reasoning_config": {"enabled": True, "effort": "high"}, + "service_tier": "priority", + }, + "gpt-5.4", + ) + + def test_status_callback_emits_kind_and_text(): with patch("tui_gateway.server._emit") as emit: cb = server._agent_cbs("sid")["status_callback"] @@ -3593,8 +3684,9 @@ def test_session_info_includes_mcp_servers(monkeypatch): fake_mod.get_mcp_status = lambda: fake_status monkeypatch.setitem(sys.modules, "tools.mcp_tool", fake_mod) - info = server._session_info(types.SimpleNamespace(tools=[], model="")) + info = server._session_info(types.SimpleNamespace(tools=[], model="", provider="openai-codex")) + assert info["provider"] == "openai-codex" assert info["mcp_servers"] == fake_status @@ -5983,6 +6075,45 @@ def test_make_agent_defaults_to_90(monkeypatch): assert mock_agent.call_args.kwargs["max_iterations"] == 90 +def test_make_agent_uses_session_runtime_overrides(monkeypatch): + _setup_make_agent_mocks(monkeypatch, {}) + resolved = {} + + def fake_resolve_runtime_provider(requested=None, target_model=None): + resolved["requested"] = requested + resolved["target_model"] = target_model + return { + "provider": requested, + "base_url": None, + "api_key": None, + "api_mode": None, + "command": None, + "args": None, + "credential_pool": None, + } + + monkeypatch.setattr( + "hermes_cli.runtime_provider.resolve_runtime_provider", + fake_resolve_runtime_provider, + ) + + with patch("run_agent.AIAgent") as mock_agent: + 
server._make_agent( + "sid1", + "key1", + model_override="gpt-5.4", + provider_override="openai-codex", + reasoning_config_override={"enabled": True, "effort": "high"}, + service_tier_override="priority", + ) + + assert resolved == {"requested": "openai-codex", "target_model": "gpt-5.4"} + assert mock_agent.call_args.kwargs["model"] == "gpt-5.4" + assert mock_agent.call_args.kwargs["provider"] == "openai-codex" + assert mock_agent.call_args.kwargs["reasoning_config"] == {"enabled": True, "effort": "high"} + assert mock_agent.call_args.kwargs["service_tier"] == "priority" + + def test_make_agent_handles_null_agent_config(monkeypatch): _setup_make_agent_mocks(monkeypatch, {"agent": None, "max_turns": 80}) diff --git a/tui_gateway/server.py b/tui_gateway/server.py index 7aedc0e7813..390c31b092e 100644 --- a/tui_gateway/server.py +++ b/tui_gateway/server.py @@ -1409,6 +1409,131 @@ def _resolve_startup_runtime() -> tuple[str, str | None]: return model, None +def _stored_session_runtime_overrides(row: dict | None) -> dict: + """Return runtime fields persisted with a stored session. + + ``session.resume`` is a session-scoped operation: reopening an older chat + must restore the model/provider/reasoning state that chat actually used, + not whatever global model the user most recently selected in another chat. + The durable session row stores the model directly, the billing provider in + ``billing_provider``, and richer runtime knobs in JSON ``model_config``. 
+ """ + if not row: + return {} + + raw_config = row.get("model_config") + model_config: dict = {} + if isinstance(raw_config, dict): + model_config = raw_config + elif isinstance(raw_config, str) and raw_config.strip(): + try: + parsed = json.loads(raw_config) + if isinstance(parsed, dict): + model_config = parsed + except Exception: + logger.debug("failed to parse stored session model_config", exc_info=True) + + overrides: dict = {} + model = str(row.get("model") or model_config.get("model") or "").strip() + provider = str( + model_config.get("provider") + or model_config.get("billing_provider") + or row.get("billing_provider") + or "" + ).strip() + base_url = str(model_config.get("base_url") or "").strip() + api_mode = str(model_config.get("api_mode") or "").strip() + reasoning_config = model_config.get("reasoning_config") + service_tier = str(model_config.get("service_tier") or "").strip() + + if model: + # Use the same dict-shaped override that live /model switches use so a + # DB-restored session can preserve custom endpoint metadata across both + # initial resume and later rebuilds (/new). Deliberately do not persist + # or restore raw api_key here; endpoint credentials should continue to + # come from config/env/provider resolution rather than the session DB. 
+ overrides["model_override"] = { + "model": model, + "provider": provider or None, + "base_url": base_url or None, + "api_mode": api_mode or None, + } + if provider: + overrides["provider_override"] = provider + if isinstance(reasoning_config, dict): + overrides["reasoning_config_override"] = reasoning_config + if service_tier: + overrides["service_tier_override"] = service_tier + + return overrides + + +def _runtime_model_config(agent, existing: dict | None = None) -> dict: + config = dict(existing or {}) + model = str(getattr(agent, "model", "") or "").strip() + provider = str(getattr(agent, "provider", "") or "").strip() + base_url = str(getattr(agent, "base_url", "") or "").strip() + api_mode = str(getattr(agent, "api_mode", "") or "").strip() + reasoning_config = getattr(agent, "reasoning_config", None) + service_tier = getattr(agent, "service_tier", None) + + if model: + config["model"] = model + if provider: + config["provider"] = provider + if base_url: + config["base_url"] = base_url + else: + config.pop("base_url", None) + if api_mode: + config["api_mode"] = api_mode + else: + config.pop("api_mode", None) + if isinstance(reasoning_config, dict): + config["reasoning_config"] = reasoning_config + else: + config.pop("reasoning_config", None) + if service_tier: + config["service_tier"] = service_tier + else: + config.pop("service_tier", None) + + return config + + +def _persist_live_session_runtime(session: dict | None) -> None: + """Persist active session runtime so future resumes restore the same footer.""" + if not session: + return + agent = session.get("agent") + session_key = str(session.get("session_key") or "").strip() + if agent is None or not session_key: + return + + db = getattr(agent, "_session_db", None) or _get_db() + if db is None: + return + + try: + row = db.get_session(session_key) or {} + raw_config = row.get("model_config") + existing_config = {} + if isinstance(raw_config, dict): + existing_config = raw_config + elif 
isinstance(raw_config, str) and raw_config.strip(): + parsed = json.loads(raw_config) + if isinstance(parsed, dict): + existing_config = parsed + model_config = _runtime_model_config(agent, existing_config) + model = str(getattr(agent, "model", "") or "").strip() + if hasattr(db, "update_session_meta"): + db.update_session_meta(session_key, json.dumps(model_config), model or None) + elif model and hasattr(db, "update_session_model"): + db.update_session_model(session_key, model) + except Exception: + logger.debug("failed to persist live session runtime", exc_info=True) + + def _write_config_key(key_path: str, value): cfg = _load_cfg() current = cfg @@ -1789,6 +1914,7 @@ def _apply_model_switch( api_mode=result.api_mode, ) _restart_slash_worker(sid, session) + _persist_live_session_runtime(session) _emit("session.info", sid, _session_info(agent, session)) # Record the switch as a PER-SESSION override so a later rebuild of THIS @@ -2104,6 +2230,7 @@ def _session_info(agent, session: dict | None = None) -> dict: yolo = False info: dict = { "model": getattr(agent, "model", ""), + "provider": getattr(agent, "provider", ""), "reasoning_effort": reasoning_effort, "service_tier": service_tier, "fast": service_tier == "priority", @@ -2891,7 +3018,10 @@ def _make_agent( key: str, session_id: str | None = None, session_db=None, - model_override: dict | None = None, + model_override: dict | str | None = None, + provider_override: str | None = None, + reasoning_config_override: dict | None = None, + service_tier_override: str | None = None, ): from run_agent import AIAgent from hermes_cli.runtime_provider import resolve_runtime_provider @@ -2927,12 +3057,11 @@ def _make_agent( part for part in (system_prompt, skills_prompt) if part ).strip() # Prefer a per-session model override (set by a prior in-session /model - # switch) over global config/env resolution. 
This keeps a rebuilt session - # (/new, resume) on the model the user picked FOR THIS SESSION, without - # reading process-global env vars that another session may have changed. - if model_override and model_override.get("model"): + # switch) over global config/env resolution. Resume-time stored sessions may + # also pass scalar model/provider/runtime knobs from the persisted DB row. + if isinstance(model_override, dict) and model_override.get("model"): model = str(model_override.get("model") or "") - requested_provider = model_override.get("provider") or None + requested_provider = model_override.get("provider") or provider_override or None override_base_url = model_override.get("base_url") override_api_key = model_override.get("api_key") override_api_mode = model_override.get("api_mode") @@ -2951,6 +3080,10 @@ def _make_agent( runtime["api_mode"] = override_api_mode else: model, requested_provider = _resolve_startup_runtime() + if isinstance(model_override, str) and model_override: + model = model_override + if provider_override: + requested_provider = provider_override runtime = resolve_runtime_provider( requested=requested_provider, target_model=model or None, @@ -2971,8 +3104,16 @@ def _make_agent( # display detail). See cli.py PR (decoupling fix) for the matching # change on the classic CLI side. verbose_logging=False, - reasoning_config=_load_reasoning_config(), - service_tier=_load_service_tier(), + reasoning_config=( + reasoning_config_override + if reasoning_config_override is not None + else _load_reasoning_config() + ), + service_tier=( + service_tier_override + if service_tier_override is not None + else _load_service_tier() + ), enabled_toolsets=_load_enabled_toolsets(), platform="tui", session_id=session_id or key, @@ -3660,8 +3801,17 @@ def _(rid, params: dict) -> dict: try: # Pass the profile's db so the agent persists turns to the right # state.db; home override is active here so config/skills/model - # resolve to the profile too. 
- agent = _make_agent(sid, target, session_id=target, session_db=db) + # resolve to the profile too. Runtime identity is restored from the + # stored session row so switching chats does not inherit whatever + # global model another chat last selected. + stored_runtime_overrides = _stored_session_runtime_overrides(found) + agent = _make_agent( + sid, + target, + session_id=target, + session_db=db, + **stored_runtime_overrides, + ) finally: _clear_session_context(tokens) except Exception as e: @@ -3698,6 +3848,10 @@ def _(rid, params: dict) -> dict: try: _init_session(sid, target, agent, history, cols=cols) if sid in _sessions: + if stored_runtime_overrides.get("model_override") is not None: + _sessions[sid]["model_override"] = stored_runtime_overrides[ + "model_override" + ] _sessions[sid]["display_history_prefix"] = display_history_prefix # Remember the profile home so each turn re-binds HERMES_HOME (the # agent persists to its own db, but mid-turn home reads — memory, @@ -6309,6 +6463,7 @@ def _(rid, params: dict) -> dict: if nv == "fast": current_overrides.update(overrides) agent.request_overrides = current_overrides + _persist_live_session_runtime(session) _emit( "session.info", params.get("session_id", ""), @@ -6475,6 +6630,12 @@ def _(rid, params: dict) -> dict: _write_config_key("agent.reasoning_effort", arg) if session and session.get("agent") is not None: session["agent"].reasoning_config = parsed + _persist_live_session_runtime(session) + _emit( + "session.info", + params.get("session_id", ""), + _session_info(session["agent"], session), + ) return _ok(rid, {"key": key, "value": arg}) except Exception as e: return _err(rid, 5001, str(e))