fix(desktop): keep model runtime state per session (#43702)

* fix(desktop): keep model runtime state per session

(cherry picked from commit f72ee87d99ee38cb7b5badeb9a8af869bb92073a)

* fix(desktop): keep footer model state scoped to active session

(cherry picked from commit d91942ebd4671ff857b5c8526dbf133f04782ecb)

* fix(desktop): restore stored runtime when resuming sessions

(cherry picked from commit 32b3793418257617b8da57e26151f079c2620d00)

* fix(desktop): persist live runtime changes for resume

(cherry picked from commit c58467779436dcef44a80ad55b52664752dc0837)

* fix(desktop): persist resumed endpoint runtime

* chore(attribution): map pinguarmy's commit email in AUTHOR_MAP

The salvaged commits on this branch preserve @pinguarmy's authorship
(郝鹏宇 / peterhao@Peters-MacBook-Air.local). Add the mapping so the
check-attribution CI gate resolves the email to the GitHub username.

---------

Co-authored-by: 郝鹏宇 <peterhao@Peters-MacBook-Air.local>
This commit is contained in:
brooklyn! 2026-06-10 13:16:50 -05:00 committed by GitHub
parent 07ac185904
commit 6de3963e37
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 474 additions and 32 deletions

View file

@ -743,6 +743,13 @@ export function DesktopController() {
}
}, [gatewayState, refreshCronJobs])
useEffect(() => {
if (gatewayState === 'open' && !activeSessionId && freshDraftReady) {
void refreshCurrentModel()
void refreshHermesConfig()
}
}, [activeSessionId, freshDraftReady, gatewayState, refreshCurrentModel, refreshHermesConfig])
useRouteResume({
activeSessionId,
activeSessionIdRef,

View file

@ -633,14 +633,21 @@ export function useMessageStream({
const runningChanged = typeof payload?.running === 'boolean'
if (apply) {
const runtimeInfo: { branch?: string; cwd?: string } = {}
const runtimeInfo: Partial<
Pick<
ClientSessionState,
'branch' | 'cwd' | 'fast' | 'model' | 'provider' | 'reasoningEffort' | 'serviceTier' | 'yolo'
>
> = {}
if (modelChanged) {
setCurrentModel(payload!.model || '')
runtimeInfo.model = payload!.model || ''
}
if (providerChanged) {
setCurrentProvider(payload!.provider || '')
runtimeInfo.provider = payload!.provider || ''
}
if (typeof payload?.cwd === 'string') {
@ -653,32 +660,32 @@ export function useMessageStream({
runtimeInfo.branch = payload.branch
}
if (sessionId && (runtimeInfo.cwd !== undefined || runtimeInfo.branch !== undefined)) {
updateSessionState(sessionId, state => ({
...state,
branch: runtimeInfo.branch ?? state.branch,
cwd: runtimeInfo.cwd ?? state.cwd
}))
}
if (typeof payload?.personality === 'string') {
setCurrentPersonality(normalizePersonalityValue(payload.personality))
}
if (typeof payload?.reasoning_effort === 'string') {
setCurrentReasoningEffort(payload.reasoning_effort)
runtimeInfo.reasoningEffort = payload.reasoning_effort
}
if (typeof payload?.service_tier === 'string') {
setCurrentServiceTier(payload.service_tier)
runtimeInfo.serviceTier = payload.service_tier
}
if (typeof payload?.fast === 'boolean') {
setCurrentFastMode(payload.fast)
runtimeInfo.fast = payload.fast
}
if (typeof payload?.yolo === 'boolean') {
setYoloActive(payload.yolo)
runtimeInfo.yolo = payload.yolo
}
if (sessionId && Object.keys(runtimeInfo).length > 0) {
updateSessionState(sessionId, state => ({ ...state, ...runtimeInfo }))
}
if (runningChanged && sessionId) {

View file

@ -0,0 +1,77 @@
import { renderHook } from '@testing-library/react'
import { QueryClient } from '@tanstack/react-query'
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
import { getGlobalModelInfo } from '@/hermes'
import {
$activeSessionId,
$currentModel,
$currentProvider,
setCurrentModel,
setCurrentProvider
} from '@/store/session'
import { useModelControls } from './use-model-controls'
vi.mock('@/hermes', () => ({
  getGlobalModelInfo: vi.fn(),
  setGlobalModel: vi.fn()
}))

/** Put the shared session atoms back to their empty defaults. */
function resetFooterState(): void {
  $activeSessionId.set(null)
  setCurrentModel('')
  setCurrentProvider('')
}

/**
 * Stub the global model lookup, mount the hook for `activeSessionId`, and run
 * one `refreshCurrentModel` pass to completion.
 */
async function refreshWithGlobalModel(activeSessionId: string | null): Promise<void> {
  vi.mocked(getGlobalModelInfo).mockResolvedValue({
    model: 'openai/gpt-5.5',
    provider: 'openai-codex'
  })

  const hook = renderHook(() =>
    useModelControls({
      activeSessionId,
      queryClient: new QueryClient(),
      requestGateway: vi.fn()
    })
  )

  await hook.result.current.refreshCurrentModel()
}

describe('useModelControls.refreshCurrentModel', () => {
  beforeEach(resetFooterState)

  afterEach(() => {
    vi.restoreAllMocks()
    resetFooterState()
  })

  it('applies the global model when there is no active runtime session', async () => {
    await refreshWithGlobalModel(null)

    expect($currentModel.get()).toBe('openai/gpt-5.5')
    expect($currentProvider.get()).toBe('openai-codex')
  })

  it('does not clobber the active session footer state with global model info', async () => {
    // Seed the footer as a live session would have left it.
    setCurrentModel('deepseek/deepseek-v4-pro')
    setCurrentProvider('deepseek')
    $activeSessionId.set('runtime-1')

    await refreshWithGlobalModel('runtime-1')

    expect($currentModel.get()).toBe('deepseek/deepseek-v4-pro')
    expect($currentProvider.get()).toBe('deepseek')
  })
})

View file

@ -4,7 +4,13 @@ import { useCallback } from 'react'
import { getGlobalModelInfo, setGlobalModel } from '@/hermes'
import { useI18n } from '@/i18n'
import { notifyError } from '@/store/notifications'
import { $currentModel, $currentProvider, setCurrentModel, setCurrentProvider } from '@/store/session'
import {
$activeSessionId,
$currentModel,
$currentProvider,
setCurrentModel,
setCurrentProvider
} from '@/store/session'
import type { ModelOptionsResponse } from '@/types/hermes'
interface ModelSelection {
@ -39,6 +45,13 @@ export function useModelControls({ activeSessionId, queryClient, requestGateway
try {
const result = await getGlobalModelInfo()
// A resumed/live session owns the footer model state. Global config
// refreshes (gateway boot, profile swap, settings save) must not clobber
// the active chat's runtime model/provider in the status bar.
if ($activeSessionId.get()) {
return
}
if (typeof result.model === 'string') {
setCurrentModel(result.model)
}

View file

@ -19,7 +19,6 @@ import {
$messages,
$sessions,
$yoloActive,
getRememberedWorkspaceCwd,
workspaceCwdForNewSession,
sessionPinId,
setActiveSessionId,
@ -211,14 +210,16 @@ function patchSessionWorkspace(sessionId: string, cwd: string | undefined) {
setSessions(prev => prev.map(session => (session.id === sessionId ? { ...session, cwd } : session)))
}
function applyRuntimeInfo(
info: SessionCreateResponse['info'] | undefined
): Partial<Pick<ClientSessionState, 'branch' | 'cwd'>> | null {
function applyRuntimeInfo(info: SessionCreateResponse['info'] | undefined): Partial<
Pick<ClientSessionState, 'branch' | 'cwd' | 'fast' | 'model' | 'provider' | 'reasoningEffort' | 'serviceTier' | 'yolo'>
> | null {
if (!info) {
return null
}
const sessionState: Partial<Pick<ClientSessionState, 'branch' | 'cwd'>> = {}
const sessionState: Partial<
Pick<ClientSessionState, 'branch' | 'cwd' | 'fast' | 'model' | 'provider' | 'reasoningEffort' | 'serviceTier' | 'yolo'>
> = {}
reportBackendContract(info.desktop_contract)
@ -228,10 +229,12 @@ function applyRuntimeInfo(
if (info.model) {
setCurrentModel(info.model)
sessionState.model = info.model
}
if (info.provider) {
setCurrentProvider(info.provider)
sessionState.provider = info.provider
}
if (info.cwd) {
@ -250,18 +253,22 @@ function applyRuntimeInfo(
if (typeof info.reasoning_effort === 'string') {
setCurrentReasoningEffort(info.reasoning_effort)
sessionState.reasoningEffort = info.reasoning_effort
}
if (typeof info.service_tier === 'string') {
setCurrentServiceTier(info.service_tier)
sessionState.serviceTier = info.service_tier
}
if (typeof info.fast === 'boolean') {
setCurrentFastMode(info.fast)
sessionState.fast = info.fast
}
if (typeof info.yolo === 'boolean') {
setYoloActive(info.yolo)
sessionState.yolo = info.yolo
}
if (info.usage) {
@ -314,6 +321,12 @@ export function useSessionActions({
setTurnStartedAt(null)
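// Reset the footer runtime atoms (model, provider, reasoning effort, service
// tier, fast, yolo) so a new chat does not show the previous session's values.
// The workspace then starts in the configured default project dir when set,
// otherwise the sticky last-used workspace (PR #37586).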
setCurrentModel('')
setCurrentProvider('')
setCurrentReasoningEffort('')
setCurrentServiceTier('')
setCurrentFastMode(false)
setYoloActive(false)
setCurrentCwd(workspaceCwdForNewSession())
setCurrentBranch('')
clearComposerDraft()

View file

@ -5,7 +5,20 @@ import type { ChatMessage } from '@/lib/chat-messages'
import { preserveLocalAssistantErrors } from '@/lib/chat-messages'
import { createClientSessionState } from '@/lib/chat-runtime'
import { setMutableRef } from '@/lib/mutable-ref'
import { $busy, $messages, noteSessionActivity, setSessionAttention, setSessionWorking, setTurnStartedAt } from '@/store/session'
import {
$busy,
$messages,
noteSessionActivity,
setCurrentFastMode,
setCurrentModel,
setCurrentProvider,
setCurrentReasoningEffort,
setCurrentServiceTier,
setSessionAttention,
setSessionWorking,
setTurnStartedAt,
setYoloActive
} from '@/store/session'
import type { ClientSessionState } from '../../types'
@ -124,6 +137,12 @@ export function useSessionStateCache({
setMessages(nextMessages)
}
setCurrentModel(pending.state.model)
setCurrentProvider(pending.state.provider)
setCurrentReasoningEffort(pending.state.reasoningEffort)
setCurrentServiceTier(pending.state.serviceTier)
setCurrentFastMode(pending.state.fast)
setYoloActive(pending.state.yolo)
setBusy(pending.state.busy)
setMutableRef(busyRef, pending.state.busy)
setAwaitingResponse(pending.state.awaitingResponse)

View file

@ -162,8 +162,9 @@ export function ModelMenuPanel({ gateway, onSelectModel, requestGateway }: Model
currentFastMode
)
// Grayed text: active row shows live state (Fast + effort);
// others show a fast-capability hint.
// Grayed text is live session state only. Do not label inactive
// rows as "Fast" just because they have a fast-capable sibling:
// that makes an off Fast toggle look like it is already on.
const meta = isCurrent
? [
fastControl.kind !== 'none' && fastControl.on ? copy.fast : null,
@ -171,9 +172,7 @@ export function ModelMenuPanel({ gateway, onSelectModel, requestGateway }: Model
]
.filter(Boolean)
.join(' ')
: caps?.fast || family.fastId
? copy.fast
: ''
: ''
// Every row is a hover-Edit submenu trigger. Activating it
// (pointer or keyboard) switches to the family's base model;

View file

@ -103,6 +103,12 @@ export interface ClientSessionState {
messages: ChatMessage[]
branch: string
cwd: string
model: string
provider: string
reasoningEffort: string
serviceTier: string
fast: boolean
yolo: boolean
busy: boolean
awaitingResponse: boolean
streamId: string | null

View file

@ -40,6 +40,12 @@ export function createClientSessionState(
messages,
branch: '',
cwd: '',
model: '',
provider: '',
reasoningEffort: '',
serviceTier: '',
fast: false,
yolo: false,
busy: false,
awaitingResponse: false,
streamId: null,

View file

@ -5,6 +5,8 @@ import { displayModelName, formatModelStatusLabel, reasoningEffortLabel } from '
describe('model-status-label', () => {
it('formats display names consistently', () => {
expect(displayModelName('anthropic/claude-opus-4.8-fast')).toBe('Opus 4.8')
expect(displayModelName('openai/gpt-5.5-fast')).toBe('GPT-5.5')
expect(displayModelName('deepseek/deepseek-v4-pro-thinking')).toBe('Deepseek V4 Pro')
expect(displayModelName('openai/gpt-5.5')).toBe('GPT-5.5')
})

View file

@ -45,6 +45,7 @@ ACP_REGISTRY_MANIFEST = REPO_ROOT / "acp_registry" / "agent.json"
# Auto-extracted from noreply emails + manual overrides
AUTHOR_MAP = {
"peterhao@Peters-MacBook-Air.local": "pinguarmy",
"barronlroth@gmail.com": "barronlroth",
"ondrej.drapalik@gmail.com": "OndrejDrapalik",
"tomasz.panek@gmail.com": "tomekpanek",

View file

@ -878,6 +878,97 @@ def test_session_resume_uses_parent_lineage_for_display(monkeypatch):
assert captured["history_calls"] == [("tip", False), ("tip", True)]
def test_session_resume_passes_stored_runtime_to_agent(monkeypatch):
    """``session.resume`` forwards the runtime stored on the session row to ``_make_agent``."""
    agent_kwargs = {}
    expected_model_override = {
        "model": "gpt-5.4",
        "provider": "openai-codex",
        "base_url": "https://custom.example/v1",
        "api_mode": "chat_completions",
    }

    class FakeDB:
        def get_session(self, target):
            return {
                "id": target,
                "model": "gpt-5.4",
                "billing_provider": "openai-codex",
                "model_config": '{"reasoning_config":{"enabled":true,"effort":"high"},"service_tier":"priority","base_url":"https://custom.example/v1","api_mode":"chat_completions"}',
            }

        def reopen_session(self, target):
            pass

        def get_messages_as_conversation(self, target, include_ancestors=False):
            return [{"role": "user", "content": "hello"}]

    def fake_make_agent(sid, key, session_id=None, session_db=None, **kwargs):
        # Only the override kwargs are interesting; record them for the asserts.
        agent_kwargs.update(kwargs)
        return types.SimpleNamespace(model="gpt-5.4", provider="openai-codex")

    def fake_init_session(sid, key, agent, history, cols=80):
        server._sessions[sid] = {"agent": agent, "session_key": key}

    stubs = {
        "_get_db": lambda: FakeDB(),
        "_enable_gateway_prompts": lambda: None,
        "_set_session_context": lambda target: [],
        "_clear_session_context": lambda tokens: None,
        "_make_agent": fake_make_agent,
        "_session_info": lambda agent, *a: {"model": agent.model, "provider": agent.provider},
        "_init_session": fake_init_session,
    }
    for attr_name, replacement in stubs.items():
        monkeypatch.setattr(server, attr_name, replacement)

    request = {"id": "1", "method": "session.resume", "params": {"session_id": "stored-session"}}
    resp = server.handle_request(request)
    result = resp["result"]

    assert result["info"] == {"model": "gpt-5.4", "provider": "openai-codex"}
    assert agent_kwargs["model_override"] == expected_model_override
    assert agent_kwargs["provider_override"] == "openai-codex"
    assert agent_kwargs["reasoning_config_override"] == {"enabled": True, "effort": "high"}
    assert agent_kwargs["service_tier_override"] == "priority"

    # The override is also remembered on the live session entry.
    live_session = server._sessions[result["session_id"]]
    assert live_session["model_override"] == agent_kwargs["model_override"]
def test_persist_live_session_runtime_preserves_resume_metadata(monkeypatch):
    """Persisting live runtime merges into the stored ``model_config`` instead of replacing it."""
    updates = {}

    class FakeDB:
        def get_session(self, session_id):
            assert session_id == "stored-session"
            return {"model_config": '{"_branched_from":"root"}'}

        def update_session_meta(self, session_id, model_config_json, model=None):
            updates["meta"] = (session_id, json.loads(model_config_json), model)

    live_runtime = {
        "model": "gpt-5.4",
        "provider": "openai-codex",
        "base_url": "https://custom.example/v1",
        "api_mode": "chat_completions",
        "reasoning_config": {"enabled": True, "effort": "high"},
        "service_tier": "priority",
    }
    agent = types.SimpleNamespace(_session_db=FakeDB(), **live_runtime)

    server._persist_live_session_runtime({"agent": agent, "session_key": "stored-session"})

    assert "model" not in updates
    session_id, stored_config, stored_model = updates["meta"]
    assert session_id == "stored-session"
    # Pre-existing metadata survives next to the freshly written runtime fields.
    assert stored_config == {"_branched_from": "root", **live_runtime}
    assert stored_model == "gpt-5.4"
def test_status_callback_emits_kind_and_text():
with patch("tui_gateway.server._emit") as emit:
cb = server._agent_cbs("sid")["status_callback"]
@ -3593,8 +3684,9 @@ def test_session_info_includes_mcp_servers(monkeypatch):
fake_mod.get_mcp_status = lambda: fake_status
monkeypatch.setitem(sys.modules, "tools.mcp_tool", fake_mod)
info = server._session_info(types.SimpleNamespace(tools=[], model=""))
info = server._session_info(types.SimpleNamespace(tools=[], model="", provider="openai-codex"))
assert info["provider"] == "openai-codex"
assert info["mcp_servers"] == fake_status
@ -5983,6 +6075,45 @@ def test_make_agent_defaults_to_90(monkeypatch):
assert mock_agent.call_args.kwargs["max_iterations"] == 90
def test_make_agent_uses_session_runtime_overrides(monkeypatch):
    """Scalar overrides passed to ``_make_agent`` reach provider resolution and ``AIAgent``."""
    _setup_make_agent_mocks(monkeypatch, {})
    resolved = {}
    reasoning = {"enabled": True, "effort": "high"}

    def fake_resolve_runtime_provider(requested=None, target_model=None):
        resolved.update(requested=requested, target_model=target_model)
        runtime = {"provider": requested}
        for field in ("base_url", "api_key", "api_mode", "command", "args", "credential_pool"):
            runtime[field] = None
        return runtime

    monkeypatch.setattr(
        "hermes_cli.runtime_provider.resolve_runtime_provider",
        fake_resolve_runtime_provider,
    )

    with patch("run_agent.AIAgent") as mock_agent:
        server._make_agent(
            "sid1",
            "key1",
            model_override="gpt-5.4",
            provider_override="openai-codex",
            reasoning_config_override={"enabled": True, "effort": "high"},
            service_tier_override="priority",
        )

    assert resolved == {"requested": "openai-codex", "target_model": "gpt-5.4"}

    agent_kwargs = mock_agent.call_args.kwargs
    assert agent_kwargs["model"] == "gpt-5.4"
    assert agent_kwargs["provider"] == "openai-codex"
    assert agent_kwargs["reasoning_config"] == reasoning
    assert agent_kwargs["service_tier"] == "priority"
def test_make_agent_handles_null_agent_config(monkeypatch):
_setup_make_agent_mocks(monkeypatch, {"agent": None, "max_turns": 80})

View file

@ -1409,6 +1409,131 @@ def _resolve_startup_runtime() -> tuple[str, str | None]:
return model, None
def _stored_session_runtime_overrides(row: dict | None) -> dict:
"""Return runtime fields persisted with a stored session.
``session.resume`` is a session-scoped operation: reopening an older chat
must restore the model/provider/reasoning state that chat actually used,
not whatever global model the user most recently selected in another chat.
The durable session row stores the model directly, the billing provider in
``billing_provider``, and richer runtime knobs in JSON ``model_config``.
"""
if not row:
return {}
raw_config = row.get("model_config")
model_config: dict = {}
if isinstance(raw_config, dict):
model_config = raw_config
elif isinstance(raw_config, str) and raw_config.strip():
try:
parsed = json.loads(raw_config)
if isinstance(parsed, dict):
model_config = parsed
except Exception:
logger.debug("failed to parse stored session model_config", exc_info=True)
overrides: dict = {}
model = str(row.get("model") or model_config.get("model") or "").strip()
provider = str(
model_config.get("provider")
or model_config.get("billing_provider")
or row.get("billing_provider")
or ""
).strip()
base_url = str(model_config.get("base_url") or "").strip()
api_mode = str(model_config.get("api_mode") or "").strip()
reasoning_config = model_config.get("reasoning_config")
service_tier = str(model_config.get("service_tier") or "").strip()
if model:
# Use the same dict-shaped override that live /model switches use so a
# DB-restored session can preserve custom endpoint metadata across both
# initial resume and later rebuilds (/new). Deliberately do not persist
# or restore raw api_key here; endpoint credentials should continue to
# come from config/env/provider resolution rather than the session DB.
overrides["model_override"] = {
"model": model,
"provider": provider or None,
"base_url": base_url or None,
"api_mode": api_mode or None,
}
if provider:
overrides["provider_override"] = provider
if isinstance(reasoning_config, dict):
overrides["reasoning_config_override"] = reasoning_config
if service_tier:
overrides["service_tier_override"] = service_tier
return overrides
def _runtime_model_config(agent, existing: dict | None = None) -> dict:
config = dict(existing or {})
model = str(getattr(agent, "model", "") or "").strip()
provider = str(getattr(agent, "provider", "") or "").strip()
base_url = str(getattr(agent, "base_url", "") or "").strip()
api_mode = str(getattr(agent, "api_mode", "") or "").strip()
reasoning_config = getattr(agent, "reasoning_config", None)
service_tier = getattr(agent, "service_tier", None)
if model:
config["model"] = model
if provider:
config["provider"] = provider
if base_url:
config["base_url"] = base_url
else:
config.pop("base_url", None)
if api_mode:
config["api_mode"] = api_mode
else:
config.pop("api_mode", None)
if isinstance(reasoning_config, dict):
config["reasoning_config"] = reasoning_config
else:
config.pop("reasoning_config", None)
if service_tier:
config["service_tier"] = service_tier
else:
config.pop("service_tier", None)
return config
def _persist_live_session_runtime(session: dict | None) -> None:
"""Persist active session runtime so future resumes restore the same footer."""
if not session:
return
agent = session.get("agent")
session_key = str(session.get("session_key") or "").strip()
if agent is None or not session_key:
return
db = getattr(agent, "_session_db", None) or _get_db()
if db is None:
return
try:
row = db.get_session(session_key) or {}
raw_config = row.get("model_config")
existing_config = {}
if isinstance(raw_config, dict):
existing_config = raw_config
elif isinstance(raw_config, str) and raw_config.strip():
parsed = json.loads(raw_config)
if isinstance(parsed, dict):
existing_config = parsed
model_config = _runtime_model_config(agent, existing_config)
model = str(getattr(agent, "model", "") or "").strip()
if hasattr(db, "update_session_meta"):
db.update_session_meta(session_key, json.dumps(model_config), model or None)
elif model and hasattr(db, "update_session_model"):
db.update_session_model(session_key, model)
except Exception:
logger.debug("failed to persist live session runtime", exc_info=True)
def _write_config_key(key_path: str, value):
cfg = _load_cfg()
current = cfg
@ -1789,6 +1914,7 @@ def _apply_model_switch(
api_mode=result.api_mode,
)
_restart_slash_worker(sid, session)
_persist_live_session_runtime(session)
_emit("session.info", sid, _session_info(agent, session))
# Record the switch as a PER-SESSION override so a later rebuild of THIS
@ -2104,6 +2230,7 @@ def _session_info(agent, session: dict | None = None) -> dict:
yolo = False
info: dict = {
"model": getattr(agent, "model", ""),
"provider": getattr(agent, "provider", ""),
"reasoning_effort": reasoning_effort,
"service_tier": service_tier,
"fast": service_tier == "priority",
@ -2891,7 +3018,10 @@ def _make_agent(
key: str,
session_id: str | None = None,
session_db=None,
model_override: dict | None = None,
model_override: dict | str | None = None,
provider_override: str | None = None,
reasoning_config_override: dict | None = None,
service_tier_override: str | None = None,
):
from run_agent import AIAgent
from hermes_cli.runtime_provider import resolve_runtime_provider
@ -2927,12 +3057,11 @@ def _make_agent(
part for part in (system_prompt, skills_prompt) if part
).strip()
# Prefer a per-session model override (set by a prior in-session /model
# switch) over global config/env resolution. This keeps a rebuilt session
# (/new, resume) on the model the user picked FOR THIS SESSION, without
# reading process-global env vars that another session may have changed.
if model_override and model_override.get("model"):
# switch) over global config/env resolution. Resume-time stored sessions may
# also pass scalar model/provider/runtime knobs from the persisted DB row.
if isinstance(model_override, dict) and model_override.get("model"):
model = str(model_override.get("model") or "")
requested_provider = model_override.get("provider") or None
requested_provider = model_override.get("provider") or provider_override or None
override_base_url = model_override.get("base_url")
override_api_key = model_override.get("api_key")
override_api_mode = model_override.get("api_mode")
@ -2951,6 +3080,10 @@ def _make_agent(
runtime["api_mode"] = override_api_mode
else:
model, requested_provider = _resolve_startup_runtime()
if isinstance(model_override, str) and model_override:
model = model_override
if provider_override:
requested_provider = provider_override
runtime = resolve_runtime_provider(
requested=requested_provider,
target_model=model or None,
@ -2971,8 +3104,16 @@ def _make_agent(
# display detail). See cli.py PR (decoupling fix) for the matching
# change on the classic CLI side.
verbose_logging=False,
reasoning_config=_load_reasoning_config(),
service_tier=_load_service_tier(),
reasoning_config=(
reasoning_config_override
if reasoning_config_override is not None
else _load_reasoning_config()
),
service_tier=(
service_tier_override
if service_tier_override is not None
else _load_service_tier()
),
enabled_toolsets=_load_enabled_toolsets(),
platform="tui",
session_id=session_id or key,
@ -3660,8 +3801,17 @@ def _(rid, params: dict) -> dict:
try:
# Pass the profile's db so the agent persists turns to the right
# state.db; home override is active here so config/skills/model
# resolve to the profile too.
agent = _make_agent(sid, target, session_id=target, session_db=db)
# resolve to the profile too. Runtime identity is restored from the
# stored session row so switching chats does not inherit whatever
# global model another chat last selected.
stored_runtime_overrides = _stored_session_runtime_overrides(found)
agent = _make_agent(
sid,
target,
session_id=target,
session_db=db,
**stored_runtime_overrides,
)
finally:
_clear_session_context(tokens)
except Exception as e:
@ -3698,6 +3848,10 @@ def _(rid, params: dict) -> dict:
try:
_init_session(sid, target, agent, history, cols=cols)
if sid in _sessions:
if stored_runtime_overrides.get("model_override") is not None:
_sessions[sid]["model_override"] = stored_runtime_overrides[
"model_override"
]
_sessions[sid]["display_history_prefix"] = display_history_prefix
# Remember the profile home so each turn re-binds HERMES_HOME (the
# agent persists to its own db, but mid-turn home reads — memory,
@ -6309,6 +6463,7 @@ def _(rid, params: dict) -> dict:
if nv == "fast":
current_overrides.update(overrides)
agent.request_overrides = current_overrides
_persist_live_session_runtime(session)
_emit(
"session.info",
params.get("session_id", ""),
@ -6475,6 +6630,12 @@ def _(rid, params: dict) -> dict:
_write_config_key("agent.reasoning_effort", arg)
if session and session.get("agent") is not None:
session["agent"].reasoning_config = parsed
_persist_live_session_runtime(session)
_emit(
"session.info",
params.get("session_id", ""),
_session_info(session["agent"], session),
)
return _ok(rid, {"key": key, "value": arg})
except Exception as e:
return _err(rid, 5001, str(e))