mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-11 08:42:11 +00:00
fix(desktop): keep model runtime state per session (#43702)
* fix(desktop): keep model runtime state per session (cherry picked from commit f72ee87d99ee38cb7b5badeb9a8af869bb92073a) * fix(desktop): keep footer model state scoped to active session (cherry picked from commit d91942ebd4671ff857b5c8526dbf133f04782ecb) * fix(desktop): restore stored runtime when resuming sessions (cherry picked from commit 32b3793418257617b8da57e26151f079c2620d00) * fix(desktop): persist live runtime changes for resume (cherry picked from commit c58467779436dcef44a80ad55b52664752dc0837) * fix(desktop): persist resumed endpoint runtime * chore(attribution): map pinguarmy's commit email in AUTHOR_MAP The salvaged commits on this branch preserve @pinguarmy's authorship (郝鹏宇 / peterhao@Peters-MacBook-Air.local). Add the mapping so the check-attribution CI gate resolves the email to the GitHub username. --------- Co-authored-by: 郝鹏宇 <peterhao@Peters-MacBook-Air.local>
This commit is contained in:
parent
07ac185904
commit
6de3963e37
13 changed files with 474 additions and 32 deletions
|
|
@ -743,6 +743,13 @@ export function DesktopController() {
|
|||
}
|
||||
}, [gatewayState, refreshCronJobs])
|
||||
|
||||
useEffect(() => {
|
||||
if (gatewayState === 'open' && !activeSessionId && freshDraftReady) {
|
||||
void refreshCurrentModel()
|
||||
void refreshHermesConfig()
|
||||
}
|
||||
}, [activeSessionId, freshDraftReady, gatewayState, refreshCurrentModel, refreshHermesConfig])
|
||||
|
||||
useRouteResume({
|
||||
activeSessionId,
|
||||
activeSessionIdRef,
|
||||
|
|
|
|||
|
|
@ -633,14 +633,21 @@ export function useMessageStream({
|
|||
const runningChanged = typeof payload?.running === 'boolean'
|
||||
|
||||
if (apply) {
|
||||
const runtimeInfo: { branch?: string; cwd?: string } = {}
|
||||
const runtimeInfo: Partial<
|
||||
Pick<
|
||||
ClientSessionState,
|
||||
'branch' | 'cwd' | 'fast' | 'model' | 'provider' | 'reasoningEffort' | 'serviceTier' | 'yolo'
|
||||
>
|
||||
> = {}
|
||||
|
||||
if (modelChanged) {
|
||||
setCurrentModel(payload!.model || '')
|
||||
runtimeInfo.model = payload!.model || ''
|
||||
}
|
||||
|
||||
if (providerChanged) {
|
||||
setCurrentProvider(payload!.provider || '')
|
||||
runtimeInfo.provider = payload!.provider || ''
|
||||
}
|
||||
|
||||
if (typeof payload?.cwd === 'string') {
|
||||
|
|
@ -653,32 +660,32 @@ export function useMessageStream({
|
|||
runtimeInfo.branch = payload.branch
|
||||
}
|
||||
|
||||
if (sessionId && (runtimeInfo.cwd !== undefined || runtimeInfo.branch !== undefined)) {
|
||||
updateSessionState(sessionId, state => ({
|
||||
...state,
|
||||
branch: runtimeInfo.branch ?? state.branch,
|
||||
cwd: runtimeInfo.cwd ?? state.cwd
|
||||
}))
|
||||
}
|
||||
|
||||
if (typeof payload?.personality === 'string') {
|
||||
setCurrentPersonality(normalizePersonalityValue(payload.personality))
|
||||
}
|
||||
|
||||
if (typeof payload?.reasoning_effort === 'string') {
|
||||
setCurrentReasoningEffort(payload.reasoning_effort)
|
||||
runtimeInfo.reasoningEffort = payload.reasoning_effort
|
||||
}
|
||||
|
||||
if (typeof payload?.service_tier === 'string') {
|
||||
setCurrentServiceTier(payload.service_tier)
|
||||
runtimeInfo.serviceTier = payload.service_tier
|
||||
}
|
||||
|
||||
if (typeof payload?.fast === 'boolean') {
|
||||
setCurrentFastMode(payload.fast)
|
||||
runtimeInfo.fast = payload.fast
|
||||
}
|
||||
|
||||
if (typeof payload?.yolo === 'boolean') {
|
||||
setYoloActive(payload.yolo)
|
||||
runtimeInfo.yolo = payload.yolo
|
||||
}
|
||||
|
||||
if (sessionId && Object.keys(runtimeInfo).length > 0) {
|
||||
updateSessionState(sessionId, state => ({ ...state, ...runtimeInfo }))
|
||||
}
|
||||
|
||||
if (runningChanged && sessionId) {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,77 @@
|
|||
import { renderHook } from '@testing-library/react'
|
||||
import { QueryClient } from '@tanstack/react-query'
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
|
||||
|
||||
import { getGlobalModelInfo } from '@/hermes'
|
||||
import {
|
||||
$activeSessionId,
|
||||
$currentModel,
|
||||
$currentProvider,
|
||||
setCurrentModel,
|
||||
setCurrentProvider
|
||||
} from '@/store/session'
|
||||
|
||||
import { useModelControls } from './use-model-controls'
|
||||
|
||||
vi.mock('@/hermes', () => ({
|
||||
getGlobalModelInfo: vi.fn(),
|
||||
setGlobalModel: vi.fn()
|
||||
}))
|
||||
|
||||
describe('useModelControls.refreshCurrentModel', () => {
  // Payload the mocked global-config endpoint answers with in both cases.
  const globalModelInfo = {
    model: 'openai/gpt-5.5',
    provider: 'openai-codex'
  }

  // Put the shared session stores back to a blank, session-less baseline.
  const resetSessionStores = () => {
    $activeSessionId.set(null)
    setCurrentModel('')
    setCurrentProvider('')
  }

  // Mount the hook for the given session id with throwaway collaborators.
  const mountControls = (activeSessionId: string | null) => {
    const { result } = renderHook(() =>
      useModelControls({
        activeSessionId,
        queryClient: new QueryClient(),
        requestGateway: vi.fn()
      })
    )

    return result
  }

  beforeEach(() => {
    resetSessionStores()
  })

  afterEach(() => {
    vi.restoreAllMocks()
    resetSessionStores()
  })

  it('applies the global model when there is no active runtime session', async () => {
    vi.mocked(getGlobalModelInfo).mockResolvedValue(globalModelInfo)

    const controls = mountControls(null)

    await controls.current.refreshCurrentModel()

    expect($currentModel.get()).toBe('openai/gpt-5.5')
    expect($currentProvider.get()).toBe('openai-codex')
  })

  it('does not clobber the active session footer state with global model info', async () => {
    // Seed the stores as if a live session already picked its own runtime.
    setCurrentModel('deepseek/deepseek-v4-pro')
    setCurrentProvider('deepseek')
    $activeSessionId.set('runtime-1')
    vi.mocked(getGlobalModelInfo).mockResolvedValue(globalModelInfo)

    const controls = mountControls('runtime-1')

    await controls.current.refreshCurrentModel()

    // The session-owned values must survive the global refresh untouched.
    expect($currentModel.get()).toBe('deepseek/deepseek-v4-pro')
    expect($currentProvider.get()).toBe('deepseek')
  })
})
|
||||
|
|
@ -4,7 +4,13 @@ import { useCallback } from 'react'
|
|||
import { getGlobalModelInfo, setGlobalModel } from '@/hermes'
|
||||
import { useI18n } from '@/i18n'
|
||||
import { notifyError } from '@/store/notifications'
|
||||
import { $currentModel, $currentProvider, setCurrentModel, setCurrentProvider } from '@/store/session'
|
||||
import {
|
||||
$activeSessionId,
|
||||
$currentModel,
|
||||
$currentProvider,
|
||||
setCurrentModel,
|
||||
setCurrentProvider
|
||||
} from '@/store/session'
|
||||
import type { ModelOptionsResponse } from '@/types/hermes'
|
||||
|
||||
interface ModelSelection {
|
||||
|
|
@ -39,6 +45,13 @@ export function useModelControls({ activeSessionId, queryClient, requestGateway
|
|||
try {
|
||||
const result = await getGlobalModelInfo()
|
||||
|
||||
// A resumed/live session owns the footer model state. Global config
|
||||
// refreshes (gateway boot, profile swap, settings save) must not clobber
|
||||
// the active chat's runtime model/provider in the status bar.
|
||||
if ($activeSessionId.get()) {
|
||||
return
|
||||
}
|
||||
|
||||
if (typeof result.model === 'string') {
|
||||
setCurrentModel(result.model)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -19,7 +19,6 @@ import {
|
|||
$messages,
|
||||
$sessions,
|
||||
$yoloActive,
|
||||
getRememberedWorkspaceCwd,
|
||||
workspaceCwdForNewSession,
|
||||
sessionPinId,
|
||||
setActiveSessionId,
|
||||
|
|
@ -211,14 +210,16 @@ function patchSessionWorkspace(sessionId: string, cwd: string | undefined) {
|
|||
setSessions(prev => prev.map(session => (session.id === sessionId ? { ...session, cwd } : session)))
|
||||
}
|
||||
|
||||
function applyRuntimeInfo(
|
||||
info: SessionCreateResponse['info'] | undefined
|
||||
): Partial<Pick<ClientSessionState, 'branch' | 'cwd'>> | null {
|
||||
function applyRuntimeInfo(info: SessionCreateResponse['info'] | undefined): Partial<
|
||||
Pick<ClientSessionState, 'branch' | 'cwd' | 'fast' | 'model' | 'provider' | 'reasoningEffort' | 'serviceTier' | 'yolo'>
|
||||
> | null {
|
||||
if (!info) {
|
||||
return null
|
||||
}
|
||||
|
||||
const sessionState: Partial<Pick<ClientSessionState, 'branch' | 'cwd'>> = {}
|
||||
const sessionState: Partial<
|
||||
Pick<ClientSessionState, 'branch' | 'cwd' | 'fast' | 'model' | 'provider' | 'reasoningEffort' | 'serviceTier' | 'yolo'>
|
||||
> = {}
|
||||
|
||||
reportBackendContract(info.desktop_contract)
|
||||
|
||||
|
|
@ -228,10 +229,12 @@ function applyRuntimeInfo(
|
|||
|
||||
if (info.model) {
|
||||
setCurrentModel(info.model)
|
||||
sessionState.model = info.model
|
||||
}
|
||||
|
||||
if (info.provider) {
|
||||
setCurrentProvider(info.provider)
|
||||
sessionState.provider = info.provider
|
||||
}
|
||||
|
||||
if (info.cwd) {
|
||||
|
|
@ -250,18 +253,22 @@ function applyRuntimeInfo(
|
|||
|
||||
if (typeof info.reasoning_effort === 'string') {
|
||||
setCurrentReasoningEffort(info.reasoning_effort)
|
||||
sessionState.reasoningEffort = info.reasoning_effort
|
||||
}
|
||||
|
||||
if (typeof info.service_tier === 'string') {
|
||||
setCurrentServiceTier(info.service_tier)
|
||||
sessionState.serviceTier = info.service_tier
|
||||
}
|
||||
|
||||
if (typeof info.fast === 'boolean') {
|
||||
setCurrentFastMode(info.fast)
|
||||
sessionState.fast = info.fast
|
||||
}
|
||||
|
||||
if (typeof info.yolo === 'boolean') {
|
||||
setYoloActive(info.yolo)
|
||||
sessionState.yolo = info.yolo
|
||||
}
|
||||
|
||||
if (info.usage) {
|
||||
|
|
@ -314,6 +321,12 @@ export function useSessionActions({
|
|||
setTurnStartedAt(null)
|
||||
// New chats start in the configured default project dir when set,
|
||||
// otherwise the sticky last-used workspace (PR #37586).
|
||||
setCurrentModel('')
|
||||
setCurrentProvider('')
|
||||
setCurrentReasoningEffort('')
|
||||
setCurrentServiceTier('')
|
||||
setCurrentFastMode(false)
|
||||
setYoloActive(false)
|
||||
setCurrentCwd(workspaceCwdForNewSession())
|
||||
setCurrentBranch('')
|
||||
clearComposerDraft()
|
||||
|
|
|
|||
|
|
@ -5,7 +5,20 @@ import type { ChatMessage } from '@/lib/chat-messages'
|
|||
import { preserveLocalAssistantErrors } from '@/lib/chat-messages'
|
||||
import { createClientSessionState } from '@/lib/chat-runtime'
|
||||
import { setMutableRef } from '@/lib/mutable-ref'
|
||||
import { $busy, $messages, noteSessionActivity, setSessionAttention, setSessionWorking, setTurnStartedAt } from '@/store/session'
|
||||
import {
|
||||
$busy,
|
||||
$messages,
|
||||
noteSessionActivity,
|
||||
setCurrentFastMode,
|
||||
setCurrentModel,
|
||||
setCurrentProvider,
|
||||
setCurrentReasoningEffort,
|
||||
setCurrentServiceTier,
|
||||
setSessionAttention,
|
||||
setSessionWorking,
|
||||
setTurnStartedAt,
|
||||
setYoloActive
|
||||
} from '@/store/session'
|
||||
|
||||
import type { ClientSessionState } from '../../types'
|
||||
|
||||
|
|
@ -124,6 +137,12 @@ export function useSessionStateCache({
|
|||
setMessages(nextMessages)
|
||||
}
|
||||
|
||||
setCurrentModel(pending.state.model)
|
||||
setCurrentProvider(pending.state.provider)
|
||||
setCurrentReasoningEffort(pending.state.reasoningEffort)
|
||||
setCurrentServiceTier(pending.state.serviceTier)
|
||||
setCurrentFastMode(pending.state.fast)
|
||||
setYoloActive(pending.state.yolo)
|
||||
setBusy(pending.state.busy)
|
||||
setMutableRef(busyRef, pending.state.busy)
|
||||
setAwaitingResponse(pending.state.awaitingResponse)
|
||||
|
|
|
|||
|
|
@ -162,8 +162,9 @@ export function ModelMenuPanel({ gateway, onSelectModel, requestGateway }: Model
|
|||
currentFastMode
|
||||
)
|
||||
|
||||
// Grayed text: active row shows live state (Fast + effort);
|
||||
// others show a fast-capability hint.
|
||||
// Grayed text is live session state only. Do not label inactive
|
||||
// rows as "Fast" just because they have a fast-capable sibling:
|
||||
// that makes an off Fast toggle look like it is already on.
|
||||
const meta = isCurrent
|
||||
? [
|
||||
fastControl.kind !== 'none' && fastControl.on ? copy.fast : null,
|
||||
|
|
@ -171,9 +172,7 @@ export function ModelMenuPanel({ gateway, onSelectModel, requestGateway }: Model
|
|||
]
|
||||
.filter(Boolean)
|
||||
.join(' ')
|
||||
: caps?.fast || family.fastId
|
||||
? copy.fast
|
||||
: ''
|
||||
: ''
|
||||
|
||||
// Every row is a hover-Edit submenu trigger. Activating it
|
||||
// (pointer or keyboard) switches to the family's base model;
|
||||
|
|
|
|||
|
|
@ -103,6 +103,12 @@ export interface ClientSessionState {
|
|||
messages: ChatMessage[]
|
||||
branch: string
|
||||
cwd: string
|
||||
model: string
|
||||
provider: string
|
||||
reasoningEffort: string
|
||||
serviceTier: string
|
||||
fast: boolean
|
||||
yolo: boolean
|
||||
busy: boolean
|
||||
awaitingResponse: boolean
|
||||
streamId: string | null
|
||||
|
|
|
|||
|
|
@ -40,6 +40,12 @@ export function createClientSessionState(
|
|||
messages,
|
||||
branch: '',
|
||||
cwd: '',
|
||||
model: '',
|
||||
provider: '',
|
||||
reasoningEffort: '',
|
||||
serviceTier: '',
|
||||
fast: false,
|
||||
yolo: false,
|
||||
busy: false,
|
||||
awaitingResponse: false,
|
||||
streamId: null,
|
||||
|
|
|
|||
|
|
@ -5,6 +5,8 @@ import { displayModelName, formatModelStatusLabel, reasoningEffortLabel } from '
|
|||
describe('model-status-label', () => {
|
||||
it('formats display names consistently', () => {
|
||||
expect(displayModelName('anthropic/claude-opus-4.8-fast')).toBe('Opus 4.8')
|
||||
expect(displayModelName('openai/gpt-5.5-fast')).toBe('GPT-5.5')
|
||||
expect(displayModelName('deepseek/deepseek-v4-pro-thinking')).toBe('Deepseek V4 Pro')
|
||||
expect(displayModelName('openai/gpt-5.5')).toBe('GPT-5.5')
|
||||
})
|
||||
|
||||
|
|
|
|||
|
|
@ -45,6 +45,7 @@ ACP_REGISTRY_MANIFEST = REPO_ROOT / "acp_registry" / "agent.json"
|
|||
|
||||
# Auto-extracted from noreply emails + manual overrides
|
||||
AUTHOR_MAP = {
|
||||
"peterhao@Peters-MacBook-Air.local": "pinguarmy",
|
||||
"barronlroth@gmail.com": "barronlroth",
|
||||
"ondrej.drapalik@gmail.com": "OndrejDrapalik",
|
||||
"tomasz.panek@gmail.com": "tomekpanek",
|
||||
|
|
|
|||
|
|
@ -878,6 +878,97 @@ def test_session_resume_uses_parent_lineage_for_display(monkeypatch):
|
|||
assert captured["history_calls"] == [("tip", False), ("tip", True)]
|
||||
|
||||
|
||||
def test_session_resume_passes_stored_runtime_to_agent(monkeypatch):
    """``session.resume`` forwards the stored row's runtime to ``_make_agent``.

    The fake DB row carries a model, a billing provider and a JSON
    ``model_config`` (reasoning config, service tier, base URL, API mode).
    The test checks that those arrive as keyword overrides on the agent
    factory and that the dict-shaped model override is also recorded on the
    live session entry.
    """
    captured = {}

    class FakeDB:
        def get_session(self, target):
            return {
                "id": target,
                "model": "gpt-5.4",
                "billing_provider": "openai-codex",
                "model_config": '{"reasoning_config":{"enabled":true,"effort":"high"},"service_tier":"priority","base_url":"https://custom.example/v1","api_mode":"chat_completions"}',
            }

        def reopen_session(self, target):
            pass

        def get_messages_as_conversation(self, target, include_ancestors=False):
            return [{"role": "user", "content": "hello"}]

    def fake_make_agent(sid, key, session_id=None, session_db=None, **kwargs):
        # Everything beyond the positional/session arguments is a runtime override.
        captured.update(kwargs)
        return types.SimpleNamespace(model="gpt-5.4", provider="openai-codex")

    def fake_init_session(sid, key, agent, history, cols=80):
        server._sessions[sid] = {"agent": agent, "session_key": key}

    monkeypatch.setattr(server, "_get_db", lambda: FakeDB())
    monkeypatch.setattr(server, "_enable_gateway_prompts", lambda: None)
    monkeypatch.setattr(server, "_set_session_context", lambda target: [])
    monkeypatch.setattr(server, "_clear_session_context", lambda tokens: None)
    monkeypatch.setattr(server, "_make_agent", fake_make_agent)
    monkeypatch.setattr(server, "_session_info", lambda agent, *a: {"model": agent.model, "provider": agent.provider})
    monkeypatch.setattr(server, "_init_session", fake_init_session)

    # fake_init_session writes into the module-global session table; remember
    # what was there so entries created by this test can be removed afterwards
    # instead of leaking into later tests.
    sessions_before = set(server._sessions)
    try:
        resp = server.handle_request(
            {"id": "1", "method": "session.resume", "params": {"session_id": "stored-session"}}
        )

        assert resp["result"]["info"] == {"model": "gpt-5.4", "provider": "openai-codex"}
        assert captured["model_override"] == {
            "model": "gpt-5.4",
            "provider": "openai-codex",
            "base_url": "https://custom.example/v1",
            "api_mode": "chat_completions",
        }
        assert captured["provider_override"] == "openai-codex"
        assert captured["reasoning_config_override"] == {"enabled": True, "effort": "high"}
        assert captured["service_tier_override"] == "priority"
        runtime_sid = resp["result"]["session_id"]
        assert server._sessions[runtime_sid]["model_override"] == captured["model_override"]
    finally:
        for leaked_sid in set(server._sessions) - sessions_before:
            server._sessions.pop(leaked_sid, None)
|
||||
|
||||
|
||||
def test_persist_live_session_runtime_preserves_resume_metadata(monkeypatch):
    """Persisting live runtime merges into the stored ``model_config``.

    The stored config already holds an unrelated ``_branched_from`` key; after
    persisting, that key must still be present alongside the agent's runtime
    fields, and the write must go through ``update_session_meta`` rather than
    the model-only ``update_session_model`` fallback.
    """
    updates = {}

    class FakeDB:
        def get_session(self, session_id):
            assert session_id == "stored-session"
            return {"model_config": '{"_branched_from":"root"}'}

        def update_session_meta(self, session_id, model_config_json, model=None):
            updates["meta"] = (session_id, json.loads(model_config_json), model)

        def update_session_model(self, session_id, model):
            # Recorded so the negative assertion below can actually fail if the
            # model-only fallback is taken while update_session_meta exists.
            updates["model"] = (session_id, model)

    agent = types.SimpleNamespace(
        model="gpt-5.4",
        provider="openai-codex",
        base_url="https://custom.example/v1",
        api_mode="chat_completions",
        reasoning_config={"enabled": True, "effort": "high"},
        service_tier="priority",
        _session_db=FakeDB(),
    )

    server._persist_live_session_runtime({"agent": agent, "session_key": "stored-session"})

    assert "model" not in updates
    assert updates["meta"] == (
        "stored-session",
        {
            "_branched_from": "root",
            "model": "gpt-5.4",
            "provider": "openai-codex",
            "base_url": "https://custom.example/v1",
            "api_mode": "chat_completions",
            "reasoning_config": {"enabled": True, "effort": "high"},
            "service_tier": "priority",
        },
        "gpt-5.4",
    )
|
||||
|
||||
|
||||
def test_status_callback_emits_kind_and_text():
|
||||
with patch("tui_gateway.server._emit") as emit:
|
||||
cb = server._agent_cbs("sid")["status_callback"]
|
||||
|
|
@ -3593,8 +3684,9 @@ def test_session_info_includes_mcp_servers(monkeypatch):
|
|||
fake_mod.get_mcp_status = lambda: fake_status
|
||||
monkeypatch.setitem(sys.modules, "tools.mcp_tool", fake_mod)
|
||||
|
||||
info = server._session_info(types.SimpleNamespace(tools=[], model=""))
|
||||
info = server._session_info(types.SimpleNamespace(tools=[], model="", provider="openai-codex"))
|
||||
|
||||
assert info["provider"] == "openai-codex"
|
||||
assert info["mcp_servers"] == fake_status
|
||||
|
||||
|
||||
|
|
@ -5983,6 +6075,45 @@ def test_make_agent_defaults_to_90(monkeypatch):
|
|||
assert mock_agent.call_args.kwargs["max_iterations"] == 90
|
||||
|
||||
|
||||
def test_make_agent_uses_session_runtime_overrides(monkeypatch):
    """Scalar runtime overrides reach both provider resolution and ``AIAgent``.

    A plain-string model override plus provider, reasoning-config and
    service-tier overrides are passed to ``_make_agent``; the provider
    resolver must be asked for that provider/model pair and the agent must be
    constructed with all four values.
    """
    _setup_make_agent_mocks(monkeypatch, {})
    resolver_call = {}
    reasoning = {"enabled": True, "effort": "high"}

    def fake_resolve_runtime_provider(requested=None, target_model=None):
        # Remember what _make_agent asked for, then echo the provider back
        # with every other runtime field left unset.
        resolver_call["requested"] = requested
        resolver_call["target_model"] = target_model
        unset_fields = ("base_url", "api_key", "api_mode", "command", "args", "credential_pool")
        return {"provider": requested, **dict.fromkeys(unset_fields)}

    monkeypatch.setattr(
        "hermes_cli.runtime_provider.resolve_runtime_provider",
        fake_resolve_runtime_provider,
    )

    with patch("run_agent.AIAgent") as mock_agent:
        server._make_agent(
            "sid1",
            "key1",
            model_override="gpt-5.4",
            provider_override="openai-codex",
            reasoning_config_override=dict(reasoning),
            service_tier_override="priority",
        )

    assert resolver_call == {"requested": "openai-codex", "target_model": "gpt-5.4"}

    agent_kwargs = mock_agent.call_args.kwargs
    assert agent_kwargs["model"] == "gpt-5.4"
    assert agent_kwargs["provider"] == "openai-codex"
    assert agent_kwargs["reasoning_config"] == reasoning
    assert agent_kwargs["service_tier"] == "priority"
|
||||
|
||||
|
||||
def test_make_agent_handles_null_agent_config(monkeypatch):
|
||||
_setup_make_agent_mocks(monkeypatch, {"agent": None, "max_turns": 80})
|
||||
|
||||
|
|
|
|||
|
|
@ -1409,6 +1409,131 @@ def _resolve_startup_runtime() -> tuple[str, str | None]:
|
|||
return model, None
|
||||
|
||||
|
||||
def _stored_session_runtime_overrides(row: dict | None) -> dict:
|
||||
"""Return runtime fields persisted with a stored session.
|
||||
|
||||
``session.resume`` is a session-scoped operation: reopening an older chat
|
||||
must restore the model/provider/reasoning state that chat actually used,
|
||||
not whatever global model the user most recently selected in another chat.
|
||||
The durable session row stores the model directly, the billing provider in
|
||||
``billing_provider``, and richer runtime knobs in JSON ``model_config``.
|
||||
"""
|
||||
if not row:
|
||||
return {}
|
||||
|
||||
raw_config = row.get("model_config")
|
||||
model_config: dict = {}
|
||||
if isinstance(raw_config, dict):
|
||||
model_config = raw_config
|
||||
elif isinstance(raw_config, str) and raw_config.strip():
|
||||
try:
|
||||
parsed = json.loads(raw_config)
|
||||
if isinstance(parsed, dict):
|
||||
model_config = parsed
|
||||
except Exception:
|
||||
logger.debug("failed to parse stored session model_config", exc_info=True)
|
||||
|
||||
overrides: dict = {}
|
||||
model = str(row.get("model") or model_config.get("model") or "").strip()
|
||||
provider = str(
|
||||
model_config.get("provider")
|
||||
or model_config.get("billing_provider")
|
||||
or row.get("billing_provider")
|
||||
or ""
|
||||
).strip()
|
||||
base_url = str(model_config.get("base_url") or "").strip()
|
||||
api_mode = str(model_config.get("api_mode") or "").strip()
|
||||
reasoning_config = model_config.get("reasoning_config")
|
||||
service_tier = str(model_config.get("service_tier") or "").strip()
|
||||
|
||||
if model:
|
||||
# Use the same dict-shaped override that live /model switches use so a
|
||||
# DB-restored session can preserve custom endpoint metadata across both
|
||||
# initial resume and later rebuilds (/new). Deliberately do not persist
|
||||
# or restore raw api_key here; endpoint credentials should continue to
|
||||
# come from config/env/provider resolution rather than the session DB.
|
||||
overrides["model_override"] = {
|
||||
"model": model,
|
||||
"provider": provider or None,
|
||||
"base_url": base_url or None,
|
||||
"api_mode": api_mode or None,
|
||||
}
|
||||
if provider:
|
||||
overrides["provider_override"] = provider
|
||||
if isinstance(reasoning_config, dict):
|
||||
overrides["reasoning_config_override"] = reasoning_config
|
||||
if service_tier:
|
||||
overrides["service_tier_override"] = service_tier
|
||||
|
||||
return overrides
|
||||
|
||||
|
||||
def _runtime_model_config(agent, existing: dict | None = None) -> dict:
|
||||
config = dict(existing or {})
|
||||
model = str(getattr(agent, "model", "") or "").strip()
|
||||
provider = str(getattr(agent, "provider", "") or "").strip()
|
||||
base_url = str(getattr(agent, "base_url", "") or "").strip()
|
||||
api_mode = str(getattr(agent, "api_mode", "") or "").strip()
|
||||
reasoning_config = getattr(agent, "reasoning_config", None)
|
||||
service_tier = getattr(agent, "service_tier", None)
|
||||
|
||||
if model:
|
||||
config["model"] = model
|
||||
if provider:
|
||||
config["provider"] = provider
|
||||
if base_url:
|
||||
config["base_url"] = base_url
|
||||
else:
|
||||
config.pop("base_url", None)
|
||||
if api_mode:
|
||||
config["api_mode"] = api_mode
|
||||
else:
|
||||
config.pop("api_mode", None)
|
||||
if isinstance(reasoning_config, dict):
|
||||
config["reasoning_config"] = reasoning_config
|
||||
else:
|
||||
config.pop("reasoning_config", None)
|
||||
if service_tier:
|
||||
config["service_tier"] = service_tier
|
||||
else:
|
||||
config.pop("service_tier", None)
|
||||
|
||||
return config
|
||||
|
||||
|
||||
def _persist_live_session_runtime(session: dict | None) -> None:
|
||||
"""Persist active session runtime so future resumes restore the same footer."""
|
||||
if not session:
|
||||
return
|
||||
agent = session.get("agent")
|
||||
session_key = str(session.get("session_key") or "").strip()
|
||||
if agent is None or not session_key:
|
||||
return
|
||||
|
||||
db = getattr(agent, "_session_db", None) or _get_db()
|
||||
if db is None:
|
||||
return
|
||||
|
||||
try:
|
||||
row = db.get_session(session_key) or {}
|
||||
raw_config = row.get("model_config")
|
||||
existing_config = {}
|
||||
if isinstance(raw_config, dict):
|
||||
existing_config = raw_config
|
||||
elif isinstance(raw_config, str) and raw_config.strip():
|
||||
parsed = json.loads(raw_config)
|
||||
if isinstance(parsed, dict):
|
||||
existing_config = parsed
|
||||
model_config = _runtime_model_config(agent, existing_config)
|
||||
model = str(getattr(agent, "model", "") or "").strip()
|
||||
if hasattr(db, "update_session_meta"):
|
||||
db.update_session_meta(session_key, json.dumps(model_config), model or None)
|
||||
elif model and hasattr(db, "update_session_model"):
|
||||
db.update_session_model(session_key, model)
|
||||
except Exception:
|
||||
logger.debug("failed to persist live session runtime", exc_info=True)
|
||||
|
||||
|
||||
def _write_config_key(key_path: str, value):
|
||||
cfg = _load_cfg()
|
||||
current = cfg
|
||||
|
|
@ -1789,6 +1914,7 @@ def _apply_model_switch(
|
|||
api_mode=result.api_mode,
|
||||
)
|
||||
_restart_slash_worker(sid, session)
|
||||
_persist_live_session_runtime(session)
|
||||
_emit("session.info", sid, _session_info(agent, session))
|
||||
|
||||
# Record the switch as a PER-SESSION override so a later rebuild of THIS
|
||||
|
|
@ -2104,6 +2230,7 @@ def _session_info(agent, session: dict | None = None) -> dict:
|
|||
yolo = False
|
||||
info: dict = {
|
||||
"model": getattr(agent, "model", ""),
|
||||
"provider": getattr(agent, "provider", ""),
|
||||
"reasoning_effort": reasoning_effort,
|
||||
"service_tier": service_tier,
|
||||
"fast": service_tier == "priority",
|
||||
|
|
@ -2891,7 +3018,10 @@ def _make_agent(
|
|||
key: str,
|
||||
session_id: str | None = None,
|
||||
session_db=None,
|
||||
model_override: dict | None = None,
|
||||
model_override: dict | str | None = None,
|
||||
provider_override: str | None = None,
|
||||
reasoning_config_override: dict | None = None,
|
||||
service_tier_override: str | None = None,
|
||||
):
|
||||
from run_agent import AIAgent
|
||||
from hermes_cli.runtime_provider import resolve_runtime_provider
|
||||
|
|
@ -2927,12 +3057,11 @@ def _make_agent(
|
|||
part for part in (system_prompt, skills_prompt) if part
|
||||
).strip()
|
||||
# Prefer a per-session model override (set by a prior in-session /model
|
||||
# switch) over global config/env resolution. This keeps a rebuilt session
|
||||
# (/new, resume) on the model the user picked FOR THIS SESSION, without
|
||||
# reading process-global env vars that another session may have changed.
|
||||
if model_override and model_override.get("model"):
|
||||
# switch) over global config/env resolution. Resume-time stored sessions may
|
||||
# also pass scalar model/provider/runtime knobs from the persisted DB row.
|
||||
if isinstance(model_override, dict) and model_override.get("model"):
|
||||
model = str(model_override.get("model") or "")
|
||||
requested_provider = model_override.get("provider") or None
|
||||
requested_provider = model_override.get("provider") or provider_override or None
|
||||
override_base_url = model_override.get("base_url")
|
||||
override_api_key = model_override.get("api_key")
|
||||
override_api_mode = model_override.get("api_mode")
|
||||
|
|
@ -2951,6 +3080,10 @@ def _make_agent(
|
|||
runtime["api_mode"] = override_api_mode
|
||||
else:
|
||||
model, requested_provider = _resolve_startup_runtime()
|
||||
if isinstance(model_override, str) and model_override:
|
||||
model = model_override
|
||||
if provider_override:
|
||||
requested_provider = provider_override
|
||||
runtime = resolve_runtime_provider(
|
||||
requested=requested_provider,
|
||||
target_model=model or None,
|
||||
|
|
@ -2971,8 +3104,16 @@ def _make_agent(
|
|||
# display detail). See cli.py PR (decoupling fix) for the matching
|
||||
# change on the classic CLI side.
|
||||
verbose_logging=False,
|
||||
reasoning_config=_load_reasoning_config(),
|
||||
service_tier=_load_service_tier(),
|
||||
reasoning_config=(
|
||||
reasoning_config_override
|
||||
if reasoning_config_override is not None
|
||||
else _load_reasoning_config()
|
||||
),
|
||||
service_tier=(
|
||||
service_tier_override
|
||||
if service_tier_override is not None
|
||||
else _load_service_tier()
|
||||
),
|
||||
enabled_toolsets=_load_enabled_toolsets(),
|
||||
platform="tui",
|
||||
session_id=session_id or key,
|
||||
|
|
@ -3660,8 +3801,17 @@ def _(rid, params: dict) -> dict:
|
|||
try:
|
||||
# Pass the profile's db so the agent persists turns to the right
|
||||
# state.db; home override is active here so config/skills/model
|
||||
# resolve to the profile too.
|
||||
agent = _make_agent(sid, target, session_id=target, session_db=db)
|
||||
# resolve to the profile too. Runtime identity is restored from the
|
||||
# stored session row so switching chats does not inherit whatever
|
||||
# global model another chat last selected.
|
||||
stored_runtime_overrides = _stored_session_runtime_overrides(found)
|
||||
agent = _make_agent(
|
||||
sid,
|
||||
target,
|
||||
session_id=target,
|
||||
session_db=db,
|
||||
**stored_runtime_overrides,
|
||||
)
|
||||
finally:
|
||||
_clear_session_context(tokens)
|
||||
except Exception as e:
|
||||
|
|
@ -3698,6 +3848,10 @@ def _(rid, params: dict) -> dict:
|
|||
try:
|
||||
_init_session(sid, target, agent, history, cols=cols)
|
||||
if sid in _sessions:
|
||||
if stored_runtime_overrides.get("model_override") is not None:
|
||||
_sessions[sid]["model_override"] = stored_runtime_overrides[
|
||||
"model_override"
|
||||
]
|
||||
_sessions[sid]["display_history_prefix"] = display_history_prefix
|
||||
# Remember the profile home so each turn re-binds HERMES_HOME (the
|
||||
# agent persists to its own db, but mid-turn home reads — memory,
|
||||
|
|
@ -6309,6 +6463,7 @@ def _(rid, params: dict) -> dict:
|
|||
if nv == "fast":
|
||||
current_overrides.update(overrides)
|
||||
agent.request_overrides = current_overrides
|
||||
_persist_live_session_runtime(session)
|
||||
_emit(
|
||||
"session.info",
|
||||
params.get("session_id", ""),
|
||||
|
|
@ -6475,6 +6630,12 @@ def _(rid, params: dict) -> dict:
|
|||
_write_config_key("agent.reasoning_effort", arg)
|
||||
if session and session.get("agent") is not None:
|
||||
session["agent"].reasoning_config = parsed
|
||||
_persist_live_session_runtime(session)
|
||||
_emit(
|
||||
"session.info",
|
||||
params.get("session_id", ""),
|
||||
_session_info(session["agent"], session),
|
||||
)
|
||||
return _ok(rid, {"key": key, "value": arg})
|
||||
except Exception as e:
|
||||
return _err(rid, 5001, str(e))
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue