mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-27 11:22:03 +00:00
Merge pull request #52620 from NousResearch/bb/desktop-session-switch-perf
This commit is contained in:
commit
edf35918be
6 changed files with 505 additions and 88 deletions
|
|
@ -256,4 +256,29 @@ describe('resumeSession failure recovery', () => {
|
|||
|
||||
expect($resumeFailedSessionId.get()).toBeNull()
|
||||
})
|
||||
|
||||
it('resumes via the gateway default (deferred build) — not lazy, no eager opt-out', async () => {
|
||||
// The switch-latency fix lives backend-side: a normal cold resume gets the
|
||||
// gateway's default DEFERRED build (transcript returns immediately, agent
|
||||
// pre-warms in the background). The client must NOT force the synchronous
|
||||
// path (eager_build) and is only `lazy` for subagent watch windows.
|
||||
let resumeParams: Record<string, unknown> | undefined
|
||||
|
||||
const requestGateway = vi.fn(async (method: string, params?: Record<string, unknown>) => {
|
||||
if (method === 'session.resume') {
|
||||
resumeParams = params
|
||||
|
||||
return { session_id: 'runtime-1', resumed: params?.session_id, messages: [], info: {} } as never
|
||||
}
|
||||
|
||||
return {} as never
|
||||
})
|
||||
|
||||
vi.mocked(getSessionMessages).mockResolvedValue({ messages: [] } as never)
|
||||
|
||||
await runResume(requestGateway)
|
||||
|
||||
expect(resumeParams).not.toHaveProperty('lazy')
|
||||
expect(resumeParams).not.toHaveProperty('eager_build')
|
||||
})
|
||||
})
|
||||
|
|
|
|||
|
|
@ -706,6 +706,11 @@ export function useSessionActions({
|
|||
const resumePromise = requestGateway<SessionResumeResponse>('session.resume', {
|
||||
session_id: storedSessionId,
|
||||
cols: 96,
|
||||
// Watch windows attach lazily (live mirror). Every other cold resume
|
||||
// gets the gateway's default deferred build: the RPC returns the
|
||||
// transcript immediately instead of blocking the switch on _make_agent
|
||||
// (MCP discovery / prompt build), and the agent pre-warms in the
|
||||
// background while the prefetch above paints the transcript.
|
||||
...(watchWindow ? { lazy: true } : {}),
|
||||
...(sessionProfile ? { profile: sessionProfile } : {})
|
||||
})
|
||||
|
|
@ -754,7 +759,13 @@ export function useSessionActions({
|
|||
return chatMessageArraysEquivalent(currentMessages, resumedMessages) ? currentMessages : resumedMessages
|
||||
})()
|
||||
|
||||
const messagesForView = preserveLocalAssistantErrors(preferredMessages, currentMessages)
|
||||
// Prefetch-hit fast path: `preferredMessages` IS the live `$messages`
|
||||
// array (already error-merged when `localSnapshot` was built), so reuse
|
||||
// the ref instead of rebuilding a throwaway transcript+Map every switch.
|
||||
const messagesForView =
|
||||
preferredMessages === currentMessages
|
||||
? currentMessages
|
||||
: preserveLocalAssistantErrors(preferredMessages, currentMessages)
|
||||
|
||||
setActiveSessionId(resumed.session_id)
|
||||
activeSessionIdRef.current = resumed.session_id
|
||||
|
|
|
|||
|
|
@ -901,6 +901,11 @@ DEFAULT_CONFIG = {
|
|||
# Global active chat session cap across CLI, TUI/dashboard, and messaging.
|
||||
# None/0 = unbounded.
|
||||
"max_concurrent_sessions": None,
|
||||
# Soft LRU cap on in-memory TUI/desktop/dashboard sessions. When more than
|
||||
# this many are live, the gateway evicts the least-recently-active DETACHED
|
||||
# sessions (no live client) so accumulated agents don't pile up under memory
|
||||
# pressure. Reopening one re-resumes it from disk. 0/null disables.
|
||||
"max_live_sessions": 16,
|
||||
"agent": {
|
||||
"max_turns": 90,
|
||||
# Inactivity timeout for gateway agent execution (seconds).
|
||||
|
|
|
|||
|
|
@ -1001,8 +1001,11 @@ def test_session_resume_follows_compression_tip(monkeypatch, tmp_path):
|
|||
)
|
||||
|
||||
try:
|
||||
# eager_build: this asserts the synchronously-built agent binds to the
|
||||
# resolved tip (captured["agent_session_id"]); the compression-tip
|
||||
# resolution itself runs before the build and is mode-agnostic.
|
||||
resp = server.handle_request(
|
||||
{"id": "1", "method": "session.resume", "params": {"session_id": "parent_root"}}
|
||||
{"id": "1", "method": "session.resume", "params": {"session_id": "parent_root", "eager_build": True}}
|
||||
)
|
||||
finally:
|
||||
db.close()
|
||||
|
|
@ -1049,8 +1052,11 @@ def test_session_resume_passes_stored_runtime_to_agent(monkeypatch):
|
|||
|
||||
monkeypatch.setattr(server, "_init_session", fake_init_session)
|
||||
|
||||
# eager_build: this asserts the synchronous build contract (stored runtime
|
||||
# overrides reach _make_agent, info comes from _session_info). The deferred
|
||||
# default restores the same overrides via _start_agent_build off-thread.
|
||||
resp = server.handle_request(
|
||||
{"id": "1", "method": "session.resume", "params": {"session_id": "stored-session"}}
|
||||
{"id": "1", "method": "session.resume", "params": {"session_id": "stored-session", "eager_build": True}}
|
||||
)
|
||||
|
||||
assert resp["result"]["info"] == {"model": "gpt-5.4", "provider": "openai-codex"}
|
||||
|
|
@ -1137,11 +1143,13 @@ def test_session_resume_profile_uses_profile_db_cwd(monkeypatch, tmp_path):
|
|||
monkeypatch.setattr(approval, "load_permanent_allowlist", lambda: None)
|
||||
|
||||
try:
|
||||
# eager_build: asserts the synchronous build receives the profile's db
|
||||
# (the deferred default builds with the same db via _start_agent_build).
|
||||
resp = server.handle_request(
|
||||
{
|
||||
"id": "1",
|
||||
"method": "session.resume",
|
||||
"params": {"session_id": target, "profile": "worker"},
|
||||
"params": {"session_id": target, "profile": "worker", "eager_build": True},
|
||||
}
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -323,7 +323,9 @@ def test_session_resume_returns_hydrated_messages(server, monkeypatch):
|
|||
{
|
||||
"id": "r1",
|
||||
"method": "session.resume",
|
||||
"params": {"session_id": "20260409_010101_abc123", "cols": 100},
|
||||
# eager_build: exercise the synchronous build path (this test
|
||||
# monkeypatches _make_agent/_init_session/_session_info).
|
||||
"params": {"session_id": "20260409_010101_abc123", "cols": 100, "eager_build": True},
|
||||
}
|
||||
)
|
||||
|
||||
|
|
@ -336,6 +338,147 @@ def test_session_resume_returns_hydrated_messages(server, monkeypatch):
|
|||
]
|
||||
|
||||
|
||||
def test_session_resume_defaults_to_deferred_build(server, monkeypatch):
|
||||
"""A normal cold resume (no ``eager_build``) must return the full display
|
||||
transcript immediately and register an upgradable live session WITHOUT
|
||||
building the agent on the response path — that eager build is the
|
||||
multi-second switch latency. Deferred is the default; ``eager_build: true``
|
||||
opts back into the synchronous path."""
|
||||
|
||||
target = "20260409_010101_abc123"
|
||||
|
||||
class _DB:
|
||||
def get_session(self, _sid):
|
||||
return {
|
||||
"id": target,
|
||||
"model": "vendor/cool-model",
|
||||
"model_config": {"provider": "vendor"},
|
||||
}
|
||||
|
||||
def get_session_by_title(self, _title):
|
||||
return None
|
||||
|
||||
def resolve_resume_session_id(self, sid):
|
||||
return sid
|
||||
|
||||
def reopen_session(self, _sid):
|
||||
return None
|
||||
|
||||
def get_messages_as_conversation(self, _sid, include_ancestors=False):
|
||||
return [
|
||||
{"role": "user", "content": "hello"},
|
||||
{"role": "assistant", "content": "yo"},
|
||||
]
|
||||
|
||||
builds: list = []
|
||||
|
||||
monkeypatch.setattr(server, "_get_db", lambda: _DB())
|
||||
# The response path must never call _make_agent; route the deferred timer
|
||||
# through a recorder so a 50ms fire can't build (or crash) under the test.
|
||||
monkeypatch.setattr(
|
||||
server, "_make_agent", lambda *a, **k: (_ for _ in ()).throw(AssertionError("no eager build"))
|
||||
)
|
||||
monkeypatch.setattr(server, "_start_agent_build", lambda sid, session: builds.append(sid))
|
||||
monkeypatch.setattr(server, "_schedule_session_cap_enforcement", lambda: None)
|
||||
|
||||
resp = server.handle_request(
|
||||
{
|
||||
"id": "r1",
|
||||
"method": "session.resume",
|
||||
"params": {"session_id": target, "cols": 100},
|
||||
}
|
||||
)
|
||||
|
||||
assert "error" not in resp
|
||||
result = resp["result"]
|
||||
assert result["resumed"] == target
|
||||
assert result["session_key"] == target
|
||||
assert result["message_count"] == 2
|
||||
assert result["messages"] == [
|
||||
{"role": "user", "text": "hello"},
|
||||
{"role": "assistant", "text": "yo"},
|
||||
]
|
||||
# Lazy info contract (same shape session.create returns), with the session's
|
||||
# persisted model/provider restored rather than the global default.
|
||||
assert result["info"]["lazy"] is True
|
||||
assert result["info"]["model"] == "vendor/cool-model"
|
||||
assert result["info"]["provider"] == "vendor"
|
||||
assert result["info"]["desktop_contract"] == server.DESKTOP_BACKEND_CONTRACT
|
||||
|
||||
sid = result["session_id"]
|
||||
session = server._sessions[sid]
|
||||
# Registered but not built: agent is None and the resume key is carried so a
|
||||
# later prompt.submit / _sess() upgrade continues THIS stored conversation.
|
||||
assert session["agent"] is None
|
||||
assert session["resume_session_id"] == target
|
||||
assert not session["agent_ready"].is_set()
|
||||
# Not a watch spectator: a normal deferred resume is a real session.
|
||||
assert not session.get("lazy")
|
||||
# The persisted runtime identity is stashed for the deferred build so it
|
||||
# can't drop the provider ("No LLM provider configured").
|
||||
assert session["resume_runtime_overrides"]["model_override"]["model"] == "vendor/cool-model"
|
||||
assert server._find_live_session_by_key(target) == (sid, session)
|
||||
|
||||
|
||||
def test_enforce_session_cap_evicts_oldest_detached_only(server, monkeypatch):
|
||||
"""The LRU cap frees the least-recently-active DETACHED sessions when over
|
||||
the limit, and never a live-transport / running / mid-build one."""
|
||||
|
||||
monkeypatch.setattr(server, "_load_cfg", lambda: {"max_live_sessions": 2})
|
||||
evicted: list[str] = []
|
||||
monkeypatch.setattr(
|
||||
server, "_close_session_by_id", lambda sid, end_reason=None: evicted.append(sid)
|
||||
)
|
||||
|
||||
def _ready() -> threading.Event:
|
||||
ev = threading.Event()
|
||||
ev.set()
|
||||
return ev
|
||||
|
||||
detached = server._detached_ws_transport
|
||||
live = object() # no _closed attr -> live transport, never evictable
|
||||
|
||||
server._sessions.clear()
|
||||
server._sessions.update(
|
||||
{
|
||||
"old_detached": {"transport": detached, "last_active": 100.0, "agent_ready": _ready()},
|
||||
"new_detached": {"transport": detached, "last_active": 300.0, "agent_ready": _ready()},
|
||||
"running_detached": {
|
||||
"transport": detached,
|
||||
"last_active": 50.0,
|
||||
"running": True,
|
||||
"agent_ready": _ready(),
|
||||
},
|
||||
"focused_live": {"transport": live, "last_active": 200.0, "agent_ready": _ready()},
|
||||
}
|
||||
)
|
||||
|
||||
server._enforce_session_cap()
|
||||
|
||||
# 4 sessions, cap 2 -> evict 2. Only detached+idle+built are eligible, oldest
|
||||
# first; the running one and the live-transport one are exempt.
|
||||
assert evicted == ["old_detached", "new_detached"]
|
||||
|
||||
|
||||
def test_enforce_session_cap_disabled_is_noop(server, monkeypatch):
|
||||
monkeypatch.setattr(server, "_load_cfg", lambda: {"max_live_sessions": 0})
|
||||
evicted: list[str] = []
|
||||
monkeypatch.setattr(
|
||||
server, "_close_session_by_id", lambda sid, end_reason=None: evicted.append(sid)
|
||||
)
|
||||
server._sessions.clear()
|
||||
server._sessions.update(
|
||||
{
|
||||
f"s{i}": {"transport": server._detached_ws_transport, "last_active": float(i)}
|
||||
for i in range(5)
|
||||
}
|
||||
)
|
||||
|
||||
server._enforce_session_cap()
|
||||
|
||||
assert evicted == []
|
||||
|
||||
|
||||
def test_session_resume_handles_multimodal_list_content(server, monkeypatch):
|
||||
"""A user message persisted with list-shaped multimodal content used to
|
||||
crash session resume with ``'list' object has no attribute 'strip'``."""
|
||||
|
|
@ -374,7 +517,7 @@ def test_session_resume_handles_multimodal_list_content(server, monkeypatch):
|
|||
{
|
||||
"id": "r1",
|
||||
"method": "session.resume",
|
||||
"params": {"session_id": "20260502_000000_listcontent", "cols": 100},
|
||||
"params": {"session_id": "20260502_000000_listcontent", "cols": 100, "eager_build": True},
|
||||
}
|
||||
)
|
||||
|
||||
|
|
@ -688,7 +831,9 @@ def test_session_resume_reuses_existing_live_session(server, monkeypatch):
|
|||
{
|
||||
"id": "first",
|
||||
"method": "session.resume",
|
||||
"params": {"session_id": target, "cols": 100},
|
||||
# eager_build: this test drives the synchronous build race +
|
||||
# double-checked locking that only the eager path exercises.
|
||||
"params": {"session_id": target, "cols": 100, "eager_build": True},
|
||||
}
|
||||
)
|
||||
|
||||
|
|
@ -703,7 +848,7 @@ def test_session_resume_reuses_existing_live_session(server, monkeypatch):
|
|||
{
|
||||
"id": "second",
|
||||
"method": "session.resume",
|
||||
"params": {"session_id": target, "cols": 120},
|
||||
"params": {"session_id": target, "cols": 120, "eager_build": True},
|
||||
}
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -741,6 +741,76 @@ def _reap_idle_sessions() -> None:
|
|||
victims = [sid for sid, s in _sessions.items() if _session_is_evictable(sid, s, now)]
|
||||
for sid in victims:
|
||||
_close_session_by_id(sid, end_reason="idle_timeout")
|
||||
_enforce_session_cap()
|
||||
|
||||
|
||||
# Soft LRU cap on in-memory sessions. The 6h TTL reaper above only frees
|
||||
# sessions that have been idle for hours; a heavy user who reconnects often
|
||||
# accumulates detached sessions (the report's ``detached_sessions=5``) whose
|
||||
# agents sit resident for the full TTL. The cap evicts the least-recently-active
|
||||
# DETACHED sessions sooner so live agents don't pile up under memory pressure.
|
||||
# Default-on but provably safe: it only touches sessions with no live client
|
||||
# (reopening re-resumes them from the DB) and never a running / pending /
|
||||
# mid-build / live-transport one. 0/null disables.
|
||||
def _max_live_sessions() -> int:
|
||||
try:
|
||||
from hermes_cli.active_sessions import coerce_max_concurrent_sessions
|
||||
|
||||
cfg = _load_cfg() or {}
|
||||
raw = cfg.get("max_live_sessions")
|
||||
if raw is None:
|
||||
gateway_cfg = cfg.get("gateway")
|
||||
if isinstance(gateway_cfg, dict):
|
||||
raw = gateway_cfg.get("max_live_sessions")
|
||||
coerced = coerce_max_concurrent_sessions(raw, key="max_live_sessions")
|
||||
return int(coerced) if coerced else 0
|
||||
except Exception:
|
||||
return 0
|
||||
|
||||
|
||||
def _session_is_lru_evictable(sid: str, session: dict) -> bool:
|
||||
# Same hard exemptions as the TTL reaper (never evict a session mid-turn,
|
||||
# awaiting input, or still building), but WITHOUT the hours-scale age gate:
|
||||
# a detached session is eligible the moment it loses its client.
|
||||
if session.get("running") or _session_pending_kind(sid):
|
||||
return False
|
||||
ready = session.get("agent_ready")
|
||||
if ready is not None and not ready.is_set() and not session.get("lazy"):
|
||||
return False
|
||||
return _transport_is_dead(session.get("transport"))
|
||||
|
||||
|
||||
def _enforce_session_cap() -> None:
|
||||
cap = _max_live_sessions()
|
||||
if cap <= 0:
|
||||
return
|
||||
with _sessions_lock:
|
||||
total = len(_sessions)
|
||||
if total <= cap:
|
||||
return
|
||||
evictable = [
|
||||
(sid, s) for sid, s in _sessions.items() if _session_is_lru_evictable(sid, s)
|
||||
]
|
||||
# Oldest-touched first; only evict down to the cap (live/focused sessions on
|
||||
# a live transport are never eligible, so we may stop short of the cap).
|
||||
evictable.sort(key=lambda kv: float(kv[1].get("last_active") or 0.0))
|
||||
overflow = total - cap
|
||||
for sid, _s in evictable[:overflow]:
|
||||
_close_session_by_id(sid, end_reason="lru_evict")
|
||||
|
||||
|
||||
def _schedule_session_cap_enforcement() -> None:
|
||||
"""Run the LRU sweep off the response path (eviction can call agent.close)."""
|
||||
|
||||
def _run():
|
||||
try:
|
||||
_enforce_session_cap()
|
||||
except Exception:
|
||||
logger.debug("session cap enforcement failed", exc_info=True)
|
||||
|
||||
timer = threading.Timer(0.1, _run)
|
||||
timer.daemon = True
|
||||
timer.start()
|
||||
|
||||
|
||||
def _start_idle_reaper() -> None:
|
||||
|
|
@ -1111,15 +1181,24 @@ def _start_agent_build(sid: str, session: dict) -> None:
|
|||
kw = {"session_db": session_db}
|
||||
if resume_sid := current.get("resume_session_id"):
|
||||
kw["session_id"] = resume_sid
|
||||
# Model/effort/fast the desktop picked for a brand-new chat ride
|
||||
# in as per-session overrides so the first build uses them
|
||||
# directly (no global config, no build-then-switch).
|
||||
if override := current.get("model_override"):
|
||||
kw["model_override"] = override
|
||||
if (reasoning := current.get("create_reasoning_override")) is not None:
|
||||
kw["reasoning_config_override"] = reasoning
|
||||
if (tier := current.get("create_service_tier_override")) is not None:
|
||||
kw["service_tier_override"] = tier
|
||||
resume_overrides = current.get("resume_runtime_overrides")
|
||||
if isinstance(resume_overrides, dict) and resume_overrides:
|
||||
# Cold deferred resume: restore the full persisted runtime
|
||||
# identity (model/provider/base_url/api_mode/reasoning/tier)
|
||||
# exactly as the eager resume path's _stored_session_runtime_
|
||||
# overrides splat did, so a deferred build can't drop the
|
||||
# provider and fail with "No LLM provider configured".
|
||||
kw.update(resume_overrides)
|
||||
else:
|
||||
# Model/effort/fast the desktop picked for a brand-new chat
|
||||
# ride in as per-session overrides so the first build uses
|
||||
# them directly (no global config, no build-then-switch).
|
||||
if override := current.get("model_override"):
|
||||
kw["model_override"] = override
|
||||
if (reasoning := current.get("create_reasoning_override")) is not None:
|
||||
kw["reasoning_config_override"] = reasoning
|
||||
if (tier := current.get("create_service_tier_override")) is not None:
|
||||
kw["service_tier_override"] = tier
|
||||
agent = _make_agent(sid, key, **kw)
|
||||
finally:
|
||||
_clear_session_context(tokens)
|
||||
|
|
@ -1152,6 +1231,19 @@ def _start_agent_build(sid: str, session: dict) -> None:
|
|||
pass
|
||||
|
||||
_wire_callbacks(sid)
|
||||
# Surface the self-improvement review's "💾 …" summary as an event
|
||||
# the TUI/desktop render in-transcript, honoring
|
||||
# display.memory_notifications. _init_session wires this for the
|
||||
# eager/branch paths; deferred-built sessions (session.create and the
|
||||
# default cold resume) build through here, so without this their
|
||||
# review summaries would leak to stdout instead of the chat.
|
||||
try:
|
||||
agent.background_review_callback = lambda message, _sid=sid: _emit(
|
||||
"review.summary", _sid, {"text": str(message)}
|
||||
)
|
||||
agent.memory_notifications = _load_memory_notifications()
|
||||
except Exception:
|
||||
pass
|
||||
# Hydrate credits notices at session OPEN (not just on the first
|
||||
# message), so depletion / usage-band warnings show at "ready". Runs
|
||||
# off the build thread, after the notice_callback is wired. Fail-open.
|
||||
|
|
@ -4594,14 +4686,8 @@ def _(rid, params: dict) -> dict:
|
|||
# + skeleton panel, then build the real AIAgent just after this response is
|
||||
# flushed. This keeps startup responsive while still hydrating tools/skills
|
||||
# without requiring the user to submit a first prompt.
|
||||
def _deferred_build() -> None:
|
||||
session = _sessions.get(sid)
|
||||
if session is not None:
|
||||
_start_agent_build(sid, session)
|
||||
|
||||
build_timer = threading.Timer(0.05, _deferred_build)
|
||||
build_timer.daemon = True
|
||||
build_timer.start()
|
||||
_schedule_agent_build(sid)
|
||||
_schedule_session_cap_enforcement() # trim detached idle sessions over the cap
|
||||
|
||||
return _ok(
|
||||
rid,
|
||||
|
|
@ -4769,6 +4855,110 @@ def _(rid, params: dict) -> dict:
|
|||
return _ok(rid, {"verification": {"status": "unknown", "evidence": None}})
|
||||
|
||||
|
||||
def _lazy_resume_info(cwd: str, *, model: str = "", provider: str = "") -> dict:
|
||||
"""session.info for a not-yet-built session (the shape session.create
|
||||
returns). tools/skills land later when the deferred build emits session.info."""
|
||||
info = {
|
||||
"cwd": cwd,
|
||||
"branch": _git_branch_for_cwd(cwd),
|
||||
"model": model or _resolve_model(),
|
||||
"tools": {},
|
||||
"skills": {},
|
||||
"lazy": True,
|
||||
"desktop_contract": DESKTOP_BACKEND_CONTRACT,
|
||||
"profile_name": _current_profile_name(),
|
||||
}
|
||||
if provider:
|
||||
info["provider"] = provider
|
||||
return info
|
||||
|
||||
|
||||
def _deferred_session_record(
|
||||
session_key: str,
|
||||
*,
|
||||
cols: int,
|
||||
cwd: str,
|
||||
history: list,
|
||||
lease,
|
||||
source: str = "tui",
|
||||
close_on_disconnect: bool = False,
|
||||
display_history_prefix: list | None = None,
|
||||
profile_home: Path | None = None,
|
||||
lazy: bool = False,
|
||||
model_override=None,
|
||||
resume_runtime_overrides: dict | None = None,
|
||||
) -> dict:
|
||||
"""A live-session record whose AIAgent is built later (lazy watch / cold
|
||||
resume) — _init_session's shape minus the agent."""
|
||||
now = time.time()
|
||||
return {
|
||||
"agent": None,
|
||||
"agent_error": None,
|
||||
"agent_ready": threading.Event(),
|
||||
"attached_images": [],
|
||||
"close_on_disconnect": close_on_disconnect,
|
||||
"active_session_lease": lease,
|
||||
"cols": cols,
|
||||
"created_at": now,
|
||||
"cwd": cwd,
|
||||
"display_history_prefix": display_history_prefix or [],
|
||||
"edit_snapshots": {},
|
||||
"explicit_cwd": False,
|
||||
"history": history,
|
||||
"history_lock": threading.Lock(),
|
||||
"history_version": 0,
|
||||
"image_counter": 0,
|
||||
"inflight_turn": None,
|
||||
"last_active": now,
|
||||
"lazy": lazy,
|
||||
"model_override": model_override,
|
||||
"pending_title": None,
|
||||
"profile_home": str(profile_home) if profile_home is not None else None,
|
||||
"resume_runtime_overrides": resume_runtime_overrides,
|
||||
"resume_session_id": session_key,
|
||||
"running": False,
|
||||
"session_key": session_key,
|
||||
"show_reasoning": _load_show_reasoning(),
|
||||
"slash_worker": None,
|
||||
"source": source,
|
||||
"tool_progress_mode": _load_tool_progress_mode(),
|
||||
"tool_started_at": {},
|
||||
"transport": current_transport() or _stdio_transport,
|
||||
}
|
||||
|
||||
|
||||
def _claim_or_reuse_live(
|
||||
sid: str, session_key: str, record: dict, lease
|
||||
) -> tuple[str, dict] | None:
|
||||
"""Register ``record`` as the live session for ``session_key`` under the
|
||||
resume lock, or — if a concurrent resume already won — release ``lease`` and
|
||||
return the winner for the caller to reuse."""
|
||||
with _session_resume_lock:
|
||||
live = _find_live_session_by_key(session_key)
|
||||
if live is not None:
|
||||
if lease is not None:
|
||||
lease.release()
|
||||
return live
|
||||
with _sessions_lock:
|
||||
_sessions[sid] = record
|
||||
_register_session_cwd(_sessions[sid])
|
||||
return None
|
||||
|
||||
|
||||
def _schedule_agent_build(sid: str, delay: float = 0.05) -> None:
|
||||
"""Pre-warm a deferred session's agent off the response path (session.create
|
||||
and cold resume both build through here; _sess() also builds on demand)."""
|
||||
|
||||
def _run():
|
||||
session = _sessions.get(sid)
|
||||
if session is not None:
|
||||
_start_agent_build(sid, session)
|
||||
|
||||
timer = threading.Timer(delay, _run)
|
||||
timer.daemon = True
|
||||
timer.start()
|
||||
|
||||
|
||||
@method("session.resume")
|
||||
def _(rid, params: dict) -> dict:
|
||||
target = params.get("session_id", "")
|
||||
|
|
@ -4876,65 +5066,31 @@ def _(rid, params: dict) -> dict:
|
|||
return _err(rid, 4090, limit_message)
|
||||
try:
|
||||
db.reopen_session(target)
|
||||
# The child's OWN conversation only. Delegation children are
|
||||
# parent-linked rows, so include_ancestors would prepend the
|
||||
# parent's entire transcript — a watch window opened on a subagent
|
||||
# must show the subagent's branch, not the parent's prompt.
|
||||
# The child's OWN conversation only — include_ancestors would prepend
|
||||
# the parent's transcript onto the subagent's branch.
|
||||
history = db.get_messages_as_conversation(target)
|
||||
except Exception as e:
|
||||
if lease is not None:
|
||||
lease.release()
|
||||
return _err(rid, 5000, f"resume failed: {e}")
|
||||
messages = _history_to_messages(history)
|
||||
cwd = profile_resume_cwd or os.getenv("TERMINAL_CWD", os.getcwd())
|
||||
now = time.time()
|
||||
# A delegated child mid-run emits no native session events of its own —
|
||||
# report its liveness from the relay registry so the window paints a
|
||||
# busy indicator instead of a dead idle transcript.
|
||||
record = _deferred_session_record(
|
||||
target,
|
||||
cols=cols,
|
||||
cwd=cwd,
|
||||
history=history,
|
||||
lease=lease,
|
||||
source=str(params.get("source") or "tui").strip() or "tui",
|
||||
close_on_disconnect=is_truthy_value(params.get("close_on_disconnect", False)),
|
||||
profile_home=profile_home,
|
||||
lazy=True,
|
||||
)
|
||||
if (live := _claim_or_reuse_live(sid, target, record, lease)) is not None:
|
||||
return _ok(rid, _reuse_live_payload(*live))
|
||||
# A delegated child mid-run emits no session events of its own — report
|
||||
# its liveness from the relay registry so the window shows a busy turn.
|
||||
child_running = _child_run_active(target)
|
||||
source = str(params.get("source") or "tui").strip() or "tui"
|
||||
with _session_resume_lock:
|
||||
live = _find_live_session_by_key(target)
|
||||
if live is not None:
|
||||
if lease is not None:
|
||||
lease.release()
|
||||
return _ok(rid, _reuse_live_payload(*live))
|
||||
with _sessions_lock:
|
||||
_sessions[sid] = {
|
||||
"agent": None,
|
||||
"agent_error": None,
|
||||
"agent_ready": threading.Event(),
|
||||
"attached_images": [],
|
||||
"close_on_disconnect": is_truthy_value(
|
||||
params.get("close_on_disconnect", False)
|
||||
),
|
||||
"active_session_lease": lease,
|
||||
"cols": cols,
|
||||
"created_at": now,
|
||||
"display_history_prefix": [],
|
||||
"edit_snapshots": {},
|
||||
"explicit_cwd": False,
|
||||
"history": history,
|
||||
"history_lock": threading.Lock(),
|
||||
"history_version": 0,
|
||||
"image_counter": 0,
|
||||
"cwd": cwd,
|
||||
"inflight_turn": None,
|
||||
"last_active": now,
|
||||
"lazy": True,
|
||||
"pending_title": None,
|
||||
"profile_home": str(profile_home) if profile_home is not None else None,
|
||||
"resume_session_id": target,
|
||||
"running": False,
|
||||
"session_key": target,
|
||||
"show_reasoning": _load_show_reasoning(),
|
||||
"source": source,
|
||||
"slash_worker": None,
|
||||
"tool_progress_mode": _load_tool_progress_mode(),
|
||||
"tool_started_at": {},
|
||||
"transport": current_transport() or _stdio_transport,
|
||||
}
|
||||
_register_session_cwd(_sessions[sid])
|
||||
messages = _history_to_messages(history)
|
||||
return _ok(
|
||||
rid,
|
||||
{
|
||||
|
|
@ -4942,24 +5098,91 @@ def _(rid, params: dict) -> dict:
|
|||
"resumed": target,
|
||||
"message_count": len(messages),
|
||||
"messages": messages,
|
||||
"info": {
|
||||
"cwd": cwd,
|
||||
"branch": _git_branch_for_cwd(cwd),
|
||||
"model": _resolve_model(),
|
||||
"tools": {},
|
||||
"skills": {},
|
||||
"lazy": True,
|
||||
"desktop_contract": DESKTOP_BACKEND_CONTRACT,
|
||||
"profile_name": _current_profile_name(),
|
||||
},
|
||||
"info": _lazy_resume_info(cwd),
|
||||
"inflight": None,
|
||||
"running": child_running,
|
||||
"session_key": target,
|
||||
"started_at": now,
|
||||
"started_at": record["created_at"],
|
||||
"status": "streaming" if child_running else "idle",
|
||||
},
|
||||
)
|
||||
|
||||
# Cold resume default: register the live session and read its stored
|
||||
# transcript, but build the agent OFF the response path. _make_agent can
|
||||
# block for seconds (MCP discovery, prompt/skill build, AIAgent
|
||||
# construction), and every resume caller (desktop + Ink TUI) awaits this RPC
|
||||
# before it paints — so building eagerly is the bulk of the multi-second
|
||||
# "switching sessions is frozen" latency. Return the full display transcript
|
||||
# immediately and pre-warm the agent on a short timer (the same deferred-
|
||||
# build contract session.create uses); _sess() also builds on demand if the
|
||||
# first prompt beats the timer. A caller that needs the agent built
|
||||
# synchronously (e.g. tests of the build race) passes ``eager_build: true``
|
||||
# to fall through to the eager path below. Distinct from the lazy/watch
|
||||
# branch above: a normal resume restores the full ancestor history and the
|
||||
# session's persisted runtime identity, and is a real (upgradable) session.
|
||||
if not is_truthy_value(params.get("eager_build", False)):
|
||||
sid = uuid.uuid4().hex[:8]
|
||||
lease, limit_message = _claim_active_session_slot(target, live_session_id=sid)
|
||||
if limit_message is not None:
|
||||
return _err(rid, 4090, limit_message)
|
||||
# Interactive resume routes approvals/clarify through gateway prompts;
|
||||
# the deferred build wires the remaining per-session callbacks.
|
||||
_enable_gateway_prompts()
|
||||
try:
|
||||
db.reopen_session(target)
|
||||
history = db.get_messages_as_conversation(target)
|
||||
display_history = db.get_messages_as_conversation(target, include_ancestors=True)
|
||||
except Exception as e:
|
||||
if lease is not None:
|
||||
lease.release()
|
||||
return _err(rid, 5000, f"resume failed: {e}")
|
||||
prefix = display_history[: max(0, len(display_history) - len(history))]
|
||||
# Restore the model/provider/reasoning/tier this chat last used so the
|
||||
# deferred build (and the info below) match the eager path — without them
|
||||
# the build drops the provider ("No LLM provider configured").
|
||||
overrides = _stored_session_runtime_overrides(found) or {}
|
||||
model_override = overrides.get("model_override") or {}
|
||||
cwd = profile_resume_cwd or os.getenv("TERMINAL_CWD", os.getcwd())
|
||||
record = _deferred_session_record(
|
||||
target,
|
||||
cols=cols,
|
||||
cwd=cwd,
|
||||
history=history,
|
||||
lease=lease,
|
||||
source=str(params.get("source") or "tui").strip() or "tui",
|
||||
close_on_disconnect=is_truthy_value(params.get("close_on_disconnect", False)),
|
||||
display_history_prefix=prefix,
|
||||
profile_home=profile_home,
|
||||
model_override=overrides.get("model_override"),
|
||||
resume_runtime_overrides=overrides or None,
|
||||
)
|
||||
if (live := _claim_or_reuse_live(sid, target, record, lease)) is not None:
|
||||
return _ok(rid, _reuse_live_payload(*live))
|
||||
|
||||
_schedule_agent_build(sid)
|
||||
_schedule_session_cap_enforcement() # trim detached idle sessions over the cap
|
||||
|
||||
messages = _history_to_messages(display_history)
|
||||
return _ok(
|
||||
rid,
|
||||
{
|
||||
"session_id": sid,
|
||||
"resumed": target,
|
||||
"message_count": len(messages),
|
||||
"messages": messages,
|
||||
"info": _lazy_resume_info(
|
||||
cwd,
|
||||
model=model_override.get("model") or "",
|
||||
provider=overrides.get("provider_override") or "",
|
||||
),
|
||||
"inflight": None,
|
||||
"running": False,
|
||||
"session_key": target,
|
||||
"started_at": record["created_at"],
|
||||
"status": "idle",
|
||||
},
|
||||
)
|
||||
|
||||
# Build the agent OUTSIDE the lock — _make_agent can block for seconds
|
||||
# (MCP discovery, prompt/skill build, AIAgent construction). Holding
|
||||
# _session_resume_lock across it would stall session.close on the main
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue