From c4c590e4a14a064033c10067f0a72b3cd2a53b8b Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Thu, 25 Jun 2026 13:18:04 -0500 Subject: [PATCH 1/4] perf(desktop): make session switching fast under load MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Switching sessions in the desktop app could freeze the whole UI for several seconds on heavy, tool-rich chats. Root causes and fixes: - Cold `session.resume` built the AIAgent (MCP discovery, prompt/skill build) *before* returning, and the desktop awaits that RPC before it paints — so the entire switch blocked on the build. Add an opt-in `defer_build` resume path (the contract `session.create` already uses): return the full display transcript immediately, register an upgradable live session, and pre-warm the agent on a short timer. The persisted runtime identity (model/provider/base_url/api_mode/reasoning/tier) is restored on the deferred build so it can't drop the provider. - Nothing bounded how many in-memory agents accumulate; a user who reconnects often piled up detached sessions for the full 6h TTL. Add a soft LRU cap (`max_live_sessions`, default 16) that evicts the least-recently-active DETACHED sessions (no live client) — never a running, awaiting-input, mid-build, or live-transport one. Reopening re-resumes from disk. - On the prefetch-hit cold-resume path, skip rebuilding a throwaway merged-message array (and its 1000-entry Map) when the prefetch already painted the exact transcript; the downstream sameMessageList guard already drops the publish, so it was pure main-thread cost. The desktop opts into `defer_build` for every non-watch cold resume; the eager path stays for CLI/TUI and existing callers. --- .../hooks/use-session-actions.test.tsx | 25 ++ .../app/session/hooks/use-session-actions.ts | 20 +- hermes_cli/config.py | 5 + tests/tui_gateway/test_protocol.py | 140 +++++++++++ tui_gateway/server.py | 231 +++++++++++++++++- 5 files changed, 410 insertions(+), 11 deletions(-) diff --git a/apps/desktop/src/app/session/hooks/use-session-actions.test.tsx b/apps/desktop/src/app/session/hooks/use-session-actions.test.tsx index a84a854ded4..31b2cc58afc 100644 --- a/apps/desktop/src/app/session/hooks/use-session-actions.test.tsx +++ b/apps/desktop/src/app/session/hooks/use-session-actions.test.tsx @@ -256,4 +256,29 @@ describe('resumeSession failure recovery', () => { expect($resumeFailedSessionId.get()).toBeNull() }) + + it('asks the backend to DEFER the agent build on a normal cold resume', async () => { + // The switch-latency fix: a non-watch cold resume tells the gateway to + // return the transcript immediately and build the agent in the background, + // rather than blocking the RPC (and the whole switch) on _make_agent. + let resumeParams: Record | undefined + + const requestGateway = vi.fn(async (method: string, params?: Record) => { + if (method === 'session.resume') { + resumeParams = params + + return { session_id: 'runtime-1', resumed: params?.session_id, messages: [], info: {} } as never + } + + return {} as never + }) + + vi.mocked(getSessionMessages).mockResolvedValue({ messages: [] } as never) + + await runResume(requestGateway) + + expect(resumeParams).toMatchObject({ defer_build: true }) + // Watch-window lazy attach is the OTHER mode; a normal resume isn't lazy. + expect(resumeParams).not.toHaveProperty('lazy') + }) }) diff --git a/apps/desktop/src/app/session/hooks/use-session-actions.ts b/apps/desktop/src/app/session/hooks/use-session-actions.ts index 36dfea759f2..5c8be9762e1 100644 --- a/apps/desktop/src/app/session/hooks/use-session-actions.ts +++ b/apps/desktop/src/app/session/hooks/use-session-actions.ts @@ -706,7 +706,12 @@ export function useSessionActions({ const resumePromise = requestGateway('session.resume', { session_id: storedSessionId, cols: 96, - ...(watchWindow ? { lazy: true } : {}), + // Watch windows attach lazily (live mirror); every other cold resume + // asks the backend to DEFER the agent build so the RPC returns the + // transcript immediately instead of blocking the switch on + // _make_agent (MCP discovery / prompt build). The agent pre-warms in + // the background and the prefetch above paints the transcript. + ...(watchWindow ? { lazy: true } : { defer_build: true }), ...(sessionProfile ? { profile: sessionProfile } : {}) }) // The rejection is consumed by the `await` below; this guard only @@ -754,7 +759,18 @@ export function useSessionActions({ return chatMessageArraysEquivalent(currentMessages, resumedMessages) ? currentMessages : resumedMessages })() - const messagesForView = preserveLocalAssistantErrors(preferredMessages, currentMessages) + // When the prefetch already painted these exact messages (the common + // cold-resume path), `preferredMessages` IS the live `$messages` array. + // Re-running preserveLocalAssistantErrors there would build a 1000-entry + // Map and map the whole transcript into a throwaway array on every + // switch — pure main-thread cost on the hot path (the downstream + // sameMessageList guard already drops the publish, so it buys nothing). + // The prefetch branch already merged local assistant errors when it + // built `localSnapshot`, so reuse the ref instead. + const messagesForView = + preferredMessages === currentMessages + ? currentMessages + : preserveLocalAssistantErrors(preferredMessages, currentMessages) setActiveSessionId(resumed.session_id) activeSessionIdRef.current = resumed.session_id diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 06e3ad5d7b9..b64086c91e8 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -901,6 +901,11 @@ DEFAULT_CONFIG = { # Global active chat session cap across CLI, TUI/dashboard, and messaging. # None/0 = unbounded. "max_concurrent_sessions": None, + # Soft LRU cap on in-memory TUI/desktop/dashboard sessions. When more than + # this many are live, the gateway evicts the least-recently-active DETACHED + # sessions (no live client) so accumulated agents don't pile up under memory + # pressure. Reopening one re-resumes it from disk. 0/null disables. + "max_live_sessions": 16, "agent": { "max_turns": 90, # Inactivity timeout for gateway agent execution (seconds). diff --git a/tests/tui_gateway/test_protocol.py b/tests/tui_gateway/test_protocol.py index 3b385bf825a..8b45a672aed 100644 --- a/tests/tui_gateway/test_protocol.py +++ b/tests/tui_gateway/test_protocol.py @@ -336,6 +336,146 @@ def test_session_resume_returns_hydrated_messages(server, monkeypatch): ] +def test_session_resume_defer_build_returns_transcript_without_blocking(server, monkeypatch): + """``defer_build: true`` (desktop cold resume) must return the full display + transcript immediately and register an upgradable live session WITHOUT + building the agent on the response path — that eager build is the + multi-second switch latency.""" + + target = "20260409_010101_abc123" + + class _DB: + def get_session(self, _sid): + return { + "id": target, + "model": "vendor/cool-model", + "model_config": {"provider": "vendor"}, + } + + def get_session_by_title(self, _title): + return None + + def resolve_resume_session_id(self, sid): + return sid + + def reopen_session(self, _sid): + return None + + def get_messages_as_conversation(self, _sid, include_ancestors=False): + return [ + {"role": "user", "content": "hello"}, + {"role": "assistant", "content": "yo"}, + ] + + builds: list = [] + + monkeypatch.setattr(server, "_get_db", lambda: _DB()) + # The response path must never call _make_agent; route the deferred timer + # through a recorder so a 50ms fire can't build (or crash) under the test. + monkeypatch.setattr( + server, "_make_agent", lambda *a, **k: (_ for _ in ()).throw(AssertionError("no eager build")) + ) + monkeypatch.setattr(server, "_start_agent_build", lambda sid, session: builds.append(sid)) + monkeypatch.setattr(server, "_schedule_session_cap_enforcement", lambda: None) + + resp = server.handle_request( + { + "id": "r1", + "method": "session.resume", + "params": {"session_id": target, "cols": 100, "defer_build": True}, + } + ) + + assert "error" not in resp + result = resp["result"] + assert result["resumed"] == target + assert result["session_key"] == target + assert result["message_count"] == 2 + assert result["messages"] == [ + {"role": "user", "text": "hello"}, + {"role": "assistant", "text": "yo"}, + ] + # Lazy info contract (same shape session.create returns), with the session's + # persisted model/provider restored rather than the global default. + assert result["info"]["lazy"] is True + assert result["info"]["model"] == "vendor/cool-model" + assert result["info"]["provider"] == "vendor" + assert result["info"]["desktop_contract"] == server.DESKTOP_BACKEND_CONTRACT + + sid = result["session_id"] + session = server._sessions[sid] + # Registered but not built: agent is None and the resume key is carried so a + # later prompt.submit / _sess() upgrade continues THIS stored conversation. + assert session["agent"] is None + assert session["resume_session_id"] == target + assert not session["agent_ready"].is_set() + # Not a watch spectator: a normal deferred resume is a real session. + assert not session.get("lazy") + # The persisted runtime identity is stashed for the deferred build so it + # can't drop the provider ("No LLM provider configured"). + assert session["resume_runtime_overrides"]["model_override"]["model"] == "vendor/cool-model" + assert server._find_live_session_by_key(target) == (sid, session) + + +def test_enforce_session_cap_evicts_oldest_detached_only(server, monkeypatch): + """The LRU cap frees the least-recently-active DETACHED sessions when over + the limit, and never a live-transport / running / mid-build one.""" + + monkeypatch.setattr(server, "_load_cfg", lambda: {"max_live_sessions": 2}) + evicted: list[str] = [] + monkeypatch.setattr( + server, "_close_session_by_id", lambda sid, end_reason=None: evicted.append(sid) + ) + + def _ready() -> threading.Event: + ev = threading.Event() + ev.set() + return ev + + detached = server._detached_ws_transport + live = object() # no _closed attr -> live transport, never evictable + + server._sessions.clear() + server._sessions.update( + { + "old_detached": {"transport": detached, "last_active": 100.0, "agent_ready": _ready()}, + "new_detached": {"transport": detached, "last_active": 300.0, "agent_ready": _ready()}, + "running_detached": { + "transport": detached, + "last_active": 50.0, + "running": True, + "agent_ready": _ready(), + }, + "focused_live": {"transport": live, "last_active": 200.0, "agent_ready": _ready()}, + } + ) + + server._enforce_session_cap() + + # 4 sessions, cap 2 -> evict 2. Only detached+idle+built are eligible, oldest + # first; the running one and the live-transport one are exempt. + assert evicted == ["old_detached", "new_detached"] + + +def test_enforce_session_cap_disabled_is_noop(server, monkeypatch): + monkeypatch.setattr(server, "_load_cfg", lambda: {"max_live_sessions": 0}) + evicted: list[str] = [] + monkeypatch.setattr( + server, "_close_session_by_id", lambda sid, end_reason=None: evicted.append(sid) + ) + server._sessions.clear() + server._sessions.update( + { + f"s{i}": {"transport": server._detached_ws_transport, "last_active": float(i)} + for i in range(5) + } + ) + + server._enforce_session_cap() + + assert evicted == [] + + def test_session_resume_handles_multimodal_list_content(server, monkeypatch): """A user message persisted with list-shaped multimodal content used to crash session resume with ``'list' object has no attribute 'strip'``.""" diff --git a/tui_gateway/server.py b/tui_gateway/server.py index 2bd20968590..40910e291ba 100644 --- a/tui_gateway/server.py +++ b/tui_gateway/server.py @@ -741,6 +741,79 @@ def _reap_idle_sessions() -> None: victims = [sid for sid, s in _sessions.items() if _session_is_evictable(sid, s, now)] for sid in victims: _close_session_by_id(sid, end_reason="idle_timeout") + _enforce_session_cap() + + +# Soft LRU cap on in-memory sessions. The 6h TTL reaper above only frees +# sessions that have been idle for hours; a heavy user who reconnects often +# accumulates detached sessions (the report's ``detached_sessions=5``) whose +# agents sit resident for the full TTL. The cap evicts the least-recently-active +# DETACHED sessions sooner so live agents don't pile up under memory pressure. +# Default-on but provably safe: it only touches sessions with no live client +# (reopening re-resumes them from the DB) and never a running / pending / +# mid-build / live-transport one. 0/null disables. +def _max_live_sessions() -> int: + try: + from hermes_cli.active_sessions import coerce_max_concurrent_sessions + + cfg = _load_cfg() or {} + raw = cfg.get("max_live_sessions") + if raw is None: + gateway_cfg = cfg.get("gateway") + if isinstance(gateway_cfg, dict): + raw = gateway_cfg.get("max_live_sessions") + coerced = coerce_max_concurrent_sessions(raw, key="max_live_sessions") + return int(coerced) if coerced else 0 + except Exception: + return 0 + + +def _session_is_lru_evictable(sid: str, session: dict) -> bool: + # Same hard exemptions as the TTL reaper (never evict a session mid-turn, + # awaiting input, or still building), but WITHOUT the hours-scale age gate: + # a detached session is eligible the moment it loses its client. + if session.get("running") or _session_pending_kind(sid): + return False + ready = session.get("agent_ready") + if ready is not None and not ready.is_set() and not session.get("lazy"): + return False + return _transport_is_dead(session.get("transport")) + + +def _enforce_session_cap() -> None: + cap = _max_live_sessions() + if cap <= 0: + return + with _sessions_lock: + total = len(_sessions) + if total <= cap: + return + evictable = [ + (sid, s) for sid, s in _sessions.items() if _session_is_lru_evictable(sid, s) + ] + # Oldest-touched first; only evict down to the cap (live/focused sessions on + # a live transport are never eligible, so we may stop short of the cap). + evictable.sort(key=lambda kv: float(kv[1].get("last_active") or 0.0)) + overflow = total - cap + for sid, _s in evictable[:overflow]: + _close_session_by_id(sid, end_reason="lru_evict") + + +def _schedule_session_cap_enforcement() -> None: + """Run the LRU sweep off the response path (eviction can call agent.close).""" + try: + timer = threading.Timer(0.1, lambda: _safe_enforce_session_cap()) + timer.daemon = True + timer.start() + except Exception: + pass + + +def _safe_enforce_session_cap() -> None: + try: + _enforce_session_cap() + except Exception: + logger.debug("session cap enforcement failed", exc_info=True) def _start_idle_reaper() -> None: @@ -1111,15 +1184,24 @@ def _start_agent_build(sid: str, session: dict) -> None: kw = {"session_db": session_db} if resume_sid := current.get("resume_session_id"): kw["session_id"] = resume_sid - # Model/effort/fast the desktop picked for a brand-new chat ride - # in as per-session overrides so the first build uses them - # directly (no global config, no build-then-switch). - if override := current.get("model_override"): - kw["model_override"] = override - if (reasoning := current.get("create_reasoning_override")) is not None: - kw["reasoning_config_override"] = reasoning - if (tier := current.get("create_service_tier_override")) is not None: - kw["service_tier_override"] = tier + resume_overrides = current.get("resume_runtime_overrides") + if isinstance(resume_overrides, dict) and resume_overrides: + # Cold deferred resume: restore the full persisted runtime + # identity (model/provider/base_url/api_mode/reasoning/tier) + # exactly as the eager resume path's _stored_session_runtime_ + # overrides splat did, so a deferred build can't drop the + # provider and fail with "No LLM provider configured". + kw.update(resume_overrides) + else: + # Model/effort/fast the desktop picked for a brand-new chat + # ride in as per-session overrides so the first build uses + # them directly (no global config, no build-then-switch). + if override := current.get("model_override"): + kw["model_override"] = override + if (reasoning := current.get("create_reasoning_override")) is not None: + kw["reasoning_config_override"] = reasoning + if (tier := current.get("create_service_tier_override")) is not None: + kw["service_tier_override"] = tier agent = _make_agent(sid, key, **kw) finally: _clear_session_context(tokens) @@ -4602,6 +4684,8 @@ def _(rid, params: dict) -> dict: build_timer = threading.Timer(0.05, _deferred_build) build_timer.daemon = True build_timer.start() + # A new live session just landed; trim detached idle ones over the cap. + _schedule_session_cap_enforcement() return _ok( rid, @@ -4960,6 +5044,135 @@ def _(rid, params: dict) -> dict: }, ) + # Deferred build (desktop cold resume): register the live session and read + # its stored transcript WITHOUT building the agent on the response path. + # _make_agent can block for seconds (MCP discovery, prompt/skill build, + # AIAgent construction), and the desktop awaits this RPC before it paints + # the chat — so the eager build below is the bulk of the multi-second + # "switching sessions is frozen" latency. We return the full display + # transcript immediately and pre-warm the agent on a short timer (the same + # deferred-build contract session.create uses); _sess() also builds on + # demand if the first prompt beats the timer. Distinct from the lazy/watch + # branch above: a normal resume restores the full ancestor history and the + # session's persisted runtime identity, and is a real (upgradable) session, + # not a never-built spectator window. + if is_truthy_value(params.get("defer_build", False)): + sid = uuid.uuid4().hex[:8] + lease, limit_message = _claim_active_session_slot(target, live_session_id=sid) + if limit_message is not None: + return _err(rid, 4090, limit_message) + try: + db.reopen_session(target) + history = db.get_messages_as_conversation(target) + display_history = db.get_messages_as_conversation( + target, include_ancestors=True + ) + except Exception as e: + if lease is not None: + lease.release() + return _err(rid, 5000, f"resume failed: {e}") + display_history_prefix = display_history[ + : max(0, len(display_history) - len(history)) + ] + messages = _history_to_messages(display_history) + # Restore the model/provider/reasoning/tier this chat actually used so + # the deferred build (and the immediate info payload) match the eager + # path — without these a deferred build drops the provider and resume + # fails with "No LLM provider configured". + stored_runtime_overrides = _stored_session_runtime_overrides(found) or {} + cwd = profile_resume_cwd or os.getenv("TERMINAL_CWD", os.getcwd()) + now = time.time() + source = str(params.get("source") or "tui").strip() or "tui" + with _session_resume_lock: + live = _find_live_session_by_key(target) + if live is not None: + if lease is not None: + lease.release() + return _ok(rid, _reuse_live_payload(*live)) + with _sessions_lock: + _sessions[sid] = { + "agent": None, + "agent_error": None, + "agent_ready": threading.Event(), + "attached_images": [], + "close_on_disconnect": is_truthy_value( + params.get("close_on_disconnect", False) + ), + "active_session_lease": lease, + "cols": cols, + "created_at": now, + "display_history_prefix": display_history_prefix, + "edit_snapshots": {}, + "explicit_cwd": False, + "history": history, + "history_lock": threading.Lock(), + "history_version": 0, + "image_counter": 0, + "cwd": cwd, + "inflight_turn": None, + "last_active": now, + "model_override": stored_runtime_overrides.get("model_override"), + "pending_title": None, + "profile_home": str(profile_home) if profile_home is not None else None, + "resume_session_id": target, + "resume_runtime_overrides": stored_runtime_overrides or None, + "running": False, + "session_key": target, + "show_reasoning": _load_show_reasoning(), + "source": source, + "slash_worker": None, + "tool_progress_mode": _load_tool_progress_mode(), + "tool_started_at": {}, + "transport": current_transport() or _stdio_transport, + } + _register_session_cwd(_sessions[sid]) + + def _deferred_build() -> None: + session = _sessions.get(sid) + if session is not None: + _start_agent_build(sid, session) + + build_timer = threading.Timer(0.05, _deferred_build) + build_timer.daemon = True + build_timer.start() + # A new live session just landed; trim detached idle ones over the cap. + _schedule_session_cap_enforcement() + + model_override = stored_runtime_overrides.get("model_override") + resumed_model = "" + if isinstance(model_override, dict): + resumed_model = str(model_override.get("model") or "").strip() + elif isinstance(model_override, str): + resumed_model = model_override.strip() + info = { + "cwd": cwd, + "branch": _git_branch_for_cwd(cwd), + "model": resumed_model or _resolve_model(), + "tools": {}, + "skills": {}, + "lazy": True, + "desktop_contract": DESKTOP_BACKEND_CONTRACT, + "profile_name": _current_profile_name(), + } + provider_override = stored_runtime_overrides.get("provider_override") + if provider_override: + info["provider"] = provider_override + return _ok( + rid, + { + "session_id": sid, + "resumed": target, + "message_count": len(messages), + "messages": messages, + "info": info, + "inflight": None, + "running": False, + "session_key": target, + "started_at": now, + "status": "idle", + }, + ) + # Build the agent OUTSIDE the lock — _make_agent can block for seconds # (MCP discovery, prompt/skill build, AIAgent construction). Holding # _session_resume_lock across it would stall session.close on the main From 3bf00e459af707635c890abcb1d748aec029a918 Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Thu, 25 Jun 2026 13:25:21 -0500 Subject: [PATCH 2/4] perf(desktop): make deferred resume the default, not an opt-in flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per review: gating the faster path behind a `defer_build` flag that the only caller always sends is pointless. Flip it — `session.resume` now defers the agent build by default for every caller (desktop + Ink TUI); a caller that needs the agent built synchronously passes `eager_build: true` (used by the build-race test). The desktop no longer sends a flag. While verifying the flip, fixed two real parity gaps the deferred path had vs the old eager (`_init_session`) path: - `_enable_gateway_prompts()` was never called on a deferred resume, so approvals/clarify wouldn't route through the gateway prompt callbacks. - `_start_agent_build` never wired `background_review_callback` / `memory_notifications`, so a deferred-built session's self-improvement "💾 …" summary leaked to stdout instead of rendering in-transcript. Wiring it there also fixes it for `session.create` sessions, which build through the same path. ACP is unaffected (it uses its own session_manager, not this RPC); the Ink TUI already consumes the same lazy `info` shape from session.create and upgrades on the later `session.info` event. --- .../hooks/use-session-actions.test.tsx | 12 +++--- .../app/session/hooks/use-session-actions.ts | 12 +++--- tests/tui_gateway/test_protocol.py | 21 ++++++---- tui_gateway/server.py | 41 +++++++++++++------ 4 files changed, 54 insertions(+), 32 deletions(-) diff --git a/apps/desktop/src/app/session/hooks/use-session-actions.test.tsx b/apps/desktop/src/app/session/hooks/use-session-actions.test.tsx index 31b2cc58afc..f47b6b62504 100644 --- a/apps/desktop/src/app/session/hooks/use-session-actions.test.tsx +++ b/apps/desktop/src/app/session/hooks/use-session-actions.test.tsx @@ -257,10 +257,11 @@ describe('resumeSession failure recovery', () => { expect($resumeFailedSessionId.get()).toBeNull() }) - it('asks the backend to DEFER the agent build on a normal cold resume', async () => { - // The switch-latency fix: a non-watch cold resume tells the gateway to - // return the transcript immediately and build the agent in the background, - // rather than blocking the RPC (and the whole switch) on _make_agent. + it('resumes via the gateway default (deferred build) — not lazy, no eager opt-out', async () => { + // The switch-latency fix lives backend-side: a normal cold resume gets the + // gateway's default DEFERRED build (transcript returns immediately, agent + // pre-warms in the background). The client must NOT force the synchronous + // path (eager_build) and is only `lazy` for subagent watch windows. let resumeParams: Record | undefined const requestGateway = vi.fn(async (method: string, params?: Record) => { @@ -277,8 +278,7 @@ describe('resumeSession failure recovery', () => { await runResume(requestGateway) - expect(resumeParams).toMatchObject({ defer_build: true }) - // Watch-window lazy attach is the OTHER mode; a normal resume isn't lazy. expect(resumeParams).not.toHaveProperty('lazy') + expect(resumeParams).not.toHaveProperty('eager_build') }) }) diff --git a/apps/desktop/src/app/session/hooks/use-session-actions.ts b/apps/desktop/src/app/session/hooks/use-session-actions.ts index 5c8be9762e1..efc99d5b84c 100644 --- a/apps/desktop/src/app/session/hooks/use-session-actions.ts +++ b/apps/desktop/src/app/session/hooks/use-session-actions.ts @@ -706,12 +706,12 @@ export function useSessionActions({ const resumePromise = requestGateway('session.resume', { session_id: storedSessionId, cols: 96, - // Watch windows attach lazily (live mirror); every other cold resume - // asks the backend to DEFER the agent build so the RPC returns the - // transcript immediately instead of blocking the switch on - // _make_agent (MCP discovery / prompt build). The agent pre-warms in - // the background and the prefetch above paints the transcript. - ...(watchWindow ? { lazy: true } : { defer_build: true }), + // Watch windows attach lazily (live mirror). Every other cold resume + // gets the gateway's default deferred build: the RPC returns the + // transcript immediately instead of blocking the switch on _make_agent + // (MCP discovery / prompt build), and the agent pre-warms in the + // background while the prefetch above paints the transcript. + ...(watchWindow ? { lazy: true } : {}), ...(sessionProfile ? { profile: sessionProfile } : {}) }) // The rejection is consumed by the `await` below; this guard only diff --git a/tests/tui_gateway/test_protocol.py b/tests/tui_gateway/test_protocol.py index 8b45a672aed..c0003dfc818 100644 --- a/tests/tui_gateway/test_protocol.py +++ b/tests/tui_gateway/test_protocol.py @@ -323,7 +323,9 @@ def test_session_resume_returns_hydrated_messages(server, monkeypatch): { "id": "r1", "method": "session.resume", - "params": {"session_id": "20260409_010101_abc123", "cols": 100}, + # eager_build: exercise the synchronous build path (this test + # monkeypatches _make_agent/_init_session/_session_info). + "params": {"session_id": "20260409_010101_abc123", "cols": 100, "eager_build": True}, } ) @@ -336,11 +338,12 @@ def test_session_resume_returns_hydrated_messages(server, monkeypatch): ] -def test_session_resume_defer_build_returns_transcript_without_blocking(server, monkeypatch): - """``defer_build: true`` (desktop cold resume) must return the full display +def test_session_resume_defaults_to_deferred_build(server, monkeypatch): + """A normal cold resume (no ``eager_build``) must return the full display transcript immediately and register an upgradable live session WITHOUT building the agent on the response path — that eager build is the - multi-second switch latency.""" + multi-second switch latency. Deferred is the default; ``eager_build: true`` + opts back into the synchronous path.""" target = "20260409_010101_abc123" @@ -382,7 +385,7 @@ def test_session_resume_defer_build_returns_transcript_without_blocking(server, { "id": "r1", "method": "session.resume", - "params": {"session_id": target, "cols": 100, "defer_build": True}, + "params": {"session_id": target, "cols": 100}, } ) @@ -514,7 +517,7 @@ def test_session_resume_handles_multimodal_list_content(server, monkeypatch): { "id": "r1", "method": "session.resume", - "params": {"session_id": "20260502_000000_listcontent", "cols": 100}, + "params": {"session_id": "20260502_000000_listcontent", "cols": 100, "eager_build": True}, } ) @@ -828,7 +831,9 @@ def test_session_resume_reuses_existing_live_session(server, monkeypatch): { "id": "first", "method": "session.resume", - "params": {"session_id": target, "cols": 100}, + # eager_build: this test drives the synchronous build race + + # double-checked locking that only the eager path exercises. + "params": {"session_id": target, "cols": 100, "eager_build": True}, } ) @@ -843,7 +848,7 @@ def test_session_resume_reuses_existing_live_session(server, monkeypatch): { "id": "second", "method": "session.resume", - "params": {"session_id": target, "cols": 120}, + "params": {"session_id": target, "cols": 120, "eager_build": True}, } ) diff --git a/tui_gateway/server.py b/tui_gateway/server.py index 40910e291ba..6891c49eceb 100644 --- a/tui_gateway/server.py +++ b/tui_gateway/server.py @@ -1234,6 +1234,19 @@ def _start_agent_build(sid: str, session: dict) -> None: pass _wire_callbacks(sid) + # Surface the self-improvement review's "💾 …" summary as an event + # the TUI/desktop render in-transcript, honoring + # display.memory_notifications. _init_session wires this for the + # eager/branch paths; deferred-built sessions (session.create and the + # default cold resume) build through here, so without this their + # review summaries would leak to stdout instead of the chat. + try: + agent.background_review_callback = lambda message, _sid=sid: _emit( + "review.summary", _sid, {"text": str(message)} + ) + agent.memory_notifications = _load_memory_notifications() + except Exception: + pass # Hydrate credits notices at session OPEN (not just on the first # message), so depletion / usage-band warnings show at "ready". Runs # off the build thread, after the notice_callback is wired. Fail-open. @@ -5044,23 +5057,27 @@ def _(rid, params: dict) -> dict: }, ) - # Deferred build (desktop cold resume): register the live session and read - # its stored transcript WITHOUT building the agent on the response path. - # _make_agent can block for seconds (MCP discovery, prompt/skill build, - # AIAgent construction), and the desktop awaits this RPC before it paints - # the chat — so the eager build below is the bulk of the multi-second - # "switching sessions is frozen" latency. We return the full display - # transcript immediately and pre-warm the agent on a short timer (the same - # deferred-build contract session.create uses); _sess() also builds on - # demand if the first prompt beats the timer. Distinct from the lazy/watch + # Cold resume default: register the live session and read its stored + # transcript, but build the agent OFF the response path. _make_agent can + # block for seconds (MCP discovery, prompt/skill build, AIAgent + # construction), and every resume caller (desktop + Ink TUI) awaits this RPC + # before it paints — so building eagerly is the bulk of the multi-second + # "switching sessions is frozen" latency. Return the full display transcript + # immediately and pre-warm the agent on a short timer (the same deferred- + # build contract session.create uses); _sess() also builds on demand if the + # first prompt beats the timer. A caller that needs the agent built + # synchronously (e.g. tests of the build race) passes ``eager_build: true`` + # to fall through to the eager path below. Distinct from the lazy/watch # branch above: a normal resume restores the full ancestor history and the - # session's persisted runtime identity, and is a real (upgradable) session, - # not a never-built spectator window. - if is_truthy_value(params.get("defer_build", False)): + # session's persisted runtime identity, and is a real (upgradable) session. + if not is_truthy_value(params.get("eager_build", False)): sid = uuid.uuid4().hex[:8] lease, limit_message = _claim_active_session_slot(target, live_session_id=sid) if limit_message is not None: return _err(rid, 4090, limit_message) + # Interactive resume routes approvals/clarify through gateway prompts; + # the deferred build wires the remaining per-session callbacks. + _enable_gateway_prompts() try: db.reopen_session(target) history = db.get_messages_as_conversation(target) From 1ca1f9f2c7a19e92da5e5f13f4880f409b99635b Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Thu, 25 Jun 2026 13:54:53 -0500 Subject: [PATCH 3/4] refactor(tui_gateway): DRY the deferred-session paths Collapse the duplicated cold-resume / lazy-watch / create scaffolding into shared helpers: _deferred_session_record (the live-session dict minus the agent), _lazy_resume_info (the not-yet-built session.info), _claim_or_reuse_live (lock + double-checked register-or-reuse), and _schedule_agent_build (the pre-warm timer). Net -12 lines, three copies of the ~30-key session dict and the lazy-info block down to one each. No behavior change. --- .../app/session/hooks/use-session-actions.ts | 11 +- tui_gateway/server.py | 337 +++++++++--------- 2 files changed, 168 insertions(+), 180 deletions(-) diff --git a/apps/desktop/src/app/session/hooks/use-session-actions.ts b/apps/desktop/src/app/session/hooks/use-session-actions.ts index efc99d5b84c..fb06c5a6048 100644 --- a/apps/desktop/src/app/session/hooks/use-session-actions.ts +++ b/apps/desktop/src/app/session/hooks/use-session-actions.ts @@ -759,14 +759,9 @@ export function useSessionActions({ return chatMessageArraysEquivalent(currentMessages, resumedMessages) ? currentMessages : resumedMessages })() - // When the prefetch already painted these exact messages (the common - // cold-resume path), `preferredMessages` IS the live `$messages` array. - // Re-running preserveLocalAssistantErrors there would build a 1000-entry - // Map and map the whole transcript into a throwaway array on every - // switch — pure main-thread cost on the hot path (the downstream - // sameMessageList guard already drops the publish, so it buys nothing). - // The prefetch branch already merged local assistant errors when it - // built `localSnapshot`, so reuse the ref instead. + // Prefetch-hit fast path: `preferredMessages` IS the live `$messages` + // array (already error-merged when `localSnapshot` was built), so reuse + // the ref instead of rebuilding a throwaway transcript+Map every switch. const messagesForView = preferredMessages === currentMessages ? currentMessages diff --git a/tui_gateway/server.py b/tui_gateway/server.py index 6891c49eceb..624ce6b7be8 100644 --- a/tui_gateway/server.py +++ b/tui_gateway/server.py @@ -801,19 +801,16 @@ def _enforce_session_cap() -> None: def _schedule_session_cap_enforcement() -> None: """Run the LRU sweep off the response path (eviction can call agent.close).""" - try: - timer = threading.Timer(0.1, lambda: _safe_enforce_session_cap()) - timer.daemon = True - timer.start() - except Exception: - pass + def _run(): + try: + _enforce_session_cap() + except Exception: + logger.debug("session cap enforcement failed", exc_info=True) -def _safe_enforce_session_cap() -> None: - try: - _enforce_session_cap() - except Exception: - logger.debug("session cap enforcement failed", exc_info=True) + timer = threading.Timer(0.1, _run) + timer.daemon = True + timer.start() def _start_idle_reaper() -> None: @@ -4689,16 +4686,8 @@ def _(rid, params: dict) -> dict: # + skeleton panel, then build the real AIAgent just after this response is # flushed. This keeps startup responsive while still hydrating tools/skills # without requiring the user to submit a first prompt. - def _deferred_build() -> None: - session = _sessions.get(sid) - if session is not None: - _start_agent_build(sid, session) - - build_timer = threading.Timer(0.05, _deferred_build) - build_timer.daemon = True - build_timer.start() - # A new live session just landed; trim detached idle ones over the cap. - _schedule_session_cap_enforcement() + _schedule_agent_build(sid) + _schedule_session_cap_enforcement() # trim detached idle sessions over the cap return _ok( rid, @@ -4866,6 +4855,110 @@ def _(rid, params: dict) -> dict: return _ok(rid, {"verification": {"status": "unknown", "evidence": None}}) +def _lazy_resume_info(cwd: str, *, model: str = "", provider: str = "") -> dict: + """session.info for a not-yet-built session (the shape session.create + returns). tools/skills land later when the deferred build emits session.info.""" + info = { + "cwd": cwd, + "branch": _git_branch_for_cwd(cwd), + "model": model or _resolve_model(), + "tools": {}, + "skills": {}, + "lazy": True, + "desktop_contract": DESKTOP_BACKEND_CONTRACT, + "profile_name": _current_profile_name(), + } + if provider: + info["provider"] = provider + return info + + +def _deferred_session_record( + session_key: str, + *, + cols: int, + cwd: str, + history: list, + lease, + source: str = "tui", + close_on_disconnect: bool = False, + display_history_prefix: list | None = None, + profile_home: Path | None = None, + lazy: bool = False, + model_override=None, + resume_runtime_overrides: dict | None = None, +) -> dict: + """A live-session record whose AIAgent is built later (lazy watch / cold + resume) — _init_session's shape minus the agent.""" + now = time.time() + return { + "agent": None, + "agent_error": None, + "agent_ready": threading.Event(), + "attached_images": [], + "close_on_disconnect": close_on_disconnect, + "active_session_lease": lease, + "cols": cols, + "created_at": now, + "cwd": cwd, + "display_history_prefix": display_history_prefix or [], + "edit_snapshots": {}, + "explicit_cwd": False, + "history": history, + "history_lock": threading.Lock(), + "history_version": 0, + "image_counter": 0, + "inflight_turn": None, + "last_active": now, + "lazy": lazy, + "model_override": model_override, + "pending_title": None, + "profile_home": str(profile_home) if profile_home is not None else None, + "resume_runtime_overrides": resume_runtime_overrides, + "resume_session_id": session_key, + "running": False, + "session_key": session_key, + "show_reasoning": _load_show_reasoning(), + "slash_worker": None, + "source": source, + "tool_progress_mode": _load_tool_progress_mode(), + "tool_started_at": {}, + "transport": current_transport() or _stdio_transport, + } + + +def _claim_or_reuse_live( + sid: str, session_key: str, record: dict, lease +) -> tuple[str, dict] | None: + """Register ``record`` as the live session for ``session_key`` under the + resume lock, or — if a concurrent resume already won — release ``lease`` and + return the winner for the caller to reuse.""" + with _session_resume_lock: + live = _find_live_session_by_key(session_key) + if live is not None: + if lease is not None: + lease.release() + return live + with _sessions_lock: + _sessions[sid] = record + _register_session_cwd(_sessions[sid]) + return None + + +def _schedule_agent_build(sid: str, delay: float = 0.05) -> None: + """Pre-warm a deferred session's agent off the response path (session.create + and cold resume both build through here; _sess() also builds on demand).""" + + def _run(): + session = _sessions.get(sid) + if session is not None: + _start_agent_build(sid, session) + + timer = threading.Timer(delay, _run) + timer.daemon = True + timer.start() + + @method("session.resume") def _(rid, params: dict) -> dict: target = params.get("session_id", "") @@ -4973,65 +5066,31 @@ def _(rid, params: dict) -> dict: return _err(rid, 4090, limit_message) try: db.reopen_session(target) - # The child's OWN conversation only. Delegation children are - # parent-linked rows, so include_ancestors would prepend the - # parent's entire transcript — a watch window opened on a subagent - # must show the subagent's branch, not the parent's prompt. + # The child's OWN conversation only — include_ancestors would prepend + # the parent's transcript onto the subagent's branch. history = db.get_messages_as_conversation(target) except Exception as e: if lease is not None: lease.release() return _err(rid, 5000, f"resume failed: {e}") - messages = _history_to_messages(history) cwd = profile_resume_cwd or os.getenv("TERMINAL_CWD", os.getcwd()) - now = time.time() - # A delegated child mid-run emits no native session events of its own — - # report its liveness from the relay registry so the window paints a - # busy indicator instead of a dead idle transcript. + record = _deferred_session_record( + target, + cols=cols, + cwd=cwd, + history=history, + lease=lease, + source=str(params.get("source") or "tui").strip() or "tui", + close_on_disconnect=is_truthy_value(params.get("close_on_disconnect", False)), + profile_home=profile_home, + lazy=True, + ) + if (live := _claim_or_reuse_live(sid, target, record, lease)) is not None: + return _ok(rid, _reuse_live_payload(*live)) + # A delegated child mid-run emits no session events of its own — report + # its liveness from the relay registry so the window shows a busy turn. child_running = _child_run_active(target) - source = str(params.get("source") or "tui").strip() or "tui" - with _session_resume_lock: - live = _find_live_session_by_key(target) - if live is not None: - if lease is not None: - lease.release() - return _ok(rid, _reuse_live_payload(*live)) - with _sessions_lock: - _sessions[sid] = { - "agent": None, - "agent_error": None, - "agent_ready": threading.Event(), - "attached_images": [], - "close_on_disconnect": is_truthy_value( - params.get("close_on_disconnect", False) - ), - "active_session_lease": lease, - "cols": cols, - "created_at": now, - "display_history_prefix": [], - "edit_snapshots": {}, - "explicit_cwd": False, - "history": history, - "history_lock": threading.Lock(), - "history_version": 0, - "image_counter": 0, - "cwd": cwd, - "inflight_turn": None, - "last_active": now, - "lazy": True, - "pending_title": None, - "profile_home": str(profile_home) if profile_home is not None else None, - "resume_session_id": target, - "running": False, - "session_key": target, - "show_reasoning": _load_show_reasoning(), - "source": source, - "slash_worker": None, - "tool_progress_mode": _load_tool_progress_mode(), - "tool_started_at": {}, - "transport": current_transport() or _stdio_transport, - } - _register_session_cwd(_sessions[sid]) + messages = _history_to_messages(history) return _ok( rid, { @@ -5039,20 +5098,11 @@ def _(rid, params: dict) -> dict: "resumed": target, "message_count": len(messages), "messages": messages, - "info": { - "cwd": cwd, - "branch": _git_branch_for_cwd(cwd), - "model": _resolve_model(), - "tools": {}, - "skills": {}, - "lazy": True, - "desktop_contract": DESKTOP_BACKEND_CONTRACT, - "profile_name": _current_profile_name(), - }, + "info": _lazy_resume_info(cwd), "inflight": None, "running": child_running, "session_key": target, - "started_at": now, + "started_at": record["created_at"], "status": "streaming" if child_running else "idle", }, ) @@ -5081,99 +5131,38 @@ def _(rid, params: dict) -> dict: try: db.reopen_session(target) history = db.get_messages_as_conversation(target) - display_history = db.get_messages_as_conversation( - target, include_ancestors=True - ) + display_history = db.get_messages_as_conversation(target, include_ancestors=True) except Exception as e: if lease is not None: lease.release() return _err(rid, 5000, f"resume failed: {e}") - display_history_prefix = display_history[ - : max(0, len(display_history) - len(history)) - ] - messages = _history_to_messages(display_history) - # Restore the model/provider/reasoning/tier this chat actually used so - # the deferred build (and the immediate info payload) match the eager - # path — without these a deferred build drops the provider and resume - # fails with "No LLM provider configured". - stored_runtime_overrides = _stored_session_runtime_overrides(found) or {} + prefix = display_history[: max(0, len(display_history) - len(history))] + # Restore the model/provider/reasoning/tier this chat last used so the + # deferred build (and the info below) match the eager path — without them + # the build drops the provider ("No LLM provider configured"). + overrides = _stored_session_runtime_overrides(found) or {} + model_override = overrides.get("model_override") or {} cwd = profile_resume_cwd or os.getenv("TERMINAL_CWD", os.getcwd()) - now = time.time() - source = str(params.get("source") or "tui").strip() or "tui" - with _session_resume_lock: - live = _find_live_session_by_key(target) - if live is not None: - if lease is not None: - lease.release() - return _ok(rid, _reuse_live_payload(*live)) - with _sessions_lock: - _sessions[sid] = { - "agent": None, - "agent_error": None, - "agent_ready": threading.Event(), - "attached_images": [], - "close_on_disconnect": is_truthy_value( - params.get("close_on_disconnect", False) - ), - "active_session_lease": lease, - "cols": cols, - "created_at": now, - "display_history_prefix": display_history_prefix, - "edit_snapshots": {}, - "explicit_cwd": False, - "history": history, - "history_lock": threading.Lock(), - "history_version": 0, - "image_counter": 0, - "cwd": cwd, - "inflight_turn": None, - "last_active": now, - "model_override": stored_runtime_overrides.get("model_override"), - "pending_title": None, - "profile_home": str(profile_home) if profile_home is not None else None, - "resume_session_id": target, - "resume_runtime_overrides": stored_runtime_overrides or None, - "running": False, - "session_key": target, - "show_reasoning": _load_show_reasoning(), - "source": source, - "slash_worker": None, - "tool_progress_mode": _load_tool_progress_mode(), - "tool_started_at": {}, - "transport": current_transport() or _stdio_transport, - } - _register_session_cwd(_sessions[sid]) + record = _deferred_session_record( + target, + cols=cols, + cwd=cwd, + history=history, + lease=lease, + source=str(params.get("source") or "tui").strip() or "tui", + close_on_disconnect=is_truthy_value(params.get("close_on_disconnect", False)), + display_history_prefix=prefix, + profile_home=profile_home, + model_override=overrides.get("model_override"), + resume_runtime_overrides=overrides or None, + ) + if (live := _claim_or_reuse_live(sid, target, record, lease)) is not None: + return _ok(rid, _reuse_live_payload(*live)) - def _deferred_build() -> None: - session = _sessions.get(sid) - if session is not None: - _start_agent_build(sid, session) + _schedule_agent_build(sid) + _schedule_session_cap_enforcement() # trim detached idle sessions over the cap - build_timer = threading.Timer(0.05, _deferred_build) - build_timer.daemon = True - build_timer.start() - # A new live session just landed; trim detached idle ones over the cap. - _schedule_session_cap_enforcement() - - model_override = stored_runtime_overrides.get("model_override") - resumed_model = "" - if isinstance(model_override, dict): - resumed_model = str(model_override.get("model") or "").strip() - elif isinstance(model_override, str): - resumed_model = model_override.strip() - info = { - "cwd": cwd, - "branch": _git_branch_for_cwd(cwd), - "model": resumed_model or _resolve_model(), - "tools": {}, - "skills": {}, - "lazy": True, - "desktop_contract": DESKTOP_BACKEND_CONTRACT, - "profile_name": _current_profile_name(), - } - provider_override = stored_runtime_overrides.get("provider_override") - if provider_override: - info["provider"] = provider_override + messages = _history_to_messages(display_history) return _ok( rid, { @@ -5181,11 +5170,15 @@ def _(rid, params: dict) -> dict: "resumed": target, "message_count": len(messages), "messages": messages, - "info": info, + "info": _lazy_resume_info( + cwd, + model=model_override.get("model") or "", + provider=overrides.get("provider_override") or "", + ), "inflight": None, "running": False, "session_key": target, - "started_at": now, + "started_at": record["created_at"], "status": "idle", }, ) From e8561d61e6f29dac1a6520f3253516bae2313100 Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Thu, 25 Jun 2026 14:13:07 -0500 Subject: [PATCH 4/4] test(tui_gateway): pin synchronous-build resume tests to eager_build MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These three assert the eager build contract — stored runtime overrides / profile db reach _make_agent synchronously, and the agent binds to the compression tip. Under deferred-by-default the build runs off-thread, so they raced the timer (green in CI, flaky locally). Pin them to eager_build; deferred coverage lives in the protocol tests. --- tests/test_tui_gateway_server.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/tests/test_tui_gateway_server.py b/tests/test_tui_gateway_server.py index 6592a42a9a8..e761e46158c 100644 --- a/tests/test_tui_gateway_server.py +++ b/tests/test_tui_gateway_server.py @@ -1001,8 +1001,11 @@ def test_session_resume_follows_compression_tip(monkeypatch, tmp_path): ) try: + # eager_build: this asserts the synchronously-built agent binds to the + # resolved tip (captured["agent_session_id"]); the compression-tip + # resolution itself runs before the build and is mode-agnostic. resp = server.handle_request( - {"id": "1", "method": "session.resume", "params": {"session_id": "parent_root"}} + {"id": "1", "method": "session.resume", "params": {"session_id": "parent_root", "eager_build": True}} ) finally: db.close() @@ -1049,8 +1052,11 @@ def test_session_resume_passes_stored_runtime_to_agent(monkeypatch): monkeypatch.setattr(server, "_init_session", fake_init_session) + # eager_build: this asserts the synchronous build contract (stored runtime + # overrides reach _make_agent, info comes from _session_info). The deferred + # default restores the same overrides via _start_agent_build off-thread. resp = server.handle_request( - {"id": "1", "method": "session.resume", "params": {"session_id": "stored-session"}} + {"id": "1", "method": "session.resume", "params": {"session_id": "stored-session", "eager_build": True}} ) assert resp["result"]["info"] == {"model": "gpt-5.4", "provider": "openai-codex"} @@ -1137,11 +1143,13 @@ def test_session_resume_profile_uses_profile_db_cwd(monkeypatch, tmp_path): monkeypatch.setattr(approval, "load_permanent_allowlist", lambda: None) try: + # eager_build: asserts the synchronous build receives the profile's db + # (the deferred default builds with the same db via _start_agent_build). resp = server.handle_request( { "id": "1", "method": "session.resume", - "params": {"session_id": target, "profile": "worker"}, + "params": {"session_id": target, "profile": "worker", "eager_build": True}, } )