diff --git a/apps/desktop/src/app/session/hooks/use-session-actions.test.tsx b/apps/desktop/src/app/session/hooks/use-session-actions.test.tsx index a84a854ded4..f47b6b62504 100644 --- a/apps/desktop/src/app/session/hooks/use-session-actions.test.tsx +++ b/apps/desktop/src/app/session/hooks/use-session-actions.test.tsx @@ -256,4 +256,29 @@ describe('resumeSession failure recovery', () => { expect($resumeFailedSessionId.get()).toBeNull() }) + + it('resumes via the gateway default (deferred build) — not lazy, no eager opt-out', async () => { + // The switch-latency fix lives backend-side: a normal cold resume gets the + // gateway's default DEFERRED build (transcript returns immediately, agent + // pre-warms in the background). The client must NOT force the synchronous + // path (eager_build) and is only `lazy` for subagent watch windows. + let resumeParams: Record | undefined + + const requestGateway = vi.fn(async (method: string, params?: Record) => { + if (method === 'session.resume') { + resumeParams = params + + return { session_id: 'runtime-1', resumed: params?.session_id, messages: [], info: {} } as never + } + + return {} as never + }) + + vi.mocked(getSessionMessages).mockResolvedValue({ messages: [] } as never) + + await runResume(requestGateway) + + expect(resumeParams).not.toHaveProperty('lazy') + expect(resumeParams).not.toHaveProperty('eager_build') + }) }) diff --git a/apps/desktop/src/app/session/hooks/use-session-actions.ts b/apps/desktop/src/app/session/hooks/use-session-actions.ts index 36dfea759f2..fb06c5a6048 100644 --- a/apps/desktop/src/app/session/hooks/use-session-actions.ts +++ b/apps/desktop/src/app/session/hooks/use-session-actions.ts @@ -706,6 +706,11 @@ export function useSessionActions({ const resumePromise = requestGateway('session.resume', { session_id: storedSessionId, cols: 96, + // Watch windows attach lazily (live mirror). Every other cold resume + // gets the gateway's default deferred build: the RPC returns the + // transcript immediately instead of blocking the switch on _make_agent + // (MCP discovery / prompt build), and the agent pre-warms in the + // background while the prefetch above paints the transcript. ...(watchWindow ? { lazy: true } : {}), ...(sessionProfile ? { profile: sessionProfile } : {}) }) @@ -754,7 +759,13 @@ export function useSessionActions({ return chatMessageArraysEquivalent(currentMessages, resumedMessages) ? currentMessages : resumedMessages })() - const messagesForView = preserveLocalAssistantErrors(preferredMessages, currentMessages) + // Prefetch-hit fast path: `preferredMessages` IS the live `$messages` + // array (already error-merged when `localSnapshot` was built), so reuse + // the ref instead of rebuilding a throwaway transcript+Map every switch. + const messagesForView = + preferredMessages === currentMessages + ? currentMessages + : preserveLocalAssistantErrors(preferredMessages, currentMessages) setActiveSessionId(resumed.session_id) activeSessionIdRef.current = resumed.session_id diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 06e3ad5d7b9..b64086c91e8 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -901,6 +901,11 @@ DEFAULT_CONFIG = { # Global active chat session cap across CLI, TUI/dashboard, and messaging. # None/0 = unbounded. "max_concurrent_sessions": None, + # Soft LRU cap on in-memory TUI/desktop/dashboard sessions. When more than + # this many are live, the gateway evicts the least-recently-active DETACHED + # sessions (no live client) so accumulated agents don't pile up under memory + # pressure. Reopening one re-resumes it from disk. 0/null disables. + "max_live_sessions": 16, "agent": { "max_turns": 90, # Inactivity timeout for gateway agent execution (seconds). diff --git a/tests/test_tui_gateway_server.py b/tests/test_tui_gateway_server.py index 6592a42a9a8..e761e46158c 100644 --- a/tests/test_tui_gateway_server.py +++ b/tests/test_tui_gateway_server.py @@ -1001,8 +1001,11 @@ def test_session_resume_follows_compression_tip(monkeypatch, tmp_path): ) try: + # eager_build: this asserts the synchronously-built agent binds to the + # resolved tip (captured["agent_session_id"]); the compression-tip + # resolution itself runs before the build and is mode-agnostic. resp = server.handle_request( - {"id": "1", "method": "session.resume", "params": {"session_id": "parent_root"}} + {"id": "1", "method": "session.resume", "params": {"session_id": "parent_root", "eager_build": True}} ) finally: db.close() @@ -1049,8 +1052,11 @@ def test_session_resume_passes_stored_runtime_to_agent(monkeypatch): monkeypatch.setattr(server, "_init_session", fake_init_session) + # eager_build: this asserts the synchronous build contract (stored runtime + # overrides reach _make_agent, info comes from _session_info). The deferred + # default restores the same overrides via _start_agent_build off-thread. resp = server.handle_request( - {"id": "1", "method": "session.resume", "params": {"session_id": "stored-session"}} + {"id": "1", "method": "session.resume", "params": {"session_id": "stored-session", "eager_build": True}} ) assert resp["result"]["info"] == {"model": "gpt-5.4", "provider": "openai-codex"} @@ -1137,11 +1143,13 @@ def test_session_resume_profile_uses_profile_db_cwd(monkeypatch, tmp_path): monkeypatch.setattr(approval, "load_permanent_allowlist", lambda: None) try: + # eager_build: asserts the synchronous build receives the profile's db + # (the deferred default builds with the same db via _start_agent_build). resp = server.handle_request( { "id": "1", "method": "session.resume", - "params": {"session_id": target, "profile": "worker"}, + "params": {"session_id": target, "profile": "worker", "eager_build": True}, } ) diff --git a/tests/tui_gateway/test_protocol.py b/tests/tui_gateway/test_protocol.py index 3b385bf825a..c0003dfc818 100644 --- a/tests/tui_gateway/test_protocol.py +++ b/tests/tui_gateway/test_protocol.py @@ -323,7 +323,9 @@ def test_session_resume_returns_hydrated_messages(server, monkeypatch): { "id": "r1", "method": "session.resume", - "params": {"session_id": "20260409_010101_abc123", "cols": 100}, + # eager_build: exercise the synchronous build path (this test + # monkeypatches _make_agent/_init_session/_session_info). + "params": {"session_id": "20260409_010101_abc123", "cols": 100, "eager_build": True}, } ) @@ -336,6 +338,147 @@ def test_session_resume_returns_hydrated_messages(server, monkeypatch): ] +def test_session_resume_defaults_to_deferred_build(server, monkeypatch): + """A normal cold resume (no ``eager_build``) must return the full display + transcript immediately and register an upgradable live session WITHOUT + building the agent on the response path — that eager build is the + multi-second switch latency. Deferred is the default; ``eager_build: true`` + opts back into the synchronous path.""" + + target = "20260409_010101_abc123" + + class _DB: + def get_session(self, _sid): + return { + "id": target, + "model": "vendor/cool-model", + "model_config": {"provider": "vendor"}, + } + + def get_session_by_title(self, _title): + return None + + def resolve_resume_session_id(self, sid): + return sid + + def reopen_session(self, _sid): + return None + + def get_messages_as_conversation(self, _sid, include_ancestors=False): + return [ + {"role": "user", "content": "hello"}, + {"role": "assistant", "content": "yo"}, + ] + + builds: list = [] + + monkeypatch.setattr(server, "_get_db", lambda: _DB()) + # The response path must never call _make_agent; route the deferred timer + # through a recorder so a 50ms fire can't build (or crash) under the test. + monkeypatch.setattr( + server, "_make_agent", lambda *a, **k: (_ for _ in ()).throw(AssertionError("no eager build")) + ) + monkeypatch.setattr(server, "_start_agent_build", lambda sid, session: builds.append(sid)) + monkeypatch.setattr(server, "_schedule_session_cap_enforcement", lambda: None) + + resp = server.handle_request( + { + "id": "r1", + "method": "session.resume", + "params": {"session_id": target, "cols": 100}, + } + ) + + assert "error" not in resp + result = resp["result"] + assert result["resumed"] == target + assert result["session_key"] == target + assert result["message_count"] == 2 + assert result["messages"] == [ + {"role": "user", "text": "hello"}, + {"role": "assistant", "text": "yo"}, + ] + # Lazy info contract (same shape session.create returns), with the session's + # persisted model/provider restored rather than the global default. + assert result["info"]["lazy"] is True + assert result["info"]["model"] == "vendor/cool-model" + assert result["info"]["provider"] == "vendor" + assert result["info"]["desktop_contract"] == server.DESKTOP_BACKEND_CONTRACT + + sid = result["session_id"] + session = server._sessions[sid] + # Registered but not built: agent is None and the resume key is carried so a + # later prompt.submit / _sess() upgrade continues THIS stored conversation. + assert session["agent"] is None + assert session["resume_session_id"] == target + assert not session["agent_ready"].is_set() + # Not a watch spectator: a normal deferred resume is a real session. + assert not session.get("lazy") + # The persisted runtime identity is stashed for the deferred build so it + # can't drop the provider ("No LLM provider configured"). + assert session["resume_runtime_overrides"]["model_override"]["model"] == "vendor/cool-model" + assert server._find_live_session_by_key(target) == (sid, session) + + +def test_enforce_session_cap_evicts_oldest_detached_only(server, monkeypatch): + """The LRU cap frees the least-recently-active DETACHED sessions when over + the limit, and never a live-transport / running / mid-build one.""" + + monkeypatch.setattr(server, "_load_cfg", lambda: {"max_live_sessions": 2}) + evicted: list[str] = [] + monkeypatch.setattr( + server, "_close_session_by_id", lambda sid, end_reason=None: evicted.append(sid) + ) + + def _ready() -> threading.Event: + ev = threading.Event() + ev.set() + return ev + + detached = server._detached_ws_transport + live = object() # no _closed attr -> live transport, never evictable + + server._sessions.clear() + server._sessions.update( + { + "old_detached": {"transport": detached, "last_active": 100.0, "agent_ready": _ready()}, + "new_detached": {"transport": detached, "last_active": 300.0, "agent_ready": _ready()}, + "running_detached": { + "transport": detached, + "last_active": 50.0, + "running": True, + "agent_ready": _ready(), + }, + "focused_live": {"transport": live, "last_active": 200.0, "agent_ready": _ready()}, + } + ) + + server._enforce_session_cap() + + # 4 sessions, cap 2 -> evict 2. Only detached+idle+built are eligible, oldest + # first; the running one and the live-transport one are exempt. + assert evicted == ["old_detached", "new_detached"] + + +def test_enforce_session_cap_disabled_is_noop(server, monkeypatch): + monkeypatch.setattr(server, "_load_cfg", lambda: {"max_live_sessions": 0}) + evicted: list[str] = [] + monkeypatch.setattr( + server, "_close_session_by_id", lambda sid, end_reason=None: evicted.append(sid) + ) + server._sessions.clear() + server._sessions.update( + { + f"s{i}": {"transport": server._detached_ws_transport, "last_active": float(i)} + for i in range(5) + } + ) + + server._enforce_session_cap() + + assert evicted == [] + + def test_session_resume_handles_multimodal_list_content(server, monkeypatch): """A user message persisted with list-shaped multimodal content used to crash session resume with ``'list' object has no attribute 'strip'``.""" @@ -374,7 +517,7 @@ def test_session_resume_handles_multimodal_list_content(server, monkeypatch): { "id": "r1", "method": "session.resume", - "params": {"session_id": "20260502_000000_listcontent", "cols": 100}, + "params": {"session_id": "20260502_000000_listcontent", "cols": 100, "eager_build": True}, } ) @@ -688,7 +831,9 @@ def test_session_resume_reuses_existing_live_session(server, monkeypatch): { "id": "first", "method": "session.resume", - "params": {"session_id": target, "cols": 100}, + # eager_build: this test drives the synchronous build race + + # double-checked locking that only the eager path exercises. + "params": {"session_id": target, "cols": 100, "eager_build": True}, } ) @@ -703,7 +848,7 @@ def test_session_resume_reuses_existing_live_session(server, monkeypatch): { "id": "second", "method": "session.resume", - "params": {"session_id": target, "cols": 120}, + "params": {"session_id": target, "cols": 120, "eager_build": True}, } ) diff --git a/tui_gateway/server.py b/tui_gateway/server.py index 2bd20968590..624ce6b7be8 100644 --- a/tui_gateway/server.py +++ b/tui_gateway/server.py @@ -741,6 +741,76 @@ def _reap_idle_sessions() -> None: victims = [sid for sid, s in _sessions.items() if _session_is_evictable(sid, s, now)] for sid in victims: _close_session_by_id(sid, end_reason="idle_timeout") + _enforce_session_cap() + + +# Soft LRU cap on in-memory sessions. The 6h TTL reaper above only frees +# sessions that have been idle for hours; a heavy user who reconnects often +# accumulates detached sessions (the report's ``detached_sessions=5``) whose +# agents sit resident for the full TTL. The cap evicts the least-recently-active +# DETACHED sessions sooner so live agents don't pile up under memory pressure. +# Default-on but provably safe: it only touches sessions with no live client +# (reopening re-resumes them from the DB) and never a running / pending / +# mid-build / live-transport one. 0/null disables. +def _max_live_sessions() -> int: + try: + from hermes_cli.active_sessions import coerce_max_concurrent_sessions + + cfg = _load_cfg() or {} + raw = cfg.get("max_live_sessions") + if raw is None: + gateway_cfg = cfg.get("gateway") + if isinstance(gateway_cfg, dict): + raw = gateway_cfg.get("max_live_sessions") + coerced = coerce_max_concurrent_sessions(raw, key="max_live_sessions") + return int(coerced) if coerced else 0 + except Exception: + return 0 + + +def _session_is_lru_evictable(sid: str, session: dict) -> bool: + # Same hard exemptions as the TTL reaper (never evict a session mid-turn, + # awaiting input, or still building), but WITHOUT the hours-scale age gate: + # a detached session is eligible the moment it loses its client. + if session.get("running") or _session_pending_kind(sid): + return False + ready = session.get("agent_ready") + if ready is not None and not ready.is_set() and not session.get("lazy"): + return False + return _transport_is_dead(session.get("transport")) + + +def _enforce_session_cap() -> None: + cap = _max_live_sessions() + if cap <= 0: + return + with _sessions_lock: + total = len(_sessions) + if total <= cap: + return + evictable = [ + (sid, s) for sid, s in _sessions.items() if _session_is_lru_evictable(sid, s) + ] + # Oldest-touched first; only evict down to the cap (live/focused sessions on + # a live transport are never eligible, so we may stop short of the cap). + evictable.sort(key=lambda kv: float(kv[1].get("last_active") or 0.0)) + overflow = total - cap + for sid, _s in evictable[:overflow]: + _close_session_by_id(sid, end_reason="lru_evict") + + +def _schedule_session_cap_enforcement() -> None: + """Run the LRU sweep off the response path (eviction can call agent.close).""" + + def _run(): + try: + _enforce_session_cap() + except Exception: + logger.debug("session cap enforcement failed", exc_info=True) + + timer = threading.Timer(0.1, _run) + timer.daemon = True + timer.start() def _start_idle_reaper() -> None: @@ -1111,15 +1181,24 @@ def _start_agent_build(sid: str, session: dict) -> None: kw = {"session_db": session_db} if resume_sid := current.get("resume_session_id"): kw["session_id"] = resume_sid - # Model/effort/fast the desktop picked for a brand-new chat ride - # in as per-session overrides so the first build uses them - # directly (no global config, no build-then-switch). - if override := current.get("model_override"): - kw["model_override"] = override - if (reasoning := current.get("create_reasoning_override")) is not None: - kw["reasoning_config_override"] = reasoning - if (tier := current.get("create_service_tier_override")) is not None: - kw["service_tier_override"] = tier + resume_overrides = current.get("resume_runtime_overrides") + if isinstance(resume_overrides, dict) and resume_overrides: + # Cold deferred resume: restore the full persisted runtime + # identity (model/provider/base_url/api_mode/reasoning/tier) + # exactly as the eager resume path's _stored_session_runtime_ + # overrides splat did, so a deferred build can't drop the + # provider and fail with "No LLM provider configured". + kw.update(resume_overrides) + else: + # Model/effort/fast the desktop picked for a brand-new chat + # ride in as per-session overrides so the first build uses + # them directly (no global config, no build-then-switch). + if override := current.get("model_override"): + kw["model_override"] = override + if (reasoning := current.get("create_reasoning_override")) is not None: + kw["reasoning_config_override"] = reasoning + if (tier := current.get("create_service_tier_override")) is not None: + kw["service_tier_override"] = tier agent = _make_agent(sid, key, **kw) finally: _clear_session_context(tokens) @@ -1152,6 +1231,19 @@ def _start_agent_build(sid: str, session: dict) -> None: pass _wire_callbacks(sid) + # Surface the self-improvement review's "💾 …" summary as an event + # the TUI/desktop render in-transcript, honoring + # display.memory_notifications. _init_session wires this for the + # eager/branch paths; deferred-built sessions (session.create and the + # default cold resume) build through here, so without this their + # review summaries would leak to stdout instead of the chat. + try: + agent.background_review_callback = lambda message, _sid=sid: _emit( + "review.summary", _sid, {"text": str(message)} + ) + agent.memory_notifications = _load_memory_notifications() + except Exception: + pass # Hydrate credits notices at session OPEN (not just on the first # message), so depletion / usage-band warnings show at "ready". Runs # off the build thread, after the notice_callback is wired. Fail-open. @@ -4594,14 +4686,8 @@ def _(rid, params: dict) -> dict: # + skeleton panel, then build the real AIAgent just after this response is # flushed. This keeps startup responsive while still hydrating tools/skills # without requiring the user to submit a first prompt. - def _deferred_build() -> None: - session = _sessions.get(sid) - if session is not None: - _start_agent_build(sid, session) - - build_timer = threading.Timer(0.05, _deferred_build) - build_timer.daemon = True - build_timer.start() + _schedule_agent_build(sid) + _schedule_session_cap_enforcement() # trim detached idle sessions over the cap return _ok( rid, @@ -4769,6 +4855,110 @@ def _(rid, params: dict) -> dict: return _ok(rid, {"verification": {"status": "unknown", "evidence": None}}) +def _lazy_resume_info(cwd: str, *, model: str = "", provider: str = "") -> dict: + """session.info for a not-yet-built session (the shape session.create + returns). tools/skills land later when the deferred build emits session.info.""" + info = { + "cwd": cwd, + "branch": _git_branch_for_cwd(cwd), + "model": model or _resolve_model(), + "tools": {}, + "skills": {}, + "lazy": True, + "desktop_contract": DESKTOP_BACKEND_CONTRACT, + "profile_name": _current_profile_name(), + } + if provider: + info["provider"] = provider + return info + + +def _deferred_session_record( + session_key: str, + *, + cols: int, + cwd: str, + history: list, + lease, + source: str = "tui", + close_on_disconnect: bool = False, + display_history_prefix: list | None = None, + profile_home: Path | None = None, + lazy: bool = False, + model_override=None, + resume_runtime_overrides: dict | None = None, +) -> dict: + """A live-session record whose AIAgent is built later (lazy watch / cold + resume) — _init_session's shape minus the agent.""" + now = time.time() + return { + "agent": None, + "agent_error": None, + "agent_ready": threading.Event(), + "attached_images": [], + "close_on_disconnect": close_on_disconnect, + "active_session_lease": lease, + "cols": cols, + "created_at": now, + "cwd": cwd, + "display_history_prefix": display_history_prefix or [], + "edit_snapshots": {}, + "explicit_cwd": False, + "history": history, + "history_lock": threading.Lock(), + "history_version": 0, + "image_counter": 0, + "inflight_turn": None, + "last_active": now, + "lazy": lazy, + "model_override": model_override, + "pending_title": None, + "profile_home": str(profile_home) if profile_home is not None else None, + "resume_runtime_overrides": resume_runtime_overrides, + "resume_session_id": session_key, + "running": False, + "session_key": session_key, + "show_reasoning": _load_show_reasoning(), + "slash_worker": None, + "source": source, + "tool_progress_mode": _load_tool_progress_mode(), + "tool_started_at": {}, + "transport": current_transport() or _stdio_transport, + } + + +def _claim_or_reuse_live( + sid: str, session_key: str, record: dict, lease +) -> tuple[str, dict] | None: + """Register ``record`` as the live session for ``session_key`` under the + resume lock, or — if a concurrent resume already won — release ``lease`` and + return the winner for the caller to reuse.""" + with _session_resume_lock: + live = _find_live_session_by_key(session_key) + if live is not None: + if lease is not None: + lease.release() + return live + with _sessions_lock: + _sessions[sid] = record + _register_session_cwd(_sessions[sid]) + return None + + +def _schedule_agent_build(sid: str, delay: float = 0.05) -> None: + """Pre-warm a deferred session's agent off the response path (session.create + and cold resume both build through here; _sess() also builds on demand).""" + + def _run(): + session = _sessions.get(sid) + if session is not None: + _start_agent_build(sid, session) + + timer = threading.Timer(delay, _run) + timer.daemon = True + timer.start() + + @method("session.resume") def _(rid, params: dict) -> dict: target = params.get("session_id", "") @@ -4876,65 +5066,31 @@ def _(rid, params: dict) -> dict: return _err(rid, 4090, limit_message) try: db.reopen_session(target) - # The child's OWN conversation only. Delegation children are - # parent-linked rows, so include_ancestors would prepend the - # parent's entire transcript — a watch window opened on a subagent - # must show the subagent's branch, not the parent's prompt. + # The child's OWN conversation only — include_ancestors would prepend + # the parent's transcript onto the subagent's branch. history = db.get_messages_as_conversation(target) except Exception as e: if lease is not None: lease.release() return _err(rid, 5000, f"resume failed: {e}") - messages = _history_to_messages(history) cwd = profile_resume_cwd or os.getenv("TERMINAL_CWD", os.getcwd()) - now = time.time() - # A delegated child mid-run emits no native session events of its own — - # report its liveness from the relay registry so the window paints a - # busy indicator instead of a dead idle transcript. + record = _deferred_session_record( + target, + cols=cols, + cwd=cwd, + history=history, + lease=lease, + source=str(params.get("source") or "tui").strip() or "tui", + close_on_disconnect=is_truthy_value(params.get("close_on_disconnect", False)), + profile_home=profile_home, + lazy=True, + ) + if (live := _claim_or_reuse_live(sid, target, record, lease)) is not None: + return _ok(rid, _reuse_live_payload(*live)) + # A delegated child mid-run emits no session events of its own — report + # its liveness from the relay registry so the window shows a busy turn. child_running = _child_run_active(target) - source = str(params.get("source") or "tui").strip() or "tui" - with _session_resume_lock: - live = _find_live_session_by_key(target) - if live is not None: - if lease is not None: - lease.release() - return _ok(rid, _reuse_live_payload(*live)) - with _sessions_lock: - _sessions[sid] = { - "agent": None, - "agent_error": None, - "agent_ready": threading.Event(), - "attached_images": [], - "close_on_disconnect": is_truthy_value( - params.get("close_on_disconnect", False) - ), - "active_session_lease": lease, - "cols": cols, - "created_at": now, - "display_history_prefix": [], - "edit_snapshots": {}, - "explicit_cwd": False, - "history": history, - "history_lock": threading.Lock(), - "history_version": 0, - "image_counter": 0, - "cwd": cwd, - "inflight_turn": None, - "last_active": now, - "lazy": True, - "pending_title": None, - "profile_home": str(profile_home) if profile_home is not None else None, - "resume_session_id": target, - "running": False, - "session_key": target, - "show_reasoning": _load_show_reasoning(), - "source": source, - "slash_worker": None, - "tool_progress_mode": _load_tool_progress_mode(), - "tool_started_at": {}, - "transport": current_transport() or _stdio_transport, - } - _register_session_cwd(_sessions[sid]) + messages = _history_to_messages(history) return _ok( rid, { @@ -4942,24 +5098,91 @@ def _(rid, params: dict) -> dict: "resumed": target, "message_count": len(messages), "messages": messages, - "info": { - "cwd": cwd, - "branch": _git_branch_for_cwd(cwd), - "model": _resolve_model(), - "tools": {}, - "skills": {}, - "lazy": True, - "desktop_contract": DESKTOP_BACKEND_CONTRACT, - "profile_name": _current_profile_name(), - }, + "info": _lazy_resume_info(cwd), "inflight": None, "running": child_running, "session_key": target, - "started_at": now, + "started_at": record["created_at"], "status": "streaming" if child_running else "idle", }, ) + # Cold resume default: register the live session and read its stored + # transcript, but build the agent OFF the response path. _make_agent can + # block for seconds (MCP discovery, prompt/skill build, AIAgent + # construction), and every resume caller (desktop + Ink TUI) awaits this RPC + # before it paints — so building eagerly is the bulk of the multi-second + # "switching sessions is frozen" latency. Return the full display transcript + # immediately and pre-warm the agent on a short timer (the same deferred- + # build contract session.create uses); _sess() also builds on demand if the + # first prompt beats the timer. A caller that needs the agent built + # synchronously (e.g. tests of the build race) passes ``eager_build: true`` + # to fall through to the eager path below. Distinct from the lazy/watch + # branch above: a normal resume restores the full ancestor history and the + # session's persisted runtime identity, and is a real (upgradable) session. + if not is_truthy_value(params.get("eager_build", False)): + sid = uuid.uuid4().hex[:8] + lease, limit_message = _claim_active_session_slot(target, live_session_id=sid) + if limit_message is not None: + return _err(rid, 4090, limit_message) + # Interactive resume routes approvals/clarify through gateway prompts; + # the deferred build wires the remaining per-session callbacks. + _enable_gateway_prompts() + try: + db.reopen_session(target) + history = db.get_messages_as_conversation(target) + display_history = db.get_messages_as_conversation(target, include_ancestors=True) + except Exception as e: + if lease is not None: + lease.release() + return _err(rid, 5000, f"resume failed: {e}") + prefix = display_history[: max(0, len(display_history) - len(history))] + # Restore the model/provider/reasoning/tier this chat last used so the + # deferred build (and the info below) match the eager path — without them + # the build drops the provider ("No LLM provider configured"). + overrides = _stored_session_runtime_overrides(found) or {} + model_override = overrides.get("model_override") or {} + cwd = profile_resume_cwd or os.getenv("TERMINAL_CWD", os.getcwd()) + record = _deferred_session_record( + target, + cols=cols, + cwd=cwd, + history=history, + lease=lease, + source=str(params.get("source") or "tui").strip() or "tui", + close_on_disconnect=is_truthy_value(params.get("close_on_disconnect", False)), + display_history_prefix=prefix, + profile_home=profile_home, + model_override=overrides.get("model_override"), + resume_runtime_overrides=overrides or None, + ) + if (live := _claim_or_reuse_live(sid, target, record, lease)) is not None: + return _ok(rid, _reuse_live_payload(*live)) + + _schedule_agent_build(sid) + _schedule_session_cap_enforcement() # trim detached idle sessions over the cap + + messages = _history_to_messages(display_history) + return _ok( + rid, + { + "session_id": sid, + "resumed": target, + "message_count": len(messages), + "messages": messages, + "info": _lazy_resume_info( + cwd, + model=model_override.get("model") or "", + provider=overrides.get("provider_override") or "", + ), + "inflight": None, + "running": False, + "session_key": target, + "started_at": record["created_at"], + "status": "idle", + }, + ) + # Build the agent OUTSIDE the lock — _make_agent can block for seconds # (MCP discovery, prompt/skill build, AIAgent construction). Holding # _session_resume_lock across it would stall session.close on the main