From c4c590e4a14a064033c10067f0a72b3cd2a53b8b Mon Sep 17 00:00:00 2001
From: Brooklyn Nicholson <brooklyn.bb.nicholson@gmail.com>
Date: Thu, 25 Jun 2026 13:18:04 -0500
Subject: [PATCH 1/4] perf(desktop): make session switching fast under load
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Switching sessions in the desktop app could freeze the whole UI for
several seconds on heavy, tool-rich chats. Root causes and fixes:

- Cold `session.resume` built the AIAgent (MCP discovery, prompt/skill
  build) *before* returning, and the desktop awaits that RPC before it
  paints — so the entire switch blocked on the build. Add an opt-in
  `defer_build` resume path (the contract `session.create` already uses):
  return the full display transcript immediately, register an upgradable
  live session, and pre-warm the agent on a short timer. The persisted
  runtime identity (model/provider/base_url/api_mode/reasoning/tier) is
  restored on the deferred build so it can't drop the provider.

- Nothing bounded how many in-memory agents accumulate; a user who
  reconnects often piled up detached sessions for the full 6h TTL. Add a
  soft LRU cap (`max_live_sessions`, default 16) that evicts the
  least-recently-active DETACHED sessions (no live client) — never a
  running, awaiting-input, mid-build, or live-transport one. Reopening
  re-resumes from disk.

- On the prefetch-hit cold-resume path, skip rebuilding a throwaway
  merged-message array (and its 1000-entry Map) when the prefetch already
  painted the exact transcript; the downstream sameMessageList guard
  already drops the publish, so it was pure main-thread cost.

The desktop opts into `defer_build` for every non-watch cold resume; the
eager path stays for CLI/TUI and existing callers.
---
 .../hooks/use-session-actions.test.tsx        |  25 ++
 .../app/session/hooks/use-session-actions.ts  |  20 +-
 hermes_cli/config.py                          |   5 +
 tests/tui_gateway/test_protocol.py            | 140 +++++++++++
 tui_gateway/server.py                         | 231 +++++++++++++++++-
 5 files changed, 410 insertions(+), 11 deletions(-)

diff --git a/apps/desktop/src/app/session/hooks/use-session-actions.test.tsx b/apps/desktop/src/app/session/hooks/use-session-actions.test.tsx
index a84a854ded4..31b2cc58afc 100644
--- a/apps/desktop/src/app/session/hooks/use-session-actions.test.tsx
+++ b/apps/desktop/src/app/session/hooks/use-session-actions.test.tsx
@@ -256,4 +256,29 @@ describe('resumeSession failure recovery', () => {
 
     expect($resumeFailedSessionId.get()).toBeNull()
   })
+
+  it('asks the backend to DEFER the agent build on a normal cold resume', async () => {
+    // The switch-latency fix: a non-watch cold resume tells the gateway to
+    // return the transcript immediately and build the agent in the background,
+    // rather than blocking the RPC (and the whole switch) on _make_agent.
+    let resumeParams: Record<string, unknown> | undefined
+
+    const requestGateway = vi.fn(async (method: string, params?: Record<string, unknown>) => {
+      if (method === 'session.resume') {
+        resumeParams = params
+
+        return { session_id: 'runtime-1', resumed: params?.session_id, messages: [], info: {} } as never
+      }
+
+      return {} as never
+    })
+
+    vi.mocked(getSessionMessages).mockResolvedValue({ messages: [] } as never)
+
+    await runResume(requestGateway)
+
+    expect(resumeParams).toMatchObject({ defer_build: true })
+    // Watch-window lazy attach is the OTHER mode; a normal resume isn't lazy.
+    expect(resumeParams).not.toHaveProperty('lazy')
+  })
 })
diff --git a/apps/desktop/src/app/session/hooks/use-session-actions.ts b/apps/desktop/src/app/session/hooks/use-session-actions.ts
index 36dfea759f2..5c8be9762e1 100644
--- a/apps/desktop/src/app/session/hooks/use-session-actions.ts
+++ b/apps/desktop/src/app/session/hooks/use-session-actions.ts
@@ -706,7 +706,12 @@ export function useSessionActions({
         const resumePromise = requestGateway<SessionResumeResponse>('session.resume', {
           session_id: storedSessionId,
           cols: 96,
-          ...(watchWindow ? { lazy: true } : {}),
+          // Watch windows attach lazily (live mirror); every other cold resume
+          // asks the backend to DEFER the agent build so the RPC returns the
+          // transcript immediately instead of blocking the switch on
+          // _make_agent (MCP discovery / prompt build). The agent pre-warms in
+          // the background and the prefetch above paints the transcript.
+          ...(watchWindow ? { lazy: true } : { defer_build: true }),
           ...(sessionProfile ? { profile: sessionProfile } : {})
         })
         // The rejection is consumed by the `await` below; this guard only
@@ -754,7 +759,18 @@ export function useSessionActions({
                 return chatMessageArraysEquivalent(currentMessages, resumedMessages) ? currentMessages : resumedMessages
               })()
 
-        const messagesForView = preserveLocalAssistantErrors(preferredMessages, currentMessages)
+        // When the prefetch already painted these exact messages (the common
+        // cold-resume path), `preferredMessages` IS the live `$messages` array.
+        // Re-running preserveLocalAssistantErrors there would build a 1000-entry
+        // Map and map the whole transcript into a throwaway array on every
+        // switch — pure main-thread cost on the hot path (the downstream
+        // sameMessageList guard already drops the publish, so it buys nothing).
+        // The prefetch branch already merged local assistant errors when it
+        // built `localSnapshot`, so reuse the ref instead.
+        const messagesForView =
+          preferredMessages === currentMessages
+            ? currentMessages
+            : preserveLocalAssistantErrors(preferredMessages, currentMessages)
 
         setActiveSessionId(resumed.session_id)
         activeSessionIdRef.current = resumed.session_id
diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index 06e3ad5d7b9..b64086c91e8 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -901,6 +901,11 @@ DEFAULT_CONFIG = {
     # Global active chat session cap across CLI, TUI/dashboard, and messaging.
     # None/0 = unbounded.
     "max_concurrent_sessions": None,
+    # Soft LRU cap on in-memory TUI/desktop/dashboard sessions. When more than
+    # this many are live, the gateway evicts the least-recently-active DETACHED
+    # sessions (no live client) so accumulated agents don't pile up under memory
+    # pressure. Reopening one re-resumes it from disk. 0/null disables.
+    "max_live_sessions": 16,
     "agent": {
         "max_turns": 90,
         # Inactivity timeout for gateway agent execution (seconds).
diff --git a/tests/tui_gateway/test_protocol.py b/tests/tui_gateway/test_protocol.py
index 3b385bf825a..8b45a672aed 100644
--- a/tests/tui_gateway/test_protocol.py
+++ b/tests/tui_gateway/test_protocol.py
@@ -336,6 +336,146 @@ def test_session_resume_returns_hydrated_messages(server, monkeypatch):
     ]
 
 
+def test_session_resume_defer_build_returns_transcript_without_blocking(server, monkeypatch):
+    """``defer_build: true`` (desktop cold resume) must return the full display
+    transcript immediately and register an upgradable live session WITHOUT
+    building the agent on the response path — that eager build is the
+    multi-second switch latency."""
+
+    target = "20260409_010101_abc123"
+
+    class _DB:
+        def get_session(self, _sid):
+            return {
+                "id": target,
+                "model": "vendor/cool-model",
+                "model_config": {"provider": "vendor"},
+            }
+
+        def get_session_by_title(self, _title):
+            return None
+
+        def resolve_resume_session_id(self, sid):
+            return sid
+
+        def reopen_session(self, _sid):
+            return None
+
+        def get_messages_as_conversation(self, _sid, include_ancestors=False):
+            return [
+                {"role": "user", "content": "hello"},
+                {"role": "assistant", "content": "yo"},
+            ]
+
+    builds: list = []
+
+    monkeypatch.setattr(server, "_get_db", lambda: _DB())
+    # The response path must never call _make_agent; route the deferred timer
+    # through a recorder so a 50ms fire can't build (or crash) under the test.
+    monkeypatch.setattr(
+        server, "_make_agent", lambda *a, **k: (_ for _ in ()).throw(AssertionError("no eager build"))
+    )
+    monkeypatch.setattr(server, "_start_agent_build", lambda sid, session: builds.append(sid))
+    monkeypatch.setattr(server, "_schedule_session_cap_enforcement", lambda: None)
+
+    resp = server.handle_request(
+        {
+            "id": "r1",
+            "method": "session.resume",
+            "params": {"session_id": target, "cols": 100, "defer_build": True},
+        }
+    )
+
+    assert "error" not in resp
+    result = resp["result"]
+    assert result["resumed"] == target
+    assert result["session_key"] == target
+    assert result["message_count"] == 2
+    assert result["messages"] == [
+        {"role": "user", "text": "hello"},
+        {"role": "assistant", "text": "yo"},
+    ]
+    # Lazy info contract (same shape session.create returns), with the session's
+    # persisted model/provider restored rather than the global default.
+    assert result["info"]["lazy"] is True
+    assert result["info"]["model"] == "vendor/cool-model"
+    assert result["info"]["provider"] == "vendor"
+    assert result["info"]["desktop_contract"] == server.DESKTOP_BACKEND_CONTRACT
+
+    sid = result["session_id"]
+    session = server._sessions[sid]
+    # Registered but not built: agent is None and the resume key is carried so a
+    # later prompt.submit / _sess() upgrade continues THIS stored conversation.
+    assert session["agent"] is None
+    assert session["resume_session_id"] == target
+    assert not session["agent_ready"].is_set()
+    # Not a watch spectator: a normal deferred resume is a real session.
+    assert not session.get("lazy")
+    # The persisted runtime identity is stashed for the deferred build so it
+    # can't drop the provider ("No LLM provider configured").
+    assert session["resume_runtime_overrides"]["model_override"]["model"] == "vendor/cool-model"
+    assert server._find_live_session_by_key(target) == (sid, session)
+
+
+def test_enforce_session_cap_evicts_oldest_detached_only(server, monkeypatch):
+    """The LRU cap frees the least-recently-active DETACHED sessions when over
+    the limit, and never a live-transport / running / mid-build one."""
+
+    monkeypatch.setattr(server, "_load_cfg", lambda: {"max_live_sessions": 2})
+    evicted: list[str] = []
+    monkeypatch.setattr(
+        server, "_close_session_by_id", lambda sid, end_reason=None: evicted.append(sid)
+    )
+
+    def _ready() -> threading.Event:
+        ev = threading.Event()
+        ev.set()
+        return ev
+
+    detached = server._detached_ws_transport
+    live = object()  # no _closed attr -> live transport, never evictable
+
+    server._sessions.clear()
+    server._sessions.update(
+        {
+            "old_detached": {"transport": detached, "last_active": 100.0, "agent_ready": _ready()},
+            "new_detached": {"transport": detached, "last_active": 300.0, "agent_ready": _ready()},
+            "running_detached": {
+                "transport": detached,
+                "last_active": 50.0,
+                "running": True,
+                "agent_ready": _ready(),
+            },
+            "focused_live": {"transport": live, "last_active": 200.0, "agent_ready": _ready()},
+        }
+    )
+
+    server._enforce_session_cap()
+
+    # 4 sessions, cap 2 -> evict 2. Only detached+idle+built are eligible, oldest
+    # first; the running one and the live-transport one are exempt.
+    assert evicted == ["old_detached", "new_detached"]
+
+
+def test_enforce_session_cap_disabled_is_noop(server, monkeypatch):
+    monkeypatch.setattr(server, "_load_cfg", lambda: {"max_live_sessions": 0})
+    evicted: list[str] = []
+    monkeypatch.setattr(
+        server, "_close_session_by_id", lambda sid, end_reason=None: evicted.append(sid)
+    )
+    server._sessions.clear()
+    server._sessions.update(
+        {
+            f"s{i}": {"transport": server._detached_ws_transport, "last_active": float(i)}
+            for i in range(5)
+        }
+    )
+
+    server._enforce_session_cap()
+
+    assert evicted == []
+
+
 def test_session_resume_handles_multimodal_list_content(server, monkeypatch):
     """A user message persisted with list-shaped multimodal content used to
     crash session resume with ``'list' object has no attribute 'strip'``."""
diff --git a/tui_gateway/server.py b/tui_gateway/server.py
index 2bd20968590..40910e291ba 100644
--- a/tui_gateway/server.py
+++ b/tui_gateway/server.py
@@ -741,6 +741,79 @@ def _reap_idle_sessions() -> None:
         victims = [sid for sid, s in _sessions.items() if _session_is_evictable(sid, s, now)]
     for sid in victims:
         _close_session_by_id(sid, end_reason="idle_timeout")
+    _enforce_session_cap()
+
+
+# Soft LRU cap on in-memory sessions. The 6h TTL reaper above only frees
+# sessions that have been idle for hours; a heavy user who reconnects often
+# accumulates detached sessions (the report's ``detached_sessions=5``) whose
+# agents sit resident for the full TTL. The cap evicts the least-recently-active
+# DETACHED sessions sooner so live agents don't pile up under memory pressure.
+# Default-on but provably safe: it only touches sessions with no live client
+# (reopening re-resumes them from the DB) and never a running / pending /
+# mid-build / live-transport one. 0/null disables.
+def _max_live_sessions() -> int:
+    try:
+        from hermes_cli.active_sessions import coerce_max_concurrent_sessions
+
+        cfg = _load_cfg() or {}
+        raw = cfg.get("max_live_sessions")
+        if raw is None:
+            gateway_cfg = cfg.get("gateway")
+            if isinstance(gateway_cfg, dict):
+                raw = gateway_cfg.get("max_live_sessions")
+        coerced = coerce_max_concurrent_sessions(raw, key="max_live_sessions")
+        return int(coerced) if coerced else 0
+    except Exception:
+        return 0
+
+
+def _session_is_lru_evictable(sid: str, session: dict) -> bool:
+    # Same hard exemptions as the TTL reaper (never evict a session mid-turn,
+    # awaiting input, or still building), but WITHOUT the hours-scale age gate:
+    # a detached session is eligible the moment it loses its client.
+    if session.get("running") or _session_pending_kind(sid):
+        return False
+    ready = session.get("agent_ready")
+    if ready is not None and not ready.is_set() and not session.get("lazy"):
+        return False
+    return _transport_is_dead(session.get("transport"))
+
+
+def _enforce_session_cap() -> None:
+    cap = _max_live_sessions()
+    if cap <= 0:
+        return
+    with _sessions_lock:
+        total = len(_sessions)
+        if total <= cap:
+            return
+        evictable = [
+            (sid, s) for sid, s in _sessions.items() if _session_is_lru_evictable(sid, s)
+        ]
+    # Oldest-touched first; only evict down to the cap (live/focused sessions on
+    # a live transport are never eligible, so we may stop short of the cap).
+    evictable.sort(key=lambda kv: float(kv[1].get("last_active") or 0.0))
+    overflow = total - cap
+    for sid, _s in evictable[:overflow]:
+        _close_session_by_id(sid, end_reason="lru_evict")
+
+
+def _schedule_session_cap_enforcement() -> None:
+    """Run the LRU sweep off the response path (eviction can call agent.close)."""
+    try:
+        timer = threading.Timer(0.1, lambda: _safe_enforce_session_cap())
+        timer.daemon = True
+        timer.start()
+    except Exception:
+        pass
+
+
+def _safe_enforce_session_cap() -> None:
+    try:
+        _enforce_session_cap()
+    except Exception:
+        logger.debug("session cap enforcement failed", exc_info=True)
 
 
 def _start_idle_reaper() -> None:
@@ -1111,15 +1184,24 @@ def _start_agent_build(sid: str, session: dict) -> None:
                 kw = {"session_db": session_db}
                 if resume_sid := current.get("resume_session_id"):
                     kw["session_id"] = resume_sid
-                # Model/effort/fast the desktop picked for a brand-new chat ride
-                # in as per-session overrides so the first build uses them
-                # directly (no global config, no build-then-switch).
-                if override := current.get("model_override"):
-                    kw["model_override"] = override
-                if (reasoning := current.get("create_reasoning_override")) is not None:
-                    kw["reasoning_config_override"] = reasoning
-                if (tier := current.get("create_service_tier_override")) is not None:
-                    kw["service_tier_override"] = tier
+                resume_overrides = current.get("resume_runtime_overrides")
+                if isinstance(resume_overrides, dict) and resume_overrides:
+                    # Cold deferred resume: restore the full persisted runtime
+                    # identity (model/provider/base_url/api_mode/reasoning/tier)
+                    # exactly as the eager resume path's _stored_session_runtime_
+                    # overrides splat did, so a deferred build can't drop the
+                    # provider and fail with "No LLM provider configured".
+                    kw.update(resume_overrides)
+                else:
+                    # Model/effort/fast the desktop picked for a brand-new chat
+                    # ride in as per-session overrides so the first build uses
+                    # them directly (no global config, no build-then-switch).
+                    if override := current.get("model_override"):
+                        kw["model_override"] = override
+                    if (reasoning := current.get("create_reasoning_override")) is not None:
+                        kw["reasoning_config_override"] = reasoning
+                    if (tier := current.get("create_service_tier_override")) is not None:
+                        kw["service_tier_override"] = tier
                 agent = _make_agent(sid, key, **kw)
             finally:
                 _clear_session_context(tokens)
@@ -4602,6 +4684,8 @@ def _(rid, params: dict) -> dict:
     build_timer = threading.Timer(0.05, _deferred_build)
     build_timer.daemon = True
     build_timer.start()
+    # A new live session just landed; trim detached idle ones over the cap.
+    _schedule_session_cap_enforcement()
 
     return _ok(
         rid,
@@ -4960,6 +5044,135 @@ def _(rid, params: dict) -> dict:
             },
         )
 
+    # Deferred build (desktop cold resume): register the live session and read
+    # its stored transcript WITHOUT building the agent on the response path.
+    # _make_agent can block for seconds (MCP discovery, prompt/skill build,
+    # AIAgent construction), and the desktop awaits this RPC before it paints
+    # the chat — so the eager build below is the bulk of the multi-second
+    # "switching sessions is frozen" latency. We return the full display
+    # transcript immediately and pre-warm the agent on a short timer (the same
+    # deferred-build contract session.create uses); _sess() also builds on
+    # demand if the first prompt beats the timer. Distinct from the lazy/watch
+    # branch above: a normal resume restores the full ancestor history and the
+    # session's persisted runtime identity, and is a real (upgradable) session,
+    # not a never-built spectator window.
+    if is_truthy_value(params.get("defer_build", False)):
+        sid = uuid.uuid4().hex[:8]
+        lease, limit_message = _claim_active_session_slot(target, live_session_id=sid)
+        if limit_message is not None:
+            return _err(rid, 4090, limit_message)
+        try:
+            db.reopen_session(target)
+            history = db.get_messages_as_conversation(target)
+            display_history = db.get_messages_as_conversation(
+                target, include_ancestors=True
+            )
+        except Exception as e:
+            if lease is not None:
+                lease.release()
+            return _err(rid, 5000, f"resume failed: {e}")
+        display_history_prefix = display_history[
+            : max(0, len(display_history) - len(history))
+        ]
+        messages = _history_to_messages(display_history)
+        # Restore the model/provider/reasoning/tier this chat actually used so
+        # the deferred build (and the immediate info payload) match the eager
+        # path — without these a deferred build drops the provider and resume
+        # fails with "No LLM provider configured".
+        stored_runtime_overrides = _stored_session_runtime_overrides(found) or {}
+        cwd = profile_resume_cwd or os.getenv("TERMINAL_CWD", os.getcwd())
+        now = time.time()
+        source = str(params.get("source") or "tui").strip() or "tui"
+        with _session_resume_lock:
+            live = _find_live_session_by_key(target)
+            if live is not None:
+                if lease is not None:
+                    lease.release()
+                return _ok(rid, _reuse_live_payload(*live))
+            with _sessions_lock:
+                _sessions[sid] = {
+                    "agent": None,
+                    "agent_error": None,
+                    "agent_ready": threading.Event(),
+                    "attached_images": [],
+                    "close_on_disconnect": is_truthy_value(
+                        params.get("close_on_disconnect", False)
+                    ),
+                    "active_session_lease": lease,
+                    "cols": cols,
+                    "created_at": now,
+                    "display_history_prefix": display_history_prefix,
+                    "edit_snapshots": {},
+                    "explicit_cwd": False,
+                    "history": history,
+                    "history_lock": threading.Lock(),
+                    "history_version": 0,
+                    "image_counter": 0,
+                    "cwd": cwd,
+                    "inflight_turn": None,
+                    "last_active": now,
+                    "model_override": stored_runtime_overrides.get("model_override"),
+                    "pending_title": None,
+                    "profile_home": str(profile_home) if profile_home is not None else None,
+                    "resume_session_id": target,
+                    "resume_runtime_overrides": stored_runtime_overrides or None,
+                    "running": False,
+                    "session_key": target,
+                    "show_reasoning": _load_show_reasoning(),
+                    "source": source,
+                    "slash_worker": None,
+                    "tool_progress_mode": _load_tool_progress_mode(),
+                    "tool_started_at": {},
+                    "transport": current_transport() or _stdio_transport,
+                }
+                _register_session_cwd(_sessions[sid])
+
+        def _deferred_build() -> None:
+            session = _sessions.get(sid)
+            if session is not None:
+                _start_agent_build(sid, session)
+
+        build_timer = threading.Timer(0.05, _deferred_build)
+        build_timer.daemon = True
+        build_timer.start()
+        # A new live session just landed; trim detached idle ones over the cap.
+        _schedule_session_cap_enforcement()
+
+        model_override = stored_runtime_overrides.get("model_override")
+        resumed_model = ""
+        if isinstance(model_override, dict):
+            resumed_model = str(model_override.get("model") or "").strip()
+        elif isinstance(model_override, str):
+            resumed_model = model_override.strip()
+        info = {
+            "cwd": cwd,
+            "branch": _git_branch_for_cwd(cwd),
+            "model": resumed_model or _resolve_model(),
+            "tools": {},
+            "skills": {},
+            "lazy": True,
+            "desktop_contract": DESKTOP_BACKEND_CONTRACT,
+            "profile_name": _current_profile_name(),
+        }
+        provider_override = stored_runtime_overrides.get("provider_override")
+        if provider_override:
+            info["provider"] = provider_override
+        return _ok(
+            rid,
+            {
+                "session_id": sid,
+                "resumed": target,
+                "message_count": len(messages),
+                "messages": messages,
+                "info": info,
+                "inflight": None,
+                "running": False,
+                "session_key": target,
+                "started_at": now,
+                "status": "idle",
+            },
+        )
+
     # Build the agent OUTSIDE the lock — _make_agent can block for seconds
     # (MCP discovery, prompt/skill build, AIAgent construction). Holding
     # _session_resume_lock across it would stall session.close on the main

From 3bf00e459af707635c890abcb1d748aec029a918 Mon Sep 17 00:00:00 2001
From: Brooklyn Nicholson <brooklyn.bb.nicholson@gmail.com>
Date: Thu, 25 Jun 2026 13:25:21 -0500
Subject: [PATCH 2/4] perf(desktop): make deferred resume the default, not an
 opt-in flag
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Per review: gating the faster path behind a `defer_build` flag that the
only caller always sends is pointless. Flip it — `session.resume` now
defers the agent build by default for every caller (desktop + Ink TUI);
a caller that needs the agent built synchronously passes `eager_build:
true` (used by the build-race test). The desktop no longer sends a flag.

While verifying the flip, fixed two real parity gaps the deferred path
had vs the old eager (`_init_session`) path:

- `_enable_gateway_prompts()` was never called on a deferred resume, so
  approvals/clarify wouldn't route through the gateway prompt callbacks.
- `_start_agent_build` never wired `background_review_callback` /
  `memory_notifications`, so a deferred-built session's self-improvement
  "💾 …" summary leaked to stdout instead of rendering in-transcript.
  Wiring it there also fixes it for `session.create` sessions, which
  build through the same path.

ACP is unaffected (it uses its own session_manager, not this RPC); the
Ink TUI already consumes the same lazy `info` shape from session.create
and upgrades on the later `session.info` event.
---
 .../hooks/use-session-actions.test.tsx        | 12 +++---
 .../app/session/hooks/use-session-actions.ts  | 12 +++---
 tests/tui_gateway/test_protocol.py            | 21 ++++++----
 tui_gateway/server.py                         | 41 +++++++++++++------
 4 files changed, 54 insertions(+), 32 deletions(-)

diff --git a/apps/desktop/src/app/session/hooks/use-session-actions.test.tsx b/apps/desktop/src/app/session/hooks/use-session-actions.test.tsx
index 31b2cc58afc..f47b6b62504 100644
--- a/apps/desktop/src/app/session/hooks/use-session-actions.test.tsx
+++ b/apps/desktop/src/app/session/hooks/use-session-actions.test.tsx
@@ -257,10 +257,11 @@ describe('resumeSession failure recovery', () => {
     expect($resumeFailedSessionId.get()).toBeNull()
   })
 
-  it('asks the backend to DEFER the agent build on a normal cold resume', async () => {
-    // The switch-latency fix: a non-watch cold resume tells the gateway to
-    // return the transcript immediately and build the agent in the background,
-    // rather than blocking the RPC (and the whole switch) on _make_agent.
+  it('resumes via the gateway default (deferred build) — not lazy, no eager opt-out', async () => {
+    // The switch-latency fix lives backend-side: a normal cold resume gets the
+    // gateway's default DEFERRED build (transcript returns immediately, agent
+    // pre-warms in the background). The client must NOT force the synchronous
+    // path (eager_build) and is only `lazy` for subagent watch windows.
     let resumeParams: Record<string, unknown> | undefined
 
     const requestGateway = vi.fn(async (method: string, params?: Record<string, unknown>) => {
@@ -277,8 +278,7 @@ describe('resumeSession failure recovery', () => {
 
     await runResume(requestGateway)
 
-    expect(resumeParams).toMatchObject({ defer_build: true })
-    // Watch-window lazy attach is the OTHER mode; a normal resume isn't lazy.
     expect(resumeParams).not.toHaveProperty('lazy')
+    expect(resumeParams).not.toHaveProperty('eager_build')
   })
 })
diff --git a/apps/desktop/src/app/session/hooks/use-session-actions.ts b/apps/desktop/src/app/session/hooks/use-session-actions.ts
index 5c8be9762e1..efc99d5b84c 100644
--- a/apps/desktop/src/app/session/hooks/use-session-actions.ts
+++ b/apps/desktop/src/app/session/hooks/use-session-actions.ts
@@ -706,12 +706,12 @@ export function useSessionActions({
         const resumePromise = requestGateway<SessionResumeResponse>('session.resume', {
           session_id: storedSessionId,
           cols: 96,
-          // Watch windows attach lazily (live mirror); every other cold resume
-          // asks the backend to DEFER the agent build so the RPC returns the
-          // transcript immediately instead of blocking the switch on
-          // _make_agent (MCP discovery / prompt build). The agent pre-warms in
-          // the background and the prefetch above paints the transcript.
-          ...(watchWindow ? { lazy: true } : { defer_build: true }),
+          // Watch windows attach lazily (live mirror). Every other cold resume
+          // gets the gateway's default deferred build: the RPC returns the
+          // transcript immediately instead of blocking the switch on _make_agent
+          // (MCP discovery / prompt build), and the agent pre-warms in the
+          // background while the prefetch above paints the transcript.
+          ...(watchWindow ? { lazy: true } : {}),
           ...(sessionProfile ? { profile: sessionProfile } : {})
         })
         // The rejection is consumed by the `await` below; this guard only
diff --git a/tests/tui_gateway/test_protocol.py b/tests/tui_gateway/test_protocol.py
index 8b45a672aed..c0003dfc818 100644
--- a/tests/tui_gateway/test_protocol.py
+++ b/tests/tui_gateway/test_protocol.py
@@ -323,7 +323,9 @@ def test_session_resume_returns_hydrated_messages(server, monkeypatch):
         {
             "id": "r1",
             "method": "session.resume",
-            "params": {"session_id": "20260409_010101_abc123", "cols": 100},
+            # eager_build: exercise the synchronous build path (this test
+            # monkeypatches _make_agent/_init_session/_session_info).
+            "params": {"session_id": "20260409_010101_abc123", "cols": 100, "eager_build": True},
         }
     )
 
@@ -336,11 +338,12 @@ def test_session_resume_returns_hydrated_messages(server, monkeypatch):
     ]
 
 
-def test_session_resume_defer_build_returns_transcript_without_blocking(server, monkeypatch):
-    """``defer_build: true`` (desktop cold resume) must return the full display
+def test_session_resume_defaults_to_deferred_build(server, monkeypatch):
+    """A normal cold resume (no ``eager_build``) must return the full display
     transcript immediately and register an upgradable live session WITHOUT
     building the agent on the response path — that eager build is the
-    multi-second switch latency."""
+    multi-second switch latency. Deferred is the default; ``eager_build: true``
+    opts back into the synchronous path."""
 
     target = "20260409_010101_abc123"
 
@@ -382,7 +385,7 @@ def test_session_resume_defer_build_returns_transcript_without_blocking(server,
         {
             "id": "r1",
             "method": "session.resume",
-            "params": {"session_id": target, "cols": 100, "defer_build": True},
+            "params": {"session_id": target, "cols": 100},
         }
     )
 
@@ -514,7 +517,7 @@ def test_session_resume_handles_multimodal_list_content(server, monkeypatch):
         {
             "id": "r1",
             "method": "session.resume",
-            "params": {"session_id": "20260502_000000_listcontent", "cols": 100},
+            "params": {"session_id": "20260502_000000_listcontent", "cols": 100, "eager_build": True},
         }
     )
 
@@ -828,7 +831,9 @@ def test_session_resume_reuses_existing_live_session(server, monkeypatch):
                 {
                     "id": "first",
                     "method": "session.resume",
-                    "params": {"session_id": target, "cols": 100},
+                    # eager_build: this test drives the synchronous build race +
+                    # double-checked locking that only the eager path exercises.
+                    "params": {"session_id": target, "cols": 100, "eager_build": True},
                 }
             )
 
@@ -843,7 +848,7 @@ def test_session_resume_reuses_existing_live_session(server, monkeypatch):
                 {
                     "id": "second",
                     "method": "session.resume",
-                    "params": {"session_id": target, "cols": 120},
+                    "params": {"session_id": target, "cols": 120, "eager_build": True},
                 }
             )
 
diff --git a/tui_gateway/server.py b/tui_gateway/server.py
index 40910e291ba..6891c49eceb 100644
--- a/tui_gateway/server.py
+++ b/tui_gateway/server.py
@@ -1234,6 +1234,19 @@ def _start_agent_build(sid: str, session: dict) -> None:
                 pass
 
             _wire_callbacks(sid)
+            # Surface the self-improvement review's "💾 …" summary as an event
+            # the TUI/desktop render in-transcript, honoring
+            # display.memory_notifications. _init_session wires this for the
+            # eager/branch paths; deferred-built sessions (session.create and the
+            # default cold resume) build through here, so without this their
+            # review summaries would leak to stdout instead of the chat.
+            try:
+                agent.background_review_callback = lambda message, _sid=sid: _emit(
+                    "review.summary", _sid, {"text": str(message)}
+                )
+                agent.memory_notifications = _load_memory_notifications()
+            except Exception:
+                pass
             # Hydrate credits notices at session OPEN (not just on the first
             # message), so depletion / usage-band warnings show at "ready". Runs
             # off the build thread, after the notice_callback is wired. Fail-open.
@@ -5044,23 +5057,27 @@ def _(rid, params: dict) -> dict:
             },
         )
 
-    # Deferred build (desktop cold resume): register the live session and read
-    # its stored transcript WITHOUT building the agent on the response path.
-    # _make_agent can block for seconds (MCP discovery, prompt/skill build,
-    # AIAgent construction), and the desktop awaits this RPC before it paints
-    # the chat — so the eager build below is the bulk of the multi-second
-    # "switching sessions is frozen" latency. We return the full display
-    # transcript immediately and pre-warm the agent on a short timer (the same
-    # deferred-build contract session.create uses); _sess() also builds on
-    # demand if the first prompt beats the timer. Distinct from the lazy/watch
+    # Cold resume default: register the live session and read its stored
+    # transcript, but build the agent OFF the response path. _make_agent can
+    # block for seconds (MCP discovery, prompt/skill build, AIAgent
+    # construction), and every resume caller (desktop + Ink TUI) awaits this RPC
+    # before it paints — so building eagerly is the bulk of the multi-second
+    # "switching sessions is frozen" latency. Return the full display transcript
+    # immediately and pre-warm the agent on a short timer (the same deferred-
+    # build contract session.create uses); _sess() also builds on demand if the
+    # first prompt beats the timer. A caller that needs the agent built
+    # synchronously (e.g. tests of the build race) passes ``eager_build: true``
+    # to fall through to the eager path below. Distinct from the lazy/watch
     # branch above: a normal resume restores the full ancestor history and the
-    # session's persisted runtime identity, and is a real (upgradable) session,
-    # not a never-built spectator window.
-    if is_truthy_value(params.get("defer_build", False)):
+    # session's persisted runtime identity, and is a real (upgradable) session.
+    if not is_truthy_value(params.get("eager_build", False)):
         sid = uuid.uuid4().hex[:8]
         lease, limit_message = _claim_active_session_slot(target, live_session_id=sid)
         if limit_message is not None:
             return _err(rid, 4090, limit_message)
+        # Interactive resume routes approvals/clarify through gateway prompts;
+        # the deferred build wires the remaining per-session callbacks.
+        _enable_gateway_prompts()
         try:
             db.reopen_session(target)
             history = db.get_messages_as_conversation(target)

From 1ca1f9f2c7a19e92da5e5f13f4880f409b99635b Mon Sep 17 00:00:00 2001
From: Brooklyn Nicholson <brooklyn.bb.nicholson@gmail.com>
Date: Thu, 25 Jun 2026 13:54:53 -0500
Subject: [PATCH 3/4] refactor(tui_gateway): DRY the deferred-session paths

Collapse the duplicated cold-resume / lazy-watch / create scaffolding into
shared helpers: _deferred_session_record (the live-session dict minus the
agent), _lazy_resume_info (the not-yet-built session.info), _claim_or_reuse_live
(lock + double-checked register-or-reuse), and _schedule_agent_build (the
pre-warm timer). Net -12 lines, three copies of the ~30-key session dict and
the lazy-info block down to one each. No behavior change.
---
 .../app/session/hooks/use-session-actions.ts  |  11 +-
 tui_gateway/server.py                         | 337 +++++++++---------
 2 files changed, 168 insertions(+), 180 deletions(-)

diff --git a/apps/desktop/src/app/session/hooks/use-session-actions.ts b/apps/desktop/src/app/session/hooks/use-session-actions.ts
index efc99d5b84c..fb06c5a6048 100644
--- a/apps/desktop/src/app/session/hooks/use-session-actions.ts
+++ b/apps/desktop/src/app/session/hooks/use-session-actions.ts
@@ -759,14 +759,9 @@ export function useSessionActions({
                 return chatMessageArraysEquivalent(currentMessages, resumedMessages) ? currentMessages : resumedMessages
               })()
 
-        // When the prefetch already painted these exact messages (the common
-        // cold-resume path), `preferredMessages` IS the live `$messages` array.
-        // Re-running preserveLocalAssistantErrors there would build a 1000-entry
-        // Map and map the whole transcript into a throwaway array on every
-        // switch — pure main-thread cost on the hot path (the downstream
-        // sameMessageList guard already drops the publish, so it buys nothing).
-        // The prefetch branch already merged local assistant errors when it
-        // built `localSnapshot`, so reuse the ref instead.
+        // Prefetch-hit fast path: `preferredMessages` IS the live `$messages`
+        // array (already error-merged when `localSnapshot` was built), so reuse
+        // the ref instead of rebuilding a throwaway transcript+Map every switch.
         const messagesForView =
           preferredMessages === currentMessages
             ? currentMessages
diff --git a/tui_gateway/server.py b/tui_gateway/server.py
index 6891c49eceb..624ce6b7be8 100644
--- a/tui_gateway/server.py
+++ b/tui_gateway/server.py
@@ -801,19 +801,16 @@ def _enforce_session_cap() -> None:
 
 def _schedule_session_cap_enforcement() -> None:
     """Run the LRU sweep off the response path (eviction can call agent.close)."""
-    try:
-        timer = threading.Timer(0.1, lambda: _safe_enforce_session_cap())
-        timer.daemon = True
-        timer.start()
-    except Exception:
-        pass
 
+    def _run():
+        try:
+            _enforce_session_cap()
+        except Exception:
+            logger.debug("session cap enforcement failed", exc_info=True)
 
-def _safe_enforce_session_cap() -> None:
-    try:
-        _enforce_session_cap()
-    except Exception:
-        logger.debug("session cap enforcement failed", exc_info=True)
+    timer = threading.Timer(0.1, _run)
+    timer.daemon = True
+    timer.start()
 
 
 def _start_idle_reaper() -> None:
@@ -4689,16 +4686,8 @@ def _(rid, params: dict) -> dict:
     # + skeleton panel, then build the real AIAgent just after this response is
     # flushed.  This keeps startup responsive while still hydrating tools/skills
     # without requiring the user to submit a first prompt.
-    def _deferred_build() -> None:
-        session = _sessions.get(sid)
-        if session is not None:
-            _start_agent_build(sid, session)
-
-    build_timer = threading.Timer(0.05, _deferred_build)
-    build_timer.daemon = True
-    build_timer.start()
-    # A new live session just landed; trim detached idle ones over the cap.
-    _schedule_session_cap_enforcement()
+    _schedule_agent_build(sid)
+    _schedule_session_cap_enforcement()  # trim detached idle sessions over the cap
 
     return _ok(
         rid,
@@ -4866,6 +4855,110 @@ def _(rid, params: dict) -> dict:
         return _ok(rid, {"verification": {"status": "unknown", "evidence": None}})
 
 
+def _lazy_resume_info(cwd: str, *, model: str = "", provider: str = "") -> dict:
+    """session.info for a not-yet-built session (the shape session.create
+    returns). tools/skills land later when the deferred build emits session.info."""
+    info = {
+        "cwd": cwd,
+        "branch": _git_branch_for_cwd(cwd),
+        "model": model or _resolve_model(),
+        "tools": {},
+        "skills": {},
+        "lazy": True,
+        "desktop_contract": DESKTOP_BACKEND_CONTRACT,
+        "profile_name": _current_profile_name(),
+    }
+    if provider:
+        info["provider"] = provider
+    return info
+
+
+def _deferred_session_record(
+    session_key: str,
+    *,
+    cols: int,
+    cwd: str,
+    history: list,
+    lease,
+    source: str = "tui",
+    close_on_disconnect: bool = False,
+    display_history_prefix: list | None = None,
+    profile_home: Path | None = None,
+    lazy: bool = False,
+    model_override=None,
+    resume_runtime_overrides: dict | None = None,
+) -> dict:
+    """A live-session record whose AIAgent is built later (lazy watch / cold
+    resume) — _init_session's shape minus the agent."""
+    now = time.time()
+    return {
+        "agent": None,
+        "agent_error": None,
+        "agent_ready": threading.Event(),
+        "attached_images": [],
+        "close_on_disconnect": close_on_disconnect,
+        "active_session_lease": lease,
+        "cols": cols,
+        "created_at": now,
+        "cwd": cwd,
+        "display_history_prefix": display_history_prefix or [],
+        "edit_snapshots": {},
+        "explicit_cwd": False,
+        "history": history,
+        "history_lock": threading.Lock(),
+        "history_version": 0,
+        "image_counter": 0,
+        "inflight_turn": None,
+        "last_active": now,
+        "lazy": lazy,
+        "model_override": model_override,
+        "pending_title": None,
+        "profile_home": str(profile_home) if profile_home is not None else None,
+        "resume_runtime_overrides": resume_runtime_overrides,
+        "resume_session_id": session_key,
+        "running": False,
+        "session_key": session_key,
+        "show_reasoning": _load_show_reasoning(),
+        "slash_worker": None,
+        "source": source,
+        "tool_progress_mode": _load_tool_progress_mode(),
+        "tool_started_at": {},
+        "transport": current_transport() or _stdio_transport,
+    }
+
+
+def _claim_or_reuse_live(
+    sid: str, session_key: str, record: dict, lease
+) -> tuple[str, dict] | None:
+    """Register ``record`` as the live session for ``session_key`` under the
+    resume lock, or — if a concurrent resume already won — release ``lease`` and
+    return the winner for the caller to reuse."""
+    with _session_resume_lock:
+        live = _find_live_session_by_key(session_key)
+        if live is not None:
+            if lease is not None:
+                lease.release()
+            return live
+        with _sessions_lock:
+            _sessions[sid] = record
+            _register_session_cwd(_sessions[sid])
+    return None
+
+
+def _schedule_agent_build(sid: str, delay: float = 0.05) -> None:
+    """Pre-warm a deferred session's agent off the response path (session.create
+    and cold resume both build through here; _sess() also builds on demand)."""
+
+    def _run():
+        session = _sessions.get(sid)
+        if session is not None:
+            _start_agent_build(sid, session)
+
+    timer = threading.Timer(delay, _run)
+    timer.daemon = True
+    timer.start()
+
+
 @method("session.resume")
 def _(rid, params: dict) -> dict:
     target = params.get("session_id", "")
@@ -4973,65 +5066,31 @@ def _(rid, params: dict) -> dict:
             return _err(rid, 4090, limit_message)
         try:
             db.reopen_session(target)
-            # The child's OWN conversation only. Delegation children are
-            # parent-linked rows, so include_ancestors would prepend the
-            # parent's entire transcript — a watch window opened on a subagent
-            # must show the subagent's branch, not the parent's prompt.
+            # The child's OWN conversation only — include_ancestors would prepend
+            # the parent's transcript onto the subagent's branch.
             history = db.get_messages_as_conversation(target)
         except Exception as e:
             if lease is not None:
                 lease.release()
             return _err(rid, 5000, f"resume failed: {e}")
-        messages = _history_to_messages(history)
         cwd = profile_resume_cwd or os.getenv("TERMINAL_CWD", os.getcwd())
-        now = time.time()
-        # A delegated child mid-run emits no native session events of its own —
-        # report its liveness from the relay registry so the window paints a
-        # busy indicator instead of a dead idle transcript.
+        record = _deferred_session_record(
+            target,
+            cols=cols,
+            cwd=cwd,
+            history=history,
+            lease=lease,
+            source=str(params.get("source") or "tui").strip() or "tui",
+            close_on_disconnect=is_truthy_value(params.get("close_on_disconnect", False)),
+            profile_home=profile_home,
+            lazy=True,
+        )
+        if (live := _claim_or_reuse_live(sid, target, record, lease)) is not None:
+            return _ok(rid, _reuse_live_payload(*live))
+        # A delegated child mid-run emits no session events of its own — report
+        # its liveness from the relay registry so the window shows a busy turn.
         child_running = _child_run_active(target)
-        source = str(params.get("source") or "tui").strip() or "tui"
-        with _session_resume_lock:
-            live = _find_live_session_by_key(target)
-            if live is not None:
-                if lease is not None:
-                    lease.release()
-                return _ok(rid, _reuse_live_payload(*live))
-            with _sessions_lock:
-                _sessions[sid] = {
-                    "agent": None,
-                    "agent_error": None,
-                    "agent_ready": threading.Event(),
-                    "attached_images": [],
-                    "close_on_disconnect": is_truthy_value(
-                        params.get("close_on_disconnect", False)
-                    ),
-                    "active_session_lease": lease,
-                    "cols": cols,
-                    "created_at": now,
-                    "display_history_prefix": [],
-                    "edit_snapshots": {},
-                    "explicit_cwd": False,
-                    "history": history,
-                    "history_lock": threading.Lock(),
-                    "history_version": 0,
-                    "image_counter": 0,
-                    "cwd": cwd,
-                    "inflight_turn": None,
-                    "last_active": now,
-                    "lazy": True,
-                    "pending_title": None,
-                    "profile_home": str(profile_home) if profile_home is not None else None,
-                    "resume_session_id": target,
-                    "running": False,
-                    "session_key": target,
-                    "show_reasoning": _load_show_reasoning(),
-                    "source": source,
-                    "slash_worker": None,
-                    "tool_progress_mode": _load_tool_progress_mode(),
-                    "tool_started_at": {},
-                    "transport": current_transport() or _stdio_transport,
-                }
-                _register_session_cwd(_sessions[sid])
+        messages = _history_to_messages(history)
         return _ok(
             rid,
             {
@@ -5039,20 +5098,11 @@ def _(rid, params: dict) -> dict:
                 "resumed": target,
                 "message_count": len(messages),
                 "messages": messages,
-                "info": {
-                    "cwd": cwd,
-                    "branch": _git_branch_for_cwd(cwd),
-                    "model": _resolve_model(),
-                    "tools": {},
-                    "skills": {},
-                    "lazy": True,
-                    "desktop_contract": DESKTOP_BACKEND_CONTRACT,
-                    "profile_name": _current_profile_name(),
-                },
+                "info": _lazy_resume_info(cwd),
                 "inflight": None,
                 "running": child_running,
                 "session_key": target,
-                "started_at": now,
+                "started_at": record["created_at"],
                 "status": "streaming" if child_running else "idle",
             },
         )
@@ -5081,99 +5131,38 @@ def _(rid, params: dict) -> dict:
         try:
             db.reopen_session(target)
             history = db.get_messages_as_conversation(target)
-            display_history = db.get_messages_as_conversation(
-                target, include_ancestors=True
-            )
+            display_history = db.get_messages_as_conversation(target, include_ancestors=True)
         except Exception as e:
             if lease is not None:
                 lease.release()
             return _err(rid, 5000, f"resume failed: {e}")
-        display_history_prefix = display_history[
-            : max(0, len(display_history) - len(history))
-        ]
-        messages = _history_to_messages(display_history)
-        # Restore the model/provider/reasoning/tier this chat actually used so
-        # the deferred build (and the immediate info payload) match the eager
-        # path — without these a deferred build drops the provider and resume
-        # fails with "No LLM provider configured".
-        stored_runtime_overrides = _stored_session_runtime_overrides(found) or {}
+        prefix = display_history[: max(0, len(display_history) - len(history))]
+        # Restore the model/provider/reasoning/tier this chat last used so the
+        # deferred build (and the info below) match the eager path — without them
+        # the build drops the provider ("No LLM provider configured").
+        overrides = _stored_session_runtime_overrides(found) or {}
+        model_override = overrides.get("model_override") or {}
         cwd = profile_resume_cwd or os.getenv("TERMINAL_CWD", os.getcwd())
-        now = time.time()
-        source = str(params.get("source") or "tui").strip() or "tui"
-        with _session_resume_lock:
-            live = _find_live_session_by_key(target)
-            if live is not None:
-                if lease is not None:
-                    lease.release()
-                return _ok(rid, _reuse_live_payload(*live))
-            with _sessions_lock:
-                _sessions[sid] = {
-                    "agent": None,
-                    "agent_error": None,
-                    "agent_ready": threading.Event(),
-                    "attached_images": [],
-                    "close_on_disconnect": is_truthy_value(
-                        params.get("close_on_disconnect", False)
-                    ),
-                    "active_session_lease": lease,
-                    "cols": cols,
-                    "created_at": now,
-                    "display_history_prefix": display_history_prefix,
-                    "edit_snapshots": {},
-                    "explicit_cwd": False,
-                    "history": history,
-                    "history_lock": threading.Lock(),
-                    "history_version": 0,
-                    "image_counter": 0,
-                    "cwd": cwd,
-                    "inflight_turn": None,
-                    "last_active": now,
-                    "model_override": stored_runtime_overrides.get("model_override"),
-                    "pending_title": None,
-                    "profile_home": str(profile_home) if profile_home is not None else None,
-                    "resume_session_id": target,
-                    "resume_runtime_overrides": stored_runtime_overrides or None,
-                    "running": False,
-                    "session_key": target,
-                    "show_reasoning": _load_show_reasoning(),
-                    "source": source,
-                    "slash_worker": None,
-                    "tool_progress_mode": _load_tool_progress_mode(),
-                    "tool_started_at": {},
-                    "transport": current_transport() or _stdio_transport,
-                }
-                _register_session_cwd(_sessions[sid])
+        record = _deferred_session_record(
+            target,
+            cols=cols,
+            cwd=cwd,
+            history=history,
+            lease=lease,
+            source=str(params.get("source") or "tui").strip() or "tui",
+            close_on_disconnect=is_truthy_value(params.get("close_on_disconnect", False)),
+            display_history_prefix=prefix,
+            profile_home=profile_home,
+            model_override=overrides.get("model_override"),
+            resume_runtime_overrides=overrides or None,
+        )
+        if (live := _claim_or_reuse_live(sid, target, record, lease)) is not None:
+            return _ok(rid, _reuse_live_payload(*live))
 
-        def _deferred_build() -> None:
-            session = _sessions.get(sid)
-            if session is not None:
-                _start_agent_build(sid, session)
+        _schedule_agent_build(sid)
+        _schedule_session_cap_enforcement()  # trim detached idle sessions over the cap
 
-        build_timer = threading.Timer(0.05, _deferred_build)
-        build_timer.daemon = True
-        build_timer.start()
-        # A new live session just landed; trim detached idle ones over the cap.
-        _schedule_session_cap_enforcement()
-
-        model_override = stored_runtime_overrides.get("model_override")
-        resumed_model = ""
-        if isinstance(model_override, dict):
-            resumed_model = str(model_override.get("model") or "").strip()
-        elif isinstance(model_override, str):
-            resumed_model = model_override.strip()
-        info = {
-            "cwd": cwd,
-            "branch": _git_branch_for_cwd(cwd),
-            "model": resumed_model or _resolve_model(),
-            "tools": {},
-            "skills": {},
-            "lazy": True,
-            "desktop_contract": DESKTOP_BACKEND_CONTRACT,
-            "profile_name": _current_profile_name(),
-        }
-        provider_override = stored_runtime_overrides.get("provider_override")
-        if provider_override:
-            info["provider"] = provider_override
+        messages = _history_to_messages(display_history)
         return _ok(
             rid,
             {
@@ -5181,11 +5170,15 @@ def _(rid, params: dict) -> dict:
                 "resumed": target,
                 "message_count": len(messages),
                 "messages": messages,
-                "info": info,
+                "info": _lazy_resume_info(
+                    cwd,
+                    model=model_override.get("model") or "",
+                    provider=overrides.get("provider_override") or "",
+                ),
                 "inflight": None,
                 "running": False,
                 "session_key": target,
-                "started_at": now,
+                "started_at": record["created_at"],
                 "status": "idle",
             },
         )

From e8561d61e6f29dac1a6520f3253516bae2313100 Mon Sep 17 00:00:00 2001
From: Brooklyn Nicholson <brooklyn.bb.nicholson@gmail.com>
Date: Thu, 25 Jun 2026 14:13:07 -0500
Subject: [PATCH 4/4] test(tui_gateway): pin synchronous-build resume tests to
 eager_build
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

These three assert the eager build contract — stored runtime overrides /
profile db reach _make_agent synchronously, and the agent binds to the
compression tip. Under deferred-by-default the build runs off-thread, so
they raced the timer (green in CI, flaky locally). Pin them to
eager_build; deferred coverage lives in the protocol tests.
---
 tests/test_tui_gateway_server.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/tests/test_tui_gateway_server.py b/tests/test_tui_gateway_server.py
index 6592a42a9a8..e761e46158c 100644
--- a/tests/test_tui_gateway_server.py
+++ b/tests/test_tui_gateway_server.py
@@ -1001,8 +1001,11 @@ def test_session_resume_follows_compression_tip(monkeypatch, tmp_path):
     )
 
     try:
+        # eager_build: this asserts the synchronously-built agent binds to the
+        # resolved tip (captured["agent_session_id"]); the compression-tip
+        # resolution itself runs before the build and is mode-agnostic.
         resp = server.handle_request(
-            {"id": "1", "method": "session.resume", "params": {"session_id": "parent_root"}}
+            {"id": "1", "method": "session.resume", "params": {"session_id": "parent_root", "eager_build": True}}
         )
     finally:
         db.close()
@@ -1049,8 +1052,11 @@ def test_session_resume_passes_stored_runtime_to_agent(monkeypatch):
 
     monkeypatch.setattr(server, "_init_session", fake_init_session)
 
+    # eager_build: this asserts the synchronous build contract (stored runtime
+    # overrides reach _make_agent, info comes from _session_info). The deferred
+    # default restores the same overrides via _start_agent_build off-thread.
     resp = server.handle_request(
-        {"id": "1", "method": "session.resume", "params": {"session_id": "stored-session"}}
+        {"id": "1", "method": "session.resume", "params": {"session_id": "stored-session", "eager_build": True}}
     )
 
     assert resp["result"]["info"] == {"model": "gpt-5.4", "provider": "openai-codex"}
@@ -1137,11 +1143,13 @@ def test_session_resume_profile_uses_profile_db_cwd(monkeypatch, tmp_path):
     monkeypatch.setattr(approval, "load_permanent_allowlist", lambda: None)
 
     try:
+        # eager_build: asserts the synchronous build receives the profile's db
+        # (the deferred default builds with the same db via _start_agent_build).
         resp = server.handle_request(
             {
                 "id": "1",
                 "method": "session.resume",
-                "params": {"session_id": target, "profile": "worker"},
+                "params": {"session_id": target, "profile": "worker", "eager_build": True},
             }
         )