From 0399d4b97668c020c8c583eda190b90fc9fca4e8 Mon Sep 17 00:00:00 2001
From: Brooklyn Nicholson <brooklyn.bb.nicholson@gmail.com>
Date: Tue, 28 Apr 2026 19:42:31 -0500
Subject: [PATCH 01/11] perf(tui): shave ~190ms off `hermes --tui` cold start
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two targeted fixes on the critical path from `hermes --tui` launch to
`gateway.ready`:

1. **Defer `@hermes/ink` import in memoryMonitor.ts.** The static top-level
   import dragged the full ~414KB Ink bundle (React + renderer + all
   components/hooks) onto the critical path *before* `gw.start()` could
   spawn the Python gateway — serialising ~155ms of Node work in front of
   it on every launch. `evictInkCaches` only runs inside the 10-second
   tick, and only under heap pressure, so its import moves to a lazy
   dynamic `import()`. By the time the first tick fires, the app entry
   has long since imported `@hermes/ink`, so the dynamic import resolves
   from the ESM module cache.

2. **Gate `tools.mcp_tool` import on config in tui_gateway/entry.py.**
   Importing the module transitively pulls the MCP SDK + pydantic + httpx
   + jsonschema + starlette formparsers (~200ms). The overwhelming
   majority of users have no `mcp_servers` configured, so this runs for
   nothing. A cheap `load_config()` check (~25ms) skips the 200ms import
   when no servers are declared, with a conservative fallback to the old
   behaviour if the config probe itself fails.

## Measurements (macOS Terminal.app, Apple Silicon, n=12)

| Metric                            | Before     | After      | Δ                    |
|-----------------------------------|------------|------------|----------------------|
| Python gateway boot alone (p50)   | 252–365ms  | 105–151ms  | −180ms               |
| `hermes --tui` banner paint (p50) | 686ms      | 665ms      | −21ms                |
| `hermes --tui` → ready (p50)      | **1843ms** | **1655ms** | **−188ms (−10.2%)**  |
| `hermes --tui` → ready (p90)      | 1932ms     | 1778ms     | −154ms               |
| stdev (ready)                     | 126ms      | 83ms       | also more consistent |

## Tests

- `scripts/run_tests.sh tests/tui_gateway/ tests/tools/test_mcp_tool.py`:
  195 passed.  (The one pre-existing failure in
  `test_session_resume_returns_hydrated_messages` reproduces on main —
  unrelated, it's a mock-DB kwarg mismatch.)
- `ui-tui` vitest: 430 tests, all pass.
- `npm run type-check` in ui-tui: clean.

## Notes

- Node-side first paint ("banner") didn't move meaningfully because that
  latency is dominated by Ink's render pipeline + React mount, not by
  which imports load first.
- The win shows up entirely in the time from banner to `gateway.ready`
  — exactly where we expected it, since one fix shortens the Python
  gateway's boot path and the other lets it overlap more with Node
  startup.
- No user-visible behaviour change. Memory monitoring still fires every
  10s; MCP still works when `mcp_servers` is configured.
---
 tui_gateway/entry.py            | 24 +++++++++++++++++++++---
 ui-tui/src/lib/memoryMonitor.ts | 33 ++++++++++++++++++++++++++++++---
 2 files changed, 51 insertions(+), 6 deletions(-)

diff --git a/tui_gateway/entry.py b/tui_gateway/entry.py
index 70fc851820..2c1804aac1 100644
--- a/tui_gateway/entry.py
+++ b/tui_gateway/entry.py
@@ -165,11 +165,29 @@ def main():
     # a model_tools.py module-level side effect; moved to explicit
     # startup calls to avoid freezing the gateway's loop on lazy import
     # (#16856).
+    #
+    # Cold-start guard: importing ``tools.mcp_tool`` transitively pulls the
+    # full MCP SDK (mcp, pydantic, httpx, jsonschema, starlette parsers —
+    # ~200ms on macOS), which runs on the TUI's critical path before
+    # ``gateway.ready`` can be emitted.  The overwhelming majority of users
+    # have no ``mcp_servers`` configured, in which case every byte of that
+    # import is wasted.  Check the config first (cheap — it's already been
+    # loaded once by ``_config_mtime`` elsewhere) and only pay the import
+    # cost when there's actually MCP work to do.
     try:
-        from tools.mcp_tool import discover_mcp_tools
-        discover_mcp_tools()
+        from hermes_cli.config import load_config
+        _mcp_servers = (load_config() or {}).get("mcp_servers")
+        _has_mcp_servers = isinstance(_mcp_servers, dict) and len(_mcp_servers) > 0
     except Exception:
-        pass
+        # Be conservative: if we can't decide, fall back to the old
+        # behaviour and let the discovery path handle its own errors.
+        _has_mcp_servers = True
+    if _has_mcp_servers:
+        try:
+            from tools.mcp_tool import discover_mcp_tools
+            discover_mcp_tools()
+        except Exception:
+            pass
 
     if not write_json({
         "jsonrpc": "2.0",
diff --git a/ui-tui/src/lib/memoryMonitor.ts b/ui-tui/src/lib/memoryMonitor.ts
index bbdb229705..26a0cdbc2b 100644
--- a/ui-tui/src/lib/memoryMonitor.ts
+++ b/ui-tui/src/lib/memoryMonitor.ts
@@ -1,5 +1,3 @@
-import { evictInkCaches } from '@hermes/ink'
-
 import { type HeapDumpResult, performHeapDump } from './memory.js'
 
 export type MemoryLevel = 'critical' | 'high' | 'normal'
@@ -20,6 +18,26 @@ export interface MemoryMonitorOptions {
 
 const GB = 1024 ** 3
 
+// Deferred @hermes/ink import: loading `@hermes/ink` at module top-level
+// pulls the full ~414KB Ink bundle (React, renderer, components, hooks) onto
+// the critical path before the Python gateway can even be spawned. That
+// serialised roughly 150ms of Node work in front of gw.start() on every
+// cold `hermes --tui` launch.
+//
+// evictInkCaches only runs inside `tick()`, which fires on a 10s timer and
+// only when heap pressure crosses the high-water mark — by then Ink has
+// long since been loaded by the app entry. This dynamic import is a no-op
+// on the hot path (module is already in the ESM cache); when a startup
+// spike somehow trips the threshold before the app registers its own Ink
+// import, we pay the load cost exactly once, inside the tick that needs it.
+let _evictInkCaches: ((level: 'all' | 'half') => unknown) | null = null
+async function _ensureEvictInkCaches(): Promise<(level: 'all' | 'half') => unknown> {
+  if (_evictInkCaches) return _evictInkCaches
+  const mod = await import('@hermes/ink')
+  _evictInkCaches = mod.evictInkCaches as (level: 'all' | 'half') => unknown
+  return _evictInkCaches
+}
+
 export function startMemoryMonitor({
   criticalBytes = 2.5 * GB,
   highBytes = 1.5 * GB,
@@ -43,7 +61,16 @@ export function startMemoryMonitor({
 
     // Prune Ink content caches before dump/exit — half on 'high' (recoverable),
     // full on 'critical' (post-dump RSS reduction, keeps user running).
-    evictInkCaches(level === 'critical' ? 'all' : 'half')
+    // Deferred import keeps `@hermes/ink` off the cold-start critical path;
+    // by the time a tick fires 10s after launch the app has already loaded
+    // the same module, so this resolves instantly from the ESM cache.
+    try {
+      const evictInkCaches = await _ensureEvictInkCaches()
+      evictInkCaches(level === 'critical' ? 'all' : 'half')
+    } catch {
+      // Best-effort: if the dynamic import fails for any reason we still
+      // continue to the heap dump below so the user gets diagnostics.
+    }
 
     dumped.add(level)
     const dump = await performHeapDump(level === 'critical' ? 'auto-critical' : 'auto-high').catch(() => null)

From 9e398e1809dd30c26ed899e362af6bb04c948894 Mon Sep 17 00:00:00 2001
From: Brooklyn Nicholson <brooklyn.bb.nicholson@gmail.com>
Date: Tue, 28 Apr 2026 22:42:17 -0500
Subject: [PATCH 02/11] perf(tui): avoid importing classic CLI during tool
 discovery

TUI session readiness was still laggy after the gateway-ready fixes. Profiling
session.create -> session.info showed the slow phase is background AIAgent
construction (~1.1s). A cProfile run of tui_gateway.server::_make_agent showed
model_tools/tool discovery importing tools.code_execution_tool, whose
module-level EXECUTE_CODE_SCHEMA calls _get_execution_mode(), which imported
cli.CLI_CONFIG.

That pulled the classic interactive CLI stack (prompt_toolkit/Rich and REPL
setup) into every agent startup path, including hermes --tui where it is not
used. Replace that import with hermes_cli.config.read_raw_config(), which is
cached, and take only the raw code_execution section from its result. Existing
defaults still apply when the key is absent.

Measurements on macOS Terminal.app:
- import run_agent: ~466ms -> ~347ms
- model_tools import: ~418ms -> ~272ms
- _make_agent: ~1452ms -> ~1239ms
- session.create -> session.info: ~1167ms -> ~999ms
- full hermes --tui ready p50: ~1655ms -> ~1537ms

Tests:
- scripts/run_tests.sh tests/tools/test_code_execution_modes.py tests/tools/test_code_execution.py
---
 tests/tools/test_code_execution.py | 16 ++++++++++++----
 tools/code_execution_tool.py       | 16 +++++++++++++---
 2 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/tests/tools/test_code_execution.py b/tests/tools/test_code_execution.py
index 15f8faa9bb..6f6260ffe2 100644
--- a/tests/tools/test_code_execution.py
+++ b/tests/tools/test_code_execution.py
@@ -770,11 +770,19 @@ class TestLoadConfig(unittest.TestCase):
 
     def test_returns_code_execution_section(self):
         from tools.code_execution_tool import _load_config
-        mock_cli = MagicMock()
-        mock_cli.CLI_CONFIG = {"code_execution": {"timeout": 120, "max_tool_calls": 10}}
-        with patch.dict("sys.modules", {"cli": mock_cli}):
+        with patch("hermes_cli.config.read_raw_config",
+                   return_value={"code_execution": {"timeout": 120, "max_tool_calls": 10}}):
             result = _load_config()
-        self.assertIsInstance(result, dict)
+        self.assertEqual(result, {"timeout": 120, "max_tool_calls": 10})
+
+    def test_does_not_import_interactive_cli(self):
+        from tools.code_execution_tool import _load_config
+        mock_cli = MagicMock()
+        mock_cli.CLI_CONFIG = {"code_execution": {"timeout": 999}}
+        with patch.dict("sys.modules", {"cli": mock_cli}), \
+             patch("hermes_cli.config.read_raw_config", return_value={}):
+            result = _load_config()
+        self.assertEqual(result, {})
 
 
 # ---------------------------------------------------------------------------
diff --git a/tools/code_execution_tool.py b/tools/code_execution_tool.py
index db706e6a4c..3f83394c18 100644
--- a/tools/code_execution_tool.py
+++ b/tools/code_execution_tool.py
@@ -1309,10 +1309,20 @@ def _kill_process_group(proc, escalate: bool = False):
 
 
 def _load_config() -> dict:
-    """Load code_execution config from CLI_CONFIG if available."""
+    """Load code_execution config without importing the interactive CLI.
+
+    This helper is called while building the module-level execute_code schema
+    during tool discovery.  Importing ``cli`` here pulls prompt_toolkit/Rich and
+    a large chunk of the classic REPL onto every agent startup path, including
+    ``hermes --tui`` where it is never used.  Read the lightweight raw config
+    instead; the config layer already caches by (mtime, size), and an absent
+    key cleanly falls back to DEFAULT_EXECUTION_MODE.
+    """
     try:
-        from cli import CLI_CONFIG
-        return CLI_CONFIG.get("code_execution", {})
+        from hermes_cli.config import read_raw_config
+
+        cfg = read_raw_config().get("code_execution", {})
+        return cfg if isinstance(cfg, dict) else {}
     except Exception:
         return {}
 

From b66cbb7b4ca8dd8d3242f14caf5fd52807069dc8 Mon Sep 17 00:00:00 2001
From: Brooklyn Nicholson <brooklyn.bb.nicholson@gmail.com>
Date: Tue, 28 Apr 2026 23:32:02 -0500
Subject: [PATCH 03/11] perf(tui): defer agent construction until first prompt

Match classic CLI perceived startup behavior: show the TUI shell and composer
before constructing the full AIAgent. session.create now returns a lightweight
placeholder session with lazy=true and no longer starts _make_agent eagerly.
The first method that needs the agent triggers _start_agent_build() via _sess();
prompt.submit is routed through the RPC worker pool so that the initial wait for
agent construction does not block the stdio dispatcher.

The intro panel renders skeleton rows for tools/skills while the real
session.info payload is absent, then hydrates to the real tools/skills panel once
AIAgent initialization completes. Also skip the startup /voice status probe and
avoid the input.detect_drop RPC for ordinary plain-text prompts to keep early
startup/first-submit paths cheap.

Measurements on macOS Terminal.app:
- Previous full ready p50 after earlier PR commits: ~1537ms
- Lazy skeleton panel p50: ~794ms
- Original baseline full ready p50: ~1843ms

So the visible startup surface is now ~743ms faster than the prior PR state and
~1.05s faster than the original baseline. The first prompt still pays the same
agent construction cost if it is submitted while the panel is still in its
skeleton state (before the agent has been built), matching classic CLI's
deferred behavior.

Tests:
- python -m py_compile tui_gateway/server.py
- cd ui-tui && npm run type-check && npm run build
- scripts/run_tests.sh tests/tui_gateway/test_protocol.py::test_sess_found tests/tools/test_code_execution_modes.py tests/tools/test_code_execution.py
- cd ui-tui && npm test -- --run src/__tests__/useSessionLifecycle.test.ts src/__tests__/useConfigSync.test.ts
---
 tui_gateway/server.py               | 242 ++++++++++++++--------------
 ui-tui/src/app/useConfigSync.ts     |   9 +-
 ui-tui/src/app/useSubmission.ts     |   7 +
 ui-tui/src/components/appLayout.tsx |   2 +-
 ui-tui/src/components/branding.tsx  |  21 ++-
 ui-tui/src/types.ts                 |   3 +-
 6 files changed, 148 insertions(+), 136 deletions(-)

diff --git a/tui_gateway/server.py b/tui_gateway/server.py
index 555d8396b4..e5b1447d76 100644
--- a/tui_gateway/server.py
+++ b/tui_gateway/server.py
@@ -141,6 +141,7 @@ _SLASH_WORKER_TIMEOUT_S = max(
 _LONG_HANDLERS = frozenset(
     {
         "cli.exec",
+        "prompt.submit",
         "session.branch",
         "session.resume",
         "shell.exec",
@@ -464,6 +465,117 @@ def _wait_agent(session: dict, rid: str, timeout: float = 30.0) -> dict | None:
     return _err(rid, 5032, err) if err else None
 
 
+def _start_agent_build(sid: str, session: dict) -> None:
+    """Start building the real AIAgent for a TUI session, once.
+
+    Classic `hermes` shows the prompt before constructing AIAgent; the TUI used
+    to eagerly build it during session.create, making startup feel blocked on
+    tool discovery/model metadata even though the composer was visible.  Keep
+    the shell responsive by deferring this work until the first prompt (or any
+    command that actually needs the agent), while retaining the same ready/error
+    event contract for the frontend.
+    """
+    ready = session.get("agent_ready")
+    if ready is None:
+        return
+    if ready.is_set() or session.get("agent_build_started"):
+        return
+    session["agent_build_started"] = True
+    key = session["session_key"]
+
+    def _build() -> None:
+        current = _sessions.get(sid)
+        if current is None:
+            ready.set()
+            return
+
+        worker = None
+        notify_registered = False
+        try:
+            tokens = _set_session_context(key)
+            try:
+                agent = _make_agent(sid, key)
+            finally:
+                _clear_session_context(tokens)
+
+            db = _get_db()
+            if db is not None:
+                db.create_session(key, source="tui", model=_resolve_model())
+                pending_title = (current.get("pending_title") or "").strip()
+                if pending_title:
+                    try:
+                        title_applied = db.set_session_title(key, pending_title)
+                        if title_applied:
+                            current["pending_title"] = None
+                        else:
+                            existing_row = db.get_session(key)
+                            existing_title = ((existing_row or {}).get("title") or "").strip()
+                            if existing_title == pending_title:
+                                current["pending_title"] = None
+                            else:
+                                logger.info(
+                                    "Pending title still queued for session %s (wanted=%r, current=%r)",
+                                    sid,
+                                    pending_title,
+                                    existing_title,
+                                )
+                    except ValueError as e:
+                        current["pending_title"] = None
+                        logger.info("Dropping pending title for session %s: %s", sid, e)
+                    except Exception:
+                        logger.warning("Failed to apply pending title for session %s", sid, exc_info=True)
+            current["agent"] = agent
+
+            try:
+                worker = _SlashWorker(key, getattr(agent, "model", _resolve_model()))
+                current["slash_worker"] = worker
+            except Exception:
+                pass
+
+            try:
+                from tools.approval import (
+                    register_gateway_notify,
+                    load_permanent_allowlist,
+                )
+                register_gateway_notify(key, lambda data: _emit("approval.request", sid, data))
+                notify_registered = True
+                load_permanent_allowlist()
+            except Exception:
+                pass
+
+            _wire_callbacks(sid)
+            _notify_session_boundary("on_session_reset", key)
+
+            info = _session_info(agent)
+            warn = _probe_credentials(agent)
+            if warn:
+                info["credential_warning"] = warn
+            cfg_warn = _probe_config_health(_load_cfg())
+            if cfg_warn:
+                info["config_warning"] = cfg_warn
+                logger.warning(cfg_warn)
+            _emit("session.info", sid, info)
+        except Exception as e:
+            current["agent_error"] = str(e)
+            _emit("error", sid, {"message": f"agent init failed: {e}"})
+        finally:
+            if _sessions.get(sid) is not current:
+                if worker is not None:
+                    try:
+                        worker.close()
+                    except Exception:
+                        pass
+                if notify_registered:
+                    try:
+                        from tools.approval import unregister_gateway_notify
+                        unregister_gateway_notify(key)
+                    except Exception:
+                        pass
+            ready.set()
+
+    threading.Thread(target=_build, daemon=True).start()
+
+
 def _sess_nowait(params, rid):
     s = _sessions.get(params.get("session_id") or "")
     return (s, None) if s else (None, _err(rid, 4001, "session not found"))
@@ -471,7 +583,10 @@ def _sess_nowait(params, rid):
 
 def _sess(params, rid):
     s, err = _sess_nowait(params, rid)
-    return (None, err) if err else (s, _wait_agent(s, rid))
+    if err:
+        return (None, err)
+    _start_agent_build(params.get("session_id") or "", s)
+    return (s, _wait_agent(s, rid))
 
 
 def _normalize_completion_path(path_part: str) -> str:
@@ -1611,130 +1726,6 @@ def _(rid, params: dict) -> dict:
         "transport": current_transport() or _stdio_transport,
     }
 
-    def _build() -> None:
-        session = _sessions.get(sid)
-        if session is None:
-            # session.close ran before the build thread got scheduled.
-            ready.set()
-            return
-
-        # Track what we allocate so we can clean up if session.close
-        # races us to the finish line.  session.close pops _sessions[sid]
-        # unconditionally and tries to close the slash_worker it finds;
-        # if _build is still mid-construction when close runs, close
-        # finds slash_worker=None / notify unregistered and returns
-        # cleanly — leaving us, the build thread, to later install the
-        # worker + notify on an orphaned session dict.  The finally
-        # block below detects the orphan and cleans up instead of
-        # leaking a subprocess and a global notify registration.
-        worker = None
-        notify_registered = False
-        try:
-            tokens = _set_session_context(key)
-            try:
-                agent = _make_agent(sid, key)
-            finally:
-                _clear_session_context(tokens)
-
-            db = _get_db()
-            if db is not None:
-                db.create_session(key, source="tui", model=_resolve_model())
-                pending_title = (session.get("pending_title") or "").strip()
-                if pending_title:
-                    try:
-                        title_applied = db.set_session_title(key, pending_title)
-                        if title_applied:
-                            session["pending_title"] = None
-                        else:
-                            existing_row = db.get_session(key)
-                            existing_title = (
-                                (existing_row or {}).get("title") or ""
-                            ).strip()
-                            if existing_title == pending_title:
-                                session["pending_title"] = None
-                            else:
-                                logger.info(
-                                    "Pending title still queued for session %s (wanted=%r, current=%r)",
-                                    sid,
-                                    pending_title,
-                                    existing_title,
-                                )
-                    except ValueError as e:
-                        # Queued title can become invalid/duplicate between queue time
-                        # and DB row creation. Drop the queue and log the reason so
-                        # future /title reads don't surface a stuck pending value.
-                        session["pending_title"] = None
-                        logger.info(
-                            "Dropping pending title for session %s: %s",
-                            sid,
-                            e,
-                        )
-                    except Exception:
-                        logger.warning(
-                            "Failed to apply pending title for session %s",
-                            sid,
-                            exc_info=True,
-                        )
-            session["agent"] = agent
-
-            try:
-                worker = _SlashWorker(key, getattr(agent, "model", _resolve_model()))
-                session["slash_worker"] = worker
-            except Exception:
-                pass
-
-            try:
-                from tools.approval import (
-                    register_gateway_notify,
-                    load_permanent_allowlist,
-                )
-
-                register_gateway_notify(
-                    key, lambda data: _emit("approval.request", sid, data)
-                )
-                notify_registered = True
-                load_permanent_allowlist()
-            except Exception:
-                pass
-
-            _wire_callbacks(sid)
-            _notify_session_boundary("on_session_reset", key)
-
-            info = _session_info(agent)
-            warn = _probe_credentials(agent)
-            if warn:
-                info["credential_warning"] = warn
-            cfg_warn = _probe_config_health(_load_cfg())
-            if cfg_warn:
-                info["config_warning"] = cfg_warn
-                logger.warning(cfg_warn)
-            _emit("session.info", sid, info)
-        except Exception as e:
-            session["agent_error"] = str(e)
-            _emit("error", sid, {"message": f"agent init failed: {e}"})
-        finally:
-            # Orphan check: if session.close raced us and popped
-            # _sessions[sid] while we were building, the dict we just
-            # populated is unreachable.  Clean up the subprocess and
-            # the global notify registration ourselves — session.close
-            # couldn't see them at the time it ran.
-            if _sessions.get(sid) is not session:
-                if worker is not None:
-                    try:
-                        worker.close()
-                    except Exception:
-                        pass
-                if notify_registered:
-                    try:
-                        from tools.approval import unregister_gateway_notify
-
-                        unregister_gateway_notify(key)
-                    except Exception:
-                        pass
-            ready.set()
-
-    threading.Thread(target=_build, daemon=True).start()
-
     return _ok(
         rid,
         {
@@ -1744,6 +1735,7 @@ def _(rid, params: dict) -> dict:
                 "tools": {},
                 "skills": {},
                 "cwd": os.getenv("TERMINAL_CWD", os.getcwd()),
+                "lazy": True,
             },
         },
     )
diff --git a/ui-tui/src/app/useConfigSync.ts b/ui-tui/src/app/useConfigSync.ts
index 931f92f762..db8517559c 100644
--- a/ui-tui/src/app/useConfigSync.ts
+++ b/ui-tui/src/app/useConfigSync.ts
@@ -5,8 +5,7 @@ import type { GatewayClient } from '../gatewayClient.js'
 import type {
   ConfigFullResponse,
   ConfigMtimeResponse,
-  ReloadMcpResponse,
-  VoiceToggleResponse
+  ReloadMcpResponse
 } from '../gatewayTypes.js'
 import { asRpcResult } from '../lib/rpc.js'
 
@@ -105,7 +104,11 @@ export function useConfigSync({ gw, setBellOnComplete, setVoiceEnabled, sid }: U
       return
     }
 
-    quietRpc<VoiceToggleResponse>(gw, 'voice.toggle', { action: 'status' }).then(r => setVoiceEnabled(!!r?.enabled))
+    // Keep startup cheap: voice.toggle status probes optional audio/STT deps and
+    // can run long enough to delay prompt.submit on the single stdio RPC pipe.
+    // Environment flags are enough to initialize the UI bit; the heavier status
+    // check still runs when the user opens /voice.
+    setVoiceEnabled(process.env.HERMES_VOICE === '1')
     quietRpc<ConfigMtimeResponse>(gw, 'config.get', { key: 'mtime' }).then(r => {
       mtimeRef.current = Number(r?.mtime ?? 0)
     })
diff --git a/ui-tui/src/app/useSubmission.ts b/ui-tui/src/app/useSubmission.ts
index 2c2c6d48d9..ed86332b08 100644
--- a/ui-tui/src/app/useSubmission.ts
+++ b/ui-tui/src/app/useSubmission.ts
@@ -126,6 +126,13 @@ export function useSubmission(opts: UseSubmissionOptions) {
         return sys('session not ready yet')
       }
 
+      // Plain prompts are the common path and should not pay an extra RPC
+      // before prompt.submit. File-drop detection can still run for inputs
+      // that contain an absolute/tilde path or file:// URI.
+      if (!looksLikeSlashCommand(text) && !/(?:^|\s)(?:file:\/\/|~\/|\/)[^\s]+/.test(text)) {
+        return startSubmit(text, expand(text), showUserMessage)
+      }
+
       gw.request<InputDetectDropResponse>('input.detect_drop', { session_id: sid, text })
         .then(r => {
           if (!r?.matched) {
diff --git a/ui-tui/src/components/appLayout.tsx b/ui-tui/src/components/appLayout.tsx
index 84470c4ccf..69aa6c0592 100644
--- a/ui-tui/src/components/appLayout.tsx
+++ b/ui-tui/src/components/appLayout.tsx
@@ -68,7 +68,7 @@ const TranscriptPane = memo(function TranscriptPane({
                 <Box flexDirection="column" paddingTop={1}>
                   <Banner t={ui.theme} />
 
-                  {row.msg.info?.version && <SessionPanel info={row.msg.info} sid={ui.sid} t={ui.theme} />}
+                  {row.msg.info && <SessionPanel info={row.msg.info} sid={ui.sid} t={ui.theme} />}
                 </Box>
               ) : row.msg.kind === 'panel' && row.msg.panelData ? (
                 <Panel sections={row.msg.panelData.sections} t={ui.theme} title={row.msg.panelData.title} />
diff --git a/ui-tui/src/components/branding.tsx b/ui-tui/src/components/branding.tsx
index 25e161fd71..0a7509f696 100644
--- a/ui-tui/src/components/branding.tsx
+++ b/ui-tui/src/components/branding.tsx
@@ -64,9 +64,11 @@ export function SessionPanel({ info, sid, t }: SessionPanelProps) {
   }
 
   const section = (title: string, data: Record<string, string[]>, max = 8, overflowLabel = 'more…') => {
+    const skeletonRows = title === 'Tools' ? ['browser', 'terminal', 'file'] : ['apple', 'creative', 'software-development']
     const entries = Object.entries(data).sort()
     const shown = entries.slice(0, max)
     const overflow = entries.length - max
+    const skeleton = info.lazy && entries.length === 0
 
     return (
       <Box flexDirection="column" marginTop={1}>
@@ -74,12 +76,19 @@ export function SessionPanel({ info, sid, t }: SessionPanelProps) {
           Available {title}
         </Text>
 
-        {shown.map(([k, vs]) => (
-          <Text key={k} wrap="truncate">
-            <Text color={t.color.muted}>{strip(k)}: </Text>
-            <Text color={t.color.text}>{truncLine(strip(k) + ': ', vs)}</Text>
-          </Text>
-        ))}
+        {skeleton
+          ? skeletonRows.map(k => (
+              <Text dimColor key={k} wrap="truncate">
+                <Text color={t.color.muted}>{k}: </Text>
+                <Text color={t.color.text}>━━━━━━━━━━━━━━</Text>
+              </Text>
+            ))
+          : shown.map(([k, vs]) => (
+              <Text key={k} wrap="truncate">
+                <Text color={t.color.muted}>{strip(k)}: </Text>
+                <Text color={t.color.text}>{truncLine(strip(k) + ': ', vs)}</Text>
+              </Text>
+            ))}
 
         {overflow > 0 && (
           <Text color={t.color.muted}>
diff --git a/ui-tui/src/types.ts b/ui-tui/src/types.ts
index 6aea78e3e4..b3ecc8fbb6 100644
--- a/ui-tui/src/types.ts
+++ b/ui-tui/src/types.ts
@@ -143,11 +143,12 @@ export interface McpServerStatus {
 export interface SessionInfo {
   cwd?: string
   fast?: boolean
+  lazy?: boolean
   mcp_servers?: McpServerStatus[]
   model: string
   reasoning_effort?: string
-  service_tier?: string
   release_date?: string
+  service_tier?: string
   skills: Record<string, string[]>
   tools: Record<string, string[]>
   update_behind?: number | null

From 0a6ecea676523d808d1f0657a8f1a80debba14f1 Mon Sep 17 00:00:00 2001
From: Brooklyn Nicholson <brooklyn.bb.nicholson@gmail.com>
Date: Tue, 28 Apr 2026 23:48:07 -0500
Subject: [PATCH 04/11] fix(tui): hydrate lazy startup panel and use animated
 loaders

The lazy startup panel could remain stuck on the placeholder when no first
prompt was submitted because agent construction only started from _sess(). Keep
session.create cheap, but schedule _start_agent_build shortly after returning
the placeholder so tools/skills hydrate automatically.

Also replace the ugly placeholder bar rows with compact unicode-animations
braille loaders for the tools and skills sections.

Tests:
- python -m py_compile tui_gateway/server.py
- cd ui-tui && npm run type-check && npm run build
- cd ui-tui && npm test -- --run src/__tests__/useSessionLifecycle.test.ts src/__tests__/useConfigSync.test.ts
- scripts/run_tests.sh tests/tui_gateway/test_protocol.py::test_sess_found tests/tools/test_code_execution_modes.py tests/tools/test_code_execution.py
---
 tui_gateway/server.py              | 11 +++++++
 ui-tui/src/components/branding.tsx | 46 +++++++++++++++++++++---------
 2 files changed, 43 insertions(+), 14 deletions(-)

diff --git a/tui_gateway/server.py b/tui_gateway/server.py
index e5b1447d76..2ba156587d 100644
--- a/tui_gateway/server.py
+++ b/tui_gateway/server.py
@@ -1726,6 +1726,17 @@ def _(rid, params: dict) -> dict:
         "transport": current_transport() or _stdio_transport,
     }
 
+    # Return the lightweight session immediately so Ink can paint the composer
+    # + skeleton panel, then build the real AIAgent just after this response is
+    # flushed.  This keeps startup responsive while still hydrating tools/skills
+    # without requiring the user to submit a first prompt.
+    def _deferred_build() -> None:
+        session = _sessions.get(sid)
+        if session is not None:
+            _start_agent_build(sid, session)
+
+    threading.Timer(0.05, _deferred_build).start()
+
     return _ok(
         rid,
         {
diff --git a/ui-tui/src/components/branding.tsx b/ui-tui/src/components/branding.tsx
index 0a7509f696..84e502aada 100644
--- a/ui-tui/src/components/branding.tsx
+++ b/ui-tui/src/components/branding.tsx
@@ -1,10 +1,32 @@
 import { Box, Text, useStdout } from '@hermes/ink'
+import { useEffect, useState } from 'react'
+import unicodeSpinners from 'unicode-animations'
 
 import { artWidth, caduceus, CADUCEUS_WIDTH, logo, LOGO_WIDTH } from '../banner.js'
 import { flat } from '../lib/text.js'
 import type { Theme } from '../theme.js'
 import type { PanelSection, SessionInfo } from '../types.js'
 
+const LOADER_TICK_MS = 120
+
+function InlineLoader({ label, t }: { label: string; t: Theme }) {
+  const [tick, setTick] = useState(0)
+  const spinner = unicodeSpinners.braille
+  const frame = spinner.frames[tick % spinner.frames.length] ?? '⠋'
+
+  useEffect(() => {
+    const id = setInterval(() => setTick(n => n + 1), Math.max(LOADER_TICK_MS, spinner.interval))
+
+    return () => clearInterval(id)
+  }, [spinner.interval])
+
+  return (
+    <Text color={t.color.muted} wrap="truncate">
+      <Text color={t.color.accent}>{frame}</Text> {label}
+    </Text>
+  )
+}
+
 export function ArtLines({ lines }: { lines: [string, string][] }) {
   return (
     <>
@@ -64,7 +86,6 @@ export function SessionPanel({ info, sid, t }: SessionPanelProps) {
   }
 
   const section = (title: string, data: Record<string, string[]>, max = 8, overflowLabel = 'more…') => {
-    const skeletonRows = title === 'Tools' ? ['browser', 'terminal', 'file'] : ['apple', 'creative', 'software-development']
     const entries = Object.entries(data).sort()
     const shown = entries.slice(0, max)
     const overflow = entries.length - max
@@ -76,19 +97,16 @@ export function SessionPanel({ info, sid, t }: SessionPanelProps) {
           Available {title}
         </Text>
 
-        {skeleton
-          ? skeletonRows.map(k => (
-              <Text dimColor key={k} wrap="truncate">
-                <Text color={t.color.muted}>{k}: </Text>
-                <Text color={t.color.text}>━━━━━━━━━━━━━━</Text>
-              </Text>
-            ))
-          : shown.map(([k, vs]) => (
-              <Text key={k} wrap="truncate">
-                <Text color={t.color.muted}>{strip(k)}: </Text>
-                <Text color={t.color.text}>{truncLine(strip(k) + ': ', vs)}</Text>
-              </Text>
-            ))}
+        {skeleton ? (
+          <InlineLoader label={title === 'Tools' ? 'discovering tools' : 'scanning skills'} t={t} />
+        ) : (
+          shown.map(([k, vs]) => (
+            <Text key={k} wrap="truncate">
+              <Text color={t.color.muted}>{strip(k)}: </Text>
+              <Text color={t.color.text}>{truncLine(strip(k) + ': ', vs)}</Text>
+            </Text>
+          ))
+        )}
 
         {overflow > 0 && (
           <Text color={t.color.muted}>

From a2819e182047ed5d78d21038b83f63b1ec297438 Mon Sep 17 00:00:00 2001
From: Brooklyn Nicholson <brooklyn.bb.nicholson@gmail.com>
Date: Tue, 28 Apr 2026 23:54:33 -0500
Subject: [PATCH 05/11] fix(tui): address lazy startup review races

Copilot correctly flagged two concurrency windows:

- memoryMonitor could re-enter while awaiting the lazy @hermes/ink import or
  heap dump, producing duplicate imports/dumps under sustained pressure.
- _start_agent_build used a check-then-set guard without synchronization, so
  concurrent agent-backed RPCs could start duplicate agent builders.

Fix both with single-flight guards: cache the dynamic import promise and track
per-level dump in-flight state in memoryMonitor, and protect the TUI agent build
flag with a per-session lock.

Tests:
- python -m py_compile tui_gateway/server.py
- cd ui-tui && npm run type-check && npm run build
- cd ui-tui && npm test -- --run src/__tests__/useSessionLifecycle.test.ts src/__tests__/useConfigSync.test.ts
- scripts/run_tests.sh tests/tui_gateway/test_protocol.py::test_sess_found tests/tools/test_code_execution_modes.py tests/tools/test_code_execution.py
---
 tui_gateway/server.py           |  8 +++++---
 ui-tui/src/lib/memoryMonitor.ts | 31 ++++++++++++++++++++++++-------
 2 files changed, 29 insertions(+), 10 deletions(-)

diff --git a/tui_gateway/server.py b/tui_gateway/server.py
index 2ba156587d..6ece5da2e6 100644
--- a/tui_gateway/server.py
+++ b/tui_gateway/server.py
@@ -478,9 +478,11 @@ def _start_agent_build(sid: str, session: dict) -> None:
     ready = session.get("agent_ready")
     if ready is None:
         return
-    if ready.is_set() or session.get("agent_build_started"):
-        return
-    session["agent_build_started"] = True
+    lock = session.setdefault("agent_build_lock", threading.Lock())
+    with lock:
+        if ready.is_set() or session.get("agent_build_started"):
+            return
+        session["agent_build_started"] = True
     key = session["session_key"]
 
     def _build() -> None:
diff --git a/ui-tui/src/lib/memoryMonitor.ts b/ui-tui/src/lib/memoryMonitor.ts
index 26a0cdbc2b..41b357568f 100644
--- a/ui-tui/src/lib/memoryMonitor.ts
+++ b/ui-tui/src/lib/memoryMonitor.ts
@@ -31,11 +31,20 @@ const GB = 1024 ** 3
 // spike somehow trips the threshold before the app registers its own Ink
 // import, we pay the load cost exactly once, inside the tick that needs it.
 let _evictInkCaches: ((level: 'all' | 'half') => unknown) | null = null
+let _evictInkCachesPromise: Promise<(level: 'all' | 'half') => unknown> | null = null
+
 async function _ensureEvictInkCaches(): Promise<(level: 'all' | 'half') => unknown> {
-  if (_evictInkCaches) return _evictInkCaches
-  const mod = await import('@hermes/ink')
-  _evictInkCaches = mod.evictInkCaches as (level: 'all' | 'half') => unknown
-  return _evictInkCaches
+  if (_evictInkCaches) {
+    return _evictInkCaches
+  }
+
+  _evictInkCachesPromise ??= import('@hermes/ink').then(mod => {
+    _evictInkCaches = mod.evictInkCaches as (level: 'all' | 'half') => unknown
+
+    return _evictInkCaches
+  })
+
+  return _evictInkCachesPromise
 }
 
 export function startMemoryMonitor({
@@ -46,19 +55,25 @@ export function startMemoryMonitor({
   onHigh
 }: MemoryMonitorOptions = {}): () => void {
   const dumped = new Set<Exclude<MemoryLevel, 'normal'>>()
+  const inFlight = new Set<Exclude<MemoryLevel, 'normal'>>()
 
   const tick = async () => {
     const { heapUsed, rss } = process.memoryUsage()
     const level: MemoryLevel = heapUsed >= criticalBytes ? 'critical' : heapUsed >= highBytes ? 'high' : 'normal'
 
     if (level === 'normal') {
-      return void dumped.clear()
-    }
+      dumped.clear()
+      inFlight.clear()
 
-    if (dumped.has(level)) {
       return
     }
 
+    if (dumped.has(level) || inFlight.has(level)) {
+      return
+    }
+
+    inFlight.add(level)
+
     // Prune Ink content caches before dump/exit — half on 'high' (recoverable),
     // full on 'critical' (post-dump RSS reduction, keeps user running).
     // Deferred import keeps `@hermes/ink` off the cold-start critical path;
@@ -75,6 +90,8 @@ export function startMemoryMonitor({
     dumped.add(level)
     const dump = await performHeapDump(level === 'critical' ? 'auto-critical' : 'auto-high').catch(() => null)
 
+    inFlight.delete(level)
+
     const snap: MemorySnapshot = { heapUsed, level, rss }
 
     ;(level === 'critical' ? onCritical : onHigh)?.(snap, dump)

From 72a3af63d4f14dcb986290a0b9d0ec53abbbd68c Mon Sep 17 00:00:00 2001
From: Brooklyn Nicholson <brooklyn.bb.nicholson@gmail.com>
Date: Wed, 29 Apr 2026 00:04:12 -0500
Subject: [PATCH 06/11] fix(tui): keep prompt submit off the RPC pool

A cleanup review found that adding prompt.submit to _LONG_HANDLERS made the RPC
pool own the full first-turn wait even though the handler itself already spawns
a turn thread. Keep prompt.submit inline and make it return immediately:

- look up the session without waiting
- kick the lazy agent build
- spawn a short-lived waiter thread that blocks on agent_ready, then starts the
  existing turn dispatcher

This keeps stdin dispatch responsive, avoids occupying a bounded pool worker for
a normal chat turn, and preserves the lazy-start hydration behavior.

Tests:
- python -m py_compile tui_gateway/server.py
- cd ui-tui && npm run type-check && npm run build
- scripts/run_tests.sh tests/tui_gateway/test_protocol.py::test_sess_found tests/tools/test_code_execution_modes.py tests/tools/test_code_execution.py
- cd ui-tui && npm test -- --run src/__tests__/useSessionLifecycle.test.ts src/__tests__/useConfigSync.test.ts
---
 tui_gateway/server.py | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/tui_gateway/server.py b/tui_gateway/server.py
index 6ece5da2e6..ad07ce97f0 100644
--- a/tui_gateway/server.py
+++ b/tui_gateway/server.py
@@ -141,7 +141,6 @@ _SLASH_WORKER_TIMEOUT_S = max(
 _LONG_HANDLERS = frozenset(
     {
         "cli.exec",
-        "prompt.submit",
         "session.branch",
         "session.resume",
         "shell.exec",
@@ -2426,12 +2425,28 @@ def _(rid, params: dict) -> dict:
 @method("prompt.submit")
 def _(rid, params: dict) -> dict:
     sid, text = params.get("session_id", ""), params.get("text", "")
-    session, err = _sess(params, rid)
+    session, err = _sess_nowait(params, rid)
     if err:
         return err
+
+    _start_agent_build(sid, session)
+
+    def run_after_agent_ready() -> None:
+        err = _wait_agent(session, rid)
+        if err:
+            session.get("transport", current_transport() or _stdio_transport).write(err)
+            return
+        _run_prompt_submit(rid, sid, session, text)
+
+    threading.Thread(target=run_after_agent_ready, daemon=True).start()
+    return _ok(rid, {"status": "streaming"})
+
+
+def _run_prompt_submit(rid, sid: str, session: dict, text: Any) -> None:
     with session["history_lock"]:
         if session.get("running"):
-            return _err(rid, 4009, "session busy")
+            _emit("error", sid, {"message": "session busy"})
+            return
         session["running"] = True
         history = list(session["history"])
         history_version = int(session.get("history_version", 0))
@@ -2671,7 +2686,6 @@ def _(rid, params: dict) -> dict:
                 session["running"] = False
 
     threading.Thread(target=run, daemon=True).start()
-    return _ok(rid, {"status": "streaming"})
 
 
 @method("clipboard.paste")

From 88a9efdb1ac6d0ac5665fa087ebd7271073387fd Mon Sep 17 00:00:00 2001
From: Brooklyn Nicholson <brooklyn.bb.nicholson@gmail.com>
Date: Wed, 29 Apr 2026 00:08:34 -0500
Subject: [PATCH 07/11] fix(tui): tighten cold-start edge cases after review

Clean up the remaining review nits:

- let the deferred @hermes/ink import retry after a transient failure instead
  of memoizing a rejected promise forever
- keep memory-monitor in-flight state inside a finally so future exceptions
  cannot suppress that memory level indefinitely
- use read_raw_config for the TUI MCP cold-start probe instead of the full
  load_config()
- keep input.detect_drop for explicit relative path prefixes (./ and ../)
  while preserving the no-RPC fast path for ordinary plain prompts

Tests:
- python -m py_compile tui_gateway/server.py tui_gateway/entry.py
- cd ui-tui && npm run type-check && npm run build
- scripts/run_tests.sh tests/tui_gateway/test_protocol.py::test_sess_found tests/tools/test_code_execution_modes.py tests/tools/test_code_execution.py
- cd ui-tui && npm test -- --run src/__tests__/useSessionLifecycle.test.ts src/__tests__/useConfigSync.test.ts
---
 tui_gateway/entry.py            |  4 ++--
 ui-tui/src/app/useSubmission.ts |  6 ++---
 ui-tui/src/lib/memoryMonitor.ts | 42 +++++++++++++++++++--------------
 3 files changed, 29 insertions(+), 23 deletions(-)

diff --git a/tui_gateway/entry.py b/tui_gateway/entry.py
index 2c1804aac1..d3be53a6c4 100644
--- a/tui_gateway/entry.py
+++ b/tui_gateway/entry.py
@@ -175,8 +175,8 @@ def main():
     # loaded once by ``_config_mtime`` elsewhere) and only pay the import
     # cost when there's actually MCP work to do.
     try:
-        from hermes_cli.config import load_config
-        _mcp_servers = (load_config() or {}).get("mcp_servers")
+        from hermes_cli.config import read_raw_config
+        _mcp_servers = (read_raw_config() or {}).get("mcp_servers")
         _has_mcp_servers = isinstance(_mcp_servers, dict) and len(_mcp_servers) > 0
     except Exception:
         # Be conservative: if we can't decide, fall back to the old
diff --git a/ui-tui/src/app/useSubmission.ts b/ui-tui/src/app/useSubmission.ts
index ed86332b08..a7d2631dbd 100644
--- a/ui-tui/src/app/useSubmission.ts
+++ b/ui-tui/src/app/useSubmission.ts
@@ -127,9 +127,9 @@ export function useSubmission(opts: UseSubmissionOptions) {
       }
 
       // Plain prompts are the common path and should not pay an extra RPC
-      // before prompt.submit. File-drop detection can still run for inputs
-      // that contain an absolute/tilde path or file:// URI.
-      if (!looksLikeSlashCommand(text) && !/(?:^|\s)(?:file:\/\/|~\/|\/)[^\s]+/.test(text)) {
+      // before prompt.submit. File-drop detection still runs for absolute,
+      // tilde, file://, and explicit relative paths.
+      if (!looksLikeSlashCommand(text) && !/(?:^|\s)(?:file:\/\/|~\/|\.?\.\/|\/)[^\s]+/.test(text)) {
         return startSubmit(text, expand(text), showUserMessage)
       }
 
diff --git a/ui-tui/src/lib/memoryMonitor.ts b/ui-tui/src/lib/memoryMonitor.ts
index 41b357568f..e792df4cde 100644
--- a/ui-tui/src/lib/memoryMonitor.ts
+++ b/ui-tui/src/lib/memoryMonitor.ts
@@ -38,11 +38,16 @@ async function _ensureEvictInkCaches(): Promise<(level: 'all' | 'half') => unkno
     return _evictInkCaches
   }
 
-  _evictInkCachesPromise ??= import('@hermes/ink').then(mod => {
-    _evictInkCaches = mod.evictInkCaches as (level: 'all' | 'half') => unknown
+  _evictInkCachesPromise ??= import('@hermes/ink')
+    .then(mod => {
+      _evictInkCaches = mod.evictInkCaches as (level: 'all' | 'half') => unknown
 
-    return _evictInkCaches
-  })
+      return _evictInkCaches
+    })
+    .catch(err => {
+      _evictInkCachesPromise = null
+      throw err
+    })
 
   return _evictInkCachesPromise
 }
@@ -80,21 +85,22 @@ export function startMemoryMonitor({
     // by the time a tick fires 10s after launch the app has already loaded
     // the same module, so this resolves instantly from the ESM cache.
     try {
-      const evictInkCaches = await _ensureEvictInkCaches()
-      evictInkCaches(level === 'critical' ? 'all' : 'half')
-    } catch {
-      // Best-effort: if the dynamic import fails for any reason we still
-      // continue to the heap dump below so the user gets diagnostics.
+      try {
+        const evictInkCaches = await _ensureEvictInkCaches()
+        evictInkCaches(level === 'critical' ? 'all' : 'half')
+      } catch {
+        // Best-effort: if the dynamic import fails for any reason we still
+        // continue to the heap dump below so the user gets diagnostics.
+      }
+
+      dumped.add(level)
+      const dump = await performHeapDump(level === 'critical' ? 'auto-critical' : 'auto-high').catch(() => null)
+      const snap: MemorySnapshot = { heapUsed, level, rss }
+
+      ;(level === 'critical' ? onCritical : onHigh)?.(snap, dump)
+    } finally {
+      inFlight.delete(level)
     }
-
-    dumped.add(level)
-    const dump = await performHeapDump(level === 'critical' ? 'auto-critical' : 'auto-high').catch(() => null)
-
-    inFlight.delete(level)
-
-    const snap: MemorySnapshot = { heapUsed, level, rss }
-
-    ;(level === 'critical' ? onCritical : onHigh)?.(snap, dump)
   }
 
   const handle = setInterval(() => void tick(), intervalMs)

From cc5efb6fc16fc620dd2f4f47d0fd244da06f3739 Mon Sep 17 00:00:00 2001
From: Brooklyn Nicholson <brooklyn.bb.nicholson@gmail.com>
Date: Wed, 29 Apr 2026 00:22:38 -0500
Subject: [PATCH 08/11] fix(tui): keep non-agent session RPCs lazy

Respond to Copilot's lazy-start review: session metadata/history/usage do not
need a constructed AIAgent, so keep them on the no-wait session path. This
preserves the deferred startup model and avoids blocking simple session RPCs on
agent initialization.

Tests:
- python -m py_compile tui_gateway/server.py tui_gateway/entry.py
- cd ui-tui && npm run type-check && npm run build
- scripts/run_tests.sh tests/tui_gateway/test_protocol.py::test_sess_found tests/tools/test_code_execution_modes.py tests/tools/test_code_execution.py
- cd ui-tui && npm test -- --run src/__tests__/useSessionLifecycle.test.ts src/__tests__/useConfigSync.test.ts
---
 tui_gateway/server.py | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/tui_gateway/server.py b/tui_gateway/server.py
index ad07ce97f0..ca0ecfe729 100644
--- a/tui_gateway/server.py
+++ b/tui_gateway/server.py
@@ -1736,7 +1736,9 @@ def _(rid, params: dict) -> dict:
         if session is not None:
             _start_agent_build(sid, session)
 
-    threading.Timer(0.05, _deferred_build).start()
+    build_timer = threading.Timer(0.05, _deferred_build)
+    build_timer.daemon = True
+    build_timer.start()
 
     return _ok(
         rid,
@@ -1889,7 +1891,7 @@ def _(rid, params: dict) -> dict:
 
 @method("session.title")
 def _(rid, params: dict) -> dict:
-    session, err = _sess(params, rid)
+    session, err = _sess_nowait(params, rid)
     if err:
         return err
     db = _get_db()
@@ -1952,13 +1954,16 @@ def _(rid, params: dict) -> dict:
 
 @method("session.usage")
 def _(rid, params: dict) -> dict:
-    session, err = _sess(params, rid)
-    return err or _ok(rid, _get_usage(session["agent"]))
+    session, err = _sess_nowait(params, rid)
+    if err:
+        return err
+    agent = session.get("agent")
+    return _ok(rid, _get_usage(agent) if agent is not None else {"calls": 0, "input": 0, "output": 0, "total": 0})
 
 
 @method("session.history")
 def _(rid, params: dict) -> dict:
-    session, err = _sess(params, rid)
+    session, err = _sess_nowait(params, rid)
     if err:
         return err
     history = list(session.get("history", []))

From d341af22c0ae91d05bd0116e7bd6d37f4fb855d6 Mon Sep 17 00:00:00 2001
From: Brooklyn Nicholson <brooklyn.bb.nicholson@gmail.com>
Date: Wed, 29 Apr 2026 00:25:09 -0500
Subject: [PATCH 09/11] fix(tui): preserve busy and init error signaling

Finish the Copilot review cleanup for lazy prompt submission:

- prompt.submit now claims session.running before returning success, preserving
  the existing RPC-level session busy error so the frontend can queue.
- agent-init timeout/failure now emits a normal error event instead of writing a
  second JSON-RPC response for an already-settled request id.

Tests:
- python -m py_compile tui_gateway/server.py tui_gateway/entry.py
- cd ui-tui && npm run type-check && npm run build
- scripts/run_tests.sh tests/tui_gateway/test_protocol.py::test_sess_found tests/tools/test_code_execution_modes.py tests/tools/test_code_execution.py
- cd ui-tui && npm test -- --run src/__tests__/useSessionLifecycle.test.ts src/__tests__/useConfigSync.test.ts
---
 tui_gateway/server.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/tui_gateway/server.py b/tui_gateway/server.py
index ca0ecfe729..4dd24a3d9f 100644
--- a/tui_gateway/server.py
+++ b/tui_gateway/server.py
@@ -2433,13 +2433,19 @@ def _(rid, params: dict) -> dict:
     session, err = _sess_nowait(params, rid)
     if err:
         return err
+    with session["history_lock"]:
+        if session.get("running"):
+            return _err(rid, 4009, "session busy")
+        session["running"] = True
 
     _start_agent_build(sid, session)
 
     def run_after_agent_ready() -> None:
         err = _wait_agent(session, rid)
         if err:
-            session.get("transport", current_transport() or _stdio_transport).write(err)
+            _emit("error", sid, {"message": err.get("error", {}).get("message", "agent initialization failed")})
+            with session["history_lock"]:
+                session["running"] = False
             return
         _run_prompt_submit(rid, sid, session, text)
 
@@ -2449,10 +2455,6 @@ def _(rid, params: dict) -> dict:
 
 def _run_prompt_submit(rid, sid: str, session: dict, text: Any) -> None:
     with session["history_lock"]:
-        if session.get("running"):
-            _emit("error", sid, {"message": "session busy"})
-            return
-        session["running"] = True
         history = list(session["history"])
         history_version = int(session.get("history_version", 0))
         images = list(session.get("attached_images", []))

From c2fd0fa684fa89041f212dee57ac013e32f22c95 Mon Sep 17 00:00:00 2001
From: Brooklyn Nicholson <brooklyn.bb.nicholson@gmail.com>
Date: Wed, 29 Apr 2026 00:44:04 -0500
Subject: [PATCH 10/11] fix(tui): preserve memory monitor in-flight guard

Copilot caught that clearing inFlight on a transient normal-memory tick could
allow a second dump/eviction to start before the first async tick completed.
Only clear dumped on normal; let the in-flight tick's finally remove its own
level.

Tests:
- cd ui-tui && npm run type-check && npm run build
---
 ui-tui/src/lib/memoryMonitor.ts | 1 -
 1 file changed, 1 deletion(-)

diff --git a/ui-tui/src/lib/memoryMonitor.ts b/ui-tui/src/lib/memoryMonitor.ts
index e792df4cde..623e5cfa49 100644
--- a/ui-tui/src/lib/memoryMonitor.ts
+++ b/ui-tui/src/lib/memoryMonitor.ts
@@ -68,7 +68,6 @@ export function startMemoryMonitor({
 
     if (level === 'normal') {
       dumped.clear()
-      inFlight.clear()
 
       return
     }

From 22cc7492ffd01eb6f867b2395d60778d25e8e41d Mon Sep 17 00:00:00 2001
From: brooklyn! <brooklyn.bb.nicholson@gmail.com>
Date: Tue, 28 Apr 2026 22:44:58 -0700
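Subject: [PATCH 11/11] Potential fix for pull request finding

Remove the stray blank line between dumped.clear() and the early return in
the memory monitor's normal-level branch. Formatting only; no behavior change.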

Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
---
 ui-tui/src/lib/memoryMonitor.ts | 1 -
 1 file changed, 1 deletion(-)

diff --git a/ui-tui/src/lib/memoryMonitor.ts b/ui-tui/src/lib/memoryMonitor.ts
index 623e5cfa49..eaf11574a4 100644
--- a/ui-tui/src/lib/memoryMonitor.ts
+++ b/ui-tui/src/lib/memoryMonitor.ts
@@ -68,7 +68,6 @@ export function startMemoryMonitor({
 
     if (level === 'normal') {
       dumped.clear()
-
       return
     }