Merge pull request #17190 from NousResearch/bb/tui-cold-start-profiling

perf(tui): cut visible cold start ~57% with lazy agent init
This commit is contained in:
brooklyn! 2026-04-28 22:45:14 -07:00 committed by GitHub
commit 5e68503d2f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 309 additions and 161 deletions

View file

@@ -770,11 +770,19 @@ class TestLoadConfig(unittest.TestCase):
def test_returns_code_execution_section(self):
from tools.code_execution_tool import _load_config
mock_cli = MagicMock()
mock_cli.CLI_CONFIG = {"code_execution": {"timeout": 120, "max_tool_calls": 10}}
with patch.dict("sys.modules", {"cli": mock_cli}):
with patch("hermes_cli.config.read_raw_config",
return_value={"code_execution": {"timeout": 120, "max_tool_calls": 10}}):
result = _load_config()
self.assertIsInstance(result, dict)
self.assertEqual(result, {"timeout": 120, "max_tool_calls": 10})
def test_does_not_import_interactive_cli(self):
from tools.code_execution_tool import _load_config
mock_cli = MagicMock()
mock_cli.CLI_CONFIG = {"code_execution": {"timeout": 999}}
with patch.dict("sys.modules", {"cli": mock_cli}), \
patch("hermes_cli.config.read_raw_config", return_value={}):
result = _load_config()
self.assertEqual(result, {})
# ---------------------------------------------------------------------------

View file

@@ -1309,10 +1309,20 @@ def _kill_process_group(proc, escalate: bool = False):
def _load_config() -> dict:
"""Load code_execution config from CLI_CONFIG if available."""
"""Load code_execution config without importing the interactive CLI.
This helper is called while building the module-level execute_code schema
during tool discovery. Importing ``cli`` here pulls prompt_toolkit/Rich and
a large chunk of the classic REPL onto every agent startup path, including
``hermes --tui`` where it is never used. Read the lightweight raw config
instead; the config layer already caches by (mtime, size), and an absent
key cleanly falls back to DEFAULT_EXECUTION_MODE.
"""
try:
from cli import CLI_CONFIG
return CLI_CONFIG.get("code_execution", {})
from hermes_cli.config import read_raw_config
cfg = read_raw_config().get("code_execution", {})
return cfg if isinstance(cfg, dict) else {}
except Exception:
return {}

View file

@@ -165,11 +165,29 @@ def main():
# a model_tools.py module-level side effect; moved to explicit
# startup calls to avoid freezing the gateway's loop on lazy import
# (#16856).
#
# Cold-start guard: importing ``tools.mcp_tool`` transitively pulls the
# full MCP SDK (mcp, pydantic, httpx, jsonschema, starlette parsers —
# ~200ms on macOS), which runs on the TUI's critical path before
# ``gateway.ready`` can be emitted. The overwhelming majority of users
# have no ``mcp_servers`` configured, in which case every byte of that
# import is wasted. Check the config first (cheap — it's already been
# loaded once by ``_config_mtime`` elsewhere) and only pay the import
# cost when there's actually MCP work to do.
try:
from tools.mcp_tool import discover_mcp_tools
discover_mcp_tools()
from hermes_cli.config import read_raw_config
_mcp_servers = (read_raw_config() or {}).get("mcp_servers")
_has_mcp_servers = isinstance(_mcp_servers, dict) and len(_mcp_servers) > 0
except Exception:
pass
# Be conservative: if we can't decide, fall back to the old
# behaviour and let the discovery path handle its own errors.
_has_mcp_servers = True
if _has_mcp_servers:
try:
from tools.mcp_tool import discover_mcp_tools
discover_mcp_tools()
except Exception:
pass
if not write_json({
"jsonrpc": "2.0",

View file

@@ -465,6 +465,119 @@ def _wait_agent(session: dict, rid: str, timeout: float = 30.0) -> dict | None:
return _err(rid, 5032, err) if err else None
def _start_agent_build(sid: str, session: dict) -> None:
"""Start building the real AIAgent for a TUI session, once.
Classic `hermes` shows the prompt before constructing AIAgent; the TUI used
to eagerly build it during session.create, making startup feel blocked on
tool discovery/model metadata even though the composer was visible. Keep
the shell responsive by deferring this work until the first prompt (or any
command that actually needs the agent), while retaining the same ready/error
event contract for the frontend.
"""
ready = session.get("agent_ready")
if ready is None:
return
lock = session.setdefault("agent_build_lock", threading.Lock())
with lock:
if ready.is_set() or session.get("agent_build_started"):
return
session["agent_build_started"] = True
key = session["session_key"]
def _build() -> None:
current = _sessions.get(sid)
if current is None:
ready.set()
return
worker = None
notify_registered = False
try:
tokens = _set_session_context(key)
try:
agent = _make_agent(sid, key)
finally:
_clear_session_context(tokens)
db = _get_db()
if db is not None:
db.create_session(key, source="tui", model=_resolve_model())
pending_title = (current.get("pending_title") or "").strip()
if pending_title:
try:
title_applied = db.set_session_title(key, pending_title)
if title_applied:
current["pending_title"] = None
else:
existing_row = db.get_session(key)
existing_title = ((existing_row or {}).get("title") or "").strip()
if existing_title == pending_title:
current["pending_title"] = None
else:
logger.info(
"Pending title still queued for session %s (wanted=%r, current=%r)",
sid,
pending_title,
existing_title,
)
except ValueError as e:
current["pending_title"] = None
logger.info("Dropping pending title for session %s: %s", sid, e)
except Exception:
logger.warning("Failed to apply pending title for session %s", sid, exc_info=True)
current["agent"] = agent
try:
worker = _SlashWorker(key, getattr(agent, "model", _resolve_model()))
current["slash_worker"] = worker
except Exception:
pass
try:
from tools.approval import (
register_gateway_notify,
load_permanent_allowlist,
)
register_gateway_notify(key, lambda data: _emit("approval.request", sid, data))
notify_registered = True
load_permanent_allowlist()
except Exception:
pass
_wire_callbacks(sid)
_notify_session_boundary("on_session_reset", key)
info = _session_info(agent)
warn = _probe_credentials(agent)
if warn:
info["credential_warning"] = warn
cfg_warn = _probe_config_health(_load_cfg())
if cfg_warn:
info["config_warning"] = cfg_warn
logger.warning(cfg_warn)
_emit("session.info", sid, info)
except Exception as e:
current["agent_error"] = str(e)
_emit("error", sid, {"message": f"agent init failed: {e}"})
finally:
if _sessions.get(sid) is not current:
if worker is not None:
try:
worker.close()
except Exception:
pass
if notify_registered:
try:
from tools.approval import unregister_gateway_notify
unregister_gateway_notify(key)
except Exception:
pass
ready.set()
threading.Thread(target=_build, daemon=True).start()
def _sess_nowait(params, rid):
s = _sessions.get(params.get("session_id") or "")
return (s, None) if s else (None, _err(rid, 4001, "session not found"))
@@ -472,7 +585,10 @@ def _sess_nowait(params, rid):
def _sess(params, rid):
s, err = _sess_nowait(params, rid)
return (None, err) if err else (s, _wait_agent(s, rid))
if err:
return (None, err)
_start_agent_build(params.get("session_id") or "", s)
return (s, _wait_agent(s, rid))
def _normalize_completion_path(path_part: str) -> str:
@@ -1627,129 +1743,18 @@ def _(rid, params: dict) -> dict:
"transport": current_transport() or _stdio_transport,
}
def _build() -> None:
# Return the lightweight session immediately so Ink can paint the composer
# + skeleton panel, then build the real AIAgent just after this response is
# flushed. This keeps startup responsive while still hydrating tools/skills
# without requiring the user to submit a first prompt.
def _deferred_build() -> None:
session = _sessions.get(sid)
if session is None:
# session.close ran before the build thread got scheduled.
ready.set()
return
if session is not None:
_start_agent_build(sid, session)
# Track what we allocate so we can clean up if session.close
# races us to the finish line. session.close pops _sessions[sid]
# unconditionally and tries to close the slash_worker it finds;
# if _build is still mid-construction when close runs, close
# finds slash_worker=None / notify unregistered and returns
# cleanly — leaving us, the build thread, to later install the
# worker + notify on an orphaned session dict. The finally
# block below detects the orphan and cleans up instead of
# leaking a subprocess and a global notify registration.
worker = None
notify_registered = False
try:
tokens = _set_session_context(key)
try:
agent = _make_agent(sid, key)
finally:
_clear_session_context(tokens)
db = _get_db()
if db is not None:
db.create_session(key, source="tui", model=_resolve_model())
pending_title = (session.get("pending_title") or "").strip()
if pending_title:
try:
title_applied = db.set_session_title(key, pending_title)
if title_applied:
session["pending_title"] = None
else:
existing_row = db.get_session(key)
existing_title = (
(existing_row or {}).get("title") or ""
).strip()
if existing_title == pending_title:
session["pending_title"] = None
else:
logger.info(
"Pending title still queued for session %s (wanted=%r, current=%r)",
sid,
pending_title,
existing_title,
)
except ValueError as e:
# Queued title can become invalid/duplicate between queue time
# and DB row creation. Drop the queue and log the reason so
# future /title reads don't surface a stuck pending value.
session["pending_title"] = None
logger.info(
"Dropping pending title for session %s: %s",
sid,
e,
)
except Exception:
logger.warning(
"Failed to apply pending title for session %s",
sid,
exc_info=True,
)
session["agent"] = agent
try:
worker = _SlashWorker(key, getattr(agent, "model", _resolve_model()))
session["slash_worker"] = worker
except Exception:
pass
try:
from tools.approval import (
register_gateway_notify,
load_permanent_allowlist,
)
register_gateway_notify(
key, lambda data: _emit("approval.request", sid, data)
)
notify_registered = True
load_permanent_allowlist()
except Exception:
pass
_wire_callbacks(sid)
_notify_session_boundary("on_session_reset", key)
info = _session_info(agent)
warn = _probe_credentials(agent)
if warn:
info["credential_warning"] = warn
cfg_warn = _probe_config_health(_load_cfg())
if cfg_warn:
info["config_warning"] = cfg_warn
logger.warning(cfg_warn)
_emit("session.info", sid, info)
except Exception as e:
session["agent_error"] = str(e)
_emit("error", sid, {"message": f"agent init failed: {e}"})
finally:
# Orphan check: if session.close raced us and popped
# _sessions[sid] while we were building, the dict we just
# populated is unreachable. Clean up the subprocess and
# the global notify registration ourselves — session.close
# couldn't see them at the time it ran.
if _sessions.get(sid) is not session:
if worker is not None:
try:
worker.close()
except Exception:
pass
if notify_registered:
try:
from tools.approval import unregister_gateway_notify
unregister_gateway_notify(key)
except Exception:
pass
ready.set()
threading.Thread(target=_build, daemon=True).start()
build_timer = threading.Timer(0.05, _deferred_build)
build_timer.daemon = True
build_timer.start()
return _ok(
rid,
@@ -1760,6 +1765,7 @@ def _(rid, params: dict) -> dict:
"tools": {},
"skills": {},
"cwd": os.getenv("TERMINAL_CWD", os.getcwd()),
"lazy": True,
},
},
)
@@ -1901,7 +1907,7 @@ def _(rid, params: dict) -> dict:
@method("session.title")
def _(rid, params: dict) -> dict:
session, err = _sess(params, rid)
session, err = _sess_nowait(params, rid)
if err:
return err
db = _get_db()
@@ -1964,13 +1970,16 @@ def _(rid, params: dict) -> dict:
@method("session.usage")
def _(rid, params: dict) -> dict:
session, err = _sess(params, rid)
return err or _ok(rid, _get_usage(session["agent"]))
session, err = _sess_nowait(params, rid)
if err:
return err
agent = session.get("agent")
return _ok(rid, _get_usage(agent) if agent is not None else {"calls": 0, "input": 0, "output": 0, "total": 0})
@method("session.history")
def _(rid, params: dict) -> dict:
session, err = _sess(params, rid)
session, err = _sess_nowait(params, rid)
if err:
return err
history = list(session.get("history", []))
@@ -2437,13 +2446,31 @@ def _(rid, params: dict) -> dict:
@method("prompt.submit")
def _(rid, params: dict) -> dict:
sid, text = params.get("session_id", ""), params.get("text", "")
session, err = _sess(params, rid)
session, err = _sess_nowait(params, rid)
if err:
return err
with session["history_lock"]:
if session.get("running"):
return _err(rid, 4009, "session busy")
session["running"] = True
_start_agent_build(sid, session)
def run_after_agent_ready() -> None:
err = _wait_agent(session, rid)
if err:
_emit("error", sid, {"message": err.get("error", {}).get("message", "agent initialization failed")})
with session["history_lock"]:
session["running"] = False
return
_run_prompt_submit(rid, sid, session, text)
threading.Thread(target=run_after_agent_ready, daemon=True).start()
return _ok(rid, {"status": "streaming"})
def _run_prompt_submit(rid, sid: str, session: dict, text: Any) -> None:
with session["history_lock"]:
history = list(session["history"])
history_version = int(session.get("history_version", 0))
images = list(session.get("attached_images", []))
@@ -2682,7 +2709,6 @@ def _(rid, params: dict) -> dict:
session["running"] = False
threading.Thread(target=run, daemon=True).start()
return _ok(rid, {"status": "streaming"})
@method("clipboard.paste")

View file

@@ -5,8 +5,7 @@ import type { GatewayClient } from '../gatewayClient.js'
import type {
ConfigFullResponse,
ConfigMtimeResponse,
ReloadMcpResponse,
VoiceToggleResponse
ReloadMcpResponse
} from '../gatewayTypes.js'
import { asRpcResult } from '../lib/rpc.js'
@@ -118,7 +117,11 @@ export function useConfigSync({ gw, setBellOnComplete, setVoiceEnabled, sid }: U
return
}
quietRpc<VoiceToggleResponse>(gw, 'voice.toggle', { action: 'status' }).then(r => setVoiceEnabled(!!r?.enabled))
// Keep startup cheap: voice.toggle status probes optional audio/STT deps and
// can run long enough to delay prompt.submit on the single stdio RPC pipe.
// Environment flags are enough to initialize the UI bit; the heavier status
// check still runs when the user opens /voice.
setVoiceEnabled(process.env.HERMES_VOICE === '1')
quietRpc<ConfigMtimeResponse>(gw, 'config.get', { key: 'mtime' }).then(r => {
mtimeRef.current = Number(r?.mtime ?? 0)
})

View file

@@ -126,6 +126,13 @@ export function useSubmission(opts: UseSubmissionOptions) {
return sys('session not ready yet')
}
// Plain prompts are the common path and should not pay an extra RPC
// before prompt.submit. File-drop detection still runs for absolute,
// tilde, file://, and explicit relative paths.
if (!looksLikeSlashCommand(text) && !/(?:^|\s)(?:file:\/\/|~\/|\.?\.\/|\/)[^\s]+/.test(text)) {
return startSubmit(text, expand(text), showUserMessage)
}
gw.request<InputDetectDropResponse>('input.detect_drop', { session_id: sid, text })
.then(r => {
if (!r?.matched) {

View file

@@ -68,7 +68,7 @@ const TranscriptPane = memo(function TranscriptPane({
<Box flexDirection="column" paddingTop={1}>
<Banner t={ui.theme} />
{row.msg.info?.version && <SessionPanel info={row.msg.info} sid={ui.sid} t={ui.theme} />}
{row.msg.info && <SessionPanel info={row.msg.info} sid={ui.sid} t={ui.theme} />}
</Box>
) : row.msg.kind === 'panel' && row.msg.panelData ? (
<Panel sections={row.msg.panelData.sections} t={ui.theme} title={row.msg.panelData.title} />

View file

@@ -1,10 +1,32 @@
import { Box, Text, useStdout } from '@hermes/ink'
import { useEffect, useState } from 'react'
import unicodeSpinners from 'unicode-animations'
import { artWidth, caduceus, CADUCEUS_WIDTH, logo, LOGO_WIDTH } from '../banner.js'
import { flat } from '../lib/text.js'
import type { Theme } from '../theme.js'
import type { PanelSection, SessionInfo } from '../types.js'
const LOADER_TICK_MS = 120
function InlineLoader({ label, t }: { label: string; t: Theme }) {
const [tick, setTick] = useState(0)
const spinner = unicodeSpinners.braille
const frame = spinner.frames[tick % spinner.frames.length] ?? '⠋'
useEffect(() => {
const id = setInterval(() => setTick(n => n + 1), Math.max(LOADER_TICK_MS, spinner.interval))
return () => clearInterval(id)
}, [spinner.interval])
return (
<Text color={t.color.muted} wrap="truncate">
<Text color={t.color.accent}>{frame}</Text> {label}
</Text>
)
}
export function ArtLines({ lines }: { lines: [string, string][] }) {
return (
<>
@@ -67,6 +89,7 @@ export function SessionPanel({ info, sid, t }: SessionPanelProps) {
const entries = Object.entries(data).sort()
const shown = entries.slice(0, max)
const overflow = entries.length - max
const skeleton = info.lazy && entries.length === 0
return (
<Box flexDirection="column" marginTop={1}>
@@ -74,12 +97,16 @@ export function SessionPanel({ info, sid, t }: SessionPanelProps) {
Available {title}
</Text>
{shown.map(([k, vs]) => (
<Text key={k} wrap="truncate">
<Text color={t.color.muted}>{strip(k)}: </Text>
<Text color={t.color.text}>{truncLine(strip(k) + ': ', vs)}</Text>
</Text>
))}
{skeleton ? (
<InlineLoader label={title === 'Tools' ? 'discovering tools' : 'scanning skills'} t={t} />
) : (
shown.map(([k, vs]) => (
<Text key={k} wrap="truncate">
<Text color={t.color.muted}>{strip(k)}: </Text>
<Text color={t.color.text}>{truncLine(strip(k) + ': ', vs)}</Text>
</Text>
))
)}
{overflow > 0 && (
<Text color={t.color.muted}>

View file

@@ -1,5 +1,3 @@
import { evictInkCaches } from '@hermes/ink'
import { type HeapDumpResult, performHeapDump } from './memory.js'
export type MemoryLevel = 'critical' | 'high' | 'normal'
@@ -20,6 +18,40 @@ export interface MemoryMonitorOptions {
const GB = 1024 ** 3
// Deferred @hermes/ink import: loading `@hermes/ink` at module top-level
// pulls the full ~414KB Ink bundle (React, renderer, components, hooks) onto
// the critical path before the Python gateway can even be spawned. That
// serialised roughly 150ms of Node work in front of gw.start() on every
// cold `hermes --tui` launch.
//
// evictInkCaches only runs inside `tick()`, which fires on a 10s timer and
// only when heap pressure crosses the high-water mark — by then Ink has
// long since been loaded by the app entry. This dynamic import is a no-op
// on the hot path (module is already in the ESM cache); when a startup
// spike somehow trips the threshold before the app registers its own Ink
// import, we pay the load cost exactly once, inside the tick that needs it.
let _evictInkCaches: ((level: 'all' | 'half') => unknown) | null = null
let _evictInkCachesPromise: Promise<(level: 'all' | 'half') => unknown> | null = null
async function _ensureEvictInkCaches(): Promise<(level: 'all' | 'half') => unknown> {
if (_evictInkCaches) {
return _evictInkCaches
}
_evictInkCachesPromise ??= import('@hermes/ink')
.then(mod => {
_evictInkCaches = mod.evictInkCaches as (level: 'all' | 'half') => unknown
return _evictInkCaches
})
.catch(err => {
_evictInkCachesPromise = null
throw err
})
return _evictInkCachesPromise
}
export function startMemoryMonitor({
criticalBytes = 2.5 * GB,
highBytes = 1.5 * GB,
@@ -28,29 +60,45 @@
onHigh
}: MemoryMonitorOptions = {}): () => void {
const dumped = new Set<Exclude<MemoryLevel, 'normal'>>()
const inFlight = new Set<Exclude<MemoryLevel, 'normal'>>()
const tick = async () => {
const { heapUsed, rss } = process.memoryUsage()
const level: MemoryLevel = heapUsed >= criticalBytes ? 'critical' : heapUsed >= highBytes ? 'high' : 'normal'
if (level === 'normal') {
return void dumped.clear()
}
if (dumped.has(level)) {
dumped.clear()
return
}
if (dumped.has(level) || inFlight.has(level)) {
return
}
inFlight.add(level)
// Prune Ink content caches before dump/exit — half on 'high' (recoverable),
// full on 'critical' (post-dump RSS reduction, keeps user running).
evictInkCaches(level === 'critical' ? 'all' : 'half')
// Deferred import keeps `@hermes/ink` off the cold-start critical path;
// by the time a tick fires 10s after launch the app has already loaded
// the same module, so this resolves instantly from the ESM cache.
try {
try {
const evictInkCaches = await _ensureEvictInkCaches()
evictInkCaches(level === 'critical' ? 'all' : 'half')
} catch {
// Best-effort: if the dynamic import fails for any reason we still
// continue to the heap dump below so the user gets diagnostics.
}
dumped.add(level)
const dump = await performHeapDump(level === 'critical' ? 'auto-critical' : 'auto-high').catch(() => null)
dumped.add(level)
const dump = await performHeapDump(level === 'critical' ? 'auto-critical' : 'auto-high').catch(() => null)
const snap: MemorySnapshot = { heapUsed, level, rss }
const snap: MemorySnapshot = { heapUsed, level, rss }
;(level === 'critical' ? onCritical : onHigh)?.(snap, dump)
;(level === 'critical' ? onCritical : onHigh)?.(snap, dump)
} finally {
inFlight.delete(level)
}
}
const handle = setInterval(() => void tick(), intervalMs)

View file

@@ -143,11 +143,12 @@ export interface McpServerStatus {
export interface SessionInfo {
cwd?: string
fast?: boolean
lazy?: boolean
mcp_servers?: McpServerStatus[]
model: string
reasoning_effort?: string
service_tier?: string
release_date?: string
service_tier?: string
skills: Record<string, string[]>
tools: Record<string, string[]>
update_behind?: number | null