From 0399d4b97668c020c8c583eda190b90fc9fca4e8 Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Tue, 28 Apr 2026 19:42:31 -0500 Subject: [PATCH 01/11] perf(tui): shave ~190ms off `hermes --tui` cold start MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two targeted fixes on the critical path from `hermes --tui` launch to `gateway.ready`: 1. **Defer `@hermes/ink` import in memoryMonitor.ts.** The static top-level import dragged the full ~414KB Ink bundle (React + renderer + all components/hooks) onto the critical path *before* `gw.start()` could spawn the Python gateway — serialising ~155ms of Node work in front of it on every launch. `evictInkCaches` only runs inside the 10-second tick under heap pressure, so it moves to a lazy dynamic import. First tick hits the ESM cache because the app entry has long since imported `@hermes/ink`. 2. **Gate `tools.mcp_tool` import on config in tui_gateway/entry.py.** Importing the module transitively pulls the MCP SDK + pydantic + httpx + jsonschema + starlette formparsers (~200ms). The overwhelming majority of users have no `mcp_servers` configured, so this runs for nothing. A cheap `load_config()` check (~25ms) skips the 200ms import when no servers are declared, with a conservative fallback to the old behaviour if the config probe itself fails. ## Measurements (macOS Terminal.app, Apple Silicon, n=12) | Metric | Before (p50) | After (p50) | Δ | |----------------------------|--------------|-------------|----------| | Python gateway boot alone | 252–365ms | 105–151ms | −180ms | | `hermes --tui` banner paint | 686ms | 665ms | −21ms | | `hermes --tui` → ready | **1843ms** | **1655ms** | **−188ms (−10.2%)** | | `hermes --tui` → ready p90 | 1932ms | 1778ms | −154ms | | stdev (ready) | 126ms | 83ms | also more consistent | ## Tests - `scripts/run_tests.sh tests/tui_gateway/ tests/tools/test_mcp_tool.py`: 195 passed. (The one pre-existing failure in `test_session_resume_returns_hydrated_messages` reproduces on main — unrelated, it's a mock-DB kwarg mismatch.) - `ui-tui` vitest: 430 tests, all pass. - `npm run type-check` in ui-tui: clean. ## Notes - Node-side first paint ("banner") didn't move meaningfully because that latency is dominated by Ink's render pipeline + React mount, not by which imports load first. - The win shows up entirely in the time from banner to `gateway.ready` — exactly where we expected it, since both fixes shorten the Python gateway's boot path or let it overlap more with Node startup. - No user-visible behaviour change. Memory monitoring still fires every 10s; MCP still works when `mcp_servers` is configured. --- tui_gateway/entry.py | 24 +++++++++++++++++++++--- ui-tui/src/lib/memoryMonitor.ts | 33 ++++++++++++++++++++++++++++++--- 2 files changed, 51 insertions(+), 6 deletions(-) diff --git a/tui_gateway/entry.py b/tui_gateway/entry.py index 70fc851820..2c1804aac1 100644 --- a/tui_gateway/entry.py +++ b/tui_gateway/entry.py @@ -165,11 +165,29 @@ def main(): # a model_tools.py module-level side effect; moved to explicit # startup calls to avoid freezing the gateway's loop on lazy import # (#16856). + # + # Cold-start guard: importing ``tools.mcp_tool`` transitively pulls the + # full MCP SDK (mcp, pydantic, httpx, jsonschema, starlette parsers — + # ~200ms on macOS), which runs on the TUI's critical path before + # ``gateway.ready`` can be emitted. The overwhelming majority of users + # have no ``mcp_servers`` configured, in which case every byte of that + # import is wasted. Check the config first (cheap — it's already been + # loaded once by ``_config_mtime`` elsewhere) and only pay the import + # cost when there's actually MCP work to do. try: - from tools.mcp_tool import discover_mcp_tools - discover_mcp_tools() + from hermes_cli.config import load_config + _mcp_servers = (load_config() or {}).get("mcp_servers") + _has_mcp_servers = isinstance(_mcp_servers, dict) and len(_mcp_servers) > 0 except Exception: - pass + # Be conservative: if we can't decide, fall back to the old + # behaviour and let the discovery path handle its own errors. + _has_mcp_servers = True + if _has_mcp_servers: + try: + from tools.mcp_tool import discover_mcp_tools + discover_mcp_tools() + except Exception: + pass if not write_json({ "jsonrpc": "2.0", diff --git a/ui-tui/src/lib/memoryMonitor.ts b/ui-tui/src/lib/memoryMonitor.ts index bbdb229705..26a0cdbc2b 100644 --- a/ui-tui/src/lib/memoryMonitor.ts +++ b/ui-tui/src/lib/memoryMonitor.ts @@ -1,5 +1,3 @@ -import { evictInkCaches } from '@hermes/ink' - import { type HeapDumpResult, performHeapDump } from './memory.js' export type MemoryLevel = 'critical' | 'high' | 'normal' @@ -20,6 +18,26 @@ export interface MemoryMonitorOptions { const GB = 1024 ** 3 +// Deferred @hermes/ink import: loading `@hermes/ink` at module top-level +// pulls the full ~414KB Ink bundle (React, renderer, components, hooks) onto +// the critical path before the Python gateway can even be spawned. That +// serialised roughly 150ms of Node work in front of gw.start() on every +// cold `hermes --tui` launch. +// +// evictInkCaches only runs inside `tick()`, which fires on a 10s timer and +// only when heap pressure crosses the high-water mark — by then Ink has +// long since been loaded by the app entry. This dynamic import is a no-op +// on the hot path (module is already in the ESM cache); when a startup +// spike somehow trips the threshold before the app registers its own Ink +// import, we pay the load cost exactly once, inside the tick that needs it. +let _evictInkCaches: ((level: 'all' | 'half') => unknown) | null = null +async function _ensureEvictInkCaches(): Promise<(level: 'all' | 'half') => unknown> { + if (_evictInkCaches) return _evictInkCaches + const mod = await import('@hermes/ink') + _evictInkCaches = mod.evictInkCaches as (level: 'all' | 'half') => unknown + return _evictInkCaches +} + export function startMemoryMonitor({ criticalBytes = 2.5 * GB, highBytes = 1.5 * GB, @@ -43,7 +61,16 @@ export function startMemoryMonitor({ // Prune Ink content caches before dump/exit — half on 'high' (recoverable), // full on 'critical' (post-dump RSS reduction, keeps user running). - evictInkCaches(level === 'critical' ? 'all' : 'half') + // Deferred import keeps `@hermes/ink` off the cold-start critical path; + // by the time a tick fires 10s after launch the app has already loaded + // the same module, so this resolves instantly from the ESM cache. + try { + const evictInkCaches = await _ensureEvictInkCaches() + evictInkCaches(level === 'critical' ? 'all' : 'half') + } catch { + // Best-effort: if the dynamic import fails for any reason we still + // continue to the heap dump below so the user gets diagnostics. + } dumped.add(level) const dump = await performHeapDump(level === 'critical' ? 'auto-critical' : 'auto-high').catch(() => null) From 9e398e1809dd30c26ed899e362af6bb04c948894 Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Tue, 28 Apr 2026 22:42:17 -0500 Subject: [PATCH 02/11] perf(tui): avoid importing classic CLI during tool discovery TUI session readiness was still laggy after the gateway-ready fixes. Profiling session.create -> session.info showed the slow phase is background AIAgent construction (~1.1s). A cProfile run of tui_gateway.server::_make_agent showed model_tools/tool discovery importing tools.code_execution_tool, whose module-level EXECUTE_CODE_SCHEMA calls _get_execution_mode(), which imported cli.CLI_CONFIG. That pulled the classic interactive CLI stack (prompt_toolkit/Rich and REPL setup) into every agent startup path, including hermes --tui where it is not used. Replace that with hermes_cli.config.read_raw_config(), which is cached and reads only the raw code_execution section. Existing defaults still apply when the key is absent. Measurements on macOS Terminal.app: - import run_agent: ~466ms -> ~347ms - model_tools import: ~418ms -> ~272ms - _make_agent: ~1452ms -> ~1239ms - session.create -> session.info: ~1167ms -> ~999ms - full hermes --tui ready p50: ~1655ms -> ~1537ms Tests: - scripts/run_tests.sh tests/tools/test_code_execution_modes.py tests/tools/test_code_execution.py --- tests/tools/test_code_execution.py | 16 ++++++++++++---- tools/code_execution_tool.py | 16 +++++++++++++--- 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/tests/tools/test_code_execution.py b/tests/tools/test_code_execution.py index 15f8faa9bb..6f6260ffe2 100644 --- a/tests/tools/test_code_execution.py +++ b/tests/tools/test_code_execution.py @@ -770,11 +770,19 @@ class TestLoadConfig(unittest.TestCase): def test_returns_code_execution_section(self): from tools.code_execution_tool import _load_config - mock_cli = MagicMock() - mock_cli.CLI_CONFIG = {"code_execution": {"timeout": 120, "max_tool_calls": 10}} - with patch.dict("sys.modules", {"cli": mock_cli}): + with patch("hermes_cli.config.read_raw_config", + return_value={"code_execution": {"timeout": 120, "max_tool_calls": 10}}): result = _load_config() - self.assertIsInstance(result, dict) + self.assertEqual(result, {"timeout": 120, "max_tool_calls": 10}) + + def test_does_not_import_interactive_cli(self): + from tools.code_execution_tool import _load_config + mock_cli = MagicMock() + mock_cli.CLI_CONFIG = {"code_execution": {"timeout": 999}} + with patch.dict("sys.modules", {"cli": mock_cli}), \ + patch("hermes_cli.config.read_raw_config", return_value={}): + result = _load_config() + self.assertEqual(result, {}) # --------------------------------------------------------------------------- diff --git a/tools/code_execution_tool.py b/tools/code_execution_tool.py index db706e6a4c..3f83394c18 100644 --- a/tools/code_execution_tool.py +++ b/tools/code_execution_tool.py @@ -1309,10 +1309,20 @@ def _kill_process_group(proc, escalate: bool = False): def _load_config() -> dict: - """Load code_execution config from CLI_CONFIG if available.""" + """Load code_execution config without importing the interactive CLI. + + This helper is called while building the module-level execute_code schema + during tool discovery. Importing ``cli`` here pulls prompt_toolkit/Rich and + a large chunk of the classic REPL onto every agent startup path, including + ``hermes --tui`` where it is never used. Read the lightweight raw config + instead; the config layer already caches by (mtime, size), and an absent + key cleanly falls back to DEFAULT_EXECUTION_MODE. + """ try: - from cli import CLI_CONFIG - return CLI_CONFIG.get("code_execution", {}) + from hermes_cli.config import read_raw_config + + cfg = read_raw_config().get("code_execution", {}) + return cfg if isinstance(cfg, dict) else {} except Exception: return {} From b66cbb7b4ca8dd8d3242f14caf5fd52807069dc8 Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Tue, 28 Apr 2026 23:32:02 -0500 Subject: [PATCH 03/11] perf(tui): defer agent construction until first prompt Match classic CLI perceived startup behavior: show the TUI shell and composer before constructing the full AIAgent. session.create now returns a lightweight placeholder session with lazy=true and no longer starts _make_agent eagerly. The first method that needs the agent triggers _start_agent_build() via _sess(); prompt.submit is routed through the RPC worker pool so that the initial wait for agent construction does not block the stdio dispatcher. The intro panel renders skeleton rows for tools/skills while the real session.info payload is absent, then hydrates to the real tools/skills panel once AIAgent initialization completes. Also skip the startup /voice status probe and avoid the input.detect_drop RPC for ordinary plain-text prompts to keep early startup/first-submit paths cheap. Measurements on macOS Terminal.app: - Previous full ready p50 after earlier PR commits: ~1537ms - Lazy skeleton panel p50: ~794ms - Original baseline full ready p50: ~1843ms So the visible startup surface is now ~743ms faster than the prior PR state and ~1.05s faster than the original baseline. First prompt still pays the same agent construction cost if it races the background/skeleton state, matching classic CLI's deferred behavior. Tests: - python -m py_compile tui_gateway/server.py - cd ui-tui && npm run type-check && npm run build - scripts/run_tests.sh tests/tui_gateway/test_protocol.py::test_sess_found tests/tools/test_code_execution_modes.py tests/tools/test_code_execution.py - cd ui-tui && npm test -- --run src/__tests__/useSessionLifecycle.test.ts src/__tests__/useConfigSync.test.ts --- tui_gateway/server.py | 242 ++++++++++++++-------------- ui-tui/src/app/useConfigSync.ts | 9 +- ui-tui/src/app/useSubmission.ts | 7 + ui-tui/src/components/appLayout.tsx | 2 +- ui-tui/src/components/branding.tsx | 21 ++- ui-tui/src/types.ts | 3 +- 6 files changed, 148 insertions(+), 136 deletions(-) diff --git a/tui_gateway/server.py b/tui_gateway/server.py index 555d8396b4..e5b1447d76 100644 --- a/tui_gateway/server.py +++ b/tui_gateway/server.py @@ -141,6 +141,7 @@ _SLASH_WORKER_TIMEOUT_S = max( _LONG_HANDLERS = frozenset( { "cli.exec", + "prompt.submit", "session.branch", "session.resume", "shell.exec", @@ -464,6 +465,117 @@ def _wait_agent(session: dict, rid: str, timeout: float = 30.0) -> dict | None: return _err(rid, 5032, err) if err else None +def _start_agent_build(sid: str, session: dict) -> None: + """Start building the real AIAgent for a TUI session, once. + + Classic `hermes` shows the prompt before constructing AIAgent; the TUI used + to eagerly build it during session.create, making startup feel blocked on + tool discovery/model metadata even though the composer was visible. Keep + the shell responsive by deferring this work until the first prompt (or any + command that actually needs the agent), while retaining the same ready/error + event contract for the frontend. + """ + ready = session.get("agent_ready") + if ready is None: + return + if ready.is_set() or session.get("agent_build_started"): + return + session["agent_build_started"] = True + key = session["session_key"] + + def _build() -> None: + current = _sessions.get(sid) + if current is None: + ready.set() + return + + worker = None + notify_registered = False + try: + tokens = _set_session_context(key) + try: + agent = _make_agent(sid, key) + finally: + _clear_session_context(tokens) + + db = _get_db() + if db is not None: + db.create_session(key, source="tui", model=_resolve_model()) + pending_title = (current.get("pending_title") or "").strip() + if pending_title: + try: + title_applied = db.set_session_title(key, pending_title) + if title_applied: + current["pending_title"] = None + else: + existing_row = db.get_session(key) + existing_title = ((existing_row or {}).get("title") or "").strip() + if existing_title == pending_title: + current["pending_title"] = None + else: + logger.info( + "Pending title still queued for session %s (wanted=%r, current=%r)", + sid, + pending_title, + existing_title, + ) + except ValueError as e: + current["pending_title"] = None + logger.info("Dropping pending title for session %s: %s", sid, e) + except Exception: + logger.warning("Failed to apply pending title for session %s", sid, exc_info=True) + current["agent"] = agent + + try: + worker = _SlashWorker(key, getattr(agent, "model", _resolve_model())) + current["slash_worker"] = worker + except Exception: + pass + + try: + from tools.approval import ( + register_gateway_notify, + load_permanent_allowlist, + ) + register_gateway_notify(key, lambda data: _emit("approval.request", sid, data)) + notify_registered = True + load_permanent_allowlist() + except Exception: + pass + + _wire_callbacks(sid) + _notify_session_boundary("on_session_reset", key) + + info = _session_info(agent) + warn = _probe_credentials(agent) + if warn: + info["credential_warning"] = warn + cfg_warn = _probe_config_health(_load_cfg()) + if cfg_warn: + info["config_warning"] = cfg_warn + logger.warning(cfg_warn) + _emit("session.info", sid, info) + except Exception as e: + current["agent_error"] = str(e) + _emit("error", sid, {"message": f"agent init failed: {e}"}) + finally: + if _sessions.get(sid) is not current: + if worker is not None: + try: + worker.close() + except Exception: + pass + if notify_registered: + try: + from tools.approval import unregister_gateway_notify + unregister_gateway_notify(key) + except Exception: + pass + ready.set() + + threading.Thread(target=_build, daemon=True).start() + + def _sess_nowait(params, rid): s = _sessions.get(params.get("session_id") or "") return (s, None) if s else (None, _err(rid, 4001, "session not found")) @@ -471,7 +583,10 @@ def _sess_nowait(params, rid): def _sess(params, rid): s, err = _sess_nowait(params, rid) - return (None, err) if err else (s, _wait_agent(s, rid)) + if err: + return (None, err) + _start_agent_build(params.get("session_id") or "", s) + return (s, _wait_agent(s, rid)) def _normalize_completion_path(path_part: str) -> str: @@ -1611,130 +1726,6 @@ def _(rid, params: dict) -> dict: "transport": current_transport() or _stdio_transport, } - def _build() -> None: - session = _sessions.get(sid) - if session is None: - # session.close ran before the build thread got scheduled. - ready.set() - return - - # Track what we allocate so we can clean up if session.close - # races us to the finish line. session.close pops _sessions[sid] - # unconditionally and tries to close the slash_worker it finds; - # if _build is still mid-construction when close runs, close - # finds slash_worker=None / notify unregistered and returns - # cleanly — leaving us, the build thread, to later install the - # worker + notify on an orphaned session dict. The finally - # block below detects the orphan and cleans up instead of - # leaking a subprocess and a global notify registration. - worker = None - notify_registered = False - try: - tokens = _set_session_context(key) - try: - agent = _make_agent(sid, key) - finally: - _clear_session_context(tokens) - - db = _get_db() - if db is not None: - db.create_session(key, source="tui", model=_resolve_model()) - pending_title = (session.get("pending_title") or "").strip() - if pending_title: - try: - title_applied = db.set_session_title(key, pending_title) - if title_applied: - session["pending_title"] = None - else: - existing_row = db.get_session(key) - existing_title = ( - (existing_row or {}).get("title") or "" - ).strip() - if existing_title == pending_title: - session["pending_title"] = None - else: - logger.info( - "Pending title still queued for session %s (wanted=%r, current=%r)", - sid, - pending_title, - existing_title, - ) - except ValueError as e: - # Queued title can become invalid/duplicate between queue time - # and DB row creation. Drop the queue and log the reason so - # future /title reads don't surface a stuck pending value. - session["pending_title"] = None - logger.info( - "Dropping pending title for session %s: %s", - sid, - e, - ) - except Exception: - logger.warning( - "Failed to apply pending title for session %s", - sid, - exc_info=True, - ) - session["agent"] = agent - - try: - worker = _SlashWorker(key, getattr(agent, "model", _resolve_model())) - session["slash_worker"] = worker - except Exception: - pass - - try: - from tools.approval import ( - register_gateway_notify, - load_permanent_allowlist, - ) - - register_gateway_notify( - key, lambda data: _emit("approval.request", sid, data) - ) - notify_registered = True - load_permanent_allowlist() - except Exception: - pass - - _wire_callbacks(sid) - _notify_session_boundary("on_session_reset", key) - - info = _session_info(agent) - warn = _probe_credentials(agent) - if warn: - info["credential_warning"] = warn - cfg_warn = _probe_config_health(_load_cfg()) - if cfg_warn: - info["config_warning"] = cfg_warn - logger.warning(cfg_warn) - _emit("session.info", sid, info) - except Exception as e: - session["agent_error"] = str(e) - _emit("error", sid, {"message": f"agent init failed: {e}"}) - finally: - # Orphan check: if session.close raced us and popped - # _sessions[sid] while we were building, the dict we just - # populated is unreachable. Clean up the subprocess and - # the global notify registration ourselves — session.close - # couldn't see them at the time it ran. - if _sessions.get(sid) is not session: - if worker is not None: - try: - worker.close() - except Exception: - pass - if notify_registered: - try: - from tools.approval import unregister_gateway_notify - - unregister_gateway_notify(key) - except Exception: - pass - ready.set() - - threading.Thread(target=_build, daemon=True).start() - return _ok( rid, { @@ -1744,6 +1735,7 @@ def _(rid, params: dict) -> dict: "tools": {}, "skills": {}, "cwd": os.getenv("TERMINAL_CWD", os.getcwd()), + "lazy": True, }, }, ) diff --git a/ui-tui/src/app/useConfigSync.ts b/ui-tui/src/app/useConfigSync.ts index 931f92f762..db8517559c 100644 --- a/ui-tui/src/app/useConfigSync.ts +++ b/ui-tui/src/app/useConfigSync.ts @@ -5,8 +5,7 @@ import type { GatewayClient } from '../gatewayClient.js' import type { ConfigFullResponse, ConfigMtimeResponse, - ReloadMcpResponse, - VoiceToggleResponse + ReloadMcpResponse } from '../gatewayTypes.js' import { asRpcResult } from '../lib/rpc.js' @@ -105,7 +104,11 @@ export function useConfigSync({ gw, setBellOnComplete, setVoiceEnabled, sid }: U return } - quietRpc(gw, 'voice.toggle', { action: 'status' }).then(r => setVoiceEnabled(!!r?.enabled)) + // Keep startup cheap: voice.toggle status probes optional audio/STT deps and + // can run long enough to delay prompt.submit on the single stdio RPC pipe. + // Environment flags are enough to initialize the UI bit; the heavier status + // check still runs when the user opens /voice. + setVoiceEnabled(process.env.HERMES_VOICE === '1') quietRpc(gw, 'config.get', { key: 'mtime' }).then(r => { mtimeRef.current = Number(r?.mtime ?? 0) }) diff --git a/ui-tui/src/app/useSubmission.ts b/ui-tui/src/app/useSubmission.ts index 2c2c6d48d9..ed86332b08 100644 --- a/ui-tui/src/app/useSubmission.ts +++ b/ui-tui/src/app/useSubmission.ts @@ -126,6 +126,13 @@ export function useSubmission(opts: UseSubmissionOptions) { return sys('session not ready yet') } + // Plain prompts are the common path and should not pay an extra RPC + // before prompt.submit. File-drop detection can still run for inputs + // that contain an absolute/tilde path or file:// URI. + if (!looksLikeSlashCommand(text) && !/(?:^|\s)(?:file:\/\/|~\/|\/)[^\s]+/.test(text)) { + return startSubmit(text, expand(text), showUserMessage) + } + gw.request('input.detect_drop', { session_id: sid, text }) .then(r => { if (!r?.matched) { diff --git a/ui-tui/src/components/appLayout.tsx b/ui-tui/src/components/appLayout.tsx index 84470c4ccf..69aa6c0592 100644 --- a/ui-tui/src/components/appLayout.tsx +++ b/ui-tui/src/components/appLayout.tsx @@ -68,7 +68,7 @@ const TranscriptPane = memo(function TranscriptPane({ - {row.msg.info?.version && } + {row.msg.info && } ) : row.msg.kind === 'panel' && row.msg.panelData ? ( diff --git a/ui-tui/src/components/branding.tsx b/ui-tui/src/components/branding.tsx index 25e161fd71..0a7509f696 100644 --- a/ui-tui/src/components/branding.tsx +++ b/ui-tui/src/components/branding.tsx @@ -64,9 +64,11 @@ export function SessionPanel({ info, sid, t }: SessionPanelProps) { } const section = (title: string, data: Record, max = 8, overflowLabel = 'more…') => { + const skeletonRows = title === 'Tools' ? ['browser', 'terminal', 'file'] : ['apple', 'creative', 'software-development'] const entries = Object.entries(data).sort() const shown = entries.slice(0, max) const overflow = entries.length - max + const skeleton = info.lazy && entries.length === 0 return ( @@ -74,12 +76,19 @@ export function SessionPanel({ info, sid, t }: SessionPanelProps) { Available {title} - {shown.map(([k, vs]) => ( - - {strip(k)}: - {truncLine(strip(k) + ': ', vs)} - - ))} + {skeleton + ? skeletonRows.map(k => ( + + {k}: + ━━━━━━━━━━━━━━ + + )) + : shown.map(([k, vs]) => ( + + {strip(k)}: + {truncLine(strip(k) + ': ', vs)} + + ))} {overflow > 0 && ( diff --git a/ui-tui/src/types.ts b/ui-tui/src/types.ts index 6aea78e3e4..b3ecc8fbb6 100644 --- a/ui-tui/src/types.ts +++ b/ui-tui/src/types.ts @@ -143,11 +143,12 @@ export interface McpServerStatus { export interface SessionInfo { cwd?: string fast?: boolean + lazy?: boolean mcp_servers?: McpServerStatus[] model: string reasoning_effort?: string - service_tier?: string release_date?: string + service_tier?: string skills: Record tools: Record update_behind?: number | null From 0a6ecea676523d808d1f0657a8f1a80debba14f1 Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Tue, 28 Apr 2026 23:48:07 -0500 Subject: [PATCH 04/11] fix(tui): hydrate lazy startup panel and use animated loaders The lazy startup panel could remain stuck on the placeholder when no first prompt was submitted because agent construction only started from _sess(). Keep session.create cheap, but schedule _start_agent_build shortly after returning the placeholder so tools/skills hydrate automatically. Also replace the ugly placeholder bar rows with compact unicode-animations braille loaders for the tools and skills sections. Tests: - python -m py_compile tui_gateway/server.py - cd ui-tui && npm run type-check && npm run build - cd ui-tui && npm test -- --run src/__tests__/useSessionLifecycle.test.ts src/__tests__/useConfigSync.test.ts - scripts/run_tests.sh tests/tui_gateway/test_protocol.py::test_sess_found tests/tools/test_code_execution_modes.py tests/tools/test_code_execution.py --- tui_gateway/server.py | 11 +++++++ ui-tui/src/components/branding.tsx | 46 +++++++++++++++++++++--------- 2 files changed, 43 insertions(+), 14 deletions(-) diff --git a/tui_gateway/server.py b/tui_gateway/server.py index e5b1447d76..2ba156587d 100644 --- a/tui_gateway/server.py +++ b/tui_gateway/server.py @@ -1726,6 +1726,17 @@ def _(rid, params: dict) -> dict: "transport": current_transport() or _stdio_transport, } + # Return the lightweight session immediately so Ink can paint the composer + # + skeleton panel, then build the real AIAgent just after this response is + # flushed. This keeps startup responsive while still hydrating tools/skills + # without requiring the user to submit a first prompt. + def _deferred_build() -> None: + session = _sessions.get(sid) + if session is not None: + _start_agent_build(sid, session) + + threading.Timer(0.05, _deferred_build).start() + return _ok( rid, { diff --git a/ui-tui/src/components/branding.tsx b/ui-tui/src/components/branding.tsx index 0a7509f696..84e502aada 100644 --- a/ui-tui/src/components/branding.tsx +++ b/ui-tui/src/components/branding.tsx @@ -1,10 +1,32 @@ import { Box, Text, useStdout } from '@hermes/ink' +import { useEffect, useState } from 'react' +import unicodeSpinners from 'unicode-animations' import { artWidth, caduceus, CADUCEUS_WIDTH, logo, LOGO_WIDTH } from '../banner.js' import { flat } from '../lib/text.js' import type { Theme } from '../theme.js' import type { PanelSection, SessionInfo } from '../types.js' +const LOADER_TICK_MS = 120 + +function InlineLoader({ label, t }: { label: string; t: Theme }) { + const [tick, setTick] = useState(0) + const spinner = unicodeSpinners.braille + const frame = spinner.frames[tick % spinner.frames.length] ?? '⠋' + + useEffect(() => { + const id = setInterval(() => setTick(n => n + 1), Math.max(LOADER_TICK_MS, spinner.interval)) + + return () => clearInterval(id) + }, [spinner.interval]) + + return ( + + {frame} {label} + + ) +} + export function ArtLines({ lines }: { lines: [string, string][] }) { return ( <> @@ -64,7 +86,6 @@ export function SessionPanel({ info, sid, t }: SessionPanelProps) { } const section = (title: string, data: Record, max = 8, overflowLabel = 'more…') => { - const skeletonRows = title === 'Tools' ? ['browser', 'terminal', 'file'] : ['apple', 'creative', 'software-development'] const entries = Object.entries(data).sort() const shown = entries.slice(0, max) const overflow = entries.length - max @@ -76,19 +97,16 @@ export function SessionPanel({ info, sid, t }: SessionPanelProps) { Available {title} - {skeleton - ? skeletonRows.map(k => ( - - {k}: - ━━━━━━━━━━━━━━ - - )) - : shown.map(([k, vs]) => ( - - {strip(k)}: - {truncLine(strip(k) + ': ', vs)} - - ))} + {skeleton ? ( + + ) : ( + shown.map(([k, vs]) => ( + + {strip(k)}: + {truncLine(strip(k) + ': ', vs)} + + )) + )} {overflow > 0 && ( From a2819e182047ed5d78d21038b83f63b1ec297438 Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Tue, 28 Apr 2026 23:54:33 -0500 Subject: [PATCH 05/11] fix(tui): address lazy startup review races Copilot correctly flagged two concurrency windows: - memoryMonitor could re-enter while awaiting the lazy @hermes/ink import or heap dump, producing duplicate imports/dumps under sustained pressure. - _start_agent_build used a check-then-set guard without synchronization, so concurrent agent-backed RPCs could start duplicate agent builders. Fix both with single-flight guards: cache the dynamic import promise and track per-level dump in-flight state in memoryMonitor, and protect the TUI agent build flag with a per-session lock. Tests: - python -m py_compile tui_gateway/server.py - cd ui-tui && npm run type-check && npm run build - cd ui-tui && npm test -- --run src/__tests__/useSessionLifecycle.test.ts src/__tests__/useConfigSync.test.ts - scripts/run_tests.sh tests/tui_gateway/test_protocol.py::test_sess_found tests/tools/test_code_execution_modes.py tests/tools/test_code_execution.py --- tui_gateway/server.py | 8 +++++--- ui-tui/src/lib/memoryMonitor.ts | 31 ++++++++++++++++++++++++------- 2 files changed, 29 insertions(+), 10 deletions(-) diff --git a/tui_gateway/server.py b/tui_gateway/server.py index 2ba156587d..6ece5da2e6 100644 --- a/tui_gateway/server.py +++ b/tui_gateway/server.py @@ -478,9 +478,11 @@ def _start_agent_build(sid: str, session: dict) -> None: ready = session.get("agent_ready") if ready is None: return - if ready.is_set() or session.get("agent_build_started"): - return - session["agent_build_started"] = True + lock = session.setdefault("agent_build_lock", threading.Lock()) + with lock: + if ready.is_set() or session.get("agent_build_started"): + return + session["agent_build_started"] = True key = session["session_key"] def _build() -> None: diff --git a/ui-tui/src/lib/memoryMonitor.ts b/ui-tui/src/lib/memoryMonitor.ts index 26a0cdbc2b..41b357568f 100644 --- a/ui-tui/src/lib/memoryMonitor.ts +++ b/ui-tui/src/lib/memoryMonitor.ts @@ -31,11 +31,20 @@ const GB = 1024 ** 3 // spike somehow trips the threshold before the app registers its own Ink // import, we pay the load cost exactly once, inside the tick that needs it. let _evictInkCaches: ((level: 'all' | 'half') => unknown) | null = null +let _evictInkCachesPromise: Promise<(level: 'all' | 'half') => unknown> | null = null + async function _ensureEvictInkCaches(): Promise<(level: 'all' | 'half') => unknown> { - if (_evictInkCaches) return _evictInkCaches - const mod = await import('@hermes/ink') - _evictInkCaches = mod.evictInkCaches as (level: 'all' | 'half') => unknown - return _evictInkCaches + if (_evictInkCaches) { + return _evictInkCaches + } + + _evictInkCachesPromise ??= import('@hermes/ink').then(mod => { + _evictInkCaches = mod.evictInkCaches as (level: 'all' | 'half') => unknown + + return _evictInkCaches + }) + + return _evictInkCachesPromise } export function startMemoryMonitor({ @@ -46,19 +55,25 @@ export function startMemoryMonitor({ onHigh }: MemoryMonitorOptions = {}): () => void { const dumped = new Set>() + const inFlight = new Set>() const tick = async () => { const { heapUsed, rss } = process.memoryUsage() const level: MemoryLevel = heapUsed >= criticalBytes ? 'critical' : heapUsed >= highBytes ? 'high' : 'normal' if (level === 'normal') { - return void dumped.clear() - } + dumped.clear() + inFlight.clear() - if (dumped.has(level)) { return } + if (dumped.has(level) || inFlight.has(level)) { + return + } + + inFlight.add(level) + // Prune Ink content caches before dump/exit — half on 'high' (recoverable), // full on 'critical' (post-dump RSS reduction, keeps user running). // Deferred import keeps `@hermes/ink` off the cold-start critical path; @@ -75,6 +90,8 @@ export function startMemoryMonitor({ dumped.add(level) const dump = await performHeapDump(level === 'critical' ? 'auto-critical' : 'auto-high').catch(() => null) + inFlight.delete(level) + const snap: MemorySnapshot = { heapUsed, level, rss } ;(level === 'critical' ? onCritical : onHigh)?.(snap, dump) From 72a3af63d4f14dcb986290a0b9d0ec53abbbd68c Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Wed, 29 Apr 2026 00:04:12 -0500 Subject: [PATCH 06/11] fix(tui): keep prompt submit off the RPC pool A cleanup review found that adding prompt.submit to _LONG_HANDLERS made the RPC pool own the full first-turn wait even though the handler itself already spawns a turn thread. Keep prompt.submit inline and make it return immediately: - look up the session without waiting - kick the lazy agent build - spawn a short waiter thread that blocks on agent_ready, then starts the existing turn dispatcher This keeps stdin dispatch responsive, avoids occupying a bounded pool worker for a normal chat turn, and preserves the lazy-start hydration behavior. Tests: - python -m py_compile tui_gateway/server.py - cd ui-tui && npm run type-check && npm run build - scripts/run_tests.sh tests/tui_gateway/test_protocol.py::test_sess_found tests/tools/test_code_execution_modes.py tests/tools/test_code_execution.py - cd ui-tui && npm test -- --run src/__tests__/useSessionLifecycle.test.ts src/__tests__/useConfigSync.test.ts --- tui_gateway/server.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/tui_gateway/server.py b/tui_gateway/server.py index 6ece5da2e6..ad07ce97f0 100644 --- a/tui_gateway/server.py +++ b/tui_gateway/server.py @@ -141,7 +141,6 @@ _SLASH_WORKER_TIMEOUT_S = max( _LONG_HANDLERS = frozenset( { "cli.exec", - "prompt.submit", "session.branch", "session.resume", "shell.exec", @@ -2426,12 +2425,28 @@ def _(rid, params: dict) -> dict: @method("prompt.submit") def _(rid, params: dict) -> dict: sid, text = params.get("session_id", ""), params.get("text", "") - session, err = _sess(params, rid) + session, err = _sess_nowait(params, rid) if err: return err + + _start_agent_build(sid, session) + + def run_after_agent_ready() -> None: + err = _wait_agent(session, rid) + if err: + session.get("transport", current_transport() or _stdio_transport).write(err) + return + _run_prompt_submit(rid, sid, session, text) + + threading.Thread(target=run_after_agent_ready, daemon=True).start() + return _ok(rid, {"status": "streaming"}) + + +def _run_prompt_submit(rid, sid: str, session: dict, text: Any) -> None: with session["history_lock"]: if session.get("running"): - return _err(rid, 4009, "session busy") + _emit("error", sid, {"message": "session busy"}) + return session["running"] = True history = list(session["history"]) history_version = int(session.get("history_version", 0)) @@ -2671,7 +2686,6 @@ def _(rid, params: dict) -> dict: session["running"] = False threading.Thread(target=run, daemon=True).start() - return _ok(rid, {"status": "streaming"}) @method("clipboard.paste") From 88a9efdb1ac6d0ac5665fa087ebd7271073387fd Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Wed, 29 Apr 2026 00:08:34 -0500 Subject: [PATCH 07/11] fix(tui): tighten cold-start edge cases after review Clean up the remaining review nits: - let the deferred @hermes/ink import retry after a transient failure instead of memoizing a rejected promise forever - keep memory-monitor in-flight state inside a finally so future exceptions cannot suppress that memory level indefinitely - use read_raw_config for the TUI MCP cold-start probe instead of full load_config() - keep input.detect_drop for explicit relative path prefixes (./ and ../) while preserving the no-RPC fast path for ordinary plain prompts Tests: - python -m py_compile tui_gateway/server.py tui_gateway/entry.py - cd ui-tui && npm run type-check && npm run build - scripts/run_tests.sh tests/tui_gateway/test_protocol.py::test_sess_found tests/tools/test_code_execution_modes.py tests/tools/test_code_execution.py - cd ui-tui && npm test -- --run src/__tests__/useSessionLifecycle.test.ts src/__tests__/useConfigSync.test.ts --- tui_gateway/entry.py | 4 ++-- ui-tui/src/app/useSubmission.ts | 6 ++--- ui-tui/src/lib/memoryMonitor.ts | 42 +++++++++++++++++++-------------- 3 files changed, 29 insertions(+), 23 deletions(-) diff --git a/tui_gateway/entry.py b/tui_gateway/entry.py index 2c1804aac1..d3be53a6c4 100644 --- a/tui_gateway/entry.py +++ b/tui_gateway/entry.py @@ -175,8 +175,8 @@ def main(): # loaded once by ``_config_mtime`` elsewhere) and only pay the import # cost when there's actually MCP work to do. try: - from hermes_cli.config import load_config - _mcp_servers = (load_config() or {}).get("mcp_servers") + from hermes_cli.config import read_raw_config + _mcp_servers = (read_raw_config() or {}).get("mcp_servers") _has_mcp_servers = isinstance(_mcp_servers, dict) and len(_mcp_servers) > 0 except Exception: # Be conservative: if we can't decide, fall back to the old diff --git a/ui-tui/src/app/useSubmission.ts b/ui-tui/src/app/useSubmission.ts index ed86332b08..a7d2631dbd 100644 --- a/ui-tui/src/app/useSubmission.ts +++ b/ui-tui/src/app/useSubmission.ts @@ -127,9 +127,9 @@ export function useSubmission(opts: UseSubmissionOptions) { } // Plain prompts are the common path and should not pay an extra RPC - // before prompt.submit. File-drop detection can still run for inputs - // that contain an absolute/tilde path or file:// URI. - if (!looksLikeSlashCommand(text) && !/(?:^|\s)(?:file:\/\/|~\/|\/)[^\s]+/.test(text)) { + // before prompt.submit. File-drop detection still runs for absolute, + // tilde, file://, and explicit relative paths. + if (!looksLikeSlashCommand(text) && !/(?:^|\s)(?:file:\/\/|~\/|\.?\.\/|\/)[^\s]+/.test(text)) { return startSubmit(text, expand(text), showUserMessage) } diff --git a/ui-tui/src/lib/memoryMonitor.ts b/ui-tui/src/lib/memoryMonitor.ts index 41b357568f..e792df4cde 100644 --- a/ui-tui/src/lib/memoryMonitor.ts +++ b/ui-tui/src/lib/memoryMonitor.ts @@ -38,11 +38,16 @@ async function _ensureEvictInkCaches(): Promise<(level: 'all' | 'half') => unkno return _evictInkCaches } - _evictInkCachesPromise ??= import('@hermes/ink').then(mod => { - _evictInkCaches = mod.evictInkCaches as (level: 'all' | 'half') => unknown + _evictInkCachesPromise ??= import('@hermes/ink') + .then(mod => { + _evictInkCaches = mod.evictInkCaches as (level: 'all' | 'half') => unknown - return _evictInkCaches - }) + return _evictInkCaches + }) + .catch(err => { + _evictInkCachesPromise = null + throw err + }) return _evictInkCachesPromise } @@ -80,21 +85,22 @@ export function startMemoryMonitor({ // by the time a tick fires 10s after launch the app has already loaded // the same module, so this resolves instantly from the ESM cache. try { - const evictInkCaches = await _ensureEvictInkCaches() - evictInkCaches(level === 'critical' ? 'all' : 'half') - } catch { - // Best-effort: if the dynamic import fails for any reason we still - // continue to the heap dump below so the user gets diagnostics. + try { + const evictInkCaches = await _ensureEvictInkCaches() + evictInkCaches(level === 'critical' ? 'all' : 'half') + } catch { + // Best-effort: if the dynamic import fails for any reason we still + // continue to the heap dump below so the user gets diagnostics. + } + + dumped.add(level) + const dump = await performHeapDump(level === 'critical' ? 'auto-critical' : 'auto-high').catch(() => null) + const snap: MemorySnapshot = { heapUsed, level, rss } + + ;(level === 'critical' ? onCritical : onHigh)?.(snap, dump) + } finally { + inFlight.delete(level) } - - dumped.add(level) - const dump = await performHeapDump(level === 'critical' ? 'auto-critical' : 'auto-high').catch(() => null) - - inFlight.delete(level) - - const snap: MemorySnapshot = { heapUsed, level, rss } - - ;(level === 'critical' ? onCritical : onHigh)?.(snap, dump) } const handle = setInterval(() => void tick(), intervalMs) From cc5efb6fc16fc620dd2f4f47d0fd244da06f3739 Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Wed, 29 Apr 2026 00:22:38 -0500 Subject: [PATCH 08/11] fix(tui): keep non-agent session RPCs lazy Respond to Copilot's lazy-start review: session metadata/history/usage do not need a constructed AIAgent, so keep them on the no-wait session path. This preserves the deferred startup model and avoids blocking simple session RPCs on agent initialization. Tests: - python -m py_compile tui_gateway/server.py tui_gateway/entry.py - cd ui-tui && npm run type-check && npm run build - scripts/run_tests.sh tests/tui_gateway/test_protocol.py::test_sess_found tests/tools/test_code_execution_modes.py tests/tools/test_code_execution.py - cd ui-tui && npm test -- --run src/__tests__/useSessionLifecycle.test.ts src/__tests__/useConfigSync.test.ts --- tui_gateway/server.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/tui_gateway/server.py b/tui_gateway/server.py index ad07ce97f0..ca0ecfe729 100644 --- a/tui_gateway/server.py +++ b/tui_gateway/server.py @@ -1736,7 +1736,9 @@ def _(rid, params: dict) -> dict: if session is not None: _start_agent_build(sid, session) - threading.Timer(0.05, _deferred_build).start() + build_timer = threading.Timer(0.05, _deferred_build) + build_timer.daemon = True + build_timer.start() return _ok( rid, @@ -1889,7 +1891,7 @@ def _(rid, params: dict) -> dict: @method("session.title") def _(rid, params: dict) -> dict: - session, err = _sess(params, rid) + session, err = _sess_nowait(params, rid) if err: return err db = _get_db() @@ -1952,13 +1954,16 @@ def _(rid, params: dict) -> dict: @method("session.usage") def _(rid, params: dict) -> dict: - session, err = _sess(params, rid) - return err or _ok(rid, _get_usage(session["agent"])) + session, err = _sess_nowait(params, rid) + if err: + return err + agent = session.get("agent") + return _ok(rid, _get_usage(agent) if agent is not None else {"calls": 0, "input": 0, "output": 0, "total": 0}) @method("session.history") def _(rid, params: dict) -> dict: - session, err = _sess(params, rid) + session, err = _sess_nowait(params, rid) if err: return err history = list(session.get("history", [])) From d341af22c0ae91d05bd0116e7bd6d37f4fb855d6 Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Wed, 29 Apr 2026 00:25:09 -0500 Subject: [PATCH 09/11] fix(tui): preserve busy and init error signaling Finish the Copilot review cleanup for lazy prompt submission: - prompt.submit now claims session.running before returning success, preserving the existing RPC-level session busy error so the frontend can queue. - agent-init timeout/failure now emits a normal error event instead of writing a second JSON-RPC response for an already-settled request id. Tests: - python -m py_compile tui_gateway/server.py tui_gateway/entry.py - cd ui-tui && npm run type-check && npm run build - scripts/run_tests.sh tests/tui_gateway/test_protocol.py::test_sess_found tests/tools/test_code_execution_modes.py tests/tools/test_code_execution.py - cd ui-tui && npm test -- --run src/__tests__/useSessionLifecycle.test.ts src/__tests__/useConfigSync.test.ts --- tui_gateway/server.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/tui_gateway/server.py b/tui_gateway/server.py index ca0ecfe729..4dd24a3d9f 100644 --- a/tui_gateway/server.py +++ b/tui_gateway/server.py @@ -2433,13 +2433,19 @@ def _(rid, params: dict) -> dict: session, err = _sess_nowait(params, rid) if err: return err + with session["history_lock"]: + if session.get("running"): + return _err(rid, 4009, "session busy") + session["running"] = True _start_agent_build(sid, session) def run_after_agent_ready() -> None: err = _wait_agent(session, rid) if err: - session.get("transport", current_transport() or _stdio_transport).write(err) + _emit("error", sid, {"message": err.get("error", {}).get("message", "agent initialization failed")}) + with session["history_lock"]: + session["running"] = False return _run_prompt_submit(rid, sid, session, text) @@ -2449,10 +2455,6 @@ def _(rid, params: dict) -> dict: def _run_prompt_submit(rid, sid: str, session: dict, text: Any) -> None: with session["history_lock"]: - if session.get("running"): - _emit("error", sid, {"message": "session busy"}) - return - session["running"] = True history = list(session["history"]) history_version = int(session.get("history_version", 0)) images = list(session.get("attached_images", [])) From c2fd0fa684fa89041f212dee57ac013e32f22c95 Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Wed, 29 Apr 2026 00:44:04 -0500 Subject: [PATCH 10/11] fix(tui): preserve memory monitor in-flight guard Copilot caught that clearing inFlight on a transient normal-memory tick could allow a second dump/eviction to start before the first async tick completed. Only clear dumped on normal; let the in-flight tick's finally remove its own level. Tests: - cd ui-tui && npm run type-check && npm run build --- ui-tui/src/lib/memoryMonitor.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/ui-tui/src/lib/memoryMonitor.ts b/ui-tui/src/lib/memoryMonitor.ts index e792df4cde..623e5cfa49 100644 --- a/ui-tui/src/lib/memoryMonitor.ts +++ b/ui-tui/src/lib/memoryMonitor.ts @@ -68,7 +68,6 @@ export function startMemoryMonitor({ if (level === 'normal') { dumped.clear() - inFlight.clear() return } From 22cc7492ffd01eb6f867b2395d60778d25e8e41d Mon Sep 17 00:00:00 2001 From: brooklyn! Date: Tue, 28 Apr 2026 22:44:58 -0700 Subject: [PATCH 11/11] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- ui-tui/src/lib/memoryMonitor.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/ui-tui/src/lib/memoryMonitor.ts b/ui-tui/src/lib/memoryMonitor.ts index 623e5cfa49..eaf11574a4 100644 --- a/ui-tui/src/lib/memoryMonitor.ts +++ b/ui-tui/src/lib/memoryMonitor.ts @@ -68,7 +68,6 @@ export function startMemoryMonitor({ if (level === 'normal') { dumped.clear() - return }