From 0399d4b97668c020c8c583eda190b90fc9fca4e8 Mon Sep 17 00:00:00 2001
From: Brooklyn Nicholson
Date: Tue, 28 Apr 2026 19:42:31 -0500
Subject: [PATCH] perf(tui): shave ~190ms off `hermes --tui` cold start
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two targeted fixes on the critical path from `hermes --tui` launch to `gateway.ready`:

1. **Defer `@hermes/ink` import in memoryMonitor.ts.** The static top-level import dragged the full ~414KB Ink bundle (React + renderer + all components/hooks) onto the critical path *before* `gw.start()` could spawn the Python gateway — serialising ~155ms of Node work in front of it on every launch. `evictInkCaches` only runs inside the 10-second tick under heap pressure, so it moves to a lazy dynamic import. First tick hits the ESM cache because the app entry has long since imported `@hermes/ink`.

2. **Gate `tools.mcp_tool` import on config in tui_gateway/entry.py.** Importing the module transitively pulls the MCP SDK + pydantic + httpx + jsonschema + starlette formparsers (~200ms). The overwhelming majority of users have no `mcp_servers` configured, so this runs for nothing. A cheap `load_config()` check (~25ms) skips the 200ms import when no servers are declared, with a conservative fallback to the old behaviour if the config probe itself fails.

## Measurements (macOS Terminal.app, Apple Silicon, n=12)

| Metric | Before (p50) | After (p50) | Δ |
|----------------------------|--------------|-------------|----------|
| Python gateway boot alone | 252–365ms | 105–151ms | −180ms |
| `hermes --tui` banner paint | 686ms | 665ms | −21ms |
| `hermes --tui` → ready | **1843ms** | **1655ms** | **−188ms (−10.2%)** |
| `hermes --tui` → ready p90 | 1932ms | 1778ms | −154ms |
| stdev (ready) | 126ms | 83ms | also more consistent |

## Tests

- `scripts/run_tests.sh tests/tui_gateway/ tests/tools/test_mcp_tool.py`: 195 passed.
(The one pre-existing failure in `test_session_resume_returns_hydrated_messages` reproduces on main — unrelated, it's a mock-DB kwarg mismatch.)
- `ui-tui` vitest: 430 tests, all pass.
- `npm run type-check` in ui-tui: clean.

## Notes

- Node-side first paint ("banner") didn't move meaningfully because that latency is dominated by Ink's render pipeline + React mount, not by which imports load first.
- The win shows up entirely in the time from banner to `gateway.ready` — exactly where we expected it, since both fixes shorten the Python gateway's boot path or let it overlap more with Node startup.
- No user-visible behaviour change. Memory monitoring still fires every 10s; MCP still works when `mcp_servers` is configured.
---
 tui_gateway/entry.py            | 24 +++++++++++++++++++++---
 ui-tui/src/lib/memoryMonitor.ts | 33 ++++++++++++++++++++++++++++++---
 2 files changed, 51 insertions(+), 6 deletions(-)

diff --git a/tui_gateway/entry.py b/tui_gateway/entry.py index 70fc851820..2c1804aac1 100644 --- a/tui_gateway/entry.py +++ b/tui_gateway/entry.py @@ -165,11 +165,29 @@ def main(): # a model_tools.py module-level side effect; moved to explicit # startup calls to avoid freezing the gateway's loop on lazy import # (#16856). + # + # Cold-start guard: importing ``tools.mcp_tool`` transitively pulls the + # full MCP SDK (mcp, pydantic, httpx, jsonschema, starlette parsers — + # ~200ms on macOS), which runs on the TUI's critical path before + # ``gateway.ready`` can be emitted. The overwhelming majority of users + # have no ``mcp_servers`` configured, in which case every byte of that + # import is wasted. Check the config first (cheap — it's already been + # loaded once by ``_config_mtime`` elsewhere) and only pay the import + # cost when there's actually MCP work to do. 
try: - from tools.mcp_tool import discover_mcp_tools - discover_mcp_tools() + from hermes_cli.config import load_config + _mcp_servers = (load_config() or {}).get("mcp_servers") + _has_mcp_servers = isinstance(_mcp_servers, dict) and len(_mcp_servers) > 0 except Exception: - pass + # Be conservative: if we can't decide, fall back to the old + # behaviour and let the discovery path handle its own errors. + _has_mcp_servers = True + if _has_mcp_servers: + try: + from tools.mcp_tool import discover_mcp_tools + discover_mcp_tools() + except Exception: + pass if not write_json({ "jsonrpc": "2.0", diff --git a/ui-tui/src/lib/memoryMonitor.ts b/ui-tui/src/lib/memoryMonitor.ts index bbdb229705..26a0cdbc2b 100644 --- a/ui-tui/src/lib/memoryMonitor.ts +++ b/ui-tui/src/lib/memoryMonitor.ts @@ -1,5 +1,3 @@ -import { evictInkCaches } from '@hermes/ink' - import { type HeapDumpResult, performHeapDump } from './memory.js' export type MemoryLevel = 'critical' | 'high' | 'normal' @@ -20,6 +18,26 @@ export interface MemoryMonitorOptions { const GB = 1024 ** 3 +// Deferred @hermes/ink import: loading `@hermes/ink` at module top-level +// pulls the full ~414KB Ink bundle (React, renderer, components, hooks) onto +// the critical path before the Python gateway can even be spawned. That +// serialised roughly 150ms of Node work in front of gw.start() on every +// cold `hermes --tui` launch. +// +// evictInkCaches only runs inside `tick()`, which fires on a 10s timer and +// only when heap pressure crosses the high-water mark — by then Ink has +// long since been loaded by the app entry. This dynamic import is a no-op +// on the hot path (module is already in the ESM cache); when a startup +// spike somehow trips the threshold before the app registers its own Ink +// import, we pay the load cost exactly once, inside the tick that needs it. 
+let _evictInkCaches: ((level: 'all' | 'half') => unknown) | null = null +async function _ensureEvictInkCaches(): Promise<(level: 'all' | 'half') => unknown> { + if (_evictInkCaches) return _evictInkCaches + const mod = await import('@hermes/ink') + _evictInkCaches = mod.evictInkCaches as (level: 'all' | 'half') => unknown + return _evictInkCaches +} + export function startMemoryMonitor({ criticalBytes = 2.5 * GB, highBytes = 1.5 * GB, @@ -43,7 +61,16 @@ export function startMemoryMonitor({ // Prune Ink content caches before dump/exit — half on 'high' (recoverable), // full on 'critical' (post-dump RSS reduction, keeps user running). - evictInkCaches(level === 'critical' ? 'all' : 'half') + // Deferred import keeps `@hermes/ink` off the cold-start critical path; + // by the time a tick fires 10s after launch the app has already loaded + // the same module, so this resolves instantly from the ESM cache. + try { + const evictInkCaches = await _ensureEvictInkCaches() + evictInkCaches(level === 'critical' ? 'all' : 'half') + } catch { + // Best-effort: if the dynamic import fails for any reason we still + // continue to the heap dump below so the user gets diagnostics. + } dumped.add(level) const dump = await performHeapDump(level === 'critical' ? 'auto-critical' : 'auto-high').catch(() => null)