diff --git a/tui_gateway/entry.py b/tui_gateway/entry.py index 70fc851820..2c1804aac1 100644 --- a/tui_gateway/entry.py +++ b/tui_gateway/entry.py @@ -165,11 +165,29 @@ def main(): # a model_tools.py module-level side effect; moved to explicit # startup calls to avoid freezing the gateway's loop on lazy import # (#16856). + # + # Cold-start guard: importing ``tools.mcp_tool`` transitively pulls the + # full MCP SDK (mcp, pydantic, httpx, jsonschema, starlette parsers — + # ~200ms on macOS), which runs on the TUI's critical path before + # ``gateway.ready`` can be emitted. The overwhelming majority of users + # have no ``mcp_servers`` configured, in which case every byte of that + # import is wasted. Check the config first (cheap — it's already been + # loaded once by ``_config_mtime`` elsewhere) and only pay the import + # cost when there's actually MCP work to do. try: - from tools.mcp_tool import discover_mcp_tools - discover_mcp_tools() + from hermes_cli.config import load_config + _mcp_servers = (load_config() or {}).get("mcp_servers") + _has_mcp_servers = isinstance(_mcp_servers, dict) and len(_mcp_servers) > 0 except Exception: - pass + # Be conservative: if we can't decide, fall back to the old + # behaviour and let the discovery path handle its own errors. + _has_mcp_servers = True + if _has_mcp_servers: + try: + from tools.mcp_tool import discover_mcp_tools + discover_mcp_tools() + except Exception: + pass if not write_json({ "jsonrpc": "2.0", diff --git a/ui-tui/src/lib/memoryMonitor.ts b/ui-tui/src/lib/memoryMonitor.ts index bbdb229705..26a0cdbc2b 100644 --- a/ui-tui/src/lib/memoryMonitor.ts +++ b/ui-tui/src/lib/memoryMonitor.ts @@ -1,5 +1,3 @@ -import { evictInkCaches } from '@hermes/ink' - import { type HeapDumpResult, performHeapDump } from './memory.js' export type MemoryLevel = 'critical' | 'high' | 'normal' @@ -20,6 +18,26 @@ export interface MemoryMonitorOptions { const GB = 1024 ** 3 +// Deferred @hermes/ink import: loading `@hermes/ink` at module top-level +// pulls the full ~414KB Ink bundle (React, renderer, components, hooks) onto +// the critical path before the Python gateway can even be spawned. That +// serialised roughly 150ms of Node work in front of gw.start() on every +// cold `hermes --tui` launch. +// +// evictInkCaches only runs inside `tick()`, which fires on a 10s timer and +// only when heap pressure crosses the high-water mark — by then Ink has +// long since been loaded by the app entry. This dynamic import is a no-op +// on the hot path (module is already in the ESM cache); when a startup +// spike somehow trips the threshold before the app registers its own Ink +// import, we pay the load cost exactly once, inside the tick that needs it. +let _evictInkCaches: ((level: 'all' | 'half') => unknown) | null = null +async function _ensureEvictInkCaches(): Promise<(level: 'all' | 'half') => unknown> { + if (_evictInkCaches) return _evictInkCaches + const mod = await import('@hermes/ink') + _evictInkCaches = mod.evictInkCaches as (level: 'all' | 'half') => unknown + return _evictInkCaches +} + export function startMemoryMonitor({ criticalBytes = 2.5 * GB, highBytes = 1.5 * GB, @@ -43,7 +61,16 @@ export function startMemoryMonitor({ // Prune Ink content caches before dump/exit — half on 'high' (recoverable), // full on 'critical' (post-dump RSS reduction, keeps user running). - evictInkCaches(level === 'critical' ? 'all' : 'half') + // Deferred import keeps `@hermes/ink` off the cold-start critical path; + // by the time a tick fires 10s after launch the app has already loaded + // the same module, so this resolves instantly from the ESM cache. + try { + const evictInkCaches = await _ensureEvictInkCaches() + evictInkCaches(level === 'critical' ? 'all' : 'half') + } catch { + // Best-effort: if the dynamic import fails for any reason we still + // continue to the heap dump below so the user gets diagnostics. + } dumped.add(level) const dump = await performHeapDump(level === 'critical' ? 'auto-critical' : 'auto-high').catch(() => null)