diff --git a/tui_gateway/entry.py b/tui_gateway/entry.py
index 70fc851820..2c1804aac1 100644
--- a/tui_gateway/entry.py
+++ b/tui_gateway/entry.py
@@ -165,11 +165,29 @@ def main():
     # a model_tools.py module-level side effect; moved to explicit
     # startup calls to avoid freezing the gateway's loop on lazy import
     # (#16856).
+    #
+    # Cold-start guard: importing ``tools.mcp_tool`` transitively pulls the
+    # full MCP SDK (mcp, pydantic, httpx, jsonschema, starlette parsers —
+    # ~200ms on macOS), which runs on the TUI's critical path before
+    # ``gateway.ready`` can be emitted.  The overwhelming majority of users
+    # have no ``mcp_servers`` configured, in which case every byte of that
+    # import is wasted.  Check the config first (cheap — it's already been
+    # loaded once by ``_config_mtime`` elsewhere) and only pay the import
+    # cost when there's actually MCP work to do.
     try:
-        from tools.mcp_tool import discover_mcp_tools
-        discover_mcp_tools()
+        from hermes_cli.config import load_config
+        _mcp_servers = (load_config() or {}).get("mcp_servers")
+        _has_mcp_servers = isinstance(_mcp_servers, dict) and len(_mcp_servers) > 0
     except Exception:
-        pass
+        # Be conservative: if we can't decide, fall back to the old
+        # behaviour and let the discovery path handle its own errors.
+        _has_mcp_servers = True
+    if _has_mcp_servers:
+        try:
+            from tools.mcp_tool import discover_mcp_tools
+            discover_mcp_tools()
+        except Exception:
+            pass
 
     if not write_json({
         "jsonrpc": "2.0",
diff --git a/ui-tui/src/lib/memoryMonitor.ts b/ui-tui/src/lib/memoryMonitor.ts
index bbdb229705..26a0cdbc2b 100644
--- a/ui-tui/src/lib/memoryMonitor.ts
+++ b/ui-tui/src/lib/memoryMonitor.ts
@@ -1,5 +1,3 @@
-import { evictInkCaches } from '@hermes/ink'
-
 import { type HeapDumpResult, performHeapDump } from './memory.js'
 
 export type MemoryLevel = 'critical' | 'high' | 'normal'
@@ -20,6 +18,26 @@ export interface MemoryMonitorOptions {
 
 const GB = 1024 ** 3
 
+// Deferred @hermes/ink import: loading `@hermes/ink` at module top-level
+// pulls the full ~414KB Ink bundle (React, renderer, components, hooks) onto
+// the critical path before the Python gateway can even be spawned. That
+// serialised roughly 150ms of Node work in front of gw.start() on every
+// cold `hermes --tui` launch.
+//
+// evictInkCaches is only called from `tick()`, which fires on a 10s timer,
+// and only once heap pressure crosses the high-water mark — by then Ink has
+// long since been loaded by the app entry, so the dynamic import below just
+// resolves from the ESM module cache. If a startup spike somehow trips the
+// threshold before the app registers its own Ink import, we pay the load
+// cost exactly once, inside the tick that needs it.
+let _evictInkCaches: ((level: 'all' | 'half') => unknown) | null = null // memoized export; null until first load
+async function _ensureEvictInkCaches(): Promise<(level: 'all' | 'half') => unknown> {
+  if (_evictInkCaches) return _evictInkCaches // already resolved: skip the import
+  const mod = await import('@hermes/ink') // first call: load the Ink module on demand
+  _evictInkCaches = mod.evictInkCaches as (level: 'all' | 'half') => unknown // keep for later calls
+  return _evictInkCaches
+}
+
 export function startMemoryMonitor({
   criticalBytes = 2.5 * GB,
   highBytes = 1.5 * GB,
@@ -43,7 +61,16 @@ export function startMemoryMonitor({
 
     // Prune Ink content caches before dump/exit — half on 'high' (recoverable),
     // full on 'critical' (post-dump RSS reduction, keeps user running).
-    evictInkCaches(level === 'critical' ? 'all' : 'half')
+    // Deferred import keeps `@hermes/ink` off the cold-start critical path;
+    // by the time a tick fires 10s after launch the app has already loaded
+    // the same module, so this resolves instantly from the ESM cache.
+    try {
+      const evictInkCaches = await _ensureEvictInkCaches()
+      evictInkCaches(level === 'critical' ? 'all' : 'half')
+    } catch {
+      // Best-effort: if the dynamic import fails for any reason we still
+      // continue to the heap dump below so the user gets diagnostics.
+    }
 
     dumped.add(level)
     const dump = await performHeapDump(level === 'critical' ? 'auto-critical' : 'auto-high').catch(() => null)