diff --git a/tests/tools/test_code_execution.py b/tests/tools/test_code_execution.py index 15f8faa9bb..6f6260ffe2 100644 --- a/tests/tools/test_code_execution.py +++ b/tests/tools/test_code_execution.py @@ -770,11 +770,19 @@ class TestLoadConfig(unittest.TestCase): def test_returns_code_execution_section(self): from tools.code_execution_tool import _load_config - mock_cli = MagicMock() - mock_cli.CLI_CONFIG = {"code_execution": {"timeout": 120, "max_tool_calls": 10}} - with patch.dict("sys.modules", {"cli": mock_cli}): + with patch("hermes_cli.config.read_raw_config", + return_value={"code_execution": {"timeout": 120, "max_tool_calls": 10}}): result = _load_config() - self.assertIsInstance(result, dict) + self.assertEqual(result, {"timeout": 120, "max_tool_calls": 10}) + + def test_does_not_import_interactive_cli(self): + from tools.code_execution_tool import _load_config + mock_cli = MagicMock() + mock_cli.CLI_CONFIG = {"code_execution": {"timeout": 999}} + with patch.dict("sys.modules", {"cli": mock_cli}), \ + patch("hermes_cli.config.read_raw_config", return_value={}): + result = _load_config() + self.assertEqual(result, {}) # --------------------------------------------------------------------------- diff --git a/tools/code_execution_tool.py b/tools/code_execution_tool.py index db706e6a4c..3f83394c18 100644 --- a/tools/code_execution_tool.py +++ b/tools/code_execution_tool.py @@ -1309,10 +1309,20 @@ def _kill_process_group(proc, escalate: bool = False): def _load_config() -> dict: - """Load code_execution config from CLI_CONFIG if available.""" + """Load code_execution config without importing the interactive CLI. + + This helper is called while building the module-level execute_code schema + during tool discovery. Importing ``cli`` here pulls prompt_toolkit/Rich and + a large chunk of the classic REPL onto every agent startup path, including + ``hermes --tui`` where it is never used. 
Read the lightweight raw config + instead; the config layer already caches by (mtime, size), and an absent + key cleanly falls back to DEFAULT_EXECUTION_MODE. + """ try: - from cli import CLI_CONFIG - return CLI_CONFIG.get("code_execution", {}) + from hermes_cli.config import read_raw_config + + cfg = read_raw_config().get("code_execution", {}) + return cfg if isinstance(cfg, dict) else {} except Exception: return {} diff --git a/tui_gateway/entry.py b/tui_gateway/entry.py index 70fc851820..d3be53a6c4 100644 --- a/tui_gateway/entry.py +++ b/tui_gateway/entry.py @@ -165,11 +165,29 @@ def main(): # a model_tools.py module-level side effect; moved to explicit # startup calls to avoid freezing the gateway's loop on lazy import # (#16856). + # + # Cold-start guard: importing ``tools.mcp_tool`` transitively pulls the + # full MCP SDK (mcp, pydantic, httpx, jsonschema, starlette parsers — + # ~200ms on macOS), which runs on the TUI's critical path before + # ``gateway.ready`` can be emitted. The overwhelming majority of users + # have no ``mcp_servers`` configured, in which case every byte of that + # import is wasted. Check the config first (cheap — it's already been + # loaded once by ``_config_mtime`` elsewhere) and only pay the import + # cost when there's actually MCP work to do. try: - from tools.mcp_tool import discover_mcp_tools - discover_mcp_tools() + from hermes_cli.config import read_raw_config + _mcp_servers = (read_raw_config() or {}).get("mcp_servers") + _has_mcp_servers = isinstance(_mcp_servers, dict) and len(_mcp_servers) > 0 except Exception: - pass + # Be conservative: if we can't decide, fall back to the old + # behaviour and let the discovery path handle its own errors. 
+ _has_mcp_servers = True + if _has_mcp_servers: + try: + from tools.mcp_tool import discover_mcp_tools + discover_mcp_tools() + except Exception: + pass if not write_json({ "jsonrpc": "2.0", diff --git a/tui_gateway/server.py b/tui_gateway/server.py index 4072e49647..fee8e9550e 100644 --- a/tui_gateway/server.py +++ b/tui_gateway/server.py @@ -465,6 +465,119 @@ def _wait_agent(session: dict, rid: str, timeout: float = 30.0) -> dict | None: return _err(rid, 5032, err) if err else None +def _start_agent_build(sid: str, session: dict) -> None: + """Start building the real AIAgent for a TUI session, once. + + Classic `hermes` shows the prompt before constructing AIAgent; the TUI used + to eagerly build it during session.create, making startup feel blocked on + tool discovery/model metadata even though the composer was visible. Keep + the shell responsive by building off the request path: session.create runs + this on a short timer, and the first prompt (or any agent-backed command) + starts it if still pending, with the same ready/error frontend event contract. 
+ """ + ready = session.get("agent_ready") + if ready is None: + return + lock = session.setdefault("agent_build_lock", threading.Lock()) + with lock: + if ready.is_set() or session.get("agent_build_started"): + return + session["agent_build_started"] = True + key = session["session_key"] + + def _build() -> None: + current = _sessions.get(sid) + if current is None: + ready.set() + return + + worker = None + notify_registered = False + try: + tokens = _set_session_context(key) + try: + agent = _make_agent(sid, key) + finally: + _clear_session_context(tokens) + + db = _get_db() + if db is not None: + db.create_session(key, source="tui", model=_resolve_model()) + pending_title = (current.get("pending_title") or "").strip() + if pending_title: + try: + title_applied = db.set_session_title(key, pending_title) + if title_applied: + current["pending_title"] = None + else: + existing_row = db.get_session(key) + existing_title = ((existing_row or {}).get("title") or "").strip() + if existing_title == pending_title: + current["pending_title"] = None + else: + logger.info( + "Pending title still queued for session %s (wanted=%r, current=%r)", + sid, + pending_title, + existing_title, + ) + except ValueError as e: + current["pending_title"] = None + logger.info("Dropping pending title for session %s: %s", sid, e) + except Exception: + logger.warning("Failed to apply pending title for session %s", sid, exc_info=True) + current["agent"] = agent + + try: + worker = _SlashWorker(key, getattr(agent, "model", _resolve_model())) + current["slash_worker"] = worker + except Exception: + pass + + try: + from tools.approval import ( + register_gateway_notify, + load_permanent_allowlist, + ) + register_gateway_notify(key, lambda data: _emit("approval.request", sid, data)) + notify_registered = True + load_permanent_allowlist() + except Exception: + pass + + _wire_callbacks(sid) + _notify_session_boundary("on_session_reset", key) + + info = _session_info(agent) + warn = 
_probe_credentials(agent) + if warn: + info["credential_warning"] = warn + cfg_warn = _probe_config_health(_load_cfg()) + if cfg_warn: + info["config_warning"] = cfg_warn + logger.warning(cfg_warn) + _emit("session.info", sid, info) + except Exception as e: + current["agent_error"] = str(e) + _emit("error", sid, {"message": f"agent init failed: {e}"}) + finally: + if _sessions.get(sid) is not current: + if worker is not None: + try: + worker.close() + except Exception: + pass + if notify_registered: + try: + from tools.approval import unregister_gateway_notify + unregister_gateway_notify(key) + except Exception: + pass + ready.set() + + threading.Thread(target=_build, daemon=True).start() + + def _sess_nowait(params, rid): s = _sessions.get(params.get("session_id") or "") return (s, None) if s else (None, _err(rid, 4001, "session not found")) @@ -472,7 +585,10 @@ def _sess_nowait(params, rid): def _sess(params, rid): s, err = _sess_nowait(params, rid) - return (None, err) if err else (s, _wait_agent(s, rid)) + if err: + return (None, err) + _start_agent_build(params.get("session_id") or "", s) + return (s, _wait_agent(s, rid)) def _normalize_completion_path(path_part: str) -> str: @@ -1627,129 +1743,18 @@ def _(rid, params: dict) -> dict: "transport": current_transport() or _stdio_transport, } - def _build() -> None: + # Return the lightweight session immediately so Ink can paint the composer + # + skeleton panel, then build the real AIAgent just after this response is + # flushed. This keeps startup responsive while still hydrating tools/skills + # without requiring the user to submit a first prompt. + def _deferred_build() -> None: session = _sessions.get(sid) - if session is None: - # session.close ran before the build thread got scheduled. - ready.set() - return + if session is not None: + _start_agent_build(sid, session) - # Track what we allocate so we can clean up if session.close - # races us to the finish line. 
session.close pops _sessions[sid] - # unconditionally and tries to close the slash_worker it finds; - # if _build is still mid-construction when close runs, close - # finds slash_worker=None / notify unregistered and returns - # cleanly — leaving us, the build thread, to later install the - # worker + notify on an orphaned session dict. The finally - # block below detects the orphan and cleans up instead of - # leaking a subprocess and a global notify registration. - worker = None - notify_registered = False - try: - tokens = _set_session_context(key) - try: - agent = _make_agent(sid, key) - finally: - _clear_session_context(tokens) - - db = _get_db() - if db is not None: - db.create_session(key, source="tui", model=_resolve_model()) - pending_title = (session.get("pending_title") or "").strip() - if pending_title: - try: - title_applied = db.set_session_title(key, pending_title) - if title_applied: - session["pending_title"] = None - else: - existing_row = db.get_session(key) - existing_title = ( - (existing_row or {}).get("title") or "" - ).strip() - if existing_title == pending_title: - session["pending_title"] = None - else: - logger.info( - "Pending title still queued for session %s (wanted=%r, current=%r)", - sid, - pending_title, - existing_title, - ) - except ValueError as e: - # Queued title can become invalid/duplicate between queue time - # and DB row creation. Drop the queue and log the reason so - # future /title reads don't surface a stuck pending value. 
- session["pending_title"] = None - logger.info( - "Dropping pending title for session %s: %s", - sid, - e, - ) - except Exception: - logger.warning( - "Failed to apply pending title for session %s", - sid, - exc_info=True, - ) - session["agent"] = agent - - try: - worker = _SlashWorker(key, getattr(agent, "model", _resolve_model())) - session["slash_worker"] = worker - except Exception: - pass - - try: - from tools.approval import ( - register_gateway_notify, - load_permanent_allowlist, - ) - - register_gateway_notify( - key, lambda data: _emit("approval.request", sid, data) - ) - notify_registered = True - load_permanent_allowlist() - except Exception: - pass - - _wire_callbacks(sid) - _notify_session_boundary("on_session_reset", key) - - info = _session_info(agent) - warn = _probe_credentials(agent) - if warn: - info["credential_warning"] = warn - cfg_warn = _probe_config_health(_load_cfg()) - if cfg_warn: - info["config_warning"] = cfg_warn - logger.warning(cfg_warn) - _emit("session.info", sid, info) - except Exception as e: - session["agent_error"] = str(e) - _emit("error", sid, {"message": f"agent init failed: {e}"}) - finally: - # Orphan check: if session.close raced us and popped - # _sessions[sid] while we were building, the dict we just - # populated is unreachable. Clean up the subprocess and - # the global notify registration ourselves — session.close - # couldn't see them at the time it ran. 
- if _sessions.get(sid) is not session: - if worker is not None: - try: - worker.close() - except Exception: - pass - if notify_registered: - try: - from tools.approval import unregister_gateway_notify - - unregister_gateway_notify(key) - except Exception: - pass - ready.set() - - threading.Thread(target=_build, daemon=True).start() + build_timer = threading.Timer(0.05, _deferred_build) + build_timer.daemon = True + build_timer.start() return _ok( rid, @@ -1760,6 +1765,7 @@ def _(rid, params: dict) -> dict: "tools": {}, "skills": {}, "cwd": os.getenv("TERMINAL_CWD", os.getcwd()), + "lazy": True, }, }, ) @@ -1901,7 +1907,7 @@ def _(rid, params: dict) -> dict: @method("session.title") def _(rid, params: dict) -> dict: - session, err = _sess(params, rid) + session, err = _sess_nowait(params, rid) if err: return err db = _get_db() @@ -1964,13 +1970,16 @@ def _(rid, params: dict) -> dict: @method("session.usage") def _(rid, params: dict) -> dict: - session, err = _sess(params, rid) - return err or _ok(rid, _get_usage(session["agent"])) + session, err = _sess_nowait(params, rid) + if err: + return err + agent = session.get("agent") + return _ok(rid, _get_usage(agent) if agent is not None else {"calls": 0, "input": 0, "output": 0, "total": 0}) @method("session.history") def _(rid, params: dict) -> dict: - session, err = _sess(params, rid) + session, err = _sess_nowait(params, rid) if err: return err history = list(session.get("history", [])) @@ -2437,13 +2446,31 @@ def _(rid, params: dict) -> dict: @method("prompt.submit") def _(rid, params: dict) -> dict: sid, text = params.get("session_id", ""), params.get("text", "") - session, err = _sess(params, rid) + session, err = _sess_nowait(params, rid) if err: return err with session["history_lock"]: if session.get("running"): return _err(rid, 4009, "session busy") session["running"] = True + + _start_agent_build(sid, session) + + def run_after_agent_ready() -> None: + err = _wait_agent(session, rid) + if err: + _emit("error", 
sid, {"message": err.get("error", {}).get("message", "agent initialization failed")}) + with session["history_lock"]: + session["running"] = False + return + _run_prompt_submit(rid, sid, session, text) + + threading.Thread(target=run_after_agent_ready, daemon=True).start() + return _ok(rid, {"status": "streaming"}) + + +def _run_prompt_submit(rid, sid: str, session: dict, text: Any) -> None: + with session["history_lock"]: history = list(session["history"]) history_version = int(session.get("history_version", 0)) images = list(session.get("attached_images", [])) @@ -2682,7 +2709,6 @@ def _(rid, params: dict) -> dict: session["running"] = False threading.Thread(target=run, daemon=True).start() - return _ok(rid, {"status": "streaming"}) @method("clipboard.paste") diff --git a/ui-tui/src/app/useConfigSync.ts b/ui-tui/src/app/useConfigSync.ts index 8695855759..eec93d32c8 100644 --- a/ui-tui/src/app/useConfigSync.ts +++ b/ui-tui/src/app/useConfigSync.ts @@ -5,8 +5,7 @@ import type { GatewayClient } from '../gatewayClient.js' import type { ConfigFullResponse, ConfigMtimeResponse, - ReloadMcpResponse, - VoiceToggleResponse + ReloadMcpResponse } from '../gatewayTypes.js' import { asRpcResult } from '../lib/rpc.js' @@ -118,7 +117,11 @@ export function useConfigSync({ gw, setBellOnComplete, setVoiceEnabled, sid }: U return } - quietRpc(gw, 'voice.toggle', { action: 'status' }).then(r => setVoiceEnabled(!!r?.enabled)) + // Keep startup cheap: voice.toggle status probes optional audio/STT deps and + // can run long enough to delay prompt.submit on the single stdio RPC pipe. + // Environment flags are enough to initialize the UI bit; the heavier status + // check still runs when the user opens /voice. + setVoiceEnabled(process.env.HERMES_VOICE === '1') quietRpc(gw, 'config.get', { key: 'mtime' }).then(r => { mtimeRef.current = Number(r?.mtime ?? 
0) }) diff --git a/ui-tui/src/app/useSubmission.ts b/ui-tui/src/app/useSubmission.ts index df6acfadbe..bbb288e001 100644 --- a/ui-tui/src/app/useSubmission.ts +++ b/ui-tui/src/app/useSubmission.ts @@ -126,6 +126,13 @@ export function useSubmission(opts: UseSubmissionOptions) { return sys('session not ready yet') } + // Plain prompts are the common path and should not pay an extra RPC + // before prompt.submit. File-drop detection still runs for absolute, + // tilde, file://, and explicit relative paths. + if (!looksLikeSlashCommand(text) && !/(?:^|\s)(?:file:\/\/|~\/|\.?\.\/|\/)[^\s]+/.test(text)) { + return startSubmit(text, expand(text), showUserMessage) + } + gw.request('input.detect_drop', { session_id: sid, text }) .then(r => { if (!r?.matched) { diff --git a/ui-tui/src/components/appLayout.tsx b/ui-tui/src/components/appLayout.tsx index 16d96f390b..f97cc17e60 100644 --- a/ui-tui/src/components/appLayout.tsx +++ b/ui-tui/src/components/appLayout.tsx @@ -68,7 +68,7 @@ const TranscriptPane = memo(function TranscriptPane({ - {row.msg.info?.version && } + {row.msg.info && } ) : row.msg.kind === 'panel' && row.msg.panelData ? ( diff --git a/ui-tui/src/components/branding.tsx b/ui-tui/src/components/branding.tsx index 25e161fd71..84e502aada 100644 --- a/ui-tui/src/components/branding.tsx +++ b/ui-tui/src/components/branding.tsx @@ -1,10 +1,32 @@ import { Box, Text, useStdout } from '@hermes/ink' +import { useEffect, useState } from 'react' +import unicodeSpinners from 'unicode-animations' import { artWidth, caduceus, CADUCEUS_WIDTH, logo, LOGO_WIDTH } from '../banner.js' import { flat } from '../lib/text.js' import type { Theme } from '../theme.js' import type { PanelSection, SessionInfo } from '../types.js' +const LOADER_TICK_MS = 120 + +function InlineLoader({ label, t }: { label: string; t: Theme }) { + const [tick, setTick] = useState(0) + const spinner = unicodeSpinners.braille + const frame = spinner.frames[tick % spinner.frames.length] ?? 
'⠋' + + useEffect(() => { + const id = setInterval(() => setTick(n => n + 1), Math.max(LOADER_TICK_MS, spinner.interval)) + + return () => clearInterval(id) + }, [spinner.interval]) + + return ( + + {frame} {label} + + ) +} + export function ArtLines({ lines }: { lines: [string, string][] }) { return ( <> @@ -67,6 +89,7 @@ export function SessionPanel({ info, sid, t }: SessionPanelProps) { const entries = Object.entries(data).sort() const shown = entries.slice(0, max) const overflow = entries.length - max + const skeleton = info.lazy && entries.length === 0 return ( @@ -74,12 +97,16 @@ export function SessionPanel({ info, sid, t }: SessionPanelProps) { Available {title} - {shown.map(([k, vs]) => ( - - {strip(k)}: - {truncLine(strip(k) + ': ', vs)} - - ))} + {skeleton ? ( + + ) : ( + shown.map(([k, vs]) => ( + + {strip(k)}: + {truncLine(strip(k) + ': ', vs)} + + )) + )} {overflow > 0 && ( diff --git a/ui-tui/src/lib/memoryMonitor.ts b/ui-tui/src/lib/memoryMonitor.ts index bbdb229705..eaf11574a4 100644 --- a/ui-tui/src/lib/memoryMonitor.ts +++ b/ui-tui/src/lib/memoryMonitor.ts @@ -1,5 +1,3 @@ -import { evictInkCaches } from '@hermes/ink' - import { type HeapDumpResult, performHeapDump } from './memory.js' export type MemoryLevel = 'critical' | 'high' | 'normal' @@ -20,6 +18,40 @@ export interface MemoryMonitorOptions { const GB = 1024 ** 3 +// Deferred @hermes/ink import: loading `@hermes/ink` at module top-level +// pulls the full ~414KB Ink bundle (React, renderer, components, hooks) onto +// the critical path before the Python gateway can even be spawned. That +// serialised roughly 150ms of Node work in front of gw.start() on every +// cold `hermes --tui` launch. +// +// evictInkCaches only runs inside `tick()`, which fires on a 10s timer and +// only when heap pressure crosses the high-water mark — by then Ink has +// long since been loaded by the app entry. 
This dynamic import is a no-op +// on the hot path (module is already in the ESM cache); when a startup +// spike somehow trips the threshold before the app registers its own Ink +// import, we pay the load cost exactly once, inside the tick that needs it. +let _evictInkCaches: ((level: 'all' | 'half') => unknown) | null = null +let _evictInkCachesPromise: Promise<(level: 'all' | 'half') => unknown> | null = null + +async function _ensureEvictInkCaches(): Promise<(level: 'all' | 'half') => unknown> { + if (_evictInkCaches) { + return _evictInkCaches + } + + _evictInkCachesPromise ??= import('@hermes/ink') + .then(mod => { + _evictInkCaches = mod.evictInkCaches as (level: 'all' | 'half') => unknown + + return _evictInkCaches + }) + .catch(err => { + _evictInkCachesPromise = null + throw err + }) + + return _evictInkCachesPromise +} + export function startMemoryMonitor({ criticalBytes = 2.5 * GB, highBytes = 1.5 * GB, @@ -28,29 +60,45 @@ export function startMemoryMonitor({ onHigh }: MemoryMonitorOptions = {}): () => void { const dumped = new Set>() + const inFlight = new Set>() const tick = async () => { const { heapUsed, rss } = process.memoryUsage() const level: MemoryLevel = heapUsed >= criticalBytes ? 'critical' : heapUsed >= highBytes ? 'high' : 'normal' if (level === 'normal') { - return void dumped.clear() - } - - if (dumped.has(level)) { + dumped.clear() return } + if (dumped.has(level) || inFlight.has(level)) { + return + } + + inFlight.add(level) + // Prune Ink content caches before dump/exit — half on 'high' (recoverable), // full on 'critical' (post-dump RSS reduction, keeps user running). - evictInkCaches(level === 'critical' ? 'all' : 'half') + // Deferred import keeps `@hermes/ink` off the cold-start critical path; + // by the time a tick fires 10s after launch the app has already loaded + // the same module, so this resolves instantly from the ESM cache. 
+ try { + try { + const evictInkCaches = await _ensureEvictInkCaches() + evictInkCaches(level === 'critical' ? 'all' : 'half') + } catch { + // Best-effort: if the dynamic import fails for any reason we still + // continue to the heap dump below so the user gets diagnostics. + } - dumped.add(level) - const dump = await performHeapDump(level === 'critical' ? 'auto-critical' : 'auto-high').catch(() => null) + dumped.add(level) + const dump = await performHeapDump(level === 'critical' ? 'auto-critical' : 'auto-high').catch(() => null) + const snap: MemorySnapshot = { heapUsed, level, rss } - const snap: MemorySnapshot = { heapUsed, level, rss } - - ;(level === 'critical' ? onCritical : onHigh)?.(snap, dump) + ;(level === 'critical' ? onCritical : onHigh)?.(snap, dump) + } finally { + inFlight.delete(level) + } } const handle = setInterval(() => void tick(), intervalMs) diff --git a/ui-tui/src/types.ts b/ui-tui/src/types.ts index 6aea78e3e4..b3ecc8fbb6 100644 --- a/ui-tui/src/types.ts +++ b/ui-tui/src/types.ts @@ -143,11 +143,12 @@ export interface McpServerStatus { export interface SessionInfo { cwd?: string fast?: boolean + lazy?: boolean mcp_servers?: McpServerStatus[] model: string reasoning_effort?: string - service_tier?: string release_date?: string + service_tier?: string skills: Record tools: Record update_behind?: number | null