mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-07 02:51:50 +00:00
Merge pull request #17190 from NousResearch/bb/tui-cold-start-profiling
perf(tui): cut visible cold start ~57% with lazy agent init
This commit is contained in:
commit
5e68503d2f
10 changed files with 309 additions and 161 deletions
|
|
@ -770,11 +770,19 @@ class TestLoadConfig(unittest.TestCase):
|
||||||
|
|
||||||
def test_returns_code_execution_section(self):
|
def test_returns_code_execution_section(self):
|
||||||
from tools.code_execution_tool import _load_config
|
from tools.code_execution_tool import _load_config
|
||||||
mock_cli = MagicMock()
|
with patch("hermes_cli.config.read_raw_config",
|
||||||
mock_cli.CLI_CONFIG = {"code_execution": {"timeout": 120, "max_tool_calls": 10}}
|
return_value={"code_execution": {"timeout": 120, "max_tool_calls": 10}}):
|
||||||
with patch.dict("sys.modules", {"cli": mock_cli}):
|
|
||||||
result = _load_config()
|
result = _load_config()
|
||||||
self.assertIsInstance(result, dict)
|
self.assertEqual(result, {"timeout": 120, "max_tool_calls": 10})
|
||||||
|
|
||||||
|
def test_does_not_import_interactive_cli(self):
|
||||||
|
from tools.code_execution_tool import _load_config
|
||||||
|
mock_cli = MagicMock()
|
||||||
|
mock_cli.CLI_CONFIG = {"code_execution": {"timeout": 999}}
|
||||||
|
with patch.dict("sys.modules", {"cli": mock_cli}), \
|
||||||
|
patch("hermes_cli.config.read_raw_config", return_value={}):
|
||||||
|
result = _load_config()
|
||||||
|
self.assertEqual(result, {})
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
|
||||||
|
|
@ -1309,10 +1309,20 @@ def _kill_process_group(proc, escalate: bool = False):
|
||||||
|
|
||||||
|
|
||||||
def _load_config() -> dict:
|
def _load_config() -> dict:
|
||||||
"""Load code_execution config from CLI_CONFIG if available."""
|
"""Load code_execution config without importing the interactive CLI.
|
||||||
|
|
||||||
|
This helper is called while building the module-level execute_code schema
|
||||||
|
during tool discovery. Importing ``cli`` here pulls prompt_toolkit/Rich and
|
||||||
|
a large chunk of the classic REPL onto every agent startup path, including
|
||||||
|
``hermes --tui`` where it is never used. Read the lightweight raw config
|
||||||
|
instead; the config layer already caches by (mtime, size), and an absent
|
||||||
|
key cleanly falls back to DEFAULT_EXECUTION_MODE.
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
from cli import CLI_CONFIG
|
from hermes_cli.config import read_raw_config
|
||||||
return CLI_CONFIG.get("code_execution", {})
|
|
||||||
|
cfg = read_raw_config().get("code_execution", {})
|
||||||
|
return cfg if isinstance(cfg, dict) else {}
|
||||||
except Exception:
|
except Exception:
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -165,11 +165,29 @@ def main():
|
||||||
# a model_tools.py module-level side effect; moved to explicit
|
# a model_tools.py module-level side effect; moved to explicit
|
||||||
# startup calls to avoid freezing the gateway's loop on lazy import
|
# startup calls to avoid freezing the gateway's loop on lazy import
|
||||||
# (#16856).
|
# (#16856).
|
||||||
|
#
|
||||||
|
# Cold-start guard: importing ``tools.mcp_tool`` transitively pulls the
|
||||||
|
# full MCP SDK (mcp, pydantic, httpx, jsonschema, starlette parsers —
|
||||||
|
# ~200ms on macOS), which runs on the TUI's critical path before
|
||||||
|
# ``gateway.ready`` can be emitted. The overwhelming majority of users
|
||||||
|
# have no ``mcp_servers`` configured, in which case every byte of that
|
||||||
|
# import is wasted. Check the config first (cheap — it's already been
|
||||||
|
# loaded once by ``_config_mtime`` elsewhere) and only pay the import
|
||||||
|
# cost when there's actually MCP work to do.
|
||||||
try:
|
try:
|
||||||
from tools.mcp_tool import discover_mcp_tools
|
from hermes_cli.config import read_raw_config
|
||||||
discover_mcp_tools()
|
_mcp_servers = (read_raw_config() or {}).get("mcp_servers")
|
||||||
|
_has_mcp_servers = isinstance(_mcp_servers, dict) and len(_mcp_servers) > 0
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
# Be conservative: if we can't decide, fall back to the old
|
||||||
|
# behaviour and let the discovery path handle its own errors.
|
||||||
|
_has_mcp_servers = True
|
||||||
|
if _has_mcp_servers:
|
||||||
|
try:
|
||||||
|
from tools.mcp_tool import discover_mcp_tools
|
||||||
|
discover_mcp_tools()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
if not write_json({
|
if not write_json({
|
||||||
"jsonrpc": "2.0",
|
"jsonrpc": "2.0",
|
||||||
|
|
|
||||||
|
|
@ -465,6 +465,119 @@ def _wait_agent(session: dict, rid: str, timeout: float = 30.0) -> dict | None:
|
||||||
return _err(rid, 5032, err) if err else None
|
return _err(rid, 5032, err) if err else None
|
||||||
|
|
||||||
|
|
||||||
|
def _start_agent_build(sid: str, session: dict) -> None:
|
||||||
|
"""Start building the real AIAgent for a TUI session, once.
|
||||||
|
|
||||||
|
Classic `hermes` shows the prompt before constructing AIAgent; the TUI used
|
||||||
|
to eagerly build it during session.create, making startup feel blocked on
|
||||||
|
tool discovery/model metadata even though the composer was visible. Keep
|
||||||
|
the shell responsive by deferring this work until the first prompt (or any
|
||||||
|
command that actually needs the agent), while retaining the same ready/error
|
||||||
|
event contract for the frontend.
|
||||||
|
"""
|
||||||
|
ready = session.get("agent_ready")
|
||||||
|
if ready is None:
|
||||||
|
return
|
||||||
|
lock = session.setdefault("agent_build_lock", threading.Lock())
|
||||||
|
with lock:
|
||||||
|
if ready.is_set() or session.get("agent_build_started"):
|
||||||
|
return
|
||||||
|
session["agent_build_started"] = True
|
||||||
|
key = session["session_key"]
|
||||||
|
|
||||||
|
def _build() -> None:
|
||||||
|
current = _sessions.get(sid)
|
||||||
|
if current is None:
|
||||||
|
ready.set()
|
||||||
|
return
|
||||||
|
|
||||||
|
worker = None
|
||||||
|
notify_registered = False
|
||||||
|
try:
|
||||||
|
tokens = _set_session_context(key)
|
||||||
|
try:
|
||||||
|
agent = _make_agent(sid, key)
|
||||||
|
finally:
|
||||||
|
_clear_session_context(tokens)
|
||||||
|
|
||||||
|
db = _get_db()
|
||||||
|
if db is not None:
|
||||||
|
db.create_session(key, source="tui", model=_resolve_model())
|
||||||
|
pending_title = (current.get("pending_title") or "").strip()
|
||||||
|
if pending_title:
|
||||||
|
try:
|
||||||
|
title_applied = db.set_session_title(key, pending_title)
|
||||||
|
if title_applied:
|
||||||
|
current["pending_title"] = None
|
||||||
|
else:
|
||||||
|
existing_row = db.get_session(key)
|
||||||
|
existing_title = ((existing_row or {}).get("title") or "").strip()
|
||||||
|
if existing_title == pending_title:
|
||||||
|
current["pending_title"] = None
|
||||||
|
else:
|
||||||
|
logger.info(
|
||||||
|
"Pending title still queued for session %s (wanted=%r, current=%r)",
|
||||||
|
sid,
|
||||||
|
pending_title,
|
||||||
|
existing_title,
|
||||||
|
)
|
||||||
|
except ValueError as e:
|
||||||
|
current["pending_title"] = None
|
||||||
|
logger.info("Dropping pending title for session %s: %s", sid, e)
|
||||||
|
except Exception:
|
||||||
|
logger.warning("Failed to apply pending title for session %s", sid, exc_info=True)
|
||||||
|
current["agent"] = agent
|
||||||
|
|
||||||
|
try:
|
||||||
|
worker = _SlashWorker(key, getattr(agent, "model", _resolve_model()))
|
||||||
|
current["slash_worker"] = worker
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
try:
|
||||||
|
from tools.approval import (
|
||||||
|
register_gateway_notify,
|
||||||
|
load_permanent_allowlist,
|
||||||
|
)
|
||||||
|
register_gateway_notify(key, lambda data: _emit("approval.request", sid, data))
|
||||||
|
notify_registered = True
|
||||||
|
load_permanent_allowlist()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
_wire_callbacks(sid)
|
||||||
|
_notify_session_boundary("on_session_reset", key)
|
||||||
|
|
||||||
|
info = _session_info(agent)
|
||||||
|
warn = _probe_credentials(agent)
|
||||||
|
if warn:
|
||||||
|
info["credential_warning"] = warn
|
||||||
|
cfg_warn = _probe_config_health(_load_cfg())
|
||||||
|
if cfg_warn:
|
||||||
|
info["config_warning"] = cfg_warn
|
||||||
|
logger.warning(cfg_warn)
|
||||||
|
_emit("session.info", sid, info)
|
||||||
|
except Exception as e:
|
||||||
|
current["agent_error"] = str(e)
|
||||||
|
_emit("error", sid, {"message": f"agent init failed: {e}"})
|
||||||
|
finally:
|
||||||
|
if _sessions.get(sid) is not current:
|
||||||
|
if worker is not None:
|
||||||
|
try:
|
||||||
|
worker.close()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
if notify_registered:
|
||||||
|
try:
|
||||||
|
from tools.approval import unregister_gateway_notify
|
||||||
|
unregister_gateway_notify(key)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
ready.set()
|
||||||
|
|
||||||
|
threading.Thread(target=_build, daemon=True).start()
|
||||||
|
|
||||||
|
|
||||||
def _sess_nowait(params, rid):
|
def _sess_nowait(params, rid):
|
||||||
s = _sessions.get(params.get("session_id") or "")
|
s = _sessions.get(params.get("session_id") or "")
|
||||||
return (s, None) if s else (None, _err(rid, 4001, "session not found"))
|
return (s, None) if s else (None, _err(rid, 4001, "session not found"))
|
||||||
|
|
@ -472,7 +585,10 @@ def _sess_nowait(params, rid):
|
||||||
|
|
||||||
def _sess(params, rid):
|
def _sess(params, rid):
|
||||||
s, err = _sess_nowait(params, rid)
|
s, err = _sess_nowait(params, rid)
|
||||||
return (None, err) if err else (s, _wait_agent(s, rid))
|
if err:
|
||||||
|
return (None, err)
|
||||||
|
_start_agent_build(params.get("session_id") or "", s)
|
||||||
|
return (s, _wait_agent(s, rid))
|
||||||
|
|
||||||
|
|
||||||
def _normalize_completion_path(path_part: str) -> str:
|
def _normalize_completion_path(path_part: str) -> str:
|
||||||
|
|
@ -1627,129 +1743,18 @@ def _(rid, params: dict) -> dict:
|
||||||
"transport": current_transport() or _stdio_transport,
|
"transport": current_transport() or _stdio_transport,
|
||||||
}
|
}
|
||||||
|
|
||||||
def _build() -> None:
|
# Return the lightweight session immediately so Ink can paint the composer
|
||||||
|
# + skeleton panel, then build the real AIAgent just after this response is
|
||||||
|
# flushed. This keeps startup responsive while still hydrating tools/skills
|
||||||
|
# without requiring the user to submit a first prompt.
|
||||||
|
def _deferred_build() -> None:
|
||||||
session = _sessions.get(sid)
|
session = _sessions.get(sid)
|
||||||
if session is None:
|
if session is not None:
|
||||||
# session.close ran before the build thread got scheduled.
|
_start_agent_build(sid, session)
|
||||||
ready.set()
|
|
||||||
return
|
|
||||||
|
|
||||||
# Track what we allocate so we can clean up if session.close
|
build_timer = threading.Timer(0.05, _deferred_build)
|
||||||
# races us to the finish line. session.close pops _sessions[sid]
|
build_timer.daemon = True
|
||||||
# unconditionally and tries to close the slash_worker it finds;
|
build_timer.start()
|
||||||
# if _build is still mid-construction when close runs, close
|
|
||||||
# finds slash_worker=None / notify unregistered and returns
|
|
||||||
# cleanly — leaving us, the build thread, to later install the
|
|
||||||
# worker + notify on an orphaned session dict. The finally
|
|
||||||
# block below detects the orphan and cleans up instead of
|
|
||||||
# leaking a subprocess and a global notify registration.
|
|
||||||
worker = None
|
|
||||||
notify_registered = False
|
|
||||||
try:
|
|
||||||
tokens = _set_session_context(key)
|
|
||||||
try:
|
|
||||||
agent = _make_agent(sid, key)
|
|
||||||
finally:
|
|
||||||
_clear_session_context(tokens)
|
|
||||||
|
|
||||||
db = _get_db()
|
|
||||||
if db is not None:
|
|
||||||
db.create_session(key, source="tui", model=_resolve_model())
|
|
||||||
pending_title = (session.get("pending_title") or "").strip()
|
|
||||||
if pending_title:
|
|
||||||
try:
|
|
||||||
title_applied = db.set_session_title(key, pending_title)
|
|
||||||
if title_applied:
|
|
||||||
session["pending_title"] = None
|
|
||||||
else:
|
|
||||||
existing_row = db.get_session(key)
|
|
||||||
existing_title = (
|
|
||||||
(existing_row or {}).get("title") or ""
|
|
||||||
).strip()
|
|
||||||
if existing_title == pending_title:
|
|
||||||
session["pending_title"] = None
|
|
||||||
else:
|
|
||||||
logger.info(
|
|
||||||
"Pending title still queued for session %s (wanted=%r, current=%r)",
|
|
||||||
sid,
|
|
||||||
pending_title,
|
|
||||||
existing_title,
|
|
||||||
)
|
|
||||||
except ValueError as e:
|
|
||||||
# Queued title can become invalid/duplicate between queue time
|
|
||||||
# and DB row creation. Drop the queue and log the reason so
|
|
||||||
# future /title reads don't surface a stuck pending value.
|
|
||||||
session["pending_title"] = None
|
|
||||||
logger.info(
|
|
||||||
"Dropping pending title for session %s: %s",
|
|
||||||
sid,
|
|
||||||
e,
|
|
||||||
)
|
|
||||||
except Exception:
|
|
||||||
logger.warning(
|
|
||||||
"Failed to apply pending title for session %s",
|
|
||||||
sid,
|
|
||||||
exc_info=True,
|
|
||||||
)
|
|
||||||
session["agent"] = agent
|
|
||||||
|
|
||||||
try:
|
|
||||||
worker = _SlashWorker(key, getattr(agent, "model", _resolve_model()))
|
|
||||||
session["slash_worker"] = worker
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
try:
|
|
||||||
from tools.approval import (
|
|
||||||
register_gateway_notify,
|
|
||||||
load_permanent_allowlist,
|
|
||||||
)
|
|
||||||
|
|
||||||
register_gateway_notify(
|
|
||||||
key, lambda data: _emit("approval.request", sid, data)
|
|
||||||
)
|
|
||||||
notify_registered = True
|
|
||||||
load_permanent_allowlist()
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
_wire_callbacks(sid)
|
|
||||||
_notify_session_boundary("on_session_reset", key)
|
|
||||||
|
|
||||||
info = _session_info(agent)
|
|
||||||
warn = _probe_credentials(agent)
|
|
||||||
if warn:
|
|
||||||
info["credential_warning"] = warn
|
|
||||||
cfg_warn = _probe_config_health(_load_cfg())
|
|
||||||
if cfg_warn:
|
|
||||||
info["config_warning"] = cfg_warn
|
|
||||||
logger.warning(cfg_warn)
|
|
||||||
_emit("session.info", sid, info)
|
|
||||||
except Exception as e:
|
|
||||||
session["agent_error"] = str(e)
|
|
||||||
_emit("error", sid, {"message": f"agent init failed: {e}"})
|
|
||||||
finally:
|
|
||||||
# Orphan check: if session.close raced us and popped
|
|
||||||
# _sessions[sid] while we were building, the dict we just
|
|
||||||
# populated is unreachable. Clean up the subprocess and
|
|
||||||
# the global notify registration ourselves — session.close
|
|
||||||
# couldn't see them at the time it ran.
|
|
||||||
if _sessions.get(sid) is not session:
|
|
||||||
if worker is not None:
|
|
||||||
try:
|
|
||||||
worker.close()
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
if notify_registered:
|
|
||||||
try:
|
|
||||||
from tools.approval import unregister_gateway_notify
|
|
||||||
|
|
||||||
unregister_gateway_notify(key)
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
ready.set()
|
|
||||||
|
|
||||||
threading.Thread(target=_build, daemon=True).start()
|
|
||||||
|
|
||||||
return _ok(
|
return _ok(
|
||||||
rid,
|
rid,
|
||||||
|
|
@ -1760,6 +1765,7 @@ def _(rid, params: dict) -> dict:
|
||||||
"tools": {},
|
"tools": {},
|
||||||
"skills": {},
|
"skills": {},
|
||||||
"cwd": os.getenv("TERMINAL_CWD", os.getcwd()),
|
"cwd": os.getenv("TERMINAL_CWD", os.getcwd()),
|
||||||
|
"lazy": True,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
@ -1901,7 +1907,7 @@ def _(rid, params: dict) -> dict:
|
||||||
|
|
||||||
@method("session.title")
|
@method("session.title")
|
||||||
def _(rid, params: dict) -> dict:
|
def _(rid, params: dict) -> dict:
|
||||||
session, err = _sess(params, rid)
|
session, err = _sess_nowait(params, rid)
|
||||||
if err:
|
if err:
|
||||||
return err
|
return err
|
||||||
db = _get_db()
|
db = _get_db()
|
||||||
|
|
@ -1964,13 +1970,16 @@ def _(rid, params: dict) -> dict:
|
||||||
|
|
||||||
@method("session.usage")
|
@method("session.usage")
|
||||||
def _(rid, params: dict) -> dict:
|
def _(rid, params: dict) -> dict:
|
||||||
session, err = _sess(params, rid)
|
session, err = _sess_nowait(params, rid)
|
||||||
return err or _ok(rid, _get_usage(session["agent"]))
|
if err:
|
||||||
|
return err
|
||||||
|
agent = session.get("agent")
|
||||||
|
return _ok(rid, _get_usage(agent) if agent is not None else {"calls": 0, "input": 0, "output": 0, "total": 0})
|
||||||
|
|
||||||
|
|
||||||
@method("session.history")
|
@method("session.history")
|
||||||
def _(rid, params: dict) -> dict:
|
def _(rid, params: dict) -> dict:
|
||||||
session, err = _sess(params, rid)
|
session, err = _sess_nowait(params, rid)
|
||||||
if err:
|
if err:
|
||||||
return err
|
return err
|
||||||
history = list(session.get("history", []))
|
history = list(session.get("history", []))
|
||||||
|
|
@ -2437,13 +2446,31 @@ def _(rid, params: dict) -> dict:
|
||||||
@method("prompt.submit")
|
@method("prompt.submit")
|
||||||
def _(rid, params: dict) -> dict:
|
def _(rid, params: dict) -> dict:
|
||||||
sid, text = params.get("session_id", ""), params.get("text", "")
|
sid, text = params.get("session_id", ""), params.get("text", "")
|
||||||
session, err = _sess(params, rid)
|
session, err = _sess_nowait(params, rid)
|
||||||
if err:
|
if err:
|
||||||
return err
|
return err
|
||||||
with session["history_lock"]:
|
with session["history_lock"]:
|
||||||
if session.get("running"):
|
if session.get("running"):
|
||||||
return _err(rid, 4009, "session busy")
|
return _err(rid, 4009, "session busy")
|
||||||
session["running"] = True
|
session["running"] = True
|
||||||
|
|
||||||
|
_start_agent_build(sid, session)
|
||||||
|
|
||||||
|
def run_after_agent_ready() -> None:
|
||||||
|
err = _wait_agent(session, rid)
|
||||||
|
if err:
|
||||||
|
_emit("error", sid, {"message": err.get("error", {}).get("message", "agent initialization failed")})
|
||||||
|
with session["history_lock"]:
|
||||||
|
session["running"] = False
|
||||||
|
return
|
||||||
|
_run_prompt_submit(rid, sid, session, text)
|
||||||
|
|
||||||
|
threading.Thread(target=run_after_agent_ready, daemon=True).start()
|
||||||
|
return _ok(rid, {"status": "streaming"})
|
||||||
|
|
||||||
|
|
||||||
|
def _run_prompt_submit(rid, sid: str, session: dict, text: Any) -> None:
|
||||||
|
with session["history_lock"]:
|
||||||
history = list(session["history"])
|
history = list(session["history"])
|
||||||
history_version = int(session.get("history_version", 0))
|
history_version = int(session.get("history_version", 0))
|
||||||
images = list(session.get("attached_images", []))
|
images = list(session.get("attached_images", []))
|
||||||
|
|
@ -2682,7 +2709,6 @@ def _(rid, params: dict) -> dict:
|
||||||
session["running"] = False
|
session["running"] = False
|
||||||
|
|
||||||
threading.Thread(target=run, daemon=True).start()
|
threading.Thread(target=run, daemon=True).start()
|
||||||
return _ok(rid, {"status": "streaming"})
|
|
||||||
|
|
||||||
|
|
||||||
@method("clipboard.paste")
|
@method("clipboard.paste")
|
||||||
|
|
|
||||||
|
|
@ -5,8 +5,7 @@ import type { GatewayClient } from '../gatewayClient.js'
|
||||||
import type {
|
import type {
|
||||||
ConfigFullResponse,
|
ConfigFullResponse,
|
||||||
ConfigMtimeResponse,
|
ConfigMtimeResponse,
|
||||||
ReloadMcpResponse,
|
ReloadMcpResponse
|
||||||
VoiceToggleResponse
|
|
||||||
} from '../gatewayTypes.js'
|
} from '../gatewayTypes.js'
|
||||||
import { asRpcResult } from '../lib/rpc.js'
|
import { asRpcResult } from '../lib/rpc.js'
|
||||||
|
|
||||||
|
|
@ -118,7 +117,11 @@ export function useConfigSync({ gw, setBellOnComplete, setVoiceEnabled, sid }: U
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
quietRpc<VoiceToggleResponse>(gw, 'voice.toggle', { action: 'status' }).then(r => setVoiceEnabled(!!r?.enabled))
|
// Keep startup cheap: voice.toggle status probes optional audio/STT deps and
|
||||||
|
// can run long enough to delay prompt.submit on the single stdio RPC pipe.
|
||||||
|
// Environment flags are enough to initialize the UI bit; the heavier status
|
||||||
|
// check still runs when the user opens /voice.
|
||||||
|
setVoiceEnabled(process.env.HERMES_VOICE === '1')
|
||||||
quietRpc<ConfigMtimeResponse>(gw, 'config.get', { key: 'mtime' }).then(r => {
|
quietRpc<ConfigMtimeResponse>(gw, 'config.get', { key: 'mtime' }).then(r => {
|
||||||
mtimeRef.current = Number(r?.mtime ?? 0)
|
mtimeRef.current = Number(r?.mtime ?? 0)
|
||||||
})
|
})
|
||||||
|
|
|
||||||
|
|
@ -126,6 +126,13 @@ export function useSubmission(opts: UseSubmissionOptions) {
|
||||||
return sys('session not ready yet')
|
return sys('session not ready yet')
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Plain prompts are the common path and should not pay an extra RPC
|
||||||
|
// before prompt.submit. File-drop detection still runs for absolute,
|
||||||
|
// tilde, file://, and explicit relative paths.
|
||||||
|
if (!looksLikeSlashCommand(text) && !/(?:^|\s)(?:file:\/\/|~\/|\.?\.\/|\/)[^\s]+/.test(text)) {
|
||||||
|
return startSubmit(text, expand(text), showUserMessage)
|
||||||
|
}
|
||||||
|
|
||||||
gw.request<InputDetectDropResponse>('input.detect_drop', { session_id: sid, text })
|
gw.request<InputDetectDropResponse>('input.detect_drop', { session_id: sid, text })
|
||||||
.then(r => {
|
.then(r => {
|
||||||
if (!r?.matched) {
|
if (!r?.matched) {
|
||||||
|
|
|
||||||
|
|
@ -68,7 +68,7 @@ const TranscriptPane = memo(function TranscriptPane({
|
||||||
<Box flexDirection="column" paddingTop={1}>
|
<Box flexDirection="column" paddingTop={1}>
|
||||||
<Banner t={ui.theme} />
|
<Banner t={ui.theme} />
|
||||||
|
|
||||||
{row.msg.info?.version && <SessionPanel info={row.msg.info} sid={ui.sid} t={ui.theme} />}
|
{row.msg.info && <SessionPanel info={row.msg.info} sid={ui.sid} t={ui.theme} />}
|
||||||
</Box>
|
</Box>
|
||||||
) : row.msg.kind === 'panel' && row.msg.panelData ? (
|
) : row.msg.kind === 'panel' && row.msg.panelData ? (
|
||||||
<Panel sections={row.msg.panelData.sections} t={ui.theme} title={row.msg.panelData.title} />
|
<Panel sections={row.msg.panelData.sections} t={ui.theme} title={row.msg.panelData.title} />
|
||||||
|
|
|
||||||
|
|
@ -1,10 +1,32 @@
|
||||||
import { Box, Text, useStdout } from '@hermes/ink'
|
import { Box, Text, useStdout } from '@hermes/ink'
|
||||||
|
import { useEffect, useState } from 'react'
|
||||||
|
import unicodeSpinners from 'unicode-animations'
|
||||||
|
|
||||||
import { artWidth, caduceus, CADUCEUS_WIDTH, logo, LOGO_WIDTH } from '../banner.js'
|
import { artWidth, caduceus, CADUCEUS_WIDTH, logo, LOGO_WIDTH } from '../banner.js'
|
||||||
import { flat } from '../lib/text.js'
|
import { flat } from '../lib/text.js'
|
||||||
import type { Theme } from '../theme.js'
|
import type { Theme } from '../theme.js'
|
||||||
import type { PanelSection, SessionInfo } from '../types.js'
|
import type { PanelSection, SessionInfo } from '../types.js'
|
||||||
|
|
||||||
|
const LOADER_TICK_MS = 120
|
||||||
|
|
||||||
|
function InlineLoader({ label, t }: { label: string; t: Theme }) {
|
||||||
|
const [tick, setTick] = useState(0)
|
||||||
|
const spinner = unicodeSpinners.braille
|
||||||
|
const frame = spinner.frames[tick % spinner.frames.length] ?? '⠋'
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
const id = setInterval(() => setTick(n => n + 1), Math.max(LOADER_TICK_MS, spinner.interval))
|
||||||
|
|
||||||
|
return () => clearInterval(id)
|
||||||
|
}, [spinner.interval])
|
||||||
|
|
||||||
|
return (
|
||||||
|
<Text color={t.color.muted} wrap="truncate">
|
||||||
|
<Text color={t.color.accent}>{frame}</Text> {label}
|
||||||
|
</Text>
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
export function ArtLines({ lines }: { lines: [string, string][] }) {
|
export function ArtLines({ lines }: { lines: [string, string][] }) {
|
||||||
return (
|
return (
|
||||||
<>
|
<>
|
||||||
|
|
@ -67,6 +89,7 @@ export function SessionPanel({ info, sid, t }: SessionPanelProps) {
|
||||||
const entries = Object.entries(data).sort()
|
const entries = Object.entries(data).sort()
|
||||||
const shown = entries.slice(0, max)
|
const shown = entries.slice(0, max)
|
||||||
const overflow = entries.length - max
|
const overflow = entries.length - max
|
||||||
|
const skeleton = info.lazy && entries.length === 0
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<Box flexDirection="column" marginTop={1}>
|
<Box flexDirection="column" marginTop={1}>
|
||||||
|
|
@ -74,12 +97,16 @@ export function SessionPanel({ info, sid, t }: SessionPanelProps) {
|
||||||
Available {title}
|
Available {title}
|
||||||
</Text>
|
</Text>
|
||||||
|
|
||||||
{shown.map(([k, vs]) => (
|
{skeleton ? (
|
||||||
<Text key={k} wrap="truncate">
|
<InlineLoader label={title === 'Tools' ? 'discovering tools' : 'scanning skills'} t={t} />
|
||||||
<Text color={t.color.muted}>{strip(k)}: </Text>
|
) : (
|
||||||
<Text color={t.color.text}>{truncLine(strip(k) + ': ', vs)}</Text>
|
shown.map(([k, vs]) => (
|
||||||
</Text>
|
<Text key={k} wrap="truncate">
|
||||||
))}
|
<Text color={t.color.muted}>{strip(k)}: </Text>
|
||||||
|
<Text color={t.color.text}>{truncLine(strip(k) + ': ', vs)}</Text>
|
||||||
|
</Text>
|
||||||
|
))
|
||||||
|
)}
|
||||||
|
|
||||||
{overflow > 0 && (
|
{overflow > 0 && (
|
||||||
<Text color={t.color.muted}>
|
<Text color={t.color.muted}>
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,3 @@
|
||||||
import { evictInkCaches } from '@hermes/ink'
|
|
||||||
|
|
||||||
import { type HeapDumpResult, performHeapDump } from './memory.js'
|
import { type HeapDumpResult, performHeapDump } from './memory.js'
|
||||||
|
|
||||||
export type MemoryLevel = 'critical' | 'high' | 'normal'
|
export type MemoryLevel = 'critical' | 'high' | 'normal'
|
||||||
|
|
@ -20,6 +18,40 @@ export interface MemoryMonitorOptions {
|
||||||
|
|
||||||
const GB = 1024 ** 3
|
const GB = 1024 ** 3
|
||||||
|
|
||||||
|
// Deferred @hermes/ink import: loading `@hermes/ink` at module top-level
|
||||||
|
// pulls the full ~414KB Ink bundle (React, renderer, components, hooks) onto
|
||||||
|
// the critical path before the Python gateway can even be spawned. That
|
||||||
|
// serialised roughly 150ms of Node work in front of gw.start() on every
|
||||||
|
// cold `hermes --tui` launch.
|
||||||
|
//
|
||||||
|
// evictInkCaches only runs inside `tick()`, which fires on a 10s timer and
|
||||||
|
// only when heap pressure crosses the high-water mark — by then Ink has
|
||||||
|
// long since been loaded by the app entry. This dynamic import is a no-op
|
||||||
|
// on the hot path (module is already in the ESM cache); when a startup
|
||||||
|
// spike somehow trips the threshold before the app registers its own Ink
|
||||||
|
// import, we pay the load cost exactly once, inside the tick that needs it.
|
||||||
|
let _evictInkCaches: ((level: 'all' | 'half') => unknown) | null = null
|
||||||
|
let _evictInkCachesPromise: Promise<(level: 'all' | 'half') => unknown> | null = null
|
||||||
|
|
||||||
|
async function _ensureEvictInkCaches(): Promise<(level: 'all' | 'half') => unknown> {
|
||||||
|
if (_evictInkCaches) {
|
||||||
|
return _evictInkCaches
|
||||||
|
}
|
||||||
|
|
||||||
|
_evictInkCachesPromise ??= import('@hermes/ink')
|
||||||
|
.then(mod => {
|
||||||
|
_evictInkCaches = mod.evictInkCaches as (level: 'all' | 'half') => unknown
|
||||||
|
|
||||||
|
return _evictInkCaches
|
||||||
|
})
|
||||||
|
.catch(err => {
|
||||||
|
_evictInkCachesPromise = null
|
||||||
|
throw err
|
||||||
|
})
|
||||||
|
|
||||||
|
return _evictInkCachesPromise
|
||||||
|
}
|
||||||
|
|
||||||
export function startMemoryMonitor({
|
export function startMemoryMonitor({
|
||||||
criticalBytes = 2.5 * GB,
|
criticalBytes = 2.5 * GB,
|
||||||
highBytes = 1.5 * GB,
|
highBytes = 1.5 * GB,
|
||||||
|
|
@ -28,29 +60,45 @@ export function startMemoryMonitor({
|
||||||
onHigh
|
onHigh
|
||||||
}: MemoryMonitorOptions = {}): () => void {
|
}: MemoryMonitorOptions = {}): () => void {
|
||||||
const dumped = new Set<Exclude<MemoryLevel, 'normal'>>()
|
const dumped = new Set<Exclude<MemoryLevel, 'normal'>>()
|
||||||
|
const inFlight = new Set<Exclude<MemoryLevel, 'normal'>>()
|
||||||
|
|
||||||
const tick = async () => {
|
const tick = async () => {
|
||||||
const { heapUsed, rss } = process.memoryUsage()
|
const { heapUsed, rss } = process.memoryUsage()
|
||||||
const level: MemoryLevel = heapUsed >= criticalBytes ? 'critical' : heapUsed >= highBytes ? 'high' : 'normal'
|
const level: MemoryLevel = heapUsed >= criticalBytes ? 'critical' : heapUsed >= highBytes ? 'high' : 'normal'
|
||||||
|
|
||||||
if (level === 'normal') {
|
if (level === 'normal') {
|
||||||
return void dumped.clear()
|
dumped.clear()
|
||||||
}
|
|
||||||
|
|
||||||
if (dumped.has(level)) {
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (dumped.has(level) || inFlight.has(level)) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
inFlight.add(level)
|
||||||
|
|
||||||
// Prune Ink content caches before dump/exit — half on 'high' (recoverable),
|
// Prune Ink content caches before dump/exit — half on 'high' (recoverable),
|
||||||
// full on 'critical' (post-dump RSS reduction, keeps user running).
|
// full on 'critical' (post-dump RSS reduction, keeps user running).
|
||||||
evictInkCaches(level === 'critical' ? 'all' : 'half')
|
// Deferred import keeps `@hermes/ink` off the cold-start critical path;
|
||||||
|
// by the time a tick fires 10s after launch the app has already loaded
|
||||||
|
// the same module, so this resolves instantly from the ESM cache.
|
||||||
|
try {
|
||||||
|
try {
|
||||||
|
const evictInkCaches = await _ensureEvictInkCaches()
|
||||||
|
evictInkCaches(level === 'critical' ? 'all' : 'half')
|
||||||
|
} catch {
|
||||||
|
// Best-effort: if the dynamic import fails for any reason we still
|
||||||
|
// continue to the heap dump below so the user gets diagnostics.
|
||||||
|
}
|
||||||
|
|
||||||
dumped.add(level)
|
dumped.add(level)
|
||||||
const dump = await performHeapDump(level === 'critical' ? 'auto-critical' : 'auto-high').catch(() => null)
|
const dump = await performHeapDump(level === 'critical' ? 'auto-critical' : 'auto-high').catch(() => null)
|
||||||
|
const snap: MemorySnapshot = { heapUsed, level, rss }
|
||||||
|
|
||||||
const snap: MemorySnapshot = { heapUsed, level, rss }
|
;(level === 'critical' ? onCritical : onHigh)?.(snap, dump)
|
||||||
|
} finally {
|
||||||
;(level === 'critical' ? onCritical : onHigh)?.(snap, dump)
|
inFlight.delete(level)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const handle = setInterval(() => void tick(), intervalMs)
|
const handle = setInterval(() => void tick(), intervalMs)
|
||||||
|
|
|
||||||
|
|
@ -143,11 +143,12 @@ export interface McpServerStatus {
|
||||||
export interface SessionInfo {
|
export interface SessionInfo {
|
||||||
cwd?: string
|
cwd?: string
|
||||||
fast?: boolean
|
fast?: boolean
|
||||||
|
lazy?: boolean
|
||||||
mcp_servers?: McpServerStatus[]
|
mcp_servers?: McpServerStatus[]
|
||||||
model: string
|
model: string
|
||||||
reasoning_effort?: string
|
reasoning_effort?: string
|
||||||
service_tier?: string
|
|
||||||
release_date?: string
|
release_date?: string
|
||||||
|
service_tier?: string
|
||||||
skills: Record<string, string[]>
|
skills: Record<string, string[]>
|
||||||
tools: Record<string, string[]>
|
tools: Record<string, string[]>
|
||||||
update_behind?: number | null
|
update_behind?: number | null
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue