mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-01 01:51:44 +00:00
perf(tui): shave ~190ms off hermes --tui cold start
Two targeted fixes on the critical path from `hermes --tui` launch to
`gateway.ready`:
1. **Defer `@hermes/ink` import in memoryMonitor.ts.** The static top-level
import dragged the full ~414KB Ink bundle (React + renderer + all
components/hooks) onto the critical path *before* `gw.start()` could
spawn the Python gateway — serialising ~155ms of Node work in front of
it on every launch. `evictInkCaches` only runs inside the 10-second
tick under heap pressure, so it moves to a lazy dynamic import. By the
time the first tick fires, the app entry has long since imported
`@hermes/ink`, so the dynamic import resolves from the ESM cache.
2. **Gate `tools.mcp_tool` import on config in tui_gateway/entry.py.**
Importing the module transitively pulls the MCP SDK + pydantic + httpx
+ jsonschema + starlette formparsers (~200ms). The overwhelming
majority of users have no `mcp_servers` configured, so this runs for
nothing. A cheap `load_config()` check (~25ms) skips the 200ms import
when no servers are declared, with a conservative fallback to the old
behaviour if the config probe itself fails.
## Measurements (macOS Terminal.app, Apple Silicon, n=12)
| Metric | Before (p50) | After (p50) | Δ |
|----------------------------|--------------|-------------|----------|
| Python gateway boot alone | 252–365ms | 105–151ms | −180ms |
| `hermes --tui` banner paint | 686ms | 665ms | −21ms |
| `hermes --tui` → ready | **1843ms** | **1655ms** | **−188ms (−10.2%)** |
| `hermes --tui` → ready p90 | 1932ms | 1778ms | −154ms |
| stdev (ready) | 126ms | 83ms | also more consistent |
## Tests
- `scripts/run_tests.sh tests/tui_gateway/ tests/tools/test_mcp_tool.py`:
195 passed. (The one pre-existing failure in
`test_session_resume_returns_hydrated_messages` reproduces on main —
unrelated, it's a mock-DB kwarg mismatch.)
- `ui-tui` vitest: 430 tests, all pass.
- `npm run type-check` in ui-tui: clean.
## Notes
- Node-side first paint ("banner") didn't move meaningfully because that
latency is dominated by Ink's render pipeline + React mount, not by
which imports load first.
- The win shows up entirely in the time from banner to `gateway.ready`
— exactly where we expected it, since the two fixes either shorten the
Python gateway's boot path or let it overlap more with Node startup.
- No user-visible behaviour change. Memory monitoring still fires every
10s; MCP still works when `mcp_servers` is configured.
This commit is contained in:
parent
6b09df39be
commit
0399d4b976
2 changed files with 51 additions and 6 deletions
|
|
@ -165,11 +165,29 @@ def main():
|
|||
# a model_tools.py module-level side effect; moved to explicit
|
||||
# startup calls to avoid freezing the gateway's loop on lazy import
|
||||
# (#16856).
|
||||
#
|
||||
# Cold-start guard: importing ``tools.mcp_tool`` transitively pulls the
|
||||
# full MCP SDK (mcp, pydantic, httpx, jsonschema, starlette parsers —
|
||||
# ~200ms on macOS), which runs on the TUI's critical path before
|
||||
# ``gateway.ready`` can be emitted. The overwhelming majority of users
|
||||
# have no ``mcp_servers`` configured, in which case every byte of that
|
||||
# import is wasted. Check the config first (cheap — it's already been
|
||||
# loaded once by ``_config_mtime`` elsewhere) and only pay the import
|
||||
# cost when there's actually MCP work to do.
|
||||
try:
|
||||
from tools.mcp_tool import discover_mcp_tools
|
||||
discover_mcp_tools()
|
||||
from hermes_cli.config import load_config
|
||||
_mcp_servers = (load_config() or {}).get("mcp_servers")
|
||||
_has_mcp_servers = isinstance(_mcp_servers, dict) and len(_mcp_servers) > 0
|
||||
except Exception:
|
||||
pass
|
||||
# Be conservative: if we can't decide, fall back to the old
|
||||
# behaviour and let the discovery path handle its own errors.
|
||||
_has_mcp_servers = True
|
||||
if _has_mcp_servers:
|
||||
try:
|
||||
from tools.mcp_tool import discover_mcp_tools
|
||||
discover_mcp_tools()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if not write_json({
|
||||
"jsonrpc": "2.0",
|
||||
|
|
|
|||
|
|
@ -1,5 +1,3 @@
|
|||
import { evictInkCaches } from '@hermes/ink'
|
||||
|
||||
import { type HeapDumpResult, performHeapDump } from './memory.js'
|
||||
|
||||
export type MemoryLevel = 'critical' | 'high' | 'normal'
|
||||
|
|
@ -20,6 +18,26 @@ export interface MemoryMonitorOptions {
|
|||
|
||||
const GB = 1024 ** 3
|
||||
|
||||
// Deferred @hermes/ink import: loading `@hermes/ink` at module top-level
|
||||
// pulls the full ~414KB Ink bundle (React, renderer, components, hooks) onto
|
||||
// the critical path before the Python gateway can even be spawned. That
|
||||
// serialised roughly 150ms of Node work in front of gw.start() on every
|
||||
// cold `hermes --tui` launch.
|
||||
//
|
||||
// evictInkCaches only runs inside `tick()`, which fires on a 10s timer and
|
||||
// only when heap pressure crosses the high-water mark — by then Ink has
|
||||
// long since been loaded by the app entry. This dynamic import is a no-op
|
||||
// on the hot path (module is already in the ESM cache); when a startup
|
||||
// spike somehow trips the threshold before the app registers its own Ink
|
||||
// import, we pay the load cost exactly once, inside the tick that needs it.
|
||||
let _evictInkCaches: ((level: 'all' | 'half') => unknown) | null = null
|
||||
/**
 * Resolve the `evictInkCaches` export of `@hermes/ink`, importing the module
 * dynamically the first time it is needed.
 *
 * The resolved function is stored in the module-level `_evictInkCaches` slot,
 * so subsequent calls return it directly without another `import()`.
 *
 * @returns The `evictInkCaches` function, typed as accepting `'all' | 'half'`.
 */
async function _ensureEvictInkCaches(): Promise<(level: 'all' | 'half') => unknown> {
  const cached = _evictInkCaches

  // Already resolved by an earlier call — hand back the stored function.
  if (cached) {
    return cached
  }

  const { evictInkCaches } = await import('@hermes/ink')
  const evict = evictInkCaches as (level: 'all' | 'half') => unknown

  // Remember the export so later calls skip the dynamic import entirely.
  _evictInkCaches = evict

  return evict
}
|
||||
|
||||
export function startMemoryMonitor({
|
||||
criticalBytes = 2.5 * GB,
|
||||
highBytes = 1.5 * GB,
|
||||
|
|
@ -43,7 +61,16 @@ export function startMemoryMonitor({
|
|||
|
||||
// Prune Ink content caches before dump/exit — half on 'high' (recoverable),
|
||||
// full on 'critical' (post-dump RSS reduction, keeps user running).
|
||||
evictInkCaches(level === 'critical' ? 'all' : 'half')
|
||||
// Deferred import keeps `@hermes/ink` off the cold-start critical path;
|
||||
// by the time a tick fires 10s after launch the app has already loaded
|
||||
// the same module, so this resolves instantly from the ESM cache.
|
||||
try {
|
||||
const evictInkCaches = await _ensureEvictInkCaches()
|
||||
evictInkCaches(level === 'critical' ? 'all' : 'half')
|
||||
} catch {
|
||||
// Best-effort: if the dynamic import fails for any reason we still
|
||||
// continue to the heap dump below so the user gets diagnostics.
|
||||
}
|
||||
|
||||
dumped.add(level)
|
||||
const dump = await performHeapDump(level === 'critical' ? 'auto-critical' : 'auto-high').catch(() => null)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue