diff --git a/hermes_cli/main.py b/hermes_cli/main.py
index 714ad82bf2..489a1652db 100644
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -1003,6 +1003,17 @@ def _launch_tui(resume_session_id: Optional[str] = None, tui_dev: bool = False):
     )
     env.setdefault("HERMES_PYTHON", sys.executable)
     env.setdefault("HERMES_CWD", os.getcwd())
+    # Guarantee an 8GB V8 heap + exposed GC for the TUI. Default node cap is
+    # ~1.5–4GB depending on version and can fatal-OOM on long sessions with
+    # large transcripts / reasoning blobs. Token-level merge: respect any
+    # user-supplied --max-old-space-size (they may have set it higher) and
+    # avoid duplicating --expose-gc.
+    _tokens = env.get("NODE_OPTIONS", "").split()
+    if not any(t.startswith("--max-old-space-size=") for t in _tokens):
+        _tokens.append("--max-old-space-size=8192")
+    if "--expose-gc" not in _tokens:
+        _tokens.append("--expose-gc")
+    env["NODE_OPTIONS"] = " ".join(_tokens)

     if resume_session_id:
         env["HERMES_TUI_RESUME"] = resume_session_id
diff --git a/ui-tui/src/app/slash/commands/debug.ts b/ui-tui/src/app/slash/commands/debug.ts
new file mode 100644
index 0000000000..b4bfc16bfb
--- /dev/null
+++ b/ui-tui/src/app/slash/commands/debug.ts
@@ -0,0 +1,48 @@
+import { formatBytes, performHeapDump } from '../../../lib/memory.js'
+import type { SlashCommand } from '../types.js'
+
+export const debugCommands: SlashCommand[] = [
+  {
+    help: 'write a V8 heap snapshot + memory diagnostics (see HERMES_HEAPDUMP_DIR)',
+    name: 'heapdump',
+    run: (_arg, ctx) => {
+      const { heapUsed, rss } = process.memoryUsage()
+
+      ctx.transcript.sys(`writing heap dump (heap ${formatBytes(heapUsed)} · rss ${formatBytes(rss)})…`)
+
+      void performHeapDump('manual').then(r => {
+        if (ctx.stale()) {
+          return
+        }
+
+        if (!r.success) {
+          return ctx.transcript.sys(`heapdump failed: ${r.error ?? 'unknown error'}`)
+        }
+
+        ctx.transcript.sys(`heapdump: ${r.heapPath}`)
+        ctx.transcript.sys(`diagnostics: ${r.diagPath}`)
+      })
+    }
+  },
+
+  {
+    help: 'print live V8 heap + rss numbers',
+    name: 'mem',
+    run: (_arg, ctx) => {
+      const { arrayBuffers, external, heapTotal, heapUsed, rss } = process.memoryUsage()
+
+      ctx.transcript.panel('Memory', [
+        {
+          rows: [
+            ['heap used', formatBytes(heapUsed)],
+            ['heap total', formatBytes(heapTotal)],
+            ['external', formatBytes(external)],
+            ['array buffers', formatBytes(arrayBuffers)],
+            ['rss', formatBytes(rss)],
+            ['uptime', `${process.uptime().toFixed(0)}s`]
+          ]
+        }
+      ])
+    }
+  }
+]
diff --git a/ui-tui/src/app/slash/registry.ts b/ui-tui/src/app/slash/registry.ts
index ae7d7d50be..353b0a83d1 100644
--- a/ui-tui/src/app/slash/registry.ts
+++ b/ui-tui/src/app/slash/registry.ts
@@ -1,10 +1,17 @@
 import { coreCommands } from './commands/core.js'
+import { debugCommands } from './commands/debug.js'
 import { opsCommands } from './commands/ops.js'
 import { sessionCommands } from './commands/session.js'
 import { setupCommands } from './commands/setup.js'
 import type { SlashCommand } from './types.js'

-export const SLASH_COMMANDS: SlashCommand[] = [...coreCommands, ...sessionCommands, ...opsCommands, ...setupCommands]
+export const SLASH_COMMANDS: SlashCommand[] = [
+  ...coreCommands,
+  ...sessionCommands,
+  ...opsCommands,
+  ...setupCommands,
+  ...debugCommands
+]

 const byName = new Map(
   SLASH_COMMANDS.flatMap(cmd => [cmd.name, ...(cmd.aliases ?? [])].map(name => [name, cmd] as const))
diff --git a/ui-tui/src/app/useMainApp.ts b/ui-tui/src/app/useMainApp.ts
index 28b2a26f9a..77c2681c6c 100644
--- a/ui-tui/src/app/useMainApp.ts
+++ b/ui-tui/src/app/useMainApp.ts
@@ -161,7 +161,7 @@ export function useMainApp(gw: GatewayClient) {
     [historyItems, messageId]
   )

-  const virtualHistory = useVirtualHistory(scrollRef, virtualRows)
+  const virtualHistory = useVirtualHistory(scrollRef, virtualRows, cols)

   const scrollWithSelection = useCallback(
     (delta: number) => {
@@ -306,12 +306,20 @@ export function useMainApp(gw: GatewayClient) {
       return
     }

-    const onResize = () =>
-      rpc('terminal.resize', { cols: stdout.columns ?? 80, session_id: ui.sid })
+    let timer: ReturnType<typeof setTimeout> | undefined
+
+    const onResize = () => {
+      clearTimeout(timer)
+      timer = setTimeout(() => {
+        timer = undefined
+        void rpc('terminal.resize', { cols: stdout.columns ?? 80, session_id: ui.sid })
+      }, 100)
+    }

     stdout.on('resize', onResize)

     return () => {
+      clearTimeout(timer)
       stdout.off('resize', onResize)
     }
   }, [rpc, stdout, ui.sid])
diff --git a/ui-tui/src/entry.tsx b/ui-tui/src/entry.tsx
index e0a4379342..6f1506e5aa 100644
--- a/ui-tui/src/entry.tsx
+++ b/ui-tui/src/entry.tsx
@@ -1,7 +1,9 @@
-#!/usr/bin/env node
-// Order matters: paint banner + spawn python before loading @hermes/ink.
+#!/usr/bin/env -S node --max-old-space-size=8192 --expose-gc
 import { bootBanner } from './bootBanner.js'
 import { GatewayClient } from './gatewayClient.js'
+import { setupGracefulExit } from './lib/gracefulExit.js'
+import { formatBytes, type HeapDumpResult, performHeapDump } from './lib/memory.js'
+import { type MemorySnapshot, startMemoryMonitor } from './lib/memoryMonitor.js'

 if (!process.stdin.isTTY) {
   console.log('hermes-tui: no TTY')
@@ -11,8 +13,37 @@ if (!process.stdin.isTTY) {
 process.stdout.write(bootBanner())

 const gw = new GatewayClient()
+
 gw.start()

+const dumpNotice = (snap: MemorySnapshot, dump: HeapDumpResult | null) =>
+  `hermes-tui: ${snap.level} memory (${formatBytes(snap.heapUsed)}) — auto heap dump → ${dump?.heapPath ?? '(failed)'}\n`
+
+setupGracefulExit({
+  cleanups: [() => gw.kill()],
+  onError: (scope, err) => {
+    const message = err instanceof Error ? `${err.name}: ${err.message}` : String(err)
+
+    process.stderr.write(`hermes-tui ${scope}: ${message.slice(0, 2000)}\n`)
+  },
+  onSignal: signal => process.stderr.write(`hermes-tui: received ${signal}\n`)
+})
+
+const stopMemoryMonitor = startMemoryMonitor({
+  onCritical: (snap, dump) => {
+    process.stderr.write(dumpNotice(snap, dump))
+    process.stderr.write('hermes-tui: exiting to avoid OOM; restart to recover\n')
+    process.exit(137)
+  },
+  onHigh: (snap, dump) => process.stderr.write(dumpNotice(snap, dump))
+})
+
+if (process.env.HERMES_HEAPDUMP_ON_START === '1') {
+  void performHeapDump('manual')
+}
+
+process.on('beforeExit', () => stopMemoryMonitor())
+
 const [{ render }, { App }] = await Promise.all([import('@hermes/ink'), import('./app.js')])

 render(<App gw={gw} />, { exitOnCtrlC: false })
diff --git a/ui-tui/src/gatewayClient.ts b/ui-tui/src/gatewayClient.ts
index a238c7638d..9bf681f8b2 100644
--- a/ui-tui/src/gatewayClient.ts
+++ b/ui-tui/src/gatewayClient.ts
@@ -5,6 +5,7 @@
 import { delimiter, resolve } from 'node:path'
 import { createInterface } from 'node:readline'
 import type { GatewayEvent } from './gatewayTypes.js'
+import { CircularBuffer } from './lib/circularBuffer.js'

 const MAX_GATEWAY_LOG_LINES = 200
 const MAX_LOG_LINE_BYTES = 4096
@@ -43,16 +44,19 @@ const asGatewayEvent = (value: unknown): GatewayEvent | null =>
     : null

 interface Pending {
+  id: string
+  method: string
   reject: (e: Error) => void
   resolve: (v: unknown) => void
+  timeout: ReturnType<typeof setTimeout>
 }

 export class GatewayClient extends EventEmitter {
   private proc: ChildProcess | null = null
   private reqId = 0
-  private logs: string[] = []
+  private logs = new CircularBuffer<string>(MAX_GATEWAY_LOG_LINES)
   private pending = new Map<string, Pending>()
-  private bufferedEvents: GatewayEvent[] = []
+  private bufferedEvents = new CircularBuffer<GatewayEvent>(MAX_BUFFERED_EVENTS)
   private pendingExit: number | null | undefined
   private ready = false
   private readyTimer: ReturnType<typeof setTimeout> | null = null
@@ -60,6 +64,13 @@ export class GatewayClient extends EventEmitter {
   private stdoutRl: ReturnType<typeof createInterface> | null = null
   private stderrRl: ReturnType<typeof createInterface> | null = null

+  constructor() {
+    super()
+    // useInput / createGatewayEventHandler can legitimately attach many
+    // listeners. Default 10-cap triggers spurious warnings.
+    this.setMaxListeners(0)
+  }
+
   private publish(ev: GatewayEvent) {
     if (ev.type === 'gateway.ready') {
       this.ready = true
@@ -74,9 +85,7 @@ export class GatewayClient extends EventEmitter {
       return void this.emit('event', ev)
     }

-    if (this.bufferedEvents.push(ev) > MAX_BUFFERED_EVENTS) {
-      this.bufferedEvents.splice(0, this.bufferedEvents.length - MAX_BUFFERED_EVENTS)
-    }
+    this.bufferedEvents.push(ev)
   }

   start() {
@@ -88,7 +97,7 @@
     env.PYTHONPATH = pyPath ? `${root}${delimiter}${pyPath}` : root

     this.ready = false
-    this.bufferedEvents = []
+    this.bufferedEvents.clear()
     this.pendingExit = undefined
     this.stdoutRl?.close()
     this.stderrRl?.close()
@@ -165,15 +174,7 @@
     const p = id ? this.pending.get(id) : undefined

     if (p) {
-      this.pending.delete(id!)
-
-      if (msg.error) {
-        const err = msg.error as { message?: unknown } | null | undefined
-
-        p.reject(new Error(typeof err?.message === 'string' ? err.message : 'request failed'))
-      } else {
-        p.resolve(msg.result)
-      }
+      this.settle(p, msg.error ? this.toError(msg.error) : null, msg.result)

       return
     }
@@ -187,24 +188,51 @@
     }
   }

-  private pushLog(line: string) {
-    if (this.logs.push(truncateLine(line)) > MAX_GATEWAY_LOG_LINES) {
-      this.logs.splice(0, this.logs.length - MAX_GATEWAY_LOG_LINES)
+  private toError(raw: unknown): Error {
+    const err = raw as { message?: unknown } | null | undefined
+
+    return new Error(typeof err?.message === 'string' ? err.message : 'request failed')
+  }
+
+  private settle(p: Pending, err: Error | null, result: unknown) {
+    clearTimeout(p.timeout)
+    this.pending.delete(p.id)
+
+    if (err) {
+      p.reject(err)
+    } else {
+      p.resolve(result)
     }
   }

+  private pushLog(line: string) {
+    this.logs.push(truncateLine(line))
+  }
+
   private rejectPending(err: Error) {
     for (const p of this.pending.values()) {
+      clearTimeout(p.timeout)
       p.reject(err)
     }

     this.pending.clear()
   }

+  // Arrow class-field — stable identity, so `setTimeout(this.onTimeout, …, id)`
+  // doesn't allocate a bound function per request.
+  private onTimeout = (id: string) => {
+    const p = this.pending.get(id)
+
+    if (p) {
+      this.pending.delete(id)
+      p.reject(new Error(`timeout: ${p.method}`))
+    }
+  }
+
   drain() {
     this.subscribed = true

-    for (const ev of this.bufferedEvents.splice(0)) {
+    for (const ev of this.bufferedEvents.drain()) {
       this.emit('event', ev)
     }
@@ -217,7 +245,7 @@
   }

   getLogTail(limit = 20): string {
-    return this.logs.slice(-Math.max(1, limit)).join('\n')
+    return this.logs.tail(Math.max(1, limit)).join('\n')
   }

   request<T>(method: string, params: Record<string, unknown> = {}): Promise<T> {
@@ -231,29 +259,29 @@
     }

     const id = `r${++this.reqId}`

-    return new Promise((resolve, reject) => {
-      const timeout = setTimeout(() => {
-        if (this.pending.delete(id)) {
-          reject(new Error(`timeout: ${method}`))
-        }
-      }, REQUEST_TIMEOUT_MS)
+    return new Promise<T>((resolve, reject) => {
+      const timeout = setTimeout(this.onTimeout, REQUEST_TIMEOUT_MS, id)
+
+      timeout.unref?.()

       this.pending.set(id, {
-        reject: e => {
-          clearTimeout(timeout)
-          reject(e)
-        },
-        resolve: v => {
-          clearTimeout(timeout)
-          resolve(v as T)
-        }
+        id,
+        method,
+        reject,
+        resolve: v => resolve(v as T),
+        timeout
       })

       try {
-        this.proc!.stdin!.write(JSON.stringify({ jsonrpc: '2.0', id, method, params }) + '\n')
+        this.proc!.stdin!.write(JSON.stringify({ id, jsonrpc: '2.0', method, params }) + '\n')
       } catch (e) {
-        clearTimeout(timeout)
-        this.pending.delete(id)
+        const pending = this.pending.get(id)
+
+        if (pending) {
+          clearTimeout(pending.timeout)
+          this.pending.delete(id)
+        }
+
         reject(e instanceof Error ? e : new Error(String(e)))
       }
     })
diff --git a/ui-tui/src/hooks/useVirtualHistory.ts b/ui-tui/src/hooks/useVirtualHistory.ts
index efa2642df3..17bc8dfd3e 100644
--- a/ui-tui/src/hooks/useVirtualHistory.ts
+++ b/ui-tui/src/hooks/useVirtualHistory.ts
@@ -15,13 +15,15 @@
 const OVERSCAN = 40
 const MAX_MOUNTED = 260
 const COLD_START = 40
 const QUANTUM = OVERSCAN >> 1
+const FREEZE_RENDERS = 2

 const upperBound = (arr: number[], target: number) => {
-  let lo = 0,
-    hi = arr.length
+  let lo = 0
+  let hi = arr.length

   while (lo < hi) {
     const mid = (lo + hi) >> 1
+
     arr[mid]! <= target ? (lo = mid + 1) : (hi = mid)
   }
@@ -31,6 +33,7 @@ export function useVirtualHistory(
   scrollRef: RefObject,
   items: readonly { key: string }[],
+  columns: number,
   { estimate = ESTIMATE, overscan = OVERSCAN, maxMounted = MAX_MOUNTED, coldStartCount = COLD_START } = {}
 ) {
   const nodes = useRef(new Map())
@@ -40,6 +43,29 @@ export function useVirtualHistory(
   const [hasScrollRef, setHasScrollRef] = useState(false)
   const metrics = useRef({ sticky: true, top: 0, vp: 0 })

+  // Width change: scale cached heights (not clear — clearing forces a
+  // pessimistic back-walk mounting ~190 rows at once, each a fresh
+  // marked.lexer + syntax highlight ≈ 3ms). Freeze mount range for 2
+  // renders so warm memos survive; skip one measurement so useLayoutEffect
+  // doesn't poison the scaled cache with pre-resize Yoga heights.
+  const prevColumns = useRef(columns)
+  const skipMeasurement = useRef(false)
+  const prevRange = useRef<[number, number] | null>(null)
+  const freezeRenders = useRef(0)
+
+  if (prevColumns.current !== columns && prevColumns.current > 0 && columns > 0) {
+    const ratio = prevColumns.current / columns
+
+    prevColumns.current = columns
+
+    for (const [k, h] of heights.current) {
+      heights.current.set(k, Math.max(1, Math.round(h * ratio)))
+    }
+
+    skipMeasurement.current = true
+    freezeRenders.current = FREEZE_RENDERS
+  }
+
   useLayoutEffect(() => {
     setHasScrollRef(Boolean(scrollRef.current))
   }, [scrollRef])
@@ -92,25 +118,41 @@ export function useVirtualHistory(
     return out
   }, [estimate, items, ver])

-  const total = offsets[items.length] ?? 0
+  const n = items.length
+  const total = offsets[n] ?? 0

   const top = Math.max(0, scrollRef.current?.getScrollTop() ?? 0)
   const vp = Math.max(0, scrollRef.current?.getViewportHeight() ?? 0)
   const sticky = scrollRef.current?.isSticky() ?? true

-  let start = 0,
-    end = items.length
+  // During a freeze, drop the frozen range if items shrank past its start
+  // (/clear, compaction) — clamping would collapse to an empty mount and
+  // flash blank. Fall through to the normal path in that case.
+  const frozenRange =
+    freezeRenders.current > 0 && prevRange.current && prevRange.current[0] < n ? prevRange.current : null

-  if (items.length > 0) {
+  let start = 0
+  let end = n
+
+  if (frozenRange) {
+    start = frozenRange[0]
+    end = Math.min(frozenRange[1], n)
+  } else if (n > 0) {
     if (vp <= 0) {
-      start = Math.max(0, items.length - coldStartCount)
+      start = Math.max(0, n - coldStartCount)
     } else {
-      start = Math.max(0, Math.min(items.length - 1, upperBound(offsets, Math.max(0, top - overscan)) - 1))
-      end = Math.max(start + 1, Math.min(items.length, upperBound(offsets, top + vp + overscan)))
+      start = Math.max(0, Math.min(n - 1, upperBound(offsets, Math.max(0, top - overscan)) - 1))
+      end = Math.max(start + 1, Math.min(n, upperBound(offsets, top + vp + overscan)))
     }
   }

   if (end - start > maxMounted) {
-    sticky ? (start = Math.max(0, end - maxMounted)) : (end = Math.min(items.length, start + maxMounted))
+    sticky ? (start = Math.max(0, end - maxMounted)) : (end = Math.min(n, start + maxMounted))
+  }
+
+  if (freezeRenders.current > 0) {
+    freezeRenders.current--
+  } else {
+    prevRange.current = [start, end]
   }

   const measureRef = useCallback((key: string) => {
@@ -127,18 +169,22 @@
   useLayoutEffect(() => {
     let dirty = false

-    for (let i = start; i < end; i++) {
-      const k = items[i]?.key
+    if (skipMeasurement.current) {
+      skipMeasurement.current = false
+    } else {
+      for (let i = start; i < end; i++) {
+        const k = items[i]?.key

-      if (!k) {
-        continue
-      }
+        if (!k) {
+          continue
+        }

-      const h = Math.ceil((nodes.current.get(k) as MeasuredNode | undefined)?.yogaNode?.getComputedHeight?.() ?? 0)
+        const h = Math.ceil((nodes.current.get(k) as MeasuredNode | undefined)?.yogaNode?.getComputedHeight?.() ?? 0)

-      if (h > 0 && heights.current.get(k) !== h) {
-        heights.current.set(k, h)
-        dirty = true
+        if (h > 0 && heights.current.get(k) !== h) {
+          heights.current.set(k, h)
+          dirty = true
+        }
       }
     }
diff --git a/ui-tui/src/lib/circularBuffer.ts b/ui-tui/src/lib/circularBuffer.ts
new file mode 100644
index 0000000000..31502fc227
--- /dev/null
+++ b/ui-tui/src/lib/circularBuffer.ts
@@ -0,0 +1,48 @@
+export class CircularBuffer<T> {
+  private buf: T[]
+  private head = 0
+  private len = 0
+
+  constructor(private capacity: number) {
+    if (!Number.isInteger(capacity) || capacity <= 0) {
+      throw new RangeError(`CircularBuffer capacity must be a positive integer, got ${capacity}`)
+    }
+
+    this.buf = new Array(capacity)
+  }
+
+  push(item: T) {
+    this.buf[this.head] = item
+    this.head = (this.head + 1) % this.capacity
+
+    if (this.len < this.capacity) {
+      this.len++
+    }
+  }
+
+  tail(n = this.len): T[] {
+    const take = Math.min(Math.max(0, n), this.len)
+    const start = this.len < this.capacity ? 0 : this.head
+    const out: T[] = new Array(take)
+
+    for (let i = 0; i < take; i++) {
+      out[i] = this.buf[(start + this.len - take + i) % this.capacity]!
+    }
+
+    return out
+  }
+
+  drain(): T[] {
+    const out = this.tail()
+
+    this.clear()
+
+    return out
+  }
+
+  clear() {
+    this.buf = new Array(this.capacity)
+    this.head = 0
+    this.len = 0
+  }
+}
diff --git a/ui-tui/src/lib/gracefulExit.ts b/ui-tui/src/lib/gracefulExit.ts
new file mode 100644
index 0000000000..2896fd1265
--- /dev/null
+++ b/ui-tui/src/lib/gracefulExit.ts
@@ -0,0 +1,47 @@
+interface SetupOptions {
+  cleanups?: (() => Promise<void> | void)[]
+  failsafeMs?: number
+  onError?: (scope: 'uncaughtException' | 'unhandledRejection', err: unknown) => void
+  onSignal?: (signal: NodeJS.Signals) => void
+}
+
+const SIGNAL_EXIT_CODE: Record<'SIGHUP' | 'SIGINT' | 'SIGTERM', number> = {
+  SIGHUP: 129,
+  SIGINT: 130,
+  SIGTERM: 143
+}
+
+let wired = false
+
+export function setupGracefulExit({ cleanups = [], failsafeMs = 4000, onError, onSignal }: SetupOptions = {}) {
+  if (wired) {
+    return
+  }
+
+  wired = true
+
+  let shuttingDown = false
+
+  const exit = (code: number, signal?: NodeJS.Signals) => {
+    if (shuttingDown) {
+      return
+    }
+
+    shuttingDown = true
+
+    if (signal) {
+      onSignal?.(signal)
+    }
+
+    setTimeout(() => process.exit(code), failsafeMs).unref?.()
+
+    void Promise.allSettled(cleanups.map(fn => Promise.resolve().then(fn))).finally(() => process.exit(code))
+  }
+
+  for (const sig of ['SIGINT', 'SIGTERM', 'SIGHUP'] as const) {
+    process.on(sig, () => exit(SIGNAL_EXIT_CODE[sig], sig))
+  }
+
+  process.on('uncaughtException', err => onError?.('uncaughtException', err))
+  process.on('unhandledRejection', reason => onError?.('unhandledRejection', reason))
+}
diff --git a/ui-tui/src/lib/memory.ts b/ui-tui/src/lib/memory.ts
new file mode 100644
index 0000000000..9f157adffc
--- /dev/null
+++ b/ui-tui/src/lib/memory.ts
@@ -0,0 +1,187 @@
+import { createWriteStream } from 'node:fs'
+import { mkdir, readdir, readFile, writeFile } from 'node:fs/promises'
+import { homedir, tmpdir } from 'node:os'
+import { join } from 'node:path'
+import { pipeline } from 'node:stream/promises'
+import { getHeapSnapshot, getHeapSpaceStatistics, getHeapStatistics } from 'node:v8'
+
+export type MemoryTrigger = 'auto-critical' | 'auto-high' | 'manual'
+
+export interface MemoryDiagnostics {
+  activeHandles: number
+  activeRequests: number
+  analysis: {
+    potentialLeaks: string[]
+    recommendation: string
+  }
+  memoryGrowthRate: {
+    bytesPerSecond: number
+    mbPerHour: number
+  }
+  memoryUsage: {
+    arrayBuffers: number
+    external: number
+    heapTotal: number
+    heapUsed: number
+    rss: number
+  }
+  nodeVersion: string
+  openFileDescriptors?: number
+  platform: string
+  resourceUsage: {
+    maxRSS: number
+    systemCPUTime: number
+    userCPUTime: number
+  }
+  smapsRollup?: string
+  timestamp: string
+  trigger: MemoryTrigger
+  uptimeSeconds: number
+  v8HeapSpaces?: { available: number; name: string; size: number; used: number }[]
+  v8HeapStats: {
+    detachedContexts: number
+    heapSizeLimit: number
+    mallocedMemory: number
+    nativeContexts: number
+    peakMallocedMemory: number
+  }
+}
+
+export interface HeapDumpResult {
+  diagPath?: string
+  error?: string
+  heapPath?: string
+  success: boolean
+}
+
+export async function captureMemoryDiagnostics(trigger: MemoryTrigger): Promise<MemoryDiagnostics> {
+  const usage = process.memoryUsage()
+  const heapStats = getHeapStatistics()
+  const resourceUsage = process.resourceUsage()
+  const uptimeSeconds = process.uptime()
+
+  // Not available on Bun / older Node.
+  let heapSpaces: ReturnType<typeof getHeapSpaceStatistics> | undefined
+
+  try {
+    heapSpaces = getHeapSpaceStatistics()
+  } catch {
+    /* noop */
+  }
+
+  const internals = process as unknown as {
+    _getActiveHandles: () => unknown[]
+    _getActiveRequests: () => unknown[]
+  }
+
+  const activeHandles = internals._getActiveHandles().length
+  const activeRequests = internals._getActiveRequests().length
+  const openFileDescriptors = await swallow(async () => (await readdir('/proc/self/fd')).length)
+  const smapsRollup = await swallow(() => readFile('/proc/self/smaps_rollup', 'utf8'))
+
+  const nativeMemory = usage.rss - usage.heapUsed
+  // Real growth rate since STARTED_AT (captured at module load) — NOT a lifetime
+  // average of rss/uptime, which would report phantom "growth" for a stable process.
+  const elapsed = Math.max(0, uptimeSeconds - STARTED_AT.uptime)
+  const bytesPerSecond = elapsed > 0 ? (usage.rss - STARTED_AT.rss) / elapsed : 0
+  const mbPerHour = (bytesPerSecond * 3600) / (1024 * 1024)
+
+  const potentialLeaks = [
+    heapStats.number_of_detached_contexts > 0 &&
+      `${heapStats.number_of_detached_contexts} detached context(s) — possible component/closure leak`,
+    activeHandles > 100 && `${activeHandles} active handles — possible timer/socket leak`,
+    nativeMemory > usage.heapUsed && 'Native memory > heap — leak may be in native addons',
+    mbPerHour > 100 && `High memory growth rate: ${mbPerHour.toFixed(1)} MB/hour`,
+    openFileDescriptors && openFileDescriptors > 500 && `${openFileDescriptors} open FDs — possible file/socket leak`
+  ].filter((s): s is string => typeof s === 'string')
+
+  return {
+    activeHandles,
+    activeRequests,
+    analysis: {
+      potentialLeaks,
+      recommendation: potentialLeaks.length
+        ? `WARNING: ${potentialLeaks.length} potential leak indicator(s). See potentialLeaks.`
+        : 'No obvious leak indicators. Inspect heap snapshot for retained objects.'
+    },
+    memoryGrowthRate: { bytesPerSecond, mbPerHour },
+    memoryUsage: {
+      arrayBuffers: usage.arrayBuffers,
+      external: usage.external,
+      heapTotal: usage.heapTotal,
+      heapUsed: usage.heapUsed,
+      rss: usage.rss
+    },
+    nodeVersion: process.version,
+    openFileDescriptors,
+    platform: process.platform,
+    resourceUsage: {
+      maxRSS: resourceUsage.maxRSS * 1024,
+      systemCPUTime: resourceUsage.systemCPUTime,
+      userCPUTime: resourceUsage.userCPUTime
+    },
+    smapsRollup,
+    timestamp: new Date().toISOString(),
+    trigger,
+    uptimeSeconds,
+    v8HeapSpaces: heapSpaces?.map(s => ({
+      available: s.space_available_size,
+      name: s.space_name,
+      size: s.space_size,
+      used: s.space_used_size
+    })),
+    v8HeapStats: {
+      detachedContexts: heapStats.number_of_detached_contexts,
+      heapSizeLimit: heapStats.heap_size_limit,
+      mallocedMemory: heapStats.malloced_memory,
+      nativeContexts: heapStats.number_of_native_contexts,
+      peakMallocedMemory: heapStats.peak_malloced_memory
+    }
+  }
+}
+
+export async function performHeapDump(trigger: MemoryTrigger = 'manual'): Promise<HeapDumpResult> {
+  try {
+    // Diagnostics first — heap-snapshot serialization can crash on very large
+    // heaps, and the JSON sidecar is the most actionable artifact if so.
+    const diagnostics = await captureMemoryDiagnostics(trigger)
+    const dir = process.env.HERMES_HEAPDUMP_DIR?.trim() || join(homedir() || tmpdir(), '.hermes', 'heapdumps')
+
+    await mkdir(dir, { recursive: true })
+
+    const base = `hermes-${new Date().toISOString().replace(/[:.]/g, '-')}-${process.pid}-${trigger}`
+    const heapPath = join(dir, `${base}.heapsnapshot`)
+    const diagPath = join(dir, `${base}.diagnostics.json`)
+
+    await writeFile(diagPath, JSON.stringify(diagnostics, null, 2), { mode: 0o600 })
+    await pipeline(getHeapSnapshot(), createWriteStream(heapPath, { mode: 0o600 }))
+
+    return { diagPath, heapPath, success: true }
+  } catch (e) {
+    return { error: e instanceof Error ? e.message : String(e), success: false }
+  }
+}
+
+export function formatBytes(bytes: number): string {
+  if (!Number.isFinite(bytes) || bytes <= 0) {
+    return '0B'
+  }
+
+  const exp = Math.min(UNITS.length - 1, Math.floor(Math.log10(bytes) / 3))
+  const value = bytes / 1024 ** exp
+
+  return `${value >= 100 ? value.toFixed(0) : value.toFixed(1)}${UNITS[exp]}`
+}
+
+const UNITS = ['B', 'KB', 'MB', 'GB', 'TB']
+
+const STARTED_AT = { rss: process.memoryUsage().rss, uptime: process.uptime() }
+
+// Returns undefined when the probe isn't available (non-Linux paths, sandboxed FS).
+const swallow = async <T>(fn: () => Promise<T>): Promise<T | undefined> => {
+  try {
+    return await fn()
+  } catch {
+    return undefined
+  }
+}
diff --git a/ui-tui/src/lib/memoryMonitor.ts b/ui-tui/src/lib/memoryMonitor.ts
new file mode 100644
index 0000000000..6655819b5a
--- /dev/null
+++ b/ui-tui/src/lib/memoryMonitor.ts
@@ -0,0 +1,55 @@
+import { type HeapDumpResult, performHeapDump } from './memory.js'
+
+export type MemoryLevel = 'critical' | 'high' | 'normal'
+
+export interface MemorySnapshot {
+  heapUsed: number
+  level: MemoryLevel
+  rss: number
+}
+
+export interface MemoryMonitorOptions {
+  criticalBytes?: number
+  highBytes?: number
+  intervalMs?: number
+  onCritical?: (snap: MemorySnapshot, dump: HeapDumpResult | null) => void
+  onHigh?: (snap: MemorySnapshot, dump: HeapDumpResult | null) => void
+}
+
+const GB = 1024 ** 3
+
+export function startMemoryMonitor({
+  criticalBytes = 2.5 * GB,
+  highBytes = 1.5 * GB,
+  intervalMs = 10_000,
+  onCritical,
+  onHigh
+}: MemoryMonitorOptions = {}): () => void {
+  const dumped = new Set<Exclude<MemoryLevel, 'normal'>>()
+
+  const tick = async () => {
+    const { heapUsed, rss } = process.memoryUsage()
+    const level: MemoryLevel = heapUsed >= criticalBytes ? 'critical' : heapUsed >= highBytes ? 'high' : 'normal'
+
+    if (level === 'normal') {
+      return void dumped.clear()
+    }
+
+    if (dumped.has(level)) {
+      return
+    }
+
+    dumped.add(level)
+    const dump = await performHeapDump(level === 'critical' ? 'auto-critical' : 'auto-high').catch(() => null)
+
+    const snap: MemorySnapshot = { heapUsed, level, rss }
+
+    ;(level === 'critical' ? onCritical : onHigh)?.(snap, dump)
+  }
+
+  const handle = setInterval(() => void tick(), intervalMs)
+
+  handle.unref?.()
+
+  return () => clearInterval(handle)
+}
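
For reviewers, a minimal usage sketch of the `CircularBuffer` eviction semantics introduced above (illustrative only, not part of the patch; the relative import path is assumed):

```ts
import { CircularBuffer } from './lib/circularBuffer.js'

// Capacity 3: the 4th push overwrites the oldest slot in place, so the
// gateway log and buffered-event stores stay O(capacity) instead of
// growing without bound over a long TUI session.
const buf = new CircularBuffer<number>(3)
for (const n of [1, 2, 3, 4]) buf.push(n)

buf.tail()   // [2, 3, 4]  (1 was evicted; oldest-first order)
buf.tail(2)  // [3, 4]     (the newest two entries)
buf.drain()  // [2, 3, 4], after which the buffer is empty
```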