Merge pull request #13231 from NousResearch/bb/tui-node-oom-hardening

fix(tui): harden against Node V8 OOM + GatewayClient leaks + resize perf
This commit is contained in:
brooklyn! 2026-04-20 19:12:43 -05:00 committed by GitHub
commit fc8e4ebf8e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 578 additions and 62 deletions

View file

@ -1003,6 +1003,17 @@ def _launch_tui(resume_session_id: Optional[str] = None, tui_dev: bool = False):
)
env.setdefault("HERMES_PYTHON", sys.executable)
env.setdefault("HERMES_CWD", os.getcwd())
# Guarantee an 8GB V8 heap + exposed GC for the TUI. Default node cap is
# ~1.54GB depending on version and can fatal-OOM on long sessions with
# large transcripts / reasoning blobs. Token-level merge: respect any
# user-supplied --max-old-space-size (they may have set it higher) and
# avoid duplicating --expose-gc.
_tokens = env.get("NODE_OPTIONS", "").split()
if not any(t.startswith("--max-old-space-size=") for t in _tokens):
_tokens.append("--max-old-space-size=8192")
if "--expose-gc" not in _tokens:
_tokens.append("--expose-gc")
env["NODE_OPTIONS"] = " ".join(_tokens)
if resume_session_id:
env["HERMES_TUI_RESUME"] = resume_session_id

View file

@ -0,0 +1,48 @@
import { formatBytes, performHeapDump } from '../../../lib/memory.js'
import type { SlashCommand } from '../types.js'
/**
 * Diagnostics slash commands: /heapdump (write snapshot + sidecar) and
 * /mem (print live process memory numbers to a transcript panel).
 */
export const debugCommands: SlashCommand[] = [
  {
    help: 'write a V8 heap snapshot + memory diagnostics (see HERMES_HEAPDUMP_DIR)',
    name: 'heapdump',
    run: (_arg, ctx) => {
      const usage = process.memoryUsage()
      ctx.transcript.sys(`writing heap dump (heap ${formatBytes(usage.heapUsed)} · rss ${formatBytes(usage.rss)})…`)
      void performHeapDump('manual').then(result => {
        // The dump can take a while; the issuing view may be gone by now.
        if (ctx.stale()) {
          return
        }
        if (result.success) {
          ctx.transcript.sys(`heapdump: ${result.heapPath}`)
          ctx.transcript.sys(`diagnostics: ${result.diagPath}`)
        } else {
          ctx.transcript.sys(`heapdump failed: ${result.error ?? 'unknown error'}`)
        }
      })
    }
  },
  {
    help: 'print live V8 heap + rss numbers',
    name: 'mem',
    run: (_arg, ctx) => {
      const m = process.memoryUsage()
      ctx.transcript.panel('Memory', [
        {
          rows: [
            ['heap used', formatBytes(m.heapUsed)],
            ['heap total', formatBytes(m.heapTotal)],
            ['external', formatBytes(m.external)],
            ['array buffers', formatBytes(m.arrayBuffers)],
            ['rss', formatBytes(m.rss)],
            ['uptime', `${process.uptime().toFixed(0)}s`]
          ]
        }
      ])
    }
  }
]

View file

@ -1,10 +1,17 @@
import { coreCommands } from './commands/core.js'
import { debugCommands } from './commands/debug.js'
import { opsCommands } from './commands/ops.js'
import { sessionCommands } from './commands/session.js'
import { setupCommands } from './commands/setup.js'
import type { SlashCommand } from './types.js'
export const SLASH_COMMANDS: SlashCommand[] = [...coreCommands, ...sessionCommands, ...opsCommands, ...setupCommands]
export const SLASH_COMMANDS: SlashCommand[] = [
...coreCommands,
...sessionCommands,
...opsCommands,
...setupCommands,
...debugCommands
]
const byName = new Map<string, SlashCommand>(
SLASH_COMMANDS.flatMap(cmd => [cmd.name, ...(cmd.aliases ?? [])].map(name => [name, cmd] as const))

View file

@ -161,7 +161,7 @@ export function useMainApp(gw: GatewayClient) {
[historyItems, messageId]
)
const virtualHistory = useVirtualHistory(scrollRef, virtualRows)
const virtualHistory = useVirtualHistory(scrollRef, virtualRows, cols)
const scrollWithSelection = useCallback(
(delta: number) => {
@ -306,12 +306,20 @@ export function useMainApp(gw: GatewayClient) {
return
}
const onResize = () =>
rpc<TerminalResizeResponse>('terminal.resize', { cols: stdout.columns ?? 80, session_id: ui.sid })
let timer: ReturnType<typeof setTimeout> | undefined
const onResize = () => {
clearTimeout(timer)
timer = setTimeout(() => {
timer = undefined
void rpc<TerminalResizeResponse>('terminal.resize', { cols: stdout.columns ?? 80, session_id: ui.sid })
}, 100)
}
stdout.on('resize', onResize)
return () => {
clearTimeout(timer)
stdout.off('resize', onResize)
}
}, [rpc, stdout, ui.sid])

View file

@ -1,7 +1,9 @@
#!/usr/bin/env node
// Order matters: paint banner + spawn python before loading @hermes/ink.
#!/usr/bin/env -S node --max-old-space-size=8192 --expose-gc
import { bootBanner } from './bootBanner.js'
import { GatewayClient } from './gatewayClient.js'
import { setupGracefulExit } from './lib/gracefulExit.js'
import { formatBytes, type HeapDumpResult, performHeapDump } from './lib/memory.js'
import { type MemorySnapshot, startMemoryMonitor } from './lib/memoryMonitor.js'
if (!process.stdin.isTTY) {
console.log('hermes-tui: no TTY')
@ -11,8 +13,37 @@ if (!process.stdin.isTTY) {
process.stdout.write(bootBanner())
const gw = new GatewayClient()
gw.start()
const dumpNotice = (snap: MemorySnapshot, dump: HeapDumpResult | null) =>
`hermes-tui: ${snap.level} memory (${formatBytes(snap.heapUsed)}) — auto heap dump → ${dump?.heapPath ?? '(failed)'}\n`
setupGracefulExit({
cleanups: [() => gw.kill()],
onError: (scope, err) => {
const message = err instanceof Error ? `${err.name}: ${err.message}` : String(err)
process.stderr.write(`hermes-tui ${scope}: ${message.slice(0, 2000)}\n`)
},
onSignal: signal => process.stderr.write(`hermes-tui: received ${signal}\n`)
})
const stopMemoryMonitor = startMemoryMonitor({
onCritical: (snap, dump) => {
process.stderr.write(dumpNotice(snap, dump))
process.stderr.write('hermes-tui: exiting to avoid OOM; restart to recover\n')
process.exit(137)
},
onHigh: (snap, dump) => process.stderr.write(dumpNotice(snap, dump))
})
if (process.env.HERMES_HEAPDUMP_ON_START === '1') {
void performHeapDump('manual')
}
process.on('beforeExit', () => stopMemoryMonitor())
const [{ render }, { App }] = await Promise.all([import('@hermes/ink'), import('./app.js')])
render(<App gw={gw} />, { exitOnCtrlC: false })

View file

@ -5,6 +5,7 @@ import { delimiter, resolve } from 'node:path'
import { createInterface } from 'node:readline'
import type { GatewayEvent } from './gatewayTypes.js'
import { CircularBuffer } from './lib/circularBuffer.js'
const MAX_GATEWAY_LOG_LINES = 200
const MAX_LOG_LINE_BYTES = 4096
@ -43,16 +44,19 @@ const asGatewayEvent = (value: unknown): GatewayEvent | null =>
: null
interface Pending {
id: string
method: string
reject: (e: Error) => void
resolve: (v: unknown) => void
timeout: ReturnType<typeof setTimeout>
}
export class GatewayClient extends EventEmitter {
private proc: ChildProcess | null = null
private reqId = 0
private logs: string[] = []
private logs = new CircularBuffer<string>(MAX_GATEWAY_LOG_LINES)
private pending = new Map<string, Pending>()
private bufferedEvents: GatewayEvent[] = []
private bufferedEvents = new CircularBuffer<GatewayEvent>(MAX_BUFFERED_EVENTS)
private pendingExit: number | null | undefined
private ready = false
private readyTimer: ReturnType<typeof setTimeout> | null = null
@ -60,6 +64,13 @@ export class GatewayClient extends EventEmitter {
private stdoutRl: ReturnType<typeof createInterface> | null = null
private stderrRl: ReturnType<typeof createInterface> | null = null
constructor() {
super()
// useInput / createGatewayEventHandler can legitimately attach many
// listeners. Default 10-cap triggers spurious warnings.
this.setMaxListeners(0)
}
private publish(ev: GatewayEvent) {
if (ev.type === 'gateway.ready') {
this.ready = true
@ -74,9 +85,7 @@ export class GatewayClient extends EventEmitter {
return void this.emit('event', ev)
}
if (this.bufferedEvents.push(ev) > MAX_BUFFERED_EVENTS) {
this.bufferedEvents.splice(0, this.bufferedEvents.length - MAX_BUFFERED_EVENTS)
}
this.bufferedEvents.push(ev)
}
start() {
@ -88,7 +97,7 @@ export class GatewayClient extends EventEmitter {
env.PYTHONPATH = pyPath ? `${root}${delimiter}${pyPath}` : root
this.ready = false
this.bufferedEvents = []
this.bufferedEvents.clear()
this.pendingExit = undefined
this.stdoutRl?.close()
this.stderrRl?.close()
@ -165,15 +174,7 @@ export class GatewayClient extends EventEmitter {
const p = id ? this.pending.get(id) : undefined
if (p) {
this.pending.delete(id!)
if (msg.error) {
const err = msg.error as { message?: unknown } | null | undefined
p.reject(new Error(typeof err?.message === 'string' ? err.message : 'request failed'))
} else {
p.resolve(msg.result)
}
this.settle(p, msg.error ? this.toError(msg.error) : null, msg.result)
return
}
@ -187,24 +188,51 @@ export class GatewayClient extends EventEmitter {
}
}
private pushLog(line: string) {
if (this.logs.push(truncateLine(line)) > MAX_GATEWAY_LOG_LINES) {
this.logs.splice(0, this.logs.length - MAX_GATEWAY_LOG_LINES)
private toError(raw: unknown): Error {
const err = raw as { message?: unknown } | null | undefined
return new Error(typeof err?.message === 'string' ? err.message : 'request failed')
}
private settle(p: Pending, err: Error | null, result: unknown) {
clearTimeout(p.timeout)
this.pending.delete(p.id)
if (err) {
p.reject(err)
} else {
p.resolve(result)
}
}
private pushLog(line: string) {
this.logs.push(truncateLine(line))
}
private rejectPending(err: Error) {
for (const p of this.pending.values()) {
clearTimeout(p.timeout)
p.reject(err)
}
this.pending.clear()
}
// Arrow class-field — stable identity, so `setTimeout(this.onTimeout, …, id)`
// doesn't allocate a bound function per request.
private onTimeout = (id: string) => {
const p = this.pending.get(id)
if (p) {
this.pending.delete(id)
p.reject(new Error(`timeout: ${p.method}`))
}
}
drain() {
this.subscribed = true
for (const ev of this.bufferedEvents.splice(0)) {
for (const ev of this.bufferedEvents.drain()) {
this.emit('event', ev)
}
@ -217,7 +245,7 @@ export class GatewayClient extends EventEmitter {
}
getLogTail(limit = 20): string {
return this.logs.slice(-Math.max(1, limit)).join('\n')
return this.logs.tail(Math.max(1, limit)).join('\n')
}
request<T = unknown>(method: string, params: Record<string, unknown> = {}): Promise<T> {
@ -231,29 +259,29 @@ export class GatewayClient extends EventEmitter {
const id = `r${++this.reqId}`
return new Promise((resolve, reject) => {
const timeout = setTimeout(() => {
if (this.pending.delete(id)) {
reject(new Error(`timeout: ${method}`))
}
}, REQUEST_TIMEOUT_MS)
return new Promise<T>((resolve, reject) => {
const timeout = setTimeout(this.onTimeout, REQUEST_TIMEOUT_MS, id)
timeout.unref?.()
this.pending.set(id, {
reject: e => {
clearTimeout(timeout)
reject(e)
},
resolve: v => {
clearTimeout(timeout)
resolve(v as T)
}
id,
method,
reject,
resolve: v => resolve(v as T),
timeout
})
try {
this.proc!.stdin!.write(JSON.stringify({ jsonrpc: '2.0', id, method, params }) + '\n')
this.proc!.stdin!.write(JSON.stringify({ id, jsonrpc: '2.0', method, params }) + '\n')
} catch (e) {
clearTimeout(timeout)
this.pending.delete(id)
const pending = this.pending.get(id)
if (pending) {
clearTimeout(pending.timeout)
this.pending.delete(id)
}
reject(e instanceof Error ? e : new Error(String(e)))
}
})

View file

@ -15,13 +15,15 @@ const OVERSCAN = 40
const MAX_MOUNTED = 260
const COLD_START = 40
const QUANTUM = OVERSCAN >> 1
const FREEZE_RENDERS = 2
const upperBound = (arr: number[], target: number) => {
let lo = 0,
hi = arr.length
let lo = 0
let hi = arr.length
while (lo < hi) {
const mid = (lo + hi) >> 1
arr[mid]! <= target ? (lo = mid + 1) : (hi = mid)
}
@ -31,6 +33,7 @@ const upperBound = (arr: number[], target: number) => {
export function useVirtualHistory(
scrollRef: RefObject<ScrollBoxHandle | null>,
items: readonly { key: string }[],
columns: number,
{ estimate = ESTIMATE, overscan = OVERSCAN, maxMounted = MAX_MOUNTED, coldStartCount = COLD_START } = {}
) {
const nodes = useRef(new Map<string, unknown>())
@ -40,6 +43,29 @@ export function useVirtualHistory(
const [hasScrollRef, setHasScrollRef] = useState(false)
const metrics = useRef({ sticky: true, top: 0, vp: 0 })
// Width change: scale cached heights (not clear — clearing forces a
// pessimistic back-walk mounting ~190 rows at once, each a fresh
// marked.lexer + syntax highlight ≈ 3ms). Freeze mount range for 2
// renders so warm memos survive; skip one measurement so useLayoutEffect
// doesn't poison the scaled cache with pre-resize Yoga heights.
const prevColumns = useRef(columns)
const skipMeasurement = useRef(false)
const prevRange = useRef<null | readonly [number, number]>(null)
const freezeRenders = useRef(0)
if (prevColumns.current !== columns && prevColumns.current > 0 && columns > 0) {
const ratio = prevColumns.current / columns
prevColumns.current = columns
for (const [k, h] of heights.current) {
heights.current.set(k, Math.max(1, Math.round(h * ratio)))
}
skipMeasurement.current = true
freezeRenders.current = FREEZE_RENDERS
}
useLayoutEffect(() => {
setHasScrollRef(Boolean(scrollRef.current))
}, [scrollRef])
@ -92,25 +118,41 @@ export function useVirtualHistory(
return out
}, [estimate, items, ver])
const total = offsets[items.length] ?? 0
const n = items.length
const total = offsets[n] ?? 0
const top = Math.max(0, scrollRef.current?.getScrollTop() ?? 0)
const vp = Math.max(0, scrollRef.current?.getViewportHeight() ?? 0)
const sticky = scrollRef.current?.isSticky() ?? true
let start = 0,
end = items.length
// During a freeze, drop the frozen range if items shrank past its start
// (/clear, compaction) — clamping would collapse to an empty mount and
// flash blank. Fall through to the normal path in that case.
const frozenRange =
freezeRenders.current > 0 && prevRange.current && prevRange.current[0] < n ? prevRange.current : null
if (items.length > 0) {
let start = 0
let end = n
if (frozenRange) {
start = frozenRange[0]
end = Math.min(frozenRange[1], n)
} else if (n > 0) {
if (vp <= 0) {
start = Math.max(0, items.length - coldStartCount)
start = Math.max(0, n - coldStartCount)
} else {
start = Math.max(0, Math.min(items.length - 1, upperBound(offsets, Math.max(0, top - overscan)) - 1))
end = Math.max(start + 1, Math.min(items.length, upperBound(offsets, top + vp + overscan)))
start = Math.max(0, Math.min(n - 1, upperBound(offsets, Math.max(0, top - overscan)) - 1))
end = Math.max(start + 1, Math.min(n, upperBound(offsets, top + vp + overscan)))
}
}
if (end - start > maxMounted) {
sticky ? (start = Math.max(0, end - maxMounted)) : (end = Math.min(items.length, start + maxMounted))
sticky ? (start = Math.max(0, end - maxMounted)) : (end = Math.min(n, start + maxMounted))
}
if (freezeRenders.current > 0) {
freezeRenders.current--
} else {
prevRange.current = [start, end]
}
const measureRef = useCallback((key: string) => {
@ -127,18 +169,22 @@ export function useVirtualHistory(
useLayoutEffect(() => {
let dirty = false
for (let i = start; i < end; i++) {
const k = items[i]?.key
if (skipMeasurement.current) {
skipMeasurement.current = false
} else {
for (let i = start; i < end; i++) {
const k = items[i]?.key
if (!k) {
continue
}
if (!k) {
continue
}
const h = Math.ceil((nodes.current.get(k) as MeasuredNode | undefined)?.yogaNode?.getComputedHeight?.() ?? 0)
const h = Math.ceil((nodes.current.get(k) as MeasuredNode | undefined)?.yogaNode?.getComputedHeight?.() ?? 0)
if (h > 0 && heights.current.get(k) !== h) {
heights.current.set(k, h)
dirty = true
if (h > 0 && heights.current.get(k) !== h) {
heights.current.set(k, h)
dirty = true
}
}
}

View file

@ -0,0 +1,48 @@
/**
 * Fixed-capacity ring buffer. Once full, each push overwrites the oldest
 * entry; `tail`/`drain` return elements in oldest→newest order.
 */
export class CircularBuffer<T> {
  private buf: T[]
  // Index of the next write slot.
  private head = 0
  // Number of live elements (never exceeds capacity).
  private len = 0

  constructor(private capacity: number) {
    if (!Number.isInteger(capacity) || capacity <= 0) {
      throw new RangeError(`CircularBuffer capacity must be a positive integer, got ${capacity}`)
    }
    this.buf = new Array<T>(capacity)
  }

  /** Append an item, evicting the oldest entry when at capacity. */
  push(item: T) {
    this.buf[this.head] = item
    this.head = this.head + 1 === this.capacity ? 0 : this.head + 1
    if (this.len !== this.capacity) {
      this.len += 1
    }
  }

  /** Return up to the last `n` items (default: all), oldest first. */
  tail(n = this.len): T[] {
    const count = Math.min(Math.max(0, n), this.len)
    // Until the buffer wraps, the oldest element is at index 0; afterwards
    // `head` points at the slot about to be overwritten, i.e. the oldest.
    const oldest = this.len === this.capacity ? this.head : 0
    const skip = this.len - count
    const out: T[] = new Array<T>(count)
    for (let i = 0; i < count; i++) {
      out[i] = this.buf[(oldest + skip + i) % this.capacity]!
    }
    return out
  }

  /** Return all items (oldest first) and reset the buffer. */
  drain(): T[] {
    const everything = this.tail()
    this.clear()
    return everything
  }

  /** Reset to empty. Reallocates storage so old entries become collectible. */
  clear() {
    this.buf = new Array<T>(this.capacity)
    this.head = 0
    this.len = 0
  }
}

View file

@ -0,0 +1,47 @@
interface SetupOptions {
  cleanups?: (() => Promise<void> | void)[]
  failsafeMs?: number
  onError?: (scope: 'uncaughtException' | 'unhandledRejection', err: unknown) => void
  onSignal?: (signal: NodeJS.Signals) => void
}

// Exit codes follow the 128 + signal-number shell convention.
const SIGNAL_EXIT_CODE: Record<'SIGHUP' | 'SIGINT' | 'SIGTERM', number> = {
  SIGHUP: 129,
  SIGINT: 130,
  SIGTERM: 143
}

// Module-level latch: wiring process-wide handlers twice would run cleanups twice.
let wired = false

/**
 * Install process-wide shutdown handling: run cleanups on SIGINT/SIGTERM/SIGHUP
 * then exit with the conventional code, with a failsafe timer in case a
 * cleanup hangs. uncaughtException/unhandledRejection are forwarded to
 * `onError` only — note that installing these handlers replaces Node's
 * default crash-on-uncaught behavior. Idempotent: later calls are no-ops.
 */
export function setupGracefulExit({ cleanups = [], failsafeMs = 4000, onError, onSignal }: SetupOptions = {}) {
  if (wired) {
    return
  }
  wired = true
  let exiting = false
  const shutdown = (code: number, signal?: NodeJS.Signals) => {
    if (exiting) {
      return
    }
    exiting = true
    if (signal) {
      onSignal?.(signal)
    }
    // Failsafe: force-exit if a cleanup never settles.
    setTimeout(() => process.exit(code), failsafeMs).unref?.()
    const pending = cleanups.map(fn => Promise.resolve().then(fn))
    void Promise.allSettled(pending).finally(() => process.exit(code))
  }
  ;(['SIGINT', 'SIGTERM', 'SIGHUP'] as const).forEach(sig => {
    process.on(sig, () => shutdown(SIGNAL_EXIT_CODE[sig], sig))
  })
  process.on('uncaughtException', err => onError?.('uncaughtException', err))
  process.on('unhandledRejection', reason => onError?.('unhandledRejection', reason))
}

187
ui-tui/src/lib/memory.ts Normal file
View file

@ -0,0 +1,187 @@
import { createWriteStream } from 'node:fs'
import { mkdir, readdir, readFile, writeFile } from 'node:fs/promises'
import { homedir, tmpdir } from 'node:os'
import { join } from 'node:path'
import { pipeline } from 'node:stream/promises'
import { getHeapSnapshot, getHeapSpaceStatistics, getHeapStatistics } from 'node:v8'
/** What initiated a capture: the auto-monitor thresholds or the /heapdump command. */
export type MemoryTrigger = 'auto-critical' | 'auto-high' | 'manual'

/** Point-in-time memory diagnostics, serialized as the `.diagnostics.json` sidecar. */
export interface MemoryDiagnostics {
  activeHandles: number
  activeRequests: number
  analysis: {
    potentialLeaks: string[]
    recommendation: string
  }
  // Growth measured since module load, not a lifetime average.
  memoryGrowthRate: {
    bytesPerSecond: number
    mbPerHour: number
  }
  memoryUsage: {
    arrayBuffers: number
    external: number
    heapTotal: number
    heapUsed: number
    rss: number
  }
  nodeVersion: string
  // Linux only (read from /proc/self/fd); undefined elsewhere.
  openFileDescriptors?: number
  platform: string
  resourceUsage: {
    maxRSS: number
    systemCPUTime: number
    userCPUTime: number
  }
  // Linux only (read from /proc/self/smaps_rollup); undefined elsewhere.
  smapsRollup?: string
  timestamp: string
  trigger: MemoryTrigger
  uptimeSeconds: number
  // Absent on runtimes without v8.getHeapSpaceStatistics (e.g. Bun / older Node).
  v8HeapSpaces?: { available: number; name: string; size: number; used: number }[]
  v8HeapStats: {
    detachedContexts: number
    heapSizeLimit: number
    mallocedMemory: number
    nativeContexts: number
    peakMallocedMemory: number
  }
}

/** Outcome of performHeapDump; on failure only `error` is populated. */
export interface HeapDumpResult {
  diagPath?: string
  error?: string
  heapPath?: string
  success: boolean
}
/**
 * Collect a point-in-time MemoryDiagnostics record: process.memoryUsage,
 * V8 heap statistics/spaces, active handle/request counts, Linux /proc
 * probes, RSS growth rate since module load, and heuristic leak indicators.
 * Pure data collection — writing files is performHeapDump's job.
 *
 * @param trigger Recorded verbatim so dumps can be attributed to their cause.
 * @returns Fully-populated diagnostics; platform probes that are unavailable
 *          simply leave their fields undefined rather than throwing.
 */
export async function captureMemoryDiagnostics(trigger: MemoryTrigger): Promise<MemoryDiagnostics> {
  const usage = process.memoryUsage()
  const heapStats = getHeapStatistics()
  const resourceUsage = process.resourceUsage()
  const uptimeSeconds = process.uptime()
  // Not available on Bun / older Node.
  let heapSpaces: ReturnType<typeof getHeapSpaceStatistics> | undefined
  try {
    heapSpaces = getHeapSpaceStatistics()
  } catch {
    /* noop */
  }
  // NOTE(review): _getActiveHandles/_getActiveRequests are undocumented Node
  // internals — confirm they exist on every Node version the TUI targets.
  const internals = process as unknown as {
    _getActiveHandles: () => unknown[]
    _getActiveRequests: () => unknown[]
  }
  const activeHandles = internals._getActiveHandles().length
  const activeRequests = internals._getActiveRequests().length
  // Linux-only probes; `swallow` maps any failure to undefined.
  const openFileDescriptors = await swallow(async () => (await readdir('/proc/self/fd')).length)
  const smapsRollup = await swallow(() => readFile('/proc/self/smaps_rollup', 'utf8'))
  // Rough proxy: everything resident that is not the JS heap.
  const nativeMemory = usage.rss - usage.heapUsed
  // Real growth rate since STARTED_AT (captured at module load) — NOT a lifetime
  // average of rss/uptime, which would report phantom "growth" for a stable process.
  const elapsed = Math.max(0, uptimeSeconds - STARTED_AT.uptime)
  const bytesPerSecond = elapsed > 0 ? (usage.rss - STARTED_AT.rss) / elapsed : 0
  const mbPerHour = (bytesPerSecond * 3600) / (1024 * 1024)
  // Heuristic thresholds; false positives are acceptable for a diagnostics report.
  const potentialLeaks = [
    heapStats.number_of_detached_contexts > 0 &&
      `${heapStats.number_of_detached_contexts} detached context(s) — possible component/closure leak`,
    activeHandles > 100 && `${activeHandles} active handles — possible timer/socket leak`,
    nativeMemory > usage.heapUsed && 'Native memory > heap — leak may be in native addons',
    mbPerHour > 100 && `High memory growth rate: ${mbPerHour.toFixed(1)} MB/hour`,
    openFileDescriptors && openFileDescriptors > 500 && `${openFileDescriptors} open FDs — possible file/socket leak`
  ].filter((s): s is string => typeof s === 'string')
  return {
    activeHandles,
    activeRequests,
    analysis: {
      potentialLeaks,
      recommendation: potentialLeaks.length
        ? `WARNING: ${potentialLeaks.length} potential leak indicator(s). See potentialLeaks.`
        : 'No obvious leak indicators. Inspect heap snapshot for retained objects.'
    },
    memoryGrowthRate: { bytesPerSecond, mbPerHour },
    memoryUsage: {
      arrayBuffers: usage.arrayBuffers,
      external: usage.external,
      heapTotal: usage.heapTotal,
      heapUsed: usage.heapUsed,
      rss: usage.rss
    },
    nodeVersion: process.version,
    openFileDescriptors,
    platform: process.platform,
    resourceUsage: {
      // ru_maxrss is reported in kilobytes; normalize to bytes.
      maxRSS: resourceUsage.maxRSS * 1024,
      systemCPUTime: resourceUsage.systemCPUTime,
      userCPUTime: resourceUsage.userCPUTime
    },
    smapsRollup,
    timestamp: new Date().toISOString(),
    trigger,
    uptimeSeconds,
    v8HeapSpaces: heapSpaces?.map(s => ({
      available: s.space_available_size,
      name: s.space_name,
      size: s.space_size,
      used: s.space_used_size
    })),
    v8HeapStats: {
      detachedContexts: heapStats.number_of_detached_contexts,
      heapSizeLimit: heapStats.heap_size_limit,
      mallocedMemory: heapStats.malloced_memory,
      nativeContexts: heapStats.number_of_native_contexts,
      peakMallocedMemory: heapStats.peak_malloced_memory
    }
  }
}
/**
 * Write a V8 heap snapshot plus a JSON diagnostics sidecar into
 * HERMES_HEAPDUMP_DIR (default ~/.hermes/heapdumps). Never throws:
 * failures are reported as { success: false, error }.
 */
export async function performHeapDump(trigger: MemoryTrigger = 'manual'): Promise<HeapDumpResult> {
  try {
    // Diagnostics first — heap-snapshot serialization can crash on very large
    // heaps, and the JSON sidecar is the most actionable artifact if so.
    const diagnostics = await captureMemoryDiagnostics(trigger)
    const targetDir = process.env.HERMES_HEAPDUMP_DIR?.trim() || join(homedir() || tmpdir(), '.hermes', 'heapdumps')
    await mkdir(targetDir, { recursive: true })
    const stamp = new Date().toISOString().replace(/[:.]/g, '-')
    const base = `hermes-${stamp}-${process.pid}-${trigger}`
    const heapPath = join(targetDir, `${base}.heapsnapshot`)
    const diagPath = join(targetDir, `${base}.diagnostics.json`)
    // 0o600 — dumps can contain anything that was in process memory.
    await writeFile(diagPath, JSON.stringify(diagnostics, null, 2), { mode: 0o600 })
    await pipeline(getHeapSnapshot(), createWriteStream(heapPath, { mode: 0o600 }))
    return { diagPath, heapPath, success: true }
  } catch (e) {
    return { error: e instanceof Error ? e.message : String(e), success: false }
  }
}
/**
 * Human-readable byte count using binary (1024-based) units.
 * Non-finite or non-positive input renders as '0B'.
 *
 * Fix: the previous exponent `Math.log10(bytes) / 3` picked a *decimal* (SI)
 * magnitude while the divisor below is *binary* (1024 ** exp), mislabeling
 * values near unit boundaries (e.g. 1_000_000 → "1.0MB" instead of "977KB").
 * It also produced a negative exponent for 0 < bytes < 1, indexing UNITS[-1]
 * and rendering "…undefined". Use log2/10 and clamp to [0, UNITS max].
 */
export function formatBytes(bytes: number): string {
  if (!Number.isFinite(bytes) || bytes <= 0) {
    return '0B'
  }
  const exp = Math.max(0, Math.min(UNITS.length - 1, Math.floor(Math.log2(bytes) / 10)))
  const value = bytes / 1024 ** exp
  return `${value >= 100 ? value.toFixed(0) : value.toFixed(1)}${UNITS[exp]}`
}
const UNITS = ['B', 'KB', 'MB', 'GB', 'TB']
// Growth-rate baseline: captured once at module load.
const STARTED_AT = { rss: process.memoryUsage().rss, uptime: process.uptime() }
// Returns undefined when the probe isn't available (non-Linux paths, sandboxed FS).
const swallow = async <T>(fn: () => Promise<T>): Promise<T | undefined> => {
  try {
    return await fn()
  } catch {
    return undefined
  }
}

View file

@ -0,0 +1,55 @@
import { type HeapDumpResult, performHeapDump } from './memory.js'
export type MemoryLevel = 'critical' | 'high' | 'normal'

export interface MemorySnapshot {
  heapUsed: number
  level: MemoryLevel
  rss: number
}

export interface MemoryMonitorOptions {
  criticalBytes?: number
  highBytes?: number
  intervalMs?: number
  onCritical?: (snap: MemorySnapshot, dump: HeapDumpResult | null) => void
  onHigh?: (snap: MemorySnapshot, dump: HeapDumpResult | null) => void
}

const GB = 1024 ** 3

/**
 * Poll V8 heap usage on an (unref'd) interval and fire `onHigh`/`onCritical`
 * once per excursion above each threshold, attaching an automatic heap dump.
 * Returning to normal re-arms both callbacks. Returns a stop function.
 */
export function startMemoryMonitor({
  criticalBytes = 2.5 * GB,
  highBytes = 1.5 * GB,
  intervalMs = 10_000,
  onCritical,
  onHigh
}: MemoryMonitorOptions = {}): () => void {
  // Levels already reported during the current excursion.
  const reported = new Set<Exclude<MemoryLevel, 'normal'>>()
  const check = async () => {
    const { heapUsed, rss } = process.memoryUsage()
    let level: MemoryLevel
    if (heapUsed >= criticalBytes) {
      level = 'critical'
    } else if (heapUsed >= highBytes) {
      level = 'high'
    } else {
      level = 'normal'
    }
    if (level === 'normal') {
      // Re-arm so the next excursion reports (and dumps) again.
      reported.clear()
      return
    }
    if (reported.has(level)) {
      return
    }
    reported.add(level)
    const dump = await performHeapDump(level === 'critical' ? 'auto-critical' : 'auto-high').catch(() => null)
    const snapshot: MemorySnapshot = { heapUsed, level, rss }
    const notify = level === 'critical' ? onCritical : onHigh
    notify?.(snapshot, dump)
  }
  const timer = setInterval(() => void check(), intervalMs)
  // Don't keep the process alive just for the monitor.
  timer.unref?.()
  return () => clearInterval(timer)
}