mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
Merge pull request #13231 from NousResearch/bb/tui-node-oom-hardening
fix(tui): harden against Node V8 OOM + GatewayClient leaks + resize perf
This commit is contained in:
commit
fc8e4ebf8e
11 changed files with 578 additions and 62 deletions
|
|
@ -1003,6 +1003,17 @@ def _launch_tui(resume_session_id: Optional[str] = None, tui_dev: bool = False):
|
|||
)
|
||||
env.setdefault("HERMES_PYTHON", sys.executable)
|
||||
env.setdefault("HERMES_CWD", os.getcwd())
|
||||
# Guarantee an 8GB V8 heap + exposed GC for the TUI. Default node cap is
|
||||
# ~1.5–4GB depending on version and can fatal-OOM on long sessions with
|
||||
# large transcripts / reasoning blobs. Token-level merge: respect any
|
||||
# user-supplied --max-old-space-size (they may have set it higher) and
|
||||
# avoid duplicating --expose-gc.
|
||||
_tokens = env.get("NODE_OPTIONS", "").split()
|
||||
if not any(t.startswith("--max-old-space-size=") for t in _tokens):
|
||||
_tokens.append("--max-old-space-size=8192")
|
||||
if "--expose-gc" not in _tokens:
|
||||
_tokens.append("--expose-gc")
|
||||
env["NODE_OPTIONS"] = " ".join(_tokens)
|
||||
if resume_session_id:
|
||||
env["HERMES_TUI_RESUME"] = resume_session_id
|
||||
|
||||
|
|
|
|||
48
ui-tui/src/app/slash/commands/debug.ts
Normal file
48
ui-tui/src/app/slash/commands/debug.ts
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
import { formatBytes, performHeapDump } from '../../../lib/memory.js'
|
||||
import type { SlashCommand } from '../types.js'
|
||||
|
||||
export const debugCommands: SlashCommand[] = [
|
||||
{
|
||||
help: 'write a V8 heap snapshot + memory diagnostics (see HERMES_HEAPDUMP_DIR)',
|
||||
name: 'heapdump',
|
||||
run: (_arg, ctx) => {
|
||||
const { heapUsed, rss } = process.memoryUsage()
|
||||
|
||||
ctx.transcript.sys(`writing heap dump (heap ${formatBytes(heapUsed)} · rss ${formatBytes(rss)})…`)
|
||||
|
||||
void performHeapDump('manual').then(r => {
|
||||
if (ctx.stale()) {
|
||||
return
|
||||
}
|
||||
|
||||
if (!r.success) {
|
||||
return ctx.transcript.sys(`heapdump failed: ${r.error ?? 'unknown error'}`)
|
||||
}
|
||||
|
||||
ctx.transcript.sys(`heapdump: ${r.heapPath}`)
|
||||
ctx.transcript.sys(`diagnostics: ${r.diagPath}`)
|
||||
})
|
||||
}
|
||||
},
|
||||
|
||||
{
|
||||
help: 'print live V8 heap + rss numbers',
|
||||
name: 'mem',
|
||||
run: (_arg, ctx) => {
|
||||
const { arrayBuffers, external, heapTotal, heapUsed, rss } = process.memoryUsage()
|
||||
|
||||
ctx.transcript.panel('Memory', [
|
||||
{
|
||||
rows: [
|
||||
['heap used', formatBytes(heapUsed)],
|
||||
['heap total', formatBytes(heapTotal)],
|
||||
['external', formatBytes(external)],
|
||||
['array buffers', formatBytes(arrayBuffers)],
|
||||
['rss', formatBytes(rss)],
|
||||
['uptime', `${process.uptime().toFixed(0)}s`]
|
||||
]
|
||||
}
|
||||
])
|
||||
}
|
||||
}
|
||||
]
|
||||
|
|
@ -1,10 +1,17 @@
|
|||
import { coreCommands } from './commands/core.js'
|
||||
import { debugCommands } from './commands/debug.js'
|
||||
import { opsCommands } from './commands/ops.js'
|
||||
import { sessionCommands } from './commands/session.js'
|
||||
import { setupCommands } from './commands/setup.js'
|
||||
import type { SlashCommand } from './types.js'
|
||||
|
||||
export const SLASH_COMMANDS: SlashCommand[] = [...coreCommands, ...sessionCommands, ...opsCommands, ...setupCommands]
|
||||
export const SLASH_COMMANDS: SlashCommand[] = [
|
||||
...coreCommands,
|
||||
...sessionCommands,
|
||||
...opsCommands,
|
||||
...setupCommands,
|
||||
...debugCommands
|
||||
]
|
||||
|
||||
const byName = new Map<string, SlashCommand>(
|
||||
SLASH_COMMANDS.flatMap(cmd => [cmd.name, ...(cmd.aliases ?? [])].map(name => [name, cmd] as const))
|
||||
|
|
|
|||
|
|
@ -161,7 +161,7 @@ export function useMainApp(gw: GatewayClient) {
|
|||
[historyItems, messageId]
|
||||
)
|
||||
|
||||
const virtualHistory = useVirtualHistory(scrollRef, virtualRows)
|
||||
const virtualHistory = useVirtualHistory(scrollRef, virtualRows, cols)
|
||||
|
||||
const scrollWithSelection = useCallback(
|
||||
(delta: number) => {
|
||||
|
|
@ -306,12 +306,20 @@ export function useMainApp(gw: GatewayClient) {
|
|||
return
|
||||
}
|
||||
|
||||
const onResize = () =>
|
||||
rpc<TerminalResizeResponse>('terminal.resize', { cols: stdout.columns ?? 80, session_id: ui.sid })
|
||||
let timer: ReturnType<typeof setTimeout> | undefined
|
||||
|
||||
const onResize = () => {
|
||||
clearTimeout(timer)
|
||||
timer = setTimeout(() => {
|
||||
timer = undefined
|
||||
void rpc<TerminalResizeResponse>('terminal.resize', { cols: stdout.columns ?? 80, session_id: ui.sid })
|
||||
}, 100)
|
||||
}
|
||||
|
||||
stdout.on('resize', onResize)
|
||||
|
||||
return () => {
|
||||
clearTimeout(timer)
|
||||
stdout.off('resize', onResize)
|
||||
}
|
||||
}, [rpc, stdout, ui.sid])
|
||||
|
|
|
|||
|
|
@ -1,7 +1,9 @@
|
|||
#!/usr/bin/env node
|
||||
// Order matters: paint banner + spawn python before loading @hermes/ink.
|
||||
#!/usr/bin/env -S node --max-old-space-size=8192 --expose-gc
|
||||
import { bootBanner } from './bootBanner.js'
|
||||
import { GatewayClient } from './gatewayClient.js'
|
||||
import { setupGracefulExit } from './lib/gracefulExit.js'
|
||||
import { formatBytes, type HeapDumpResult, performHeapDump } from './lib/memory.js'
|
||||
import { type MemorySnapshot, startMemoryMonitor } from './lib/memoryMonitor.js'
|
||||
|
||||
if (!process.stdin.isTTY) {
|
||||
console.log('hermes-tui: no TTY')
|
||||
|
|
@ -11,8 +13,37 @@ if (!process.stdin.isTTY) {
|
|||
process.stdout.write(bootBanner())
|
||||
|
||||
const gw = new GatewayClient()
|
||||
|
||||
gw.start()
|
||||
|
||||
const dumpNotice = (snap: MemorySnapshot, dump: HeapDumpResult | null) =>
|
||||
`hermes-tui: ${snap.level} memory (${formatBytes(snap.heapUsed)}) — auto heap dump → ${dump?.heapPath ?? '(failed)'}\n`
|
||||
|
||||
setupGracefulExit({
|
||||
cleanups: [() => gw.kill()],
|
||||
onError: (scope, err) => {
|
||||
const message = err instanceof Error ? `${err.name}: ${err.message}` : String(err)
|
||||
|
||||
process.stderr.write(`hermes-tui ${scope}: ${message.slice(0, 2000)}\n`)
|
||||
},
|
||||
onSignal: signal => process.stderr.write(`hermes-tui: received ${signal}\n`)
|
||||
})
|
||||
|
||||
const stopMemoryMonitor = startMemoryMonitor({
|
||||
onCritical: (snap, dump) => {
|
||||
process.stderr.write(dumpNotice(snap, dump))
|
||||
process.stderr.write('hermes-tui: exiting to avoid OOM; restart to recover\n')
|
||||
process.exit(137)
|
||||
},
|
||||
onHigh: (snap, dump) => process.stderr.write(dumpNotice(snap, dump))
|
||||
})
|
||||
|
||||
if (process.env.HERMES_HEAPDUMP_ON_START === '1') {
|
||||
void performHeapDump('manual')
|
||||
}
|
||||
|
||||
process.on('beforeExit', () => stopMemoryMonitor())
|
||||
|
||||
const [{ render }, { App }] = await Promise.all([import('@hermes/ink'), import('./app.js')])
|
||||
|
||||
render(<App gw={gw} />, { exitOnCtrlC: false })
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ import { delimiter, resolve } from 'node:path'
|
|||
import { createInterface } from 'node:readline'
|
||||
|
||||
import type { GatewayEvent } from './gatewayTypes.js'
|
||||
import { CircularBuffer } from './lib/circularBuffer.js'
|
||||
|
||||
const MAX_GATEWAY_LOG_LINES = 200
|
||||
const MAX_LOG_LINE_BYTES = 4096
|
||||
|
|
@ -43,16 +44,19 @@ const asGatewayEvent = (value: unknown): GatewayEvent | null =>
|
|||
: null
|
||||
|
||||
interface Pending {
|
||||
id: string
|
||||
method: string
|
||||
reject: (e: Error) => void
|
||||
resolve: (v: unknown) => void
|
||||
timeout: ReturnType<typeof setTimeout>
|
||||
}
|
||||
|
||||
export class GatewayClient extends EventEmitter {
|
||||
private proc: ChildProcess | null = null
|
||||
private reqId = 0
|
||||
private logs: string[] = []
|
||||
private logs = new CircularBuffer<string>(MAX_GATEWAY_LOG_LINES)
|
||||
private pending = new Map<string, Pending>()
|
||||
private bufferedEvents: GatewayEvent[] = []
|
||||
private bufferedEvents = new CircularBuffer<GatewayEvent>(MAX_BUFFERED_EVENTS)
|
||||
private pendingExit: number | null | undefined
|
||||
private ready = false
|
||||
private readyTimer: ReturnType<typeof setTimeout> | null = null
|
||||
|
|
@ -60,6 +64,13 @@ export class GatewayClient extends EventEmitter {
|
|||
private stdoutRl: ReturnType<typeof createInterface> | null = null
|
||||
private stderrRl: ReturnType<typeof createInterface> | null = null
|
||||
|
||||
constructor() {
|
||||
super()
|
||||
// useInput / createGatewayEventHandler can legitimately attach many
|
||||
// listeners. Default 10-cap triggers spurious warnings.
|
||||
this.setMaxListeners(0)
|
||||
}
|
||||
|
||||
private publish(ev: GatewayEvent) {
|
||||
if (ev.type === 'gateway.ready') {
|
||||
this.ready = true
|
||||
|
|
@ -74,9 +85,7 @@ export class GatewayClient extends EventEmitter {
|
|||
return void this.emit('event', ev)
|
||||
}
|
||||
|
||||
if (this.bufferedEvents.push(ev) > MAX_BUFFERED_EVENTS) {
|
||||
this.bufferedEvents.splice(0, this.bufferedEvents.length - MAX_BUFFERED_EVENTS)
|
||||
}
|
||||
this.bufferedEvents.push(ev)
|
||||
}
|
||||
|
||||
start() {
|
||||
|
|
@ -88,7 +97,7 @@ export class GatewayClient extends EventEmitter {
|
|||
env.PYTHONPATH = pyPath ? `${root}${delimiter}${pyPath}` : root
|
||||
|
||||
this.ready = false
|
||||
this.bufferedEvents = []
|
||||
this.bufferedEvents.clear()
|
||||
this.pendingExit = undefined
|
||||
this.stdoutRl?.close()
|
||||
this.stderrRl?.close()
|
||||
|
|
@ -165,15 +174,7 @@ export class GatewayClient extends EventEmitter {
|
|||
const p = id ? this.pending.get(id) : undefined
|
||||
|
||||
if (p) {
|
||||
this.pending.delete(id!)
|
||||
|
||||
if (msg.error) {
|
||||
const err = msg.error as { message?: unknown } | null | undefined
|
||||
|
||||
p.reject(new Error(typeof err?.message === 'string' ? err.message : 'request failed'))
|
||||
} else {
|
||||
p.resolve(msg.result)
|
||||
}
|
||||
this.settle(p, msg.error ? this.toError(msg.error) : null, msg.result)
|
||||
|
||||
return
|
||||
}
|
||||
|
|
@ -187,24 +188,51 @@ export class GatewayClient extends EventEmitter {
|
|||
}
|
||||
}
|
||||
|
||||
private pushLog(line: string) {
|
||||
if (this.logs.push(truncateLine(line)) > MAX_GATEWAY_LOG_LINES) {
|
||||
this.logs.splice(0, this.logs.length - MAX_GATEWAY_LOG_LINES)
|
||||
private toError(raw: unknown): Error {
|
||||
const err = raw as { message?: unknown } | null | undefined
|
||||
|
||||
return new Error(typeof err?.message === 'string' ? err.message : 'request failed')
|
||||
}
|
||||
|
||||
private settle(p: Pending, err: Error | null, result: unknown) {
|
||||
clearTimeout(p.timeout)
|
||||
this.pending.delete(p.id)
|
||||
|
||||
if (err) {
|
||||
p.reject(err)
|
||||
} else {
|
||||
p.resolve(result)
|
||||
}
|
||||
}
|
||||
|
||||
private pushLog(line: string) {
|
||||
this.logs.push(truncateLine(line))
|
||||
}
|
||||
|
||||
private rejectPending(err: Error) {
|
||||
for (const p of this.pending.values()) {
|
||||
clearTimeout(p.timeout)
|
||||
p.reject(err)
|
||||
}
|
||||
|
||||
this.pending.clear()
|
||||
}
|
||||
|
||||
// Arrow class-field — stable identity, so `setTimeout(this.onTimeout, …, id)`
|
||||
// doesn't allocate a bound function per request.
|
||||
private onTimeout = (id: string) => {
|
||||
const p = this.pending.get(id)
|
||||
|
||||
if (p) {
|
||||
this.pending.delete(id)
|
||||
p.reject(new Error(`timeout: ${p.method}`))
|
||||
}
|
||||
}
|
||||
|
||||
drain() {
|
||||
this.subscribed = true
|
||||
|
||||
for (const ev of this.bufferedEvents.splice(0)) {
|
||||
for (const ev of this.bufferedEvents.drain()) {
|
||||
this.emit('event', ev)
|
||||
}
|
||||
|
||||
|
|
@ -217,7 +245,7 @@ export class GatewayClient extends EventEmitter {
|
|||
}
|
||||
|
||||
getLogTail(limit = 20): string {
|
||||
return this.logs.slice(-Math.max(1, limit)).join('\n')
|
||||
return this.logs.tail(Math.max(1, limit)).join('\n')
|
||||
}
|
||||
|
||||
request<T = unknown>(method: string, params: Record<string, unknown> = {}): Promise<T> {
|
||||
|
|
@ -231,29 +259,29 @@ export class GatewayClient extends EventEmitter {
|
|||
|
||||
const id = `r${++this.reqId}`
|
||||
|
||||
return new Promise((resolve, reject) => {
|
||||
const timeout = setTimeout(() => {
|
||||
if (this.pending.delete(id)) {
|
||||
reject(new Error(`timeout: ${method}`))
|
||||
}
|
||||
}, REQUEST_TIMEOUT_MS)
|
||||
return new Promise<T>((resolve, reject) => {
|
||||
const timeout = setTimeout(this.onTimeout, REQUEST_TIMEOUT_MS, id)
|
||||
|
||||
timeout.unref?.()
|
||||
|
||||
this.pending.set(id, {
|
||||
reject: e => {
|
||||
clearTimeout(timeout)
|
||||
reject(e)
|
||||
},
|
||||
resolve: v => {
|
||||
clearTimeout(timeout)
|
||||
resolve(v as T)
|
||||
}
|
||||
id,
|
||||
method,
|
||||
reject,
|
||||
resolve: v => resolve(v as T),
|
||||
timeout
|
||||
})
|
||||
|
||||
try {
|
||||
this.proc!.stdin!.write(JSON.stringify({ jsonrpc: '2.0', id, method, params }) + '\n')
|
||||
this.proc!.stdin!.write(JSON.stringify({ id, jsonrpc: '2.0', method, params }) + '\n')
|
||||
} catch (e) {
|
||||
clearTimeout(timeout)
|
||||
this.pending.delete(id)
|
||||
const pending = this.pending.get(id)
|
||||
|
||||
if (pending) {
|
||||
clearTimeout(pending.timeout)
|
||||
this.pending.delete(id)
|
||||
}
|
||||
|
||||
reject(e instanceof Error ? e : new Error(String(e)))
|
||||
}
|
||||
})
|
||||
|
|
|
|||
|
|
@ -15,13 +15,15 @@ const OVERSCAN = 40
|
|||
const MAX_MOUNTED = 260
|
||||
const COLD_START = 40
|
||||
const QUANTUM = OVERSCAN >> 1
|
||||
const FREEZE_RENDERS = 2
|
||||
|
||||
const upperBound = (arr: number[], target: number) => {
|
||||
let lo = 0,
|
||||
hi = arr.length
|
||||
let lo = 0
|
||||
let hi = arr.length
|
||||
|
||||
while (lo < hi) {
|
||||
const mid = (lo + hi) >> 1
|
||||
|
||||
arr[mid]! <= target ? (lo = mid + 1) : (hi = mid)
|
||||
}
|
||||
|
||||
|
|
@ -31,6 +33,7 @@ const upperBound = (arr: number[], target: number) => {
|
|||
export function useVirtualHistory(
|
||||
scrollRef: RefObject<ScrollBoxHandle | null>,
|
||||
items: readonly { key: string }[],
|
||||
columns: number,
|
||||
{ estimate = ESTIMATE, overscan = OVERSCAN, maxMounted = MAX_MOUNTED, coldStartCount = COLD_START } = {}
|
||||
) {
|
||||
const nodes = useRef(new Map<string, unknown>())
|
||||
|
|
@ -40,6 +43,29 @@ export function useVirtualHistory(
|
|||
const [hasScrollRef, setHasScrollRef] = useState(false)
|
||||
const metrics = useRef({ sticky: true, top: 0, vp: 0 })
|
||||
|
||||
// Width change: scale cached heights (not clear — clearing forces a
|
||||
// pessimistic back-walk mounting ~190 rows at once, each a fresh
|
||||
// marked.lexer + syntax highlight ≈ 3ms). Freeze mount range for 2
|
||||
// renders so warm memos survive; skip one measurement so useLayoutEffect
|
||||
// doesn't poison the scaled cache with pre-resize Yoga heights.
|
||||
const prevColumns = useRef(columns)
|
||||
const skipMeasurement = useRef(false)
|
||||
const prevRange = useRef<null | readonly [number, number]>(null)
|
||||
const freezeRenders = useRef(0)
|
||||
|
||||
if (prevColumns.current !== columns && prevColumns.current > 0 && columns > 0) {
|
||||
const ratio = prevColumns.current / columns
|
||||
|
||||
prevColumns.current = columns
|
||||
|
||||
for (const [k, h] of heights.current) {
|
||||
heights.current.set(k, Math.max(1, Math.round(h * ratio)))
|
||||
}
|
||||
|
||||
skipMeasurement.current = true
|
||||
freezeRenders.current = FREEZE_RENDERS
|
||||
}
|
||||
|
||||
useLayoutEffect(() => {
|
||||
setHasScrollRef(Boolean(scrollRef.current))
|
||||
}, [scrollRef])
|
||||
|
|
@ -92,25 +118,41 @@ export function useVirtualHistory(
|
|||
return out
|
||||
}, [estimate, items, ver])
|
||||
|
||||
const total = offsets[items.length] ?? 0
|
||||
const n = items.length
|
||||
const total = offsets[n] ?? 0
|
||||
const top = Math.max(0, scrollRef.current?.getScrollTop() ?? 0)
|
||||
const vp = Math.max(0, scrollRef.current?.getViewportHeight() ?? 0)
|
||||
const sticky = scrollRef.current?.isSticky() ?? true
|
||||
|
||||
let start = 0,
|
||||
end = items.length
|
||||
// During a freeze, drop the frozen range if items shrank past its start
|
||||
// (/clear, compaction) — clamping would collapse to an empty mount and
|
||||
// flash blank. Fall through to the normal path in that case.
|
||||
const frozenRange =
|
||||
freezeRenders.current > 0 && prevRange.current && prevRange.current[0] < n ? prevRange.current : null
|
||||
|
||||
if (items.length > 0) {
|
||||
let start = 0
|
||||
let end = n
|
||||
|
||||
if (frozenRange) {
|
||||
start = frozenRange[0]
|
||||
end = Math.min(frozenRange[1], n)
|
||||
} else if (n > 0) {
|
||||
if (vp <= 0) {
|
||||
start = Math.max(0, items.length - coldStartCount)
|
||||
start = Math.max(0, n - coldStartCount)
|
||||
} else {
|
||||
start = Math.max(0, Math.min(items.length - 1, upperBound(offsets, Math.max(0, top - overscan)) - 1))
|
||||
end = Math.max(start + 1, Math.min(items.length, upperBound(offsets, top + vp + overscan)))
|
||||
start = Math.max(0, Math.min(n - 1, upperBound(offsets, Math.max(0, top - overscan)) - 1))
|
||||
end = Math.max(start + 1, Math.min(n, upperBound(offsets, top + vp + overscan)))
|
||||
}
|
||||
}
|
||||
|
||||
if (end - start > maxMounted) {
|
||||
sticky ? (start = Math.max(0, end - maxMounted)) : (end = Math.min(items.length, start + maxMounted))
|
||||
sticky ? (start = Math.max(0, end - maxMounted)) : (end = Math.min(n, start + maxMounted))
|
||||
}
|
||||
|
||||
if (freezeRenders.current > 0) {
|
||||
freezeRenders.current--
|
||||
} else {
|
||||
prevRange.current = [start, end]
|
||||
}
|
||||
|
||||
const measureRef = useCallback((key: string) => {
|
||||
|
|
@ -127,18 +169,22 @@ export function useVirtualHistory(
|
|||
useLayoutEffect(() => {
|
||||
let dirty = false
|
||||
|
||||
for (let i = start; i < end; i++) {
|
||||
const k = items[i]?.key
|
||||
if (skipMeasurement.current) {
|
||||
skipMeasurement.current = false
|
||||
} else {
|
||||
for (let i = start; i < end; i++) {
|
||||
const k = items[i]?.key
|
||||
|
||||
if (!k) {
|
||||
continue
|
||||
}
|
||||
if (!k) {
|
||||
continue
|
||||
}
|
||||
|
||||
const h = Math.ceil((nodes.current.get(k) as MeasuredNode | undefined)?.yogaNode?.getComputedHeight?.() ?? 0)
|
||||
const h = Math.ceil((nodes.current.get(k) as MeasuredNode | undefined)?.yogaNode?.getComputedHeight?.() ?? 0)
|
||||
|
||||
if (h > 0 && heights.current.get(k) !== h) {
|
||||
heights.current.set(k, h)
|
||||
dirty = true
|
||||
if (h > 0 && heights.current.get(k) !== h) {
|
||||
heights.current.set(k, h)
|
||||
dirty = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
48
ui-tui/src/lib/circularBuffer.ts
Normal file
48
ui-tui/src/lib/circularBuffer.ts
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
export class CircularBuffer<T> {
|
||||
private buf: T[]
|
||||
private head = 0
|
||||
private len = 0
|
||||
|
||||
constructor(private capacity: number) {
|
||||
if (!Number.isInteger(capacity) || capacity <= 0) {
|
||||
throw new RangeError(`CircularBuffer capacity must be a positive integer, got ${capacity}`)
|
||||
}
|
||||
|
||||
this.buf = new Array<T>(capacity)
|
||||
}
|
||||
|
||||
push(item: T) {
|
||||
this.buf[this.head] = item
|
||||
this.head = (this.head + 1) % this.capacity
|
||||
|
||||
if (this.len < this.capacity) {
|
||||
this.len++
|
||||
}
|
||||
}
|
||||
|
||||
tail(n = this.len): T[] {
|
||||
const take = Math.min(Math.max(0, n), this.len)
|
||||
const start = this.len < this.capacity ? 0 : this.head
|
||||
const out: T[] = new Array<T>(take)
|
||||
|
||||
for (let i = 0; i < take; i++) {
|
||||
out[i] = this.buf[(start + this.len - take + i) % this.capacity]!
|
||||
}
|
||||
|
||||
return out
|
||||
}
|
||||
|
||||
drain(): T[] {
|
||||
const out = this.tail()
|
||||
|
||||
this.clear()
|
||||
|
||||
return out
|
||||
}
|
||||
|
||||
clear() {
|
||||
this.buf = new Array<T>(this.capacity)
|
||||
this.head = 0
|
||||
this.len = 0
|
||||
}
|
||||
}
|
||||
47
ui-tui/src/lib/gracefulExit.ts
Normal file
47
ui-tui/src/lib/gracefulExit.ts
Normal file
|
|
@ -0,0 +1,47 @@
|
|||
interface SetupOptions {
|
||||
cleanups?: (() => Promise<void> | void)[]
|
||||
failsafeMs?: number
|
||||
onError?: (scope: 'uncaughtException' | 'unhandledRejection', err: unknown) => void
|
||||
onSignal?: (signal: NodeJS.Signals) => void
|
||||
}
|
||||
|
||||
const SIGNAL_EXIT_CODE: Record<'SIGHUP' | 'SIGINT' | 'SIGTERM', number> = {
|
||||
SIGHUP: 129,
|
||||
SIGINT: 130,
|
||||
SIGTERM: 143
|
||||
}
|
||||
|
||||
let wired = false
|
||||
|
||||
export function setupGracefulExit({ cleanups = [], failsafeMs = 4000, onError, onSignal }: SetupOptions = {}) {
|
||||
if (wired) {
|
||||
return
|
||||
}
|
||||
|
||||
wired = true
|
||||
|
||||
let shuttingDown = false
|
||||
|
||||
const exit = (code: number, signal?: NodeJS.Signals) => {
|
||||
if (shuttingDown) {
|
||||
return
|
||||
}
|
||||
|
||||
shuttingDown = true
|
||||
|
||||
if (signal) {
|
||||
onSignal?.(signal)
|
||||
}
|
||||
|
||||
setTimeout(() => process.exit(code), failsafeMs).unref?.()
|
||||
|
||||
void Promise.allSettled(cleanups.map(fn => Promise.resolve().then(fn))).finally(() => process.exit(code))
|
||||
}
|
||||
|
||||
for (const sig of ['SIGINT', 'SIGTERM', 'SIGHUP'] as const) {
|
||||
process.on(sig, () => exit(SIGNAL_EXIT_CODE[sig], sig))
|
||||
}
|
||||
|
||||
process.on('uncaughtException', err => onError?.('uncaughtException', err))
|
||||
process.on('unhandledRejection', reason => onError?.('unhandledRejection', reason))
|
||||
}
|
||||
187
ui-tui/src/lib/memory.ts
Normal file
187
ui-tui/src/lib/memory.ts
Normal file
|
|
@ -0,0 +1,187 @@
|
|||
import { createWriteStream } from 'node:fs'
|
||||
import { mkdir, readdir, readFile, writeFile } from 'node:fs/promises'
|
||||
import { homedir, tmpdir } from 'node:os'
|
||||
import { join } from 'node:path'
|
||||
import { pipeline } from 'node:stream/promises'
|
||||
import { getHeapSnapshot, getHeapSpaceStatistics, getHeapStatistics } from 'node:v8'
|
||||
|
||||
export type MemoryTrigger = 'auto-critical' | 'auto-high' | 'manual'
|
||||
|
||||
export interface MemoryDiagnostics {
|
||||
activeHandles: number
|
||||
activeRequests: number
|
||||
analysis: {
|
||||
potentialLeaks: string[]
|
||||
recommendation: string
|
||||
}
|
||||
memoryGrowthRate: {
|
||||
bytesPerSecond: number
|
||||
mbPerHour: number
|
||||
}
|
||||
memoryUsage: {
|
||||
arrayBuffers: number
|
||||
external: number
|
||||
heapTotal: number
|
||||
heapUsed: number
|
||||
rss: number
|
||||
}
|
||||
nodeVersion: string
|
||||
openFileDescriptors?: number
|
||||
platform: string
|
||||
resourceUsage: {
|
||||
maxRSS: number
|
||||
systemCPUTime: number
|
||||
userCPUTime: number
|
||||
}
|
||||
smapsRollup?: string
|
||||
timestamp: string
|
||||
trigger: MemoryTrigger
|
||||
uptimeSeconds: number
|
||||
v8HeapSpaces?: { available: number; name: string; size: number; used: number }[]
|
||||
v8HeapStats: {
|
||||
detachedContexts: number
|
||||
heapSizeLimit: number
|
||||
mallocedMemory: number
|
||||
nativeContexts: number
|
||||
peakMallocedMemory: number
|
||||
}
|
||||
}
|
||||
|
||||
export interface HeapDumpResult {
|
||||
diagPath?: string
|
||||
error?: string
|
||||
heapPath?: string
|
||||
success: boolean
|
||||
}
|
||||
|
||||
/**
 * Collect a point-in-time memory report for the current process.
 *
 * Combines V8 heap statistics, process memory/resource usage, Linux procfs
 * probes (FD count, smaps_rollup — undefined on other platforms) and a few
 * heuristic leak indicators into one JSON-serializable object.
 *
 * @param trigger Why the capture happened ('manual' | 'auto-high' | 'auto-critical').
 * @returns The populated MemoryDiagnostics record.
 */
export async function captureMemoryDiagnostics(trigger: MemoryTrigger): Promise<MemoryDiagnostics> {
  const usage = process.memoryUsage()
  const heapStats = getHeapStatistics()
  const resourceUsage = process.resourceUsage()
  const uptimeSeconds = process.uptime()

  // Not available on Bun / older Node.
  let heapSpaces: ReturnType<typeof getHeapSpaceStatistics> | undefined

  try {
    heapSpaces = getHeapSpaceStatistics()
  } catch {
    /* noop */
  }

  // Undocumented Node internals — no public typings, hence the unknown cast.
  const internals = process as unknown as {
    _getActiveHandles: () => unknown[]
    _getActiveRequests: () => unknown[]
  }

  const activeHandles = internals._getActiveHandles().length
  const activeRequests = internals._getActiveRequests().length
  // Linux-only probes; swallow() yields undefined where /proc is absent.
  const openFileDescriptors = await swallow(async () => (await readdir('/proc/self/fd')).length)
  const smapsRollup = await swallow(() => readFile('/proc/self/smaps_rollup', 'utf8'))

  const nativeMemory = usage.rss - usage.heapUsed
  // Real growth rate since STARTED_AT (captured at module load) — NOT a lifetime
  // average of rss/uptime, which would report phantom "growth" for a stable process.
  const elapsed = Math.max(0, uptimeSeconds - STARTED_AT.uptime)
  const bytesPerSecond = elapsed > 0 ? (usage.rss - STARTED_AT.rss) / elapsed : 0
  const mbPerHour = (bytesPerSecond * 3600) / (1024 * 1024)

  // Heuristic indicators; falsy entries are filtered out by the predicate below.
  // NOTE(review): nativeMemory > heapUsed is common for small heaps (rss also
  // covers code/stack), so that indicator can fire spuriously — confirm intent.
  const potentialLeaks = [
    heapStats.number_of_detached_contexts > 0 &&
      `${heapStats.number_of_detached_contexts} detached context(s) — possible component/closure leak`,
    activeHandles > 100 && `${activeHandles} active handles — possible timer/socket leak`,
    nativeMemory > usage.heapUsed && 'Native memory > heap — leak may be in native addons',
    mbPerHour > 100 && `High memory growth rate: ${mbPerHour.toFixed(1)} MB/hour`,
    openFileDescriptors && openFileDescriptors > 500 && `${openFileDescriptors} open FDs — possible file/socket leak`
  ].filter((s): s is string => typeof s === 'string')

  return {
    activeHandles,
    activeRequests,
    analysis: {
      potentialLeaks,
      recommendation: potentialLeaks.length
        ? `WARNING: ${potentialLeaks.length} potential leak indicator(s). See potentialLeaks.`
        : 'No obvious leak indicators. Inspect heap snapshot for retained objects.'
    },
    memoryGrowthRate: { bytesPerSecond, mbPerHour },
    memoryUsage: {
      arrayBuffers: usage.arrayBuffers,
      external: usage.external,
      heapTotal: usage.heapTotal,
      heapUsed: usage.heapUsed,
      rss: usage.rss
    },
    nodeVersion: process.version,
    openFileDescriptors,
    platform: process.platform,
    resourceUsage: {
      // ru_maxrss is reported in KB on Linux; normalize to bytes.
      maxRSS: resourceUsage.maxRSS * 1024,
      systemCPUTime: resourceUsage.systemCPUTime,
      userCPUTime: resourceUsage.userCPUTime
    },
    smapsRollup,
    timestamp: new Date().toISOString(),
    trigger,
    uptimeSeconds,
    v8HeapSpaces: heapSpaces?.map(s => ({
      available: s.space_available_size,
      name: s.space_name,
      size: s.space_size,
      used: s.space_used_size
    })),
    v8HeapStats: {
      detachedContexts: heapStats.number_of_detached_contexts,
      heapSizeLimit: heapStats.heap_size_limit,
      mallocedMemory: heapStats.malloced_memory,
      nativeContexts: heapStats.number_of_native_contexts,
      peakMallocedMemory: heapStats.peak_malloced_memory
    }
  }
}
|
||||
|
||||
export async function performHeapDump(trigger: MemoryTrigger = 'manual'): Promise<HeapDumpResult> {
|
||||
try {
|
||||
// Diagnostics first — heap-snapshot serialization can crash on very large
|
||||
// heaps, and the JSON sidecar is the most actionable artifact if so.
|
||||
const diagnostics = await captureMemoryDiagnostics(trigger)
|
||||
const dir = process.env.HERMES_HEAPDUMP_DIR?.trim() || join(homedir() || tmpdir(), '.hermes', 'heapdumps')
|
||||
|
||||
await mkdir(dir, { recursive: true })
|
||||
|
||||
const base = `hermes-${new Date().toISOString().replace(/[:.]/g, '-')}-${process.pid}-${trigger}`
|
||||
const heapPath = join(dir, `${base}.heapsnapshot`)
|
||||
const diagPath = join(dir, `${base}.diagnostics.json`)
|
||||
|
||||
await writeFile(diagPath, JSON.stringify(diagnostics, null, 2), { mode: 0o600 })
|
||||
await pipeline(getHeapSnapshot(), createWriteStream(heapPath, { mode: 0o600 }))
|
||||
|
||||
return { diagPath, heapPath, success: true }
|
||||
} catch (e) {
|
||||
return { error: e instanceof Error ? e.message : String(e), success: false }
|
||||
}
|
||||
}
|
||||
|
||||
export function formatBytes(bytes: number): string {
|
||||
if (!Number.isFinite(bytes) || bytes <= 0) {
|
||||
return '0B'
|
||||
}
|
||||
|
||||
const exp = Math.min(UNITS.length - 1, Math.floor(Math.log10(bytes) / 3))
|
||||
const value = bytes / 1024 ** exp
|
||||
|
||||
return `${value >= 100 ? value.toFixed(0) : value.toFixed(1)}${UNITS[exp]}`
|
||||
}
|
||||
|
||||
const UNITS = ['B', 'KB', 'MB', 'GB', 'TB']
|
||||
|
||||
const STARTED_AT = { rss: process.memoryUsage().rss, uptime: process.uptime() }
|
||||
|
||||
// Returns undefined when the probe isn't available (non-Linux paths, sandboxed FS).
|
||||
const swallow = async <T>(fn: () => Promise<T>): Promise<T | undefined> => {
|
||||
try {
|
||||
return await fn()
|
||||
} catch {
|
||||
return undefined
|
||||
}
|
||||
}
|
||||
55
ui-tui/src/lib/memoryMonitor.ts
Normal file
55
ui-tui/src/lib/memoryMonitor.ts
Normal file
|
|
@ -0,0 +1,55 @@
|
|||
import { type HeapDumpResult, performHeapDump } from './memory.js'
|
||||
|
||||
export type MemoryLevel = 'critical' | 'high' | 'normal'
|
||||
|
||||
export interface MemorySnapshot {
|
||||
heapUsed: number
|
||||
level: MemoryLevel
|
||||
rss: number
|
||||
}
|
||||
|
||||
export interface MemoryMonitorOptions {
|
||||
criticalBytes?: number
|
||||
highBytes?: number
|
||||
intervalMs?: number
|
||||
onCritical?: (snap: MemorySnapshot, dump: HeapDumpResult | null) => void
|
||||
onHigh?: (snap: MemorySnapshot, dump: HeapDumpResult | null) => void
|
||||
}
|
||||
|
||||
const GB = 1024 ** 3
|
||||
|
||||
export function startMemoryMonitor({
|
||||
criticalBytes = 2.5 * GB,
|
||||
highBytes = 1.5 * GB,
|
||||
intervalMs = 10_000,
|
||||
onCritical,
|
||||
onHigh
|
||||
}: MemoryMonitorOptions = {}): () => void {
|
||||
const dumped = new Set<Exclude<MemoryLevel, 'normal'>>()
|
||||
|
||||
const tick = async () => {
|
||||
const { heapUsed, rss } = process.memoryUsage()
|
||||
const level: MemoryLevel = heapUsed >= criticalBytes ? 'critical' : heapUsed >= highBytes ? 'high' : 'normal'
|
||||
|
||||
if (level === 'normal') {
|
||||
return void dumped.clear()
|
||||
}
|
||||
|
||||
if (dumped.has(level)) {
|
||||
return
|
||||
}
|
||||
|
||||
dumped.add(level)
|
||||
const dump = await performHeapDump(level === 'critical' ? 'auto-critical' : 'auto-high').catch(() => null)
|
||||
|
||||
const snap: MemorySnapshot = { heapUsed, level, rss }
|
||||
|
||||
;(level === 'critical' ? onCritical : onHigh)?.(snap, dump)
|
||||
}
|
||||
|
||||
const handle = setInterval(() => void tick(), intervalMs)
|
||||
|
||||
handle.unref?.()
|
||||
|
||||
return () => clearInterval(handle)
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue