hermes-agent/ui-tui/src/__tests__/memoryMonitor.test.ts
teknium1 e76d8bf5aa
fix(tui): stop persisting full tool output in trail lines (silent OOM death)
A heavy --tui session (browser snapshots, large tool outputs) silently
OOM-killed the Node parent within minutes — closing the gateway child's
stdin, which the user saw only as a bare "gateway exited" / stdin EOF.
CLI was immune. Root cause: each completed tool's verbose trail line
embedded up to 16KB of result_text, persisted in transcript Msg.tools[]
for the whole session and rendered EXPANDED by default, so an Ink
render-node tree was built for every one of up to 800 messages at once.
That tree blew past Node's heap at a few hundred MB — far below the 2.5GB
memory-monitor exit threshold, so the death was never even attributed.

- text.ts: persisted verbose tool-trail blocks now cap to a small preview
  (VERBOSE_TRAIL_MAX_CHARS=800/12 lines), not the 16KB live-render budget.
  Retained trail strings drop ~17x (12.2MB -> 0.7MB at 800 msgs); the live
  streaming tail still uses the larger LIVE_RENDER budget.
- tui_gateway/server.py: lower the gateway-side verbose text cap to match
  (1KB/16 lines) so we stop shipping output the TUI no longer renders.
- memoryMonitor.ts: derive critical/high thresholds from the real V8 heap
  ceiling (~88%/70%) instead of the hardcoded 2.5GB that killed the process
  at 31% of an 8GB ceiling; add a one-shot onWarn early-warning on fast
  sub-threshold heap growth so the next such death is diagnosable, not silent.
- entry.tsx: wire onWarn to a crash-log breadcrumb + stderr line.

Full tool output is unchanged in the agent context and SQLite session — this
is display/transport only, no behavior or context change.

Fixes #34095. Related #27282.

Tests: ui-tui text + new memoryMonitor suites (33 pass), python verbose-cap
guard (5 pass); full ui-tui suite shows no new failures vs pristine main.
E2E repro confirms the retention drop.
2026-06-03 06:00:22 -07:00

102 lines
3.8 KiB
TypeScript

import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
// The real memory.js performs heap dumps and filesystem work. Swap it for a
// stub whose performHeapDump resolves to null so the monitor's dump path does
// nothing in tests.
vi.mock('../lib/memory.js', () => {
  return { performHeapDump: vi.fn(async () => null) }
})

// @hermes/ink is only pulled in (via dynamic import) on the dump path; supply
// an inert evictInkCaches so that import resolves without the real package.
vi.mock('@hermes/ink', () => {
  return { evictInkCaches: vi.fn() }
})
import { startMemoryMonitor } from '../lib/memoryMonitor.js'
// Binary byte units used to express heap sizes in the cases below.
const MB = 1024 * 1024
const GB = MB * 1024
/**
 * Regression suite for the memory monitor's threshold handling (#34095).
 *
 * Each case installs fake timers and stubs `process.memoryUsage`, then drives
 * the monitor by advancing the fake clock and asserts which callbacks fired.
 */
describe('startMemoryMonitor thresholds (#34095)', () => {
  // Function returned by startMemoryMonitor. It is called in afterEach so the
  // monitor is torn down even when an assertion throws mid-test.
  let stop: (() => void) | undefined

  beforeEach(() => {
    vi.useFakeTimers()
  })

  afterEach(() => {
    stop?.()
    stop = undefined
    // Undo the process.memoryUsage spy before handing real timers back.
    vi.restoreAllMocks()
    vi.useRealTimers()
  })

  /**
   * Builds a `process.memoryUsage()` snapshot for a given heap size.
   *
   * @param heapUsed bytes reported as both `heapUsed` and `heapTotal`
   * @param rss bytes reported as `rss`; defaults to `heapUsed`
   * @returns a snapshot with `external` and `arrayBuffers` zeroed
   */
  const usageOf = (heapUsed: number, rss = heapUsed) =>
    ({ arrayBuffers: 0, external: 0, heapTotal: heapUsed, heapUsed, rss }) as NodeJS.MemoryUsage

  /**
   * Stubs `process.memoryUsage` to report the given heap size.
   *
   * @returns the spy, so a test can swap in a new snapshot between ticks
   */
  const withHeap = (heapUsed: number, rss = heapUsed) =>
    vi.spyOn(process, 'memoryUsage').mockReturnValue(usageOf(heapUsed, rss))

  it('does NOT fire onCritical at 2.5GB when the heap ceiling is 8GB', async () => {
    // The old hardcoded 2.5GB constant killed the process at ~31% of the real
    // ceiling. With relative thresholds (~88%), 2.5GB is well within normal.
    //
    // NOTE(review): no criticalBytes/highBytes override is passed, so this
    // relies on whatever thresholds the monitor derives by default on the
    // machine running the test; nothing here pins the ceiling to 8GB. If those
    // defaults come from the host V8 heap_size_limit, a host with a small limit
    // could trip onCritical at 2.5GB — confirm, or stub the ceiling.
    const onCritical = vi.fn()

    withHeap(2.5 * GB)
    stop = startMemoryMonitor({ intervalMs: 1, onCritical })
    await vi.advanceTimersByTimeAsync(5)

    expect(onCritical).not.toHaveBeenCalled()
  })

  it('fires onCritical only near the configured ceiling', async () => {
    const onCritical = vi.fn()

    // Thresholds are passed explicitly so the outcome does not depend on the
    // host V8 heap_size_limit: 7.5GB used sits above the 7GB critical mark.
    withHeap(7.5 * GB)
    stop = startMemoryMonitor({ criticalBytes: 7 * GB, highBytes: 5 * GB, intervalMs: 1, onCritical })
    await vi.advanceTimersByTimeAsync(5)

    // Five 1ms intervals elapse, yet the callback is expected exactly once.
    expect(onCritical).toHaveBeenCalledTimes(1)
  })

  it('fires onWarn once on fast sub-threshold heap growth, then re-arms', async () => {
    const onWarn = vi.fn()

    // Start low, then jump >150MB across a tick while above the 600MB floor and
    // below `high` — the silent-death regime.
    const spy = withHeap(100 * MB)
    stop = startMemoryMonitor({ highBytes: 2 * GB, intervalMs: 1, onWarn, warnBytes: 600 * MB })

    await vi.advanceTimersByTimeAsync(2) // seed lastHeap at 100MB, below floor
    expect(onWarn).not.toHaveBeenCalled()

    spy.mockReturnValue(usageOf(800 * MB))
    await vi.advanceTimersByTimeAsync(2) // jumped 700MB → above floor + steep
    expect(onWarn).toHaveBeenCalledTimes(1)

    // Stays elevated but not re-firing.
    await vi.advanceTimersByTimeAsync(2)
    expect(onWarn).toHaveBeenCalledTimes(1)

    // Falls back below the floor → re-armed, then climbs again → fires again.
    spy.mockReturnValue(usageOf(100 * MB))
    await vi.advanceTimersByTimeAsync(2)
    spy.mockReturnValue(usageOf(800 * MB))
    await vi.advanceTimersByTimeAsync(2)
    expect(onWarn).toHaveBeenCalledTimes(2)
  })

  it('does not warn on slow growth below the steep-growth step', async () => {
    const onWarn = vi.fn()

    const spy = withHeap(650 * MB)
    stop = startMemoryMonitor({ highBytes: 2 * GB, intervalMs: 1, onWarn, warnBytes: 600 * MB })
    await vi.advanceTimersByTimeAsync(2)

    // +50MB per tick — above the floor but gentle, not a render-tree blowup.
    spy.mockReturnValue(usageOf(700 * MB))
    await vi.advanceTimersByTimeAsync(2)

    expect(onWarn).not.toHaveBeenCalled()
  })
})