perf(tui): full-pipeline instrumentation + profiling harness

Extends HERMES_DEV_PERF to capture the complete render pipeline, not
just React commits. Adds scripts/profile-tui.py to drive repeatable
hold-PageUp stress tests against a real long session.

perfPane.tsx:
  Wires ink's onFrame callback (already plumbed through the fork) into
  the same perf.log as the React.Profiler samples. Captures per-phase
  timing (yoga calculateLayout, renderNodeToOutput, screen diff, patch
  optimize, stdout write) plus yoga counters (visited/measured/cache-
  Hits/live) and patch counts per frame.  Events are tagged
  {src: 'react'|'frame'} so jq can split them.  logFrameEvent is
  undefined when HERMES_DEV_PERF is unset, so ink doesn't even attach
  the callback.

entry.tsx:
  Passes logFrameEvent into render().

types/hermes-ink.d.ts:
  Declares FrameEvent + onFrame on RenderOptions so the ui-tui side
  type-checks against the plumbed-through ink option.

scripts/profile-tui.py:
  New harness. Launches the built TUI under a PTY with the longest
  session in state.db resumed, holds PageUp/PageDown/etc at a
  configurable Hz for N seconds, then parses perf.log and prints
  per-phase p50/p95/p99/max plus yoga-counter summaries. Zero deps
  beyond stdlib. Exit 2 if nothing was captured (wiring broken).

Initial findings (1106-msg session, 6s PageUp hold at 30Hz):
  - Steady state: 10 fps; renderer phase p99=63ms, write p99=0.2ms
  - 4/107 heavy frames (>=16ms), all dominated by renderNodeToOutput
  - One pathological 97ms frame with yoga measuring 70,415 text cells
    and Yoga visiting 225k nodes — the cold-unmeasured-region hit
  - Ink's scroll fast-path (DECSTBM blit from prevScreen) is
    disqualified because our spacer-based virtual history doesn't
    keep heightDelta in sync with scroll.delta, so every PageUp step
    falls through to a full 2000-4800 patch re-render instead of ~40
This commit is contained in:
Brooklyn Nicholson 2026-04-26 16:36:25 -05:00
parent 69ff201050
commit 71eee26640
4 changed files with 461 additions and 30 deletions

View file

@ -41,6 +41,10 @@ if (process.env.HERMES_HEAPDUMP_ON_START === '1') {
process.on('beforeExit', () => stopMemoryMonitor())
const [{ render }, { App }] = await Promise.all([import('@hermes/ink'), import('./app.js')])
const [{ render }, { App }, { logFrameEvent }] = await Promise.all([
import('@hermes/ink'),
import('./app.js'),
import('./lib/perfPane.js')
])
render(<App gw={gw} />, { exitOnCtrlC: false })
render(<App gw={gw} />, { exitOnCtrlC: false, onFrame: logFrameEvent })

View file

@ -1,27 +1,53 @@
// Perf instrumentation: wraps React.Profiler around named panes and writes
// commit timings to a log file when HERMES_DEV_PERF is set. Enabled per-run
// via the env var; zero-cost (Profiler is replaced by a Fragment) when off.
// Perf instrumentation for the full render pipeline.
//
// Log format: one JSON object per line, for easy `jq` filtering. We only
// log commits that exceed a threshold (default 2ms) so the file doesn't
// fill up with sub-millisecond idle renders. Tune via HERMES_DEV_PERF_MS.
// Two sources of timing:
// 1. React.Profiler wrapper (PerfPane) → per-pane commit times. Shows
// which subtree is reconciling and for how long.
// 2. Ink onFrame callback (logFrameEvent) → per-frame pipeline phases:
// yoga (calculateLayout), renderer (DOM → screen buffer), diff
// (prev vs current screen → patches), optimize (patch merge/dedupe),
// write (serialize → ANSI → stdout), plus yoga counters (visited,
// measured, cacheHits, live). Shows where the time goes BELOW React.
//
// Usage in consumers:
// import { PerfPane } from './perfPane.js'
// <PerfPane id="transcript"> ... </PerfPane>
// Both sources gate on HERMES_DEV_PERF=1 and dump JSON-lines to the same
// log (default ~/.hermes/perf.log, override via HERMES_DEV_PERF_LOG).
// Events are tagged { src: 'react' | 'frame' } so jq can split them.
//
// Inspect with:
// tail -f ~/.hermes/perf.log | jq -c 'select(.actualMs > 8)'
// jq -s 'group_by(.id) | map({id: .[0].id, count: length, p50: (sort_by(.actualMs) | .[length/2|floor].actualMs), p99: (sort_by(.actualMs) | .[length*0.99|floor].actualMs)})' ~/.hermes/perf.log
// Threshold HERMES_DEV_PERF_MS (default 2ms) skips sub-millisecond idle
// frames. For the 2fps-during-PageUp investigation, set
// HERMES_DEV_PERF_MS=0 to capture everything, then filter with jq.
//
// Zero cost when the env var is unset: PerfPane returns children
// directly (no Profiler fiber), logFrameEvent is a noop on the onFrame
// callback — the ink instance isn't given the callback at all.
//
// Usage:
// # entry.tsx wires logFrameEvent into render()
// import { logFrameEvent, PerfPane } from './lib/perfPane.js'
// render(<App/>, { onFrame: logFrameEvent })
//
// Analysis helpers (once you've captured a session):
// tail -f ~/.hermes/perf.log | jq -c 'select(.src=="frame" and .durationMs > 16)'
// # p50/p99 per phase across frame events:
// jq -s '[.[] | select(.src=="frame")] |
// {n: length,
// dur_p50: (sort_by(.durationMs) | .[length/2|floor].durationMs),
// dur_p99: (sort_by(.durationMs) | .[length*0.99|floor].durationMs),
// yoga_p99: (sort_by(.phases.yoga) | .[length*0.99|floor].phases.yoga),
// write_p99: (sort_by(.phases.write) | .[length*0.99|floor].phases.write),
// diff_p99: (sort_by(.phases.diff) | .[length*0.99|floor].phases.diff),
// patches_p99: (sort_by(.phases.patches) | .[length*0.99|floor].phases.patches)}' \
// ~/.hermes/perf.log
import { appendFileSync, mkdirSync } from 'node:fs'
import { homedir } from 'node:os'
import { dirname, join } from 'node:path'
import type { FrameEvent } from '@hermes/ink'
import { Profiler, type ProfilerOnRenderCallback, type ReactNode } from 'react'
const ENABLED = /^(?:1|true|yes|on)$/i.test((process.env.HERMES_DEV_PERF ?? '').trim())
const THRESHOLD_MS = Number(process.env.HERMES_DEV_PERF_MS ?? '2') || 2
const THRESHOLD_MS = Number(process.env.HERMES_DEV_PERF_MS ?? '2') || 0
const LOG_PATH = process.env.HERMES_DEV_PERF_LOG?.trim() || join(homedir(), '.hermes', 'perf.log')
let initialized = false
@ -42,23 +68,9 @@ const ensureLogDir = () => {
}
}
const onRender: ProfilerOnRenderCallback = (id, phase, actualMs, baseMs, startTime, commitTime) => {
if (actualMs < THRESHOLD_MS) {
return
}
const writeRow = (row: Record<string, unknown>) => {
ensureLogDir()
const row = {
actualMs: Math.round(actualMs * 100) / 100,
baseMs: Math.round(baseMs * 100) / 100,
commitMs: Math.round(commitTime * 100) / 100,
id,
phase,
startMs: Math.round(startTime * 100) / 100,
ts: Date.now()
}
try {
appendFileSync(LOG_PATH, `${JSON.stringify(row)}\n`)
} catch {
@ -66,6 +78,25 @@ const onRender: ProfilerOnRenderCallback = (id, phase, actualMs, baseMs, startTi
}
}
const round2 = (n: number) => Math.round(n * 100) / 100
const onRender: ProfilerOnRenderCallback = (id, phase, actualMs, baseMs, startTime, commitTime) => {
if (actualMs < THRESHOLD_MS) {
return
}
writeRow({
actualMs: round2(actualMs),
baseMs: round2(baseMs),
commitMs: round2(commitTime),
id,
phase,
src: 'react',
startMs: round2(startTime),
ts: Date.now()
})
}
export function PerfPane({ children, id }: { children: ReactNode; id: string }) {
if (!ENABLED) {
return children
@ -78,5 +109,42 @@ export function PerfPane({ children, id }: { children: ReactNode; id: string })
)
}
/**
* Ink onFrame handler. Captures the FULL render pipeline: yoga calculateLayout,
* DOM screen buffer, screen diff, patch optimize, and stdout write.
*
* Returns `undefined` when disabled so `render()` doesn't attach the callback
* ink only pays the timing cost when the callback is truthy.
*/
export const logFrameEvent = ENABLED
? (event: FrameEvent) => {
if (event.durationMs < THRESHOLD_MS) {
return
}
writeRow({
durationMs: round2(event.durationMs),
flickers: event.flickers.length ? event.flickers : undefined,
phases: event.phases
? {
commit: round2(event.phases.commit),
diff: round2(event.phases.diff),
optimize: round2(event.phases.optimize),
patches: event.phases.patches,
renderer: round2(event.phases.renderer),
write: round2(event.phases.write),
yoga: round2(event.phases.yoga),
yogaCacheHits: event.phases.yogaCacheHits,
yogaLive: event.phases.yogaLive,
yogaMeasured: event.phases.yogaMeasured,
yogaVisited: event.phases.yogaVisited
}
: undefined,
src: 'frame',
ts: Date.now()
})
}
: undefined
export const PERF_ENABLED = ENABLED
export const PERF_LOG_PATH = LOG_PATH

View file

@ -33,11 +33,35 @@ declare module '@hermes/ink' {
export type InputHandler = (input: string, key: Key, event: InputEvent) => void
export type FrameEvent = {
readonly durationMs: number
readonly phases?: {
readonly renderer: number
readonly diff: number
readonly optimize: number
readonly write: number
readonly patches: number
readonly yoga: number
readonly commit: number
readonly yogaVisited: number
readonly yogaMeasured: number
readonly yogaCacheHits: number
readonly yogaLive: number
}
readonly flickers: ReadonlyArray<{
readonly desiredHeight: number
readonly availableHeight: number
readonly reason: 'resize' | 'offscreen' | 'clear'
}>
}
export type RenderOptions = {
readonly stdin?: NodeJS.ReadStream
readonly stdout?: NodeJS.WriteStream
readonly stderr?: NodeJS.WriteStream
readonly exitOnCtrlC?: boolean
readonly patchConsole?: boolean
readonly onFrame?: (event: FrameEvent) => void
}
export type Instance = {