perf(tui): instrument stdout drain — rule out terminal parse bottleneck

Adds four fields to FrameEvent.phases and the matching profile
summary:

  optimizedPatches  post-optimize patch count (what's actually
                    written to stdout; the .patches field is
                    pre-optimize)
  writeBytes        UTF-8 byte count of the write this frame
  backpressure      true when Node's stdout.write returned false
                    (Writable buffer full — outer terminal can't
                    keep up)
  prevFrameDrainMs  end-to-end drain time of the PREVIOUS frame's
                    write, captured from stdout.write's 2-arg
                    callback.  Reported on the next frame so the
                    measurement reflects "time until OS flushed
                    the bytes to the terminal fd", not "time until
                    queued in Node".

writeDiffToTerminal() now returns { bytes, backpressure } and
accepts an optional onDrain callback.  Only attached on TTY with
diff; piped/non-TTY stdout bypasses flow control so the callback
would fire synchronously anyway.

Initial measurements under hold-wheel_up against 1106-msg session
(30Hz for 6s):

  patches total    28,888
  optimized total  16,700   (ratio 0.58 — optimizer cuts ~42%)
  writeBytes       42 KB / 10s = 4.2 KB/s throughput
  drainMs p50      0.14 ms   terminal accepts bytes instantly
  drainMs p99      0.85 ms
  backpressure     0% of frames

This rules out the terminal-parse hypothesis — Cursor's xterm.js
drains our output in sub-millisecond time at only 4 KB/s.  The
remaining lag has to be in the render pipeline, not the wire.
Profile output now includes the bytes+drain+backpressure lines to
keep this visible on every subsequent iteration.
This commit is contained in:
Brooklyn Nicholson 2026-04-26 17:06:22 -05:00
parent d3dedf10aa
commit f823535db2
6 changed files with 126 additions and 4 deletions

View file

@ -219,6 +219,45 @@ def format_report(data: dict[str, Any]) -> str:
f" patches p50={pct(patches,0.5):.0f} p99={pct(patches,0.99):.0f} " f" patches p50={pct(patches,0.5):.0f} p99={pct(patches,0.99):.0f} "
f"max={max(patches)} total={sum(patches)}" f"max={max(patches)} total={sum(patches)}"
) )
optimized = [
f["phases"].get("optimizedPatches", 0)
for f in frames if f.get("phases")
]
if any(optimized):
out.append(
f" optimized p50={pct(optimized,0.5):.0f} p99={pct(optimized,0.99):.0f} "
f"max={max(optimized)} total={sum(optimized)}"
f" (ratio: {sum(optimized)/max(1,sum(patches)):.2f})"
)
# Write bytes + drain telemetry — the outer-terminal bottleneck gauge.
bytes_written = [
f["phases"].get("writeBytes", 0)
for f in frames if f.get("phases")
]
if any(bytes_written):
total_b = sum(bytes_written)
kb = total_b / 1024
out.append(
f" writeBytes p50={pct(bytes_written,0.5):.0f}B p99={pct(bytes_written,0.99):.0f}B "
f"max={max(bytes_written)}B total={kb:.1f}KB"
)
drains = [
f["phases"].get("prevFrameDrainMs", 0)
for f in frames if f.get("phases")
]
if any(d > 0 for d in drains):
nonzero = [d for d in drains if d > 0]
out.append(
f" drainMs p50={pct(nonzero,0.5):.2f} p95={pct(nonzero,0.95):.2f} "
f"p99={pct(nonzero,0.99):.2f} max={max(nonzero):.2f} (terminal flush latency)"
)
backpressure = sum(1 for f in frames if f.get("phases", {}).get("backpressure"))
if backpressure:
out.append(
f" backpressure: {backpressure}/{len(frames)} frames "
f"({100*backpressure/len(frames):.0f}%) (Node stdout buffer full — terminal slow)"
)
# Flickers # Flickers
flicker_frames = [f for f in frames if f.get("flickers")] flicker_frames = [f for f in frames if f.get("flickers")]

View file

@ -46,6 +46,17 @@ export type FrameEvent = {
write: number write: number
/** Pre-optimize patch count (proxy for how much changed this frame) */ /** Pre-optimize patch count (proxy for how much changed this frame) */
patches: number patches: number
/** Post-optimize patch count — what was actually written to stdout. */
optimizedPatches: number
/** Bytes written to stdout this frame (escape sequences + payload). */
writeBytes: number
/** Whether stdout.write returned false (backpressure = outer terminal slow). */
backpressure: boolean
/** ms from this frame's stdout.write until the write-callback fired.
* Populated on the NEXT frame (async), so this field reflects the
* PREVIOUS frame's terminal-drain time. 0 = callback already fired
* before next frame started (drained in sub-ms). */
prevFrameDrainMs: number
/** yoga calculateLayout() time (runs in resetAfterCommit, before onRender) */ /** yoga calculateLayout() time (runs in resetAfterCommit, before onRender) */
yoga: number yoga: number
/** React reconcile time: scrollMutated → resetAfterCommit. 0 if no commit. */ /** React reconcile time: scrollMutated → resetAfterCommit. 0 if no commit. */

View file

@ -165,6 +165,15 @@ export default class Ink {
private backFrame: Frame private backFrame: Frame
private lastPoolResetTime = performance.now() private lastPoolResetTime = performance.now()
private drainTimer: ReturnType<typeof setTimeout> | null = null private drainTimer: ReturnType<typeof setTimeout> | null = null
// Write-drain telemetry: pendingWriteStart is the performance.now() of
// the most recent stdout.write waiting for its drain callback. Set to
// null when the callback fires (drained). Read on the NEXT frame and
// reported as prevFrameDrainMs so the FrameEvent records how long the
// previous write took to actually hit the terminal — distinguishes
// "queued in Node" (write returned true) from "terminal accepted bytes"
// (callback fired).
private pendingWriteStart: number | null = null
private lastDrainMs = 0
private lastYogaCounters: { private lastYogaCounters: {
ms: number ms: number
visited: number visited: number
@ -970,7 +979,43 @@ export default class Ink {
} }
const tWrite = performance.now() const tWrite = performance.now()
writeDiffToTerminal(this.terminal, optimized, this.altScreenActive && !SYNC_OUTPUT_SUPPORTED) // Capture any stale pending write BEFORE starting this frame's write —
// if the callback already fired, pendingWriteStart is null and lastDrainMs
// already reflects the previous frame's drain. If it hasn't fired, we
// report "still pending" via a non-zero duration based on now-then so
// backpressure shows up even if Node never flushes this session.
const staleDrain =
this.pendingWriteStart !== null
? performance.now() - this.pendingWriteStart
: this.lastDrainMs
const prevFrameDrainMs = Math.round(staleDrain * 100) / 100
this.lastDrainMs = 0
// Only track drain on TTY. Piped/non-TTY stdout bypasses flow control.
const trackDrain = this.options.stdout.isTTY && hasDiff
const drainStart = trackDrain ? tWrite : 0
if (trackDrain) {
this.pendingWriteStart = drainStart
}
const { bytes: writeBytes, backpressure } = writeDiffToTerminal(
this.terminal,
optimized,
this.altScreenActive && !SYNC_OUTPUT_SUPPORTED,
trackDrain
? () => {
// Callback fires once Node has flushed the chunk to the OS.
// Capture the drain time and clear pending so the NEXT frame's
// staleDrain = the real end-to-end flush time.
if (this.pendingWriteStart === drainStart) {
this.lastDrainMs = performance.now() - drainStart
this.pendingWriteStart = null
}
}
: undefined
)
const writeMs = performance.now() - tWrite const writeMs = performance.now() - tWrite
// Update blit safety for the NEXT frame. The frame just rendered // Update blit safety for the NEXT frame. The frame just rendered
@ -1008,6 +1053,10 @@ export default class Ink {
optimize: optimizeMs, optimize: optimizeMs,
write: writeMs, write: writeMs,
patches: diff.length, patches: diff.length,
optimizedPatches: optimized.length,
writeBytes,
backpressure,
prevFrameDrainMs,
yoga: yogaMs, yoga: yogaMs,
commit: commitMs, commit: commitMs,
yogaVisited: yc.visited, yogaVisited: yc.visited,

View file

@ -203,10 +203,15 @@ export type Terminal = {
stderr: Writable stderr: Writable
} }
export function writeDiffToTerminal(terminal: Terminal, diff: Diff, skipSyncMarkers = false): void { export function writeDiffToTerminal(
terminal: Terminal,
diff: Diff,
skipSyncMarkers = false,
onDrain?: () => void
): { bytes: number; backpressure: boolean } {
// No output if there are no patches // No output if there are no patches
if (diff.length === 0) { if (diff.length === 0) {
return return { bytes: 0, backpressure: false }
} }
// BSU/ESU wrapping is opt-out to keep main-screen behavior unchanged. // BSU/ESU wrapping is opt-out to keep main-screen behavior unchanged.
@ -278,5 +283,15 @@ export function writeDiffToTerminal(terminal: Terminal, diff: Diff, skipSyncMark
buffer += ESU buffer += ESU
} }
terminal.stdout.write(buffer) // Node's Writable.write returns false when the internal buffer is full
// (backpressure). On a slow terminal parser that's the tell: we're
// producing bytes faster than the outer terminal can consume them.
// The 2-arg form attaches a drain callback that fires once the chunk
// is actually flushed to the OS socket/pipe — giving us end-to-end
// drain timing, not just "queued in Node".
const wrote = onDrain
? terminal.stdout.write(buffer, () => onDrain())
: terminal.stdout.write(buffer)
return { bytes: Buffer.byteLength(buffer, 'utf8'), backpressure: !wrote }
} }

View file

@ -149,12 +149,16 @@ export const logFrameEvent = ENABLED
flickers: event.flickers.length ? event.flickers : undefined, flickers: event.flickers.length ? event.flickers : undefined,
phases: event.phases phases: event.phases
? { ? {
backpressure: event.phases.backpressure,
commit: round2(event.phases.commit), commit: round2(event.phases.commit),
diff: round2(event.phases.diff), diff: round2(event.phases.diff),
optimize: round2(event.phases.optimize), optimize: round2(event.phases.optimize),
optimizedPatches: event.phases.optimizedPatches,
patches: event.phases.patches, patches: event.phases.patches,
prevFrameDrainMs: round2(event.phases.prevFrameDrainMs),
renderer: round2(event.phases.renderer), renderer: round2(event.phases.renderer),
write: round2(event.phases.write), write: round2(event.phases.write),
writeBytes: event.phases.writeBytes,
yoga: round2(event.phases.yoga), yoga: round2(event.phases.yoga),
yogaCacheHits: event.phases.yogaCacheHits, yogaCacheHits: event.phases.yogaCacheHits,
yogaLive: event.phases.yogaLive, yogaLive: event.phases.yogaLive,

View file

@ -41,6 +41,10 @@ declare module '@hermes/ink' {
readonly optimize: number readonly optimize: number
readonly write: number readonly write: number
readonly patches: number readonly patches: number
readonly optimizedPatches: number
readonly writeBytes: number
readonly backpressure: boolean
readonly prevFrameDrainMs: number
readonly yoga: number readonly yoga: number
readonly commit: number readonly commit: number
readonly yogaVisited: number readonly yogaVisited: number