From f823535db21585b0b604f9f48c41baaadcdaa12a Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Sun, 26 Apr 2026 17:06:22 -0500 Subject: [PATCH] =?UTF-8?q?perf(tui):=20instrument=20stdout=20drain=20?= =?UTF-8?q?=E2=80=94=20rule=20out=20terminal=20parse=20bottleneck?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds four fields to FrameEvent.phases and the matching profile summary: optimizedPatches post-optimize patch count (what's actually written to stdout; the .patches field is pre-optimize) writeBytes UTF-8 byte count of the write this frame backpressure true when Node's stdout.write returned false (Writable buffer full — outer terminal can't keep up) prevFrameDrainMs end-to-end drain time of the PREVIOUS frame's write, captured from stdout.write's 2-arg callback. Reported on the next frame so the measurement reflects "time until OS flushed the bytes to the terminal fd", not "time until queued in Node". writeDiffToTerminal() now returns { bytes, backpressure } and accepts an optional onDrain callback. Only attached on TTY with diff; piped/non-TTY stdout bypasses flow control so the callback would fire synchronously anyway. Initial measurements under hold-wheel_up against 1106-msg session (30Hz for 6s): patches total 28,888 optimized total 16,700 (ratio 0.58 — optimizer cuts ~42%) writeBytes 42 KB / 10s = 4.2 KB/s throughput drainMs p50 0.14 ms terminal accepts bytes instantly drainMs p99 0.85 ms backpressure 0% of frames This rules out the terminal-parse hypothesis — Cursor's xterm.js drains our output in sub-millisecond time at only 4 KB/s. The remaining lag has to be in the render pipeline, not the wire. Profile output now includes the bytes+drain+backpressure lines to keep this visible on every subsequent iteration. --- scripts/profile-tui.py | 39 ++++++++++++++ ui-tui/packages/hermes-ink/src/ink/frame.ts | 11 ++++ ui-tui/packages/hermes-ink/src/ink/ink.tsx | 51 ++++++++++++++++++- .../packages/hermes-ink/src/ink/terminal.ts | 21 ++++++-- ui-tui/src/lib/perfPane.tsx | 4 ++ ui-tui/src/types/hermes-ink.d.ts | 4 ++ 6 files changed, 126 insertions(+), 4 deletions(-) diff --git a/scripts/profile-tui.py b/scripts/profile-tui.py index 5ef987c652..e70e990631 100755 --- a/scripts/profile-tui.py +++ b/scripts/profile-tui.py @@ -219,6 +219,45 @@ def format_report(data: dict[str, Any]) -> str: f" patches p50={pct(patches,0.5):.0f} p99={pct(patches,0.99):.0f} " f"max={max(patches)} total={sum(patches)}" ) + optimized = [ + f["phases"].get("optimizedPatches", 0) + for f in frames if f.get("phases") + ] + if any(optimized): + out.append( + f" optimized p50={pct(optimized,0.5):.0f} p99={pct(optimized,0.99):.0f} " + f"max={max(optimized)} total={sum(optimized)}" + f" (ratio: {sum(optimized)/max(1,sum(patches)):.2f})" + ) + + # Write bytes + drain telemetry — the outer-terminal bottleneck gauge. + bytes_written = [ + f["phases"].get("writeBytes", 0) + for f in frames if f.get("phases") + ] + if any(bytes_written): + total_b = sum(bytes_written) + kb = total_b / 1024 + out.append( + f" writeBytes p50={pct(bytes_written,0.5):.0f}B p99={pct(bytes_written,0.99):.0f}B " + f"max={max(bytes_written)}B total={kb:.1f}KB" + ) + drains = [ + f["phases"].get("prevFrameDrainMs", 0) + for f in frames if f.get("phases") + ] + if any(d > 0 for d in drains): + nonzero = [d for d in drains if d > 0] + out.append( + f" drainMs p50={pct(nonzero,0.5):.2f} p95={pct(nonzero,0.95):.2f} " + f"p99={pct(nonzero,0.99):.2f} max={max(nonzero):.2f} (terminal flush latency)" + ) + backpressure = sum(1 for f in frames if f.get("phases", {}).get("backpressure")) + if backpressure: + out.append( + f" backpressure: {backpressure}/{len(frames)} frames " + f"({100*backpressure/len(frames):.0f}%) (Node stdout buffer full — terminal slow)" + ) # Flickers flicker_frames = [f for f in frames if f.get("flickers")] diff --git a/ui-tui/packages/hermes-ink/src/ink/frame.ts b/ui-tui/packages/hermes-ink/src/ink/frame.ts index b85c0ad944..760fcc52fe 100644 --- a/ui-tui/packages/hermes-ink/src/ink/frame.ts +++ b/ui-tui/packages/hermes-ink/src/ink/frame.ts @@ -46,6 +46,17 @@ export type FrameEvent = { write: number /** Pre-optimize patch count (proxy for how much changed this frame) */ patches: number + /** Post-optimize patch count — what was actually written to stdout. */ + optimizedPatches: number + /** Bytes written to stdout this frame (escape sequences + payload). */ + writeBytes: number + /** Whether stdout.write returned false (backpressure = outer terminal slow). */ + backpressure: boolean + /** ms from this frame's stdout.write until the write-callback fired. + * Populated on the NEXT frame (async), so this field reflects the + * PREVIOUS frame's terminal-drain time. 0 = callback already fired + * before next frame started (drained in sub-ms). */ + prevFrameDrainMs: number /** yoga calculateLayout() time (runs in resetAfterCommit, before onRender) */ yoga: number /** React reconcile time: scrollMutated → resetAfterCommit. 0 if no commit. */ diff --git a/ui-tui/packages/hermes-ink/src/ink/ink.tsx b/ui-tui/packages/hermes-ink/src/ink/ink.tsx index 71e3066a47..1bd47d61f1 100644 --- a/ui-tui/packages/hermes-ink/src/ink/ink.tsx +++ b/ui-tui/packages/hermes-ink/src/ink/ink.tsx @@ -165,6 +165,15 @@ export default class Ink { private backFrame: Frame private lastPoolResetTime = performance.now() private drainTimer: ReturnType | null = null + // Write-drain telemetry: pendingWriteStart is the performance.now() of + // the most recent stdout.write waiting for its drain callback. Set to + // null when the callback fires (drained). Read on the NEXT frame and + // reported as prevFrameDrainMs so the FrameEvent records how long the + // previous write took to actually hit the terminal — distinguishes + // "queued in Node" (write returned true) from "terminal accepted bytes" + // (callback fired). + private pendingWriteStart: number | null = null + private lastDrainMs = 0 private lastYogaCounters: { ms: number visited: number @@ -970,7 +979,43 @@ export default class Ink { } const tWrite = performance.now() - writeDiffToTerminal(this.terminal, optimized, this.altScreenActive && !SYNC_OUTPUT_SUPPORTED) + // Capture any stale pending write BEFORE starting this frame's write — + // if the callback already fired, pendingWriteStart is null and lastDrainMs + // already reflects the previous frame's drain. If it hasn't fired, we + // report "still pending" via a non-zero duration based on now-then so + // backpressure shows up even if Node never flushes this session. + const staleDrain = + this.pendingWriteStart !== null + ? performance.now() - this.pendingWriteStart + : this.lastDrainMs + + const prevFrameDrainMs = Math.round(staleDrain * 100) / 100 + this.lastDrainMs = 0 + + // Only track drain on TTY. Piped/non-TTY stdout bypasses flow control. + const trackDrain = this.options.stdout.isTTY && hasDiff + const drainStart = trackDrain ? tWrite : 0 + + if (trackDrain) { + this.pendingWriteStart = drainStart + } + + const { bytes: writeBytes, backpressure } = writeDiffToTerminal( + this.terminal, + optimized, + this.altScreenActive && !SYNC_OUTPUT_SUPPORTED, + trackDrain + ? () => { + // Callback fires once Node has flushed the chunk to the OS. + // Capture the drain time and clear pending so the NEXT frame's + // staleDrain = the real end-to-end flush time. + if (this.pendingWriteStart === drainStart) { + this.lastDrainMs = performance.now() - drainStart + this.pendingWriteStart = null + } + } + : undefined + ) const writeMs = performance.now() - tWrite // Update blit safety for the NEXT frame. The frame just rendered @@ -1008,6 +1053,10 @@ export default class Ink { optimize: optimizeMs, write: writeMs, patches: diff.length, + optimizedPatches: optimized.length, + writeBytes, + backpressure, + prevFrameDrainMs, yoga: yogaMs, commit: commitMs, yogaVisited: yc.visited, diff --git a/ui-tui/packages/hermes-ink/src/ink/terminal.ts b/ui-tui/packages/hermes-ink/src/ink/terminal.ts index 75637c76f8..0ffe6e80cb 100644 --- a/ui-tui/packages/hermes-ink/src/ink/terminal.ts +++ b/ui-tui/packages/hermes-ink/src/ink/terminal.ts @@ -203,10 +203,15 @@ export type Terminal = { stderr: Writable } -export function writeDiffToTerminal(terminal: Terminal, diff: Diff, skipSyncMarkers = false): void { +export function writeDiffToTerminal( + terminal: Terminal, + diff: Diff, + skipSyncMarkers = false, + onDrain?: () => void +): { bytes: number; backpressure: boolean } { // No output if there are no patches if (diff.length === 0) { - return + return { bytes: 0, backpressure: false } } // BSU/ESU wrapping is opt-out to keep main-screen behavior unchanged. @@ -278,5 +283,15 @@ export function writeDiffToTerminal(terminal: Terminal, diff: Diff, skipSyncMark buffer += ESU } - terminal.stdout.write(buffer) + // Node's Writable.write returns false when the internal buffer is full + // (backpressure). On a slow terminal parser that's the tell: we're + // producing bytes faster than the outer terminal can consume them. + // The 2-arg form attaches a drain callback that fires once the chunk + // is actually flushed to the OS socket/pipe — giving us end-to-end + // drain timing, not just "queued in Node". + const wrote = onDrain + ? terminal.stdout.write(buffer, () => onDrain()) + : terminal.stdout.write(buffer) + + return { bytes: Buffer.byteLength(buffer, 'utf8'), backpressure: !wrote } } diff --git a/ui-tui/src/lib/perfPane.tsx b/ui-tui/src/lib/perfPane.tsx index 331fb62dc5..ab512c108f 100644 --- a/ui-tui/src/lib/perfPane.tsx +++ b/ui-tui/src/lib/perfPane.tsx @@ -149,12 +149,16 @@ export const logFrameEvent = ENABLED flickers: event.flickers.length ? event.flickers : undefined, phases: event.phases ? { + backpressure: event.phases.backpressure, commit: round2(event.phases.commit), diff: round2(event.phases.diff), optimize: round2(event.phases.optimize), + optimizedPatches: event.phases.optimizedPatches, patches: event.phases.patches, + prevFrameDrainMs: round2(event.phases.prevFrameDrainMs), renderer: round2(event.phases.renderer), write: round2(event.phases.write), + writeBytes: event.phases.writeBytes, yoga: round2(event.phases.yoga), yogaCacheHits: event.phases.yogaCacheHits, yogaLive: event.phases.yogaLive, diff --git a/ui-tui/src/types/hermes-ink.d.ts b/ui-tui/src/types/hermes-ink.d.ts index 0ad9a957ef..4ecd10ee9d 100644 --- a/ui-tui/src/types/hermes-ink.d.ts +++ b/ui-tui/src/types/hermes-ink.d.ts @@ -41,6 +41,10 @@ declare module '@hermes/ink' { readonly optimize: number readonly write: number readonly patches: number + readonly optimizedPatches: number + readonly writeBytes: number + readonly backpressure: boolean + readonly prevFrameDrainMs: number readonly yoga: number readonly commit: number readonly yogaVisited: number