mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-07 02:51:50 +00:00
perf(tui): instrument stdout drain — rule out terminal parse bottleneck
Adds four fields to FrameEvent.phases and the matching profile
summary:
optimizedPatches post-optimize patch count (what's actually
written to stdout; the .patches field is
pre-optimize)
writeBytes UTF-8 byte count of the write this frame
backpressure true when Node's stdout.write returned false
(Writable buffer full — outer terminal can't
keep up)
prevFrameDrainMs end-to-end drain time of the PREVIOUS frame's
write, captured from stdout.write's 2-arg
callback. Reported on the next frame so the
measurement reflects "time until OS flushed
the bytes to the terminal fd", not "time until
queued in Node".
writeDiffToTerminal() now returns { bytes, backpressure } and
accepts an optional onDrain callback. Only attached on TTY with
diff; piped/non-TTY stdout bypasses flow control so the callback
would fire synchronously anyway.
Initial measurements under hold-wheel_up against 1106-msg session
(30Hz for 6s):
patches total 28,888
optimized total 16,700 (ratio 0.58 — optimizer cuts ~42%)
writeBytes 42 KB / 10s = 4.2 KB/s throughput
drainMs p50 0.14 ms terminal accepts bytes instantly
drainMs p99 0.85 ms
backpressure 0% of frames
This rules out the terminal-parse hypothesis — Cursor's xterm.js
drains our output in sub-millisecond time at only 4 KB/s. The
remaining lag has to be in the render pipeline, not the wire.
Profile output now includes the bytes+drain+backpressure lines to
keep this visible on every subsequent iteration.
This commit is contained in:
parent
d3dedf10aa
commit
f823535db2
6 changed files with 126 additions and 4 deletions
|
|
@ -219,6 +219,45 @@ def format_report(data: dict[str, Any]) -> str:
|
|||
f" patches p50={pct(patches,0.5):.0f} p99={pct(patches,0.99):.0f} "
|
||||
f"max={max(patches)} total={sum(patches)}"
|
||||
)
|
||||
optimized = [
|
||||
f["phases"].get("optimizedPatches", 0)
|
||||
for f in frames if f.get("phases")
|
||||
]
|
||||
if any(optimized):
|
||||
out.append(
|
||||
f" optimized p50={pct(optimized,0.5):.0f} p99={pct(optimized,0.99):.0f} "
|
||||
f"max={max(optimized)} total={sum(optimized)}"
|
||||
f" (ratio: {sum(optimized)/max(1,sum(patches)):.2f})"
|
||||
)
|
||||
|
||||
# Write bytes + drain telemetry — the outer-terminal bottleneck gauge.
|
||||
bytes_written = [
|
||||
f["phases"].get("writeBytes", 0)
|
||||
for f in frames if f.get("phases")
|
||||
]
|
||||
if any(bytes_written):
|
||||
total_b = sum(bytes_written)
|
||||
kb = total_b / 1024
|
||||
out.append(
|
||||
f" writeBytes p50={pct(bytes_written,0.5):.0f}B p99={pct(bytes_written,0.99):.0f}B "
|
||||
f"max={max(bytes_written)}B total={kb:.1f}KB"
|
||||
)
|
||||
drains = [
|
||||
f["phases"].get("prevFrameDrainMs", 0)
|
||||
for f in frames if f.get("phases")
|
||||
]
|
||||
if any(d > 0 for d in drains):
|
||||
nonzero = [d for d in drains if d > 0]
|
||||
out.append(
|
||||
f" drainMs p50={pct(nonzero,0.5):.2f} p95={pct(nonzero,0.95):.2f} "
|
||||
f"p99={pct(nonzero,0.99):.2f} max={max(nonzero):.2f} (terminal flush latency)"
|
||||
)
|
||||
backpressure = sum(1 for f in frames if f.get("phases", {}).get("backpressure"))
|
||||
if backpressure:
|
||||
out.append(
|
||||
f" backpressure: {backpressure}/{len(frames)} frames "
|
||||
f"({100*backpressure/len(frames):.0f}%) (Node stdout buffer full — terminal slow)"
|
||||
)
|
||||
|
||||
# Flickers
|
||||
flicker_frames = [f for f in frames if f.get("flickers")]
|
||||
|
|
|
|||
|
|
@ -46,6 +46,17 @@ export type FrameEvent = {
|
|||
write: number
|
||||
/** Pre-optimize patch count (proxy for how much changed this frame) */
|
||||
patches: number
|
||||
/** Post-optimize patch count — what was actually written to stdout. */
|
||||
optimizedPatches: number
|
||||
/** Bytes written to stdout this frame (escape sequences + payload). */
|
||||
writeBytes: number
|
||||
/** Whether stdout.write returned false (backpressure = outer terminal slow). */
|
||||
backpressure: boolean
|
||||
/** ms from this frame's stdout.write until the write-callback fired.
|
||||
* Populated on the NEXT frame (async), so this field reflects the
|
||||
* PREVIOUS frame's terminal-drain time. 0 = callback already fired
|
||||
* before next frame started (drained in sub-ms). */
|
||||
prevFrameDrainMs: number
|
||||
/** yoga calculateLayout() time (runs in resetAfterCommit, before onRender) */
|
||||
yoga: number
|
||||
/** React reconcile time: scrollMutated → resetAfterCommit. 0 if no commit. */
|
||||
|
|
|
|||
|
|
@ -165,6 +165,15 @@ export default class Ink {
|
|||
private backFrame: Frame
|
||||
private lastPoolResetTime = performance.now()
|
||||
private drainTimer: ReturnType<typeof setTimeout> | null = null
|
||||
// Write-drain telemetry: pendingWriteStart is the performance.now() of
|
||||
// the most recent stdout.write waiting for its drain callback. Set to
|
||||
// null when the callback fires (drained). Read on the NEXT frame and
|
||||
// reported as prevFrameDrainMs so the FrameEvent records how long the
|
||||
// previous write took to actually hit the terminal — distinguishes
|
||||
// "queued in Node" (write returned true) from "terminal accepted bytes"
|
||||
// (callback fired).
|
||||
private pendingWriteStart: number | null = null
|
||||
private lastDrainMs = 0
|
||||
private lastYogaCounters: {
|
||||
ms: number
|
||||
visited: number
|
||||
|
|
@ -970,7 +979,43 @@ export default class Ink {
|
|||
}
|
||||
|
||||
const tWrite = performance.now()
|
||||
writeDiffToTerminal(this.terminal, optimized, this.altScreenActive && !SYNC_OUTPUT_SUPPORTED)
|
||||
// Capture any stale pending write BEFORE starting this frame's write —
|
||||
// if the callback already fired, pendingWriteStart is null and lastDrainMs
|
||||
// already reflects the previous frame's drain. If it hasn't fired, we
|
||||
// report "still pending" via a non-zero duration based on now-then so
|
||||
// backpressure shows up even if Node never flushes this session.
|
||||
const staleDrain =
|
||||
this.pendingWriteStart !== null
|
||||
? performance.now() - this.pendingWriteStart
|
||||
: this.lastDrainMs
|
||||
|
||||
const prevFrameDrainMs = Math.round(staleDrain * 100) / 100
|
||||
this.lastDrainMs = 0
|
||||
|
||||
// Only track drain on TTY. Piped/non-TTY stdout bypasses flow control.
|
||||
const trackDrain = this.options.stdout.isTTY && hasDiff
|
||||
const drainStart = trackDrain ? tWrite : 0
|
||||
|
||||
if (trackDrain) {
|
||||
this.pendingWriteStart = drainStart
|
||||
}
|
||||
|
||||
const { bytes: writeBytes, backpressure } = writeDiffToTerminal(
|
||||
this.terminal,
|
||||
optimized,
|
||||
this.altScreenActive && !SYNC_OUTPUT_SUPPORTED,
|
||||
trackDrain
|
||||
? () => {
|
||||
// Callback fires once Node has flushed the chunk to the OS.
|
||||
// Capture the drain time and clear pending so the NEXT frame's
|
||||
// staleDrain = the real end-to-end flush time.
|
||||
if (this.pendingWriteStart === drainStart) {
|
||||
this.lastDrainMs = performance.now() - drainStart
|
||||
this.pendingWriteStart = null
|
||||
}
|
||||
}
|
||||
: undefined
|
||||
)
|
||||
const writeMs = performance.now() - tWrite
|
||||
|
||||
// Update blit safety for the NEXT frame. The frame just rendered
|
||||
|
|
@ -1008,6 +1053,10 @@ export default class Ink {
|
|||
optimize: optimizeMs,
|
||||
write: writeMs,
|
||||
patches: diff.length,
|
||||
optimizedPatches: optimized.length,
|
||||
writeBytes,
|
||||
backpressure,
|
||||
prevFrameDrainMs,
|
||||
yoga: yogaMs,
|
||||
commit: commitMs,
|
||||
yogaVisited: yc.visited,
|
||||
|
|
|
|||
|
|
@ -203,10 +203,15 @@ export type Terminal = {
|
|||
stderr: Writable
|
||||
}
|
||||
|
||||
export function writeDiffToTerminal(terminal: Terminal, diff: Diff, skipSyncMarkers = false): void {
|
||||
export function writeDiffToTerminal(
|
||||
terminal: Terminal,
|
||||
diff: Diff,
|
||||
skipSyncMarkers = false,
|
||||
onDrain?: () => void
|
||||
): { bytes: number; backpressure: boolean } {
|
||||
// No output if there are no patches
|
||||
if (diff.length === 0) {
|
||||
return
|
||||
return { bytes: 0, backpressure: false }
|
||||
}
|
||||
|
||||
// BSU/ESU wrapping is opt-out to keep main-screen behavior unchanged.
|
||||
|
|
@ -278,5 +283,15 @@ export function writeDiffToTerminal(terminal: Terminal, diff: Diff, skipSyncMark
|
|||
buffer += ESU
|
||||
}
|
||||
|
||||
terminal.stdout.write(buffer)
|
||||
// Node's Writable.write returns false when the internal buffer is full
|
||||
// (backpressure). On a slow terminal parser that's the tell: we're
|
||||
// producing bytes faster than the outer terminal can consume them.
|
||||
// The 2-arg form attaches a drain callback that fires once the chunk
|
||||
// is actually flushed to the OS socket/pipe — giving us end-to-end
|
||||
// drain timing, not just "queued in Node".
|
||||
const wrote = onDrain
|
||||
? terminal.stdout.write(buffer, () => onDrain())
|
||||
: terminal.stdout.write(buffer)
|
||||
|
||||
return { bytes: Buffer.byteLength(buffer, 'utf8'), backpressure: !wrote }
|
||||
}
|
||||
|
|
|
|||
|
|
@ -149,12 +149,16 @@ export const logFrameEvent = ENABLED
|
|||
flickers: event.flickers.length ? event.flickers : undefined,
|
||||
phases: event.phases
|
||||
? {
|
||||
backpressure: event.phases.backpressure,
|
||||
commit: round2(event.phases.commit),
|
||||
diff: round2(event.phases.diff),
|
||||
optimize: round2(event.phases.optimize),
|
||||
optimizedPatches: event.phases.optimizedPatches,
|
||||
patches: event.phases.patches,
|
||||
prevFrameDrainMs: round2(event.phases.prevFrameDrainMs),
|
||||
renderer: round2(event.phases.renderer),
|
||||
write: round2(event.phases.write),
|
||||
writeBytes: event.phases.writeBytes,
|
||||
yoga: round2(event.phases.yoga),
|
||||
yogaCacheHits: event.phases.yogaCacheHits,
|
||||
yogaLive: event.phases.yogaLive,
|
||||
|
|
|
|||
4
ui-tui/src/types/hermes-ink.d.ts
vendored
4
ui-tui/src/types/hermes-ink.d.ts
vendored
|
|
@ -41,6 +41,10 @@ declare module '@hermes/ink' {
|
|||
readonly optimize: number
|
||||
readonly write: number
|
||||
readonly patches: number
|
||||
readonly optimizedPatches: number
|
||||
readonly writeBytes: number
|
||||
readonly backpressure: boolean
|
||||
readonly prevFrameDrainMs: number
|
||||
readonly yoga: number
|
||||
readonly commit: number
|
||||
readonly yogaVisited: number
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue