mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-07 02:51:50 +00:00
perf(tui): instrument stdout drain — rule out terminal parse bottleneck
Adds four fields to FrameEvent.phases and the matching profile
summary:
  optimizedPatches  - post-optimize patch count (what is actually written
                      to stdout; the .patches field is pre-optimize)
  writeBytes        - UTF-8 byte count of this frame's write
  backpressure      - true when Node's stdout.write returned false
                      (Writable buffer full — the outer terminal can't
                      keep up)
  prevFrameDrainMs  - end-to-end drain time of the PREVIOUS frame's write,
                      captured from stdout.write's 2-arg callback.
                      Reported on the next frame so the measurement
                      reflects "time until the OS flushed the bytes to the
                      terminal fd", not "time until queued in Node".
writeDiffToTerminal() now returns { bytes, backpressure } and
accepts an optional onDrain callback. The callback is attached only
when stdout is a TTY and the frame has a diff to write; piped/non-TTY
stdout bypasses flow control, so the callback would fire synchronously
anyway.
Initial measurements under hold-wheel_up against 1106-msg session
(30Hz for 6s):
  patches total     28,888
  optimized total   16,700   (ratio 0.58 — optimizer cuts ~42%)
  writeBytes        42 KB / 10s = 4.2 KB/s throughput
  drainMs p50       0.14 ms  (terminal accepts bytes instantly)
  drainMs p99       0.85 ms
  backpressure      0% of frames
This rules out the terminal-parse hypothesis — Cursor's xterm.js
drains our output in sub-millisecond time at only 4 KB/s. The
remaining lag has to be in the render pipeline, not the wire.
Profile output now includes the bytes+drain+backpressure lines to
keep this visible on every subsequent iteration.
This commit is contained in:
parent
d3dedf10aa
commit
f823535db2
6 changed files with 126 additions and 4 deletions
|
|
@ -219,6 +219,45 @@ def format_report(data: dict[str, Any]) -> str:
|
||||||
f" patches p50={pct(patches,0.5):.0f} p99={pct(patches,0.99):.0f} "
|
f" patches p50={pct(patches,0.5):.0f} p99={pct(patches,0.99):.0f} "
|
||||||
f"max={max(patches)} total={sum(patches)}"
|
f"max={max(patches)} total={sum(patches)}"
|
||||||
)
|
)
|
||||||
|
optimized = [
|
||||||
|
f["phases"].get("optimizedPatches", 0)
|
||||||
|
for f in frames if f.get("phases")
|
||||||
|
]
|
||||||
|
if any(optimized):
|
||||||
|
out.append(
|
||||||
|
f" optimized p50={pct(optimized,0.5):.0f} p99={pct(optimized,0.99):.0f} "
|
||||||
|
f"max={max(optimized)} total={sum(optimized)}"
|
||||||
|
f" (ratio: {sum(optimized)/max(1,sum(patches)):.2f})"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Write bytes + drain telemetry — the outer-terminal bottleneck gauge.
|
||||||
|
bytes_written = [
|
||||||
|
f["phases"].get("writeBytes", 0)
|
||||||
|
for f in frames if f.get("phases")
|
||||||
|
]
|
||||||
|
if any(bytes_written):
|
||||||
|
total_b = sum(bytes_written)
|
||||||
|
kb = total_b / 1024
|
||||||
|
out.append(
|
||||||
|
f" writeBytes p50={pct(bytes_written,0.5):.0f}B p99={pct(bytes_written,0.99):.0f}B "
|
||||||
|
f"max={max(bytes_written)}B total={kb:.1f}KB"
|
||||||
|
)
|
||||||
|
drains = [
|
||||||
|
f["phases"].get("prevFrameDrainMs", 0)
|
||||||
|
for f in frames if f.get("phases")
|
||||||
|
]
|
||||||
|
if any(d > 0 for d in drains):
|
||||||
|
nonzero = [d for d in drains if d > 0]
|
||||||
|
out.append(
|
||||||
|
f" drainMs p50={pct(nonzero,0.5):.2f} p95={pct(nonzero,0.95):.2f} "
|
||||||
|
f"p99={pct(nonzero,0.99):.2f} max={max(nonzero):.2f} (terminal flush latency)"
|
||||||
|
)
|
||||||
|
backpressure = sum(1 for f in frames if f.get("phases", {}).get("backpressure"))
|
||||||
|
if backpressure:
|
||||||
|
out.append(
|
||||||
|
f" backpressure: {backpressure}/{len(frames)} frames "
|
||||||
|
f"({100*backpressure/len(frames):.0f}%) (Node stdout buffer full — terminal slow)"
|
||||||
|
)
|
||||||
|
|
||||||
# Flickers
|
# Flickers
|
||||||
flicker_frames = [f for f in frames if f.get("flickers")]
|
flicker_frames = [f for f in frames if f.get("flickers")]
|
||||||
|
|
|
||||||
|
|
@ -46,6 +46,17 @@ export type FrameEvent = {
|
||||||
write: number
|
write: number
|
||||||
/** Pre-optimize patch count (proxy for how much changed this frame) */
|
/** Pre-optimize patch count (proxy for how much changed this frame) */
|
||||||
patches: number
|
patches: number
|
||||||
|
/** Post-optimize patch count — what was actually written to stdout. */
|
||||||
|
optimizedPatches: number
|
||||||
|
/** Bytes written to stdout this frame (escape sequences + payload). */
|
||||||
|
writeBytes: number
|
||||||
|
/** Whether stdout.write returned false (backpressure = outer terminal slow). */
|
||||||
|
backpressure: boolean
|
||||||
|
/** ms from this frame's stdout.write until the write-callback fired.
|
||||||
|
* Populated on the NEXT frame (async), so this field reflects the
|
||||||
|
* PREVIOUS frame's terminal-drain time. 0 = callback already fired
|
||||||
|
* before next frame started (drained in sub-ms). */
|
||||||
|
prevFrameDrainMs: number
|
||||||
/** yoga calculateLayout() time (runs in resetAfterCommit, before onRender) */
|
/** yoga calculateLayout() time (runs in resetAfterCommit, before onRender) */
|
||||||
yoga: number
|
yoga: number
|
||||||
/** React reconcile time: scrollMutated → resetAfterCommit. 0 if no commit. */
|
/** React reconcile time: scrollMutated → resetAfterCommit. 0 if no commit. */
|
||||||
|
|
|
||||||
|
|
@ -165,6 +165,15 @@ export default class Ink {
|
||||||
private backFrame: Frame
|
private backFrame: Frame
|
||||||
private lastPoolResetTime = performance.now()
|
private lastPoolResetTime = performance.now()
|
||||||
private drainTimer: ReturnType<typeof setTimeout> | null = null
|
private drainTimer: ReturnType<typeof setTimeout> | null = null
|
||||||
|
// Write-drain telemetry: pendingWriteStart is the performance.now() of
|
||||||
|
// the most recent stdout.write waiting for its drain callback. Set to
|
||||||
|
// null when the callback fires (drained). Read on the NEXT frame and
|
||||||
|
// reported as prevFrameDrainMs so the FrameEvent records how long the
|
||||||
|
// previous write took to actually hit the terminal — distinguishes
|
||||||
|
// "queued in Node" (write returned true) from "terminal accepted bytes"
|
||||||
|
// (callback fired).
|
||||||
|
private pendingWriteStart: number | null = null
|
||||||
|
private lastDrainMs = 0
|
||||||
private lastYogaCounters: {
|
private lastYogaCounters: {
|
||||||
ms: number
|
ms: number
|
||||||
visited: number
|
visited: number
|
||||||
|
|
@ -970,7 +979,43 @@ export default class Ink {
|
||||||
}
|
}
|
||||||
|
|
||||||
const tWrite = performance.now()
|
const tWrite = performance.now()
|
||||||
writeDiffToTerminal(this.terminal, optimized, this.altScreenActive && !SYNC_OUTPUT_SUPPORTED)
|
// Capture any stale pending write BEFORE starting this frame's write —
|
||||||
|
// if the callback already fired, pendingWriteStart is null and lastDrainMs
|
||||||
|
// already reflects the previous frame's drain. If it hasn't fired, we
|
||||||
|
// report "still pending" via a non-zero duration based on now-then so
|
||||||
|
// backpressure shows up even if Node never flushes this session.
|
||||||
|
const staleDrain =
|
||||||
|
this.pendingWriteStart !== null
|
||||||
|
? performance.now() - this.pendingWriteStart
|
||||||
|
: this.lastDrainMs
|
||||||
|
|
||||||
|
const prevFrameDrainMs = Math.round(staleDrain * 100) / 100
|
||||||
|
this.lastDrainMs = 0
|
||||||
|
|
||||||
|
// Only track drain on TTY. Piped/non-TTY stdout bypasses flow control.
|
||||||
|
const trackDrain = this.options.stdout.isTTY && hasDiff
|
||||||
|
const drainStart = trackDrain ? tWrite : 0
|
||||||
|
|
||||||
|
if (trackDrain) {
|
||||||
|
this.pendingWriteStart = drainStart
|
||||||
|
}
|
||||||
|
|
||||||
|
const { bytes: writeBytes, backpressure } = writeDiffToTerminal(
|
||||||
|
this.terminal,
|
||||||
|
optimized,
|
||||||
|
this.altScreenActive && !SYNC_OUTPUT_SUPPORTED,
|
||||||
|
trackDrain
|
||||||
|
? () => {
|
||||||
|
// Callback fires once Node has flushed the chunk to the OS.
|
||||||
|
// Capture the drain time and clear pending so the NEXT frame's
|
||||||
|
// staleDrain = the real end-to-end flush time.
|
||||||
|
if (this.pendingWriteStart === drainStart) {
|
||||||
|
this.lastDrainMs = performance.now() - drainStart
|
||||||
|
this.pendingWriteStart = null
|
||||||
|
}
|
||||||
|
}
|
||||||
|
: undefined
|
||||||
|
)
|
||||||
const writeMs = performance.now() - tWrite
|
const writeMs = performance.now() - tWrite
|
||||||
|
|
||||||
// Update blit safety for the NEXT frame. The frame just rendered
|
// Update blit safety for the NEXT frame. The frame just rendered
|
||||||
|
|
@ -1008,6 +1053,10 @@ export default class Ink {
|
||||||
optimize: optimizeMs,
|
optimize: optimizeMs,
|
||||||
write: writeMs,
|
write: writeMs,
|
||||||
patches: diff.length,
|
patches: diff.length,
|
||||||
|
optimizedPatches: optimized.length,
|
||||||
|
writeBytes,
|
||||||
|
backpressure,
|
||||||
|
prevFrameDrainMs,
|
||||||
yoga: yogaMs,
|
yoga: yogaMs,
|
||||||
commit: commitMs,
|
commit: commitMs,
|
||||||
yogaVisited: yc.visited,
|
yogaVisited: yc.visited,
|
||||||
|
|
|
||||||
|
|
@ -203,10 +203,15 @@ export type Terminal = {
|
||||||
stderr: Writable
|
stderr: Writable
|
||||||
}
|
}
|
||||||
|
|
||||||
export function writeDiffToTerminal(terminal: Terminal, diff: Diff, skipSyncMarkers = false): void {
|
export function writeDiffToTerminal(
|
||||||
|
terminal: Terminal,
|
||||||
|
diff: Diff,
|
||||||
|
skipSyncMarkers = false,
|
||||||
|
onDrain?: () => void
|
||||||
|
): { bytes: number; backpressure: boolean } {
|
||||||
// No output if there are no patches
|
// No output if there are no patches
|
||||||
if (diff.length === 0) {
|
if (diff.length === 0) {
|
||||||
return
|
return { bytes: 0, backpressure: false }
|
||||||
}
|
}
|
||||||
|
|
||||||
// BSU/ESU wrapping is opt-out to keep main-screen behavior unchanged.
|
// BSU/ESU wrapping is opt-out to keep main-screen behavior unchanged.
|
||||||
|
|
@ -278,5 +283,15 @@ export function writeDiffToTerminal(terminal: Terminal, diff: Diff, skipSyncMark
|
||||||
buffer += ESU
|
buffer += ESU
|
||||||
}
|
}
|
||||||
|
|
||||||
terminal.stdout.write(buffer)
|
// Node's Writable.write returns false when the internal buffer is full
|
||||||
|
// (backpressure). On a slow terminal parser that's the tell: we're
|
||||||
|
// producing bytes faster than the outer terminal can consume them.
|
||||||
|
// The 2-arg form attaches a drain callback that fires once the chunk
|
||||||
|
// is actually flushed to the OS socket/pipe — giving us end-to-end
|
||||||
|
// drain timing, not just "queued in Node".
|
||||||
|
const wrote = onDrain
|
||||||
|
? terminal.stdout.write(buffer, () => onDrain())
|
||||||
|
: terminal.stdout.write(buffer)
|
||||||
|
|
||||||
|
return { bytes: Buffer.byteLength(buffer, 'utf8'), backpressure: !wrote }
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -149,12 +149,16 @@ export const logFrameEvent = ENABLED
|
||||||
flickers: event.flickers.length ? event.flickers : undefined,
|
flickers: event.flickers.length ? event.flickers : undefined,
|
||||||
phases: event.phases
|
phases: event.phases
|
||||||
? {
|
? {
|
||||||
|
backpressure: event.phases.backpressure,
|
||||||
commit: round2(event.phases.commit),
|
commit: round2(event.phases.commit),
|
||||||
diff: round2(event.phases.diff),
|
diff: round2(event.phases.diff),
|
||||||
optimize: round2(event.phases.optimize),
|
optimize: round2(event.phases.optimize),
|
||||||
|
optimizedPatches: event.phases.optimizedPatches,
|
||||||
patches: event.phases.patches,
|
patches: event.phases.patches,
|
||||||
|
prevFrameDrainMs: round2(event.phases.prevFrameDrainMs),
|
||||||
renderer: round2(event.phases.renderer),
|
renderer: round2(event.phases.renderer),
|
||||||
write: round2(event.phases.write),
|
write: round2(event.phases.write),
|
||||||
|
writeBytes: event.phases.writeBytes,
|
||||||
yoga: round2(event.phases.yoga),
|
yoga: round2(event.phases.yoga),
|
||||||
yogaCacheHits: event.phases.yogaCacheHits,
|
yogaCacheHits: event.phases.yogaCacheHits,
|
||||||
yogaLive: event.phases.yogaLive,
|
yogaLive: event.phases.yogaLive,
|
||||||
|
|
|
||||||
4
ui-tui/src/types/hermes-ink.d.ts
vendored
4
ui-tui/src/types/hermes-ink.d.ts
vendored
|
|
@ -41,6 +41,10 @@ declare module '@hermes/ink' {
|
||||||
readonly optimize: number
|
readonly optimize: number
|
||||||
readonly write: number
|
readonly write: number
|
||||||
readonly patches: number
|
readonly patches: number
|
||||||
|
readonly optimizedPatches: number
|
||||||
|
readonly writeBytes: number
|
||||||
|
readonly backpressure: boolean
|
||||||
|
readonly prevFrameDrainMs: number
|
||||||
readonly yoga: number
|
readonly yoga: number
|
||||||
readonly commit: number
|
readonly commit: number
|
||||||
readonly yogaVisited: number
|
readonly yogaVisited: number
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue