mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-23 05:31:23 +00:00
perf(tui): cache stringWidth/wrapText/sliceAnsi + skip-slice when line fits clip
CPU profile (Apr 2026, real-user scroll on 11k-line session) showed three hot loops in the per-frame render path:

    Output.get() per-frame walk:               24% total
    └─ sliceAnsi(line, from, to) per write:    18% total
    stringWidth(line) chain (cached + JS):     14% total

All three were re-doing identical work every frame: same string → same clipped slice → same width.

Fixes:

1. Memoize stringWidth (8k-entry LRU) for non-ASCII strings; ASCII fast-path skips the cache (inline scan beats Map.get for short ASCII, the >90% case). String.charCodeAt scan up to 64 chars is cheaper than the regex fallback.

2. Memoize wrapText (4k-entry LRU keyed by maxWidth|wrapType|text) — wrapAnsi is pure and the same content reflows identically every frame.

3. Memoize sliceAnsi (4k-entry LRU keyed by start|end|str) for the end-defined hot path used by Output.get().

4. Skip the slice entirely in Output.get() when the line already fits the clip box (startsBefore=false && endsAfter=false). Most transcript lines never exceed their container width, and tokenizing them just to slice (line, 0, width) was pure overhead. This single fast-path drops sliceAnsi from 18% → ~0% in the profile.

Also tighten virtualization constants (MAX_MOUNTED 260→120, OVERSCAN 40→20, SLIDE_STEP 25→12) and cap historical-message render at 800 chars / 16 lines via HISTORY_RENDER_MAX_*; messages inside the FULL_RENDER_TAIL_ITEMS window still render in full so reading-zone behavior is unchanged.

Validation, real-user CPU profile, page-up scroll on 11k-line session:

    Output.get() self-time:   24% → 0.3%
    sliceAnsi total:          18% → not in top 25
    stringWidth family:       14% → ~3%
    idle:                     60.7% → 77.3%

Frame timings (synthetic page-up profile harness):

    dur p95:    ~10ms → 4.87ms
    dur p99:    25ms+ → 12.80ms
    yoga p99:   ~20ms → 1.87ms

The remaining CPU in the profile is Yoga layoutNode + React commit, which is the irreducible work for this UI tree size.
This commit is contained in:
parent
85e9a23efb
commit
c370e2e1e5
14 changed files with 450 additions and 42 deletions
|
|
@ -1,19 +1,29 @@
|
|||
import type { ScrollBoxHandle } from '@hermes/ink'
|
||||
import {
|
||||
type RefObject,
|
||||
useCallback,
|
||||
useDeferredValue,
|
||||
useEffect,
|
||||
useLayoutEffect,
|
||||
useRef,
|
||||
useState,
|
||||
useSyncExternalStore
|
||||
useSyncExternalStore,
|
||||
type RefObject
|
||||
} from 'react'
|
||||
|
||||
const ESTIMATE = 4
|
||||
const OVERSCAN = 40
|
||||
const MAX_MOUNTED = 260
|
||||
const COLD_START = 40
|
||||
// Overscan was 40 (= viewport) which is way more than needed when heights
|
||||
// are well-estimated. Cutting in half saves ~20 mounted items per scroll
|
||||
// edge → smaller fiber tree → less buffer-compose work per frame. HN/CC
|
||||
// dev (https://news.ycombinator.com/item?id=46699072) confirmed GC pressure
|
||||
// from large JSX trees was their main perf issue post-rewrite.
|
||||
const OVERSCAN = 20
|
||||
// Hard cap on mounted items. Was 260; profiling showed ~23k live Yoga
|
||||
// nodes during sustained PageUp catch-up (renderer p99=106ms). The
|
||||
// viewport+2*overscan = 80 rows of needed coverage = ~25 items at avg 3
|
||||
// rows/item, so 120 leaves >4× headroom and never blanks the viewport
|
||||
// even when items are tiny.
|
||||
const MAX_MOUNTED = 120
|
||||
const COLD_START = 30
|
||||
// Floor on unmeasured row height used when computing coverage — guarantees
|
||||
// the mounted span physically reaches the viewport bottom regardless of how
|
||||
// small items actually are (at the cost of over-mounting when items are
|
||||
|
|
@ -34,8 +44,10 @@ const FREEZE_RENDERS = 2
|
|||
// a single PageUp into unmeasured territory mounts ~190 rows with
|
||||
// PESSIMISTIC=1 coverage — each row running marked lexer + syntax
|
||||
// highlighting for ~3ms = ~600ms sync block. Sliding toward the target
|
||||
// over several commits keeps per-commit mount cost bounded.
|
||||
const SLIDE_STEP = 25
|
||||
// over several commits keeps per-commit mount cost bounded. Tightened
|
||||
// from 25 → 12: each new item adds ~100 fibers / Yoga nodes, and a
|
||||
// 25-item commit was the dominant contributor to the 100ms+ p99 frames.
|
||||
const SLIDE_STEP = 12
|
||||
|
||||
const NOOP = () => {}
|
||||
|
||||
|
|
@ -70,15 +82,19 @@ export function useVirtualHistory(
|
|||
columns: number,
|
||||
{
|
||||
estimate = ESTIMATE,
|
||||
initialHeights,
|
||||
liveTailActive = false,
|
||||
onHeightsChange,
|
||||
overscan = OVERSCAN,
|
||||
maxMounted = MAX_MOUNTED,
|
||||
coldStartCount = COLD_START
|
||||
} = {}
|
||||
}: VirtualHistoryOptions = {}
|
||||
) {
|
||||
const nodes = useRef(new Map<string, unknown>())
|
||||
const heights = useRef(new Map<string, number>())
|
||||
const heights = useRef(new Map(initialHeights))
|
||||
const initialHeightsRef = useRef(initialHeights)
|
||||
const refs = useRef(new Map<string, (el: unknown) => void>())
|
||||
const onHeightsChangeRef = useRef(onHeightsChange)
|
||||
// Bump whenever heightCache mutates so offsets rebuild on next read.
|
||||
// Ref (not state) — checked during render phase, zero extra commits.
|
||||
const offsetVersion = useRef(0)
|
||||
|
|
@ -106,6 +122,14 @@ export function useVirtualHistory(
|
|||
const prevRange = useRef<null | readonly [number, number]>(null)
|
||||
const freezeRenders = useRef(0)
|
||||
|
||||
onHeightsChangeRef.current = onHeightsChange
|
||||
|
||||
if (initialHeightsRef.current !== initialHeights) {
|
||||
initialHeightsRef.current = initialHeights
|
||||
heights.current = new Map(initialHeights)
|
||||
offsetVersion.current++
|
||||
}
|
||||
|
||||
if (prevColumns.current !== columns && prevColumns.current > 0 && columns > 0) {
|
||||
const ratio = prevColumns.current / columns
|
||||
|
||||
|
|
@ -377,6 +401,7 @@ export function useVirtualHistory(
|
|||
if (h > 0 && heights.current.get(key) !== h) {
|
||||
heights.current.set(key, h)
|
||||
offsetVersion.current++
|
||||
onHeightsChangeRef.current?.(heights.current)
|
||||
}
|
||||
|
||||
nodes.current.delete(key)
|
||||
|
|
@ -454,6 +479,7 @@ export function useVirtualHistory(
|
|||
|
||||
if (dirty) {
|
||||
offsetVersion.current++
|
||||
onHeightsChangeRef.current?.(heights.current)
|
||||
}
|
||||
})
|
||||
|
||||
|
|
@ -470,3 +496,13 @@ export function useVirtualHistory(
|
|||
interface MeasuredNode {
|
||||
yogaNode?: { getComputedHeight?: () => number } | null
|
||||
}
|
||||
|
||||
interface VirtualHistoryOptions {
|
||||
coldStartCount?: number
|
||||
estimate?: number
|
||||
initialHeights?: ReadonlyMap<string, number>
|
||||
liveTailActive?: boolean
|
||||
maxMounted?: number
|
||||
onHeightsChange?: (heights: ReadonlyMap<string, number>) => void
|
||||
overscan?: number
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue