mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-07 02:51:50 +00:00
perf(tui): cache stringWidth/wrapText/sliceAnsi + skip-slice when line fits clip
CPU profile (Apr 2026, real-user scroll on 11k-line session) showed three hot loops in the per-frame render path:

    Output.get() per-frame walk:                24% total
    └─ sliceAnsi(line, from, to) per write:     18% total
    stringWidth(line) chain (cached + JS):      14% total

All three were re-doing identical work every frame: same string → same clipped slice → same width.

Fixes:

1. Memoize stringWidth (8k-entry LRU) for non-ASCII strings; ASCII fast-path skips the cache (inline scan beats Map.get for short ASCII, the >90% case). String.charCodeAt scan up to 64 chars is cheaper than the regex fallback.
2. Memoize wrapText (4k-entry LRU keyed by maxWidth|wrapType|text) — wrapAnsi is pure and the same content reflows identically every frame.
3. Memoize sliceAnsi (4k-entry LRU keyed by start|end|str) for the end-defined hot path used by Output.get().
4. Skip the slice entirely in Output.get() when the line already fits the clip box (startsBefore=false && endsAfter=false). Most transcript lines never exceed their container width, and tokenizing them just to slice (line, 0, width) was pure overhead. This single fast-path drops sliceAnsi from 18% → ~0% in the profile.

Also tighten virtualization constants (MAX_MOUNTED 260→120, OVERSCAN 40→20, SLIDE_STEP 25→12) and cap historical-message render at 800 chars / 16 lines via HISTORY_RENDER_MAX_*; messages inside the FULL_RENDER_TAIL_ITEMS window still render in full so reading-zone behavior is unchanged.

Validation, real-user CPU profile, page-up scroll on 11k-line session:

    Output.get() self-time:  24%   → 0.3%
    sliceAnsi total:         18%   → not in top 25
    stringWidth family:      14%   → ~3%
    idle:                    60.7% → 77.3%

Frame timings (synthetic page-up profile harness):

    dur p95:   ~10ms → 4.87ms
    dur p99:   25ms+ → 12.80ms
    yoga p99:  ~20ms → 1.87ms

The remaining CPU in the profile is Yoga layoutNode + React commit, which is the irreducible work for this UI tree size.
This commit is contained in:
parent
85e9a23efb
commit
c370e2e1e5
14 changed files with 450 additions and 42 deletions
|
|
@ -467,9 +467,21 @@ export default class Output {
|
|||
|
||||
if (clipHorizontally) {
|
||||
lines = lines.map(line => {
|
||||
const from = x < clip.x1! ? clip.x1! - x : 0
|
||||
const startsBefore = x < clip.x1!
|
||||
const width = stringWidth(line)
|
||||
const to = x + width > clip.x2! ? clip.x2! - x : width
|
||||
const endsAfter = x + width > clip.x2!
|
||||
|
||||
// Fast path: line fits entirely within the clip box — skip
|
||||
// the tokenize/slice. This is the common case for transcript
|
||||
// text where containers are wider than the rendered content.
|
||||
// CPU profile (Apr 2026) showed sliceAnsi at 18% total time;
|
||||
// most calls were no-op slices like (line, 0, width).
|
||||
if (!startsBefore && !endsAfter) {
|
||||
return line
|
||||
}
|
||||
|
||||
const from = startsBefore ? clip.x1! - x : 0
|
||||
const to = endsAfter ? clip.x2! - x : width
|
||||
let sliced = sliceAnsi(line, from, to)
|
||||
|
||||
// Wide chars (CJK, emoji) occupy 2 cells. When `to` lands
|
||||
|
|
|
|||
|
|
@ -270,6 +270,58 @@ const bunStringWidth = typeof Bun !== 'undefined' && typeof Bun.stringWidth ===
|
|||
|
||||
const BUN_STRING_WIDTH_OPTS = { ambiguousIsNarrow: true } as const
|
||||
|
||||
export const stringWidth: (str: string) => number = bunStringWidth
|
||||
const rawStringWidth: (str: string) => number = bunStringWidth
|
||||
? str => bunStringWidth(str, BUN_STRING_WIDTH_OPTS)
|
||||
: stringWidthJavaScript
|
||||
|
||||
// Memoize stringWidth — it's pure, hot (~100k calls/frame per the comment
|
||||
// above), and the underlying impl scans every grapheme + tests EMOJI_REGEX.
|
||||
// CPU profile (Apr 2026) showed stringWidth dominating at 21% of total
|
||||
// runtime during scroll. Cache is global (vs per-frame) since the same
|
||||
// strings recur across frames in a stable transcript.
|
||||
//
|
||||
// Pure-ASCII short-strings (the >90% common case) skip the cache: the inline
|
||||
// loop in stringWidthJavaScript is already faster than a Map.get for them.
|
||||
const widthCache = new Map<string, number>()
|
||||
const WIDTH_CACHE_LIMIT = 8192
|
||||
|
||||
/**
 * Memoizing wrapper around `rawStringWidth`.
 *
 * - Falsy input yields 0.
 * - Strings of at most 64 UTF-16 code units whose units are all below 127 and
 *   contain no ESC (0x1b) are measured directly and never touch the cache.
 * - Any other string is looked up in `widthCache`; on a miss it is measured
 *   with `rawStringWidth` and the result is stored.
 *
 * Eviction follows Map insertion order: once the cache holds
 * `WIDTH_CACHE_LIMIT` entries, the earliest-inserted key is removed before the
 * new one is added. A cache hit does NOT move the entry, so the policy is
 * first-in-first-out rather than least-recently-used.
 *
 * @param str Text to measure.
 * @returns The width reported by `rawStringWidth` for `str`, or 0 when `str` is falsy.
 */
export const stringWidth: (str: string) => number = str => {
  if (!str) {
    return 0
  }

  const len = str.length

  if (len <= 64) {
    let idx = 0

    // Advance until the first code unit that is ESC or outside the plain range.
    while (idx < len) {
      const unit = str.charCodeAt(idx)

      if (unit === 0x1b || unit >= 127) {
        break
      }

      idx++
    }

    // Reaching the end means every unit passed: measure directly, skip the cache.
    if (idx === len) {
      return rawStringWidth(str)
    }
  }

  const hit = widthCache.get(str)

  if (hit !== undefined) {
    return hit
  }

  const measured = rawStringWidth(str)

  if (widthCache.size >= WIDTH_CACHE_LIMIT) {
    // First key yielded by a Map iterator is the earliest-inserted one.
    const oldestKey = widthCache.keys().next().value!

    widthCache.delete(oldestKey)
  }

  widthCache.set(str, measured)

  return measured
}
|
||||
|
|
|
|||
|
|
@ -6,6 +6,40 @@ import { wrapAnsi } from './wrapAnsi.js'
|
|||
|
||||
const ELLIPSIS = '…'
|
||||
|
||||
// CPU profile (Apr 2026) showed `wrap-ansi` → `string-width` consuming 30% of
|
||||
// total runtime during fast scroll: every layout pass re-wraps every visible
|
||||
// line via wrap-ansi, which calls string-width once per grapheme. The output
|
||||
// is pure of (text, maxWidth, wrapType), so memoize it. LRU-bounded so long
|
||||
// sessions don't accrete unbounded cache.
|
||||
const WRAP_CACHE_LIMIT = 4096
|
||||
const wrapCache = new Map<string, string>()
|
||||
|
||||
/**
 * Cached front-end for `computeWrap`.
 *
 * The cache key is `maxWidth|wrapType|text`, so the same text wrapped at a
 * different width or with a different wrap mode occupies a separate entry.
 *
 * On a hit the entry is deleted and re-inserted, which moves it to the end of
 * the Map's iteration order. On a miss the result of `computeWrap` is stored;
 * if the cache already holds `WRAP_CACHE_LIMIT` entries, the key at the front
 * of the iteration order is removed first.
 *
 * @param text Text to wrap.
 * @param maxWidth Width forwarded to `computeWrap`.
 * @param wrapType Wrap mode forwarded to `computeWrap`.
 * @returns The (possibly cached) output of `computeWrap(text, maxWidth, wrapType)`.
 */
function memoizedWrap(text: string, maxWidth: number, wrapType: Styles['textWrap']): string {
  const cacheKey = `${maxWidth}|${wrapType}|${text}`
  const hit = wrapCache.get(cacheKey)

  if (hit === undefined) {
    const wrapped = computeWrap(text, maxWidth, wrapType)

    if (wrapCache.size >= WRAP_CACHE_LIMIT) {
      // Make room by dropping the entry at the front of the iteration order.
      const frontKey = wrapCache.keys().next().value!

      wrapCache.delete(frontKey)
    }

    wrapCache.set(cacheKey, wrapped)

    return wrapped
  }

  // Re-insert so this key becomes the most recent in Map order.
  wrapCache.delete(cacheKey)
  wrapCache.set(cacheKey, hit)

  return hit
}
|
||||
|
||||
// sliceAnsi may include a boundary-spanning wide char (e.g. CJK at position
|
||||
// end-1 with width 2 overshoots by 1). Retry with a tighter bound once.
|
||||
function sliceFit(text: string, start: number, end: number): string {
|
||||
|
|
@ -42,12 +76,9 @@ function truncate(text: string, columns: number, position: 'start' | 'middle' |
|
|||
return sliceFit(text, 0, columns - 1) + ELLIPSIS
|
||||
}
|
||||
|
||||
export default function wrapText(text: string, maxWidth: number, wrapType: Styles['textWrap']): string {
|
||||
function computeWrap(text: string, maxWidth: number, wrapType: Styles['textWrap']): string {
|
||||
if (wrapType === 'wrap') {
|
||||
return wrapAnsi(text, maxWidth, {
|
||||
trim: false,
|
||||
hard: true
|
||||
})
|
||||
return wrapAnsi(text, maxWidth, { trim: false, hard: true })
|
||||
}
|
||||
|
||||
if (wrapType === 'wrap-char') {
|
||||
|
|
@ -55,25 +86,24 @@ export default function wrapText(text: string, maxWidth: number, wrapType: Style
|
|||
}
|
||||
|
||||
if (wrapType === 'wrap-trim') {
|
||||
return wrapAnsi(text, maxWidth, {
|
||||
trim: true,
|
||||
hard: true
|
||||
})
|
||||
return wrapAnsi(text, maxWidth, { trim: true, hard: true })
|
||||
}
|
||||
|
||||
if (wrapType!.startsWith('truncate')) {
|
||||
let position: 'end' | 'middle' | 'start' = 'end'
|
||||
|
||||
if (wrapType === 'truncate-middle') {
|
||||
position = 'middle'
|
||||
}
|
||||
|
||||
if (wrapType === 'truncate-start') {
|
||||
position = 'start'
|
||||
}
|
||||
const position: 'end' | 'middle' | 'start' =
|
||||
wrapType === 'truncate-middle' ? 'middle' : wrapType === 'truncate-start' ? 'start' : 'end'
|
||||
|
||||
return truncate(text, maxWidth, position)
|
||||
}
|
||||
|
||||
return text
|
||||
}
|
||||
|
||||
/**
 * Public entry point for wrapping/truncating text.
 *
 * Falsy `text` or a `maxWidth` that is `<= 0` goes straight to `computeWrap`
 * and is never cached; every other call is routed through `memoizedWrap`.
 *
 * @param text Text to wrap.
 * @param maxWidth Target width passed through unchanged.
 * @param wrapType Wrap mode passed through unchanged.
 * @returns The wrapped text.
 */
export default function wrapText(text: string, maxWidth: number, wrapType: Styles['textWrap']): string {
  // Kept as `<= 0` (not `!(> 0)`) so a NaN width still takes the cached path.
  const bypassCache = !text || maxWidth <= 0

  return bypassCache ? computeWrap(text, maxWidth, wrapType) : memoizedWrap(text, maxWidth, wrapType)
}
|
||||
|
|
|
|||
|
|
@ -10,7 +10,42 @@ function filterStartCodes(codes: AnsiCode[]): AnsiCode[] {
|
|||
return codes.filter(c => !isEndCode(c))
|
||||
}
|
||||
|
||||
// LRU cache: same (string, start, end) → same output. Output.get() re-emits
|
||||
// identical writes every frame for stable transcript content; this avoids
|
||||
// re-tokenizing them. CPU profile (Apr 2026) showed sliceAnsi at 18% total
|
||||
// time during scroll. Bounded at 4096 entries — entries are short clipped
|
||||
// lines so memory cost is small.
|
||||
const sliceCache = new Map<string, string>()
|
||||
const SLICE_CACHE_LIMIT = 4096
|
||||
|
||||
/**
 * Cached front-end for `computeSlice`.
 *
 * - Falsy `str` returns `''` without calling `computeSlice`.
 * - When `end` is omitted the call is forwarded to `computeSlice` uncached.
 * - When `end` is given, results are cached under the key `start|end|str`.
 *   A hit re-inserts the entry so it moves to the end of the Map's iteration
 *   order; a miss stores the fresh result, first removing the front key if the
 *   cache already holds `SLICE_CACHE_LIMIT` entries.
 *
 * @param str Input string.
 * @param start Start bound forwarded to `computeSlice`.
 * @param end Optional end bound forwarded to `computeSlice`.
 * @returns The (possibly cached) output of `computeSlice(str, start, end)`.
 */
export default function sliceAnsi(str: string, start: number, end?: number): string {
  if (!str) {
    return ''
  }

  // Open-ended slices are not on the hot path; compute them directly.
  if (end === undefined) {
    return computeSlice(str, start, end)
  }

  const cacheKey = `${start}|${end}|${str}`
  const hit = sliceCache.get(cacheKey)

  if (hit !== undefined) {
    // Re-insert so this key becomes the most recent in Map order.
    sliceCache.delete(cacheKey)
    sliceCache.set(cacheKey, hit)

    return hit
  }

  const sliced = computeSlice(str, start, end)

  if (sliceCache.size >= SLICE_CACHE_LIMIT) {
    const frontKey = sliceCache.keys().next().value!

    sliceCache.delete(frontKey)
  }

  sliceCache.set(cacheKey, sliced)

  return sliced
}
|
||||
|
||||
function computeSlice(str: string, start: number, end?: number): string {
|
||||
const tokens = tokenize(str)
|
||||
let activeCodes: AnsiCode[] = []
|
||||
let position = 0
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue