perf(tui): incremental markdown during streaming

Split in-flight assistant text at the last stable block boundary so only
the unclosed tail re-tokenizes per stream delta. Previously the full
text was rendered as plain <Text> during streaming and only flipped to
<Md> at message.complete — cheap per delta but loses live markdown
formatting.

New StreamingMd component holds a monotonically-growing stablePrefix
in a ref (idempotent under StrictMode double-render), renders it as
one <Md> that memoizes across deltas, and renders the unstable suffix
as a second <Md> that re-parses on each delta. Cost per delta drops
from O(total length) to O(unstable length).

findStableBoundary walks back to the last "\n\n" outside an open
fenced code block — splitting inside an open fence would orphan the
opener and break highlighting in the prefix.

Adapted from claude-code's src/components/Markdown.tsx:186 but built
on our line-based tokenizer instead of marked.lexer. 9 new tests cover
fence balance, boundary walk, and empty input.

Part of the --tui perf audit (see audit #7).
This commit is contained in:
Brooklyn Nicholson 2026-04-26 16:21:34 -05:00
parent bde89c169b
commit debae25f1c
3 changed files with 211 additions and 1 deletions

View file

@ -10,6 +10,7 @@ import type { Theme } from '../theme.js'
import type { ActiveTool, DetailsMode, Msg, SectionVisibility } from '../types.js'
import { Md } from './markdown.js'
import { StreamingMd } from './streamingMarkdown.js'
import { ToolTrail } from './thinking.js'
import { TodoPanel } from './todoPanel.js'
@ -94,7 +95,10 @@ export const MessageLine = memo(function MessageLine({
if (msg.role === 'assistant') {
return isStreaming ? (
<Text color={body}>{boundedLiveRenderText(msg.text)}</Text>
// Incremental markdown: split at the last stable block boundary so
// only the in-flight tail re-tokenizes per delta. See
// streamingMarkdown.tsx for the cost model.
<StreamingMd compact={compact} t={t} text={boundedLiveRenderText(msg.text)} />
) : (
<Md compact={compact} t={t} text={msg.text} />
)

View file

@ -0,0 +1,127 @@
// StreamingMd — incremental markdown renderer for in-flight assistant text.
//
// Naive approach (render <Md text={full}/>) re-tokenizes the entire message
// on every stream delta. At 20-char batches over a 3 KB response that's 150
// full re-parses.
//
// This splits `text` at the last stable top-level block boundary (blank
// line outside a fenced code span) into:
// stablePrefix — passed to an inner <Md>, memoized on its exact text
// value. During the turn, the prefix only grows monotonically,
// so its memo key matches the previous render and React
// reuses the cached subtree — zero re-tokenization.
// unstableSuffix — the in-flight block(s). A separate <Md> re-parses just
// this tail on every delta (O(unstable length) vs.
// O(total length)).
//
// The boundary is stored in a ref so it only advances — idempotent under
// StrictMode double-render. Component unmounts between turns (isStreaming
// flips off → message moves to history and renders via <Md> directly), so
// the ref resets naturally.
//
// See src/app/useMainApp.ts for the reasoning on why we don't memoize the
// whole Md text during streaming: that cache never hits because `text` is
// growing. Mirror claude-code's `StreamingMarkdown` approach adapted to
// our line-based tokenizer.
import { memo, useRef } from 'react'
import type { Theme } from '../theme.js'
import { Md } from './markdown.js'
// A line that begins (after optional whitespace) with ``` or ~~~ toggles
// fenced-code state. Hoisted so the scan loop doesn't re-create the regex.
const FENCE_LINE = /^\s*(?:`{3,}|~{3,})/

// Scan `s` line-by-line up to `end`, flipping a boolean at every fence
// marker line. An odd number of toggles means `end` sits inside an open
// fenced block — splitting the prefix there would orphan the opener and
// break highlighting.
const fenceOpenAt = (s: string, end: number) => {
  let insideFence = false
  let cursor = 0
  while (cursor < end) {
    const newline = s.indexOf('\n', cursor)
    // Clamp the line to `end` when it has no newline or runs past the limit.
    const stop = newline === -1 || newline > end ? end : newline
    if (FENCE_LINE.test(s.slice(cursor, stop))) {
      insideFence = !insideFence
    }
    if (newline === -1 || newline >= end) {
      break
    }
    cursor = newline + 1
  }
  return insideFence
}
// Locate the last "\n\n" boundary in `text` that falls OUTSIDE any fenced
// code block. Returns the index just past the second newline (i.e. the start
// of the next block), or -1 when no safe split point exists yet.
export const findStableBoundary = (text: string) => {
  for (let searchEnd = text.length; searchEnd > 0; ) {
    const gap = text.lastIndexOf('\n\n', searchEnd - 1)
    if (gap === -1) {
      break
    }
    // Candidate split: the stable prefix would end at gap + 2 (start of the
    // block after the blank line). Accept only if no fence is open there.
    const candidate = gap + 2
    if (!fenceOpenAt(text, candidate)) {
      return candidate
    }
    // Fence was open — keep walking backward for an earlier blank line.
    searchEnd = gap
  }
  return -1
}
export const StreamingMd = memo(function StreamingMd({ compact, t, text }: StreamingMdProps) {
  const prefixRef = useRef('')
  // Defensive reset: if `text` was replaced wholesale (the recorded prefix
  // no longer prefixes it), drop the cache. Normally unreachable because the
  // component unmounts between turns.
  if (!text.startsWith(prefixRef.current)) {
    prefixRef.current = ''
  }
  // Advance — never retreat — the cached stable prefix. The boundary is
  // recomputed against the full text each render; monotonic growth keeps the
  // prefix <Md>'s memo key byte-identical across deltas, so React reuses the
  // cached subtree with zero re-tokenization.
  const split = findStableBoundary(text)
  if (split > prefixRef.current.length) {
    prefixRef.current = text.slice(0, split)
  }
  const prefix = prefixRef.current
  const suffix = text.slice(prefix.length)
  // When either side is empty, render a single <Md> rather than a fragment.
  if (!prefix) {
    return <Md compact={compact} t={t} text={suffix} />
  }
  if (!suffix) {
    return <Md compact={compact} t={t} text={prefix} />
  }
  // Stable prefix memoizes; only the in-flight suffix re-parses per delta.
  return (
    <>
      <Md compact={compact} t={t} text={prefix} />
      <Md compact={compact} t={t} text={suffix} />
    </>
  )
})
/** Props for {@link StreamingMd}; all three are forwarded to the inner <Md> renders. */
interface StreamingMdProps {
  /** Compact layout flag, passed through to each <Md>. */
  compact?: boolean
  /** Active theme, passed through to each <Md>. */
  t: Theme
  /** Full in-flight assistant text received so far this turn. */
  text: string
}