perf(tui): incremental markdown during streaming

Split in-flight assistant text at the last stable block boundary so only
the unclosed tail re-tokenizes per stream delta. Previously the full
text was rendered as plain <Text> during streaming and only flipped to
<Md> at message.complete — cheap per delta but loses live markdown
formatting.

New StreamingMd component holds a monotonically-growing stablePrefix
in a ref (idempotent under StrictMode double-render), renders it as
one <Md> that memoizes across deltas, and renders the unstable suffix
as a second <Md> that re-parses on each delta. Cost per delta drops
from O(total length) to O(unstable length).

findStableBoundary walks back to the last "\n\n" outside an open
fenced code block — splitting inside an open fence would orphan the
opener and break highlighting in the prefix.

Adapted from claude-code's src/components/Markdown.tsx:186 but built
on our line-based tokenizer instead of marked.lexer. 9 new tests cover
fence balance, boundary walk, and empty input.

Part of the --tui perf audit (see audit #7).
This commit is contained in:
Brooklyn Nicholson 2026-04-26 16:21:34 -05:00
parent bde89c169b
commit debae25f1c
3 changed files with 211 additions and 1 deletions

View file

@@ -0,0 +1,79 @@
import { describe, expect, it } from 'vitest'
import { findStableBoundary } from '../components/streamingMarkdown.js'
// findStableBoundary is exported from streamingMarkdown.tsx precisely so
// this pure boundary logic can be unit-tested directly. Mounting
// StreamingMd to observe its monotonic-prefix behaviour would require a
// full React/Ink test renderer, which is heavy; the component is otherwise
// a thin wrapper around this helper, so direct tests of the fence balance
// and boundary walk give the coverage that matters.
import { DEFAULT_THEME } from '../theme.js'
describe('findStableBoundary', () => {
  it('returns -1 when no blank line exists yet', () => {
    const result = findStableBoundary('partial line with no newline yet')
    expect(result).toBe(-1)
  })
  it('returns -1 when only single newlines exist', () => {
    const result = findStableBoundary('line one\nline two\nline three')
    expect(result).toBe(-1)
  })
  it('splits after the last blank line separator', () => {
    // The final blank line precedes 'third', so everything up to and
    // including that "\n\n" is the stable prefix.
    const input = 'first paragraph\n\nsecond paragraph\n\nthird'
    const split = findStableBoundary(input)
    expect(input.slice(0, split)).toBe('first paragraph\n\nsecond paragraph\n\n')
    expect(input.slice(split)).toBe('third')
  })
  it('refuses to split inside an open fenced block', () => {
    // The fence opens and never closes; the blank line sits inside the
    // code, so no boundary is safe yet.
    const input = '```ts\nfn();\n\nmore code here'
    expect(findStableBoundary(input)).toBe(-1)
  })
  it('splits before an open fenced block but not inside', () => {
    const input = 'intro paragraph\n\n```ts\nfn();\n\nmore code'
    const split = findStableBoundary(input)
    expect(input.slice(0, split)).toBe('intro paragraph\n\n')
    expect(input.slice(split).startsWith('```ts')).toBe(true)
  })
  it('allows splitting after a fenced block closes', () => {
    const input = '```ts\nfn();\n```\n\nnarration continues'
    const split = findStableBoundary(input)
    expect(input.slice(0, split)).toBe('```ts\nfn();\n```\n\n')
    expect(input.slice(split)).toBe('narration continues')
  })
  it('walks backwards through nested fence boundaries safely', () => {
    // Closed fences, narration, then a fence that is still open: the only
    // legal split point is just before the open fence, not between the
    // closed ones.
    const input = '```js\na\n```\n\nmid text\n\n```python\nstill open'
    const split = findStableBoundary(input)
    expect(input.slice(0, split)).toBe('```js\na\n```\n\nmid text\n\n')
  })
  it('handles empty input', () => {
    expect(findStableBoundary('')).toBe(-1)
  })
})
describe('streaming theme assumption', () => {
  it('theme is exportable (component import sanity check)', () => {
    // Smoke test: the streamingMarkdown module graph resolved without
    // cycles (its import above succeeded), and the theme object still has
    // the shape the component is handed.
    expect(DEFAULT_THEME.color.amber).toBeTruthy()
  })
})

View file

@ -10,6 +10,7 @@ import type { Theme } from '../theme.js'
import type { ActiveTool, DetailsMode, Msg, SectionVisibility } from '../types.js' import type { ActiveTool, DetailsMode, Msg, SectionVisibility } from '../types.js'
import { Md } from './markdown.js' import { Md } from './markdown.js'
import { StreamingMd } from './streamingMarkdown.js'
import { ToolTrail } from './thinking.js' import { ToolTrail } from './thinking.js'
import { TodoPanel } from './todoPanel.js' import { TodoPanel } from './todoPanel.js'
@ -94,7 +95,10 @@ export const MessageLine = memo(function MessageLine({
if (msg.role === 'assistant') { if (msg.role === 'assistant') {
return isStreaming ? ( return isStreaming ? (
<Text color={body}>{boundedLiveRenderText(msg.text)}</Text> // Incremental markdown: split at the last stable block boundary so
// only the in-flight tail re-tokenizes per delta. See
// streamingMarkdown.tsx for the cost model.
<StreamingMd compact={compact} t={t} text={boundedLiveRenderText(msg.text)} />
) : ( ) : (
<Md compact={compact} t={t} text={msg.text} /> <Md compact={compact} t={t} text={msg.text} />
) )

View file

@@ -0,0 +1,127 @@
// StreamingMd — incremental markdown renderer for in-flight assistant text.
//
// Naive approach (render <Md text={full}/>) re-tokenizes the entire message
// on every stream delta. At 20-char batches over a 3 KB response that's 150
// full re-parses.
//
// This splits `text` at the last stable top-level block boundary (blank
// line outside a fenced code span) into:
// stablePrefix — passed to an inner <Md>, memoized on its exact text
// value. During the turn, the prefix only grows monotonically,
// so its memo key matches the previous render and React
// reuses the cached subtree — zero re-tokenization.
// unstableSuffix — the in-flight block(s). A separate <Md> re-parses just
// this tail on every delta (O(unstable length) vs.
// O(total length)).
//
// The boundary is stored in a ref so it only advances — idempotent under
// StrictMode double-render. Component unmounts between turns (isStreaming
// flips off → message moves to history and renders via <Md> directly), so
// the ref resets naturally.
//
// See src/app/useMainApp.ts for the reasoning on why we don't memoize the
// whole Md text during streaming: that cache never hits because `text` is
// growing. Mirror claude-code's `StreamingMarkdown` approach adapted to
// our line-based tokenizer.
import { memo, useRef } from 'react'
import type { Theme } from '../theme.js'
import { Md } from './markdown.js'
// Return true when position `end` in `s` falls inside an open ``` / ~~~
// fenced code block. We can't split the stable prefix there: the opener
// would be orphaned and highlighting in the prefix would break.
//
// Per CommonMark, a closing fence must use the SAME marker character as
// its opener and be at least as long. The previous blind toggle treated a
// ~~~ line inside an open ``` block (or a too-short backtick run) as a
// close, which could let the caller split inside a still-open fence.
const fenceOpenAt = (s: string, end: number) => {
  // null → outside any fence; otherwise the opener's marker char + length.
  let open: { ch: string; len: number } | null = null
  let i = 0
  while (i < end) {
    const nl = s.indexOf('\n', i)
    const lineEnd = nl < 0 || nl > end ? end : nl
    const line = s.slice(i, lineEnd)
    const m = /^\s*(`{3,}|~{3,})/.exec(line)
    if (m) {
      const marker = m[1]
      if (open === null) {
        open = { ch: marker[0], len: marker.length }
      } else if (marker[0] === open.ch && marker.length >= open.len) {
        open = null
      }
      // A non-matching fence-looking line inside an open block is content.
    }
    if (nl < 0 || nl >= end) {
      break
    }
    i = nl + 1
  }
  return open !== null
}
// Locate the last "\n\n" separator before the end of `text` that sits
// OUTSIDE any fenced code block. Returns the index just past the second
// newline (i.e. the start of the next block), or -1 when no safe
// boundary exists yet.
export const findStableBoundary = (text: string) => {
  // Walk backwards over candidate separators until one is fence-safe.
  // Stepping searchEnd to the separator itself also handles overlapping
  // runs like "\n\n\n" correctly.
  for (let searchEnd = text.length; searchEnd > 0; ) {
    const sep = text.lastIndexOf('\n\n', searchEnd - 1)
    if (sep < 0) {
      return -1
    }
    // The stable prefix would end at sep + 2 (start of the next block);
    // only accept it when the fence state is balanced up to that point.
    const splitAt = sep + 2
    if (!fenceOpenAt(text, splitAt)) {
      return splitAt
    }
    searchEnd = sep
  }
  return -1
}
export const StreamingMd = memo(function StreamingMd({ compact, t, text }: StreamingMdProps) {
  // Largest prefix committed so far. Held in a ref so it survives
  // re-renders; writing it during render is idempotent, so StrictMode's
  // double-render is harmless.
  const prefixRef = useRef('')
  // Defensive reset: if the incoming text diverged from the committed
  // prefix, start over rather than render a stale prefix. Normally the
  // component unmounts between turns, so this should never fire.
  if (!text.startsWith(prefixRef.current)) {
    prefixRef.current = ''
  }
  // Advance the committed prefix whenever a later safe boundary appears —
  // never retreat. Keeping the prefix string byte-identical across deltas
  // keeps the first <Md>'s memo key stable, so React reuses its subtree
  // without re-tokenizing.
  const splitAt = findStableBoundary(text)
  if (splitAt > prefixRef.current.length) {
    prefixRef.current = text.slice(0, splitAt)
  }
  const stable = prefixRef.current
  const tail = text.slice(stable.length)
  // Degenerate cases: avoid rendering an empty sibling <Md>.
  if (stable === '') {
    return <Md compact={compact} t={t} text={tail} />
  }
  if (tail === '') {
    return <Md compact={compact} t={t} text={stable} />
  }
  return (
    <>
      <Md compact={compact} t={t} text={stable} />
      <Md compact={compact} t={t} text={tail} />
    </>
  )
})
interface StreamingMdProps {
  // Compact spacing mode, forwarded unchanged to the inner <Md> renderers.
  compact?: boolean
  // Theme, forwarded unchanged to the inner <Md> renderers.
  t: Theme
  // Full in-flight assistant text. Per the component's contract it is
  // expected to grow monotonically during a streaming turn; a non-prefix
  // change triggers the defensive ref reset in StreamingMd.
  text: string
}