perf(tui): incremental markdown during streaming

Split in-flight assistant text at the last stable block boundary so only
the unclosed tail re-tokenizes per stream delta. Previously the full
text was rendered as plain <Text> during streaming and only flipped to
<Md> at message.complete — cheap per delta but loses live markdown
formatting.

New StreamingMd component holds a monotonically-growing stablePrefix
in a ref (idempotent under StrictMode double-render), renders it as
one <Md> that memoizes across deltas, and renders the unstable suffix
as a second <Md> that re-parses on each delta. Cost per delta drops
from O(total length) to O(unstable length).

findStableBoundary walks back to the last "\n\n" outside an open
fenced code block — splitting inside an open fence would orphan the
opener and break highlighting in the prefix.

Adapted from claude-code's src/components/Markdown.tsx:186 but built
on our line-based tokenizer instead of marked.lexer. 9 new tests cover
fence balance, boundary walk, and empty input.

Part of the --tui perf audit (see audit #7).
This commit is contained in:
Brooklyn Nicholson 2026-04-26 16:21:34 -05:00
parent bde89c169b
commit debae25f1c
3 changed files with 211 additions and 1 deletions

View file

@@ -0,0 +1,79 @@
import { describe, expect, it } from 'vitest'
import { findStableBoundary } from '../components/streamingMarkdown.js'
// findStableBoundary is exported from streamingMarkdown.tsx precisely so
// this pure boundary logic can be unit-tested directly. Mounting
// StreamingMd to observe its monotonic-prefix behaviour would require a
// full React/Ink test renderer, which is heavy; the component is otherwise
// a thin wrapper around this helper, so direct tests of the fence balance
// and boundary walk give the coverage that matters.
import { DEFAULT_THEME } from '../theme.js'
describe('findStableBoundary', () => {
  it('returns -1 when no blank line exists yet', () => {
    const result = findStableBoundary('partial line with no newline yet')
    expect(result).toBe(-1)
  })
  it('returns -1 when only single newlines exist', () => {
    const result = findStableBoundary('line one\nline two\nline three')
    expect(result).toBe(-1)
  })
  it('splits after the last blank line separator', () => {
    // The final blank line precedes 'third', so everything up to and
    // including that "\n\n" is the stable prefix.
    const input = 'first paragraph\n\nsecond paragraph\n\nthird'
    const split = findStableBoundary(input)
    expect(input.slice(0, split)).toBe('first paragraph\n\nsecond paragraph\n\n')
    expect(input.slice(split)).toBe('third')
  })
  it('refuses to split inside an open fenced block', () => {
    // The fence opens and never closes; the blank line sits inside the
    // code, so no boundary is safe yet.
    const input = '```ts\nfn();\n\nmore code here'
    expect(findStableBoundary(input)).toBe(-1)
  })
  it('splits before an open fenced block but not inside', () => {
    const input = 'intro paragraph\n\n```ts\nfn();\n\nmore code'
    const split = findStableBoundary(input)
    expect(input.slice(0, split)).toBe('intro paragraph\n\n')
    expect(input.slice(split).startsWith('```ts')).toBe(true)
  })
  it('allows splitting after a fenced block closes', () => {
    const input = '```ts\nfn();\n```\n\nnarration continues'
    const split = findStableBoundary(input)
    expect(input.slice(0, split)).toBe('```ts\nfn();\n```\n\n')
    expect(input.slice(split)).toBe('narration continues')
  })
  it('walks backwards through nested fence boundaries safely', () => {
    // Closed fences, narration, then a fence that is still open: the only
    // legal split point is just before the open fence, not between the
    // closed ones.
    const input = '```js\na\n```\n\nmid text\n\n```python\nstill open'
    const split = findStableBoundary(input)
    expect(input.slice(0, split)).toBe('```js\na\n```\n\nmid text\n\n')
  })
  it('handles empty input', () => {
    expect(findStableBoundary('')).toBe(-1)
  })
})
describe('streaming theme assumption', () => {
  it('theme is exportable (component import sanity check)', () => {
    // Smoke test: the streamingMarkdown module graph resolved without
    // cycles (its import above succeeded), and the theme object still has
    // the shape the component is handed.
    expect(DEFAULT_THEME.color.amber).toBeTruthy()
  })
})

View file

@ -10,6 +10,7 @@ import type { Theme } from '../theme.js'
import type { ActiveTool, DetailsMode, Msg, SectionVisibility } from '../types.js' import type { ActiveTool, DetailsMode, Msg, SectionVisibility } from '../types.js'
import { Md } from './markdown.js' import { Md } from './markdown.js'
import { StreamingMd } from './streamingMarkdown.js'
import { ToolTrail } from './thinking.js' import { ToolTrail } from './thinking.js'
import { TodoPanel } from './todoPanel.js' import { TodoPanel } from './todoPanel.js'
@ -94,7 +95,10 @@ export const MessageLine = memo(function MessageLine({
if (msg.role === 'assistant') { if (msg.role === 'assistant') {
return isStreaming ? ( return isStreaming ? (
<Text color={body}>{boundedLiveRenderText(msg.text)}</Text> // Incremental markdown: split at the last stable block boundary so
// only the in-flight tail re-tokenizes per delta. See
// streamingMarkdown.tsx for the cost model.
<StreamingMd compact={compact} t={t} text={boundedLiveRenderText(msg.text)} />
) : ( ) : (
<Md compact={compact} t={t} text={msg.text} /> <Md compact={compact} t={t} text={msg.text} />
) )

View file

@@ -0,0 +1,127 @@
// StreamingMd — incremental markdown renderer for in-flight assistant text.
//
// Naive approach (render <Md text={full}/>) re-tokenizes the entire message
// on every stream delta. At 20-char batches over a 3 KB response that's 150
// full re-parses.
//
// This splits `text` at the last stable top-level block boundary (blank
// line outside a fenced code span) into:
// stablePrefix — passed to an inner <Md>, memoized on its exact text
// value. During the turn, the prefix only grows monotonically,
// so its memo key matches the previous render and React
// reuses the cached subtree — zero re-tokenization.
// unstableSuffix — the in-flight block(s). A separate <Md> re-parses just
// this tail on every delta (O(unstable length) vs.
// O(total length)).
//
// The boundary is stored in a ref so it only advances — idempotent under
// StrictMode double-render. Component unmounts between turns (isStreaming
// flips off → message moves to history and renders via <Md> directly), so
// the ref resets naturally.
//
// See src/app/useMainApp.ts for the reasoning on why we don't memoize the
// whole Md text during streaming: that cache never hits because `text` is
// growing. Mirror claude-code's `StreamingMarkdown` approach adapted to
// our line-based tokenizer.
import { memo, useRef } from 'react'
import type { Theme } from '../theme.js'
import { Md } from './markdown.js'
// Return true when position `end` in `s` falls inside an open ``` / ~~~
// fenced code block. We can't split the stable prefix there: the opener
// would be orphaned and highlighting in the prefix would break.
//
// Per CommonMark, a closing fence must use the SAME marker character as
// its opener and be at least as long. The previous blind toggle treated a
// ~~~ line inside an open ``` block (or a too-short backtick run) as a
// close, which could let the caller split inside a still-open fence.
const fenceOpenAt = (s: string, end: number) => {
  // null → outside any fence; otherwise the opener's marker char + length.
  let open: { ch: string; len: number } | null = null
  let i = 0
  while (i < end) {
    const nl = s.indexOf('\n', i)
    const lineEnd = nl < 0 || nl > end ? end : nl
    const line = s.slice(i, lineEnd)
    const m = /^\s*(`{3,}|~{3,})/.exec(line)
    if (m) {
      const marker = m[1]
      if (open === null) {
        open = { ch: marker[0], len: marker.length }
      } else if (marker[0] === open.ch && marker.length >= open.len) {
        open = null
      }
      // A non-matching fence-looking line inside an open block is content.
    }
    if (nl < 0 || nl >= end) {
      break
    }
    i = nl + 1
  }
  return open !== null
}
// Locate the last "\n\n" separator before the end of `text` that sits
// OUTSIDE any fenced code block. Returns the index just past the second
// newline (i.e. the start of the next block), or -1 when no safe
// boundary exists yet.
export const findStableBoundary = (text: string) => {
  // Walk backwards over candidate separators until one is fence-safe.
  // Stepping searchEnd to the separator itself also handles overlapping
  // runs like "\n\n\n" correctly.
  for (let searchEnd = text.length; searchEnd > 0; ) {
    const sep = text.lastIndexOf('\n\n', searchEnd - 1)
    if (sep < 0) {
      return -1
    }
    // The stable prefix would end at sep + 2 (start of the next block);
    // only accept it when the fence state is balanced up to that point.
    const splitAt = sep + 2
    if (!fenceOpenAt(text, splitAt)) {
      return splitAt
    }
    searchEnd = sep
  }
  return -1
}
export const StreamingMd = memo(function StreamingMd({ compact, t, text }: StreamingMdProps) {
  // Largest prefix committed so far. Held in a ref so it survives
  // re-renders; writing it during render is idempotent, so StrictMode's
  // double-render is harmless.
  const prefixRef = useRef('')
  // Defensive reset: if the incoming text diverged from the committed
  // prefix, start over rather than render a stale prefix. Normally the
  // component unmounts between turns, so this should never fire.
  if (!text.startsWith(prefixRef.current)) {
    prefixRef.current = ''
  }
  // Advance the committed prefix whenever a later safe boundary appears —
  // never retreat. Keeping the prefix string byte-identical across deltas
  // keeps the first <Md>'s memo key stable, so React reuses its subtree
  // without re-tokenizing.
  const splitAt = findStableBoundary(text)
  if (splitAt > prefixRef.current.length) {
    prefixRef.current = text.slice(0, splitAt)
  }
  const stable = prefixRef.current
  const tail = text.slice(stable.length)
  // Degenerate cases: avoid rendering an empty sibling <Md>.
  if (stable === '') {
    return <Md compact={compact} t={t} text={tail} />
  }
  if (tail === '') {
    return <Md compact={compact} t={t} text={stable} />
  }
  return (
    <>
      <Md compact={compact} t={t} text={stable} />
      <Md compact={compact} t={t} text={tail} />
    </>
  )
})
interface StreamingMdProps {
  // Compact spacing mode, forwarded unchanged to the inner <Md> renderers.
  compact?: boolean
  // Theme, forwarded unchanged to the inner <Md> renderers.
  t: Theme
  // Full in-flight assistant text. Per the component's contract it is
  // expected to grow monotonically during a streaming turn; a non-prefix
  // change triggers the defensive ref reset in StreamingMd.
  text: string
}