mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-07 02:51:50 +00:00
perf(tui): incremental markdown during streaming
Split in-flight assistant text at the last stable block boundary so only the unclosed tail re-tokenizes per stream delta. Previously the full text was rendered as plain <Text> during streaming and only flipped to <Md> at message.complete — cheap per delta but loses live markdown formatting. New StreamingMd component holds a monotonically-growing stablePrefix in a ref (idempotent under StrictMode double-render), renders it as one <Md> that memoizes across deltas, and renders the unstable suffix as a second <Md> that re-parses on each delta. Cost per delta drops from O(total length) to O(unstable length). findStableBoundary walks back to the last "\n\n" outside an open fenced code block — splitting inside an open fence would orphan the opener and break highlighting in the prefix. Adapted from claude-code's src/components/Markdown.tsx:186 but built on our line-based tokenizer instead of marked.lexer. 9 new tests cover fence balance, boundary walk, and empty input. Part of the --tui perf audit (see audit #7).
This commit is contained in:
parent
bde89c169b
commit
debae25f1c
3 changed files with 211 additions and 1 deletions
79
ui-tui/src/__tests__/streamingMarkdown.test.ts
Normal file
79
ui-tui/src/__tests__/streamingMarkdown.test.ts
Normal file
|
|
@ -0,0 +1,79 @@
|
||||||
|
import { describe, expect, it } from 'vitest'
|
||||||
|
|
||||||
|
import { findStableBoundary } from '../components/streamingMarkdown.js'
|
||||||
|
// findStableBoundary is a pure helper exported from streamingMarkdown, so
// these tests call it directly with representative snapshots of in-flight
// text and check where (or whether) it reports a safe split point.
//
// StreamingMd itself is not mounted here: observing which <Md> instances it
// renders (by text prop) would need a renderer, which is heavy for what is
// ultimately string-boundary logic. The component-level guarantee that the
// stable prefix never retreats across successive text values is therefore
// NOT covered by this file; only the fence/boundary logic is.
|
||||||
|
import { DEFAULT_THEME } from '../theme.js'
|
||||||
|
|
||||||
|
describe('findStableBoundary', () => {
  // Cuts `text` at the reported boundary so each case can assert on the
  // resulting prefix/suffix pair instead of on a raw index.
  const cutAtBoundary = (text: string) => {
    const at = findStableBoundary(text)

    return { prefix: text.slice(0, at), suffix: text.slice(at) }
  }

  it('returns -1 when no blank line exists yet', () => {
    const text = 'partial line with no newline yet'

    expect(findStableBoundary(text)).toBe(-1)
  })

  it('returns -1 when only single newlines exist', () => {
    const text = 'line one\nline two\nline three'

    expect(findStableBoundary(text)).toBe(-1)
  })

  it('splits after the last blank line separator', () => {
    // Two blank-line separators; the later one (just before 'third') wins.
    const { prefix, suffix } = cutAtBoundary('first paragraph\n\nsecond paragraph\n\nthird')

    expect(prefix).toBe('first paragraph\n\nsecond paragraph\n\n')
    expect(suffix).toBe('third')
  })

  it('refuses to split inside an open fenced block', () => {
    // The fence opens and never closes; the only blank line sits inside it.
    const text = '```ts\nfn();\n\nmore code here'

    expect(findStableBoundary(text)).toBe(-1)
  })

  it('splits before an open fenced block but not inside', () => {
    const { prefix, suffix } = cutAtBoundary('intro paragraph\n\n```ts\nfn();\n\nmore code')

    expect(prefix).toBe('intro paragraph\n\n')
    expect(suffix.startsWith('```ts')).toBe(true)
  })

  it('allows splitting after a fenced block closes', () => {
    const { prefix, suffix } = cutAtBoundary('```ts\nfn();\n```\n\nnarration continues')

    expect(prefix).toBe('```ts\nfn();\n```\n\n')
    expect(suffix).toBe('narration continues')
  })

  it('walks backwards through nested fence boundaries safely', () => {
    // One closed fence + narration + one new, still-open fence. The split
    // must land immediately before the open fence, not at an earlier
    // blank line.
    const { prefix } = cutAtBoundary('```js\na\n```\n\nmid text\n\n```python\nstill open')

    expect(prefix).toBe('```js\na\n```\n\nmid text\n\n')
  })

  it('handles empty input', () => {
    const text = ''

    expect(findStableBoundary(text)).toBe(-1)
  })
})
|
||||||
|
|
||||||
|
describe('streaming theme assumption', () => {
  it('theme is exportable (component import sanity check)', () => {
    // Smoke test: the streamingMarkdown import at the top of this file has
    // already loaded its module graph, so reading a theme colour here only
    // confirms the theme still exposes the shape we rely on and that the
    // modules wire up without cycles.
    const { amber } = DEFAULT_THEME.color

    expect(amber).toBeTruthy()
  })
})
|
||||||
|
|
@ -10,6 +10,7 @@ import type { Theme } from '../theme.js'
|
||||||
import type { ActiveTool, DetailsMode, Msg, SectionVisibility } from '../types.js'
|
import type { ActiveTool, DetailsMode, Msg, SectionVisibility } from '../types.js'
|
||||||
|
|
||||||
import { Md } from './markdown.js'
|
import { Md } from './markdown.js'
|
||||||
|
import { StreamingMd } from './streamingMarkdown.js'
|
||||||
import { ToolTrail } from './thinking.js'
|
import { ToolTrail } from './thinking.js'
|
||||||
import { TodoPanel } from './todoPanel.js'
|
import { TodoPanel } from './todoPanel.js'
|
||||||
|
|
||||||
|
|
@ -94,7 +95,10 @@ export const MessageLine = memo(function MessageLine({
|
||||||
|
|
||||||
if (msg.role === 'assistant') {
|
if (msg.role === 'assistant') {
|
||||||
return isStreaming ? (
|
return isStreaming ? (
|
||||||
<Text color={body}>{boundedLiveRenderText(msg.text)}</Text>
|
// Incremental markdown: split at the last stable block boundary so
|
||||||
|
// only the in-flight tail re-tokenizes per delta. See
|
||||||
|
// streamingMarkdown.tsx for the cost model.
|
||||||
|
<StreamingMd compact={compact} t={t} text={boundedLiveRenderText(msg.text)} />
|
||||||
) : (
|
) : (
|
||||||
<Md compact={compact} t={t} text={msg.text} />
|
<Md compact={compact} t={t} text={msg.text} />
|
||||||
)
|
)
|
||||||
|
|
|
||||||
127
ui-tui/src/components/streamingMarkdown.tsx
Normal file
127
ui-tui/src/components/streamingMarkdown.tsx
Normal file
|
|
@ -0,0 +1,127 @@
|
||||||
|
// StreamingMd — incremental markdown renderer for in-flight assistant text.
|
||||||
|
//
|
||||||
|
// Naive approach (render <Md text={full}/>) re-tokenizes the entire message
|
||||||
|
// on every stream delta. At 20-char batches over a 3 KB response that's 150
|
||||||
|
// full re-parses.
|
||||||
|
//
|
||||||
|
// This splits `text` at the last stable top-level block boundary (blank
|
||||||
|
// line outside a fenced code span) into:
|
||||||
|
// stablePrefix — passed to an inner <Md>, memoized on its exact text
|
||||||
|
// value. During the turn, the prefix only grows monotonically,
|
||||||
|
// so its memo key matches the previous render and React
|
||||||
|
// reuses the cached subtree — zero re-tokenization.
|
||||||
|
// unstableSuffix — the in-flight block(s). A separate <Md> re-parses just
|
||||||
|
// this tail on every delta (O(unstable length) vs.
|
||||||
|
// O(total length)).
|
||||||
|
//
|
||||||
|
// The boundary is stored in a ref so it only advances — idempotent under
|
||||||
|
// StrictMode double-render. Component unmounts between turns (isStreaming
|
||||||
|
// flips off → message moves to history and renders via <Md> directly), so
|
||||||
|
// the ref resets naturally.
|
||||||
|
//
|
||||||
|
// See src/app/useMainApp.ts for the reasoning on why we don't memoize the
|
||||||
|
// whole Md text during streaming: that cache never hits because `text` is
|
||||||
|
// growing. Mirror claude-code's `StreamingMarkdown` approach adapted to
|
||||||
|
// our line-based tokenizer.
|
||||||
|
|
||||||
|
import { memo, useRef } from 'react'
|
||||||
|
|
||||||
|
import type { Theme } from '../theme.js'
|
||||||
|
|
||||||
|
import { Md } from './markdown.js'
|
||||||
|
|
||||||
|
// Matches a fence line: optional leading whitespace followed by a run of
// three or more backticks or three or more tildes. Every such line toggles
// the "inside a fenced block" state.
//
// NOTE(review): the toggle does not check that a closing fence uses the same
// marker character/length as its opener — confirm this agrees with how the
// <Md> tokenizer pairs fences.
const FENCE_LINE_RE = /^\s*(?:`{3,}|~{3,})/

/**
 * Find the last "\n\n" boundary in `text` that is OUTSIDE a fenced code
 * block.
 *
 * Implemented as a single forward pass over newline-terminated lines: fence
 * parity is updated line by line, and every time a line is immediately
 * followed by a second newline while no fence is open, that position is
 * remembered as the latest safe split. Rescanning from the start for every
 * candidate boundary is avoided, so the cost is linear in `text.length` even
 * when a long open fence contains many blank lines.
 *
 * @param text In-flight markdown accumulated so far.
 * @returns The index AFTER the second newline of the last safe boundary
 *   (i.e. the start of the next block), or -1 if no safe boundary exists yet.
 */
export const findStableBoundary = (text: string): number => {
  let insideFence = false
  let lastSafe = -1
  let lineStart = 0

  while (lineStart < text.length) {
    const nl = text.indexOf('\n', lineStart)

    // The trailing line has no terminator yet, so no boundary can follow it.
    if (nl < 0) {
      break
    }

    if (FENCE_LINE_RE.test(text.slice(lineStart, nl))) {
      insideFence = !insideFence
    }

    // A newline directly followed by another newline is a blank-line
    // boundary; it is only usable when every fence seen so far is closed.
    if (!insideFence && text[nl + 1] === '\n') {
      lastSafe = nl + 2
    }

    lineStart = nl + 1
  }

  return lastSafe
}
|
||||||
|
|
||||||
|
/**
 * Renders in-flight assistant markdown as up to two <Md> subtrees: a stable
 * prefix that ends at the last safe block boundary, and the still-changing
 * tail after it.
 *
 * The prefix lives in a ref and is only ever extended (or cleared when the
 * incoming text no longer starts with it), so successive renders hand the
 * first <Md> an identical `text` value until the boundary advances.
 */
export const StreamingMd = memo(function StreamingMd({ compact, t, text }: StreamingMdProps) {
  const prefixRef = useRef('')

  // Defensive reset: drop the recorded prefix if the text diverged from it.
  // Normally the component unmounts between turns, so this should not fire.
  const carried = text.startsWith(prefixRef.current) ? prefixRef.current : ''

  // The boundary is computed over the FULL text; adopt it only when it lies
  // beyond what is already recorded, so the prefix never retreats.
  const cut = findStableBoundary(text)

  prefixRef.current = cut > carried.length ? text.slice(0, cut) : carried

  const head = prefixRef.current
  const tail = text.slice(head.length)

  // Only one side has content (or both are empty): a single <Md> suffices.
  if (!head || !tail) {
    return <Md compact={compact} t={t} text={head || tail} />
  }

  return (
    <>
      <Md compact={compact} t={t} text={head} />
      <Md compact={compact} t={t} text={tail} />
    </>
  )
})
|
||||||
|
|
||||||
|
/** Props accepted by StreamingMd. */
interface StreamingMdProps {
  /** Passed through unchanged to every <Md> that StreamingMd renders. */
  compact?: boolean
  /** Theme passed through unchanged to every <Md> that StreamingMd renders. */
  t: Theme
  /** The in-flight text to render; split into stable prefix and unstable tail. */
  text: string
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue