mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-07 02:51:50 +00:00
perf(tui): incremental markdown during streaming
Split in-flight assistant text at the last stable block boundary so only the unclosed tail re-tokenizes per stream delta. Previously the full text was rendered as plain <Text> during streaming and only flipped to <Md> at message.complete — cheap per delta but loses live markdown formatting. New StreamingMd component holds a monotonically-growing stablePrefix in a ref (idempotent under StrictMode double-render), renders it as one <Md> that memoizes across deltas, and renders the unstable suffix as a second <Md> that re-parses on each delta. Cost per delta drops from O(total length) to O(unstable length). findStableBoundary walks back to the last "\n\n" outside an open fenced code block — splitting inside an open fence would orphan the opener and break highlighting in the prefix. Adapted from claude-code's src/components/Markdown.tsx:186 but built on our line-based tokenizer instead of marked.lexer. 9 new tests cover fence balance, boundary walk, and empty input. Part of the --tui perf audit (see audit #7).
This commit is contained in:
parent
bde89c169b
commit
debae25f1c
3 changed files with 211 additions and 1 deletions
|
|
@ -10,6 +10,7 @@ import type { Theme } from '../theme.js'
|
|||
import type { ActiveTool, DetailsMode, Msg, SectionVisibility } from '../types.js'
|
||||
|
||||
import { Md } from './markdown.js'
|
||||
import { StreamingMd } from './streamingMarkdown.js'
|
||||
import { ToolTrail } from './thinking.js'
|
||||
import { TodoPanel } from './todoPanel.js'
|
||||
|
||||
|
|
@ -94,7 +95,10 @@ export const MessageLine = memo(function MessageLine({
|
|||
|
||||
if (msg.role === 'assistant') {
|
||||
return isStreaming ? (
|
||||
<Text color={body}>{boundedLiveRenderText(msg.text)}</Text>
|
||||
// Incremental markdown: split at the last stable block boundary so
|
||||
// only the in-flight tail re-tokenizes per delta. See
|
||||
// streamingMarkdown.tsx for the cost model.
|
||||
<StreamingMd compact={compact} t={t} text={boundedLiveRenderText(msg.text)} />
|
||||
) : (
|
||||
<Md compact={compact} t={t} text={msg.text} />
|
||||
)
|
||||
|
|
|
|||
127
ui-tui/src/components/streamingMarkdown.tsx
Normal file
127
ui-tui/src/components/streamingMarkdown.tsx
Normal file
|
|
@ -0,0 +1,127 @@
|
|||
// StreamingMd — incremental markdown renderer for in-flight assistant text.
|
||||
//
|
||||
// Naive approach (render <Md text={full}/>) re-tokenizes the entire message
|
||||
// on every stream delta. At 20-char batches over a 3 KB response that's 150
|
||||
// full re-parses.
|
||||
//
|
||||
// This splits `text` at the last stable top-level block boundary (blank
|
||||
// line outside a fenced code block) into:
|
||||
// stablePrefix — passed to an inner <Md>, memoized on its exact text
|
||||
// value. During the turn, the prefix only grows monotonically,
|
||||
// so its memo key matches the previous render and React
|
||||
// reuses the cached subtree — zero re-tokenization.
|
||||
// unstableSuffix — the in-flight block(s). A separate <Md> re-parses just
|
||||
// this tail on every delta (O(unstable length) vs.
|
||||
// O(total length)).
|
||||
//
|
||||
// The boundary is stored in a ref so it only advances — idempotent under
|
||||
// StrictMode double-render. Component unmounts between turns (isStreaming
|
||||
// flips off → message moves to history and renders via <Md> directly), so
|
||||
// the ref resets naturally.
|
||||
//
|
||||
// See src/app/useMainApp.ts for the reasoning on why we don't memoize the
|
||||
// whole Md text during streaming: that cache never hits because `text` is
|
||||
// growing. This mirrors claude-code's `StreamingMarkdown` approach, adapted to
|
||||
// our line-based tokenizer.
|
||||
|
||||
import { memo, useRef } from 'react'
|
||||
|
||||
import type { Theme } from '../theme.js'
|
||||
|
||||
import { Md } from './markdown.js'
|
||||
|
||||
// Report whether offset `end` of `s` lies inside a fenced code block that is
// still open. Only lines that START before `end` are inspected; the line that
// straddles `end` (if any) is truncated at `end`.
//
// Fence matching follows the CommonMark rules rather than blindly toggling on
// every fence-looking line:
//   - an opener is a run of 3+ backticks or 3+ tildes (optionally followed by
//     an info string such as "js");
//   - a closer must use the SAME character as the opener, be AT LEAST as long,
//     and carry nothing but whitespace after the run.
// Anything else that merely looks like a fence while a block is open (e.g. a
// "```js" line inside a "````md" block, or "~~~" inside a backtick fence) is
// treated as block content. A blind toggle would mark such a block as closed
// and let the caller split the text in the middle of it, orphaning the opener.
const fenceOpenAt = (s: string, end: number): boolean => {
  // Leading whitespace is tolerated to the same degree as before.
  const fenceRun = /^\s*(`{3,}|~{3,})/

  // Marker of the currently open fence; '' means "outside any fence".
  let openChar = ''
  let openLen = 0
  let i = 0

  while (i < end) {
    const nl = s.indexOf('\n', i)
    const lineEnd = nl < 0 || nl > end ? end : nl
    const line = s.slice(i, lineEnd)
    const match = fenceRun.exec(line)

    if (match) {
      const run = match[1] ?? ''
      // Text after the fence run: an info string on openers; it must be blank
      // (a trailing "\r" from CRLF input counts as blank) on closers.
      const trailing = line.slice(match[0].length)

      if (openChar === '') {
        openChar = run.charAt(0)
        openLen = run.length
      } else if (run.charAt(0) === openChar && run.length >= openLen && trailing.trim() === '') {
        openChar = ''
        openLen = 0
      }
    }

    if (nl < 0 || nl >= end) {
      break
    }

    i = nl + 1
  }

  return openChar !== ''
}
|
||||
|
||||
// Locate the right-most blank-line separator ("\n\n") in `text` whose
// following block starts OUTSIDE any fenced code block.
//
// Returns the offset just past the separator (i.e. where the next block
// begins), or -1 when the text is empty, has no "\n\n", or every separator
// sits inside a fence that is still open at that point.
export const findStableBoundary = (text: string): number => {
  // Right-most offset at which the next "\n\n" search may match.
  let searchFrom = text.length - 1

  while (searchFrom >= 0) {
    const separator = text.lastIndexOf('\n\n', searchFrom)

    if (separator < 0) {
      break
    }

    // The stable prefix would end right after the two newlines.
    const nextBlockStart = separator + 2

    if (!fenceOpenAt(text, nextBlockStart)) {
      return nextBlockStart
    }

    // Inside an open fence here: retry with the previous separator.
    searchFrom = separator - 1
  }

  return -1
}
|
||||
|
||||
// Renders in-flight text as up to two <Md> elements: one for the prefix that
// already ends on a stable block boundary, one for the remaining tail.
//
// NOTE(review): when both parts exist they are returned as fragment siblings,
// so how they stack depends on the parent's layout direction — confirm the
// parent lays its children out vertically.
export const StreamingMd = memo(function StreamingMd({ compact, t, text }: StreamingMdProps) {
  // Longest prefix of `text` seen so far that ends on a stable boundary.
  const frozenRef = useRef('')

  // Defensive reset: if the incoming text no longer extends the frozen prefix,
  // discard it and recompute from scratch below.
  const extendsFrozen = text.startsWith(frozenRef.current)

  if (!extendsFrozen) {
    frozenRef.current = ''
  }

  // The boundary is computed over the full text on every render, but the
  // frozen prefix is only ever replaced by a longer one. An unchanged prefix
  // string lets the first <Md> receive the same `text` prop as last time.
  const cut = findStableBoundary(text)

  if (cut > frozenRef.current.length) {
    frozenRef.current = text.slice(0, cut)
  }

  const head = frozenRef.current
  const tail = text.slice(head.length)

  if (head && tail) {
    return (
      <>
        <Md compact={compact} t={t} text={head} />
        <Md compact={compact} t={t} text={tail} />
      </>
    )
  }

  // At most one part is non-empty: render whichever it is (or the empty tail
  // when there is no text at all) as a single <Md>.
  return <Md compact={compact} t={t} text={head || tail} />
})
|
||||
|
||||
/** Props accepted by `StreamingMd`. */
interface StreamingMdProps {
  /** Passed through unchanged to each inner `<Md>`. */
  compact?: boolean

  /** Theme object, passed through unchanged to each inner `<Md>`. */
  t: Theme

  /** Complete in-flight text for this render; it is split into prefix and tail internally. */
  text: string
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue