diff --git a/apps/desktop/package.json b/apps/desktop/package.json index 08f1cc1aa0f..6fed75f5638 100644 --- a/apps/desktop/package.json +++ b/apps/desktop/package.json @@ -90,6 +90,7 @@ "react-router-dom": "^7.17.0", "react-shiki": "^0.9.3", "remark-math": "^6.0.0", + "remend": "^1.3.0", "shiki": "^4.0.2", "streamdown": "^2.5.0", "tailwind-merge": "^3.5.0", diff --git a/apps/desktop/src/components/assistant-ui/markdown-text.tsx b/apps/desktop/src/components/assistant-ui/markdown-text.tsx index 8ec734bf8b6..1c50b65eab4 100644 --- a/apps/desktop/src/components/assistant-ui/markdown-text.tsx +++ b/apps/desktop/src/components/assistant-ui/markdown-text.tsx @@ -2,6 +2,7 @@ import { TextMessagePartProvider, useMessagePartText } from '@assistant-ui/react' import { + parseMarkdownIntoBlocks, type StreamdownTextComponents, StreamdownTextPrimitive, type SyntaxHighlighterProps @@ -26,6 +27,7 @@ import { mediaStreamUrl } from '@/lib/media' import { previewTargetFromMarkdownHref } from '@/lib/preview-targets' +import { tailBoundedRemend } from '@/lib/remend-tail' import { cn } from '@/lib/utils' // Math rendering plugin (KaTeX). Configured once at module scope — the @@ -42,6 +44,51 @@ import { cn } from '@/lib/utils' // LLM convention). The default false-setting only accepts `$$...$$`. const mathPlugin = createMemoizedMathPlugin({ singleDollarTextMath: true }) +// Replaces Streamdown's `parseIncompleteMarkdown` (full-text remend per +// flush) with a tail-bounded repair — see lib/remend-tail.ts. Must stay +// module-scope so the prop identity is stable across renders. +function preprocessWithTailRepair(text: string): string { + return tailBoundedRemend(preprocessMarkdown(text)) +} + +// Memoized block splitter. 
Streamdown calls `parseMarkdownIntoBlocks` (a full +// `marked` lex of the entire message, ~1.6ms per 28KB) inside a useMemo keyed +// on the text — but the same text is re-lexed every time a message REMOUNTS +// (virtualizer scroll, session switch) and whenever multiple surfaces render +// the same content (deferred + smooth reveal republish). A small module-level +// LRU keyed by the exact source string removes all of those repeat parses +// with zero correctness risk (same input → same output). Streaming tail +// growth misses the cache by design (every flush is a new string) — that +// single lex is the irreducible cost. +const BLOCK_CACHE_MAX = 64 +const BLOCK_CACHE_MIN_LENGTH = 1024 +const blockCache = new Map() + +function parseMarkdownIntoBlocksCached(markdown: string): string[] { + if (markdown.length < BLOCK_CACHE_MIN_LENGTH) { + return parseMarkdownIntoBlocks(markdown) + } + + const hit = blockCache.get(markdown) + + if (hit) { + // Refresh recency (Map iteration order is insertion order). + blockCache.delete(markdown) + blockCache.set(markdown, hit) + + return hit + } + + const blocks = parseMarkdownIntoBlocks(markdown) + blockCache.set(markdown, blocks) + + if (blockCache.size > BLOCK_CACHE_MAX) { + blockCache.delete(blockCache.keys().next().value as string) + } + + return blocks +} + async function mediaSrc(path: string): Promise { if (/^(?:https?|data):/i.test(path)) { return path @@ -241,6 +288,13 @@ function MarkdownImage({ className, src, alt, ...props }: ComponentProps<'img'>) // keeps draining its tail instead of snapping. const REVEAL_DRAIN_MS = 500 const REVEAL_MAX_CHARS_PER_FRAME = 30 +// Floor between reveal commits. Each commit republishes the text context and +// re-runs the whole Streamdown pipeline (preprocess → remend → lex → micromark +// on the open block) over the full accumulated text — at raw rAF cadence +// that's 60 full parses/second and was the dominant streaming cost for +// reasoning text. 
~33ms keeps the reveal visually fluid (2 frames) while +// halving the parse work. +const REVEAL_MIN_COMMIT_MS = 33 function useSmoothReveal(text: string, isRunning: boolean): string { const [displayed, setDisplayed] = useState(isRunning ? '' : text) @@ -273,10 +327,27 @@ function useSmoothReveal(text: string, isRunning: boolean): string { const tick = () => { const now = performance.now() const dt = now - lastTickRef.current + + // Skip this frame if the floor hasn't elapsed — the backlog math below + // is dt-proportional, so delayed commits reveal proportionally more. + if (dt < REVEAL_MIN_COMMIT_MS) { + frameRef.current = requestAnimationFrame(tick) + + return + } + lastTickRef.current = now const remaining = targetRef.current.length - shownRef.current.length - const add = Math.min(remaining, REVEAL_MAX_CHARS_PER_FRAME, Math.max(1, Math.ceil((remaining * dt) / REVEAL_DRAIN_MS))) + + const add = Math.min( + remaining, + // dt-scaled so the per-commit cap stays equivalent to the old + // per-frame cap at any commit cadence. + Math.ceil((REVEAL_MAX_CHARS_PER_FRAME * dt) / 16.7), + Math.max(1, Math.ceil((remaining * dt) / REVEAL_DRAIN_MS)) + ) + shownRef.current = targetRef.current.slice(0, shownRef.current.length + add) setDisplayed(shownRef.current) @@ -460,17 +531,20 @@ function MarkdownTextSurface({ containerClassName, containerProps }: MarkdownTex containerProps={containerProps} lineNumbers={false} mode="streaming" - // Always auto-close incomplete fences — even during streaming. - // Without this, an unclosed ```python ... ``` whose body contains - // `$` (very common: shell snippets, JS template strings, dollar - // amounts) leaks those dollars out to the math parser and they - // get rendered as broken inline math until the closing fence - // arrives. Shiki is independently deferred via `defer={isStreaming}` - // on the SyntaxHighlighter component, so we don't pay code-block - // tokenization on every token even with this set. 
- parseIncompleteMarkdown + // Incomplete-markdown repair is handled by `preprocessWithTailRepair` + // below (tail-bounded remend) instead of Streamdown's built-in pass, + // which re-runs remend over the ENTIRE message on every flush — ~18% + // of streaming script time on 50KB+ messages. The repair itself stays + // always-on (even between flushes / for completed messages): an + // unclosed ```python ... ``` whose body contains `$` (shell snippets, + // JS template strings, dollar amounts) would otherwise leak those + // dollars to the math parser and render broken inline math. Shiki is + // independently deferred via `defer={isStreaming}` on the + // SyntaxHighlighter component. + parseIncompleteMarkdown={false} + parseMarkdownIntoBlocksFn={parseMarkdownIntoBlocksCached} plugins={plugins} - preprocess={preprocessMarkdown} + preprocess={preprocessWithTailRepair} /> ) } diff --git a/apps/desktop/src/lib/remend-tail.test.ts b/apps/desktop/src/lib/remend-tail.test.ts new file mode 100644 index 00000000000..c730937356d --- /dev/null +++ b/apps/desktop/src/lib/remend-tail.test.ts @@ -0,0 +1,105 @@ +import { parseMarkdownIntoBlocks } from '@assistant-ui/react-streamdown' +import remend from 'remend' +import { describe, expect, it } from 'vitest' + +import { findRemendWindowStart, tailBoundedRemend } from './remend-tail' + +const CORPUS = `# Heading one + +Intro paragraph with **bold**, *italic*, \`inline code\`, and a [link](https://example.com). + +## Code + +\`\`\`python +def main(): + cost = "$5" + print(f"total: $\{cost}") +\`\`\` + +Some text after the fence with $x^2 + y^2$ inline math. 
+ +$$ +\\int_0^1 f(x) dx +$$ + +- list item one with **bold** +- list item two + +| col a | col b | +| ----- | ----- | +| 1 | 2 | + +~~~js +const s = \`template \${value}\` +~~~ + +Final paragraph with ~~strike~~ and unfinished [link text](https://exa +` + +/** + * Render-equivalence oracle: full-text remend and tail-bounded remend may + * differ in raw string output ONLY in ways that cannot affect rendering — + * i.e. after block splitting, every block must be identical. (Streamdown + * renders blocks independently, so block-level equality IS render equality.) + */ +function blocksOf(text: string): string[] { + return parseMarkdownIntoBlocks(text) +} + +describe('tailBoundedRemend', () => { + it('matches full remend block output at every streaming prefix', () => { + for (let end = 1; end <= CORPUS.length; end++) { + const prefix = CORPUS.slice(0, end) + const full = blocksOf(remend(prefix)) + const tail = blocksOf(tailBoundedRemend(prefix)) + + expect(tail, `prefix length ${end}: ${JSON.stringify(prefix.slice(-60))}`).toEqual(full) + } + }) + + it('repairs an unclosed fence opened early in a long message', () => { + const text = `intro\n\n\`\`\`python\n${'x = 1\n'.repeat(500)}print("$dollar")` + const repaired = tailBoundedRemend(text) + + expect(blocksOf(repaired)).toEqual(blocksOf(remend(text))) + // the window must reach back to the fence opener + expect(findRemendWindowStart(text)).toBe(text.indexOf('```python')) + }) + + it('bounds the window to the tail paragraph when no fence is open', () => { + const text = `para one\n\npara two\n\npara three with **bold` + const start = findRemendWindowStart(text) + + expect(start).toBe(text.indexOf('para three')) + expect(tailBoundedRemend(text)).toBe(remend(text)) + }) + + it('widens the window across an open $$ math block', () => { + const text = `before\n\n$$\n\\frac{a}{b}` + const start = findRemendWindowStart(text) + + expect(start).toBeLessThanOrEqual(text.indexOf('$$')) + 
expect(blocksOf(tailBoundedRemend(text))).toEqual(blocksOf(remend(text))) + }) + + it('handles closed constructs without modification', () => { + const text = `done **bold** and \`code\`\n\n\`\`\`js\nconst a = 1\n\`\`\`\n\nlast line.` + + expect(tailBoundedRemend(text)).toBe(text) + }) + + it('intentionally diverges from full remend on cross-block dangling openers', () => { + // Full remend scans the whole document and appends `**` for an opener + // left dangling in an EARLIER block, dumping stray asterisks into the + // unrelated tail block ("|**"). Because Streamdown splits into blocks + // after the repair, that opener never renders as bold either way — the + // tail-bounded result is the cleaner of the two. This test documents + // the divergence so a future remend upgrade that changes the behavior + // gets noticed. + const text = `- item with **dangling\n- item two\n\n|` + + expect(remend(text).endsWith('|**')).toBe(true) + expect(tailBoundedRemend(text).endsWith('|')).toBe(true) + expect(tailBoundedRemend(text).endsWith('|**')).toBe(false) + }) +}) diff --git a/apps/desktop/src/lib/remend-tail.ts b/apps/desktop/src/lib/remend-tail.ts new file mode 100644 index 00000000000..683f7dc193e --- /dev/null +++ b/apps/desktop/src/lib/remend-tail.ts @@ -0,0 +1,108 @@ +import remend from 'remend' + +// Tail-bounded incomplete-markdown repair. +// +// Streamdown's built-in `parseIncompleteMarkdown` runs `remend` over the whole +// accumulated message on every streaming flush (~18% of script time on 50KB+ +// messages). But repairs only ever matter in the trailing block: inline +// constructs can't cross a blank line, and Streamdown splits into blocks AFTER +// the repair, so a dangling opener in an earlier block can't reach the tail. +// We run `remend` on just that block instead. 
const BACKTICK = 96 // `
const TILDE = 126 // ~
const DOLLAR = 36 // $
const SPACE = 32
const TAB = 9
const BACKSLASH = 92

const isSpace = (c: number) => c === SPACE || c === TAB

/**
 * Index of the last top-level block start — the char after the most recent
 * blank line that sits outside any open code fence or `$$` math block. An
 * unclosed fence/math always begins after that blank, so it stays wholly
 * inside the window without separate tracking.
 *
 * The scan is a single forward pass: every helper it calls is bounded to the
 * current line, so the cost is linear in `text.length`.
 *
 * @param text The accumulated markdown source.
 * @returns Offset into `text` where the trailing block begins, or `0` when no
 *   blank line followed by content was found outside a fence/math block.
 */
export function findRemendWindowStart(text: string): number {
  const n = text.length
  let inFence = false
  let fenceChar = 0
  let fenceRun = 0
  let inMath = false
  let boundary = 0
  let pending = -1 // a blank line, committed to `boundary` once content follows

  for (let lineStart = 0; lineStart <= n; ) {
    let lineEnd = text.indexOf('\n', lineStart)

    if (lineEnd === -1) {
      lineEnd = n
    }

    // Skip leading spaces/tabs to find the first significant character.
    let i = lineStart

    while (i < lineEnd && isSpace(text.charCodeAt(i))) {
      i += 1
    }

    // -1 marks a blank (empty or whitespace-only) line.
    const first = i < lineEnd ? text.charCodeAt(i) : -1
    let marker = false

    // Fence open/close (``` or ~~~, ≤3 spaces indent).
    if ((first === BACKTICK || first === TILDE) && i - lineStart <= 3) {
      let run = i

      while (run < lineEnd && text.charCodeAt(run) === first) {
        run += 1
      }

      if (run - i >= 3) {
        marker = true

        if (!inFence) {
          inFence = true
          fenceChar = first
          fenceRun = run - i
        } else if (first === fenceChar && run - i >= fenceRun && onlyWhitespace(text, run, lineEnd)) {
          // A closer must use the opener's character, be at least as long,
          // and carry nothing but whitespace after the run.
          inFence = false
        }
      }
    }

    // Toggle `$$` math state on plain lines ($$ inside a fence is literal).
    // The scan is limited to [lineStart, lineEnd): an unbounded
    // `indexOf('$$', lineStart)` would walk to the next `$$` anywhere in the
    // message (or to its end) on every line that has none, turning this pass
    // quadratic on long messages.
    if (!inFence && !marker) {
      inMath = toggleMathOnLine(text, lineStart, lineEnd, inMath)
    }

    if (first === -1 && !inFence && !inMath) {
      pending = lineEnd + 1
    } else if (pending !== -1) {
      // Content (or an ignored blank inside a fence/math block) follows the
      // remembered blank line, so that blank becomes the window boundary.
      boundary = pending
      pending = -1
    }

    lineStart = lineEnd + 1
  }

  return boundary
}

/**
 * Applies every unescaped `$$` found in `text[from, to)` to the math state.
 *
 * Matches are taken left to right and never overlap (after a match the scan
 * resumes two characters later). A `$$` directly preceded by a backslash is
 * skipped without toggling.
 *
 * @returns The math state after the line.
 */
function toggleMathOnLine(text: string, from: number, to: number, inMath: boolean): boolean {
  let open = inMath

  // `to - 1`: a delimiter needs both of its characters inside the line.
  for (let s = from; s < to - 1; s += 1) {
    if (text.charCodeAt(s) !== DOLLAR || text.charCodeAt(s + 1) !== DOLLAR) {
      continue
    }

    if (s === 0 || text.charCodeAt(s - 1) !== BACKSLASH) {
      open = !open
    }

    // Consume the second dollar; the loop increment then lands on s + 2.
    s += 1
  }

  return open
}

/** True when `text[from, to)` contains only spaces and tabs (or is empty). */
function onlyWhitespace(text: string, from: number, to: number): boolean {
  for (let i = from; i < to; i += 1) {
    if (!isSpace(text.charCodeAt(i))) {
      return false
    }
  }

  return true
}

/**
 * Runs `remend` on the trailing block only.
 *
 * Everything before the window start is passed through untouched; when the
 * window starts at offset 0 the whole text is repaired.
 */
export function tailBoundedRemend(text: string): string {
  const start = findRemendWindowStart(text)

  return start <= 0 ? remend(text) : text.slice(0, start) + remend(text.slice(start))
}