diff --git a/apps/desktop/src/components/assistant-ui/markdown-text.test.ts b/apps/desktop/src/components/assistant-ui/markdown-text.test.ts index 529654ed050..22645ec7cd0 100644 --- a/apps/desktop/src/components/assistant-ui/markdown-text.test.ts +++ b/apps/desktop/src/components/assistant-ui/markdown-text.test.ts @@ -136,8 +136,13 @@ describe('preprocessMarkdown', () => { const output = preprocessMarkdown(input) expect(output).not.toContain('```') + // Currency dollar amounts get escaped to `\$` in the preprocessor + // so they don't get parsed as math delimiters by remark-math (we + // enable singleDollarTextMath, which would otherwise greedy-match + // `$56...$99` as one big inline math span). The escape is invisible + // to the user — `\$` renders as a literal `$` in the final output. expect(output).toContain( - '~$56 Old San Juan Sunset Cruise' + '~\\$56 Old San Juan Sunset Cruise' ) expect(output).toContain( '' diff --git a/apps/desktop/src/components/assistant-ui/markdown-text.tsx b/apps/desktop/src/components/assistant-ui/markdown-text.tsx index 9b5f7ec649e..0ab147a6c45 100644 --- a/apps/desktop/src/components/assistant-ui/markdown-text.tsx +++ b/apps/desktop/src/components/assistant-ui/markdown-text.tsx @@ -335,7 +335,15 @@ const MarkdownTextImpl = () => { )} lineNumbers={false} mode="streaming" - parseIncompleteMarkdown={!isStreaming} + // Always auto-close incomplete fences — even during streaming. + // Without this, an unclosed ```python ... ``` whose body contains + // `$` (very common: shell snippets, JS template strings, dollar + // amounts) leaks those dollars out to the math parser and they + // get rendered as broken inline math until the closing fence + // arrives. Shiki is independently deferred via `defer={isStreaming}` + // on the SyntaxHighlighter component, so we don't pay code-block + // tokenization on every token even with this set. + parseIncompleteMarkdown plugins={{ math: mathPlugin, ...(isStreaming ? {} : { code }) }} preprocess={preprocessMarkdown} shikiTheme={['github-light-default', 'github-dark-default']} diff --git a/apps/desktop/src/lib/markdown-preprocess.ts b/apps/desktop/src/lib/markdown-preprocess.ts index b7eb47f2220..bb85b22b0a2 100644 --- a/apps/desktop/src/lib/markdown-preprocess.ts +++ b/apps/desktop/src/lib/markdown-preprocess.ts @@ -94,7 +94,14 @@ function scrubBacktickNoise(text: string): string { out += text.slice(cursor).replace(fenceNoiseRe, '') for (let pass = 0; pass < 2; pass += 1) { - out = out.replace(/``\s*``/g, '') + // Match EXACTLY 2 backticks (not part of a longer run) on each side. + // Without the lookbehind/lookahead, two adjacent triple-backtick + // fences with only whitespace between them get spliced together — + // e.g. ```bash\n...\n```\n\n```latex matches the regex's + // last-2-of-bash-close + \n\n + first-2-of-latex-open and the + // surrounding fence markers collapse into a single longer block, + // which the markdown parser then treats as ONE giant code block. + out = out.replace(/(? element. We + // keep the fence intact (instead of converting to $$..$$) so + // any literal `$$` characters in the body don't collide with + // an outer math wrapper. No close emitted yet — streaming. + out.push(`${indent}${marker}math`) + out.push(...bodyLines) } else { out.push(`${indent}${marker}${language}`) out.push(...bodyLines) @@ -241,6 +273,21 @@ function normalizeFenceBlocks(text: string): string { continue } + if (isMathFence(language)) { + // Closed math fence — rewrite the language tag to "math" so + // rehype-katex's language-math class detection picks it up. + // Body stays untouched (no $$..$$ rewrite) so authors can write + // arbitrary LaTeX including `$$display$$` markers without them + // colliding with our wrapper. Without this rewrite the block + // would render as a syntax-highlighted "latex" code listing. + out.push(`${indent}${marker}math`) + out.push(...bodyLines) + out.push(`${indent}${marker}`) + index = closeIndex + 1 + + continue + } + out.push(`${indent}${marker}${language}`) out.push(...bodyLines) out.push(`${indent}${marker}`) @@ -250,6 +297,39 @@ function normalizeFenceBlocks(text: string): string { return out.join('\n') } +// Convert LaTeX bracket delimiters to remark-math's dollar-sign syntax. +// Models often emit `\(...\)` for inline math and `\[...\]` for display +// math (the standard LaTeX convention) instead of `$...$` / `$$...$$`. +// remark-math only natively recognizes the dollar form, so we rewrite at +// preprocess time. Done with simple non-greedy matches keyed on the +// escaped-bracket sequences — these are rare enough in non-math content +// (you'd have to write a literal `\(` followed eventually by a literal +// `\)` with NO interleaving newline-paragraph-break) that false positives +// are extremely unlikely. +const LATEX_INLINE_RE = /\\\(([^\n]+?)\\\)/g +const LATEX_DISPLAY_RE = /\\\[([\s\S]+?)\\\]/g + +function rewriteLatexBracketDelimiters(text: string): string { + return text.replace(LATEX_INLINE_RE, (_, body: string) => `$${body}$`).replace(LATEX_DISPLAY_RE, (_, body: string) => `$$${body}$$`) +} + +// Escape `$` patterns so they don't get eaten as math delimiters. +// Models commonly write currency amounts ($5, $19.99, $1,299) in prose. +// With `singleDollarTextMath: true`, remark-math is greedy and matches +// EVERY pair of `$`s — including the open of `$5` to the next `$10`, +// rendering "5 in my pocket and you have " as italicized math text. +// The de-facto convention across math-supporting LLM UIs is to treat +// `$` followed by a digit as currency rather than math, since math +// expressions almost always start with a letter or `\command`. Trade- +// off: a math expression like `$5x = 10$` would have its leading 5 +// escaped — annoying but rare. The escape `\$` survives to render as +// a literal `$` in the final output. +const CURRENCY_DOLLAR_RE = /(^|[^\\])\$(?=\d)/g + +function escapeCurrencyDollars(text: string): string { + return text.replace(CURRENCY_DOLLAR_RE, '$1\\$') +} + export function preprocessMarkdown(text: string): string { const cleaned = text.replace(REASONING_BLOCK_RE, '').replace(PREVIEW_MARKER_RE, '') const scrubbed = scrubBacktickNoise(cleaned) @@ -258,7 +338,35 @@ export function preprocessMarkdown(text: string): string { return strippedEmptyFences .split(CODE_FENCE_SPLIT_RE) - .map(part => (/^(?:```|~~~)/.test(part) ? part : normalizeVisibleProse(stripPreviewTargets(part)))) + .map(part => { + // Fence blocks pass through untouched. + if (/^(?:```|~~~)/.test(part)) {return part} + + // Whitespace-only segments (e.g. the `\n\n` between two adjacent + // fences) must NOT go through stripPreviewTargets — its internal + // .trim() would collapse them to '' and glue the surrounding + // fences together, producing things like ``````math which the + // markdown parser then reads as a single 6-backtick block. + if (!part.trim()) {return part} + // Preserve leading/trailing whitespace around the prose body so + // that fence-prose-fence sequences keep their blank-line gaps. + // stripPreviewTargets internally calls .trim() on its result for + // the benefit of its other (single-segment) callers; here we're + // operating on a SEGMENT of a larger document where outer + // whitespace is structural and must survive. + const leading = part.match(/^\s*/)?.[0] ?? '' + const trailing = part.match(/\s*$/)?.[0] ?? '' + + // rewriteLatexBracketDelimiters runs only on prose segments so + // we don't accidentally touch `\(` inside a code block. + // escapeCurrencyDollars likewise only runs on prose, so legit + // `$5` literals inside fenced code stay intact. + const transformed = normalizeVisibleProse( + stripPreviewTargets(rewriteLatexBracketDelimiters(escapeCurrencyDollars(part))) + ) + + return leading + transformed + trailing + }) .join('') .replace(/[ \t]+\n/g, '\n') }