mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-09 08:21:50 +00:00
perf(desktop): memoize KaTeX renders so math streams without re-rendering
Wrap rehype-katex with a per-equation LRU cache (keyed by displayMode + source text) and re-enable math during streaming. Stock @streamdown/math runs rehype-katex on every markdown commit, so each new token re-katexes every equation in the message. For math-heavy responses (an equation derived step-by-step) that's hundreds of ms of wasted work per token and the streaming UI chokes. With memoization, each equation pays katex.renderToString exactly once; subsequent tokens re-walk the tree but hit cache for unchanged equations. The wrapper mirrors rehype-katex's semantics exactly: same class detection (language-math, math-inline, math-display), same <pre>-walk-up for fenced math blocks, same parent.children.splice replacement, same SKIP traversal, same strict-then-lenient render strategy with VFile message reporting. Cached children are structuredCloned on each splice so downstream rehype plugins or toJsxRuntime can't mutate the cache.
This commit is contained in:
parent
71e864b600
commit
112cad59b4
2 changed files with 284 additions and 7 deletions
|
|
@ -7,7 +7,6 @@ import {
|
||||||
type SyntaxHighlighterProps
|
type SyntaxHighlighterProps
|
||||||
} from '@assistant-ui/react-streamdown'
|
} from '@assistant-ui/react-streamdown'
|
||||||
import { code } from '@streamdown/code'
|
import { code } from '@streamdown/code'
|
||||||
import { createMathPlugin } from '@streamdown/math'
|
|
||||||
import { type ComponentProps, memo, useEffect, useMemo, useState } from 'react'
|
import { type ComponentProps, memo, useEffect, useMemo, useState } from 'react'
|
||||||
|
|
||||||
import { PreviewAttachment } from '@/components/chat/preview-attachment'
|
import { PreviewAttachment } from '@/components/chat/preview-attachment'
|
||||||
|
|
@ -15,6 +14,7 @@ import { SyntaxHighlighter } from '@/components/chat/shiki-highlighter'
|
||||||
import { ZoomableImage } from '@/components/chat/zoomable-image'
|
import { ZoomableImage } from '@/components/chat/zoomable-image'
|
||||||
import { CopyButton } from '@/components/ui/copy-button'
|
import { CopyButton } from '@/components/ui/copy-button'
|
||||||
import { normalizeExternalUrl, openExternalLink, PrettyLink } from '@/lib/external-link'
|
import { normalizeExternalUrl, openExternalLink, PrettyLink } from '@/lib/external-link'
|
||||||
|
import { createMemoizedMathPlugin } from '@/lib/katex-memo'
|
||||||
import { isLikelyProseCodeBlock, sanitizeLanguageTag } from '@/lib/markdown-code'
|
import { isLikelyProseCodeBlock, sanitizeLanguageTag } from '@/lib/markdown-code'
|
||||||
import { preprocessMarkdown } from '@/lib/markdown-preprocess'
|
import { preprocessMarkdown } from '@/lib/markdown-preprocess'
|
||||||
import {
|
import {
|
||||||
|
|
@ -29,11 +29,18 @@ import { previewTargetFromMarkdownHref } from '@/lib/preview-targets'
|
||||||
import { cn } from '@/lib/utils'
|
import { cn } from '@/lib/utils'
|
||||||
|
|
||||||
// Math rendering plugin (KaTeX). Configured once at module scope — the
|
// Math rendering plugin (KaTeX). Configured once at module scope — the
|
||||||
// plugin is stateless so re-creating per-render is wasted work. Enable
|
// plugin is stateless beyond its internal cache so re-creating per-render
|
||||||
// `singleDollarTextMath` so models that emit `$x^2$` for inline math
|
// would needlessly thrash. We use a memoizing wrapper around rehype-katex
|
||||||
// (the de-facto convention in OpenAI / Anthropic outputs) render
|
// (see lib/katex-memo.ts) so that during streaming we re-katex only the
|
||||||
// correctly. The default false-setting only accepts `$$...$$` blocks.
|
// equations whose source actually changed since the last token. With the
|
||||||
const mathPlugin = createMathPlugin({ singleDollarTextMath: true })
|
// stock @streamdown/math plugin every equation re-renders on every token,
|
||||||
|
// which throttles UI updates badly for math-heavy responses; the memoized
|
||||||
|
// plugin keeps the steady-state work proportional to "new equations
|
||||||
|
// arriving" rather than "equations × tokens-per-second".
|
||||||
|
//
|
||||||
|
// `singleDollarTextMath: true` enables `$x^2$` for inline math (de-facto
|
||||||
|
// LLM convention). The default false-setting only accepts `$$...$$`.
|
||||||
|
const mathPlugin = createMemoizedMathPlugin({ singleDollarTextMath: true })
|
||||||
|
|
||||||
function CodeHeader({ language, code }: { language?: string; code?: string }) {
|
function CodeHeader({ language, code }: { language?: string; code?: string }) {
|
||||||
const normalizedCode = (code ?? '').replace(/^\n+/, '').trimEnd()
|
const normalizedCode = (code ?? '').replace(/^\n+/, '').trimEnd()
|
||||||
|
|
@ -329,7 +336,7 @@ const MarkdownTextImpl = () => {
|
||||||
lineNumbers={false}
|
lineNumbers={false}
|
||||||
mode="streaming"
|
mode="streaming"
|
||||||
parseIncompleteMarkdown={!isStreaming}
|
parseIncompleteMarkdown={!isStreaming}
|
||||||
plugins={isStreaming ? undefined : { code, math: mathPlugin }}
|
plugins={{ math: mathPlugin, ...(isStreaming ? {} : { code }) }}
|
||||||
preprocess={preprocessMarkdown}
|
preprocess={preprocessMarkdown}
|
||||||
shikiTheme={['github-light-default', 'github-dark-default']}
|
shikiTheme={['github-light-default', 'github-dark-default']}
|
||||||
/>
|
/>
|
||||||
|
|
|
||||||
270
apps/desktop/src/lib/katex-memo.ts
Normal file
270
apps/desktop/src/lib/katex-memo.ts
Normal file
|
|
@ -0,0 +1,270 @@
|
||||||
|
/**
|
||||||
|
* Memoizing wrapper around `rehype-katex`.
|
||||||
|
*
|
||||||
|
* Why: the default `@streamdown/math` plugin runs `rehype-katex` on every
|
||||||
|
* markdown commit. During streaming, that means each new token re-runs
|
||||||
|
* KaTeX on EVERY math node in the message — including equations that
|
||||||
|
* haven't changed since the last token. For math-heavy responses (a
|
||||||
|
* model deriving an equation step-by-step) this becomes a major source
|
||||||
|
* of jank: 20 unchanged equations each pay ~5–20ms of katex.renderToString
|
||||||
|
* work per token, adding up to hundreds of ms of CPU-bound work that
|
||||||
|
* delays the next streaming update.
|
||||||
|
*
|
||||||
|
* What this plugin does: walk the hast tree looking for the math nodes
|
||||||
|
* that `remark-math` emits (`<code class="math-inline">…</code>` for
|
||||||
|
* inline and `<pre><code class="math-display">…</code></pre>` for
|
||||||
|
* display), key them by `(displayMode, value)`, and serve them from an
|
||||||
|
* in-memory LRU cache when we've rendered the same equation before.
|
||||||
|
* Cache misses still go through `katex.renderToString`; cache hits
|
||||||
|
* return the previously generated hast subtree.
|
||||||
|
*
|
||||||
|
* Result: each unique equation only pays the katex cost once. Adding
|
||||||
|
* one new equation to a paragraph re-renders just that one equation
|
||||||
|
* instead of all of them. The cache is process-global so it survives
|
||||||
|
* moves between messages (e.g., re-rendering a session).
|
||||||
|
*
|
||||||
|
* Compatibility: the produced hast structure matches what `rehype-katex`
|
||||||
|
* itself produces — we use the same `hast-util-from-html-isomorphic`
|
||||||
|
* fragment parsing and the same parent-splice semantics, including the
|
||||||
|
* `<pre>`-walk-up for display mode. Drop-in replacement for the math
|
||||||
|
* slot in streamdown's PluginConfig.
|
||||||
|
*
|
||||||
|
* Wire it in via `createMemoizedMathPlugin`:
|
||||||
|
*
|
||||||
|
* import { createMemoizedMathPlugin } from '@/lib/katex-memo'
|
||||||
|
* const math = createMemoizedMathPlugin({ singleDollarTextMath: true })
|
||||||
|
* <Streamdown plugins={{ math }} ... />
|
||||||
|
*/
|
||||||
|
|
||||||
|
import type { Element, ElementContent, Parent, Root } from 'hast'
|
||||||
|
import { fromHtmlIsomorphic } from 'hast-util-from-html-isomorphic'
|
||||||
|
import { toText } from 'hast-util-to-text'
|
||||||
|
import katex from 'katex'
|
||||||
|
import remarkMath from 'remark-math'
|
||||||
|
import type { Pluggable } from 'unified'
|
||||||
|
import { SKIP, visitParents } from 'unist-util-visit-parents'
|
||||||
|
import type { VFile } from 'vfile'
|
||||||
|
|
||||||
|
/** Options accepted by the memoizing rehype-katex wrapper. */
interface KatexMemoOptions {
  /**
   * CSS color applied to equations that only render through the lenient
   * KaTeX fallback (and to the raw-source last-resort span). When omitted,
   * the plugin factory substitutes `var(--color-muted-foreground)`.
   */
  errorColor?: string
}
|
||||||
|
|
||||||
|
/** Configuration accepted by `createMemoizedMathPlugin`. */
interface MathPluginConfig {
  /**
   * Forwarded to `remark-math` as its `singleDollarTextMath` option: when
   * true, `$x$` is parsed as inline math; when false only `$$x$$` is.
   * `createMemoizedMathPlugin` falls back to `false` when this is omitted,
   * so callers that want the single-dollar form models usually emit must
   * pass `true` explicitly.
   */
  singleDollarTextMath?: boolean

  /**
   * Color handed to the memoized rehype plugin for equations that fail
   * strict KaTeX rendering. Left undefined, that plugin picks its own
   * default.
   */
  errorColor?: string
}
|
||||||
|
|
||||||
|
/**
 * Cached rendered hast — the children that replace a math node in its parent.
 *
 * Declared `readonly` because one array instance is shared by every later
 * cache hit: consumers must clone the nodes before splicing them into a tree
 * and must never mutate the cached array in place.
 */
type CachedRender = readonly ElementContent[]
|
||||||
|
|
||||||
|
/** Default number of entries an `LruCache` keeps before evicting. */
const CACHE_LIMIT = 512

/**
 * Minimal least-recently-used cache backed by a `Map`.
 *
 * `Map` iterates in insertion order, so the entry at the head of the map is
 * always the least recently used one; every read or write moves the touched
 * key to the tail.
 *
 * Values of `undefined` are indistinguishable from a miss in `get`.
 */
class LruCache<K, V> {
  private readonly map = new Map<K, V>()
  private readonly capacity: number

  /**
   * @param capacity Maximum number of entries retained. Defaults to
   *   `CACHE_LIMIT`. Must be a positive integer.
   * @throws RangeError when `capacity` is not a positive integer.
   */
  constructor(capacity: number = CACHE_LIMIT) {
    if (!Number.isInteger(capacity) || capacity < 1) {
      throw new RangeError(`LruCache capacity must be a positive integer, got ${capacity}`)
    }

    this.capacity = capacity
  }

  /**
   * Look up `key`, marking it as most recently used on a hit.
   *
   * @returns The stored value, or `undefined` when the key is absent.
   */
  get(key: K): undefined | V {
    const value = this.map.get(key)

    if (value === undefined) {
      return undefined
    }

    // Refresh recency by re-inserting at the tail of the insertion order.
    this.map.delete(key)
    this.map.set(key, value)

    return value
  }

  /**
   * Store `value` under `key` as the most recently used entry, evicting the
   * least recently used entry first when the cache is already full.
   */
  set(key: K, value: V): void {
    if (this.map.has(key)) {
      // Overwrite: drop the old slot so the key moves to the tail.
      this.map.delete(key)
    } else if (this.map.size >= this.capacity) {
      // Use the iterator's `done` flag rather than comparing the key with
      // `undefined`, so a legitimately `undefined` key is still evictable.
      const oldest = this.map.keys().next()

      if (!oldest.done) {
        this.map.delete(oldest.value)
      }
    }

    this.map.set(key, value)
  }
}
|
||||||
|
|
||||||
|
// Module-level render cache: created once at module scope, so every rehype
// plugin instance built in this module reads from and writes to the same
// LRU. Keys are strings; values are the rendered hast children.
const cache = new LruCache<string, CachedRender>()
|
||||||
|
|
||||||
|
/**
 * Build the cache key for one equation.
 *
 * The key is a one-letter mode tag (`d` for display, `i` for inline), a
 * U+0001 separator, and the raw TeX source. The tag has a fixed length, so
 * two different (mode, source) pairs can never produce the same key.
 */
function cacheKey(displayMode: boolean, value: string): string {
  const modeTag = displayMode ? 'd' : 'i'

  return modeTag + '\u0001' + value
}
|
||||||
|
|
||||||
|
/**
 * Turn one TeX expression into hast nodes using a two-pass strategy.
 *
 * Pass 1 renders with `throwOnError: true`. If KaTeX throws, the failure is
 * recorded on `file` as a message and pass 2 retries leniently
 * (`strict: 'ignore'`, `throwOnError: false`), passing `errorColor` through
 * to KaTeX. If even the lenient pass throws, the raw source is returned
 * inside a `span.katex-error` whose `title` carries the pass-1 error.
 *
 * @param value       TeX source of the equation.
 * @param displayMode True for display math, false for inline math.
 * @param errorColor  CSS color used by the lenient pass and the raw-source span.
 * @param file        VFile that receives the "could not render" message.
 * @param element     Node whose `position` is attached to that message.
 * @returns The hast nodes that should replace the math node.
 */
function renderMath(
  value: string,
  displayMode: boolean,
  errorColor: string,
  file: VFile,
  element: Element
): ElementContent[] {
  let markup: string

  try {
    markup = katex.renderToString(value, { displayMode, throwOnError: true })
  } catch (error) {
    const cause = error as Error
    const ruleId = cause.name?.toLowerCase() ?? 'katex'

    file.message('Could not render math with KaTeX', {
      cause,
      place: element.position,
      ruleId,
      source: 'rehype-katex-memo'
    })

    const lenientOptions = {
      displayMode,
      errorColor,
      strict: 'ignore',
      throwOnError: false
    }

    try {
      markup = katex.renderToString(value, lenientOptions)
    } catch {
      // Nothing KaTeX produced is usable: show the source text so the reader
      // still sees what was meant to be there. `error` here is the strict
      // pass failure captured by the outer catch.
      const rawSource: Element = {
        type: 'element',
        tagName: 'span',
        properties: {
          className: ['katex-error'],
          style: `color:${errorColor}`,
          title: String(error)
        },
        children: [{ type: 'text', value }]
      }

      return [rawSource]
    }
  }

  const parsed = fromHtmlIsomorphic(markup, { fragment: true })

  return parsed.children as ElementContent[]
}
|
||||||
|
|
||||||
|
/**
 * Build the memoizing rehype plugin.
 *
 * The returned plugin visits every element, picks out math nodes by class
 * (`language-math`, `math-display`, `math-inline`), and replaces each one in
 * its parent with rendered KaTeX output. Renders are served from the
 * module-level LRU `cache` when available; misses go through `renderMath`
 * and are stored for next time.
 *
 * The cache key includes `errorColor` because the cache is shared by every
 * plugin instance while lenient-fallback renders embed the color in their
 * output — without it, two instances configured with different colors
 * would serve each other's error renders.
 *
 * Note: the VFile message for a failing equation is emitted only when the
 * equation is actually rendered (cache miss), not on later cache hits.
 *
 * @param options Optional `errorColor`; defaults to
 *   `var(--color-muted-foreground)`.
 * @returns A unified `Pluggable` whose transformer mutates the tree in place.
 */
function createMemoizedRehypeKatex(options: KatexMemoOptions = {}): Pluggable {
  const errorColor = options.errorColor ?? 'var(--color-muted-foreground)'

  return () =>
    function transform(tree: Root, file: VFile): undefined {
      visitParents(tree, 'element', (element, parents) => {
        const classes = Array.isArray(element.properties?.className)
          ? (element.properties.className as string[])
          : []

        // `language-math` is the fenced ```math form, `math-inline` marks
        // `$x$`, and `math-display` marks `$$x$$`.
        const languageMath = classes.includes('language-math')
        const mathDisplay = classes.includes('math-display')
        const mathInline = classes.includes('math-inline')

        if (!(languageMath || mathDisplay || mathInline)) {
          return
        }

        let displayMode = mathDisplay
        let scope: Element = element
        let parent: Parent | undefined = parents[parents.length - 1]

        // A `language-math` <code> wrapped in <pre> is replaced together with
        // its <pre> and always rendered as display math.
        if (
          languageMath &&
          parent &&
          parent.type === 'element' &&
          (parent as Element).tagName === 'pre'
        ) {
          scope = parent as Element
          parent = parents[parents.length - 2]
          displayMode = true
        }

        // No parent means there is nothing to splice into.
        if (!parent) {
          return
        }

        // Locate the splice position before doing any rendering or cloning,
        // so no work is spent on a node that cannot be replaced.
        const index = parent.children.indexOf(scope as ElementContent)

        if (index === -1) {
          return
        }

        const value = toText(scope, { whitespace: 'pre' })
        // Namespace the shared cache by error color (see function docs).
        const key = `${errorColor}\u0001${cacheKey(displayMode, value)}`
        let cached = cache.get(key)

        if (!cached) {
          cached = renderMath(value, displayMode, errorColor, file, scope)
          cache.set(key, cached)
        }

        // Splice CLONES of the cached children: handing out the cached node
        // instances themselves would let later pipeline stages mutate the
        // cached subtree and corrupt the next cache hit.
        const clonedChildren = cached.map(child => structuredClone(child))

        parent.children.splice(index, 1, ...clonedChildren)

        // The replacement is already-rendered output; do not descend into it.
        return SKIP
      })
    }
}
|
||||||
|
|
||||||
|
/**
 * Assemble the math plugin object for streamdown's `plugins.math` slot,
 * pairing `remark-math` (parsing) with the memoized rehype-katex wrapper
 * (rendering).
 *
 * @param config Optional settings. `singleDollarTextMath` falls back to
 *   `false` when omitted; `errorColor` is passed through to the rehype
 *   wrapper untouched.
 * @returns An object with `name: 'katex'`, `type: 'math'`, the remark and
 *   rehype pluggables, and a `getStyles` function returning the KaTeX
 *   stylesheet path.
 */
export function createMemoizedMathPlugin(config: MathPluginConfig = {}) {
  const singleDollarTextMath = config.singleDollarTextMath ?? false

  const remarkStage: Pluggable = [remarkMath, { singleDollarTextMath }]
  const rehypeStage = createMemoizedRehypeKatex({ errorColor: config.errorColor })

  return {
    name: 'katex' as const,
    type: 'math' as const,
    remarkPlugin: remarkStage,
    rehypePlugin: rehypeStage,
    getStyles: () => 'katex/dist/katex.min.css'
  }
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue