hermes-agent/apps/desktop/src/components/assistant-ui/markdown-text.tsx
brooklyn! f10f7114f9
Merge pull request #47664 from NousResearch/bb/desktop-markdown-spread-overflow
fix(desktop): stop a single message from crashing or freezing the chat
2026-06-17 08:37:06 -05:00

640 lines
23 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

'use client'
import { TextMessagePartProvider, useMessagePartText } from '@assistant-ui/react'
import {
parseMarkdownIntoBlocks,
type StreamdownTextComponents,
StreamdownTextPrimitive,
type SyntaxHighlighterProps
} from '@assistant-ui/react-streamdown'
import { code } from '@streamdown/code'
import {
type ComponentProps,
memo,
type ReactNode,
useDeferredValue,
useEffect,
useMemo,
useRef,
useState
} from 'react'
import { ExpandableBlock } from '@/components/chat/expandable-block'
import { PreviewAttachment } from '@/components/chat/preview-attachment'
import { chunkByLines, SyntaxHighlighter } from '@/components/chat/shiki-highlighter'
import { ZoomableImage } from '@/components/chat/zoomable-image'
import { normalizeExternalUrl, openExternalLink, PrettyLink } from '@/lib/external-link'
import { createMemoizedMathPlugin } from '@/lib/katex-memo'
import { preprocessMarkdown } from '@/lib/markdown-preprocess'
import {
filePathFromMediaPath,
gatewayMediaDataUrl,
isRemoteGateway,
mediaExternalUrl,
mediaKind,
mediaName,
mediaPathFromMarkdownHref,
mediaStreamUrl
} from '@/lib/media'
import { previewTargetFromMarkdownHref } from '@/lib/preview-targets'
import { tailBoundedRemend } from '@/lib/remend-tail'
import { cn } from '@/lib/utils'
// Math rendering plugin (KaTeX). Configured once at module scope — the
// plugin is stateless beyond its internal cache so re-creating per-render
// would needlessly thrash. We use a memoizing wrapper around rehype-katex
// (see lib/katex-memo.ts) so that during streaming we re-katex only the
// equations whose source actually changed since the last token. With the
// stock @streamdown/math plugin every equation re-renders on every token,
// which throttles UI updates badly for math-heavy responses; the memoized
// plugin keeps the steady-state work proportional to "new equations
// arriving" rather than "equations × tokens-per-second".
//
// `singleDollarTextMath: true` enables `$x^2$` for inline math (de-facto
// LLM convention). The default false-setting only accepts `$$...$$`.
const mathPlugin = createMemoizedMathPlugin({ singleDollarTextMath: true })
// Replaces Streamdown's `parseIncompleteMarkdown` (full-text remend per
// flush) with a tail-bounded repair — see lib/remend-tail.ts. Must stay
// module-scope so the prop identity is stable across renders.
function preprocessWithTailRepair(text: string): string {
try {
return tailBoundedRemend(preprocessMarkdown(text))
} catch {
return text
}
}
// Memoized block splitter. Streamdown calls `parseMarkdownIntoBlocks` (a full
// `marked` lex of the entire message, ~1.6ms per 28KB) inside a useMemo keyed
// on the text — but the same text is re-lexed every time a message REMOUNTS
// (virtualizer scroll, session switch) and whenever multiple surfaces render
// the same content (deferred + smooth reveal republish). A small module-level
// LRU keyed by the exact source string removes all of those repeat parses
// with zero correctness risk (same input → same output). Streaming tail
// growth misses the cache by design (every flush is a new string) — that
// single lex is the irreducible cost.
const BLOCK_CACHE_MAX = 64
const BLOCK_CACHE_MIN_LENGTH = 1024
const blockCache = new Map<string, string[]>()
function parseMarkdownIntoBlocksCached(markdown: string): string[] {
if (markdown.length < BLOCK_CACHE_MIN_LENGTH) {
return parseMarkdownIntoBlocks(markdown)
}
const hit = blockCache.get(markdown)
if (hit) {
// Refresh recency (Map iteration order is insertion order).
blockCache.delete(markdown)
blockCache.set(markdown, hit)
return hit
}
const blocks = parseMarkdownIntoBlocks(markdown)
blockCache.set(markdown, blocks)
if (blockCache.size > BLOCK_CACHE_MAX) {
blockCache.delete(blockCache.keys().next().value as string)
}
return blocks
}
async function mediaSrc(path: string): Promise<string> {
if (/^(?:https?|data):/i.test(path)) {
return path
}
// Stream audio/video through the custom protocol: data URLs are capped and
// load the whole file into memory, which broke playback for larger videos.
if (window.hermesDesktop && ['audio', 'video'].includes(mediaKind(path))) {
return mediaStreamUrl(path)
}
// Remote gateway: the image lives on the gateway machine, so read it over the
// authenticated API rather than this machine's disk.
if (window.hermesDesktop && isRemoteGateway()) {
return gatewayMediaDataUrl(path)
}
if (!window.hermesDesktop?.readFileDataUrl) {
return mediaExternalUrl(path)
}
return window.hermesDesktop.readFileDataUrl(filePathFromMediaPath(path))
}
function OpenMediaButton({ kind, path }: { kind: 'audio' | 'video'; path: string }) {
return (
<button
className="mt-2 bg-transparent text-xs font-medium text-muted-foreground underline underline-offset-4 decoration-current/20 hover:text-foreground"
onClick={() => void window.hermesDesktop?.openExternal(mediaExternalUrl(path))}
type="button"
>
Open {kind} file
</button>
)
}
function MediaAttachment({ path }: { path: string }) {
const [src, setSrc] = useState('')
const [failed, setFailed] = useState(false)
const kind = mediaKind(path)
const name = mediaName(path)
useEffect(() => {
let cancelled = false
let objectUrl = ''
setFailed(false)
setSrc('')
void mediaSrc(path)
.then(value => {
if (value.startsWith('blob:')) {
objectUrl = value
}
if (!cancelled) {
setSrc(value)
} else if (objectUrl) {
URL.revokeObjectURL(objectUrl)
}
})
.catch(() => {
if (!cancelled) {
setFailed(true)
}
})
return () => {
cancelled = true
if (objectUrl) {
URL.revokeObjectURL(objectUrl)
}
}
}, [path])
if (kind === 'image' && src) {
return (
<span className="block">
<MarkdownImage alt={name} src={src} />
</span>
)
}
if (kind === 'audio' && src) {
return (
<span className="my-3 block max-w-md rounded-xl border border-border bg-muted/35 p-3">
<span className="mb-2 block truncate text-xs font-medium text-muted-foreground">{name}</span>
<audio className="block w-full" controls onError={() => setFailed(true)} preload="metadata" src={src} />
{failed && <OpenMediaButton kind="audio" path={path} />}
</span>
)
}
if (kind === 'video' && src) {
return (
<span className="my-3 block max-w-2xl rounded-xl border border-border bg-muted/35 p-3">
<span className="mb-2 block truncate text-xs font-medium text-muted-foreground">{name}</span>
<video
className="block max-h-112 w-full rounded-lg bg-black"
controls
onError={() => setFailed(true)}
src={src}
/>
{failed && <OpenMediaButton kind="video" path={path} />}
</span>
)
}
return (
<a
className="font-semibold text-foreground underline underline-offset-4 decoration-current/20 wrap-anywhere"
href="#"
onClick={event => {
event.preventDefault()
openExternalLink(mediaExternalUrl(path))
}}
>
{failed ? `Open ${name}` : `Loading ${name}...`}
</a>
)
}
function childrenToText(children: unknown): string {
if (typeof children === 'string' || typeof children === 'number') {
return String(children).trim()
}
if (Array.isArray(children) && children.every(c => typeof c === 'string' || typeof c === 'number')) {
return children.join('').trim()
}
return ''
}
function MarkdownLink({ children, className, href, ...props }: ComponentProps<'a'>) {
const mediaPath = mediaPathFromMarkdownHref(href)
if (mediaPath) {
return <MediaAttachment path={mediaPath} />
}
const previewTarget = previewTargetFromMarkdownHref(href)
if (previewTarget) {
return <PreviewAttachment source="explicit-link" target={previewTarget} />
}
const target = href ? normalizeExternalUrl(href) : href
if (!target || !/^https?:\/\//i.test(target)) {
return (
<a
className={cn(
'font-semibold text-foreground underline underline-offset-4 decoration-current/20 wrap-anywhere',
className
)}
href={href}
rel="noopener noreferrer"
target="_blank"
{...props}
>
{children}
</a>
)
}
const text = childrenToText(children)
const fallbackLabel = text && normalizeExternalUrl(text) !== target ? text : undefined
return (
<PrettyLink className={cn('wrap-anywhere', className)} fallbackLabel={fallbackLabel} href={target} {...props} />
)
}
function MarkdownImage({ className, src, alt, ...props }: ComponentProps<'img'>) {
return (
<ZoomableImage
alt={alt}
className={cn(
'm-0 block h-auto w-auto max-h-(--image-preview-height) max-w-[min(100%,var(--image-preview-max-width))] rounded-lg object-contain shadow-[0_0.0625rem_0.125rem_color-mix(in_srgb,#000_4%,transparent),0_0.625rem_1.5rem_color-mix(in_srgb,#000_5%,transparent)]',
className
)}
containerClassName="my-2 block w-fit max-w-full"
slot="aui_markdown-image"
src={src}
{...props}
/>
)
}
// Steady character-reveal for streaming text: decouples visible cadence from
// bursty arrival so text flows instead of popping (cf. assistant-ui's useSmooth,
// reimplemented for a tunable rate). Proportional drain — each frame reveals a
// slice of the backlog so the reveal converges within ~REVEAL_DRAIN_MS whatever
// the size; the per-frame cap stops a huge dump rendering as one slab. The loop
// is gated on backlog, not isRunning, so a stream that completes mid-reveal
// keeps draining its tail instead of snapping.
const REVEAL_DRAIN_MS = 500
const REVEAL_MAX_CHARS_PER_FRAME = 30
// Floor between reveal commits. Each commit republishes the text context and
// re-runs the whole Streamdown pipeline (preprocess → remend → lex → micromark
// on the open block) over the full accumulated text — at raw rAF cadence
// that's 60 full parses/second and was the dominant streaming cost for
// reasoning text. ~33ms keeps the reveal visually fluid (2 frames) while
// halving the parse work.
const REVEAL_MIN_COMMIT_MS = 33
function useSmoothReveal(text: string, isRunning: boolean): string {
const [displayed, setDisplayed] = useState(isRunning ? '' : text)
const targetRef = useRef(text)
const shownRef = useRef(displayed)
const frameRef = useRef<number | null>(null)
const lastTickRef = useRef(0)
shownRef.current = displayed
targetRef.current = text
useEffect(() => {
if (typeof window === 'undefined') {
return
}
// Non-extending change (regenerate / branch / history swap): restart from
// empty while streaming, else snap to the replacement.
if (!text.startsWith(shownRef.current)) {
shownRef.current = isRunning ? '' : text
setDisplayed(shownRef.current)
}
if (shownRef.current.length >= text.length || frameRef.current !== null) {
return
}
lastTickRef.current = performance.now()
const tick = () => {
const now = performance.now()
const dt = now - lastTickRef.current
// Skip this frame if the floor hasn't elapsed — the backlog math below
// is dt-proportional, so delayed commits reveal proportionally more.
if (dt < REVEAL_MIN_COMMIT_MS) {
frameRef.current = requestAnimationFrame(tick)
return
}
lastTickRef.current = now
const remaining = targetRef.current.length - shownRef.current.length
const add = Math.min(
remaining,
// dt-scaled so the per-commit cap stays equivalent to the old
// per-frame cap at any commit cadence.
Math.ceil((REVEAL_MAX_CHARS_PER_FRAME * dt) / 16.7),
Math.max(1, Math.ceil((remaining * dt) / REVEAL_DRAIN_MS))
)
shownRef.current = targetRef.current.slice(0, shownRef.current.length + add)
setDisplayed(shownRef.current)
frameRef.current = shownRef.current.length < targetRef.current.length ? requestAnimationFrame(tick) : null
}
frameRef.current = requestAnimationFrame(tick)
}, [text, isRunning])
useEffect(
() => () => {
if (frameRef.current !== null && typeof window !== 'undefined') {
cancelAnimationFrame(frameRef.current)
}
},
[]
)
return displayed
}
// Re-publish the part context with a smooth character-reveal, above
// DeferStreamingText so the reveal feeds the deferred markdown pipeline. Status
// stays running while revealing so the caret persists past the underlying part
// settling.
function SmoothStreamingText({ children }: { children: ReactNode }) {
const { text, status } = useMessagePartText()
const isRunning = status.type === 'running'
const revealed = useSmoothReveal(text, isRunning)
return (
<TextMessagePartProvider isRunning={isRunning || revealed !== text} text={revealed}>
{children}
</TextMessagePartProvider>
)
}
/**
* Re-publish the active message-part context with React's `useDeferredValue`
* applied to the streaming text and status. The outer wrapper still re-renders
* on every token, but the work it does is trivial (one hook, one provider).
*
* The expensive subtree (Streamdown → micromark → mdast → hast → React) lives
* inside `<TextMessagePartProvider>` and reads the deferred text via the
* normal `useMessagePartText` hook. React's concurrent scheduler then has
* permission to:
* - skip intermediate token states when the next token arrives mid-render
* (it abandons the in-flight deferred render and starts over)
* - deprioritize the markdown render when the main thread is busy with an
* urgent task (typing, scrolling, layout work elsewhere)
*
* Net effect: per-token CPU is unchanged but the *blocking* part of that work
* goes away — typing-while-streaming stays a single-frame paint, scroll
* stutter disappears, and the longtask histogram tightens because long
* commits can be interrupted and discarded.
*
* Industry standard (Streamdown's own block-array setState already uses
* `useTransition`); this just lifts the deferral up to the consumer text
* boundary so it covers the whole pipeline, not just the inner setState.
*/
function DeferStreamingText({ children }: { children: ReactNode }) {
const { text, status } = useMessagePartText()
const deferredText = useDeferredValue(text)
const isRunning = status.type === 'running'
return (
<TextMessagePartProvider isRunning={isRunning} text={deferredText}>
{children}
</TextMessagePartProvider>
)
}
interface MarkdownTextSurfaceProps {
containerClassName?: string
containerProps?: ComponentProps<'div'>
}
// Headings shrink to chat scale rather than the prose default (h1≈xl). Kept
// table-driven so adding/tweaking levels is one row.
const HEADING_SIZES: Record<'h1' | 'h2' | 'h3' | 'h4', string> = {
h1: 'text-[1rem] tracking-tight',
h2: 'text-[0.9375rem] tracking-tight',
h3: 'text-[0.875rem]',
h4: 'text-[0.8125rem]'
}
const MARKDOWN_CONTAINER_CLASS_NAME = cn(
'aui-md prose w-full max-w-none overflow-hidden text-[length:var(--conversation-text-font-size)] leading-(--dt-line-height) text-foreground',
'prose-p:leading-(--dt-line-height) prose-li:leading-(--dt-line-height)',
'prose-headings:text-foreground prose-strong:text-foreground',
'prose-a:break-words prose-p:[overflow-wrap:anywhere]',
'prose-li:marker:text-muted-foreground/70',
'prose-code:rounded-[0.25rem] prose-code:px-[0.1875rem] prose-code:py-px prose-code:font-mono prose-code:text-[0.9em] prose-code:font-normal prose-code:before:content-none prose-code:after:content-none',
'[&>*:first-child]:mt-0 [&>*:last-child]:mb-0 [&>*+*]:mt-(--paragraph-gap)'
)
const MAX_MARKDOWN_CHARS = 200_000
function HugeTextFallback({ containerClassName, text }: { containerClassName?: string; text: string }) {
const chunks = useMemo(() => chunkByLines(text, 200), [text])
return (
<div
className={cn(
'aui-md w-full max-w-none overflow-hidden rounded-[0.625rem] border border-border font-mono text-[0.7rem] leading-relaxed text-foreground/90',
containerClassName
)}
>
<ExpandableBlock className="p-2">
{chunks.map((chunk, index) => (
<div
className="[content-visibility:auto]"
key={index}
style={{ containIntrinsicSize: `auto ${chunk.lines * 16}px` }}
>
{chunk.text}
</div>
))}
</ExpandableBlock>
</div>
)
}
function MarkdownTextSurface({ containerClassName, containerProps }: MarkdownTextSurfaceProps) {
const { status, text } = useMessagePartText()
const isStreaming = status.type === 'running'
// Keep code parsing enabled while streaming so incomplete fenced blocks still
// render as code cards. The expensive Shiki pass is deferred by
// `SyntaxHighlighter` below when `isStreaming` is true.
const plugins = useMemo(() => ({ math: mathPlugin, code }), [])
const components = useMemo(
() =>
({
h1: ({ className, ...props }: ComponentProps<'h1'>) => (
<h1 className={cn('my-1 font-semibold', HEADING_SIZES.h1, className)} {...props} />
),
h2: ({ className, ...props }: ComponentProps<'h2'>) => (
<h2 className={cn('my-1 font-semibold', HEADING_SIZES.h2, className)} {...props} />
),
h3: ({ className, ...props }: ComponentProps<'h3'>) => (
<h3 className={cn('my-1 font-semibold', HEADING_SIZES.h3, className)} {...props} />
),
h4: ({ className, ...props }: ComponentProps<'h4'>) => (
<h4 className={cn('my-1 font-semibold', HEADING_SIZES.h4, className)} {...props} />
),
p: ({ className, ...props }: ComponentProps<'p'>) => (
// Vertical rhythm is owned by styles.css (`--paragraph-gap`), which
// must out-specify Tailwind Typography's `prose` margins — so no
// `my-*` here on purpose.
<p className={cn('wrap-anywhere leading-(--dt-line-height)', className)} {...props} />
),
a: MarkdownLink,
// Inline code must not vote when an ancestor resolves `dir="auto"`
// (HTML's algorithm skips descendants that carry their own dir),
// mirroring the CSS isolate that already keeps it out of the
// plaintext scan. Fenced code never reaches this override; it goes
// through the code plugin's CodeCard path.
inlineCode: ({ className, ...props }: ComponentProps<'code'>) => (
<code className={className} dir="ltr" {...props} />
),
// `---` as quiet spacing, not a heavy full-width rule.
hr: (_props: ComponentProps<'hr'>) => <div aria-hidden className="my-3" />,
// Lists and blockquotes have chrome that sits *beside* the text
// (markers, the quote border), and that side is driven by the CSS
// `direction` of the box, which `unicode-bidi: plaintext` never
// touches — an RTL list otherwise renders its numbers stranded at
// the far left. `dir="auto"` lets the browser resolve the box
// direction from content; the plaintext rules in styles.css keep
// owning per-line text direction. Inline code carries `dir="ltr"`
// (see the `code` override) so it doesn't vote here either, same
// contract as the CSS isolate.
blockquote: ({ className, ...props }: ComponentProps<'blockquote'>) => (
<blockquote
className={cn('border-s-2 border-border ps-3 text-muted-foreground italic', className)}
dir="auto"
{...props}
/>
),
ul: ({ className, ...props }: ComponentProps<'ul'>) => (
<ul className={cn('my-1 gap-0', className)} dir="auto" {...props} />
),
ol: ({ className, ...props }: ComponentProps<'ol'>) => (
<ol className={cn('my-1 gap-0', className)} dir="auto" {...props} />
),
li: ({ className, ...props }: ComponentProps<'li'>) => (
<li className={cn('leading-(--dt-line-height)', className)} {...props} />
),
table: ({ className, ...props }: ComponentProps<'table'>) => (
<div className="aui-md-table my-2 max-w-full overflow-x-auto rounded-[0.375rem] border border-border">
<table
className={cn(
'm-0 w-full min-w-[18rem] border-collapse text-[0.8125rem] [&_tr]:border-b [&_tr]:border-border last:[&_tr]:border-0',
className
)}
{...props}
/>
</div>
),
thead: ({ className, ...props }: ComponentProps<'thead'>) => (
<thead className={cn('m-0 bg-muted/35 text-muted-foreground', className)} {...props} />
),
th: ({ className, ...props }: ComponentProps<'th'>) => (
<th
className={cn(
'whitespace-nowrap px-2.5 py-1.5 text-left align-middle text-[0.75rem] font-medium text-muted-foreground',
className
)}
{...props}
/>
),
td: ({ className, ...props }: ComponentProps<'td'>) => (
<td className={cn('px-2.5 py-1.5 align-top text-[0.8125rem] leading-snug', className)} {...props} />
),
img: MarkdownImage,
SyntaxHighlighter: (props: SyntaxHighlighterProps) => <SyntaxHighlighter {...props} defer={isStreaming} />
}) as StreamdownTextComponents,
[isStreaming]
)
if (text.length > MAX_MARKDOWN_CHARS) {
return <HugeTextFallback containerClassName={containerClassName} text={text} />
}
return (
<StreamdownTextPrimitive
components={components}
containerClassName={cn(MARKDOWN_CONTAINER_CLASS_NAME, containerClassName)}
containerProps={containerProps}
lineNumbers={false}
mode="streaming"
// Incomplete-markdown repair is handled by `preprocessWithTailRepair`
// below (tail-bounded remend) instead of Streamdown's built-in pass,
// which re-runs remend over the ENTIRE message on every flush — ~18%
// of streaming script time on 50KB+ messages. The repair itself stays
// always-on (even between flushes / for completed messages): an
// unclosed ```python ... ``` whose body contains `$` (shell snippets,
// JS template strings, dollar amounts) would otherwise leak those
// dollars to the math parser and render broken inline math. Shiki is
// independently deferred via `defer={isStreaming}` on the
// SyntaxHighlighter component.
parseIncompleteMarkdown={false}
parseMarkdownIntoBlocksFn={parseMarkdownIntoBlocksCached}
plugins={plugins}
preprocess={preprocessWithTailRepair}
/>
)
}
interface MarkdownTextContentProps extends MarkdownTextSurfaceProps {
isRunning: boolean
text: string
}
export function MarkdownTextContent({ isRunning, text, ...surfaceProps }: MarkdownTextContentProps) {
return (
<TextMessagePartProvider isRunning={isRunning} text={text}>
<SmoothStreamingText>
<DeferStreamingText>
<MarkdownTextSurface {...surfaceProps} />
</DeferStreamingText>
</SmoothStreamingText>
</TextMessagePartProvider>
)
}
const MarkdownTextImpl = () => {
return (
<DeferStreamingText>
<MarkdownTextSurface />
</DeferStreamingText>
)
}
export const MarkdownText = memo(MarkdownTextImpl)