perf(desktop): cut GUI streaming & interaction lag (#45343)

* perf(desktop): isolate streaming re-renders & cut layout thrash

During a token stream $messages is replaced ~30x/s. Subscribing the whole
chat view to it re-rendered the composer, runtime boundary, and every
message on every delta.

- Derive coarse facts (empty thread? tail is user?) via nanostores
  `computed` atoms so per-token flushes don't re-render their consumers.
- Move the $messages subscription + runtime wiring into a dedicated
  ChatRuntimeBoundary; the composer reads $messages imperatively.
- Drive message rows off stable useAuiState selectors and a lazy
  getMessageText getter instead of eagerly materialized text.
- Feed ResizeObserver entry sizes into measureClamp / FadeText and dedupe
  the style writes, killing the read-write-read reflow cascade.

* perf(desktop): incremental markdown rendering during streams

Re-parsing the full message markdown every reveal frame is O(N^2) over a
long answer and dominated stream CPU.

- Throttle useSmoothReveal commits to ~2 frames (REVEAL_MIN_COMMIT_MS = 33ms).
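- Memoize block splitting with a small LRU keyed on the full source text so
  remounts and duplicate surfaces rendering the same message skip the re-lex
  (streaming tail growth still misses the cache by design).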
- Replace Streamdown's full-text parseIncompleteMarkdown with a
  tail-bounded remend: scan to the last top-level boundary outside
  fences/math and repair only the trailing open block. New remend-tail.ts
  is proven render-equivalent to full remend at every streaming prefix
  (remend-tail.test.ts), minus an intentional, documented divergence on
  cross-block dangling openers.

* perf(desktop): faster session resume & warm AudioContext at idle

- Resume: fire the REST transcript prefetch and the session.resume RPC in
  parallel, and skip the redundant message conversion + reconciliation
  when the prefetch already hydrated the transcript.
- Haptics: web-haptics builds its AudioContext lazily on first trigger,
  paying the ~850ms CoreAudio spin-up on the first streamStart haptic as
  the first token paints. Open/close a throwaway context at idle so the
  real one connects to an already-warm audio service.
This commit is contained in:
brooklyn! 2026-06-12 21:22:39 -05:00 committed by GitHub
commit 492c402774
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
15 changed files with 655 additions and 165 deletions

View file

@ -90,6 +90,7 @@
"react-router-dom": "^7.17.0",
"react-shiki": "^0.9.3",
"remark-math": "^6.0.0",
"remend": "^1.3.0",
"shiki": "^4.0.2",
"streamdown": "^2.5.0",
"tailwind-merge": "^3.5.0",

View file

@ -174,7 +174,6 @@ export function ChatBar({
const queuedPromptsBySession = useStore($queuedPromptsBySession)
const statusItemsBySession = useStore($statusItemsBySession)
const scrolledUp = useStore($threadScrolledUp)
const sessionMessages = useStore($messages)
const activeQueueSessionKey = queueSessionKey || sessionId || null
const queuedPrompts = useMemo(
@ -866,7 +865,9 @@ export function ChatBar({
event.preventDefault()
triggerKeyConsumedRef.current = true
const history = deriveUserHistory(sessionMessages, chatMessageText)
// $messages is read imperatively (not subscribed) so the composer
// doesn't re-render on every streaming delta flush.
const history = deriveUserHistory($messages.get(), chatMessageText)
const entry = browseBackward(sessionId, currentDraft, history)
if (entry !== null) {
@ -891,7 +892,7 @@ export function ChatBar({
event.preventDefault()
triggerKeyConsumedRef.current = true
const history = deriveUserHistory(sessionMessages, chatMessageText)
const history = deriveUserHistory($messages.get(), chatMessageText)
const result = browseForward(sessionId, history)
if (result !== null) {

View file

@ -35,7 +35,9 @@ import {
$gatewayState,
$introPersonality,
$introSeed,
$lastVisibleMessageIsUser,
$messages,
$messagesEmpty,
$selectedStoredSessionId,
$sessions,
sessionPinId
@ -55,7 +57,7 @@ import { type DroppedFile, partitionDroppedFiles } from './hooks/use-composer-ac
import { useFileDropZone } from './hooks/use-file-drop-zone'
import { ScrollToBottomButton } from './scroll-to-bottom-button'
import { SessionActionsMenu } from './sidebar/session-actions-menu'
import { lastVisibleMessageIsUser, threadLoadingState } from './thread-loading'
import { threadLoadingState } from './thread-loading'
interface ChatViewProps extends Omit<React.ComponentProps<'div'>, 'onSubmit'> {
gateway: HermesGateway | null
@ -156,105 +158,35 @@ function ChatHeader({
)
}
export function ChatView({
className,
gateway,
onToggleSelectedPin,
onDeleteSelectedSession,
/**
 * Props accepted by ChatRuntimeBoundary. ChatView forwards its own `busy`
 * flag and runtime callbacks through these and supplies the thread UI as
 * `children`.
 */
interface ChatRuntimeBoundaryProps {
// Forwarded from ChatView's `$busy` store subscription.
busy: boolean
// Rendered inside the AssistantRuntimeProvider that the boundary returns.
children: React.ReactNode
// NOTE(review): the callbacks below are passed straight through from
// ChatView's props; presumably they are wired into the external-store
// runtime alongside `onReload` — the wiring is not fully visible here.
onCancel: () => Promise<void> | void
onEdit: (message: AppendMessage) => Promise<void>
onReload: (parentId: string | null) => Promise<void>
onThreadMessagesChange: (messages: readonly ThreadMessage[]) => void
}
/**
* Owns the $messages subscription and the assistant-ui external-store runtime.
*
* Isolated from ChatView so the per-token delta flush (which replaces the
* $messages atom ~30×/s during streaming) only re-renders this component and
* the runtime provider. The children (Thread, ChatBar) are created by
* ChatView, whose render output is stable across flushes so React bails out
* of re-rendering them by element identity and the stream's render cost stays
* confined to the streaming message's own subtree.
*/
function ChatRuntimeBoundary({
busy,
children,
onCancel,
onAddContextRef,
onAddUrl,
onAttachImageBlob,
onAttachDroppedItems,
onBranchInNewChat,
maxVoiceRecordingSeconds,
onPasteClipboardImage,
onPickFiles,
onPickFolders,
onPickImages,
onRemoveAttachment,
onSteer,
onSubmit,
onThreadMessagesChange,
onEdit,
onReload,
onRestoreToMessage,
onTranscribeAudio
}: ChatViewProps) {
const location = useLocation()
const activeSessionId = useStore($activeSessionId)
const awaitingResponse = useStore($awaitingResponse)
const busy = useStore($busy)
const contextSuggestions = useStore($contextSuggestions)
const currentCwd = useStore($currentCwd)
const currentModel = useStore($currentModel)
const currentProvider = useStore($currentProvider)
const freshDraftReady = useStore($freshDraftReady)
const gatewayState = useStore($gatewayState)
const gatewaySwapTarget = useStore($gatewaySwapTarget)
const gatewayOpen = gatewayState === 'open'
const introPersonality = useStore($introPersonality)
const introSeed = useStore($introSeed)
onThreadMessagesChange
}: ChatRuntimeBoundaryProps) {
const messages = useStore($messages)
const selectedSessionId = useStore($selectedStoredSessionId)
const runtimeMessageCacheRef = useRef(new WeakMap<ChatMessage, ThreadMessage>())
const isRoutedSessionView = Boolean(routeSessionId(location.pathname))
const showIntro =
freshDraftReady && !isRoutedSessionView && !selectedSessionId && !activeSessionId && messages.length === 0
// Session is still loading if the route references a session we haven't
// resumed yet. Once `activeSessionId` is set (runtime has resumed), the
// session exists — even if it has zero messages (a brand-new routed
// session). The flicker where `busy` flips true briefly during hydrate
// is handled by `threadLoadingState`'s last-visible-user gate.
const loadingSession = isRoutedSessionView && messages.length === 0 && !activeSessionId
const threadLoading = threadLoadingState(loadingSession, busy, awaitingResponse, lastVisibleMessageIsUser(messages))
const showChatBar = !loadingSession
const threadKey = selectedSessionId || activeSessionId || (isRoutedSessionView ? location.pathname : 'new')
const modelOptionsQuery = useQuery<ModelOptionsResponse>({
queryKey: ['model-options', activeSessionId || 'global'],
queryFn: () => {
if (!activeSessionId) {
return getGlobalModelOptions()
}
if (!gateway) {
throw new Error('Hermes gateway unavailable')
}
return gateway.request<ModelOptionsResponse>('model.options', { session_id: activeSessionId })
},
enabled: gatewayOpen
})
const quickModels = useMemo(
() => quickModelOptions(modelOptionsQuery.data, currentProvider, currentModel),
[currentModel, currentProvider, modelOptionsQuery.data]
)
const chatBarState = useMemo<ChatBarState>(
() => ({
model: {
model: currentModel,
provider: currentProvider,
canSwitch: gatewayOpen,
loading: !gatewayOpen || (!currentModel && !currentProvider),
quickModels
},
tools: {
enabled: true,
label: 'Add context',
suggestions: contextSuggestions
},
voice: {
enabled: true,
active: false
}
}),
[contextSuggestions, currentModel, currentProvider, gatewayOpen, quickModels]
)
const runtimeMessageRepository = useMemo(() => {
const items: { message: ThreadMessage; parentId: string | null }[] = []
@ -304,6 +236,113 @@ export function ChatView({
onReload
})
return <AssistantRuntimeProvider runtime={runtime}>{children}</AssistantRuntimeProvider>
}
export function ChatView({
className,
gateway,
onToggleSelectedPin,
onDeleteSelectedSession,
onCancel,
onAddContextRef,
onAddUrl,
onAttachImageBlob,
onAttachDroppedItems,
onBranchInNewChat,
maxVoiceRecordingSeconds,
onPasteClipboardImage,
onPickFiles,
onPickFolders,
onPickImages,
onRemoveAttachment,
onSteer,
onSubmit,
onThreadMessagesChange,
onEdit,
onReload,
onRestoreToMessage,
onTranscribeAudio
}: ChatViewProps) {
const location = useLocation()
const activeSessionId = useStore($activeSessionId)
const awaitingResponse = useStore($awaitingResponse)
const busy = useStore($busy)
const contextSuggestions = useStore($contextSuggestions)
const currentCwd = useStore($currentCwd)
const currentModel = useStore($currentModel)
const currentProvider = useStore($currentProvider)
const freshDraftReady = useStore($freshDraftReady)
const gatewayState = useStore($gatewayState)
const gatewaySwapTarget = useStore($gatewaySwapTarget)
const gatewayOpen = gatewayState === 'open'
const introPersonality = useStore($introPersonality)
const introSeed = useStore($introSeed)
// PERF: ChatView must not subscribe to $messages — the atom is replaced on
// every streaming delta flush (~30×/s) and a subscription here re-renders
// the entire chat shell (header, chat bar, thread wrapper) per token. The
// runtime that DOES need the messages lives in ChatRuntimeBoundary below;
// this component only needs streaming-stable derivations.
const messagesEmpty = useStore($messagesEmpty)
const lastVisibleIsUser = useStore($lastVisibleMessageIsUser)
const selectedSessionId = useStore($selectedStoredSessionId)
const isRoutedSessionView = Boolean(routeSessionId(location.pathname))
const showIntro = freshDraftReady && !isRoutedSessionView && !selectedSessionId && !activeSessionId && messagesEmpty
// Session is still loading if the route references a session we haven't
// resumed yet. Once `activeSessionId` is set (runtime has resumed), the
// session exists — even if it has zero messages (a brand-new routed
// session). The flicker where `busy` flips true briefly during hydrate
// is handled by `threadLoadingState`'s last-visible-user gate.
const loadingSession = isRoutedSessionView && messagesEmpty && !activeSessionId
const threadLoading = threadLoadingState(loadingSession, busy, awaitingResponse, lastVisibleIsUser)
const showChatBar = !loadingSession
const threadKey = selectedSessionId || activeSessionId || (isRoutedSessionView ? location.pathname : 'new')
const modelOptionsQuery = useQuery<ModelOptionsResponse>({
queryKey: ['model-options', activeSessionId || 'global'],
queryFn: () => {
if (!activeSessionId) {
return getGlobalModelOptions()
}
if (!gateway) {
throw new Error('Hermes gateway unavailable')
}
return gateway.request<ModelOptionsResponse>('model.options', { session_id: activeSessionId })
},
enabled: gatewayOpen
})
const quickModels = useMemo(
() => quickModelOptions(modelOptionsQuery.data, currentProvider, currentModel),
[currentModel, currentProvider, modelOptionsQuery.data]
)
const chatBarState = useMemo<ChatBarState>(
() => ({
model: {
model: currentModel,
provider: currentProvider,
canSwitch: gatewayOpen,
loading: !gatewayOpen || (!currentModel && !currentProvider),
quickModels
},
tools: {
enabled: true,
label: 'Add context',
suggestions: contextSuggestions
},
voice: {
enabled: true,
active: false
}
}),
[contextSuggestions, currentModel, currentProvider, gatewayOpen, quickModels]
)
// Drop files anywhere in the conversation area, not just on the composer
// input. In-app drags (project tree / gutter) carry workspace-relative paths
// the gateway resolves directly, so they stay inline `@file:` refs. OS/Finder
@ -356,7 +395,13 @@ export function ChatView({
className="relative min-h-0 max-w-full flex-1 overflow-hidden bg-(--ui-chat-surface-background) contain-[layout_paint]"
{...dropHandlers}
>
<AssistantRuntimeProvider runtime={runtime}>
<ChatRuntimeBoundary
busy={busy}
onCancel={onCancel}
onEdit={onEdit}
onReload={onReload}
onThreadMessagesChange={onThreadMessagesChange}
>
<Thread
clampToComposer={showChatBar}
cwd={currentCwd}
@ -397,7 +442,7 @@ export function ChatView({
/>
</Suspense>
)}
</AssistantRuntimeProvider>
</ChatRuntimeBoundary>
{showChatBar && <ScrollToBottomButton />}
<ChatDropOverlay kind={dragKind} />
<ChatSwapOverlay profile={gatewaySwapTarget} />

View file

@ -3,9 +3,14 @@ import type { ChatMessage } from '@/lib/chat-messages'
export type ThreadLoadingState = 'response' | 'session'
/**
 * Reports whether the last non-hidden message was sent by the user.
 *
 * @param messages Messages to inspect; scanned from the end, skipping entries
 *   flagged `hidden`. The array is never mutated or copied.
 * @returns `true` when the newest visible message has role `'user'`; `false`
 *   when it has any other role or when no visible message exists.
 */
export function lastVisibleMessageIsUser(messages: readonly ChatMessage[]): boolean {
  // Allocation-free reverse scan — runs in a hot $messages computed, so no
  // copy/reverse/find of the whole array per evaluation.
  for (let i = messages.length - 1; i >= 0; i -= 1) {
    const message = messages[i]

    if (message && !message.hidden) {
      return message.role === 'user'
    }
  }

  return false
}
export function threadLoadingState(

View file

@ -618,10 +618,26 @@ export function useSessionActions({
const watchWindow = isWatchWindow()
let localSnapshot = $messages.get()
// REST transcript prefetch and the gateway resume RPC are independent
// — run them concurrently so a big session's wall time is
// max(prefetch, resume) instead of their sum. The prefetch paints the
// transcript as soon as it lands; the RPC binds the runtime id.
// Watch windows skip the prefetch — lazy resume attaches the live mirror.
const prefetchPromise = watchWindow ? null : getSessionMessages(storedSessionId, sessionProfile)
const resumePromise = requestGateway<SessionResumeResponse>('session.resume', {
session_id: storedSessionId,
cols: 96,
...(watchWindow ? { lazy: true } : {}),
...(sessionProfile ? { profile: sessionProfile } : {})
})
// The rejection is consumed by the `await` below; this guard only
// keeps it from surfacing as unhandled while the prefetch settles.
resumePromise.catch(() => undefined)
try {
// Watch windows skip REST prefetch — lazy resume attaches the live mirror.
if (!watchWindow) {
const storedMessages = await getSessionMessages(storedSessionId, sessionProfile)
if (prefetchPromise) {
const storedMessages = await prefetchPromise
if (isCurrentResume()) {
localSnapshot = preserveLocalAssistantErrors(toChatMessages(storedMessages.messages), $messages.get())
@ -635,12 +651,7 @@ export function useSessionActions({
// Non-fatal: gateway resume below can still hydrate the session.
}
const resumed = await requestGateway<SessionResumeResponse>('session.resume', {
session_id: storedSessionId,
cols: 96,
...(watchWindow ? { lazy: true } : {}),
...(sessionProfile ? { profile: sessionProfile } : {})
})
const resumed = await resumePromise
if (!isCurrentResume()) {
return
@ -648,17 +659,22 @@ export function useSessionActions({
const currentMessages = $messages.get()
const resumedMessages = preserveLocalAssistantErrors(
reconcileResumeMessages(toChatMessages(resumed.messages), currentMessages),
currentMessages
)
// Keep the local snapshot when resume would only reshuffle runtime projection.
// Keep the local snapshot when resume would only reshuffle runtime
// projection. When the REST prefetch already hydrated the transcript,
// skip converting/reconciling the resume payload entirely — on a
// 1000+-message session that second conversion plus the deep
// equivalence compare costs over a second of main-thread time.
const preferredMessages =
localSnapshot.length > 0
? localSnapshot
: chatMessageArraysEquivalent(currentMessages, resumedMessages)
? currentMessages
: resumedMessages
: (() => {
const resumedMessages = preserveLocalAssistantErrors(
reconcileResumeMessages(toChatMessages(resumed.messages), currentMessages),
currentMessages
)
return chatMessageArraysEquivalent(currentMessages, resumedMessages) ? currentMessages : resumedMessages
})()
const messagesForView = preserveLocalAssistantErrors(preferredMessages, currentMessages)

View file

@ -2,6 +2,7 @@
import { TextMessagePartProvider, useMessagePartText } from '@assistant-ui/react'
import {
parseMarkdownIntoBlocks,
type StreamdownTextComponents,
StreamdownTextPrimitive,
type SyntaxHighlighterProps
@ -26,6 +27,7 @@ import {
mediaStreamUrl
} from '@/lib/media'
import { previewTargetFromMarkdownHref } from '@/lib/preview-targets'
import { tailBoundedRemend } from '@/lib/remend-tail'
import { cn } from '@/lib/utils'
// Math rendering plugin (KaTeX). Configured once at module scope — the
@ -42,6 +44,51 @@ import { cn } from '@/lib/utils'
// LLM convention). The default false-setting only accepts `$$...$$`.
const mathPlugin = createMemoizedMathPlugin({ singleDollarTextMath: true })
// Replaces Streamdown's `parseIncompleteMarkdown` (full-text remend per
// flush) with a tail-bounded repair — see lib/remend-tail.ts. Must stay
// module-scope so the prop identity is stable across renders.
/**
 * Markdown preprocess hook: runs the regular `preprocessMarkdown` pass first,
 * then hands its output to `tailBoundedRemend` for incomplete-markdown repair.
 */
function preprocessWithTailRepair(text: string): string {
  const preprocessed = preprocessMarkdown(text)

  return tailBoundedRemend(preprocessed)
}
// Memoized block splitter. Streamdown calls `parseMarkdownIntoBlocks` (a full
// `marked` lex of the entire message, ~1.6ms per 28KB) inside a useMemo keyed
// on the text — but the same text is re-lexed every time a message REMOUNTS
// (virtualizer scroll, session switch) and whenever multiple surfaces render
// the same content (deferred + smooth reveal republish). A small module-level
// LRU keyed by the exact source string removes all of those repeat parses
// with zero correctness risk (same input → same output). Streaming tail
// growth misses the cache by design (every flush is a new string) — that
// single lex is the irreducible cost.
const BLOCK_CACHE_MAX = 64
const BLOCK_CACHE_MIN_LENGTH = 1024
const blockCache = new Map<string, string[]>()

/**
 * LRU-memoized front for `parseMarkdownIntoBlocks`.
 *
 * Inputs shorter than BLOCK_CACHE_MIN_LENGTH bypass the cache entirely. Longer
 * inputs are keyed by their exact source string; at most BLOCK_CACHE_MAX
 * entries are kept and the least recently used one is dropped on overflow.
 * A cache hit returns the same array instance that was stored.
 */
function parseMarkdownIntoBlocksCached(markdown: string): string[] {
  const cacheable = markdown.length >= BLOCK_CACHE_MIN_LENGTH
  const cached = cacheable ? blockCache.get(markdown) : undefined

  if (cached) {
    // Map iterates in insertion order, so re-inserting marks the entry as
    // the most recently used.
    blockCache.delete(markdown)
    blockCache.set(markdown, cached)

    return cached
  }

  const parsed = parseMarkdownIntoBlocks(markdown)

  if (!cacheable) {
    return parsed
  }

  blockCache.set(markdown, parsed)

  if (blockCache.size > BLOCK_CACHE_MAX) {
    // The first key in iteration order is the least recently used entry.
    for (const oldest of blockCache.keys()) {
      blockCache.delete(oldest)
      break
    }
  }

  return parsed
}
async function mediaSrc(path: string): Promise<string> {
if (/^(?:https?|data):/i.test(path)) {
return path
@ -241,6 +288,13 @@ function MarkdownImage({ className, src, alt, ...props }: ComponentProps<'img'>)
// keeps draining its tail instead of snapping.
const REVEAL_DRAIN_MS = 500
const REVEAL_MAX_CHARS_PER_FRAME = 30
// Floor between reveal commits. Each commit republishes the text context and
// re-runs the whole Streamdown pipeline (preprocess → remend → lex → micromark
// on the open block) over the full accumulated text — at raw rAF cadence
// that's 60 full parses/second and was the dominant streaming cost for
// reasoning text. ~33ms keeps the reveal visually fluid (2 frames) while
// halving the parse work.
const REVEAL_MIN_COMMIT_MS = 33
function useSmoothReveal(text: string, isRunning: boolean): string {
const [displayed, setDisplayed] = useState(isRunning ? '' : text)
@ -273,10 +327,27 @@ function useSmoothReveal(text: string, isRunning: boolean): string {
const tick = () => {
const now = performance.now()
const dt = now - lastTickRef.current
// Skip this frame if the floor hasn't elapsed — the backlog math below
// is dt-proportional, so delayed commits reveal proportionally more.
if (dt < REVEAL_MIN_COMMIT_MS) {
frameRef.current = requestAnimationFrame(tick)
return
}
lastTickRef.current = now
const remaining = targetRef.current.length - shownRef.current.length
const add = Math.min(remaining, REVEAL_MAX_CHARS_PER_FRAME, Math.max(1, Math.ceil((remaining * dt) / REVEAL_DRAIN_MS)))
const add = Math.min(
remaining,
// dt-scaled so the per-commit cap stays equivalent to the old
// per-frame cap at any commit cadence.
Math.ceil((REVEAL_MAX_CHARS_PER_FRAME * dt) / 16.7),
Math.max(1, Math.ceil((remaining * dt) / REVEAL_DRAIN_MS))
)
shownRef.current = targetRef.current.slice(0, shownRef.current.length + add)
setDisplayed(shownRef.current)
@ -460,17 +531,20 @@ function MarkdownTextSurface({ containerClassName, containerProps }: MarkdownTex
containerProps={containerProps}
lineNumbers={false}
mode="streaming"
// Always auto-close incomplete fences — even during streaming.
// Without this, an unclosed ```python ... ``` whose body contains
// `$` (very common: shell snippets, JS template strings, dollar
// amounts) leaks those dollars out to the math parser and they
// get rendered as broken inline math until the closing fence
// arrives. Shiki is independently deferred via `defer={isStreaming}`
// on the SyntaxHighlighter component, so we don't pay code-block
// tokenization on every token even with this set.
parseIncompleteMarkdown
// Incomplete-markdown repair is handled by `preprocessWithTailRepair`
// below (tail-bounded remend) instead of Streamdown's built-in pass,
// which re-runs remend over the ENTIRE message on every flush — ~18%
// of streaming script time on 50KB+ messages. The repair itself stays
// always-on (even between flushes / for completed messages): an
// unclosed ```python ... ``` whose body contains `$` (shell snippets,
// JS template strings, dollar amounts) would otherwise leak those
// dollars to the math parser and render broken inline math. Shiki is
// independently deferred via `defer={isStreaming}` on the
// SyntaxHighlighter component.
parseIncompleteMarkdown={false}
parseMarkdownIntoBlocksFn={parseMarkdownIntoBlocksCached}
plugins={plugins}
preprocess={preprocessMarkdown}
preprocess={preprocessWithTailRepair}
/>
)
}

View file

@ -7,7 +7,8 @@ import {
MessagePrimitive,
type ToolCallMessagePartProps,
useAui,
useAuiState
useAuiState,
useMessageRuntime
} from '@assistant-ui/react'
import { useStore } from '@nanostores/react'
import { IconPlayerStopFilled } from '@tabler/icons-react'
@ -105,7 +106,11 @@ type ThreadLoadingState = 'response' | 'session'
interface MessageActionProps {
messageId: string
messageText: string
/** Lazy accessor reads the live message text at action time. Passing the
* text itself as a prop forces the whole footer to re-render on every
* streaming delta flush (the text changes ~30×/s), which profiling showed
* was a large slice of per-token script time on long transcripts. */
getMessageText: () => string
onBranchInNewChat?: (messageId: string) => void
}
@ -133,6 +138,28 @@ function messageContentText(content: unknown): string {
return Array.isArray(content) ? content.map(partText).join('').trim() : ''
}
// Cheap streaming-stable "does this message have visible text" check: returns
// on the first non-whitespace text part without concatenating the whole
// message. Used as a useAuiState selector so its boolean output stays stable
// across token flushes (flips false→true once per turn).
/**
 * Tells whether message content holds any non-whitespace text.
 *
 * A string is checked directly; an array is checked part by part via
 * `partText`, stopping at the first part with visible text. Any other value
 * counts as having no visible text.
 */
function contentHasVisibleText(content: unknown): boolean {
  const isVisible = (text: string): boolean => text.trim().length > 0

  if (typeof content === 'string') {
    return isVisible(content)
  }

  return Array.isArray(content) && content.some(part => isVisible(partText(part)))
}
export const Thread: FC<{
clampToComposer?: boolean
cwd?: string | null
@ -221,20 +248,39 @@ const CenteredThreadSpinner: FC = () => {
const AssistantMessage: FC<{ onBranchInNewChat?: (messageId: string) => void }> = ({ onBranchInNewChat }) => {
const messageId = useAuiState(s => s.message.id)
const content = useAuiState(s => s.message.content)
const messageText = messageContentText(content)
const messageRuntime = useMessageRuntime()
// PERF: this component must NOT subscribe to the streaming text. Every
// selector here returns a value that stays referentially stable across
// token flushes (booleans, status strings, '' while running), so the
// 30 Hz delta stream only re-renders the markdown part and the tiny
// StreamStallIndicator leaf — not the footer/preview/root subtree.
const messageStatus = useAuiState(s => s.message.status?.type)
const isRunning = messageStatus === 'running'
const isPlaceholder = useAuiState(s => s.message.status?.type === 'running' && s.message.content.length === 0)
const hasVisibleText = useAuiState(s => contentHasVisibleText(s.message.content))
// Preview targets only materialize once the turn completes — while running
// the selector returns '' (stable), so per-token flushes skip the regex
// scan and the re-render it would cause.
const completedText = useAuiState(s =>
s.message.status?.type === 'running' ? '' : messageContentText(s.message.content)
)
const previewTargets = useMemo(() => {
if (!messageText || !/(https?:\/\/|file:\/\/)/i.test(messageText)) {
if (!completedText || !/(https?:\/\/|file:\/\/)/i.test(completedText)) {
return []
}
return pickPrimaryPreviewTarget(extractPreviewTargets(messageText))
}, [messageText])
return pickPrimaryPreviewTarget(extractPreviewTargets(completedText))
}, [completedText])
const messageStatus = useAuiState(s => s.message.status?.type)
const isPlaceholder = messageStatus === 'running' && content.length === 0
const enterRef = useEnterAnimation(messageStatus === 'running', `assistant-message:${messageId}`)
const getMessageText = useCallback(
() => messageContentText(messageRuntime.getState().content),
[messageRuntime]
)
const enterRef = useEnterAnimation(isRunning, `assistant-message:${messageId}`)
if (isPlaceholder) {
return null
@ -245,7 +291,7 @@ const AssistantMessage: FC<{ onBranchInNewChat?: (messageId: string) => void }>
className="group flex w-full min-w-0 max-w-full flex-col gap-0 self-start overflow-hidden"
data-role="assistant"
data-slot="aui_assistant-message-root"
data-streaming={messageStatus === 'running' ? 'true' : undefined}
data-streaming={isRunning ? 'true' : undefined}
ref={enterRef}
>
<div
@ -254,7 +300,7 @@ const AssistantMessage: FC<{ onBranchInNewChat?: (messageId: string) => void }>
>
{/* Todos render in the composer status stack now, not inline. */}
<MessagePrimitive.Parts components={MESSAGE_PARTS_COMPONENTS} />
{messageStatus === 'running' && <StreamStallIndicator activity={`${content.length}:${messageText.length}`} />}
{isRunning && <StreamStallIndicator />}
{previewTargets.length > 0 && (
<div className="mt-3 flex flex-wrap gap-2">
{previewTargets.map(target => (
@ -271,8 +317,8 @@ const AssistantMessage: FC<{ onBranchInNewChat?: (messageId: string) => void }>
</ErrorPrimitive.Root>
</MessagePrimitive.Error>
</div>
{messageText.trim().length > 0 && (
<AssistantFooter messageId={messageId} messageText={messageText} onBranchInNewChat={onBranchInNewChat} />
{hasVisibleText && (
<AssistantFooter getMessageText={getMessageText} messageId={messageId} onBranchInNewChat={onBranchInNewChat} />
)}
</MessagePrimitive.Root>
)
@ -313,10 +359,28 @@ const STREAM_STALL_S = 2
// Tail "still thinking" indicator: the pre-first-token spinner goes away once
// text flows, but if the stream then goes quiet mid-turn (tool think-time,
// provider stall) nothing signals that work continues. Watch a per-render
// provider stall) nothing signals that work continues. Watch a per-flush
// activity signal; when it hasn't changed for STREAM_STALL_S, re-show the
// dither + a timer counting from the last activity.
const StreamStallIndicator: FC<{ activity: string }> = ({ activity }) => {
//
// Subscribes to the activity signal ITSELF (rather than taking it as a prop)
// so that per-token updates re-render only this leaf, not the whole
// AssistantMessage subtree.
const StreamStallIndicator: FC = () => {
const activity = useAuiState(s => {
let textLength = 0
for (const part of s.message.content) {
const text = (part as { text?: unknown }).text
if (typeof text === 'string') {
textLength += text.length
}
}
return `${s.message.content.length}:${textLength}`
})
const [stalled, setStalled] = useState(false)
useEffect(() => {
@ -584,7 +648,7 @@ function formatMessageTimestamp(
return SHORT_FMT.format(date)
}
const AssistantActionBar: FC<MessageActionProps> = ({ messageId, messageText, onBranchInNewChat }) => {
const AssistantActionBar: FC<MessageActionProps> = ({ messageId, getMessageText, onBranchInNewChat }) => {
const { t } = useI18n()
const copy = t.assistant.thread
const [menuOpen, setMenuOpen] = useState(false)
@ -605,7 +669,7 @@ const AssistantActionBar: FC<MessageActionProps> = ({ messageId, messageText, on
)}
data-slot="aui_msg-actions"
>
<CopyButton appearance="icon" buttonSize="icon" disabled={!messageText} label={copy.copy} text={messageText} />
<CopyButton appearance="icon" buttonSize="icon" label={copy.copy} text={getMessageText} />
<ActionBarPrimitive.Reload asChild>
<TooltipIconButton onClick={() => triggerHaptic('submit')} tooltip={copy.refresh}>
<Codicon name="refresh" />
@ -623,7 +687,7 @@ const AssistantActionBar: FC<MessageActionProps> = ({ messageId, messageText, on
<GitBranchIcon />
{copy.branchNewChat}
</DropdownMenuItem>
<ReadAloudItem messageId={messageId} text={messageText} />
<ReadAloudItem getText={getMessageText} messageId={messageId} />
</DropdownMenuContent>
</DropdownMenu>
</ActionBarPrimitive.Root>
@ -631,7 +695,7 @@ const AssistantActionBar: FC<MessageActionProps> = ({ messageId, messageText, on
)
}
const ReadAloudItem: FC<{ messageId: string; text: string }> = ({ messageId, text }) => {
const ReadAloudItem: FC<{ getText: () => string; messageId: string }> = ({ getText, messageId }) => {
const { t } = useI18n()
const copy = t.assistant.thread
const voicePlayback = useStore($voicePlayback)
@ -645,6 +709,8 @@ const ReadAloudItem: FC<{ messageId: string; text: string }> = ({ messageId, tex
const Icon = isPreparing ? Loader2Icon : isSpeaking ? VolumeXIcon : Volume2Icon
const read = useCallback(async () => {
const text = getText()
if (!text || $voicePlayback.get().status !== 'idle') {
return
}
@ -654,11 +720,11 @@ const ReadAloudItem: FC<{ messageId: string; text: string }> = ({ messageId, tex
} catch (error) {
notifyError(error, copy.readAloudFailed)
}
}, [copy.readAloudFailed, messageId, text])
}, [copy.readAloudFailed, getText, messageId])
return (
<DropdownMenuItem
disabled={isPreparing || (!isSpeaking && (anyPlaybackActive || !text))}
disabled={isPreparing || (!isSpeaking && anyPlaybackActive)}
onSelect={e => {
e.preventDefault()
void (isSpeaking ? stopVoicePlayback() : read())
@ -820,8 +886,10 @@ const UserMessage: FC<{
// changes, not on every frame while the outer max-height animates open.
const clampInnerRef = useRef<HTMLDivElement | null>(null)
const [bodyClamped, setBodyClamped] = useState(false)
const lastClampHeightRef = useRef(-1)
const lineHeightRef = useRef(0)
const measureClamp = useCallback(() => {
const measureClamp = useCallback((entries: readonly ResizeObserverEntry[]) => {
const inner = clampInnerRef.current
const outer = inner?.parentElement
@ -829,12 +897,28 @@ const UserMessage: FC<{
return
}
const styles = getComputedStyle(inner)
const lineHeight = parseFloat(styles.lineHeight) || 1.5 * parseFloat(styles.fontSize) || 20
const fullHeight = inner.scrollHeight
// Prefer the size the ResizeObserver already computed — reading
// `scrollHeight` outside RO timing forces a synchronous layout, and with
// many user bubbles observed at once those reads interleave with the
// style write below into a read-write-read reflow cascade.
const entryHeight = entries.find(entry => entry.target === inner)?.borderBoxSize?.[0]?.blockSize
const fullHeight = Math.ceil(entryHeight ?? inner.scrollHeight)
if (fullHeight === lastClampHeightRef.current) {
return
}
lastClampHeightRef.current = fullHeight
// Line-height is stable for the life of the bubble (font settings don't
// change under it) — resolve the computed style once.
if (!lineHeightRef.current) {
const styles = getComputedStyle(inner)
lineHeightRef.current = parseFloat(styles.lineHeight) || 1.5 * parseFloat(styles.fontSize) || 20
}
outer.style.setProperty('--human-msg-full', `${fullHeight}px`)
setBodyClamped(fullHeight > lineHeight * 2 + 1)
setBodyClamped(fullHeight > lineHeightRef.current * 2 + 1)
}, [])
useResizeObserver(measureClamp, clampInnerRef)

View file

@ -15,5 +15,29 @@ export function HapticsProvider({ children }: { children: ReactNode }) {
return () => registerHapticTrigger(null)
}, [muted, trigger])
// web-haptics builds its AudioContext lazily inside the first trigger(), and
// the process's first AudioContext pays the CoreAudio spin-up (~850ms stall
// in profiles) — which landed on the first streamStart haptic as the first
// token painted. Open/close a throwaway context at idle so the real one
// connects to an already-warm audio service in single-digit ms.
useEffect(() => {
if (typeof requestIdleCallback !== 'function' || typeof AudioContext === 'undefined') {
return undefined
}
const id = requestIdleCallback(
() => {
try {
void new AudioContext().close().catch(() => undefined)
} catch {
// No audio device (headless CI) — nothing to warm.
}
},
{ timeout: 2000 }
)
return () => cancelIdleCallback(id)
}, [])
return <>{children}</>
}

View file

@ -34,14 +34,21 @@ function FadeTextImpl({ children, className, fadeWidth = '3rem', style, ...rest
const ref = useRef<HTMLSpanElement>(null)
const [overflowing, setOverflowing] = useState(false)
const measureOverflow = useCallback(() => {
const measureOverflow = useCallback((entries: readonly ResizeObserverEntry[]) => {
const el = ref.current
if (!el) {
return
}
setOverflowing(el.scrollWidth - el.clientWidth > 1)
// `clientWidth` from the RO entry when available (already computed);
// `scrollWidth` is unavoidable — content width isn't part of the entry —
// but inside RO timing layout is already clean so the read is cheap.
const clientWidth = entries.find(entry => entry.target === el)?.contentRect?.width ?? el.clientWidth
// setState is identity-stable: React bails out when the boolean doesn't
// change, so repeated RO fires with the same answer don't re-render.
setOverflowing(el.scrollWidth - clientWidth > 1)
}, [])
useResizeObserver(measureOverflow, ref)

View file

@ -1,17 +1,26 @@
import { type RefObject, useLayoutEffect, useRef } from 'react'
export function useResizeObserver(onResize: () => void, ...refs: readonly RefObject<Element | null>[]) {
/**
* Observe element resizes. The callback receives the ResizeObserver entries
* (empty on the initial synchronous call and in non-RO environments) so
* callers can read the observed size off the entry instead of forcing a
* fresh layout read.
*/
export function useResizeObserver(
onResize: (entries: readonly ResizeObserverEntry[]) => void,
...refs: readonly RefObject<Element | null>[]
) {
const refsRef = useRef(refs)
refsRef.current = refs
useLayoutEffect(() => {
if (typeof ResizeObserver === 'undefined') {
onResize()
onResize([])
return
}
const observer = new ResizeObserver(() => onResize())
const observer = new ResizeObserver(entries => onResize(entries))
let observed = false
for (const ref of refsRef.current) {
@ -31,7 +40,7 @@ export function useResizeObserver(onResize: () => void, ...refs: readonly RefObj
return
}
onResize()
onResize([])
return () => observer.disconnect()
}, [onResize])

View file

@ -0,0 +1,105 @@
import { parseMarkdownIntoBlocks } from '@assistant-ui/react-streamdown'
import remend from 'remend'
import { describe, expect, it } from 'vitest'
import { findRemendWindowStart, tailBoundedRemend } from './remend-tail'
// Streaming fixture sliced into every prefix by the equivalence test below.
// It mixes headings, inline emphasis/code/links, backtick and tilde fences
// (both containing `$`), inline and `$$` block math, a list, and a table, and
// deliberately ends mid-link so even the full text is still incomplete.
const CORPUS = `# Heading one
Intro paragraph with **bold**, *italic*, \`inline code\`, and a [link](https://example.com).
## Code
\`\`\`python
def main():
cost = "$5"
print(f"total: $\{cost}")
\`\`\`
Some text after the fence with $x^2 + y^2$ inline math.
$$
\\int_0^1 f(x) dx
$$
- list item one with **bold**
- list item two
| col a | col b |
| ----- | ----- |
| 1 | 2 |
~~~js
const s = \`template \${value}\`
~~~
Final paragraph with ~~strike~~ and unfinished [link text](https://exa
`
/**
 * Render-equivalence oracle. Full-text remend and tail-bounded remend are
 * allowed to differ in raw string output ONLY in ways that cannot affect
 * rendering — i.e. once the text is split into blocks, every block must be
 * identical. (Streamdown renders blocks independently, so block-level
 * equality IS render equality.)
 *
 * @param text Markdown source, already repaired by one of the two strategies.
 * @returns The block strings produced by `parseMarkdownIntoBlocks`.
 */
function blocksOf(text: string): string[] {
  const blocks = parseMarkdownIntoBlocks(text)

  return blocks
}
// Suite for the tail-bounded repair: block-level equivalence with full
// `remend`, window placement for fences / paragraphs / `$$` math, and the one
// documented divergence.
describe('tailBoundedRemend', () => {
// Exhaustive check: every prefix of CORPUS (length 1..CORPUS.length) must
// split into the same blocks whichever repair strategy produced it.
it('matches full remend block output at every streaming prefix', () => {
for (let end = 1; end <= CORPUS.length; end++) {
const prefix = CORPUS.slice(0, end)
const full = blocksOf(remend(prefix))
const tail = blocksOf(tailBoundedRemend(prefix))
// The message carries the prefix length and its last 60 chars for debugging.
expect(tail, `prefix length ${end}: ${JSON.stringify(prefix.slice(-60))}`).toEqual(full)
}
})
// 500 body lines sit between the fence opener and the end of the text.
it('repairs an unclosed fence opened early in a long message', () => {
const text = `intro\n\n\`\`\`python\n${'x = 1\n'.repeat(500)}print("$dollar")`
const repaired = tailBoundedRemend(text)
expect(blocksOf(repaired)).toEqual(blocksOf(remend(text)))
// the window must reach back to the fence opener
expect(findRemendWindowStart(text)).toBe(text.indexOf('```python'))
})
// With nothing open, the window starts at the last paragraph and the raw
// string output (not just the blocks) equals full remend.
it('bounds the window to the tail paragraph when no fence is open', () => {
const text = `para one\n\npara two\n\npara three with **bold`
const start = findRemendWindowStart(text)
expect(start).toBe(text.indexOf('para three'))
expect(tailBoundedRemend(text)).toBe(remend(text))
})
// The window must start at or before the unclosed `$$` opener.
it('widens the window across an open $$ math block', () => {
const text = `before\n\n$$\n\\frac{a}{b}`
const start = findRemendWindowStart(text)
expect(start).toBeLessThanOrEqual(text.indexOf('$$'))
expect(blocksOf(tailBoundedRemend(text))).toEqual(blocksOf(remend(text)))
})
// Fully closed input must come back unchanged.
it('handles closed constructs without modification', () => {
const text = `done **bold** and \`code\`\n\n\`\`\`js\nconst a = 1\n\`\`\`\n\nlast line.`
expect(tailBoundedRemend(text)).toBe(text)
})
it('intentionally diverges from full remend on cross-block dangling openers', () => {
// Full remend scans the whole document and appends `**` for an opener
// left dangling in an EARLIER block, dumping stray asterisks into the
// unrelated tail block ("|**"). Because Streamdown splits into blocks
// after the repair, that opener never renders as bold either way — the
// tail-bounded result is the cleaner of the two. This test documents
// the divergence so a future remend upgrade that changes the behavior
// gets noticed.
const text = `- item with **dangling\n- item two\n\n|`
expect(remend(text).endsWith('|**')).toBe(true)
expect(tailBoundedRemend(text).endsWith('|')).toBe(true)
expect(tailBoundedRemend(text).endsWith('|**')).toBe(false)
})
})

View file

@ -0,0 +1,108 @@
import remend from 'remend'
// Tail-bounded incomplete-markdown repair.
//
// Streamdown's built-in `parseIncompleteMarkdown` runs `remend` over the whole
// accumulated message on every streaming flush (~18% of script time on 50KB+
// messages). But repairs only ever matter in the trailing block: inline
// constructs can't cross a blank line, and Streamdown splits into blocks AFTER
// the repair, so a dangling opener in an earlier block can't reach the tail.
// We run `remend` on just that block instead.
// Char codes used by the line scanner below (compared via `charCodeAt`).
const BACKTICK = 96 // `
const TILDE = 126 // ~
const SPACE = 32
const TAB = 9
const BACKSLASH = 92

/** True when `c` is the char code of a space or a tab. */
const isSpace = (c: number) => c === SPACE || c === TAB

/**
 * Index of the last top-level block start — the char after the most recent
 * blank line that sits outside any open code fence or `$$` math block. An
 * unclosed fence/math always begins after that blank, so it stays wholly
 * inside the window without separate tracking.
 *
 * The scan is a single forward pass with no regex. The position of the next
 * `$$` is carried from line to line, so a long stretch without `$$` is
 * searched once rather than once per line (which would make the pass
 * quadratic on large messages).
 *
 * @param text The accumulated (possibly incomplete) markdown source.
 * @returns Offset into `text` where the trailing block starts; 0 when no
 *   usable blank-line boundary exists.
 */
export function findRemendWindowStart(text: string): number {
  const n = text.length
  let inFence = false
  let fenceChar = 0
  let fenceRun = 0
  let inMath = false
  let boundary = 0
  let pending = -1 // a blank line, committed to `boundary` once content follows
  // First `$$` at or after the last searched position, or -1 once none remain.
  let nextMath = text.indexOf('$$')

  for (let lineStart = 0; lineStart <= n; ) {
    let lineEnd = text.indexOf('\n', lineStart)

    if (lineEnd === -1) {
      lineEnd = n
    }

    // Skip leading indentation to find the first significant char of the line.
    let i = lineStart

    while (i < lineEnd && isSpace(text.charCodeAt(i))) {
      i += 1
    }

    const first = i < lineEnd ? text.charCodeAt(i) : -1 // -1 marks a blank line
    let marker = false

    // Fence open/close (``` or ~~~, ≤3 spaces indent).
    if ((first === BACKTICK || first === TILDE) && i - lineStart <= 3) {
      let run = i

      while (run < lineEnd && text.charCodeAt(run) === first) {
        run += 1
      }

      if (run - i >= 3) {
        marker = true

        if (!inFence) {
          inFence = true
          fenceChar = first
          fenceRun = run - i
        } else if (first === fenceChar && run - i >= fenceRun && onlyWhitespace(text, run, lineEnd)) {
          // A closer must use the opener's char, be at least as long, and
          // carry nothing but whitespace after the run.
          inFence = false
        }
      }
    }

    // Toggle `$$` math state on plain lines ($$ inside a fence is literal).
    if (!inFence && !marker) {
      // The cached hit can lag behind after fence/marker lines were skipped;
      // only then is a fresh search needed.
      if (nextMath !== -1 && nextMath < lineStart) {
        nextMath = text.indexOf('$$', lineStart)
      }

      while (nextMath !== -1 && nextMath < lineEnd - 1) {
        // A backslash directly before the pair escapes it.
        if (nextMath === 0 || text.charCodeAt(nextMath - 1) !== BACKSLASH) {
          inMath = !inMath
        }

        nextMath = text.indexOf('$$', nextMath + 2)
      }
    }

    if (first === -1 && !inFence && !inMath) {
      pending = lineEnd + 1
    } else if (pending !== -1) {
      boundary = pending
      pending = -1
    }

    lineStart = lineEnd + 1
  }

  return boundary
}

/**
 * True when every char of `text` in the half-open range [from, to) is a space
 * or a tab (vacuously true for an empty range).
 */
function onlyWhitespace(text: string, from: number, to: number): boolean {
  for (let i = from; i < to; i += 1) {
    if (!isSpace(text.charCodeAt(i))) {
      return false
    }
  }

  return true
}
/**
 * Repair incomplete markdown in the trailing block only.
 *
 * Everything before the window start found by `findRemendWindowStart` is
 * passed through untouched; `remend` runs on the remainder. When no window
 * boundary exists the whole text is handed to `remend`.
 *
 * @param text The accumulated (possibly incomplete) markdown source.
 * @returns `text` with its trailing block repaired by `remend`.
 */
export function tailBoundedRemend(text: string): string {
  const windowStart = findRemendWindowStart(text)

  if (windowStart <= 0) {
    return remend(text)
  }

  const settled = text.slice(0, windowStart)
  const tail = text.slice(windowStart)

  return settled + remend(tail)
}

View file

@ -1,5 +1,6 @@
import { atom } from 'nanostores'
import { atom, computed } from 'nanostores'
import { lastVisibleMessageIsUser } from '@/app/chat/thread-loading'
import type { ContextSuggestion } from '@/app/types'
import type { HermesConnection } from '@/global'
import type { ChatMessage } from '@/lib/chat-messages'
@ -195,6 +196,15 @@ export const $workingSessionIds = atom<string[]>([])
export const $activeSessionId = atom<string | null>(null)
export const $selectedStoredSessionId = atom<string | null>(null)
export const $messages = atom<ChatMessage[]>([])
// Streaming-stable derivations of $messages. During a token stream the array
// is replaced ~30×/s; components that only care about coarse facts (is the
// thread empty? is the tail a user message?) subscribe to these instead of
// $messages so per-token flushes don't re-render them — nanostores' `computed`
// only notifies when the derived VALUE changes.
export const $messagesEmpty = computed($messages, messages => messages.length === 0)
export const $lastVisibleMessageIsUser = computed($messages, lastVisibleMessageIsUser)
export const $freshDraftReady = atom(false)
export const $busy = atom(false)
export const $awaitingResponse = atom(false)

View file

@ -21,7 +21,7 @@ let
# Single npm deps fetch from the workspace root lockfile.
# All workspace packages share this derivation.
npmDepsHash = "sha256-BfTSh6J2VZ/07tq2DYnKgUViZCgRhW1sC2uj18H65SE=";
npmDepsHash = "sha256-dFUlWvIIsCqvtGkoobs0qUzFlSdejuffI/uLoQxhW8Q=";
npmDeps = pkgs.fetchNpmDeps {
inherit src;

1
package-lock.json generated
View file

@ -119,6 +119,7 @@
"react-router-dom": "^7.17.0",
"react-shiki": "^0.9.3",
"remark-math": "^6.0.0",
"remend": "^1.3.0",
"shiki": "^4.0.2",
"streamdown": "^2.5.0",
"tailwind-merge": "^3.5.0",