perf(desktop): cut GUI streaming & interaction lag (#45343)

* perf(desktop): isolate streaming re-renders & cut layout thrash

  During a token stream $messages is replaced ~30x/s. Subscribing the whole chat view to it re-rendered the composer, runtime boundary, and every message on every delta.

  - Derive coarse facts (empty thread? tail is user?) via nanostores `computed` atoms so per-token flushes don't re-render their consumers.
  - Move the $messages subscription + runtime wiring into a dedicated ChatRuntimeBoundary; the composer reads $messages imperatively.
  - Drive message rows off stable useAuiState selectors and a lazy getMessageText getter instead of eagerly materialized text.
  - Feed ResizeObserver entry sizes into measureClamp / FadeText and dedupe the style writes, killing the read-write-read reflow cascade.

* perf(desktop): incremental markdown rendering during streams

  Re-parsing the full message markdown every reveal frame is O(N^2) over a long answer and dominated stream CPU.

  - Throttle useSmoothReveal commits to a ~33ms floor, i.e. about two 60 Hz frames (REVEAL_MIN_COMMIT_MS).
  - Memoize block parsing with an LRU keyed on the full source text so identical text (message remounts, multiple surfaces rendering the same content) is not re-lexed; each streaming flush is a new string and still pays one lex.
  - Replace Streamdown's full-text parseIncompleteMarkdown with a tail-bounded remend: scan to the last top-level boundary outside fences/math and repair only the trailing open block. New remend-tail.ts is proven render-equivalent to full remend at every streaming prefix (remend-tail.test.ts), minus an intentional, documented divergence on cross-block dangling openers.

* perf(desktop): faster session resume & warm AudioContext at idle

  - Resume: fire the REST transcript prefetch and the session.resume RPC in parallel, and skip the redundant message conversion + reconciliation when the prefetch already hydrated the transcript.
  - Haptics: web-haptics builds its AudioContext lazily on first trigger, paying the ~850ms CoreAudio spin-up on the first streamStart haptic as the first token paints. Open/close a throwaway context at idle so the real one connects to an already-warm audio service.
2026-06-14 09:11:54 +00:00 · 2026-06-12 21:22:39 -05:00 · 2026-06-12 21:22:39 -05:00 · 492c402774
commit 492c402774
parent a86b7b314b d62e9b7592
15 changed files with 655 additions and 165 deletions
--- a/apps/desktop/package.json
+++ b/apps/desktop/package.json
@ -90,6 +90,7 @@
    "react-router-dom": "^7.17.0",
    "react-shiki": "^0.9.3",
    "remark-math": "^6.0.0",
+    "remend": "^1.3.0",
    "shiki": "^4.0.2",
    "streamdown": "^2.5.0",
    "tailwind-merge": "^3.5.0",
--- a/apps/desktop/src/app/chat/composer/index.tsx
+++ b/apps/desktop/src/app/chat/composer/index.tsx
@ -174,7 +174,6 @@ export function ChatBar({
  const queuedPromptsBySession = useStore($queuedPromptsBySession)
  const statusItemsBySession = useStore($statusItemsBySession)
  const scrolledUp = useStore($threadScrolledUp)
-  const sessionMessages = useStore($messages)
  const activeQueueSessionKey = queueSessionKey || sessionId || null

  const queuedPrompts = useMemo(
@ -866,7 +865,9 @@ export function ChatBar({
      event.preventDefault()
      triggerKeyConsumedRef.current = true

-      const history = deriveUserHistory(sessionMessages, chatMessageText)
+      // $messages is read imperatively (not subscribed) so the composer
+      // doesn't re-render on every streaming delta flush.
+      const history = deriveUserHistory($messages.get(), chatMessageText)
      const entry = browseBackward(sessionId, currentDraft, history)

      if (entry !== null) {
@ -891,7 +892,7 @@ export function ChatBar({
        event.preventDefault()
        triggerKeyConsumedRef.current = true

-        const history = deriveUserHistory(sessionMessages, chatMessageText)
+        const history = deriveUserHistory($messages.get(), chatMessageText)
        const result = browseForward(sessionId, history)

        if (result !== null) {
--- a/apps/desktop/src/app/chat/index.tsx
+++ b/apps/desktop/src/app/chat/index.tsx
@ -35,7 +35,9 @@ import {
  $gatewayState,
  $introPersonality,
  $introSeed,
+  $lastVisibleMessageIsUser,
  $messages,
+  $messagesEmpty,
  $selectedStoredSessionId,
  $sessions,
  sessionPinId
@ -55,7 +57,7 @@ import { type DroppedFile, partitionDroppedFiles } from './hooks/use-composer-ac
 import { useFileDropZone } from './hooks/use-file-drop-zone'
 import { ScrollToBottomButton } from './scroll-to-bottom-button'
 import { SessionActionsMenu } from './sidebar/session-actions-menu'
-import { lastVisibleMessageIsUser, threadLoadingState } from './thread-loading'
+import { threadLoadingState } from './thread-loading'

 interface ChatViewProps extends Omit<React.ComponentProps<'div'>, 'onSubmit'> {
  gateway: HermesGateway | null
@ -156,105 +158,35 @@ function ChatHeader({
  )
 }

-export function ChatView({
-  className,
-  gateway,
-  onToggleSelectedPin,
-  onDeleteSelectedSession,
+// Props for ChatRuntimeBoundary. At the call site in ChatView, `busy` is the
+// value read from the $busy store and the four callbacks are ChatView's own
+// props passed straight through.
+interface ChatRuntimeBoundaryProps {
+  busy: boolean
+  // Rendered inside the AssistantRuntimeProvider that the boundary returns.
+  children: React.ReactNode
+  onCancel: () => Promise<void> | void
+  onEdit: (message: AppendMessage) => Promise<void>
+  onReload: (parentId: string | null) => Promise<void>
+  onThreadMessagesChange: (messages: readonly ThreadMessage[]) => void
+}
+
+/**
+ * Owns the $messages subscription and the assistant-ui external-store runtime.
+ *
+ * Isolated from ChatView so the per-token delta flush (which replaces the
+ * $messages atom ~30×/s during streaming) only re-renders this component and
+ * the runtime provider. The children (Thread, ChatBar) are created by
+ * ChatView, whose render output is stable across flushes — so React bails out
+ * of re-rendering them by element identity and the stream's render cost stays
+ * confined to the streaming message's own subtree.
+ */
+function ChatRuntimeBoundary({
+  busy,
+  children,
  onCancel,
-  onAddContextRef,
-  onAddUrl,
-  onAttachImageBlob,
-  onAttachDroppedItems,
-  onBranchInNewChat,
-  maxVoiceRecordingSeconds,
-  onPasteClipboardImage,
-  onPickFiles,
-  onPickFolders,
-  onPickImages,
-  onRemoveAttachment,
-  onSteer,
-  onSubmit,
-  onThreadMessagesChange,
  onEdit,
  onReload,
-  onRestoreToMessage,
-  onTranscribeAudio
-}: ChatViewProps) {
-  const location = useLocation()
-  const activeSessionId = useStore($activeSessionId)
-  const awaitingResponse = useStore($awaitingResponse)
-  const busy = useStore($busy)
-  const contextSuggestions = useStore($contextSuggestions)
-  const currentCwd = useStore($currentCwd)
-  const currentModel = useStore($currentModel)
-  const currentProvider = useStore($currentProvider)
-  const freshDraftReady = useStore($freshDraftReady)
-  const gatewayState = useStore($gatewayState)
-  const gatewaySwapTarget = useStore($gatewaySwapTarget)
-  const gatewayOpen = gatewayState === 'open'
-  const introPersonality = useStore($introPersonality)
-  const introSeed = useStore($introSeed)
+  onThreadMessagesChange
+}: ChatRuntimeBoundaryProps) {
  const messages = useStore($messages)
-  const selectedSessionId = useStore($selectedStoredSessionId)
  const runtimeMessageCacheRef = useRef(new WeakMap<ChatMessage, ThreadMessage>())
-  const isRoutedSessionView = Boolean(routeSessionId(location.pathname))
-
-  const showIntro =
-    freshDraftReady && !isRoutedSessionView && !selectedSessionId && !activeSessionId && messages.length === 0
-
-  // Session is still loading if the route references a session we haven't
-  // resumed yet. Once `activeSessionId` is set (runtime has resumed), the
-  // session exists — even if it has zero messages (a brand-new routed
-  // session). The flicker where `busy` flips true briefly during hydrate
-  // is handled by `threadLoadingState`'s last-visible-user gate.
-  const loadingSession = isRoutedSessionView && messages.length === 0 && !activeSessionId
-  const threadLoading = threadLoadingState(loadingSession, busy, awaitingResponse, lastVisibleMessageIsUser(messages))
-  const showChatBar = !loadingSession
-  const threadKey = selectedSessionId || activeSessionId || (isRoutedSessionView ? location.pathname : 'new')
-
-  const modelOptionsQuery = useQuery<ModelOptionsResponse>({
-    queryKey: ['model-options', activeSessionId || 'global'],
-    queryFn: () => {
-      if (!activeSessionId) {
-        return getGlobalModelOptions()
-      }
-
-      if (!gateway) {
-        throw new Error('Hermes gateway unavailable')
-      }
-
-      return gateway.request<ModelOptionsResponse>('model.options', { session_id: activeSessionId })
-    },
-    enabled: gatewayOpen
-  })
-
-  const quickModels = useMemo(
-    () => quickModelOptions(modelOptionsQuery.data, currentProvider, currentModel),
-    [currentModel, currentProvider, modelOptionsQuery.data]
-  )
-
-  const chatBarState = useMemo<ChatBarState>(
-    () => ({
-      model: {
-        model: currentModel,
-        provider: currentProvider,
-        canSwitch: gatewayOpen,
-        loading: !gatewayOpen || (!currentModel && !currentProvider),
-        quickModels
-      },
-      tools: {
-        enabled: true,
-        label: 'Add context',
-        suggestions: contextSuggestions
-      },
-      voice: {
-        enabled: true,
-        active: false
-      }
-    }),
-    [contextSuggestions, currentModel, currentProvider, gatewayOpen, quickModels]
-  )

  const runtimeMessageRepository = useMemo(() => {
    const items: { message: ThreadMessage; parentId: string | null }[] = []
@ -304,6 +236,113 @@ export function ChatView({
    onReload
  })

+  return <AssistantRuntimeProvider runtime={runtime}>{children}</AssistantRuntimeProvider>
+}
+
+export function ChatView({
+  className,
+  gateway,
+  onToggleSelectedPin,
+  onDeleteSelectedSession,
+  onCancel,
+  onAddContextRef,
+  onAddUrl,
+  onAttachImageBlob,
+  onAttachDroppedItems,
+  onBranchInNewChat,
+  maxVoiceRecordingSeconds,
+  onPasteClipboardImage,
+  onPickFiles,
+  onPickFolders,
+  onPickImages,
+  onRemoveAttachment,
+  onSteer,
+  onSubmit,
+  onThreadMessagesChange,
+  onEdit,
+  onReload,
+  onRestoreToMessage,
+  onTranscribeAudio
+}: ChatViewProps) {
+  const location = useLocation()
+  const activeSessionId = useStore($activeSessionId)
+  const awaitingResponse = useStore($awaitingResponse)
+  const busy = useStore($busy)
+  const contextSuggestions = useStore($contextSuggestions)
+  const currentCwd = useStore($currentCwd)
+  const currentModel = useStore($currentModel)
+  const currentProvider = useStore($currentProvider)
+  const freshDraftReady = useStore($freshDraftReady)
+  const gatewayState = useStore($gatewayState)
+  const gatewaySwapTarget = useStore($gatewaySwapTarget)
+  const gatewayOpen = gatewayState === 'open'
+  const introPersonality = useStore($introPersonality)
+  const introSeed = useStore($introSeed)
+  // PERF: ChatView must not subscribe to $messages — the atom is replaced on
+  // every streaming delta flush (~30×/s) and a subscription here re-renders
+  // the entire chat shell (header, chat bar, thread wrapper) per token. The
+  // runtime that DOES need the messages lives in ChatRuntimeBoundary below;
+  // this component only needs streaming-stable derivations.
+  const messagesEmpty = useStore($messagesEmpty)
+  const lastVisibleIsUser = useStore($lastVisibleMessageIsUser)
+  const selectedSessionId = useStore($selectedStoredSessionId)
+  const isRoutedSessionView = Boolean(routeSessionId(location.pathname))
+
+  const showIntro = freshDraftReady && !isRoutedSessionView && !selectedSessionId && !activeSessionId && messagesEmpty
+
+  // Session is still loading if the route references a session we haven't
+  // resumed yet. Once `activeSessionId` is set (runtime has resumed), the
+  // session exists — even if it has zero messages (a brand-new routed
+  // session). The flicker where `busy` flips true briefly during hydrate
+  // is handled by `threadLoadingState`'s last-visible-user gate.
+  const loadingSession = isRoutedSessionView && messagesEmpty && !activeSessionId
+  const threadLoading = threadLoadingState(loadingSession, busy, awaitingResponse, lastVisibleIsUser)
+  const showChatBar = !loadingSession
+  const threadKey = selectedSessionId || activeSessionId || (isRoutedSessionView ? location.pathname : 'new')
+
+  const modelOptionsQuery = useQuery<ModelOptionsResponse>({
+    queryKey: ['model-options', activeSessionId || 'global'],
+    queryFn: () => {
+      if (!activeSessionId) {
+        return getGlobalModelOptions()
+      }
+
+      if (!gateway) {
+        throw new Error('Hermes gateway unavailable')
+      }
+
+      return gateway.request<ModelOptionsResponse>('model.options', { session_id: activeSessionId })
+    },
+    enabled: gatewayOpen
+  })
+
+  const quickModels = useMemo(
+    () => quickModelOptions(modelOptionsQuery.data, currentProvider, currentModel),
+    [currentModel, currentProvider, modelOptionsQuery.data]
+  )
+
+  const chatBarState = useMemo<ChatBarState>(
+    () => ({
+      model: {
+        model: currentModel,
+        provider: currentProvider,
+        canSwitch: gatewayOpen,
+        loading: !gatewayOpen || (!currentModel && !currentProvider),
+        quickModels
+      },
+      tools: {
+        enabled: true,
+        label: 'Add context',
+        suggestions: contextSuggestions
+      },
+      voice: {
+        enabled: true,
+        active: false
+      }
+    }),
+    [contextSuggestions, currentModel, currentProvider, gatewayOpen, quickModels]
+  )
+
  // Drop files anywhere in the conversation area, not just on the composer
  // input. In-app drags (project tree / gutter) carry workspace-relative paths
  // the gateway resolves directly, so they stay inline `@file:` refs. OS/Finder
@ -356,7 +395,13 @@ export function ChatView({
        className="relative min-h-0 max-w-full flex-1 overflow-hidden bg-(--ui-chat-surface-background) contain-[layout_paint]"
        {...dropHandlers}
      >
-        <AssistantRuntimeProvider runtime={runtime}>
+        <ChatRuntimeBoundary
+          busy={busy}
+          onCancel={onCancel}
+          onEdit={onEdit}
+          onReload={onReload}
+          onThreadMessagesChange={onThreadMessagesChange}
+        >
          <Thread
            clampToComposer={showChatBar}
            cwd={currentCwd}
@ -397,7 +442,7 @@ export function ChatView({
              />
            </Suspense>
          )}
-        </AssistantRuntimeProvider>
+        </ChatRuntimeBoundary>
        {showChatBar && <ScrollToBottomButton />}
        <ChatDropOverlay kind={dragKind} />
        <ChatSwapOverlay profile={gatewaySwapTarget} />
--- a/apps/desktop/src/app/chat/thread-loading.ts
+++ b/apps/desktop/src/app/chat/thread-loading.ts
@ -3,9 +3,14 @@ import type { ChatMessage } from '@/lib/chat-messages'
 export type ThreadLoadingState = 'response' | 'session'

 export function lastVisibleMessageIsUser(messages: ChatMessage[]): boolean {
-  const lastVisible = [...messages].reverse().find(message => !message.hidden)
+  // Allocation-free reverse scan — runs in a hot $messages computed.
+  // Walks from the tail and answers for the first non-hidden message found.
+  for (let i = messages.length - 1; i >= 0; i -= 1) {
+    if (!messages[i].hidden) {
+      return messages[i].role === 'user'
+    }
+  }

-  return lastVisible?.role === 'user'
+  // Empty list or every message hidden: same `false` the removed
+  // `lastVisible?.role === 'user'` produced when `lastVisible` was undefined.
+  return false
 }

 export function threadLoadingState(
--- a/apps/desktop/src/app/session/hooks/use-session-actions.ts
+++ b/apps/desktop/src/app/session/hooks/use-session-actions.ts
@ -618,10 +618,26 @@ export function useSessionActions({
        const watchWindow = isWatchWindow()
        let localSnapshot = $messages.get()

+        // REST transcript prefetch and the gateway resume RPC are independent
+        // — run them concurrently so a big session's wall time is
+        // max(prefetch, resume) instead of their sum. The prefetch paints the
+        // transcript as soon as it lands; the RPC binds the runtime id.
+        // Watch windows skip the prefetch — lazy resume attaches the live mirror.
+        const prefetchPromise = watchWindow ? null : getSessionMessages(storedSessionId, sessionProfile)
+
+        const resumePromise = requestGateway<SessionResumeResponse>('session.resume', {
+          session_id: storedSessionId,
+          cols: 96,
+          ...(watchWindow ? { lazy: true } : {}),
+          ...(sessionProfile ? { profile: sessionProfile } : {})
+        })
+        // The rejection is consumed by the `await` below; this guard only
+        // keeps it from surfacing as unhandled while the prefetch settles.
+        resumePromise.catch(() => undefined)
+
        try {
-          // Watch windows skip REST prefetch — lazy resume attaches the live mirror.
-          if (!watchWindow) {
-            const storedMessages = await getSessionMessages(storedSessionId, sessionProfile)
+          if (prefetchPromise) {
+            const storedMessages = await prefetchPromise

            if (isCurrentResume()) {
              localSnapshot = preserveLocalAssistantErrors(toChatMessages(storedMessages.messages), $messages.get())
@ -635,12 +651,7 @@ export function useSessionActions({
          // Non-fatal: gateway resume below can still hydrate the session.
        }

-        const resumed = await requestGateway<SessionResumeResponse>('session.resume', {
-          session_id: storedSessionId,
-          cols: 96,
-          ...(watchWindow ? { lazy: true } : {}),
-          ...(sessionProfile ? { profile: sessionProfile } : {})
-        })
+        const resumed = await resumePromise

        if (!isCurrentResume()) {
          return
@ -648,17 +659,22 @@ export function useSessionActions({

        const currentMessages = $messages.get()

-        const resumedMessages = preserveLocalAssistantErrors(
-          reconcileResumeMessages(toChatMessages(resumed.messages), currentMessages),
-          currentMessages
-        )
-        // Keep the local snapshot when resume would only reshuffle runtime projection.
+        // Keep the local snapshot when resume would only reshuffle runtime
+        // projection. When the REST prefetch already hydrated the transcript,
+        // skip converting/reconciling the resume payload entirely — on a
+        // 1000+-message session that second conversion plus the deep
+        // equivalence compare costs over a second of main-thread time.
        const preferredMessages =
          localSnapshot.length > 0
            ? localSnapshot
-            : chatMessageArraysEquivalent(currentMessages, resumedMessages)
-              ? currentMessages
-              : resumedMessages
+            : (() => {
+                const resumedMessages = preserveLocalAssistantErrors(
+                  reconcileResumeMessages(toChatMessages(resumed.messages), currentMessages),
+                  currentMessages
+                )
+
+                return chatMessageArraysEquivalent(currentMessages, resumedMessages) ? currentMessages : resumedMessages
+              })()

        const messagesForView = preserveLocalAssistantErrors(preferredMessages, currentMessages)

--- a/apps/desktop/src/components/assistant-ui/markdown-text.tsx
+++ b/apps/desktop/src/components/assistant-ui/markdown-text.tsx
@ -2,6 +2,7 @@

 import { TextMessagePartProvider, useMessagePartText } from '@assistant-ui/react'
 import {
+  parseMarkdownIntoBlocks,
  type StreamdownTextComponents,
  StreamdownTextPrimitive,
  type SyntaxHighlighterProps
@ -26,6 +27,7 @@ import {
  mediaStreamUrl
 } from '@/lib/media'
 import { previewTargetFromMarkdownHref } from '@/lib/preview-targets'
+import { tailBoundedRemend } from '@/lib/remend-tail'
 import { cn } from '@/lib/utils'

 // Math rendering plugin (KaTeX). Configured once at module scope — the
@ -42,6 +44,51 @@ import { cn } from '@/lib/utils'
 // LLM convention). The default false-setting only accepts `$$...$$`.
 const mathPlugin = createMemoizedMathPlugin({ singleDollarTextMath: true })

+// Replaces Streamdown's `parseIncompleteMarkdown` (full-text remend per
+// flush) with a tail-bounded repair — see lib/remend-tail.ts. Must stay
+// module-scope so the prop identity is stable across renders.
+//
+// Composition order: `preprocessMarkdown` runs first and `tailBoundedRemend`
+// is applied to its output, so the repair sees already-preprocessed text.
+function preprocessWithTailRepair(text: string): string {
+  return tailBoundedRemend(preprocessMarkdown(text))
+}
+
+// Memoized block splitter. Streamdown calls `parseMarkdownIntoBlocks` (a full
+// `marked` lex of the entire message, ~1.6ms per 28KB) inside a useMemo keyed
+// on the text — but the same text is re-lexed every time a message REMOUNTS
+// (virtualizer scroll, session switch) and whenever multiple surfaces render
+// the same content (deferred + smooth reveal republish). A small module-level
+// LRU keyed by the exact source string removes all of those repeat parses
+// with zero correctness risk (same input → same output). Streaming tail
+// growth misses the cache by design (every flush is a new string) — that
+// single lex is the irreducible cost.
+
+// Entry cap: once the map grows past this, the first key in Map order (the
+// least recently inserted/refreshed entry) is evicted.
+const BLOCK_CACHE_MAX = 64
+// Inputs whose `.length` is below this bypass the cache entirely and are
+// parsed directly on every call.
+const BLOCK_CACHE_MIN_LENGTH = 1024
+// Full source string → blocks returned by `parseMarkdownIntoBlocks` for it.
+const blockCache = new Map<string, string[]>()
+
+// Drop-in wrapper around `parseMarkdownIntoBlocks` with the LRU described
+// above. Returns exactly what the wrapped function returned for `markdown`.
+function parseMarkdownIntoBlocksCached(markdown: string): string[] {
+  // Short inputs: neither looked up nor stored.
+  if (markdown.length < BLOCK_CACHE_MIN_LENGTH) {
+    return parseMarkdownIntoBlocks(markdown)
+  }
+
+  const hit = blockCache.get(markdown)
+
+  if (hit) {
+    // Refresh recency (Map iteration order is insertion order).
+    blockCache.delete(markdown)
+    blockCache.set(markdown, hit)
+
+    // NOTE(review): this is the cached array instance itself, not a copy —
+    // assumes callers treat the result as read-only; confirm.
+    return hit
+  }
+
+  const blocks = parseMarkdownIntoBlocks(markdown)
+  blockCache.set(markdown, blocks)
+
+  // Evict a single entry per insert; size can exceed the cap by at most one.
+  if (blockCache.size > BLOCK_CACHE_MAX) {
+    blockCache.delete(blockCache.keys().next().value as string)
+  }
+
+  return blocks
+}
+
 async function mediaSrc(path: string): Promise<string> {
  if (/^(?:https?|data):/i.test(path)) {
    return path
@ -241,6 +288,13 @@ function MarkdownImage({ className, src, alt, ...props }: ComponentProps<'img'>)
 // keeps draining its tail instead of snapping.
 const REVEAL_DRAIN_MS = 500
 const REVEAL_MAX_CHARS_PER_FRAME = 30
+// Floor between reveal commits. Each commit republishes the text context and
+// re-runs the whole Streamdown pipeline (preprocess → remend → lex → micromark
+// on the open block) over the full accumulated text — at raw rAF cadence
+// that's 60 full parses/second and was the dominant streaming cost for
+// reasoning text. ~33ms keeps the reveal visually fluid (2 frames) while
+// halving the parse work.
+const REVEAL_MIN_COMMIT_MS = 33

 function useSmoothReveal(text: string, isRunning: boolean): string {
  const [displayed, setDisplayed] = useState(isRunning ? '' : text)
@ -273,10 +327,27 @@ function useSmoothReveal(text: string, isRunning: boolean): string {
    const tick = () => {
      const now = performance.now()
      const dt = now - lastTickRef.current
+
+      // Skip this frame if the floor hasn't elapsed — the backlog math below
+      // is dt-proportional, so delayed commits reveal proportionally more.
+      if (dt < REVEAL_MIN_COMMIT_MS) {
+        frameRef.current = requestAnimationFrame(tick)
+
+        return
+      }
+
      lastTickRef.current = now

      const remaining = targetRef.current.length - shownRef.current.length
-      const add = Math.min(remaining, REVEAL_MAX_CHARS_PER_FRAME, Math.max(1, Math.ceil((remaining * dt) / REVEAL_DRAIN_MS)))
+
+      const add = Math.min(
+        remaining,
+        // dt-scaled against a nominal 60 Hz frame (16.7ms) so the per-commit
+        // cap stays equivalent to the old per-frame cap at any commit cadence.
+        Math.ceil((REVEAL_MAX_CHARS_PER_FRAME * dt) / 16.7),
+        Math.max(1, Math.ceil((remaining * dt) / REVEAL_DRAIN_MS))
+      )
+
      shownRef.current = targetRef.current.slice(0, shownRef.current.length + add)
      setDisplayed(shownRef.current)

@ -460,17 +531,20 @@ function MarkdownTextSurface({ containerClassName, containerProps }: MarkdownTex
      containerProps={containerProps}
      lineNumbers={false}
      mode="streaming"
-      // Always auto-close incomplete fences — even during streaming.
-      // Without this, an unclosed ```python ... ``` whose body contains
-      // `$` (very common: shell snippets, JS template strings, dollar
-      // amounts) leaks those dollars out to the math parser and they
-      // get rendered as broken inline math until the closing fence
-      // arrives. Shiki is independently deferred via `defer={isStreaming}`
-      // on the SyntaxHighlighter component, so we don't pay code-block
-      // tokenization on every token even with this set.
-      parseIncompleteMarkdown
+      // Incomplete-markdown repair is handled by `preprocessWithTailRepair`
+      // below (tail-bounded remend) instead of Streamdown's built-in pass,
+      // which re-runs remend over the ENTIRE message on every flush — ~18%
+      // of streaming script time on 50KB+ messages. The repair itself stays
+      // always-on (even between flushes / for completed messages): an
+      // unclosed ```python ... ``` whose body contains `$` (shell snippets,
+      // JS template strings, dollar amounts) would otherwise leak those
+      // dollars to the math parser and render broken inline math. Shiki is
+      // independently deferred via `defer={isStreaming}` on the
+      // SyntaxHighlighter component.
+      parseIncompleteMarkdown={false}
+      parseMarkdownIntoBlocksFn={parseMarkdownIntoBlocksCached}
      plugins={plugins}
-      preprocess={preprocessMarkdown}
+      preprocess={preprocessWithTailRepair}
    />
  )
 }
--- a/apps/desktop/src/components/assistant-ui/thread.tsx
+++ b/apps/desktop/src/components/assistant-ui/thread.tsx
@ -7,7 +7,8 @@ import {
  MessagePrimitive,
  type ToolCallMessagePartProps,
  useAui,
-  useAuiState
+  useAuiState,
+  useMessageRuntime
 } from '@assistant-ui/react'
 import { useStore } from '@nanostores/react'
 import { IconPlayerStopFilled } from '@tabler/icons-react'
@ -105,7 +106,11 @@ type ThreadLoadingState = 'response' | 'session'

 interface MessageActionProps {
  messageId: string
-  messageText: string
+  /** Lazy accessor — reads the live message text at action time. Passing the
+   *  text itself as a prop forces the whole footer to re-render on every
+   *  streaming delta flush (the text changes ~30×/s), which profiling showed
+   *  was a large slice of per-token script time on long transcripts. */
+  getMessageText: () => string
  onBranchInNewChat?: (messageId: string) => void
 }

@ -133,6 +138,28 @@ function messageContentText(content: unknown): string {
  return Array.isArray(content) ? content.map(partText).join('').trim() : ''
 }

+// Cheap streaming-stable "does this message have visible text" check: returns
+// on the first non-whitespace text part without concatenating the whole
+// message. Used as a useAuiState selector so its boolean output stays stable
+// across token flushes (flips false→true once per turn).
+function contentHasVisibleText(content: unknown): boolean {
+  // Plain-string content: visible iff something survives trimming.
+  if (typeof content === 'string') {
+    return content.trim().length > 0
+  }
+
+  // Anything that is neither a string nor an array is reported as no text.
+  if (!Array.isArray(content)) {
+    return false
+  }
+
+  // Short-circuit on the first part whose `partText` result is non-blank.
+  for (const part of content) {
+    if (partText(part).trim().length > 0) {
+      return true
+    }
+  }
+
+  // Empty array, or every part trimmed down to nothing.
+  return false
+}
+
 export const Thread: FC<{
  clampToComposer?: boolean
  cwd?: string | null
@ -221,20 +248,39 @@ const CenteredThreadSpinner: FC = () => {

 const AssistantMessage: FC<{ onBranchInNewChat?: (messageId: string) => void }> = ({ onBranchInNewChat }) => {
  const messageId = useAuiState(s => s.message.id)
-  const content = useAuiState(s => s.message.content)
-  const messageText = messageContentText(content)
+  const messageRuntime = useMessageRuntime()
+
+  // PERF: this component must NOT subscribe to the streaming text. Every
+  // selector here returns a value that stays referentially stable across
+  // token flushes (booleans, status strings, '' while running), so the
+  // 30 Hz delta stream only re-renders the markdown part and the tiny
+  // StreamStallIndicator leaf — not the footer/preview/root subtree.
+  const messageStatus = useAuiState(s => s.message.status?.type)
+  const isRunning = messageStatus === 'running'
+  const isPlaceholder = useAuiState(s => s.message.status?.type === 'running' && s.message.content.length === 0)
+  const hasVisibleText = useAuiState(s => contentHasVisibleText(s.message.content))
+
+  // Preview targets only materialize once the turn completes — while running
+  // the selector returns '' (stable), so per-token flushes skip the regex
+  // scan and the re-render it would cause.
+  const completedText = useAuiState(s =>
+    s.message.status?.type === 'running' ? '' : messageContentText(s.message.content)
+  )

  const previewTargets = useMemo(() => {
-    if (!messageText || !/(https?:\/\/|file:\/\/)/i.test(messageText)) {
+    if (!completedText || !/(https?:\/\/|file:\/\/)/i.test(completedText)) {
      return []
    }

-    return pickPrimaryPreviewTarget(extractPreviewTargets(messageText))
-  }, [messageText])
+    return pickPrimaryPreviewTarget(extractPreviewTargets(completedText))
+  }, [completedText])

-  const messageStatus = useAuiState(s => s.message.status?.type)
-  const isPlaceholder = messageStatus === 'running' && content.length === 0
-  const enterRef = useEnterAnimation(messageStatus === 'running', `assistant-message:${messageId}`)
+  const getMessageText = useCallback(
+    () => messageContentText(messageRuntime.getState().content),
+    [messageRuntime]
+  )
+
+  const enterRef = useEnterAnimation(isRunning, `assistant-message:${messageId}`)

  if (isPlaceholder) {
    return null
@ -245,7 +291,7 @@ const AssistantMessage: FC<{ onBranchInNewChat?: (messageId: string) => void }>
      className="group flex w-full min-w-0 max-w-full flex-col gap-0 self-start overflow-hidden"
      data-role="assistant"
      data-slot="aui_assistant-message-root"
-      data-streaming={messageStatus === 'running' ? 'true' : undefined}
+      data-streaming={isRunning ? 'true' : undefined}
      ref={enterRef}
    >
      <div
@ -254,7 +300,7 @@ const AssistantMessage: FC<{ onBranchInNewChat?: (messageId: string) => void }>
      >
        {/* Todos render in the composer status stack now, not inline. */}
        <MessagePrimitive.Parts components={MESSAGE_PARTS_COMPONENTS} />
-        {messageStatus === 'running' && <StreamStallIndicator activity={`${content.length}:${messageText.length}`} />}
+        {isRunning && <StreamStallIndicator />}
        {previewTargets.length > 0 && (
          <div className="mt-3 flex flex-wrap gap-2">
            {previewTargets.map(target => (
@ -271,8 +317,8 @@ const AssistantMessage: FC<{ onBranchInNewChat?: (messageId: string) => void }>
          </ErrorPrimitive.Root>
        </MessagePrimitive.Error>
      </div>
-      {messageText.trim().length > 0 && (
-        <AssistantFooter messageId={messageId} messageText={messageText} onBranchInNewChat={onBranchInNewChat} />
+      {hasVisibleText && (
+        <AssistantFooter getMessageText={getMessageText} messageId={messageId} onBranchInNewChat={onBranchInNewChat} />
      )}
    </MessagePrimitive.Root>
  )
@ -313,10 +359,28 @@ const STREAM_STALL_S = 2

 // Tail "still thinking" indicator: the pre-first-token spinner goes away once
 // text flows, but if the stream then goes quiet mid-turn (tool think-time,
-// provider stall) nothing signals that work continues. Watch a per-render
+// provider stall) nothing signals that work continues. Watch a per-flush
 // activity signal; when it hasn't changed for STREAM_STALL_S, re-show the
 // dither + a timer counting from the last activity.
-const StreamStallIndicator: FC<{ activity: string }> = ({ activity }) => {
+//
+// Subscribes to the activity signal ITSELF (rather than taking it as a prop)
+// so that per-token updates re-render only this leaf, not the whole
+// AssistantMessage subtree.
+const StreamStallIndicator: FC = () => {
+  const activity = useAuiState(s => {
+    let textLength = 0
+
+    for (const part of s.message.content) {
+      const text = (part as { text?: unknown }).text
+
+      if (typeof text === 'string') {
+        textLength += text.length
+      }
+    }
+
+    return `${s.message.content.length}:${textLength}`
+  })
+
  const [stalled, setStalled] = useState(false)

  useEffect(() => {
@ -584,7 +648,7 @@ function formatMessageTimestamp(
  return SHORT_FMT.format(date)
 }

-const AssistantActionBar: FC<MessageActionProps> = ({ messageId, messageText, onBranchInNewChat }) => {
+const AssistantActionBar: FC<MessageActionProps> = ({ messageId, getMessageText, onBranchInNewChat }) => {
  const { t } = useI18n()
  const copy = t.assistant.thread
  const [menuOpen, setMenuOpen] = useState(false)
@ -605,7 +669,7 @@ const AssistantActionBar: FC<MessageActionProps> = ({ messageId, messageText, on
        )}
        data-slot="aui_msg-actions"
      >
-        <CopyButton appearance="icon" buttonSize="icon" disabled={!messageText} label={copy.copy} text={messageText} />
+        <CopyButton appearance="icon" buttonSize="icon" label={copy.copy} text={getMessageText} />
        <ActionBarPrimitive.Reload asChild>
          <TooltipIconButton onClick={() => triggerHaptic('submit')} tooltip={copy.refresh}>
            <Codicon name="refresh" />
@ -623,7 +687,7 @@ const AssistantActionBar: FC<MessageActionProps> = ({ messageId, messageText, on
              <GitBranchIcon />
              {copy.branchNewChat}
            </DropdownMenuItem>
-            <ReadAloudItem messageId={messageId} text={messageText} />
+            <ReadAloudItem getText={getMessageText} messageId={messageId} />
          </DropdownMenuContent>
        </DropdownMenu>
      </ActionBarPrimitive.Root>
@ -631,7 +695,7 @@ const AssistantActionBar: FC<MessageActionProps> = ({ messageId, messageText, on
  )
 }

-const ReadAloudItem: FC<{ messageId: string; text: string }> = ({ messageId, text }) => {
+const ReadAloudItem: FC<{ getText: () => string; messageId: string }> = ({ getText, messageId }) => {
  const { t } = useI18n()
  const copy = t.assistant.thread
  const voicePlayback = useStore($voicePlayback)
@ -645,6 +709,8 @@ const ReadAloudItem: FC<{ messageId: string; text: string }> = ({ messageId, tex
  const Icon = isPreparing ? Loader2Icon : isSpeaking ? VolumeXIcon : Volume2Icon

  const read = useCallback(async () => {
+    const text = getText()
+
    if (!text || $voicePlayback.get().status !== 'idle') {
      return
    }
@ -654,11 +720,11 @@ const ReadAloudItem: FC<{ messageId: string; text: string }> = ({ messageId, tex
    } catch (error) {
      notifyError(error, copy.readAloudFailed)
    }
-  }, [copy.readAloudFailed, messageId, text])
+  }, [copy.readAloudFailed, getText, messageId])

  return (
    <DropdownMenuItem
-      disabled={isPreparing || (!isSpeaking && (anyPlaybackActive || !text))}
+      disabled={isPreparing || (!isSpeaking && anyPlaybackActive)}
      onSelect={e => {
        e.preventDefault()
        void (isSpeaking ? stopVoicePlayback() : read())
@ -820,8 +886,10 @@ const UserMessage: FC<{
  // changes, not on every frame while the outer max-height animates open.
  const clampInnerRef = useRef<HTMLDivElement | null>(null)
  const [bodyClamped, setBodyClamped] = useState(false)
+  const lastClampHeightRef = useRef(-1)
+  const lineHeightRef = useRef(0)

-  const measureClamp = useCallback(() => {
+  const measureClamp = useCallback((entries: readonly ResizeObserverEntry[]) => {
    const inner = clampInnerRef.current
    const outer = inner?.parentElement

@ -829,12 +897,28 @@ const UserMessage: FC<{
      return
    }

-    const styles = getComputedStyle(inner)
-    const lineHeight = parseFloat(styles.lineHeight) || 1.5 * parseFloat(styles.fontSize) || 20
-    const fullHeight = inner.scrollHeight
+    // Prefer the size the ResizeObserver already computed — reading
+    // `scrollHeight` outside RO timing forces a synchronous layout, and with
+    // many user bubbles observed at once those reads interleave with the
+    // style write below into a read-write-read reflow cascade.
+    const entryHeight = entries.find(entry => entry.target === inner)?.borderBoxSize?.[0]?.blockSize
+    const fullHeight = Math.ceil(entryHeight ?? inner.scrollHeight)
+
+    if (fullHeight === lastClampHeightRef.current) {
+      return
+    }
+
+    lastClampHeightRef.current = fullHeight
+
+    // Line-height is stable for the life of the bubble (font settings don't
+    // change under it) — resolve the computed style once.
+    if (!lineHeightRef.current) {
+      const styles = getComputedStyle(inner)
+      lineHeightRef.current = parseFloat(styles.lineHeight) || 1.5 * parseFloat(styles.fontSize) || 20
+    }

    outer.style.setProperty('--human-msg-full', `${fullHeight}px`)
-    setBodyClamped(fullHeight > lineHeight * 2 + 1)
+    setBodyClamped(fullHeight > lineHeightRef.current * 2 + 1)
  }, [])

  useResizeObserver(measureClamp, clampInnerRef)
--- a/apps/desktop/src/components/haptics-provider.tsx
+++ b/apps/desktop/src/components/haptics-provider.tsx
@ -15,5 +15,29 @@ export function HapticsProvider({ children }: { children: ReactNode }) {
    return () => registerHapticTrigger(null)
  }, [muted, trigger])

+  // Pre-warm the audio service. web-haptics only constructs its AudioContext
+  // inside the first trigger(), and the first AudioContext a process creates
+  // pays the CoreAudio spin-up (~850ms stall in profiles), which used to land
+  // on the first streamStart haptic just as the first token painted. Creating
+  // and immediately closing a throwaway context at idle lets the real one
+  // attach to an already-warm audio service in single-digit ms.
+  useEffect(() => {
+    const canWarm = typeof requestIdleCallback === 'function' && typeof AudioContext !== 'undefined'
+
+    if (!canWarm) {
+      return undefined
+    }
+
+    const warmAudio = () => {
+      try {
+        void new AudioContext().close().catch(() => undefined)
+      } catch {
+        // No audio device (headless CI) — nothing to warm.
+      }
+    }
+
+    const handle = requestIdleCallback(warmAudio, { timeout: 2000 })
+
+    return () => cancelIdleCallback(handle)
+  }, [])
+
  return <>{children}</>
 }
--- a/apps/desktop/src/components/ui/fade-text.tsx
+++ b/apps/desktop/src/components/ui/fade-text.tsx
@ -34,14 +34,21 @@ function FadeTextImpl({ children, className, fadeWidth = '3rem', style, ...rest
  const ref = useRef<HTMLSpanElement>(null)
  const [overflowing, setOverflowing] = useState(false)

-  const measureOverflow = useCallback(() => {
+  const measureOverflow = useCallback((entries: readonly ResizeObserverEntry[]) => {
    const el = ref.current

    if (!el) {
      return
    }

-    setOverflowing(el.scrollWidth - el.clientWidth > 1)
+    // `clientWidth` from the RO entry when available (already computed);
+    // `scrollWidth` is unavoidable — content width isn't part of the entry —
+    // but inside RO timing layout is already clean so the read is cheap.
+    const clientWidth = entries.find(entry => entry.target === el)?.contentRect?.width ?? el.clientWidth
+
+    // setState is identity-stable: React bails out when the boolean doesn't
+    // change, so repeated RO fires with the same answer don't re-render.
+    setOverflowing(el.scrollWidth - clientWidth > 1)
  }, [])

  useResizeObserver(measureOverflow, ref)
--- a/apps/desktop/src/hooks/use-resize-observer.ts
+++ b/apps/desktop/src/hooks/use-resize-observer.ts
@ -1,17 +1,26 @@
 import { type RefObject, useLayoutEffect, useRef } from 'react'

-export function useResizeObserver(onResize: () => void, ...refs: readonly RefObject<Element | null>[]) {
+/**
+ * Observe element resizes. The callback receives the ResizeObserver entries
+ * (empty on the initial synchronous call and in non-RO environments) so
+ * callers can read the observed size off the entry instead of forcing a
+ * fresh layout read.
+ */
+export function useResizeObserver(
+  onResize: (entries: readonly ResizeObserverEntry[]) => void,
+  ...refs: readonly RefObject<Element | null>[]
+) {
  const refsRef = useRef(refs)
  refsRef.current = refs

  useLayoutEffect(() => {
    if (typeof ResizeObserver === 'undefined') {
-      onResize()
+      onResize([])

      return
    }

-    const observer = new ResizeObserver(() => onResize())
+    const observer = new ResizeObserver(entries => onResize(entries))
    let observed = false

    for (const ref of refsRef.current) {
@ -31,7 +40,7 @@ export function useResizeObserver(onResize: () => void, ...refs: readonly RefObj
      return
    }

-    onResize()
+    onResize([])

    return () => observer.disconnect()
  }, [onResize])
--- a/apps/desktop/src/lib/remend-tail.test.ts
+++ b/apps/desktop/src/lib/remend-tail.test.ts
@ -0,0 +1,105 @@
+import { parseMarkdownIntoBlocks } from '@assistant-ui/react-streamdown'
+import remend from 'remend'
+import { describe, expect, it } from 'vitest'
+
+import { findRemendWindowStart, tailBoundedRemend } from './remend-tail'
+
+const CORPUS = `# Heading one
+
+Intro paragraph with **bold**, *italic*, \`inline code\`, and a [link](https://example.com).
+
+## Code
+
+\`\`\`python
+def main():
+    cost = "$5"
+    print(f"total: $\{cost}")
+\`\`\`
+
+Some text after the fence with $x^2 + y^2$ inline math.
+
+$$
+\\int_0^1 f(x) dx
+$$
+
+- list item one with **bold**
+- list item two
+
+| col a | col b |
+| ----- | ----- |
+| 1     | 2     |
+
+~~~js
+const s = \`template \${value}\`
+~~~
+
+Final paragraph with ~~strike~~ and unfinished [link text](https://exa
+`
+
+/**
+ * Oracle for render equivalence. Full-text remend and tail-bounded remend are
+ * allowed to produce different raw strings, but ONLY where rendering cannot
+ * be affected: once split into blocks, every block has to match. (Streamdown
+ * renders each block independently, so equal blocks mean equal renders.)
+ */
+function blocksOf(text: string): string[] {
+  const blocks = parseMarkdownIntoBlocks(text)
+
+  return blocks
+}
+
+// Suite for tailBoundedRemend / findRemendWindowStart. Most cases compare
+// against full-text `remend` through `blocksOf` (block-level equality); the
+// cases that use `toBe` on strings assert exact string equality instead.
+describe('tailBoundedRemend', () => {
+  it('matches full remend block output at every streaming prefix', () => {
+    // Replay the corpus one character at a time: every prefix length from 1
+    // up to the full text is checked.
+    for (let end = 1; end <= CORPUS.length; end++) {
+      const prefix = CORPUS.slice(0, end)
+      const full = blocksOf(remend(prefix))
+      const tail = blocksOf(tailBoundedRemend(prefix))
+
+      // The failure message carries the prefix length and its last 60 chars.
+      expect(tail, `prefix length ${end}: ${JSON.stringify(prefix.slice(-60))}`).toEqual(full)
+    }
+  })
+
+  it('repairs an unclosed fence opened early in a long message', () => {
+    // 500 body lines sit between the fence opener and the end of the text.
+    const text = `intro\n\n\`\`\`python\n${'x = 1\n'.repeat(500)}print("$dollar")`
+    const repaired = tailBoundedRemend(text)
+
+    expect(blocksOf(repaired)).toEqual(blocksOf(remend(text)))
+    // the window must reach back to the fence opener
+    expect(findRemendWindowStart(text)).toBe(text.indexOf('```python'))
+  })
+
+  it('bounds the window to the tail paragraph when no fence is open', () => {
+    const text = `para one\n\npara two\n\npara three with **bold`
+    const start = findRemendWindowStart(text)
+
+    expect(start).toBe(text.indexOf('para three'))
+    // Exact string equality with full remend, not just block equality.
+    expect(tailBoundedRemend(text)).toBe(remend(text))
+  })
+
+  it('widens the window across an open $$ math block', () => {
+    const text = `before\n\n$$\n\\frac{a}{b}`
+    const start = findRemendWindowStart(text)
+
+    // The window may start at or before the `$$` opener, never after it.
+    expect(start).toBeLessThanOrEqual(text.indexOf('$$'))
+    expect(blocksOf(tailBoundedRemend(text))).toEqual(blocksOf(remend(text)))
+  })
+
+  it('handles closed constructs without modification', () => {
+    const text = `done **bold** and \`code\`\n\n\`\`\`js\nconst a = 1\n\`\`\`\n\nlast line.`
+
+    // Every construct is closed, so the input must come back unchanged.
+    expect(tailBoundedRemend(text)).toBe(text)
+  })
+
+  it('intentionally diverges from full remend on cross-block dangling openers', () => {
+    // Full remend scans the whole document and appends `**` for an opener
+    // left dangling in an EARLIER block, dumping stray asterisks into the
+    // unrelated tail block ("|**"). Because Streamdown splits into blocks
+    // after the repair, that opener never renders as bold either way — the
+    // tail-bounded result is the cleaner of the two. This test documents
+    // the divergence so a future remend upgrade that changes the behavior
+    // gets noticed.
+    const text = `- item with **dangling\n- item two\n\n|`
+
+    expect(remend(text).endsWith('|**')).toBe(true)
+    expect(tailBoundedRemend(text).endsWith('|')).toBe(true)
+    expect(tailBoundedRemend(text).endsWith('|**')).toBe(false)
+  })
+})
--- a/apps/desktop/src/lib/remend-tail.ts
+++ b/apps/desktop/src/lib/remend-tail.ts
@ -0,0 +1,108 @@
+import remend from 'remend'
+
+// Tail-bounded incomplete-markdown repair.
+//
+// Streamdown's built-in `parseIncompleteMarkdown` runs `remend` over the whole
+// accumulated message on every streaming flush (~18% of script time on 50KB+
+// messages). But repairs only ever matter in the trailing block: inline
+// constructs can't cross a blank line, and Streamdown splits into blocks AFTER
+// the repair, so a dangling opener in an earlier block can't reach the tail.
+// We run `remend` on just that block instead.
+
+const BACKTICK = 96 // `
+const TILDE = 126 // ~
+const SPACE = 32
+const TAB = 9
+const BACKSLASH = 92
+
+// True for the two horizontal-whitespace char codes this scanner cares about.
+const isSpace = (code: number): boolean => code === TAB || code === SPACE
+
+/**
+ * Finds where the trailing top-level block begins — the offset the repair
+ * window should start from.
+ *
+ * Walks the text line by line, tracking whether the cursor is inside a code
+ * fence (``` or ~~~) or a `$$` math block. A blank line seen outside both
+ * becomes a candidate boundary; it is committed when the next line that is
+ * not itself a top-level blank arrives, so trailing blank lines never move
+ * the boundary past the last content. Blank lines inside an open fence or
+ * math block are ignored, which keeps an unclosed fence/math block wholly
+ * inside the window. Every character is visited a bounded number of times.
+ *
+ * @param text - The accumulated markdown source.
+ * @returns The index of the first character after the last committed
+ * top-level blank line, or 0 when no such boundary exists.
+ */
+export function findRemendWindowStart(text: string): number {
+  const DOLLAR = 36 // $
+  const n = text.length
+  let inFence = false
+  let fenceChar = 0
+  let fenceRun = 0
+  let inMath = false
+  let boundary = 0
+  let pending = -1 // a blank line, committed to `boundary` once content follows
+
+  for (let lineStart = 0; lineStart <= n; ) {
+    let lineEnd = text.indexOf('\n', lineStart)
+
+    if (lineEnd === -1) {
+      lineEnd = n
+    }
+
+    let i = lineStart
+
+    while (i < lineEnd && isSpace(text.charCodeAt(i))) {
+      i += 1
+    }
+
+    // First non-blank char code of the line, or -1 for a blank line.
+    const first = i < lineEnd ? text.charCodeAt(i) : -1
+    let marker = false
+
+    // Fence open/close (``` or ~~~, ≤3 spaces indent).
+    if ((first === BACKTICK || first === TILDE) && i - lineStart <= 3) {
+      let run = i
+
+      while (run < lineEnd && text.charCodeAt(run) === first) {
+        run += 1
+      }
+
+      if (run - i >= 3) {
+        marker = true
+
+        if (!inFence) {
+          inFence = true
+          fenceChar = first
+          fenceRun = run - i
+        } else if (first === fenceChar && run - i >= fenceRun && onlyWhitespace(text, run, lineEnd)) {
+          // A closer must use the opener's char, be at least as long, and
+          // carry nothing but whitespace after the run.
+          inFence = false
+        }
+      }
+    }
+
+    // Toggle `$$` math state on plain lines ($$ inside a fence is literal).
+    // The scan is bounded to this line on purpose: an unbounded
+    // `indexOf('$$', lineStart)` walks to the next `$$` anywhere in the
+    // document (or to its end when there is none) on every line, turning the
+    // pass quadratic on long messages.
+    if (!inFence && !marker) {
+      for (let s = lineStart; s < lineEnd - 1; s += 1) {
+        if (text.charCodeAt(s) !== DOLLAR || text.charCodeAt(s + 1) !== DOLLAR) {
+          continue
+        }
+
+        // A backslash-escaped `\$$` is not a delimiter.
+        if (s === 0 || text.charCodeAt(s - 1) !== BACKSLASH) {
+          inMath = !inMath
+        }
+
+        s += 1 // consume both dollars so the next match starts after the pair
+      }
+    }
+
+    if (first === -1 && !inFence && !inMath) {
+      pending = lineEnd + 1
+    } else if (pending !== -1) {
+      boundary = pending
+      pending = -1
+    }
+
+    lineStart = lineEnd + 1
+  }
+
+  return boundary
+}
+
+// True when text[from, to) holds nothing but spaces and tabs (vacuously true
+// for an empty range).
+function onlyWhitespace(text: string, from: number, to: number): boolean {
+  let cursor = from
+
+  while (cursor < to && isSpace(text.charCodeAt(cursor))) {
+    cursor += 1
+  }
+
+  return cursor >= to
+}
+
+/**
+ * Repairs incomplete markdown by running `remend` on the trailing top-level
+ * block only; everything before the window start is passed through verbatim.
+ * When the window starts at offset 0 this is simply `remend(text)`.
+ */
+export function tailBoundedRemend(text: string): string {
+  const windowStart = findRemendWindowStart(text)
+
+  if (windowStart <= 0) {
+    return remend(text)
+  }
+
+  const settled = text.slice(0, windowStart)
+  const tail = text.slice(windowStart)
+
+  return settled + remend(tail)
+}
--- a/apps/desktop/src/store/session.ts
+++ b/apps/desktop/src/store/session.ts
@ -1,5 +1,6 @@
-import { atom } from 'nanostores'
+import { atom, computed } from 'nanostores'

+import { lastVisibleMessageIsUser } from '@/app/chat/thread-loading'
 import type { ContextSuggestion } from '@/app/types'
 import type { HermesConnection } from '@/global'
 import type { ChatMessage } from '@/lib/chat-messages'
@ -195,6 +196,15 @@ export const $workingSessionIds = atom<string[]>([])
 export const $activeSessionId = atom<string | null>(null)
 export const $selectedStoredSessionId = atom<string | null>(null)
 export const $messages = atom<ChatMessage[]>([])
+
+// Streaming-stable derivations of $messages. During a token stream the array
+// is replaced ~30×/s; components that only care about coarse facts (is the
+// thread empty? is the tail a user message?) subscribe to these instead of
+// $messages so per-token flushes don't re-render them — nanostores' `computed`
+// only notifies when the derived VALUE changes.
+export const $messagesEmpty = computed($messages, messages => messages.length === 0)
+export const $lastVisibleMessageIsUser = computed($messages, lastVisibleMessageIsUser)
+
 export const $freshDraftReady = atom(false)
 export const $busy = atom(false)
 export const $awaitingResponse = atom(false)
--- a/nix/lib.nix
+++ b/nix/lib.nix
@ -21,7 +21,7 @@ let

  # Single npm deps fetch from the workspace root lockfile.
  # All workspace packages share this derivation.
-  npmDepsHash = "sha256-BfTSh6J2VZ/07tq2DYnKgUViZCgRhW1sC2uj18H65SE=";
+  npmDepsHash = "sha256-dFUlWvIIsCqvtGkoobs0qUzFlSdejuffI/uLoQxhW8Q=";

  npmDeps = pkgs.fetchNpmDeps {
    inherit src;
--- a/package-lock.json
+++ b/package-lock.json
@ -119,6 +119,7 @@
        "react-router-dom": "^7.17.0",
        "react-shiki": "^0.9.3",
        "remark-math": "^6.0.0",
+        "remend": "^1.3.0",
        "shiki": "^4.0.2",
        "streamdown": "^2.5.0",
        "tailwind-merge": "^3.5.0",