diff --git a/apps/desktop/package.json b/apps/desktop/package.json
index 08f1cc1aa0..6fed75f563 100644
--- a/apps/desktop/package.json
+++ b/apps/desktop/package.json
@@ -90,6 +90,7 @@
     "react-router-dom": "^7.17.0",
     "react-shiki": "^0.9.3",
     "remark-math": "^6.0.0",
+    "remend": "^1.3.0",
     "shiki": "^4.0.2",
     "streamdown": "^2.5.0",
     "tailwind-merge": "^3.5.0",
diff --git a/apps/desktop/src/app/chat/composer/index.tsx b/apps/desktop/src/app/chat/composer/index.tsx
index 43074b5ce3..6ab2abf72f 100644
--- a/apps/desktop/src/app/chat/composer/index.tsx
+++ b/apps/desktop/src/app/chat/composer/index.tsx
@@ -174,7 +174,6 @@ export function ChatBar({
   const queuedPromptsBySession = useStore($queuedPromptsBySession)
   const statusItemsBySession = useStore($statusItemsBySession)
   const scrolledUp = useStore($threadScrolledUp)
-  const sessionMessages = useStore($messages)
   const activeQueueSessionKey = queueSessionKey || sessionId || null
 
   const queuedPrompts = useMemo(
@@ -866,7 +865,9 @@ export function ChatBar({
       event.preventDefault()
       triggerKeyConsumedRef.current = true
 
-      const history = deriveUserHistory(sessionMessages, chatMessageText)
+      // $messages is read imperatively (not subscribed) so the composer
+      // doesn't re-render on every streaming delta flush.
+      const history = deriveUserHistory($messages.get(), chatMessageText)
       const entry = browseBackward(sessionId, currentDraft, history)
 
       if (entry !== null) {
@@ -891,7 +892,7 @@ export function ChatBar({
         event.preventDefault()
         triggerKeyConsumedRef.current = true
 
-        const history = deriveUserHistory(sessionMessages, chatMessageText)
+        const history = deriveUserHistory($messages.get(), chatMessageText)
         const result = browseForward(sessionId, history)
 
         if (result !== null) {
diff --git a/apps/desktop/src/app/chat/index.tsx b/apps/desktop/src/app/chat/index.tsx
index 725039620f..ab1213ef16 100644
--- a/apps/desktop/src/app/chat/index.tsx
+++ b/apps/desktop/src/app/chat/index.tsx
@@ -35,7 +35,9 @@ import {
   $gatewayState,
   $introPersonality,
   $introSeed,
+  $lastVisibleMessageIsUser,
   $messages,
+  $messagesEmpty,
   $selectedStoredSessionId,
   $sessions,
   sessionPinId
@@ -55,7 +57,7 @@ import { type DroppedFile, partitionDroppedFiles } from './hooks/use-composer-ac
 import { useFileDropZone } from './hooks/use-file-drop-zone'
 import { ScrollToBottomButton } from './scroll-to-bottom-button'
 import { SessionActionsMenu } from './sidebar/session-actions-menu'
-import { lastVisibleMessageIsUser, threadLoadingState } from './thread-loading'
+import { threadLoadingState } from './thread-loading'
 
 interface ChatViewProps extends Omit<React.ComponentProps<'div'>, 'onSubmit'> {
   gateway: HermesGateway | null
@@ -156,105 +158,35 @@ function ChatHeader({
   )
 }
 
-export function ChatView({
-  className,
-  gateway,
-  onToggleSelectedPin,
-  onDeleteSelectedSession,
+interface ChatRuntimeBoundaryProps {
+  busy: boolean // ChatView passes its $busy store value here
+  children: React.ReactNode // rendered inside the AssistantRuntimeProvider
+  onCancel: () => Promise<void> | void
+  onEdit: (message: AppendMessage) => Promise<void>
+  onReload: (parentId: string | null) => Promise<void>
+  onThreadMessagesChange: (messages: readonly ThreadMessage[]) => void
+}
+
+/**
+ * Owns the $messages subscription and the assistant-ui external-store runtime.
+ *
+ * Isolated from ChatView so the per-token delta flush (which replaces the
+ * $messages atom ~30×/s during streaming) only re-renders this component and
+ * the runtime provider. The children (Thread, ChatBar) are created by
+ * ChatView, whose render output is stable across flushes — so React bails out
+ * of re-rendering them by element identity and the stream's render cost stays
+ * confined to the streaming message's own subtree.
+ */
+function ChatRuntimeBoundary({
+  busy,
+  children,
   onCancel,
-  onAddContextRef,
-  onAddUrl,
-  onAttachImageBlob,
-  onAttachDroppedItems,
-  onBranchInNewChat,
-  maxVoiceRecordingSeconds,
-  onPasteClipboardImage,
-  onPickFiles,
-  onPickFolders,
-  onPickImages,
-  onRemoveAttachment,
-  onSteer,
-  onSubmit,
-  onThreadMessagesChange,
   onEdit,
   onReload,
-  onRestoreToMessage,
-  onTranscribeAudio
-}: ChatViewProps) {
-  const location = useLocation()
-  const activeSessionId = useStore($activeSessionId)
-  const awaitingResponse = useStore($awaitingResponse)
-  const busy = useStore($busy)
-  const contextSuggestions = useStore($contextSuggestions)
-  const currentCwd = useStore($currentCwd)
-  const currentModel = useStore($currentModel)
-  const currentProvider = useStore($currentProvider)
-  const freshDraftReady = useStore($freshDraftReady)
-  const gatewayState = useStore($gatewayState)
-  const gatewaySwapTarget = useStore($gatewaySwapTarget)
-  const gatewayOpen = gatewayState === 'open'
-  const introPersonality = useStore($introPersonality)
-  const introSeed = useStore($introSeed)
+  onThreadMessagesChange
+}: ChatRuntimeBoundaryProps) {
   const messages = useStore($messages)
-  const selectedSessionId = useStore($selectedStoredSessionId)
   const runtimeMessageCacheRef = useRef(new WeakMap<ChatMessage, ThreadMessage>())
-  const isRoutedSessionView = Boolean(routeSessionId(location.pathname))
-
-  const showIntro =
-    freshDraftReady && !isRoutedSessionView && !selectedSessionId && !activeSessionId && messages.length === 0
-
-  // Session is still loading if the route references a session we haven't
-  // resumed yet. Once `activeSessionId` is set (runtime has resumed), the
-  // session exists — even if it has zero messages (a brand-new routed
-  // session). The flicker where `busy` flips true briefly during hydrate
-  // is handled by `threadLoadingState`'s last-visible-user gate.
-  const loadingSession = isRoutedSessionView && messages.length === 0 && !activeSessionId
-  const threadLoading = threadLoadingState(loadingSession, busy, awaitingResponse, lastVisibleMessageIsUser(messages))
-  const showChatBar = !loadingSession
-  const threadKey = selectedSessionId || activeSessionId || (isRoutedSessionView ? location.pathname : 'new')
-
-  const modelOptionsQuery = useQuery<ModelOptionsResponse>({
-    queryKey: ['model-options', activeSessionId || 'global'],
-    queryFn: () => {
-      if (!activeSessionId) {
-        return getGlobalModelOptions()
-      }
-
-      if (!gateway) {
-        throw new Error('Hermes gateway unavailable')
-      }
-
-      return gateway.request<ModelOptionsResponse>('model.options', { session_id: activeSessionId })
-    },
-    enabled: gatewayOpen
-  })
-
-  const quickModels = useMemo(
-    () => quickModelOptions(modelOptionsQuery.data, currentProvider, currentModel),
-    [currentModel, currentProvider, modelOptionsQuery.data]
-  )
-
-  const chatBarState = useMemo<ChatBarState>(
-    () => ({
-      model: {
-        model: currentModel,
-        provider: currentProvider,
-        canSwitch: gatewayOpen,
-        loading: !gatewayOpen || (!currentModel && !currentProvider),
-        quickModels
-      },
-      tools: {
-        enabled: true,
-        label: 'Add context',
-        suggestions: contextSuggestions
-      },
-      voice: {
-        enabled: true,
-        active: false
-      }
-    }),
-    [contextSuggestions, currentModel, currentProvider, gatewayOpen, quickModels]
-  )
 
   const runtimeMessageRepository = useMemo(() => {
     const items: { message: ThreadMessage; parentId: string | null }[] = []
@@ -304,6 +236,113 @@ export function ChatView({
     onReload
   })
 
+  return <AssistantRuntimeProvider runtime={runtime}>{children}</AssistantRuntimeProvider>
+}
+
+export function ChatView({
+  className,
+  gateway,
+  onToggleSelectedPin,
+  onDeleteSelectedSession,
+  onCancel,
+  onAddContextRef,
+  onAddUrl,
+  onAttachImageBlob,
+  onAttachDroppedItems,
+  onBranchInNewChat,
+  maxVoiceRecordingSeconds,
+  onPasteClipboardImage,
+  onPickFiles,
+  onPickFolders,
+  onPickImages,
+  onRemoveAttachment,
+  onSteer,
+  onSubmit,
+  onThreadMessagesChange,
+  onEdit,
+  onReload,
+  onRestoreToMessage,
+  onTranscribeAudio
+}: ChatViewProps) {
+  const location = useLocation()
+  const activeSessionId = useStore($activeSessionId)
+  const awaitingResponse = useStore($awaitingResponse)
+  const busy = useStore($busy)
+  const contextSuggestions = useStore($contextSuggestions)
+  const currentCwd = useStore($currentCwd)
+  const currentModel = useStore($currentModel)
+  const currentProvider = useStore($currentProvider)
+  const freshDraftReady = useStore($freshDraftReady)
+  const gatewayState = useStore($gatewayState)
+  const gatewaySwapTarget = useStore($gatewaySwapTarget)
+  const gatewayOpen = gatewayState === 'open'
+  const introPersonality = useStore($introPersonality)
+  const introSeed = useStore($introSeed)
+  // PERF: ChatView must not subscribe to $messages — the atom is replaced on
+  // every streaming delta flush (~30×/s) and a subscription here re-renders
+  // the entire chat shell (header, chat bar, thread wrapper) per token. The
+  // runtime that DOES need the messages lives in ChatRuntimeBoundary below;
+  // this component only needs streaming-stable derivations.
+  const messagesEmpty = useStore($messagesEmpty)
+  const lastVisibleIsUser = useStore($lastVisibleMessageIsUser)
+  const selectedSessionId = useStore($selectedStoredSessionId)
+  const isRoutedSessionView = Boolean(routeSessionId(location.pathname))
+
+  const showIntro = freshDraftReady && !isRoutedSessionView && !selectedSessionId && !activeSessionId && messagesEmpty
+
+  // Session is still loading if the route references a session we haven't
+  // resumed yet. Once `activeSessionId` is set (runtime has resumed), the
+  // session exists — even if it has zero messages (a brand-new routed
+  // session). The flicker where `busy` flips true briefly during hydrate
+  // is handled by `threadLoadingState`'s last-visible-user gate.
+  const loadingSession = isRoutedSessionView && messagesEmpty && !activeSessionId
+  const threadLoading = threadLoadingState(loadingSession, busy, awaitingResponse, lastVisibleIsUser)
+  const showChatBar = !loadingSession
+  const threadKey = selectedSessionId || activeSessionId || (isRoutedSessionView ? location.pathname : 'new')
+
+  const modelOptionsQuery = useQuery<ModelOptionsResponse>({
+    queryKey: ['model-options', activeSessionId || 'global'],
+    queryFn: () => {
+      if (!activeSessionId) {
+        return getGlobalModelOptions()
+      }
+
+      if (!gateway) {
+        throw new Error('Hermes gateway unavailable')
+      }
+
+      return gateway.request<ModelOptionsResponse>('model.options', { session_id: activeSessionId })
+    },
+    enabled: gatewayOpen
+  })
+
+  const quickModels = useMemo(
+    () => quickModelOptions(modelOptionsQuery.data, currentProvider, currentModel),
+    [currentModel, currentProvider, modelOptionsQuery.data]
+  )
+
+  const chatBarState = useMemo<ChatBarState>(
+    () => ({
+      model: {
+        model: currentModel,
+        provider: currentProvider,
+        canSwitch: gatewayOpen,
+        loading: !gatewayOpen || (!currentModel && !currentProvider),
+        quickModels
+      },
+      tools: {
+        enabled: true,
+        label: 'Add context',
+        suggestions: contextSuggestions
+      },
+      voice: {
+        enabled: true,
+        active: false
+      }
+    }),
+    [contextSuggestions, currentModel, currentProvider, gatewayOpen, quickModels]
+  )
+
   // Drop files anywhere in the conversation area, not just on the composer
   // input. In-app drags (project tree / gutter) carry workspace-relative paths
   // the gateway resolves directly, so they stay inline `@file:` refs. OS/Finder
@@ -356,7 +395,13 @@ export function ChatView({
         className="relative min-h-0 max-w-full flex-1 overflow-hidden bg-(--ui-chat-surface-background) contain-[layout_paint]"
         {...dropHandlers}
       >
-        <AssistantRuntimeProvider runtime={runtime}>
+        <ChatRuntimeBoundary
+          busy={busy}
+          onCancel={onCancel}
+          onEdit={onEdit}
+          onReload={onReload}
+          onThreadMessagesChange={onThreadMessagesChange}
+        >
           <Thread
             clampToComposer={showChatBar}
             cwd={currentCwd}
@@ -397,7 +442,7 @@ export function ChatView({
               />
             </Suspense>
           )}
-        </AssistantRuntimeProvider>
+        </ChatRuntimeBoundary>
         {showChatBar && <ScrollToBottomButton />}
         <ChatDropOverlay kind={dragKind} />
         <ChatSwapOverlay profile={gatewaySwapTarget} />
diff --git a/apps/desktop/src/app/chat/thread-loading.ts b/apps/desktop/src/app/chat/thread-loading.ts
index 97686c6550..05cfb08671 100644
--- a/apps/desktop/src/app/chat/thread-loading.ts
+++ b/apps/desktop/src/app/chat/thread-loading.ts
@@ -3,9 +3,14 @@ import type { ChatMessage } from '@/lib/chat-messages'
 export type ThreadLoadingState = 'response' | 'session'
 
 export function lastVisibleMessageIsUser(messages: ChatMessage[]): boolean {
-  const lastVisible = [...messages].reverse().find(message => !message.hidden)
+  // Allocation-free reverse scan (no copy, no reverse) — runs in a hot $messages computed.
+  for (let i = messages.length - 1; i >= 0; i -= 1) {
+    if (!messages[i].hidden) {
+      return messages[i].role === 'user'
+    }
+  }
 
-  return lastVisible?.role === 'user'
+  return false // empty list, or every message is hidden
 }
 
 export function threadLoadingState(
diff --git a/apps/desktop/src/app/session/hooks/use-session-actions.ts b/apps/desktop/src/app/session/hooks/use-session-actions.ts
index a4a2feaaac..4e19c63795 100644
--- a/apps/desktop/src/app/session/hooks/use-session-actions.ts
+++ b/apps/desktop/src/app/session/hooks/use-session-actions.ts
@@ -618,10 +618,26 @@ export function useSessionActions({
         const watchWindow = isWatchWindow()
         let localSnapshot = $messages.get()
 
+        // REST transcript prefetch and the gateway resume RPC are independent
+        // — run them concurrently so a big session's wall time is
+        // max(prefetch, resume) instead of their sum. The prefetch paints the
+        // transcript as soon as it lands; the RPC binds the runtime id.
+        // Watch windows skip the prefetch — lazy resume attaches the live mirror.
+        const prefetchPromise = watchWindow ? null : getSessionMessages(storedSessionId, sessionProfile)
+
+        const resumePromise = requestGateway<SessionResumeResponse>('session.resume', {
+          session_id: storedSessionId,
+          cols: 96,
+          ...(watchWindow ? { lazy: true } : {}),
+          ...(sessionProfile ? { profile: sessionProfile } : {})
+        })
+        // The rejection is consumed by the `await` below; this guard only
+        // keeps it from surfacing as unhandled while the prefetch settles.
+        resumePromise.catch(() => undefined)
+
         try {
-          // Watch windows skip REST prefetch — lazy resume attaches the live mirror.
-          if (!watchWindow) {
-            const storedMessages = await getSessionMessages(storedSessionId, sessionProfile)
+          if (prefetchPromise) {
+            const storedMessages = await prefetchPromise
 
             if (isCurrentResume()) {
               localSnapshot = preserveLocalAssistantErrors(toChatMessages(storedMessages.messages), $messages.get())
@@ -635,12 +651,7 @@ export function useSessionActions({
           // Non-fatal: gateway resume below can still hydrate the session.
         }
 
-        const resumed = await requestGateway<SessionResumeResponse>('session.resume', {
-          session_id: storedSessionId,
-          cols: 96,
-          ...(watchWindow ? { lazy: true } : {}),
-          ...(sessionProfile ? { profile: sessionProfile } : {})
-        })
+        const resumed = await resumePromise
 
         if (!isCurrentResume()) {
           return
@@ -648,17 +659,22 @@ export function useSessionActions({
 
         const currentMessages = $messages.get()
 
-        const resumedMessages = preserveLocalAssistantErrors(
-          reconcileResumeMessages(toChatMessages(resumed.messages), currentMessages),
-          currentMessages
-        )
-        // Keep the local snapshot when resume would only reshuffle runtime projection.
+        // Keep the local snapshot when resume would only reshuffle runtime
+        // projection. When the REST prefetch already hydrated the transcript,
+        // skip converting/reconciling the resume payload entirely — on a
+        // 1000+-message session that second conversion plus the deep
+        // equivalence compare costs over a second of main-thread time.
         const preferredMessages =
           localSnapshot.length > 0
             ? localSnapshot
-            : chatMessageArraysEquivalent(currentMessages, resumedMessages)
-              ? currentMessages
-              : resumedMessages
+            : (() => {
+                const resumedMessages = preserveLocalAssistantErrors(
+                  reconcileResumeMessages(toChatMessages(resumed.messages), currentMessages),
+                  currentMessages
+                )
+
+                return chatMessageArraysEquivalent(currentMessages, resumedMessages) ? currentMessages : resumedMessages
+              })()
 
         const messagesForView = preserveLocalAssistantErrors(preferredMessages, currentMessages)
 
diff --git a/apps/desktop/src/components/assistant-ui/markdown-text.tsx b/apps/desktop/src/components/assistant-ui/markdown-text.tsx
index 8ec734bf8b..1c50b65eab 100644
--- a/apps/desktop/src/components/assistant-ui/markdown-text.tsx
+++ b/apps/desktop/src/components/assistant-ui/markdown-text.tsx
@@ -2,6 +2,7 @@
 
 import { TextMessagePartProvider, useMessagePartText } from '@assistant-ui/react'
 import {
+  parseMarkdownIntoBlocks,
   type StreamdownTextComponents,
   StreamdownTextPrimitive,
   type SyntaxHighlighterProps
@@ -26,6 +27,7 @@ import {
   mediaStreamUrl
 } from '@/lib/media'
 import { previewTargetFromMarkdownHref } from '@/lib/preview-targets'
+import { tailBoundedRemend } from '@/lib/remend-tail'
 import { cn } from '@/lib/utils'
 
 // Math rendering plugin (KaTeX). Configured once at module scope — the
@@ -42,6 +44,51 @@ import { cn } from '@/lib/utils'
 // LLM convention). The default false-setting only accepts `$$...$$`.
 const mathPlugin = createMemoizedMathPlugin({ singleDollarTextMath: true })
 
+// Stand-in for Streamdown's `parseIncompleteMarkdown` (full-text remend per
+// flush): runs `preprocessMarkdown`, then the tail-bounded repair from
+// lib/remend-tail.ts. Module scope keeps the `preprocess` prop identity stable.
+function preprocessWithTailRepair(text: string): string {
+  return tailBoundedRemend(preprocessMarkdown(text))
+}
+
+// Memoized block splitter. Streamdown calls `parseMarkdownIntoBlocks` (a full
+// `marked` lex of the entire message, ~1.6ms per 28KB) inside a useMemo keyed
+// on the text — but the same text is re-lexed every time a message REMOUNTS
+// (virtualizer scroll, session switch) and whenever multiple surfaces render
+// the same content (deferred + smooth reveal republish). A small module-level
+// LRU keyed by the exact source string removes those repeat parses (same
+// input → same output; hits share one array instance, so callers must treat
+// it as read-only). Streaming tail growth misses the cache by design (every
+// flush is a new string) — that single lex is the irreducible cost.
+const BLOCK_CACHE_MAX = 64
+const BLOCK_CACHE_MIN_LENGTH = 1024
+const blockCache = new Map<string, string[]>()
+
+function parseMarkdownIntoBlocksCached(markdown: string): string[] {
+  if (markdown.length < BLOCK_CACHE_MIN_LENGTH) {
+    return parseMarkdownIntoBlocks(markdown)
+  }
+
+  const blocks = blockCache.get(markdown) ?? parseMarkdownIntoBlocks(markdown)
+
+  // Delete-then-set moves the key to the newest end of the Map's insertion
+  // order: a recency refresh on a hit, a plain insert on a miss.
+  // NOTE(review): streaming flushes at or above the floor are inserted too and may evict older entries.
+  blockCache.delete(markdown)
+  blockCache.set(markdown, blocks)
+
+  // Only a miss can grow the map; evict the oldest (first-iterated) key.
+  if (blockCache.size > BLOCK_CACHE_MAX) {
+    const oldest = blockCache.keys().next()
+
+    if (!oldest.done) {
+      blockCache.delete(oldest.value)
+    }
+  }
+
+  return blocks
+}
+
 async function mediaSrc(path: string): Promise<string> {
   if (/^(?:https?|data):/i.test(path)) {
     return path
@@ -241,6 +288,13 @@ function MarkdownImage({ className, src, alt, ...props }: ComponentProps<'img'>)
 // keeps draining its tail instead of snapping.
 const REVEAL_DRAIN_MS = 500
 const REVEAL_MAX_CHARS_PER_FRAME = 30
+// Floor between reveal commits. Each commit republishes the text context and
+// re-runs the whole Streamdown pipeline (preprocess → remend → lex → micromark
+// on the open block) over the full accumulated text — at raw rAF cadence
+// that's 60 full parses/second and was the dominant streaming cost for
+// reasoning text. ~33ms keeps the reveal visually fluid (2 frames) while
+// halving the parse work.
+const REVEAL_MIN_COMMIT_MS = 33
 
 function useSmoothReveal(text: string, isRunning: boolean): string {
   const [displayed, setDisplayed] = useState(isRunning ? '' : text)
@@ -273,10 +327,27 @@ function useSmoothReveal(text: string, isRunning: boolean): string {
     const tick = () => {
       const now = performance.now()
       const dt = now - lastTickRef.current
+
+      // Skip this frame if the floor hasn't elapsed — the backlog math below
+      // is dt-proportional, so delayed commits reveal proportionally more.
+      if (dt < REVEAL_MIN_COMMIT_MS) {
+        frameRef.current = requestAnimationFrame(tick)
+
+        return
+      }
+
       lastTickRef.current = now
 
       const remaining = targetRef.current.length - shownRef.current.length
-      const add = Math.min(remaining, REVEAL_MAX_CHARS_PER_FRAME, Math.max(1, Math.ceil((remaining * dt) / REVEAL_DRAIN_MS)))
+
+      const add = Math.min(
+        remaining,
+        // dt-scaled against a 16.7 ms (60 Hz) reference frame, so the cap
+        // keeps the old per-frame cap's 60 Hz rate at any commit cadence.
+        Math.ceil((REVEAL_MAX_CHARS_PER_FRAME * dt) / 16.7),
+        Math.max(1, Math.ceil((remaining * dt) / REVEAL_DRAIN_MS))
+      )
+
       shownRef.current = targetRef.current.slice(0, shownRef.current.length + add)
       setDisplayed(shownRef.current)
 
@@ -460,17 +531,20 @@ function MarkdownTextSurface({ containerClassName, containerProps }: MarkdownTex
       containerProps={containerProps}
       lineNumbers={false}
       mode="streaming"
-      // Always auto-close incomplete fences — even during streaming.
-      // Without this, an unclosed ```python ... ``` whose body contains
-      // `$` (very common: shell snippets, JS template strings, dollar
-      // amounts) leaks those dollars out to the math parser and they
-      // get rendered as broken inline math until the closing fence
-      // arrives. Shiki is independently deferred via `defer={isStreaming}`
-      // on the SyntaxHighlighter component, so we don't pay code-block
-      // tokenization on every token even with this set.
-      parseIncompleteMarkdown
+      // Incomplete-markdown repair is handled by `preprocessWithTailRepair`
+      // below (tail-bounded remend) instead of Streamdown's built-in pass,
+      // which re-runs remend over the ENTIRE message on every flush — ~18%
+      // of streaming script time on 50KB+ messages. The repair itself stays
+      // always-on (even between flushes / for completed messages): an
+      // unclosed ```python ... ``` whose body contains `$` (shell snippets,
+      // JS template strings, dollar amounts) would otherwise leak those
+      // dollars to the math parser and render broken inline math. Shiki is
+      // independently deferred via `defer={isStreaming}` on the
+      // SyntaxHighlighter component.
+      parseIncompleteMarkdown={false}
+      parseMarkdownIntoBlocksFn={parseMarkdownIntoBlocksCached}
       plugins={plugins}
-      preprocess={preprocessMarkdown}
+      preprocess={preprocessWithTailRepair}
     />
   )
 }
diff --git a/apps/desktop/src/components/assistant-ui/thread.tsx b/apps/desktop/src/components/assistant-ui/thread.tsx
index effeb38e79..f2a574d475 100644
--- a/apps/desktop/src/components/assistant-ui/thread.tsx
+++ b/apps/desktop/src/components/assistant-ui/thread.tsx
@@ -7,7 +7,8 @@ import {
   MessagePrimitive,
   type ToolCallMessagePartProps,
   useAui,
-  useAuiState
+  useAuiState,
+  useMessageRuntime
 } from '@assistant-ui/react'
 import { useStore } from '@nanostores/react'
 import { IconPlayerStopFilled } from '@tabler/icons-react'
@@ -105,7 +106,11 @@ type ThreadLoadingState = 'response' | 'session'
 
 interface MessageActionProps {
   messageId: string
-  messageText: string
+  /** Lazy accessor — returns the live message text when an action (copy,
+   *  read aloud) fires. Passing the text itself as a prop would re-render the
+   *  whole footer on every streaming delta flush (the text changes ~30×/s),
+   *  which profiling showed was a large slice of per-token script time on long transcripts. */
+  getMessageText: () => string
   onBranchInNewChat?: (messageId: string) => void
 }
 
@@ -133,6 +138,28 @@ function messageContentText(content: unknown): string {
   return Array.isArray(content) ? content.map(partText).join('').trim() : ''
 }
 
+// Cheap streaming-stable "does this message have visible text" check: returns
+// on the first non-whitespace text part without concatenating the whole
+// message. Used as a useAuiState selector so its boolean output stays stable
+// across token flushes (flips false→true once per turn).
+function contentHasVisibleText(content: unknown): boolean {
+  // Parts array: report true at the first part whose text is more than
+  // whitespace, without ever joining the parts into one string.
+  if (Array.isArray(content)) {
+    for (const part of content) {
+      if (partText(part).trim() !== '') {
+        return true
+      }
+    }
+
+    return false
+  }
+
+  // Otherwise only a string with something left after trimming counts;
+  // every other shape (null, objects, numbers) has no visible text.
+  return typeof content === 'string' && content.trim() !== ''
+}
+
 export const Thread: FC<{
   clampToComposer?: boolean
   cwd?: string | null
@@ -221,20 +248,39 @@ const CenteredThreadSpinner: FC = () => {
 
 const AssistantMessage: FC<{ onBranchInNewChat?: (messageId: string) => void }> = ({ onBranchInNewChat }) => {
   const messageId = useAuiState(s => s.message.id)
-  const content = useAuiState(s => s.message.content)
-  const messageText = messageContentText(content)
+  const messageRuntime = useMessageRuntime()
+
+  // PERF: this component must NOT subscribe to the streaming text. Every
+  // selector here returns a value that stays referentially stable across
+  // token flushes (booleans, status strings, '' while running), so the
+  // 30 Hz delta stream only re-renders the markdown part and the tiny
+  // StreamStallIndicator leaf — not the footer/preview/root subtree.
+  const messageStatus = useAuiState(s => s.message.status?.type)
+  const isRunning = messageStatus === 'running'
+  const isPlaceholder = useAuiState(s => s.message.status?.type === 'running' && s.message.content.length === 0)
+  const hasVisibleText = useAuiState(s => contentHasVisibleText(s.message.content))
+
+  // Preview targets only materialize once the turn completes — while running
+  // the selector returns '' (stable), so per-token flushes skip the regex
+  // scan and the re-render it would cause.
+  const completedText = useAuiState(s =>
+    s.message.status?.type === 'running' ? '' : messageContentText(s.message.content)
+  )
 
   const previewTargets = useMemo(() => {
-    if (!messageText || !/(https?:\/\/|file:\/\/)/i.test(messageText)) {
+    if (!completedText || !/(https?:\/\/|file:\/\/)/i.test(completedText)) {
       return []
     }
 
-    return pickPrimaryPreviewTarget(extractPreviewTargets(messageText))
-  }, [messageText])
+    return pickPrimaryPreviewTarget(extractPreviewTargets(completedText))
+  }, [completedText])
 
-  const messageStatus = useAuiState(s => s.message.status?.type)
-  const isPlaceholder = messageStatus === 'running' && content.length === 0
-  const enterRef = useEnterAnimation(messageStatus === 'running', `assistant-message:${messageId}`)
+  const getMessageText = useCallback(
+    () => messageContentText(messageRuntime.getState().content),
+    [messageRuntime]
+  )
+
+  const enterRef = useEnterAnimation(isRunning, `assistant-message:${messageId}`)
 
   if (isPlaceholder) {
     return null
@@ -245,7 +291,7 @@ const AssistantMessage: FC<{ onBranchInNewChat?: (messageId: string) => void }>
       className="group flex w-full min-w-0 max-w-full flex-col gap-0 self-start overflow-hidden"
       data-role="assistant"
       data-slot="aui_assistant-message-root"
-      data-streaming={messageStatus === 'running' ? 'true' : undefined}
+      data-streaming={isRunning ? 'true' : undefined}
       ref={enterRef}
     >
       <div
@@ -254,7 +300,7 @@ const AssistantMessage: FC<{ onBranchInNewChat?: (messageId: string) => void }>
       >
         {/* Todos render in the composer status stack now, not inline. */}
         <MessagePrimitive.Parts components={MESSAGE_PARTS_COMPONENTS} />
-        {messageStatus === 'running' && <StreamStallIndicator activity={`${content.length}:${messageText.length}`} />}
+        {isRunning && <StreamStallIndicator />}
         {previewTargets.length > 0 && (
           <div className="mt-3 flex flex-wrap gap-2">
             {previewTargets.map(target => (
@@ -271,8 +317,8 @@ const AssistantMessage: FC<{ onBranchInNewChat?: (messageId: string) => void }>
           </ErrorPrimitive.Root>
         </MessagePrimitive.Error>
       </div>
-      {messageText.trim().length > 0 && (
-        <AssistantFooter messageId={messageId} messageText={messageText} onBranchInNewChat={onBranchInNewChat} />
+      {hasVisibleText && (
+        <AssistantFooter getMessageText={getMessageText} messageId={messageId} onBranchInNewChat={onBranchInNewChat} />
       )}
     </MessagePrimitive.Root>
   )
@@ -313,10 +359,28 @@ const STREAM_STALL_S = 2
 
 // Tail "still thinking" indicator: the pre-first-token spinner goes away once
 // text flows, but if the stream then goes quiet mid-turn (tool think-time,
-// provider stall) nothing signals that work continues. Watch a per-render
+// provider stall) nothing signals that work continues. Watch a per-flush
 // activity signal; when it hasn't changed for STREAM_STALL_S, re-show the
 // dither + a timer counting from the last activity.
-const StreamStallIndicator: FC<{ activity: string }> = ({ activity }) => {
+//
+// Subscribes to the activity signal ITSELF (rather than taking it as a prop)
+// so that per-token updates re-render only this leaf, not the whole
+// AssistantMessage subtree.
+const StreamStallIndicator: FC = () => {
+  const activity = useAuiState(s => {
+    let textLength = 0
+
+    for (const part of s.message.content) {
+      const text = (part as { text?: unknown }).text
+
+      if (typeof text === 'string') {
+        textLength += text.length
+      }
+    }
+
+    return `${s.message.content.length}:${textLength}`
+  })
+
   const [stalled, setStalled] = useState(false)
 
   useEffect(() => {
@@ -584,7 +648,7 @@ function formatMessageTimestamp(
   return SHORT_FMT.format(date)
 }
 
-const AssistantActionBar: FC<MessageActionProps> = ({ messageId, messageText, onBranchInNewChat }) => {
+const AssistantActionBar: FC<MessageActionProps> = ({ messageId, getMessageText, onBranchInNewChat }) => {
   const { t } = useI18n()
   const copy = t.assistant.thread
   const [menuOpen, setMenuOpen] = useState(false)
@@ -605,7 +669,7 @@ const AssistantActionBar: FC<MessageActionProps> = ({ messageId, messageText, on
         )}
         data-slot="aui_msg-actions"
       >
-        <CopyButton appearance="icon" buttonSize="icon" disabled={!messageText} label={copy.copy} text={messageText} />
+        <CopyButton appearance="icon" buttonSize="icon" label={copy.copy} text={getMessageText} />
         <ActionBarPrimitive.Reload asChild>
           <TooltipIconButton onClick={() => triggerHaptic('submit')} tooltip={copy.refresh}>
             <Codicon name="refresh" />
@@ -623,7 +687,7 @@ const AssistantActionBar: FC<MessageActionProps> = ({ messageId, messageText, on
               <GitBranchIcon />
               {copy.branchNewChat}
             </DropdownMenuItem>
-            <ReadAloudItem messageId={messageId} text={messageText} />
+            <ReadAloudItem getText={getMessageText} messageId={messageId} />
           </DropdownMenuContent>
         </DropdownMenu>
       </ActionBarPrimitive.Root>
@@ -631,7 +695,7 @@ const AssistantActionBar: FC<MessageActionProps> = ({ messageId, messageText, on
   )
 }
 
-const ReadAloudItem: FC<{ messageId: string; text: string }> = ({ messageId, text }) => {
+const ReadAloudItem: FC<{ getText: () => string; messageId: string }> = ({ getText, messageId }) => {
   const { t } = useI18n()
   const copy = t.assistant.thread
   const voicePlayback = useStore($voicePlayback)
@@ -645,6 +709,8 @@ const ReadAloudItem: FC<{ messageId: string; text: string }> = ({ messageId, tex
   const Icon = isPreparing ? Loader2Icon : isSpeaking ? VolumeXIcon : Volume2Icon
 
   const read = useCallback(async () => {
+    const text = getText()
+
     if (!text || $voicePlayback.get().status !== 'idle') {
       return
     }
@@ -654,11 +720,11 @@ const ReadAloudItem: FC<{ messageId: string; text: string }> = ({ messageId, tex
     } catch (error) {
       notifyError(error, copy.readAloudFailed)
     }
-  }, [copy.readAloudFailed, messageId, text])
+  }, [copy.readAloudFailed, getText, messageId])
 
   return (
     <DropdownMenuItem
-      disabled={isPreparing || (!isSpeaking && (anyPlaybackActive || !text))}
+      disabled={isPreparing || (!isSpeaking && anyPlaybackActive)}
       onSelect={e => {
         e.preventDefault()
         void (isSpeaking ? stopVoicePlayback() : read())
@@ -820,8 +886,10 @@ const UserMessage: FC<{
   // changes, not on every frame while the outer max-height animates open.
   const clampInnerRef = useRef<HTMLDivElement | null>(null)
   const [bodyClamped, setBodyClamped] = useState(false)
+  const lastClampHeightRef = useRef(-1)
+  const lineHeightRef = useRef(0)
 
-  const measureClamp = useCallback(() => {
+  const measureClamp = useCallback((entries: readonly ResizeObserverEntry[]) => {
     const inner = clampInnerRef.current
     const outer = inner?.parentElement
 
@@ -829,12 +897,28 @@ const UserMessage: FC<{
       return
     }
 
-    const styles = getComputedStyle(inner)
-    const lineHeight = parseFloat(styles.lineHeight) || 1.5 * parseFloat(styles.fontSize) || 20
-    const fullHeight = inner.scrollHeight
+    // Prefer the size the ResizeObserver already computed — reading
+    // `scrollHeight` outside RO timing forces a synchronous layout, and with
+    // many user bubbles observed at once those reads interleave with the
+    // style write below into a read-write-read reflow cascade.
+    const entryHeight = entries.find(entry => entry.target === inner)?.borderBoxSize?.[0]?.blockSize
+    const fullHeight = Math.ceil(entryHeight ?? inner.scrollHeight)
+
+    if (fullHeight === lastClampHeightRef.current) {
+      return
+    }
+
+    lastClampHeightRef.current = fullHeight
+
+    // Line-height is stable for the life of the bubble (font settings don't
+    // change under it) — resolve the computed style once.
+    if (!lineHeightRef.current) {
+      const styles = getComputedStyle(inner)
+      lineHeightRef.current = parseFloat(styles.lineHeight) || 1.5 * parseFloat(styles.fontSize) || 20
+    }
 
     outer.style.setProperty('--human-msg-full', `${fullHeight}px`)
-    setBodyClamped(fullHeight > lineHeight * 2 + 1)
+    setBodyClamped(fullHeight > lineHeightRef.current * 2 + 1)
   }, [])
 
   useResizeObserver(measureClamp, clampInnerRef)
diff --git a/apps/desktop/src/components/haptics-provider.tsx b/apps/desktop/src/components/haptics-provider.tsx
index e86e4428f6..233dc2f75c 100644
--- a/apps/desktop/src/components/haptics-provider.tsx
+++ b/apps/desktop/src/components/haptics-provider.tsx
@@ -15,5 +15,29 @@ export function HapticsProvider({ children }: { children: ReactNode }) {
     return () => registerHapticTrigger(null)
   }, [muted, trigger])
 
+  // web-haptics builds its AudioContext lazily inside the first trigger(), and
+  // the process's first AudioContext pays the CoreAudio spin-up (~850ms stall
+  // in profiles) — which landed on the first streamStart haptic as the first
+  // token painted. Open/close a throwaway context at idle so the real one
+  // connects to an already-warm audio service in single-digit ms.
+  useEffect(() => {
+    if (typeof requestIdleCallback !== 'function' || typeof AudioContext === 'undefined') {
+      return undefined
+    }
+
+    const id = requestIdleCallback(
+      () => {
+        try {
+          void new AudioContext().close().catch(() => undefined)
+        } catch {
+          // No audio device (headless CI) — nothing to warm.
+        }
+      },
+      { timeout: 2000 }
+    )
+
+    return () => cancelIdleCallback(id)
+  }, [])
+
   return <>{children}</>
 }
diff --git a/apps/desktop/src/components/ui/fade-text.tsx b/apps/desktop/src/components/ui/fade-text.tsx
index f80c32c213..b487d87f6f 100644
--- a/apps/desktop/src/components/ui/fade-text.tsx
+++ b/apps/desktop/src/components/ui/fade-text.tsx
@@ -34,14 +34,21 @@ function FadeTextImpl({ children, className, fadeWidth = '3rem', style, ...rest
   const ref = useRef<HTMLSpanElement>(null)
   const [overflowing, setOverflowing] = useState(false)
 
-  const measureOverflow = useCallback(() => {
+  const measureOverflow = useCallback((entries: readonly ResizeObserverEntry[]) => {
     const el = ref.current
 
     if (!el) {
       return
     }
 
-    setOverflowing(el.scrollWidth - el.clientWidth > 1)
+    // `clientWidth` from the RO entry when available (already computed);
+    // `scrollWidth` is unavoidable — content width isn't part of the entry —
+    // but inside RO timing layout is already clean so the read is cheap.
+    const clientWidth = entries.find(entry => entry.target === el)?.contentRect?.width ?? el.clientWidth
+
+    // setState is identity-stable: React bails out when the boolean doesn't
+    // change, so repeated RO fires with the same answer don't re-render.
+    setOverflowing(el.scrollWidth - clientWidth > 1)
   }, [])
 
   useResizeObserver(measureOverflow, ref)
diff --git a/apps/desktop/src/hooks/use-resize-observer.ts b/apps/desktop/src/hooks/use-resize-observer.ts
index b350a367d7..e9a0b0b50a 100644
--- a/apps/desktop/src/hooks/use-resize-observer.ts
+++ b/apps/desktop/src/hooks/use-resize-observer.ts
@@ -1,17 +1,26 @@
 import { type RefObject, useLayoutEffect, useRef } from 'react'
 
-export function useResizeObserver(onResize: () => void, ...refs: readonly RefObject<Element | null>[]) {
+/**
+ * Observe element resizes. The callback receives the ResizeObserver entries
+ * (empty on the initial synchronous call and in non-RO environments) so
+ * callers can read the observed size off the entry instead of forcing a
+ * fresh layout read.
+ */
+export function useResizeObserver(
+  onResize: (entries: readonly ResizeObserverEntry[]) => void,
+  ...refs: readonly RefObject<Element | null>[]
+) {
   const refsRef = useRef(refs)
   refsRef.current = refs
 
   useLayoutEffect(() => {
     if (typeof ResizeObserver === 'undefined') {
-      onResize()
+      onResize([])
 
       return
     }
 
-    const observer = new ResizeObserver(() => onResize())
+    const observer = new ResizeObserver(entries => onResize(entries))
     let observed = false
 
     for (const ref of refsRef.current) {
@@ -31,7 +40,7 @@ export function useResizeObserver(onResize: () => void, ...refs: readonly RefObj
       return
     }
 
-    onResize()
+    onResize([])
 
     return () => observer.disconnect()
   }, [onResize])
diff --git a/apps/desktop/src/lib/remend-tail.test.ts b/apps/desktop/src/lib/remend-tail.test.ts
new file mode 100644
index 0000000000..c730937356
--- /dev/null
+++ b/apps/desktop/src/lib/remend-tail.test.ts
@@ -0,0 +1,105 @@
+import { parseMarkdownIntoBlocks } from '@assistant-ui/react-streamdown'
+import remend from 'remend'
+import { describe, expect, it } from 'vitest'
+
+import { findRemendWindowStart, tailBoundedRemend } from './remend-tail'
+
+const CORPUS = `# Heading one
+
+Intro paragraph with **bold**, *italic*, \`inline code\`, and a [link](https://example.com).
+
+## Code
+
+\`\`\`python
+def main():
+    cost = "$5"
+    print(f"total: $\{cost}")
+\`\`\`
+
+Some text after the fence with $x^2 + y^2$ inline math.
+
+$$
+\\int_0^1 f(x) dx
+$$
+
+- list item one with **bold**
+- list item two
+
+| col a | col b |
+| ----- | ----- |
+| 1     | 2     |
+
+~~~js
+const s = \`template \${value}\`
+~~~
+
+Final paragraph with ~~strike~~ and unfinished [link text](https://exa
+`
+
+/**
+ * Render-equivalence oracle: full-text remend and tail-bounded remend may
+ * differ in raw string output ONLY in ways that cannot affect rendering —
+ * i.e. after block splitting, every block must be identical. (Streamdown
+ * renders blocks independently, so block-level equality IS render equality.)
+ */
+function blocksOf(text: string): string[] {
+  return parseMarkdownIntoBlocks(text)
+}
+
+describe('tailBoundedRemend', () => {
+  it('matches full remend block output at every streaming prefix', () => {
+    for (let end = 1; end <= CORPUS.length; end++) {
+      const prefix = CORPUS.slice(0, end)
+      const full = blocksOf(remend(prefix))
+      const tail = blocksOf(tailBoundedRemend(prefix))
+
+      expect(tail, `prefix length ${end}: ${JSON.stringify(prefix.slice(-60))}`).toEqual(full)
+    }
+  })
+
+  it('repairs an unclosed fence opened early in a long message', () => {
+    const text = `intro\n\n\`\`\`python\n${'x = 1\n'.repeat(500)}print("$dollar")`
+    const repaired = tailBoundedRemend(text)
+
+    expect(blocksOf(repaired)).toEqual(blocksOf(remend(text)))
+    // the window must reach back to the fence opener
+    expect(findRemendWindowStart(text)).toBe(text.indexOf('```python'))
+  })
+
+  it('bounds the window to the tail paragraph when no fence is open', () => {
+    const text = `para one\n\npara two\n\npara three with **bold`
+    const start = findRemendWindowStart(text)
+
+    expect(start).toBe(text.indexOf('para three'))
+    expect(tailBoundedRemend(text)).toBe(remend(text))
+  })
+
+  it('widens the window across an open $$ math block', () => {
+    const text = `before\n\n$$\n\\frac{a}{b}`
+    const start = findRemendWindowStart(text)
+
+    expect(start).toBeLessThanOrEqual(text.indexOf('$$'))
+    expect(blocksOf(tailBoundedRemend(text))).toEqual(blocksOf(remend(text)))
+  })
+
+  it('handles closed constructs without modification', () => {
+    const text = `done **bold** and \`code\`\n\n\`\`\`js\nconst a = 1\n\`\`\`\n\nlast line.`
+
+    expect(tailBoundedRemend(text)).toBe(text)
+  })
+
+  it('intentionally diverges from full remend on cross-block dangling openers', () => {
+    // Full remend scans the whole document and appends `**` for an opener
+    // left dangling in an EARLIER block, dumping stray asterisks into the
+    // unrelated tail block ("|**"). Because Streamdown splits into blocks
+    // after the repair, that opener never renders as bold either way — the
+    // tail-bounded result is the cleaner of the two. This test documents
+    // the divergence so a future remend upgrade that changes the behavior
+    // gets noticed.
+    const text = `- item with **dangling\n- item two\n\n|`
+
+    expect(remend(text).endsWith('|**')).toBe(true)
+    expect(tailBoundedRemend(text).endsWith('|')).toBe(true)
+    expect(tailBoundedRemend(text).endsWith('|**')).toBe(false)
+  })
+})
diff --git a/apps/desktop/src/lib/remend-tail.ts b/apps/desktop/src/lib/remend-tail.ts
new file mode 100644
index 0000000000..683f7dc193
--- /dev/null
+++ b/apps/desktop/src/lib/remend-tail.ts
@@ -0,0 +1,118 @@
+import remend from 'remend'
+
+// Tail-bounded incomplete-markdown repair.
+//
+// Streamdown's built-in `parseIncompleteMarkdown` runs `remend` over the whole
+// accumulated message on every streaming flush (~18% of script time on 50KB+
+// messages). But repairs only ever matter in the trailing block: inline
+// constructs can't cross a blank line, and Streamdown splits into blocks AFTER
+// the repair, so a dangling opener in an earlier block can't reach the tail.
+// We run `remend` on just that block instead.
+
+const BACKTICK = 96 // `
+const TILDE = 126 // ~
+const SPACE = 32
+const TAB = 9
+const BACKSLASH = 92
+
+const isSpace = (c: number) => c === SPACE || c === TAB
+
+/**
+ * Index of the last top-level block start — the char after the most recent
+ * blank line that sits outside any open code fence or `$$` math block. An
+ * unclosed fence/math always begins after that blank, so it stays wholly
+ * inside the window without separate tracking.
+ *
+ * One linear pass, no regex: the `$$` search position is carried across
+ * lines rather than restarting an end-of-text `indexOf` on every line, which
+ * would make a long message containing no `$$` cost O(lines × length).
+ *
+ * @param text - Markdown accumulated so far.
+ * @returns Offset where the trailing block begins; 0 when no boundary exists.
+ */
+export function findRemendWindowStart(text: string): number {
+  const n = text.length
+  let inFence = false
+  let fenceChar = 0
+  let fenceRun = 0
+  let inMath = false
+  let boundary = 0
+  let pending = -1 // a blank line, committed to `boundary` once content follows
+  let nextMath = text.indexOf('$$') // next unconsumed `$$`; -1 once none remain
+
+  for (let lineStart = 0; lineStart <= n; ) {
+    const newline = text.indexOf('\n', lineStart)
+    const lineEnd = newline === -1 ? n : newline
+    let i = lineStart
+
+    while (i < lineEnd && isSpace(text.charCodeAt(i))) {
+      i += 1
+    }
+
+    const first = i < lineEnd ? text.charCodeAt(i) : -1
+    let marker = false
+
+    // Fence open/close (``` or ~~~, ≤3 spaces indent).
+    if ((first === BACKTICK || first === TILDE) && i - lineStart <= 3) {
+      let run = i
+
+      while (run < lineEnd && text.charCodeAt(run) === first) {
+        run += 1
+      }
+
+      if (run - i >= 3) {
+        marker = true
+
+        if (!inFence) {
+          inFence = true
+          fenceChar = first
+          fenceRun = run - i
+        } else if (first === fenceChar && run - i >= fenceRun && onlyWhitespace(text, run, lineEnd)) {
+          inFence = false
+        }
+      }
+    }
+
+    // Toggle `$$` math state on plain lines ($$ inside a fence is literal).
+    if (!inFence && !marker) {
+      // A cached hit before this line sat on a skipped fence/marker line and
+      // was never consumed — drop it and search again from here.
+      if (nextMath !== -1 && nextMath < lineStart) {
+        nextMath = text.indexOf('$$', lineStart)
+      }
+
+      for (; nextMath !== -1 && nextMath < lineEnd; nextMath = text.indexOf('$$', nextMath + 2)) {
+        if (nextMath === 0 || text.charCodeAt(nextMath - 1) !== BACKSLASH) {
+          inMath = !inMath
+        }
+      }
+    }
+
+    if (first === -1 && !inFence && !inMath) {
+      pending = lineEnd + 1
+    } else if (pending !== -1) {
+      boundary = pending
+      pending = -1
+    }
+
+    lineStart = lineEnd + 1
+  }
+
+  return boundary
+}
+
+function onlyWhitespace(text: string, from: number, to: number): boolean {
+  let cursor = from // walks forward over spaces/tabs only
+
+  while (cursor < to && isSpace(text.charCodeAt(cursor))) {
+    cursor += 1
+  }
+
+  return cursor >= to // reached `to` ⇒ [from, to) held nothing but whitespace (or was empty)
+}
+
+export function tailBoundedRemend(text: string): string {
+  const windowStart = Math.max(findRemendWindowStart(text), 0) // 0 ⇒ repair the whole text
+
+  return text.slice(0, windowStart) + remend(text.slice(windowStart))
+}
diff --git a/apps/desktop/src/store/session.ts b/apps/desktop/src/store/session.ts
index dcf778c469..f1e1e2ee61 100644
--- a/apps/desktop/src/store/session.ts
+++ b/apps/desktop/src/store/session.ts
@@ -1,5 +1,6 @@
-import { atom } from 'nanostores'
+import { atom, computed } from 'nanostores'
 
+import { lastVisibleMessageIsUser } from '@/app/chat/thread-loading'
 import type { ContextSuggestion } from '@/app/types'
 import type { HermesConnection } from '@/global'
 import type { ChatMessage } from '@/lib/chat-messages'
@@ -195,6 +196,15 @@ export const $workingSessionIds = atom<string[]>([])
 export const $activeSessionId = atom<string | null>(null)
 export const $selectedStoredSessionId = atom<string | null>(null)
 export const $messages = atom<ChatMessage[]>([])
+
+// Streaming-stable derivations of $messages. During a token stream the array
+// is replaced ~30×/s; components that only care about coarse facts (is the
+// thread empty? is the tail a user message?) subscribe to these instead of
+// $messages so per-token flushes don't re-render them — nanostores' `computed`
+// only notifies when the derived VALUE changes.
+export const $messagesEmpty = computed($messages, messages => messages.length === 0)
+export const $lastVisibleMessageIsUser = computed($messages, lastVisibleMessageIsUser)
+
 export const $freshDraftReady = atom(false)
 export const $busy = atom(false)
 export const $awaitingResponse = atom(false)
diff --git a/nix/lib.nix b/nix/lib.nix
index 1e6ad96a43..da5762ad44 100644
--- a/nix/lib.nix
+++ b/nix/lib.nix
@@ -21,7 +21,7 @@ let
 
   # Single npm deps fetch from the workspace root lockfile.
   # All workspace packages share this derivation.
-  npmDepsHash = "sha256-BfTSh6J2VZ/07tq2DYnKgUViZCgRhW1sC2uj18H65SE=";
+  npmDepsHash = "sha256-dFUlWvIIsCqvtGkoobs0qUzFlSdejuffI/uLoQxhW8Q=";
 
   npmDeps = pkgs.fetchNpmDeps {
     inherit src;
diff --git a/package-lock.json b/package-lock.json
index 018074f302..717f7a12c2 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -119,6 +119,7 @@
         "react-router-dom": "^7.17.0",
         "react-shiki": "^0.9.3",
         "remark-math": "^6.0.0",
+        "remend": "^1.3.0",
         "shiki": "^4.0.2",
         "streamdown": "^2.5.0",
         "tailwind-merge": "^3.5.0",