diff --git a/apps/desktop/src/app/chat/composer/queue-panel.tsx b/apps/desktop/src/app/chat/composer/queue-panel.tsx
index ea45999385d..33906452026 100644
--- a/apps/desktop/src/app/chat/composer/queue-panel.tsx
+++ b/apps/desktop/src/app/chat/composer/queue-panel.tsx
@@ -30,13 +30,13 @@ export function QueuePanel({ busy, editingId, entries, onDelete, onEdit, onSendN
   }
 
   return (
-    <div className="rounded-t-2xl border border-b-0 border-border/65 bg-[color-mix(in_srgb,var(--dt-card)_70%,transparent)] pt-0.5 pb-1">
+    <div className="rounded-t-2xl border border-b-0 border-border/65 bg-[color-mix(in_srgb,var(--dt-card)_70%,transparent)] pt-0.5 pb-1 mx-1">
       <button
-        className="flex w-full items-center gap-1.5 px-2 py-0.5 text-left text-[0.72rem] font-medium text-muted-foreground/92 transition-colors hover:text-foreground/90"
+        className="flex w-full items-center gap-1.5 px-2 text-left text-[0.6rem] font-medium text-muted-foreground/92 transition-colors hover:text-foreground/90"
         onClick={() => setCollapsed(open => !open)}
         type="button"
       >
-        <DisclosureCaret className="shrink-0" open={!collapsed} size="0.875rem" />
+        <DisclosureCaret className="shrink-0" open={!collapsed} size="1em" />
         <span className="truncate">{c.queued(entries.length)}</span>
       </button>
 
@@ -64,11 +64,7 @@ export function QueuePanel({ busy, editingId, entries, onDelete, onEdit, onSendN
                   <p className="truncate text-[0.73rem] leading-4 text-foreground/92">{entryPreview(entry, c)}</p>
                   {(attachmentsCount > 0 || isEditing) && (
                     <div className="mt-0.5 flex items-center gap-1.5 text-[0.64rem] text-muted-foreground/75">
-                      {attachmentsCount > 0 && (
-                        <span>
-                          {c.attachments(attachmentsCount)}
-                        </span>
-                      )}
+                      {attachmentsCount > 0 && <span>{c.attachments(attachmentsCount)}</span>}
                       {isEditing && (
                         <span className="text-[color-mix(in_srgb,var(--dt-composer-ring)_78%,var(--muted-foreground))]">
                           {c.editingInComposer}
diff --git a/apps/desktop/src/app/session/hooks/use-message-stream.ts b/apps/desktop/src/app/session/hooks/use-message-stream.ts
index 59f2af2d13a..a77114b9778 100644
--- a/apps/desktop/src/app/session/hooks/use-message-stream.ts
+++ b/apps/desktop/src/app/session/hooks/use-message-stream.ts
@@ -410,6 +410,10 @@ export function useMessageStream({
       phase: 'running' | 'complete',
       sourceEventType?: string
     ) => {
+      // Text deltas flush on a timer but tool events apply now; flush first so
+      // a tool part can't jump ahead of the text that preceded it.
+      flushQueuedDeltas(sessionId)
+
       if (!nativeSubagentSessionsRef.current.has(sessionId)) {
         for (const subagentPayload of delegateTaskPayloads(payload, phase, sourceEventType)) {
           upsertSubagent(
@@ -428,7 +432,7 @@ export function useMessageStream({
         { pending: m => phase !== 'complete' || (m.pending ?? false) }
       )
     },
-    [mutateStream]
+    [flushQueuedDeltas, mutateStream]
   )
 
   const completeAssistantMessage = useCallback(
diff --git a/apps/desktop/src/components/assistant-ui/clarify-tool.tsx b/apps/desktop/src/components/assistant-ui/clarify-tool.tsx
index e0784b06c5b..cc05f1bccf3 100644
--- a/apps/desktop/src/components/assistant-ui/clarify-tool.tsx
+++ b/apps/desktop/src/components/assistant-ui/clarify-tool.tsx
@@ -160,7 +160,7 @@ function ClarifyToolPending({ args }: ToolCallMessagePartProps) {
 
   return (
     <div
-      className="relative mb-3 mt-2 grid gap-2 rounded-[0.5rem] border border-border/70 bg-card/40 px-3 py-2.5 text-sm shadow-[inset_0_1px_0_color-mix(in_srgb,var(--foreground)_3%,transparent)]"
+      className="relative mb-3 mt-2 grid gap-6 rounded-[0.5rem] border border-border/70 bg-card/40 px-3 py-2.5 text-sm shadow-[inset_0_1px_0_color-mix(in_srgb,var(--foreground)_3%,transparent)]"
       data-slot="clarify-inline"
     >
       <span aria-hidden className="arc-border" />
diff --git a/apps/desktop/src/components/assistant-ui/markdown-text.tsx b/apps/desktop/src/components/assistant-ui/markdown-text.tsx
index 57c5b7b392f..3ec9db314ec 100644
--- a/apps/desktop/src/components/assistant-ui/markdown-text.tsx
+++ b/apps/desktop/src/components/assistant-ui/markdown-text.tsx
@@ -7,7 +7,7 @@ import {
   type SyntaxHighlighterProps
 } from '@assistant-ui/react-streamdown'
 import { code } from '@streamdown/code'
-import { type ComponentProps, memo, type ReactNode, useDeferredValue, useEffect, useMemo, useState } from 'react'
+import { type ComponentProps, memo, type ReactNode, useDeferredValue, useEffect, useMemo, useRef, useState } from 'react'
 
 import { PreviewAttachment } from '@/components/chat/preview-attachment'
 import { SyntaxHighlighter } from '@/components/chat/shiki-highlighter'
@@ -224,6 +224,88 @@ function MarkdownImage({ className, src, alt, ...props }: ComponentProps<'img'>)
   )
 }
 
+// Steady character-reveal for streaming text: decouples visible cadence from
+// bursty arrival so text flows instead of popping (cf. assistant-ui's useSmooth,
+// reimplemented for a tunable rate). Proportional drain — each frame reveals a
+// dt/REVEAL_DRAIN_MS share of the backlog (min 1 char), capped at
+// REVEAL_MAX_CHARS_PER_FRAME so a huge dump streams out at the cap rather than
+// landing as one slab. The loop is gated on backlog, not isRunning, so a stream
+// that completes mid-reveal keeps draining its tail instead of snapping.
+const REVEAL_DRAIN_MS = 500
+const REVEAL_MAX_CHARS_PER_FRAME = 30
+
+function useSmoothReveal(text: string, isRunning: boolean): string {
+  const [displayed, setDisplayed] = useState(isRunning ? '' : text)
+  const targetRef = useRef(text)
+  const shownRef = useRef(displayed)
+  const frameRef = useRef<number | null>(null) // pending rAF id; null = no drain loop scheduled
+  const lastTickRef = useRef(0)
+
+  shownRef.current = displayed // mirror the latest render into refs so the rAF loop reads current values
+  targetRef.current = text
+
+  useEffect(() => {
+    if (typeof window === 'undefined') {
+      return
+    }
+
+    // Non-extending change (regenerate / branch / history swap): restart from
+    // empty while streaming, else snap to the replacement.
+    if (!text.startsWith(shownRef.current)) {
+      shownRef.current = isRunning ? '' : text
+      setDisplayed(shownRef.current)
+    }
+
+    if (shownRef.current.length >= text.length || frameRef.current !== null) {
+      return
+    }
+
+    lastTickRef.current = performance.now()
+
+    const tick = () => {
+      const now = performance.now()
+      const dt = now - lastTickRef.current
+      lastTickRef.current = now
+
+      const remaining = targetRef.current.length - shownRef.current.length
+      const add = Math.min(remaining, REVEAL_MAX_CHARS_PER_FRAME, Math.max(1, Math.ceil((remaining * dt) / REVEAL_DRAIN_MS)))
+      shownRef.current = targetRef.current.slice(0, shownRef.current.length + add)
+      setDisplayed(shownRef.current)
+
+      frameRef.current = shownRef.current.length < targetRef.current.length ? requestAnimationFrame(tick) : null
+    }
+
+    frameRef.current = requestAnimationFrame(tick)
+  }, [text, isRunning])
+
+  useEffect(() => {
+    return () => {
+      if (frameRef.current !== null && typeof window !== 'undefined') {
+        cancelAnimationFrame(frameRef.current)
+        frameRef.current = null // a stale id would stop the drain effect restarting after an effect replay
+      }
+    }
+  }, [])
+
+  return displayed
+}
+
+// Re-provides the part context with useSmoothReveal's output as `text`. Sits
+// above DeferStreamingText so the revealed text is what feeds the deferred
+// markdown pipeline. `isRunning` stays true until the reveal has caught up, so
+// the running state (and its caret) outlives the underlying part settling.
+function SmoothStreamingText({ children }: { children: ReactNode }) {
+  const part = useMessagePartText()
+  const streaming = part.status.type === 'running'
+  const visibleText = useSmoothReveal(part.text, streaming)
+
+  return (
+    <TextMessagePartProvider isRunning={streaming || visibleText !== part.text} text={visibleText}>
+      {children}
+    </TextMessagePartProvider>
+  )
+}
+
 /**
  * Re-publish the active message-part context with React's `useDeferredValue`
  * applied to the streaming text and status. The outer wrapper still re-renders
@@ -280,7 +362,7 @@ const MARKDOWN_CONTAINER_CLASS_NAME = cn(
   'prose-a:break-words prose-p:[overflow-wrap:anywhere]',
   'prose-li:marker:text-muted-foreground/70',
   'prose-code:rounded-[0.25rem] prose-code:px-[0.1875rem] prose-code:py-px prose-code:font-mono prose-code:text-[0.9em] prose-code:font-normal prose-code:before:content-none prose-code:after:content-none',
-  '[&>*:first-child]:mt-0 [&>*:last-child]:mb-0 [&>*+*]:mt-1'
+  '[&>*:first-child]:mt-0 [&>*:last-child]:mb-0 [&>*+*]:mt-(--paragraph-gap)'
 )
 
 function MarkdownTextSurface({ containerClassName, containerProps }: MarkdownTextSurfaceProps) {
@@ -308,12 +390,14 @@ function MarkdownTextSurface({ containerClassName, containerProps }: MarkdownTex
           <h4 className={cn('my-1 font-semibold', HEADING_SIZES.h4, className)} {...props} />
         ),
         p: ({ className, ...props }: ComponentProps<'p'>) => (
-          <p className={cn('my-1 wrap-anywhere leading-(--dt-line-height)', className)} {...props} />
+          // Vertical rhythm is owned by styles.css (`--paragraph-gap`), which
+          // must out-specify Tailwind Typography's `prose` margins — so no
+          // `my-*` here on purpose.
+          <p className={cn('wrap-anywhere leading-(--dt-line-height)', className)} {...props} />
         ),
         a: MarkdownLink,
-        hr: ({ className, ...props }: ComponentProps<'hr'>) => (
-          <hr className={cn('border-border', className)} {...props} />
-        ),
+        // `---` as quiet spacing, not a heavy full-width rule.
+        hr: (_props: ComponentProps<'hr'>) => <div aria-hidden className="my-3" />,
         blockquote: ({ className, ...props }: ComponentProps<'blockquote'>) => (
           <blockquote
             className={cn('border-l-2 border-border pl-3 text-muted-foreground italic', className)}
@@ -391,18 +475,22 @@ interface MarkdownTextContentProps extends MarkdownTextSurfaceProps {
 export function MarkdownTextContent({ isRunning, text, ...surfaceProps }: MarkdownTextContentProps) {
   return (
     <TextMessagePartProvider isRunning={isRunning} text={text}>
-      <DeferStreamingText>
-        <MarkdownTextSurface {...surfaceProps} />
-      </DeferStreamingText>
+      <SmoothStreamingText>
+        <DeferStreamingText>
+          <MarkdownTextSurface {...surfaceProps} />
+        </DeferStreamingText>
+      </SmoothStreamingText>
     </TextMessagePartProvider>
   )
 }
 
 const MarkdownTextImpl = () => {
   return (
-    <DeferStreamingText>
-      <MarkdownTextSurface />
-    </DeferStreamingText>
+    <SmoothStreamingText>
+      <DeferStreamingText>
+        <MarkdownTextSurface />
+      </DeferStreamingText>
+    </SmoothStreamingText>
   )
 }
 
diff --git a/apps/desktop/src/components/assistant-ui/thread.tsx b/apps/desktop/src/components/assistant-ui/thread.tsx
index 21c91bf8b3d..315bee5c12b 100644
--- a/apps/desktop/src/components/assistant-ui/thread.tsx
+++ b/apps/desktop/src/components/assistant-ui/thread.tsx
@@ -236,6 +236,7 @@ const AssistantMessage: FC<{ onBranchInNewChat?: (messageId: string) => void }>
       >
         {hoistedTodos.length > 0 && <HoistedTodoPanel todos={hoistedTodos} />}
         <MessagePrimitive.Parts components={MESSAGE_PARTS_COMPONENTS} />
+        {messageStatus === 'running' && <StreamStallIndicator activity={`${content.length}:${messageText.length}`} />}
         {previewTargets.length > 0 && (
           <div className="mt-3 flex flex-wrap gap-2">
             {previewTargets.map(target => (
@@ -287,6 +288,39 @@ const ResponseLoadingIndicator: FC = () => {
   )
 }
 
+// Seconds of no visible output (text or part count) before a still-running turn
+// is treated as stalled and the thinking indicator returns at the tail.
+const STREAM_STALL_S = 2
+
+// Tail "still thinking" indicator: the pre-first-token spinner goes away once
+// text flows, but if the stream then goes quiet mid-turn (tool think-time,
+// provider stall) nothing signals that work continues. Watch a per-render
+// activity signal; when it hasn't changed for STREAM_STALL_S, re-show the
+// dither + a timer counting from the last activity.
+const StreamStallIndicator: FC<{ activity: string }> = ({ activity }) => {
+  // True once `activity` has gone STREAM_STALL_S seconds without changing.
+  const [quiet, setQuiet] = useState(false)
+
+  // Any change in `activity` hides the row and re-arms the stall timeout;
+  // the cleanup drops the pending timeout on change and on unmount.
+  useEffect(() => {
+    setQuiet(false)
+    const timer = window.setTimeout(() => setQuiet(true), STREAM_STALL_S * 1000)
+
+    return () => window.clearTimeout(timer)
+  }, [activity])
+
+  // NOTE(review): presumably ticks only while `quiet` is true — confirm in activity-timer.
+  const quietSeconds = useElapsedSeconds(quiet)
+
+  return quiet ? (
+    <StatusRow className="mt-1.5" data-slot="aui_stream-stall" label="Hermes is thinking">
+      <span aria-hidden="true" className="dither inline-block size-3 rounded-[2px] text-midground/80 animate-pulse" />
+      <ActivityTimerText seconds={quietSeconds} />
+    </StatusRow>
+  ) : null
+}
+
 const ImageGenerateTool: FC<ToolCallMessagePartProps> = ({ result }) => {
   const generatedImage = useGeneratedImageContext()
   const running = result === undefined
@@ -434,6 +468,22 @@ const ReasoningAccordionGroup: FC<{ children?: ReactNode; endIndex: number; star
         .some(p => p?.type === 'reasoning' && p.status?.type !== 'complete')
   )
 
+  // A reasoning group with no actual text is pure noise — drop the whole
+  // "Thinking" disclosure rather than leave an empty header eating a row. This
+  // applies live too: encrypted/spinner-coerced reasoning (Opus reasoning max)
+  // never carries visible text, and the bottom-of-thread loader already signals
+  // "thinking", so an empty header is never wanted. Real reasoning surfaces the
+  // instant its first token lands.
+  const hasContent = useAuiState(s =>
+    s.message.parts
+      .slice(Math.max(0, startIndex), endIndex + 1)
+      .some(p => p?.type === 'reasoning' && typeof p.text === 'string' && p.text.trim().length > 0)
+  )
+
+  if (!hasContent) {
+    return null
+  }
+
   return (
     <ThinkingDisclosure messageRunning={messageRunning} pending={pending} timerKey={`reasoning:${messageId}`}>
       {children}
@@ -449,7 +499,7 @@ const ReasoningTextPart: FC<{ text: string; status?: { type: string } }> = ({ te
   return (
     <MarkdownTextContent
       containerClassName={cn(
-        'text-xs leading-relaxed text-muted-foreground/85',
+        'text-xs leading-snug text-muted-foreground/85',
         isRunning && 'shimmer text-muted-foreground/55'
       )}
       containerProps={{ 'data-slot': 'aui_reasoning-text' } as ComponentProps<'div'>}
diff --git a/apps/desktop/src/components/assistant-ui/tool-approval-group.test.tsx b/apps/desktop/src/components/assistant-ui/tool-approval-group.test.tsx
index b3dfff2e928..0f897e54d75 100644
--- a/apps/desktop/src/components/assistant-ui/tool-approval-group.test.tsx
+++ b/apps/desktop/src/components/assistant-ui/tool-approval-group.test.tsx
@@ -1,5 +1,5 @@
 import { AssistantRuntimeProvider, type ThreadMessage, useExternalStoreRuntime } from '@assistant-ui/react'
-import { cleanup, render, screen, waitFor } from '@testing-library/react'
+import { cleanup, render, waitFor } from '@testing-library/react'
 import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
 
 import { clearAllPrompts, setApprovalRequest } from '@/store/prompts'
@@ -8,12 +8,11 @@ import { $toolDisclosureStates } from '@/store/tool-view'
 
 import { Thread } from './thread'
 
-// Regression coverage for the "approval buried behind a collapsed tool group"
-// bug. When 2+ tools group into a collapsed "Tool actions · N steps" row, the
-// pending tool's inline ApprovalBar lives inside the group body — which is
-// `hidden` until expanded. A live approval must surface WITHOUT the user
-// expanding anything, so ToolGroupSlot force-opens its body while an approval
-// targeting one of its pending tools is in flight.
+// Regression coverage for the "approval must never be buried" bug. Tools now
+// render as a flat list (no collapsible "N steps" group), so a pending tool's
+// inline ApprovalBar is always in the visual flow — never inside a `hidden`
+// body. These assert the bar shows only when an approval is live and is never
+// trapped under a `hidden` ancestor.
 
 const createdAt = new Date('2026-06-03T00:00:00.000Z')
 
@@ -71,8 +70,7 @@ stubOffsetDimension('offsetWidth', 'clientWidth', 800)
 stubOffsetDimension('offsetHeight', 'clientHeight', 600)
 
 // A running assistant message with two tools: a completed read_file plus a
-// pending terminal (no result). Two visible tools → ToolGroupSlot groups them
-// behind a collapsed "Tool actions · 2 steps" header.
+// pending terminal (no result), rendered as a flat two-row list.
 function groupedPendingMessage(): ThreadMessage {
   return {
     id: 'assistant-group-1',
@@ -132,32 +130,28 @@ afterEach(() => {
   $activeSessionId.set(null)
 })
 
-describe('ToolGroupSlot approval surfacing', () => {
-  it('hides the grouped pending tool body when there is no approval', async () => {
+describe('flat tool list approval surfacing', () => {
+  it('renders no inline approval bar when there is no live approval', async () => {
     const { container } = render(<GroupHarness message={groupedPendingMessage()} />)
 
-    // Group header renders collapsed; the inline approval strip lives in the
-    // hidden body, so with no live approval it must not render at all (the
-    // ApprovalBar returns null when $approvalRequest is empty).
+    // The pending terminal row mounts immediately, but its inline ApprovalBar
+    // returns null while $approvalRequest is empty.
     await waitFor(() => {
-      expect(screen.getByText(/Tool actions/)).toBeTruthy()
+      expect(container.querySelectorAll('[data-slot="tool-block"]').length).toBeGreaterThan(0)
     })
     expect(container.querySelector('[data-slot="tool-approval-inline"]')).toBeNull()
   })
 
-  it('force-opens the group body so the approval surfaces without expanding', async () => {
+  it('surfaces the approval inline and never under a hidden ancestor', async () => {
     setApprovalRequest({ command: 'rm -rf /tmp/x', description: 'dangerous command', sessionId: 'sess-1' })
 
     const { container } = render(<GroupHarness message={groupedPendingMessage()} />)
 
-    // Even though the group defaults collapsed, the live approval forces the
-    // body open so the inline controls are visible (and reachable, not in a
-    // hidden subtree) immediately.
     await waitFor(() => {
       const bar = container.querySelector('[data-slot="tool-approval-inline"]')
       expect(bar).not.toBeNull()
-      // The forced-open group body must not be hidden — assert no ancestor
-      // carries the `hidden` attribute that would keep the bar off-screen.
+      // Flat rows live directly in the flow — nothing should ever wrap the bar
+      // in a `hidden` subtree.
       expect(bar?.closest('[hidden]')).toBeNull()
     })
   })
diff --git a/apps/desktop/src/components/assistant-ui/tool-fallback-model.ts b/apps/desktop/src/components/assistant-ui/tool-fallback-model.ts
index 25fa75190a1..f827384682e 100644
--- a/apps/desktop/src/components/assistant-ui/tool-fallback-model.ts
+++ b/apps/desktop/src/components/assistant-ui/tool-fallback-model.ts
@@ -88,10 +88,12 @@ const TOOL_META: Record<string, ToolMeta> = {
     tone: 'browser'
   },
   browser_type: { done: 'Typed on page', pending: 'Typing on page', icon: 'globe', tone: 'browser' },
+  clarify: { done: 'Asked a question', pending: 'Asking a question', icon: 'question', tone: 'agent' },
   edit_file: { done: 'Edited file', pending: 'Editing file', icon: 'edit', tone: 'file' },
   execute_code: { done: 'Ran code', pending: 'Running code', icon: 'terminal', tone: 'terminal' },
   image_generate: { done: 'Generated image', pending: 'Generating image', icon: 'file-media', tone: 'image' },
   list_files: { done: 'Listed files', pending: 'Listing files', icon: 'files', tone: 'file' },
+  patch: { done: 'Patched file', pending: 'Patching file', icon: 'diff', tone: 'file' },
   read_file: { done: 'Read file', pending: 'Reading file', icon: 'file', tone: 'file' },
   search_files: { done: 'Searched files', pending: 'Searching files', icon: 'search', tone: 'file' },
   session_search_recall: {
@@ -102,6 +104,7 @@ const TOOL_META: Record<string, ToolMeta> = {
   },
   terminal: { done: 'Ran command', pending: 'Running command', icon: 'terminal', tone: 'terminal' },
   todo: { done: 'Updated todos', pending: 'Updating todos', icon: 'tools', tone: 'agent' },
+  vision_analyze: { done: 'Analyzed image', pending: 'Analyzing image', icon: 'eye', tone: 'image' },
   web_extract: { done: 'Read webpage', pending: 'Reading webpage', icon: 'globe', tone: 'web' },
   web_search: { done: 'Searched web', pending: 'Searching web', icon: 'search', tone: 'web' },
   write_file: { done: 'Edited file', pending: 'Editing file', icon: 'edit', tone: 'file' }
@@ -1268,124 +1271,3 @@ export function buildToolView(part: ToolPart, inlineDiff: string): ToolView {
     tone: meta.tone
   }
 }
-
-function isToolPart(part: unknown): part is ToolPart {
-  if (!part || typeof part !== 'object') {
-    return false
-  }
-
-  const row = part as Record<string, unknown>
-
-  return row.type === 'tool-call' && typeof row.toolName === 'string'
-}
-
-export function groupToolParts(content: unknown): ToolPart[][] {
-  if (!Array.isArray(content)) {
-    return []
-  }
-
-  const groups: ToolPart[][] = []
-  let current: ToolPart[] = []
-
-  for (const part of content) {
-    // todo parts render in their own hoisted panel; skip from grouped tools.
-    if (isToolPart(part) && part.toolName !== 'todo') {
-      current.push(part)
-
-      continue
-    }
-
-    if (current.length) {
-      groups.push(current)
-      current = []
-    }
-  }
-
-  if (current.length) {
-    groups.push(current)
-  }
-
-  return groups
-}
-
-export function groupStatus(parts: ToolPart[]): ToolStatus {
-  if (parts.some(p => p.result === undefined)) {
-    return 'running'
-  }
-
-  const statuses = parts.map(part => toolStatus(part, parseMaybeObject(part.result)))
-  const hasError = statuses.includes('error')
-
-  if (!hasError) {
-    return 'success'
-  }
-
-  return statuses.at(-1) === 'success' ? 'warning' : 'error'
-}
-
-export function groupTitle(parts: ToolPart[]): string {
-  const prefix = PREFIX_META.find(p => parts.every(part => part.toolName.startsWith(p.prefix)))
-  const verb = prefix?.verb || 'Tool'
-
-  return `${verb} actions · ${parts.length} steps`
-}
-
-export function groupPreviewTargets(parts: ToolPart[]): string[] {
-  const seen = new Set<string>()
-  const targets: string[] = []
-
-  for (const part of parts) {
-    const view = buildToolView(part, inlineDiffFromResult(part.result))
-    const target = view.previewTarget
-
-    if (target && isPreviewableTarget(target) && !seen.has(target)) {
-      seen.add(target)
-      targets.push(target)
-    }
-  }
-
-  return targets
-}
-
-export function groupFailedStepCount(parts: ToolPart[]): number {
-  return parts.filter(part => toolStatus(part, parseMaybeObject(part.result)) === 'error').length
-}
-
-export function groupTotalDurationLabel(parts: ToolPart[]): string {
-  const seconds = parts.reduce((sum, part) => {
-    const value = numberValue(parseMaybeObject(part.result).duration_s)
-
-    return sum + (value && value > 0 ? value : 0)
-  }, 0)
-
-  if (!seconds) {
-    return ''
-  }
-
-  return formatDurationSeconds(seconds)
-}
-
-export function groupTailSubtitle(parts: ToolPart[]): string {
-  const tail = parts.at(-1)
-
-  return tail ? buildToolView(tail, '').subtitle : ''
-}
-
-export function groupCopyText(parts: ToolPart[]): string {
-  return parts
-    .map(part => {
-      const view = buildToolView(part, '')
-      const lines = [view.title]
-
-      if (view.subtitle && view.subtitle !== view.title) {
-        lines.push(view.subtitle)
-      }
-
-      if (view.detail && view.detail !== view.subtitle) {
-        lines.push(view.detail)
-      }
-
-      return lines.join('\n')
-    })
-    .join('\n\n')
-}
diff --git a/apps/desktop/src/components/assistant-ui/tool-fallback.tsx b/apps/desktop/src/components/assistant-ui/tool-fallback.tsx
index ff0a4652fc0..3afd202e12f 100644
--- a/apps/desktop/src/components/assistant-ui/tool-fallback.tsx
+++ b/apps/desktop/src/components/assistant-ui/tool-fallback.tsx
@@ -3,7 +3,6 @@
 import { type ToolCallMessagePartProps, useAuiState } from '@assistant-ui/react'
 import { useStore } from '@nanostores/react'
 import { createContext, type FC, type PropsWithChildren, type ReactNode, useContext, useMemo } from 'react'
-import { useShallow } from 'zustand/shallow'
 
 import { AnsiText } from '@/components/assistant-ui/ansi-text'
 import { useElapsedSeconds } from '@/components/chat/activity-timer'
@@ -21,20 +20,13 @@ import { PrettyLink, LinkifiedText as SharedLinkifiedText, urlSlugTitleLabel } f
 import { AlertCircle, CheckCircle2 } from '@/lib/icons'
 import { useEnterAnimation } from '@/lib/use-enter-animation'
 import { cn } from '@/lib/utils'
-import { $approvalRequest } from '@/store/prompts'
 import { $toolInlineDiffs } from '@/store/tool-diffs'
 import { $toolDisclosureOpen, $toolViewMode, setToolDisclosureOpen } from '@/store/tool-view'
 
-import { APPROVAL_TOOLS, PendingToolApproval } from './tool-approval'
+import { PendingToolApproval } from './tool-approval'
 import {
-  groupCopyText as buildGroupCopyText,
   buildToolView,
   cleanVisibleText,
-  groupFailedStepCount,
-  groupPreviewTargets,
-  groupStatus,
-  groupTitle,
-  groupTotalDurationLabel,
   inlineDiffFromResult,
   isPreviewableTarget,
   looksRedundant,
@@ -47,14 +39,10 @@ import {
   type ToolStatus
 } from './tool-fallback-model'
 
-// Tool names that ChainToolFallback intercepts and renders as something
-// other than a ToolEntry — they don't count toward "is this a group of
-// tool calls?" because they have no visible tool block.
-const SPECIAL_TOOL_NAMES = new Set(['todo', 'image_generate', 'clarify'])
-
-// `true` when the current ToolEntry is being rendered inside a group
-// wrapper. Lets ToolEntry suppress per-row chrome (timer / preview) that
-// the group already shows.
+// `true` when a ToolEntry is rendered inside an embedding wrapper that owns
+// the per-row chrome (timer / preview). The flat ToolGroupSlot sets this
+// false, so every row currently owns its own chrome; kept as a seam for any
+// future embedding surface.
 const ToolEmbedContext = createContext(false)
 
 // Shared header chrome for tool rows. Both the single-tool DisclosureRow
@@ -263,6 +251,7 @@ function ToolEntry({ part }: ToolEntryProps) {
   const hasExpandableContent = Boolean(
     (view.previewTarget && isPreviewableTarget(view.previewTarget)) ||
     view.imageUrl ||
+    view.inlineDiff ||
     showDetail ||
     hasSearchHits ||
     toolViewMode === 'technical'
@@ -403,153 +392,42 @@ function ToolEntry({ part }: ToolEntryProps) {
           )}
         </div>
       )}
-      {view.inlineDiff && <DiffLines text={view.inlineDiff} />}
+      {open && view.inlineDiff && <DiffLines text={view.inlineDiff} />}
     </div>
   )
 }
 
 /**
- * Always-present wrapper around the consecutive tool-call range that
- * `MessagePrimitive.Parts` already grouped for us. Renders a header +
- * collapsible body when there are 2+ visible tools; otherwise it's a
- * transparent passthrough that just owns the entry animation for the
- * single ToolEntry inside.
+ * Flat, Cursor-style tool list. assistant-ui hands us a *range* of
+ * consecutive tool-call parts, but how that range is sliced is unstable: a
+ * live stream interleaves narration/reasoning between calls (many tiny
+ * ranges), while the settled message reconstructs every tool_call back-to-back
+ * (one big range). Rendering a "Tool actions · N steps" group off that range
+ * therefore reshuffled the whole turn the instant it settled.
  *
- * Crucially, the wrapper element is the SAME `<div>` regardless of
- * group size — only the optional header element appears/disappears.
- * That preserves React identity for the inner `MessagePartByIndex`
- * children when the 1→2 transition happens, so existing tool blocks
- * never remount when a new tool joins them mid-stream.
- *
- * The previous design (per-tool ToolFallback computing its own group
- * lookup and conditionally returning either `<ToolEntry>` or
- * `<ToolGroup>`) flipped the React element type at the 1→2 transition
- * and tore down the existing tool entirely, which is what showed up as
- * "the previous tool's animation resets every time a new tool arrives."
+ * So we never group: each tool is a standalone row, and the wrapper just lays
+ * its children out on the tight `--tool-row-gap` rhythm. One range or ten,
+ * fragmented or consecutive, the result is pixel-identical — a tight, stable
+ * stack. The wrapper stays a single `<div>` of stable identity so children
+ * never remount as the range grows mid-stream. `ToolEmbedContext` is false so
+ * every row owns its own chrome (timer / preview / copy / inline approval).
  */
 export const ToolGroupSlot: FC<PropsWithChildren<{ endIndex: number; startIndex: number }>> = ({
   children,
-  endIndex,
   startIndex
 }) => {
   const messageId = useAuiState(s => s.message.id)
   const messageRunning = useAuiState(selectMessageRunning)
-
-  // Pull the visible tool parts in this range. `useShallow` makes this
-  // re-render only when the actual part references change (assistant-ui
-  // gives stable refs for unchanged parts), not on every text/reasoning
-  // delta elsewhere in the message.
-  const visibleParts = useAuiState(
-    useShallow((s: { message: { parts: readonly unknown[] } }) =>
-      s.message.parts.slice(startIndex, endIndex + 1).filter((p): p is ToolPart => {
-        if (!p || typeof p !== 'object') {
-          return false
-        }
-
-        const row = p as { toolName?: unknown; type?: unknown }
-
-        return row.type === 'tool-call' && typeof row.toolName === 'string' && !SPECIAL_TOOL_NAMES.has(row.toolName)
-      })
-    )
-  )
-
-  const isGroup = visibleParts.length > 1
-  const isRunning = messageRunning && visibleParts.some(p => p.result === undefined)
-  // Stable across the group's lifetime (start index doesn't shift when
-  // tools append to the end), so user-driven open/close persists across
-  // streaming.
-  const disclosureId = `tool-group:${messageId}:${startIndex}`
-  const userOpen = useDisclosureOpen(disclosureId)
-
-  // A live approval request must NEVER be buried inside a collapsed group —
-  // the user has to be able to act on it without first expanding "Tool
-  // actions · N steps". When an approval is in flight and this group hosts
-  // the pending approval-eligible tool that raised it (terminal /
-  // execute_code with no result yet — see tool-approval.tsx for why the
-  // single pending row IS the one that raised it), force the body open so
-  // the inline ApprovalBar surfaces. The user can still collapse the group
-  // again once the approval resolves.
-  const approvalRequest = useStore($approvalRequest)
-
-  const hostsLiveApproval =
-    approvalRequest !== null &&
-    messageRunning &&
-    visibleParts.some(p => p.result === undefined && APPROVAL_TOOLS.has(p.toolName))
-
-  const open = userOpen || hostsLiveApproval
-  const enterRef = useEnterAnimation(messageRunning, disclosureId)
-
-  const status = groupStatus(visibleParts)
-  const displayStatus = !isRunning && status === 'running' ? 'success' : status
-  const failedStepCount = useMemo(() => groupFailedStepCount(visibleParts), [visibleParts])
-  const totalDurationLabel = useMemo(() => groupTotalDurationLabel(visibleParts), [visibleParts])
-
-  const statusSummary =
-    displayStatus === 'running' || failedStepCount === 0
-      ? ''
-      : displayStatus === 'warning'
-        ? failedStepCount === 1
-          ? 'Recovered after 1 failed step'
-          : `Recovered after ${failedStepCount} failed steps`
-        : failedStepCount === 1
-          ? '1 step failed'
-          : `${failedStepCount} steps failed`
-
-  const groupCopyText = useMemo(() => buildGroupCopyText(visibleParts), [visibleParts])
-  const previewTargets = useMemo(() => groupPreviewTargets(visibleParts), [visibleParts])
+  const enterRef = useEnterAnimation(messageRunning, `tool-group:${messageId}:${startIndex}`)
 
   return (
-    <ToolEmbedContext.Provider value={isGroup}>
-      <div className="min-w-0 max-w-full overflow-hidden" data-slot="tool-block" ref={enterRef}>
-        {isGroup && (
-          <DisclosureRow
-            key="header"
-            onToggle={() => setToolDisclosureOpen(disclosureId, !open)}
-            open={open}
-            trailing={
-              !isRunning && groupCopyText ? (
-                <CopyButton appearance="tool-row" label="Copy activity" stopPropagation text={groupCopyText} />
-              ) : undefined
-            }
-          >
-            <span className="flex min-w-0 items-center gap-1.5">
-              <ToolGlyph status={displayStatus === 'success' ? undefined : displayStatus} />
-              <FadeText
-                className={cn(
-                  TOOL_HEADER_TITLE_CLASS,
-                  displayStatus === 'error' && 'text-destructive',
-                  displayStatus === 'warning' && 'text-amber-700 dark:text-amber-300'
-                )}
-              >
-                {groupTitle(visibleParts)}
-              </FadeText>
-              {totalDurationLabel && <span className={TOOL_HEADER_DURATION_CLASS}>{totalDurationLabel}</span>}
-            </span>
-            {statusSummary && (
-              <FadeText
-                className={cn(
-                  TOOL_HEADER_SUBTITLE_CLASS,
-                  displayStatus === 'warning' ? 'text-amber-700/80 dark:text-amber-300/85' : 'text-destructive/85'
-                )}
-              >
-                {statusSummary}
-              </FadeText>
-            )}
-          </DisclosureRow>
-        )}
-        {isGroup && previewTargets.length > 0 && (
-          <div className="mt-2 grid w-full min-w-0 max-w-full gap-2 overflow-hidden pr-2 pl-3">
-            {previewTargets.map(target => (
-              <PreviewAttachment key={target} source="tool-result" target={target} />
-            ))}
-          </div>
-        )}
-        {/* Body is always rendered so children stay mounted across collapse/
-            expand and across the 1→2 group transition. `hidden` removes it
-            from a11y/visual flow without unmounting React subtree. */}
-        <div className={cn(isGroup && 'mt-0.5 w-full overflow-hidden pr-2 pl-3')} hidden={isGroup && !open} key="body">
-          {children}
-        </div>
+    <ToolEmbedContext.Provider value={false}>
+      <div
+        className="grid min-w-0 max-w-full gap-(--tool-row-gap) overflow-hidden"
+        data-slot="tool-block"
+        ref={enterRef}
+      >
+        {children}
       </div>
     </ToolEmbedContext.Provider>
   )
diff --git a/apps/desktop/src/components/chat/diff-lines.tsx b/apps/desktop/src/components/chat/diff-lines.tsx
index 926b77edf92..a6e025ae2ac 100644
--- a/apps/desktop/src/components/chat/diff-lines.tsx
+++ b/apps/desktop/src/components/chat/diff-lines.tsx
@@ -38,7 +38,7 @@ export function DiffLines({ className, text, ...props }: DiffLinesProps) {
   return (
     <pre
       className={cn(
-        'mt-2 max-h-96 max-w-full min-w-0 overflow-auto rounded-md border border-border/60 bg-muted/35 px-2.5 py-1.5 font-mono text-[0.7rem] leading-relaxed text-muted-foreground',
+        'mt-1 mb-1.5 max-h-96 max-w-full min-w-0 overflow-auto rounded-md border border-border/60 bg-muted/35 px-2.5 py-1.5 font-mono text-[0.7rem] leading-relaxed text-muted-foreground',
         className
       )}
       data-slot="diff-lines"
diff --git a/apps/desktop/src/i18n/en.ts b/apps/desktop/src/i18n/en.ts
index 19709a8e3da..179cc2b269f 100644
--- a/apps/desktop/src/i18n/en.ts
+++ b/apps/desktop/src/i18n/en.ts
@@ -521,8 +521,7 @@ export const en: Translations = {
     editTitle: 'Edit cron job',
     createTitle: 'New cron job',
     editDesc: 'Update the schedule, prompt, or delivery target. Changes apply on next run.',
-    createDesc:
-      'Schedule a prompt to run automatically. Use cron syntax or a natural phrase like "every 15 minutes".',
+    createDesc: 'Schedule a prompt to run automatically. Use cron syntax or a natural phrase like "every 15 minutes".',
     nameLabel: 'Name',
     namePlaceholder: 'Morning briefing',
     promptLabel: 'Prompt',
@@ -592,7 +591,7 @@ export const en: Translations = {
     groupTitleGrouped: 'Ungroup sessions',
     groupTitleUngrouped: 'Group by workspace',
     allPinned: 'Everything here is pinned. Unpin a chat to show it in recents.',
-    shiftClickHint: 'Shift-click a chat to pin · drag to reorder',
+    shiftClickHint: 'Shift-click a chat to pin',
     noWorkspace: 'No workspace',
     newSessionIn: label => `New session in ${label}`,
     reorderWorkspace: label => `Reorder workspace ${label}`,
diff --git a/apps/desktop/src/styles.css b/apps/desktop/src/styles.css
index 5d0d4ca2538..21de81a8c1a 100644
--- a/apps/desktop/src/styles.css
+++ b/apps/desktop/src/styles.css
@@ -278,11 +278,21 @@
     --composer-shell-pad-block-end: 0.625rem;
     --message-text-indent: 0.75rem;
     --conversation-text-font-size: 0.8125rem;
-    --conversation-tool-font-size: var(--conversation-text-font-size);
+    --conversation-tool-font-size: 0.6875rem;
     --conversation-caption-font-size: 0.75rem;
     --conversation-line-height: 1.125rem;
     --conversation-caption-line-height: 1rem;
     --conversation-turn-gap: 0.375rem;
+    /* Gap between top-level turn blocks (prose ↔ tools ↔ thinking) — enough air
+       that scaffolding reads as separate from the reply, not crammed into it. */
+    --turn-block-gap: 0.75rem;
+    /* Tight gap between tool rows inside a single action group, so a back-to-back
+       run still reads as one cohesive sequence. */
+    --tool-row-gap: 0.375rem;
+    /* Paragraph spacing — vertical gap between prose paragraphs, both inside a
+       markdown block and between consecutive prose parts. Single knob; tweak
+       freely. */
+    --paragraph-gap: 0.45rem;
     --sticky-human-top: 0.23rem;
     --file-tree-row-height: 1.375rem;
 
@@ -798,14 +808,27 @@ canvas {
   font-size: inherit;
 }
 
-/* Streamed prose hangs slightly indented from the tool/todo column so the
-   reading column reads as a "reply" within the conversation gutter. Tools,
-   todos, and thinking blocks keep the existing --message-text-indent so they
-   remain flush with the user message text above them. */
-[data-slot='aui_assistant-message-content'] > .aui-md {
-  padding-inline-start: var(--md-text-indent, 0.5rem);
+/* Tailwind Typography sets `.prose :where(p) { margin: 1.25em }` (~16px). That
+   selector ties our `my-*` utility on specificity and wins on source order, so
+   paragraph spacing must be reclaimed here at higher specificity. One tight
+   top-margin (bottom zeroed to avoid doubling), first child reset to flush. */
+[data-slot='aui_assistant-message-content'] .aui-md :where(p) {
+  margin-block: var(--paragraph-gap) 0;
 }
 
+/* First rendered element of a prose block is flush — the block-level gap above
+   (tool / paragraph) already provides the separation. Reach one level deep too:
+   Streamdown wraps blocks in a `div.space-y-*`, so the real first line is the
+   first child's first child. */
+[data-slot='aui_assistant-message-content'] .aui-md > :first-child,
+[data-slot='aui_assistant-message-content'] .aui-md > :first-child > :first-child {
+  margin-top: 0;
+}
+
+/* Prose, tools, todos, and thinking all share one left edge (the message
+   content's --message-text-indent). No extra prose indent — a single gutter
+   reads cleaner than a ragged tool-vs-reply column. */
+
 [data-slot='aui_user-message-root'] {
   top: var(--sticky-human-top);
 }
@@ -816,12 +839,13 @@ canvas {
 }
 
 /* Sticky human bubbles clamp to ~2 lines with a soft bottom fade so a long
-   prompt doesn't dominate the viewport while you read the response stuck
-   beneath it. The clamp lifts on hover / focus (clicking the bubble opens the
-   edit composer, which already shows the full text). --human-msg-full is the
-   measured content height (set in UserMessage) so expand/collapse animates to
-   the real height instead of overshooting the cap. */
+   prompt doesn't dominate the viewport. The clamp lifts on focus only (clicking
+   opens the edit composer, which shows the full text) — not on hover, so the
+   bubble doesn't jump as the pointer passes over it. --human-msg-full is the
+   measured content height (set in UserMessage) so it animates to the real
+   height instead of overshooting the cap. */
 .sticky-human-clamp {
+  cursor: pointer;
   max-height: calc(2 * var(--dt-line-height) * var(--conversation-text-font-size) + 0.15rem);
   overflow: hidden;
   transition: max-height 0.08s cubic-bezier(0.4, 0, 0.2, 1);
@@ -832,7 +856,6 @@ canvas {
   mask-image: linear-gradient(to bottom, #000 55%, transparent);
 }
 
-.composer-human-message:hover .sticky-human-clamp,
 .composer-human-message:focus-within .sticky-human-clamp {
   max-height: min(var(--human-msg-full, 24rem), 24rem);
   overflow-y: auto;
@@ -992,7 +1015,7 @@ canvas {
 [data-slot='aui_assistant-message-content'] .aui-md [data-streamdown='code-block'] {
   contain: none;
   overflow: visible;
-  margin-block: 0.375rem !important;
+  margin-block: var(--paragraph-gap) 0 !important;
   padding: 0 !important;
   gap: 0 !important;
   border: 0 !important;
@@ -1006,6 +1029,11 @@ canvas {
 }
 
 [data-slot='aui_assistant-message-content'] .aui-md [data-slot='code-card'] {
+  /* Streamdown nests blocks, so the child-combinator rhythm can't reach the
+     card; carry the paragraph gap on the card itself (top-owned, collapsing with
+     the wrapper's margin when present). NOTE(review): the first-child reset may
+     not flush a leading code block: the wrapper's margin is `!important`. */
+  margin-block: var(--paragraph-gap) 0;
   position: relative;
   transition:
     border-color 180ms ease-out,
@@ -1075,34 +1103,25 @@ canvas {
   opacity: 1;
 }
 
-/* Conversation block rhythm. Consecutive tool calls stay tight so a step
-   sequence reads as one action group; the gap between any scaffolding
-   block and adjacent prose bumps up so the model's reply visually
-   separates from its scaffolding. */
-[data-slot='tool-block'] + [data-slot='tool-block'] {
-  margin-top: 0.375rem;
-}
-
-[data-slot='tool-block']:has(> :nth-child(2)) + [data-slot='tool-block'] {
-  margin-top: 0.625rem;
-}
-
+/* Conversation block rhythm. assistant-ui renders each range as a direct child
+   of the message content with no per-part wrapper, so adjacency rules cover
+   every pairing — first block needs no reset, nested tool rows are untouched.
+   Two tiers: scaffolding (tool / thinking) gets a roomy block gap so it reads
+   as separate from the reply; consecutive prose collapses to a tight paragraph
+   rhythm so split-out text parts don't look like a big gap. */
+/* Scaffolding adjacent to anything → roomy block gap. */
 [data-slot='aui_assistant-message-content']
-  :is([data-slot='tool-block'], [data-slot='aui_thinking-disclosure'])
-  + .aui-md,
+  > :is([data-slot='tool-block'], [data-slot='aui_thinking-disclosure'])
+  + :is([data-slot='tool-block'], [data-slot='aui_thinking-disclosure'], .aui-md),
 [data-slot='aui_assistant-message-content']
-  .aui-md
+  > .aui-md
   + :is([data-slot='tool-block'], [data-slot='aui_thinking-disclosure']) {
-  margin-top: 1rem;
+  margin-top: var(--turn-block-gap);
 }
 
-[data-slot='aui_assistant-message-content'] [data-slot='aui_thinking-disclosure'] + [data-slot='tool-block'],
-[data-slot='aui_assistant-message-content'] [data-slot='tool-block'] + [data-slot='aui_thinking-disclosure'] {
-  margin-top: 0.75rem;
-}
-
-[data-slot='aui_assistant-message-content'] > [data-slot='tool-block']:first-child {
-  margin-top: 0;
+/* Prose ↔ prose → tight paragraph rhythm, matching in-block paragraph spacing. */
+[data-slot='aui_assistant-message-content'] > .aui-md + .aui-md {
+  margin-top: var(--paragraph-gap);
 }
 
 /* Message action bars — flat icon hits with default dim; only the hovered/focused control is full-strength. */
diff --git a/tui_gateway/ws.py b/tui_gateway/ws.py
index e822f7f874c..1babfc1d3c2 100644
--- a/tui_gateway/ws.py
+++ b/tui_gateway/ws.py
@@ -26,6 +26,7 @@ from __future__ import annotations
 import asyncio
 import json
 import logging
+import socket
 from typing import Any
 
 from tui_gateway import server
@@ -137,6 +138,24 @@ def _ws_peer_label(ws: Any) -> str:
     return f"{host}:{port}" if port is not None else host
 
 
+def _disable_nagle(ws: Any) -> None:
+    """Disable Nagle so streamed JSON-RPC frames go out individually.
+
+    Without it the kernel coalesces the small per-token frames, so a burst after
+    the model's think-pause lands on the client in one tick and no client-side
+    smoothing can recover the cadence. GUI/WS only; chat platforms don't hit
+    this path. Best-effort: no socket is a silent no-op; errors log at debug.
+    """
+    try:
+        scope = getattr(ws, "scope", None) or {}  # ws may expose no scope at all
+        transport = (scope.get("extensions") or {}).get("transport") or getattr(ws, "transport", None)
+        sock = transport.get_extra_info("socket") if transport is not None else None  # None when no transport was found
+        if sock is not None:  # NOTE(review): asyncio TCP transports reportedly set TCP_NODELAY already (3.6+) — confirm this isn't redundant
+            sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
+    except Exception as exc:  # pragma: no cover - best-effort tuning
+        _log.debug("ws TCP_NODELAY skip: %s", exc)
+
+
 async def handle_ws(ws: Any) -> None:
     """Run one WebSocket session. Wire-compatible with ``tui_gateway.entry``."""
     peer = _ws_peer_label(ws)
@@ -150,6 +169,9 @@ async def handle_ws(ws: Any) -> None:
     try:
         await ws.accept()
         disconnect_reason = "connected"
+        # Push small streamed frames out immediately instead of letting Nagle
+        # batch them — keeps the live token cadence intact for GUI clients.
+        _disable_nagle(ws)
         _log.info("ws accepted peer=%s", peer)
 
         transport = WSTransport(ws, asyncio.get_running_loop(), peer=peer)