Tighten conversation rhythm, flatten the tool list, and smooth streaming text

Conversation rhythm: - Single `--paragraph-gap` knob drives paragraph spacing both inside a markdown block and between consecutive prose parts, out-specifying Tailwind Typography's prose margins. Code cards carry the same gap themselves so it holds at any Streamdown nesting depth. - Two-tier vertical rhythm: `--turn-block-gap` separates scaffolding (tools / thinking) from the reply; `--tool-row-gap` keeps a tool run tight. - Drop the prose indent so prose, tools, todos, and thinking share one left edge. `---` renders as quiet spacing, not a heavy rule. Flat tool list: - Tools always render as a standalone-row stack, never a "Tool actions · N steps" group. assistant-ui slices the tool range unstably (interleaved live vs. reconstructed-consecutive when settled), so grouping reshuffled the whole turn the instant it settled. Flat rows are pixel-identical either way. - Inline approvals can no longer be buried in a collapsed group body. - Remove the now-dead grouping helpers from tool-fallback-model. Empty thinking: - Suppress reasoning disclosures with no visible text (encrypted / spinner-coerced reasoning) instead of leaving an empty "Thinking" header. - Tail stall indicator returns "thinking" when a running turn goes quiet. Streaming cadence: - Smooth character-reveal decouples visible cadence from bursty arrival. - Flush queued text deltas before applying tool events so a tool row can't jump ahead of its preceding text. - Disable Nagle on the GUI WebSocket so per-token frames aren't coalesced. Polish: clarify/patch/vision_analyze tool meta, queue-panel + diff-lines spacing, sticky human bubble expands on focus (not hover).
2026-07-24 16:54:43 +00:00 · 2026-06-06 10:45:31 -05:00 · 2026-06-06 10:45:31 -05:00 · 9d31577590
commit 9d31577590
parent 6bbc5eefa0
12 changed files with 287 additions and 355 deletions
--- a/apps/desktop/src/app/chat/composer/queue-panel.tsx
+++ b/apps/desktop/src/app/chat/composer/queue-panel.tsx
@ -30,13 +30,13 @@ export function QueuePanel({ busy, editingId, entries, onDelete, onEdit, onSendN
  }

  return (
-    <div className="rounded-t-2xl border border-b-0 border-border/65 bg-[color-mix(in_srgb,var(--dt-card)_70%,transparent)] pt-0.5 pb-1">
+    <div className="rounded-t-2xl border border-b-0 border-border/65 bg-[color-mix(in_srgb,var(--dt-card)_70%,transparent)] pt-0.5 pb-1 mx-1">
      <button
-        className="flex w-full items-center gap-1.5 px-2 py-0.5 text-left text-[0.72rem] font-medium text-muted-foreground/92 transition-colors hover:text-foreground/90"
+        className="flex w-full items-center gap-1.5 px-2 text-left text-[0.6rem] font-medium text-muted-foreground/92 transition-colors hover:text-foreground/90"
        onClick={() => setCollapsed(open => !open)}
        type="button"
      >
-        <DisclosureCaret className="shrink-0" open={!collapsed} size="0.875rem" />
+        <DisclosureCaret className="shrink-0" open={!collapsed} size="1em" />
        <span className="truncate">{c.queued(entries.length)}</span>
      </button>

@ -64,11 +64,7 @@ export function QueuePanel({ busy, editingId, entries, onDelete, onEdit, onSendN
                  <p className="truncate text-[0.73rem] leading-4 text-foreground/92">{entryPreview(entry, c)}</p>
                  {(attachmentsCount > 0 || isEditing) && (
                    <div className="mt-0.5 flex items-center gap-1.5 text-[0.64rem] text-muted-foreground/75">
-                      {attachmentsCount > 0 && (
-                        <span>
-                          {c.attachments(attachmentsCount)}
-                        </span>
-                      )}
+                      {attachmentsCount > 0 && <span>{c.attachments(attachmentsCount)}</span>}
                      {isEditing && (
                        <span className="text-[color-mix(in_srgb,var(--dt-composer-ring)_78%,var(--muted-foreground))]">
                          {c.editingInComposer}
--- a/apps/desktop/src/app/session/hooks/use-message-stream.ts
+++ b/apps/desktop/src/app/session/hooks/use-message-stream.ts
@ -410,6 +410,10 @@ export function useMessageStream({
      phase: 'running' | 'complete',
      sourceEventType?: string
    ) => {
+      // Text deltas flush on a timer but tool events apply now; flush first so
+      // a tool part can't jump ahead of the text that preceded it.
+      flushQueuedDeltas(sessionId)
+
      if (!nativeSubagentSessionsRef.current.has(sessionId)) {
        for (const subagentPayload of delegateTaskPayloads(payload, phase, sourceEventType)) {
          upsertSubagent(
@ -428,7 +432,7 @@ export function useMessageStream({
        { pending: m => phase !== 'complete' || (m.pending ?? false) }
      )
    },
-    [mutateStream]
+    [flushQueuedDeltas, mutateStream]
  )

  const completeAssistantMessage = useCallback(
--- a/apps/desktop/src/components/assistant-ui/clarify-tool.tsx
+++ b/apps/desktop/src/components/assistant-ui/clarify-tool.tsx
@ -160,7 +160,7 @@ function ClarifyToolPending({ args }: ToolCallMessagePartProps) {

  return (
    <div
-      className="relative mb-3 mt-2 grid gap-2 rounded-[0.5rem] border border-border/70 bg-card/40 px-3 py-2.5 text-sm shadow-[inset_0_1px_0_color-mix(in_srgb,var(--foreground)_3%,transparent)]"
+      className="relative mb-3 mt-2 grid gap-6 rounded-[0.5rem] border border-border/70 bg-card/40 px-3 py-2.5 text-sm shadow-[inset_0_1px_0_color-mix(in_srgb,var(--foreground)_3%,transparent)]"
      data-slot="clarify-inline"
    >
      <span aria-hidden className="arc-border" />
--- a/apps/desktop/src/components/assistant-ui/markdown-text.tsx
+++ b/apps/desktop/src/components/assistant-ui/markdown-text.tsx
@ -7,7 +7,7 @@ import {
  type SyntaxHighlighterProps
 } from '@assistant-ui/react-streamdown'
 import { code } from '@streamdown/code'
-import { type ComponentProps, memo, type ReactNode, useDeferredValue, useEffect, useMemo, useState } from 'react'
+import { type ComponentProps, memo, type ReactNode, useDeferredValue, useEffect, useMemo, useRef, useState } from 'react'

 import { PreviewAttachment } from '@/components/chat/preview-attachment'
 import { SyntaxHighlighter } from '@/components/chat/shiki-highlighter'
@ -224,6 +224,88 @@ function MarkdownImage({ className, src, alt, ...props }: ComponentProps<'img'>)
  )
 }

+// Steady character-reveal for streaming text: decouples visible cadence from
+// bursty arrival so text flows instead of popping (cf. assistant-ui's useSmooth,
+// reimplemented for a tunable rate). Proportional drain — each frame reveals a
+// slice of the backlog so the reveal converges within ~REVEAL_DRAIN_MS whatever
+// the size; the per-frame cap stops a huge dump rendering as one slab. The loop
+// is gated on backlog, not isRunning, so a stream that completes mid-reveal
+// keeps draining its tail instead of snapping.
+const REVEAL_DRAIN_MS = 500
+const REVEAL_MAX_CHARS_PER_FRAME = 30
+
+/**
+ * Returns a progressively growing prefix of `text`, advanced once per
+ * animation frame, so the caller renders a steady reveal instead of whatever
+ * chunk sizes the stream happened to deliver.
+ *
+ * @param text      Full target text received so far.
+ * @param isRunning Whether the underlying part is still streaming. Decides the
+ *                  initial value ('' while running, the full text otherwise)
+ *                  and how a non-extending replacement is handled.
+ * @returns The currently revealed prefix of `text`.
+ */
+function useSmoothReveal(text: string, isRunning: boolean): string {
+  const [displayed, setDisplayed] = useState(isRunning ? '' : text)
+  const targetRef = useRef(text)
+  const shownRef = useRef(displayed)
+  const frameRef = useRef<number | null>(null)
+  const lastTickRef = useRef(0)
+
+  // Mirror the latest render values into refs so the long-lived rAF callback
+  // always reads the current target / shown text rather than a stale closure.
+  shownRef.current = displayed
+  targetRef.current = text
+
+  useEffect(() => {
+    if (typeof window === 'undefined') {
+      return
+    }
+
+    // Non-extending change (regenerate / branch / history swap): restart from
+    // empty while streaming, else snap to the replacement.
+    if (!text.startsWith(shownRef.current)) {
+      shownRef.current = isRunning ? '' : text
+      setDisplayed(shownRef.current)
+    }
+
+    // Nothing left to reveal, or a frame loop is already in flight and will
+    // pick up the new target through targetRef.
+    if (shownRef.current.length >= text.length || frameRef.current !== null) {
+      return
+    }
+
+    lastTickRef.current = performance.now()
+
+    const tick = () => {
+      const now = performance.now()
+      const dt = now - lastTickRef.current
+      lastTickRef.current = now
+
+      // Proportional drain: reveal the share of the backlog that corresponds
+      // to the elapsed time, at least one character, capped per frame.
+      const remaining = targetRef.current.length - shownRef.current.length
+      const add = Math.min(remaining, REVEAL_MAX_CHARS_PER_FRAME, Math.max(1, Math.ceil((remaining * dt) / REVEAL_DRAIN_MS)))
+      shownRef.current = targetRef.current.slice(0, shownRef.current.length + add)
+      setDisplayed(shownRef.current)
+
+      // Keep looping only while backlog remains; null marks the loop as idle
+      // so the effect above may start a new one later.
+      frameRef.current = shownRef.current.length < targetRef.current.length ? requestAnimationFrame(tick) : null
+    }
+
+    frameRef.current = requestAnimationFrame(tick)
+  }, [text, isRunning])
+
+  useEffect(
+    () => () => {
+      if (frameRef.current !== null && typeof window !== 'undefined') {
+        cancelAnimationFrame(frameRef.current)
+        // Clear the handle as well: if effects are re-run on this same
+        // instance (StrictMode's dev double-invoke, Fast Refresh), a stale
+        // non-null id would make the reveal effect bail out and never
+        // schedule another frame, leaving the text stuck.
+        frameRef.current = null
+      }
+    },
+    []
+  )
+
+  return displayed
+}
+
+/**
+ * Wraps `children` in a `TextMessagePartProvider` whose text is the smoothly
+ * revealed prefix of the current message part instead of the raw text. It
+ * sits above DeferStreamingText, so the revealed text is what the deferred
+ * markdown pipeline consumes. The provider keeps reporting `isRunning` until
+ * the reveal has caught up with the full text, even after the underlying part
+ * itself has settled.
+ */
+function SmoothStreamingText({ children }: { children: ReactNode }) {
+  const part = useMessagePartText()
+  const partRunning = part.status.type === 'running'
+  const visibleText = useSmoothReveal(part.text, partRunning)
+  // Still "running" from the consumer's point of view while a tail remains.
+  const stillRevealing = visibleText !== part.text
+
+  return (
+    <TextMessagePartProvider isRunning={partRunning || stillRevealing} text={visibleText}>
+      {children}
+    </TextMessagePartProvider>
+  )
+}
+
 /**
 * Re-publish the active message-part context with React's `useDeferredValue`
 * applied to the streaming text and status. The outer wrapper still re-renders
@ -280,7 +362,7 @@ const MARKDOWN_CONTAINER_CLASS_NAME = cn(
  'prose-a:break-words prose-p:[overflow-wrap:anywhere]',
  'prose-li:marker:text-muted-foreground/70',
  'prose-code:rounded-[0.25rem] prose-code:px-[0.1875rem] prose-code:py-px prose-code:font-mono prose-code:text-[0.9em] prose-code:font-normal prose-code:before:content-none prose-code:after:content-none',
-  '[&>*:first-child]:mt-0 [&>*:last-child]:mb-0 [&>*+*]:mt-1'
+  '[&>*:first-child]:mt-0 [&>*:last-child]:mb-0 [&>*+*]:mt-(--paragraph-gap)'
 )

 function MarkdownTextSurface({ containerClassName, containerProps }: MarkdownTextSurfaceProps) {
@ -308,12 +390,14 @@ function MarkdownTextSurface({ containerClassName, containerProps }: MarkdownTex
          <h4 className={cn('my-1 font-semibold', HEADING_SIZES.h4, className)} {...props} />
        ),
        p: ({ className, ...props }: ComponentProps<'p'>) => (
-          <p className={cn('my-1 wrap-anywhere leading-(--dt-line-height)', className)} {...props} />
+          // Vertical rhythm is owned by styles.css (`--paragraph-gap`), which
+          // must out-specify Tailwind Typography's `prose` margins — so no
+          // `my-*` here on purpose.
+          <p className={cn('wrap-anywhere leading-(--dt-line-height)', className)} {...props} />
        ),
        a: MarkdownLink,
-        hr: ({ className, ...props }: ComponentProps<'hr'>) => (
-          <hr className={cn('border-border', className)} {...props} />
-        ),
+        // `---` as quiet spacing, not a heavy full-width rule.
+        hr: (_props: ComponentProps<'hr'>) => <div aria-hidden className="my-3" />,
        blockquote: ({ className, ...props }: ComponentProps<'blockquote'>) => (
          <blockquote
            className={cn('border-l-2 border-border pl-3 text-muted-foreground italic', className)}
@ -391,18 +475,22 @@ interface MarkdownTextContentProps extends MarkdownTextSurfaceProps {
 export function MarkdownTextContent({ isRunning, text, ...surfaceProps }: MarkdownTextContentProps) {
  return (
    <TextMessagePartProvider isRunning={isRunning} text={text}>
-      <DeferStreamingText>
-        <MarkdownTextSurface {...surfaceProps} />
-      </DeferStreamingText>
+      <SmoothStreamingText>
+        <DeferStreamingText>
+          <MarkdownTextSurface {...surfaceProps} />
+        </DeferStreamingText>
+      </SmoothStreamingText>
    </TextMessagePartProvider>
  )
 }

 const MarkdownTextImpl = () => {
  return (
-    <DeferStreamingText>
-      <MarkdownTextSurface />
-    </DeferStreamingText>
+    <SmoothStreamingText>
+      <DeferStreamingText>
+        <MarkdownTextSurface />
+      </DeferStreamingText>
+    </SmoothStreamingText>
  )
 }

--- a/apps/desktop/src/components/assistant-ui/thread.tsx
+++ b/apps/desktop/src/components/assistant-ui/thread.tsx
@ -236,6 +236,7 @@ const AssistantMessage: FC<{ onBranchInNewChat?: (messageId: string) => void }>
      >
        {hoistedTodos.length > 0 && <HoistedTodoPanel todos={hoistedTodos} />}
        <MessagePrimitive.Parts components={MESSAGE_PARTS_COMPONENTS} />
+        {messageStatus === 'running' && <StreamStallIndicator activity={`${content.length}:${messageText.length}`} />}
        {previewTargets.length > 0 && (
          <div className="mt-3 flex flex-wrap gap-2">
            {previewTargets.map(target => (
@ -287,6 +288,39 @@ const ResponseLoadingIndicator: FC = () => {
  )
 }

+// Seconds of no visible output (text or part count) before a still-running turn
+// is treated as stalled and the thinking indicator returns at the tail.
+const STREAM_STALL_S = 2
+
+/**
+ * Tail "still thinking" indicator for a turn that is running but has gone
+ * quiet (tool think-time, provider stall). The caller passes an `activity`
+ * string that changes whenever visible output changes; once it has stayed the
+ * same for STREAM_STALL_S seconds, the dither plus an elapsed-seconds readout
+ * (from `useElapsedSeconds`) is rendered. Any new activity hides it again.
+ */
+const StreamStallIndicator: FC<{ activity: string }> = ({ activity }) => {
+  const [isStalled, setIsStalled] = useState(false)
+
+  // Each change of `activity` clears the flag and re-arms the stall timeout;
+  // the previous timeout is cancelled by the effect cleanup.
+  useEffect(() => {
+    setIsStalled(false)
+
+    const timeoutId = window.setTimeout(() => {
+      setIsStalled(true)
+    }, STREAM_STALL_S * 1000)
+
+    return () => {
+      window.clearTimeout(timeoutId)
+    }
+  }, [activity])
+
+  const elapsedSeconds = useElapsedSeconds(isStalled)
+
+  return isStalled ? (
+    <StatusRow className="mt-1.5" data-slot="aui_stream-stall" label="Hermes is thinking">
+      <span aria-hidden="true" className="dither inline-block size-3 rounded-[2px] text-midground/80 animate-pulse" />
+      <ActivityTimerText seconds={elapsedSeconds} />
+    </StatusRow>
+  ) : null
+}
+
 const ImageGenerateTool: FC<ToolCallMessagePartProps> = ({ result }) => {
  const generatedImage = useGeneratedImageContext()
  const running = result === undefined
@ -434,6 +468,22 @@ const ReasoningAccordionGroup: FC<{ children?: ReactNode; endIndex: number; star
        .some(p => p?.type === 'reasoning' && p.status?.type !== 'complete')
  )

+  // A reasoning group with no actual text is pure noise — drop the whole
+  // "Thinking" disclosure rather than leave an empty header eating a row. This
+  // applies live too: encrypted/spinner-coerced reasoning (Opus reasoning max)
+  // never carries visible text, and the bottom-of-thread loader already signals
+  // "thinking", so an empty header is never wanted. Real reasoning surfaces the
+  // instant its first token lands.
+  const hasContent = useAuiState(s =>
+    s.message.parts
+      .slice(Math.max(0, startIndex), endIndex + 1)
+      .some(p => p?.type === 'reasoning' && typeof p.text === 'string' && p.text.trim().length > 0)
+  )
+
+  if (!hasContent) {
+    return null
+  }
+
  return (
    <ThinkingDisclosure messageRunning={messageRunning} pending={pending} timerKey={`reasoning:${messageId}`}>
      {children}
@ -449,7 +499,7 @@ const ReasoningTextPart: FC<{ text: string; status?: { type: string } }> = ({ te
  return (
    <MarkdownTextContent
      containerClassName={cn(
-        'text-xs leading-relaxed text-muted-foreground/85',
+        'text-xs leading-snug text-muted-foreground/85',
        isRunning && 'shimmer text-muted-foreground/55'
      )}
      containerProps={{ 'data-slot': 'aui_reasoning-text' } as ComponentProps<'div'>}
--- a/apps/desktop/src/components/assistant-ui/tool-approval-group.test.tsx
+++ b/apps/desktop/src/components/assistant-ui/tool-approval-group.test.tsx
@ -1,5 +1,5 @@
 import { AssistantRuntimeProvider, type ThreadMessage, useExternalStoreRuntime } from '@assistant-ui/react'
-import { cleanup, render, screen, waitFor } from '@testing-library/react'
+import { cleanup, render, waitFor } from '@testing-library/react'
 import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'

 import { clearAllPrompts, setApprovalRequest } from '@/store/prompts'
@ -8,12 +8,11 @@ import { $toolDisclosureStates } from '@/store/tool-view'

 import { Thread } from './thread'

-// Regression coverage for the "approval buried behind a collapsed tool group"
-// bug. When 2+ tools group into a collapsed "Tool actions · N steps" row, the
-// pending tool's inline ApprovalBar lives inside the group body — which is
-// `hidden` until expanded. A live approval must surface WITHOUT the user
-// expanding anything, so ToolGroupSlot force-opens its body while an approval
-// targeting one of its pending tools is in flight.
+// Regression coverage for the "approval must never be buried" bug. Tools now
+// render as a flat list (no collapsible "N steps" group), so a pending tool's
+// inline ApprovalBar is always in the visual flow — never inside a `hidden`
+// body. These assert the bar shows only when an approval is live and is never
+// trapped under a `hidden` ancestor.

 const createdAt = new Date('2026-06-03T00:00:00.000Z')

@ -71,8 +70,7 @@ stubOffsetDimension('offsetWidth', 'clientWidth', 800)
 stubOffsetDimension('offsetHeight', 'clientHeight', 600)

 // A running assistant message with two tools: a completed read_file plus a
-// pending terminal (no result). Two visible tools → ToolGroupSlot groups them
-// behind a collapsed "Tool actions · 2 steps" header.
+// pending terminal (no result), rendered as a flat two-row list.
 function groupedPendingMessage(): ThreadMessage {
  return {
    id: 'assistant-group-1',
@ -132,32 +130,28 @@ afterEach(() => {
  $activeSessionId.set(null)
 })

-describe('ToolGroupSlot approval surfacing', () => {
-  it('hides the grouped pending tool body when there is no approval', async () => {
+describe('flat tool list approval surfacing', () => {
+  it('renders no inline approval bar when there is no live approval', async () => {
    const { container } = render(<GroupHarness message={groupedPendingMessage()} />)

-    // Group header renders collapsed; the inline approval strip lives in the
-    // hidden body, so with no live approval it must not render at all (the
-    // ApprovalBar returns null when $approvalRequest is empty).
+    // The pending terminal row mounts immediately, but its inline ApprovalBar
+    // returns null while $approvalRequest is empty.
    await waitFor(() => {
-      expect(screen.getByText(/Tool actions/)).toBeTruthy()
+      expect(container.querySelectorAll('[data-slot="tool-block"]').length).toBeGreaterThan(0)
    })
    expect(container.querySelector('[data-slot="tool-approval-inline"]')).toBeNull()
  })

-  it('force-opens the group body so the approval surfaces without expanding', async () => {
+  it('surfaces the approval inline and never under a hidden ancestor', async () => {
    setApprovalRequest({ command: 'rm -rf /tmp/x', description: 'dangerous command', sessionId: 'sess-1' })

    const { container } = render(<GroupHarness message={groupedPendingMessage()} />)

-    // Even though the group defaults collapsed, the live approval forces the
-    // body open so the inline controls are visible (and reachable, not in a
-    // hidden subtree) immediately.
    await waitFor(() => {
      const bar = container.querySelector('[data-slot="tool-approval-inline"]')
      expect(bar).not.toBeNull()
-      // The forced-open group body must not be hidden — assert no ancestor
-      // carries the `hidden` attribute that would keep the bar off-screen.
+      // Flat rows live directly in the flow — nothing should ever wrap the bar
+      // in a `hidden` subtree.
      expect(bar?.closest('[hidden]')).toBeNull()
    })
  })
--- a/apps/desktop/src/components/assistant-ui/tool-fallback-model.ts
+++ b/apps/desktop/src/components/assistant-ui/tool-fallback-model.ts
@ -88,10 +88,12 @@ const TOOL_META: Record<string, ToolMeta> = {
    tone: 'browser'
  },
  browser_type: { done: 'Typed on page', pending: 'Typing on page', icon: 'globe', tone: 'browser' },
+  clarify: { done: 'Asked a question', pending: 'Asking a question', icon: 'question', tone: 'agent' },
  edit_file: { done: 'Edited file', pending: 'Editing file', icon: 'edit', tone: 'file' },
  execute_code: { done: 'Ran code', pending: 'Running code', icon: 'terminal', tone: 'terminal' },
  image_generate: { done: 'Generated image', pending: 'Generating image', icon: 'file-media', tone: 'image' },
  list_files: { done: 'Listed files', pending: 'Listing files', icon: 'files', tone: 'file' },
+  patch: { done: 'Patched file', pending: 'Patching file', icon: 'diff', tone: 'file' },
  read_file: { done: 'Read file', pending: 'Reading file', icon: 'file', tone: 'file' },
  search_files: { done: 'Searched files', pending: 'Searching files', icon: 'search', tone: 'file' },
  session_search_recall: {
@ -102,6 +104,7 @@ const TOOL_META: Record<string, ToolMeta> = {
  },
  terminal: { done: 'Ran command', pending: 'Running command', icon: 'terminal', tone: 'terminal' },
  todo: { done: 'Updated todos', pending: 'Updating todos', icon: 'tools', tone: 'agent' },
+  vision_analyze: { done: 'Analyzed image', pending: 'Analyzing image', icon: 'eye', tone: 'image' },
  web_extract: { done: 'Read webpage', pending: 'Reading webpage', icon: 'globe', tone: 'web' },
  web_search: { done: 'Searched web', pending: 'Searching web', icon: 'search', tone: 'web' },
  write_file: { done: 'Edited file', pending: 'Editing file', icon: 'edit', tone: 'file' }
@ -1268,124 +1271,3 @@ export function buildToolView(part: ToolPart, inlineDiff: string): ToolView {
    tone: meta.tone
  }
 }
-
-function isToolPart(part: unknown): part is ToolPart {
-  if (!part || typeof part !== 'object') {
-    return false
-  }
-
-  const row = part as Record<string, unknown>
-
-  return row.type === 'tool-call' && typeof row.toolName === 'string'
-}
-
-export function groupToolParts(content: unknown): ToolPart[][] {
-  if (!Array.isArray(content)) {
-    return []
-  }
-
-  const groups: ToolPart[][] = []
-  let current: ToolPart[] = []
-
-  for (const part of content) {
-    // todo parts render in their own hoisted panel; skip from grouped tools.
-    if (isToolPart(part) && part.toolName !== 'todo') {
-      current.push(part)
-
-      continue
-    }
-
-    if (current.length) {
-      groups.push(current)
-      current = []
-    }
-  }
-
-  if (current.length) {
-    groups.push(current)
-  }
-
-  return groups
-}
-
-export function groupStatus(parts: ToolPart[]): ToolStatus {
-  if (parts.some(p => p.result === undefined)) {
-    return 'running'
-  }
-
-  const statuses = parts.map(part => toolStatus(part, parseMaybeObject(part.result)))
-  const hasError = statuses.includes('error')
-
-  if (!hasError) {
-    return 'success'
-  }
-
-  return statuses.at(-1) === 'success' ? 'warning' : 'error'
-}
-
-export function groupTitle(parts: ToolPart[]): string {
-  const prefix = PREFIX_META.find(p => parts.every(part => part.toolName.startsWith(p.prefix)))
-  const verb = prefix?.verb || 'Tool'
-
-  return `${verb} actions · ${parts.length} steps`
-}
-
-export function groupPreviewTargets(parts: ToolPart[]): string[] {
-  const seen = new Set<string>()
-  const targets: string[] = []
-
-  for (const part of parts) {
-    const view = buildToolView(part, inlineDiffFromResult(part.result))
-    const target = view.previewTarget
-
-    if (target && isPreviewableTarget(target) && !seen.has(target)) {
-      seen.add(target)
-      targets.push(target)
-    }
-  }
-
-  return targets
-}
-
-export function groupFailedStepCount(parts: ToolPart[]): number {
-  return parts.filter(part => toolStatus(part, parseMaybeObject(part.result)) === 'error').length
-}
-
-export function groupTotalDurationLabel(parts: ToolPart[]): string {
-  const seconds = parts.reduce((sum, part) => {
-    const value = numberValue(parseMaybeObject(part.result).duration_s)
-
-    return sum + (value && value > 0 ? value : 0)
-  }, 0)
-
-  if (!seconds) {
-    return ''
-  }
-
-  return formatDurationSeconds(seconds)
-}
-
-export function groupTailSubtitle(parts: ToolPart[]): string {
-  const tail = parts.at(-1)
-
-  return tail ? buildToolView(tail, '').subtitle : ''
-}
-
-export function groupCopyText(parts: ToolPart[]): string {
-  return parts
-    .map(part => {
-      const view = buildToolView(part, '')
-      const lines = [view.title]
-
-      if (view.subtitle && view.subtitle !== view.title) {
-        lines.push(view.subtitle)
-      }
-
-      if (view.detail && view.detail !== view.subtitle) {
-        lines.push(view.detail)
-      }
-
-      return lines.join('\n')
-    })
-    .join('\n\n')
-}
--- a/apps/desktop/src/components/assistant-ui/tool-fallback.tsx
+++ b/apps/desktop/src/components/assistant-ui/tool-fallback.tsx
@ -3,7 +3,6 @@
 import { type ToolCallMessagePartProps, useAuiState } from '@assistant-ui/react'
 import { useStore } from '@nanostores/react'
 import { createContext, type FC, type PropsWithChildren, type ReactNode, useContext, useMemo } from 'react'
-import { useShallow } from 'zustand/shallow'

 import { AnsiText } from '@/components/assistant-ui/ansi-text'
 import { useElapsedSeconds } from '@/components/chat/activity-timer'
@ -21,20 +20,13 @@ import { PrettyLink, LinkifiedText as SharedLinkifiedText, urlSlugTitleLabel } f
 import { AlertCircle, CheckCircle2 } from '@/lib/icons'
 import { useEnterAnimation } from '@/lib/use-enter-animation'
 import { cn } from '@/lib/utils'
-import { $approvalRequest } from '@/store/prompts'
 import { $toolInlineDiffs } from '@/store/tool-diffs'
 import { $toolDisclosureOpen, $toolViewMode, setToolDisclosureOpen } from '@/store/tool-view'

-import { APPROVAL_TOOLS, PendingToolApproval } from './tool-approval'
+import { PendingToolApproval } from './tool-approval'
 import {
-  groupCopyText as buildGroupCopyText,
  buildToolView,
  cleanVisibleText,
-  groupFailedStepCount,
-  groupPreviewTargets,
-  groupStatus,
-  groupTitle,
-  groupTotalDurationLabel,
  inlineDiffFromResult,
  isPreviewableTarget,
  looksRedundant,
@ -47,14 +39,10 @@ import {
  type ToolStatus
 } from './tool-fallback-model'

-// Tool names that ChainToolFallback intercepts and renders as something
-// other than a ToolEntry — they don't count toward "is this a group of
-// tool calls?" because they have no visible tool block.
-const SPECIAL_TOOL_NAMES = new Set(['todo', 'image_generate', 'clarify'])
-
-// `true` when the current ToolEntry is being rendered inside a group
-// wrapper. Lets ToolEntry suppress per-row chrome (timer / preview) that
-// the group already shows.
+// `true` when a ToolEntry is rendered inside an embedding wrapper that owns
+// the per-row chrome (timer / preview). The flat ToolGroupSlot sets this
+// false, so every row currently owns its own chrome; kept as a seam for any
+// future embedding surface.
 const ToolEmbedContext = createContext(false)

 // Shared header chrome for tool rows. Both the single-tool DisclosureRow
@ -263,6 +251,7 @@ function ToolEntry({ part }: ToolEntryProps) {
  const hasExpandableContent = Boolean(
    (view.previewTarget && isPreviewableTarget(view.previewTarget)) ||
    view.imageUrl ||
+    view.inlineDiff ||
    showDetail ||
    hasSearchHits ||
    toolViewMode === 'technical'
@ -403,153 +392,42 @@ function ToolEntry({ part }: ToolEntryProps) {
          )}
        </div>
      )}
-      {view.inlineDiff && <DiffLines text={view.inlineDiff} />}
+      {open && view.inlineDiff && <DiffLines text={view.inlineDiff} />}
    </div>
  )
 }

 /**
- * Always-present wrapper around the consecutive tool-call range that
- * `MessagePrimitive.Parts` already grouped for us. Renders a header +
- * collapsible body when there are 2+ visible tools; otherwise it's a
- * transparent passthrough that just owns the entry animation for the
- * single ToolEntry inside.
+ * Flat, Cursor-style tool list. assistant-ui hands us a *range* of
+ * consecutive tool-call parts, but how that range is sliced is unstable: a
+ * live stream interleaves narration/reasoning between calls (many tiny
+ * ranges), while the settled message reconstructs every tool_call back-to-back
+ * (one big range). Rendering a "Tool actions · N steps" group off that range
+ * therefore reshuffled the whole turn the instant it settled.
 *
- * Crucially, the wrapper element is the SAME `<div>` regardless of
- * group size — only the optional header element appears/disappears.
- * That preserves React identity for the inner `MessagePartByIndex`
- * children when the 1→2 transition happens, so existing tool blocks
- * never remount when a new tool joins them mid-stream.
- *
- * The previous design (per-tool ToolFallback computing its own group
- * lookup and conditionally returning either `<ToolEntry>` or
- * `<ToolGroup>`) flipped the React element type at the 1→2 transition
- * and tore down the existing tool entirely, which is what showed up as
- * "the previous tool's animation resets every time a new tool arrives."
+ * So we never group: each tool is a standalone row, and the wrapper just lays
+ * its children out on the tight `--tool-row-gap` rhythm. One range or ten,
+ * fragmented or consecutive, the result is pixel-identical — a tight, stable
+ * stack. The wrapper stays a single `<div>` of stable identity so children
+ * never remount as the range grows mid-stream. `ToolEmbedContext` is false so
+ * every row owns its own chrome (timer / preview / copy / inline approval).
 */
 export const ToolGroupSlot: FC<PropsWithChildren<{ endIndex: number; startIndex: number }>> = ({
  children,
-  endIndex,
  startIndex
 }) => {
  const messageId = useAuiState(s => s.message.id)
  const messageRunning = useAuiState(selectMessageRunning)
-
-  // Pull the visible tool parts in this range. `useShallow` makes this
-  // re-render only when the actual part references change (assistant-ui
-  // gives stable refs for unchanged parts), not on every text/reasoning
-  // delta elsewhere in the message.
-  const visibleParts = useAuiState(
-    useShallow((s: { message: { parts: readonly unknown[] } }) =>
-      s.message.parts.slice(startIndex, endIndex + 1).filter((p): p is ToolPart => {
-        if (!p || typeof p !== 'object') {
-          return false
-        }
-
-        const row = p as { toolName?: unknown; type?: unknown }
-
-        return row.type === 'tool-call' && typeof row.toolName === 'string' && !SPECIAL_TOOL_NAMES.has(row.toolName)
-      })
-    )
-  )
-
-  const isGroup = visibleParts.length > 1
-  const isRunning = messageRunning && visibleParts.some(p => p.result === undefined)
-  // Stable across the group's lifetime (start index doesn't shift when
-  // tools append to the end), so user-driven open/close persists across
-  // streaming.
-  const disclosureId = `tool-group:${messageId}:${startIndex}`
-  const userOpen = useDisclosureOpen(disclosureId)
-
-  // A live approval request must NEVER be buried inside a collapsed group —
-  // the user has to be able to act on it without first expanding "Tool
-  // actions · N steps". When an approval is in flight and this group hosts
-  // the pending approval-eligible tool that raised it (terminal /
-  // execute_code with no result yet — see tool-approval.tsx for why the
-  // single pending row IS the one that raised it), force the body open so
-  // the inline ApprovalBar surfaces. The user can still collapse the group
-  // again once the approval resolves.
-  const approvalRequest = useStore($approvalRequest)
-
-  const hostsLiveApproval =
-    approvalRequest !== null &&
-    messageRunning &&
-    visibleParts.some(p => p.result === undefined && APPROVAL_TOOLS.has(p.toolName))
-
-  const open = userOpen || hostsLiveApproval
-  const enterRef = useEnterAnimation(messageRunning, disclosureId)
-
-  const status = groupStatus(visibleParts)
-  const displayStatus = !isRunning && status === 'running' ? 'success' : status
-  const failedStepCount = useMemo(() => groupFailedStepCount(visibleParts), [visibleParts])
-  const totalDurationLabel = useMemo(() => groupTotalDurationLabel(visibleParts), [visibleParts])
-
-  const statusSummary =
-    displayStatus === 'running' || failedStepCount === 0
-      ? ''
-      : displayStatus === 'warning'
-        ? failedStepCount === 1
-          ? 'Recovered after 1 failed step'
-          : `Recovered after ${failedStepCount} failed steps`
-        : failedStepCount === 1
-          ? '1 step failed'
-          : `${failedStepCount} steps failed`
-
-  const groupCopyText = useMemo(() => buildGroupCopyText(visibleParts), [visibleParts])
-  const previewTargets = useMemo(() => groupPreviewTargets(visibleParts), [visibleParts])
+  const enterRef = useEnterAnimation(messageRunning, `tool-group:${messageId}:${startIndex}`)

  return (
-    <ToolEmbedContext.Provider value={isGroup}>
-      <div className="min-w-0 max-w-full overflow-hidden" data-slot="tool-block" ref={enterRef}>
-        {isGroup && (
-          <DisclosureRow
-            key="header"
-            onToggle={() => setToolDisclosureOpen(disclosureId, !open)}
-            open={open}
-            trailing={
-              !isRunning && groupCopyText ? (
-                <CopyButton appearance="tool-row" label="Copy activity" stopPropagation text={groupCopyText} />
-              ) : undefined
-            }
-          >
-            <span className="flex min-w-0 items-center gap-1.5">
-              <ToolGlyph status={displayStatus === 'success' ? undefined : displayStatus} />
-              <FadeText
-                className={cn(
-                  TOOL_HEADER_TITLE_CLASS,
-                  displayStatus === 'error' && 'text-destructive',
-                  displayStatus === 'warning' && 'text-amber-700 dark:text-amber-300'
-                )}
-              >
-                {groupTitle(visibleParts)}
-              </FadeText>
-              {totalDurationLabel && <span className={TOOL_HEADER_DURATION_CLASS}>{totalDurationLabel}</span>}
-            </span>
-            {statusSummary && (
-              <FadeText
-                className={cn(
-                  TOOL_HEADER_SUBTITLE_CLASS,
-                  displayStatus === 'warning' ? 'text-amber-700/80 dark:text-amber-300/85' : 'text-destructive/85'
-                )}
-              >
-                {statusSummary}
-              </FadeText>
-            )}
-          </DisclosureRow>
-        )}
-        {isGroup && previewTargets.length > 0 && (
-          <div className="mt-2 grid w-full min-w-0 max-w-full gap-2 overflow-hidden pr-2 pl-3">
-            {previewTargets.map(target => (
-              <PreviewAttachment key={target} source="tool-result" target={target} />
-            ))}
-          </div>
-        )}
-        {/* Body is always rendered so children stay mounted across collapse/
-            expand and across the 1→2 group transition. `hidden` removes it
-            from a11y/visual flow without unmounting React subtree. */}
-        <div className={cn(isGroup && 'mt-0.5 w-full overflow-hidden pr-2 pl-3')} hidden={isGroup && !open} key="body">
-          {children}
-        </div>
+    <ToolEmbedContext.Provider value={false}>
+      <div
+        className="grid min-w-0 max-w-full gap-(--tool-row-gap) overflow-hidden"
+        data-slot="tool-block"
+        ref={enterRef}
+      >
+        {children}
      </div>
    </ToolEmbedContext.Provider>
  )
--- a/apps/desktop/src/components/chat/diff-lines.tsx
+++ b/apps/desktop/src/components/chat/diff-lines.tsx
@ -38,7 +38,7 @@ export function DiffLines({ className, text, ...props }: DiffLinesProps) {
  return (
    <pre
      className={cn(
-        'mt-2 max-h-96 max-w-full min-w-0 overflow-auto rounded-md border border-border/60 bg-muted/35 px-2.5 py-1.5 font-mono text-[0.7rem] leading-relaxed text-muted-foreground',
+        'mt-1 mb-1.5 max-h-96 max-w-full min-w-0 overflow-auto rounded-md border border-border/60 bg-muted/35 px-2.5 py-1.5 font-mono text-[0.7rem] leading-relaxed text-muted-foreground',
        className
      )}
      data-slot="diff-lines"
--- a/apps/desktop/src/i18n/en.ts
+++ b/apps/desktop/src/i18n/en.ts
@ -521,8 +521,7 @@ export const en: Translations = {
    editTitle: 'Edit cron job',
    createTitle: 'New cron job',
    editDesc: 'Update the schedule, prompt, or delivery target. Changes apply on next run.',
-    createDesc:
-      'Schedule a prompt to run automatically. Use cron syntax or a natural phrase like "every 15 minutes".',
+    createDesc: 'Schedule a prompt to run automatically. Use cron syntax or a natural phrase like "every 15 minutes".',
    nameLabel: 'Name',
    namePlaceholder: 'Morning briefing',
    promptLabel: 'Prompt',
@ -592,7 +591,7 @@ export const en: Translations = {
    groupTitleGrouped: 'Ungroup sessions',
    groupTitleUngrouped: 'Group by workspace',
    allPinned: 'Everything here is pinned. Unpin a chat to show it in recents.',
-    shiftClickHint: 'Shift-click a chat to pin · drag to reorder',
+    shiftClickHint: 'Shift-click a chat to pin',
    noWorkspace: 'No workspace',
    newSessionIn: label => `New session in ${label}`,
    reorderWorkspace: label => `Reorder workspace ${label}`,
--- a/apps/desktop/src/styles.css
+++ b/apps/desktop/src/styles.css
@ -278,11 +278,21 @@
    --composer-shell-pad-block-end: 0.625rem;
    --message-text-indent: 0.75rem;
    --conversation-text-font-size: 0.8125rem;
-    --conversation-tool-font-size: var(--conversation-text-font-size);
+    --conversation-tool-font-size: 0.6875rem;
    --conversation-caption-font-size: 0.75rem;
    --conversation-line-height: 1.125rem;
    --conversation-caption-line-height: 1rem;
    --conversation-turn-gap: 0.375rem;
+    /* Gap between top-level turn blocks (prose ↔ tools ↔ thinking) — enough air
+       that scaffolding reads as separate from the reply, not crammed into it. */
+    --turn-block-gap: 0.75rem;
+    /* Tight gap between tool rows inside a single tool block, so a back-to-back
+       run still reads as one cohesive sequence. */
+    --tool-row-gap: 0.375rem;
+    /* Paragraph spacing — vertical gap between prose paragraphs, both inside a
+       markdown block and between consecutive prose parts. Single knob; tweak
+       freely. */
+    --paragraph-gap: 0.45rem;
    --sticky-human-top: 0.23rem;
    --file-tree-row-height: 1.375rem;

@ -798,14 +808,27 @@ canvas {
  font-size: inherit;
 }

-/* Streamed prose hangs slightly indented from the tool/todo column so the
-   reading column reads as a "reply" within the conversation gutter. Tools,
-   todos, and thinking blocks keep the existing --message-text-indent so they
-   remain flush with the user message text above them. */
-[data-slot='aui_assistant-message-content'] > .aui-md {
-  padding-inline-start: var(--md-text-indent, 0.5rem);
+/* Tailwind Typography sets `.prose :where(p) { margin: 1.25em }` (~16px). That
+   selector ties our `my-*` utility on specificity and wins on source order, so
+   paragraph spacing must be reclaimed here at higher specificity: one tight top
+   margin, bottom zeroed to avoid doubling. First-child reset is the next rule. */
+[data-slot='aui_assistant-message-content'] .aui-md :where(p) {
+  margin-block: var(--paragraph-gap) 0;
 }

+/* First rendered element of a prose block is flush — the block-level gap above
+   (tool / paragraph) already provides the separation. Reach one level deep too:
+   Streamdown wraps blocks in a `div.space-y-*`, so the real first line is the
+   first child's first child. */
+[data-slot='aui_assistant-message-content'] .aui-md > :first-child,
+[data-slot='aui_assistant-message-content'] .aui-md > :first-child > :first-child {
+  margin-top: 0;
+}
+
+/* Prose, tools, todos, and thinking all share one left edge (the message
+   content's --message-text-indent). No extra prose indent — a single gutter
+   reads cleaner than a ragged tool-vs-reply column. */
+
 [data-slot='aui_user-message-root'] {
  top: var(--sticky-human-top);
 }
@ -816,12 +839,13 @@ canvas {
 }

 /* Sticky human bubbles clamp to ~2 lines with a soft bottom fade so a long
-   prompt doesn't dominate the viewport while you read the response stuck
-   beneath it. The clamp lifts on hover / focus (clicking the bubble opens the
-   edit composer, which already shows the full text). --human-msg-full is the
-   measured content height (set in UserMessage) so expand/collapse animates to
-   the real height instead of overshooting the cap. */
+   prompt doesn't dominate the viewport. The clamp lifts on focus only (clicking
+   opens the edit composer, which shows the full text) — not on hover, so the
+   bubble doesn't jump as the pointer passes over it. --human-msg-full is the
+   measured content height (set in UserMessage) so it animates to the real
+   height instead of overshooting the cap. */
 .sticky-human-clamp {
+  cursor: pointer;
  max-height: calc(2 * var(--dt-line-height) * var(--conversation-text-font-size) + 0.15rem);
  overflow: hidden;
  transition: max-height 0.08s cubic-bezier(0.4, 0, 0.2, 1);
@ -832,7 +856,6 @@ canvas {
  mask-image: linear-gradient(to bottom, #000 55%, transparent);
 }

-.composer-human-message:hover .sticky-human-clamp,
 .composer-human-message:focus-within .sticky-human-clamp {
  max-height: min(var(--human-msg-full, 24rem), 24rem);
  overflow-y: auto;
@ -992,7 +1015,7 @@ canvas {
 [data-slot='aui_assistant-message-content'] .aui-md [data-streamdown='code-block'] {
  contain: none;
  overflow: visible;
-  margin-block: 0.375rem !important;
+  margin-block: var(--paragraph-gap) 0 !important;
  padding: 0 !important;
  gap: 0 !important;
  border: 0 !important;
@ -1006,6 +1029,11 @@ canvas {
 }

 [data-slot='aui_assistant-message-content'] .aui-md [data-slot='code-card'] {
+  /* Streamdown nests blocks, so the container's child-combinator rhythm can't
+     reach the card. Carry the paragraph gap on the card itself (top-owned);
+     collapses cleanly with the wrapper's margin when one is present, and the
+     first-child reset still flushes a leading code block. */
+  margin-block: var(--paragraph-gap) 0;
  position: relative;
  transition:
    border-color 180ms ease-out,
@ -1075,34 +1103,25 @@ canvas {
  opacity: 1;
 }

-/* Conversation block rhythm. Consecutive tool calls stay tight so a step
-   sequence reads as one action group; the gap between any scaffolding
-   block and adjacent prose bumps up so the model's reply visually
-   separates from its scaffolding. */
-[data-slot='tool-block'] + [data-slot='tool-block'] {
-  margin-top: 0.375rem;
-}
-
-[data-slot='tool-block']:has(> :nth-child(2)) + [data-slot='tool-block'] {
-  margin-top: 0.625rem;
-}
-
+/* Conversation block rhythm. assistant-ui renders each range as a direct child
+   of the message content with no per-part wrapper, so adjacency rules cover
+   every pairing — first block needs no reset, nested tool rows are untouched.
+   Two tiers: scaffolding (tool / thinking) gets a roomy block gap so it reads
+   as separate from the reply; consecutive prose collapses to a tight paragraph
+   rhythm so split-out text parts don't look like a big gap. */
+/* Scaffolding adjacent to anything → roomy block gap. */
 [data-slot='aui_assistant-message-content']
-  :is([data-slot='tool-block'], [data-slot='aui_thinking-disclosure'])
-  + .aui-md,
+  > :is([data-slot='tool-block'], [data-slot='aui_thinking-disclosure'])
+  + :is([data-slot='tool-block'], [data-slot='aui_thinking-disclosure'], .aui-md),
 [data-slot='aui_assistant-message-content']
-  .aui-md
+  > .aui-md
  + :is([data-slot='tool-block'], [data-slot='aui_thinking-disclosure']) {
-  margin-top: 1rem;
+  margin-top: var(--turn-block-gap);
 }

-[data-slot='aui_assistant-message-content'] [data-slot='aui_thinking-disclosure'] + [data-slot='tool-block'],
-[data-slot='aui_assistant-message-content'] [data-slot='tool-block'] + [data-slot='aui_thinking-disclosure'] {
-  margin-top: 0.75rem;
-}
-
-[data-slot='aui_assistant-message-content'] > [data-slot='tool-block']:first-child {
-  margin-top: 0;
+/* Prose ↔ prose → tight paragraph rhythm, matching in-block paragraph spacing. */
+[data-slot='aui_assistant-message-content'] > .aui-md + .aui-md {
+  margin-top: var(--paragraph-gap);
 }

 /* Message action bars — flat icon hits with default dim; only the hovered/focused control is full-strength. */
--- a/tui_gateway/ws.py
+++ b/tui_gateway/ws.py
@ -26,6 +26,7 @@ from __future__ import annotations
 import asyncio
 import json
 import logging
+import socket
 from typing import Any

 from tui_gateway import server
@ -137,6 +138,24 @@ def _ws_peer_label(ws: Any) -> str:
    return f"{host}:{port}" if port is not None else host


+def _disable_nagle(ws: Any) -> None:
+    """Disable Nagle so streamed JSON-RPC frames go out individually.
+
+    With Nagle on, the kernel coalesces the small per-token frames, so a burst
+    after the model's think-pause lands on the client in one tick and no
+    client-side smoothing can recover the cadence. GUI/WS only; chat platforms
+    don't hit this path. Best-effort: no socket is a no-op; errors log at debug.
+    """
+    try:
+        scope = getattr(ws, "scope", None) or {}
+        transport = (scope.get("extensions") or {}).get("transport") or getattr(ws, "transport", None)
+        sock = transport.get_extra_info("socket") if transport is not None else None
+        if sock is not None:
+            sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
+    except Exception as exc:  # pragma: no cover - best-effort tuning
+        _log.debug("ws TCP_NODELAY skip: %s", exc)
+
+
 async def handle_ws(ws: Any) -> None:
    """Run one WebSocket session. Wire-compatible with ``tui_gateway.entry``."""
    peer = _ws_peer_label(ws)
@ -150,6 +169,9 @@ async def handle_ws(ws: Any) -> None:
    try:
        await ws.accept()
        disconnect_reason = "connected"
+        # Push small streamed frames out immediately instead of letting Nagle
+        # batch them — keeps the live token cadence intact for GUI clients.
+        _disable_nagle(ws)
        _log.info("ws accepted peer=%s", peer)

        transport = WSTransport(ws, asyncio.get_running_loop(), peer=peer)