diff --git a/ui-tui/src/__tests__/createGatewayEventHandler.test.ts b/ui-tui/src/__tests__/createGatewayEventHandler.test.ts
index ef55d807c..07721d441 100644
--- a/ui-tui/src/__tests__/createGatewayEventHandler.test.ts
+++ b/ui-tui/src/__tests__/createGatewayEventHandler.test.ts
@@ -15,8 +15,7 @@ const buildCtx = (appended: Msg[]) =>
     composer: {
       dequeue: () => undefined,
       queueEditRef: ref<null | number>(null),
-      sendQueued: vi.fn(),
-      setInput: vi.fn()
+      sendQueued: vi.fn()
     },
     gateway: {
       gw: { request: vi.fn() },
@@ -30,9 +29,6 @@ const buildCtx = (appended: Msg[]) =>
       resumeById: vi.fn(),
       setCatalog: vi.fn()
     },
-    submission: {
-      submitRef: { current: vi.fn() }
-    },
     system: {
       bellOnComplete: false,
       sys: vi.fn()
@@ -42,11 +38,6 @@ const buildCtx = (appended: Msg[]) =>
       panel: (title: string, sections: any[]) =>
         appended.push({ kind: 'panel', panelData: { sections, title }, role: 'system', text: '' }),
       setHistoryItems: vi.fn()
-    },
-    voice: {
-      setProcessing: vi.fn(),
-      setRecording: vi.fn(),
-      setVoiceEnabled: vi.fn()
     }
   }) as any
 
@@ -152,12 +143,16 @@ describe('createGatewayEventHandler', () => {
     expect(appended[0]?.thinkingTokens).toBe(estimateTokensRough(fromServer))
   })
 
-  it('attaches inline_diff to the assistant completion body', () => {
+  it('anchors inline_diff as its own segment where the edit happened', () => {
     const appended: Msg[] = []
     const onEvent = createGatewayEventHandler(buildCtx(appended))
     const diff = '\u001b[31m--- a/foo.ts\u001b[0m\n\u001b[32m+++ b/foo.ts\u001b[0m\n@@\n-old\n+new'
     const cleaned = '--- a/foo.ts\n+++ b/foo.ts\n@@\n-old\n+new'
 
+    // Narration → tool → tool-complete → more narration → message-complete.
+    // The diff MUST land between the two narration segments, not tacked
+    // onto the final one.
+    onEvent({ payload: { text: 'Editing the file' }, type: 'message.delta' } as any)
     onEvent({
       payload: { context: 'foo.ts', name: 'patch', tool_id: 'tool-1' },
       type: 'tool.start'
@@ -167,24 +162,27 @@ describe('createGatewayEventHandler', () => {
       type: 'tool.complete'
     } as any)
 
-    // Diff is buffered for message.complete and sanitized (ANSI stripped).
+    // Diff is already committed to segmentMessages as its own segment —
+    // nothing is "pending" anymore. The pre-tool narration is also flushed.
     expect(appended).toHaveLength(0)
-    expect(turnController.pendingInlineDiffs).toEqual([cleaned])
+    expect(turnController.segmentMessages.map(m => m.text)).toEqual([
+      'Editing the file',
+      `\`\`\`diff\n${cleaned}\n\`\`\``
+    ])
 
-    onEvent({
-      payload: { text: 'patch applied' },
-      type: 'message.complete'
-    } as any)
+    onEvent({ payload: { text: 'patch applied' }, type: 'message.complete' } as any)
 
-    // Diff is rendered in the same assistant message body as the completion.
-    expect(appended).toHaveLength(1)
-    expect(appended[0]).toMatchObject({ role: 'assistant' })
-    expect(appended[0]?.text).toContain('patch applied')
-    expect(appended[0]?.text).toContain('```diff')
-    expect(appended[0]?.text).toContain(cleaned)
+    // Three messages in the transcript, in order: pre-tool narration →
+    // diff → post-tool narration. The final message does NOT contain
+    // `diff` content.
+    expect(appended).toHaveLength(3)
+    expect(appended[0]?.text).toBe('Editing the file')
+    expect(appended[1]?.text).toBe(`\`\`\`diff\n${cleaned}\n\`\`\``)
+    expect(appended[2]?.text).toBe('patch applied')
+    expect(appended[2]?.text).not.toContain('```diff')
   })
 
-  it('does not append inline_diff twice when assistant text already contains it', () => {
+  it('drops the diff segment when the final assistant text narrates the same diff', () => {
     const appended: Msg[] = []
     const onEvent = createGatewayEventHandler(buildCtx(appended))
     const cleaned = '--- a/foo.ts\n+++ b/foo.ts\n@@\n-old\n+new'
@@ -194,17 +192,16 @@ describe('createGatewayEventHandler', () => {
       payload: { inline_diff: cleaned, summary: 'patched', tool_id: 'tool-1' },
       type: 'tool.complete'
     } as any)
-    onEvent({
-      payload: { text: assistantText },
-      type: 'message.complete'
-    } as any)
+    onEvent({ payload: { text: assistantText }, type: 'message.complete' } as any)
 
+    // Only the final message — diff-only segment dropped so we don't
+    // render two stacked copies of the same patch.
     expect(appended).toHaveLength(1)
     expect(appended[0]?.text).toBe(assistantText)
     expect((appended[0]?.text.match(/```diff/g) ?? []).length).toBe(1)
   })
 
-  it('strips the CLI "┊ review diff" header from queued inline diffs', () => {
+  it('strips the CLI "┊ review diff" header from inline diff segments', () => {
     const appended: Msg[] = []
     const onEvent = createGatewayEventHandler(buildCtx(appended))
     const raw = '  \u001b[33m┊ review diff\u001b[0m\n--- a/foo.ts\n+++ b/foo.ts\n@@\n-old\n+new'
@@ -213,17 +210,16 @@ describe('createGatewayEventHandler', () => {
       payload: { inline_diff: raw, summary: 'patched', tool_id: 'tool-1' },
       type: 'tool.complete'
     } as any)
-    onEvent({
-      payload: { text: 'done' },
-      type: 'message.complete'
-    } as any)
+    onEvent({ payload: { text: 'done' }, type: 'message.complete' } as any)
 
-    expect(appended).toHaveLength(1)
+    // diff segment first, final narration second
+    expect(appended).toHaveLength(2)
     expect(appended[0]?.text).not.toContain('┊ review diff')
     expect(appended[0]?.text).toContain('--- a/foo.ts')
+    expect(appended[1]?.text).toBe('done')
   })
 
-  it('suppresses inline_diff when assistant already wrote a diff fence', () => {
+  it('drops the diff segment when assistant writes its own ```diff fence', () => {
     const appended: Msg[] = []
     const onEvent = createGatewayEventHandler(buildCtx(appended))
     const inlineDiff = '--- a/foo.ts\n+++ b/foo.ts\n@@\n-old\n+new'
@@ -233,10 +229,7 @@ describe('createGatewayEventHandler', () => {
       payload: { inline_diff: inlineDiff, summary: 'patched', tool_id: 'tool-1' },
       type: 'tool.complete'
     } as any)
-    onEvent({
-      payload: { text: assistantText },
-      type: 'message.complete'
-    } as any)
+    onEvent({ payload: { text: assistantText }, type: 'message.complete' } as any)
 
     expect(appended).toHaveLength(1)
     expect(appended[0]?.text).toBe(assistantText)
@@ -252,15 +245,16 @@ describe('createGatewayEventHandler', () => {
       payload: { inline_diff: diff, name: 'review_diff', summary: diff, tool_id: 'tool-1' },
       type: 'tool.complete'
     } as any)
-    onEvent({
-      payload: { text: 'done' },
-      type: 'message.complete'
-    } as any)
+    onEvent({ payload: { text: 'done' }, type: 'message.complete' } as any)
 
-    expect(appended).toHaveLength(1)
-    expect(appended[0]?.tools?.[0]).toContain('Review Diff')
-    expect(appended[0]?.tools?.[0]).not.toContain('--- a/foo.ts')
+    // Two segments: diff block (no tool row), final narration (tool row
+    // belongs here since pendingSegmentTools carries across the flush).
+    expect(appended).toHaveLength(2)
     expect(appended[0]?.text).toContain('```diff')
+    expect(appended[0]?.tools ?? []).toEqual([])
+    expect(appended[1]?.text).toBe('done')
+    expect(appended[1]?.tools?.[0]).toContain('Review Diff')
+    expect(appended[1]?.tools?.[0]).not.toContain('--- a/foo.ts')
   })
 
   it('shows setup panel for missing provider startup error', () => {
diff --git a/ui-tui/src/app/createGatewayEventHandler.ts b/ui-tui/src/app/createGatewayEventHandler.ts
index 50f6fa3af..2d3b48d39 100644
--- a/ui-tui/src/app/createGatewayEventHandler.ts
+++ b/ui-tui/src/app/createGatewayEventHandler.ts
@@ -51,9 +51,6 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:
   const { STARTUP_RESUME_ID, newSession, resumeById, setCatalog } = ctx.session
   const { bellOnComplete, stdout, sys } = ctx.system
   const { appendMessage, panel, setHistoryItems } = ctx.transcript
-  const { setInput } = ctx.composer
-  const { submitRef } = ctx.submission
-  const { setProcessing: setVoiceProcessing, setRecording: setVoiceRecording, setVoiceEnabled } = ctx.voice
 
   let pendingThinkingStatus = ''
   let thinkingStatusTimer: null | ReturnType<typeof setTimeout> = null
@@ -264,57 +261,6 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:
         return
       }
 
-      case 'voice.status': {
-        // Continuous VAD loop reports its internal state so the status bar
-        // can show listening / transcribing / idle without polling.
-        const state = String(ev.payload?.state ?? '')
-
-        if (state === 'listening') {
-          setVoiceRecording(true)
-          setVoiceProcessing(false)
-        } else if (state === 'transcribing') {
-          setVoiceRecording(false)
-          setVoiceProcessing(true)
-        } else {
-          setVoiceRecording(false)
-          setVoiceProcessing(false)
-        }
-
-        return
-      }
-
-      case 'voice.transcript': {
-        // CLI parity: the 3-strikes silence detector flipped off automatically.
-        // Mirror that on the UI side and tell the user why the mode is off.
-        if (ev.payload?.no_speech_limit) {
-          setVoiceEnabled(false)
-          setVoiceRecording(false)
-          setVoiceProcessing(false)
-          sys('voice: no speech detected 3 times, continuous mode stopped')
-
-          return
-        }
-
-        const text = String(ev.payload?.text ?? '').trim()
-
-        if (!text) {
-          return
-        }
-
-        // CLI parity: _pending_input.put(transcript) unconditionally feeds
-        // the transcript to the agent as its next turn — draft handling
-        // doesn't apply because voice-mode users are speaking, not typing.
-        //
-        // We can't branch on composer input from inside a setInput updater
-        // (React strict mode double-invokes it, duplicating the submit).
-        // Just clear + defer submit so the cleared input is committed before
-        // submit reads it.
-        setInput('')
-        setTimeout(() => submitRef.current(text), 0)
-
-        return
-      }
-
       case 'gateway.start_timeout': {
         const { cwd, python } = ev.payload ?? {}
         const trace = python || cwd ? ` · ${String(python || '')} ${String(cwd || '')}`.trim() : ''
@@ -385,10 +331,13 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:
           return
         }
 
-        // Keep inline diffs attached to the assistant completion body so
-        // they render in the same message flow, not as a standalone system
-        // artifact that can look out-of-place around tool rows.
-        turnController.queueInlineDiff(inlineDiffText)
+        // Anchor the diff to the segment where the edit actually happened
+        // (between the narration that preceded the tool call and whatever
+        // the agent streams afterwards). The previous end-merge put the
+        // diff at the bottom of the final message even when the edit fired
+        // mid-turn, which read as "the agent wrote this after saying
+        // that" — misleading, and dropped for #14XXX.
+        turnController.pushInlineDiffSegment(inlineDiffText)
 
         return
       }
diff --git a/ui-tui/src/app/turnController.ts b/ui-tui/src/app/turnController.ts
index 804394bb1..5bcbb05f2 100644
--- a/ui-tui/src/app/turnController.ts
+++ b/ui-tui/src/app/turnController.ts
@@ -19,6 +19,21 @@ const INTERRUPT_COOLDOWN_MS = 1500
 const ACTIVITY_LIMIT = 8
 const TRAIL_LIMIT = 8
 
+// Matches segments produced by pushInlineDiffSegment — a bare ```diff fence
+// wrapping the raw patch, no surrounding prose. Used at message.complete to
+// dedupe against final assistant text that narrates the same patch.
+const DIFF_SEGMENT_RE = /^```diff\n([\s\S]*?)\n```$/
+
+const diffSegmentBody = (msg: Msg): null | string => {
+  if (msg.role !== 'assistant' || msg.tools?.length) {
+    return null
+  }
+
+  const m = msg.text.match(DIFF_SEGMENT_RE)
+
+  return m ? m[1]! : null
+}
+
 export interface InterruptDeps {
   appendMessage: (msg: Msg) => void
   gw: { request: <T = unknown>(method: string, params?: Record<string, unknown>) => Promise<T> }
@@ -40,7 +55,6 @@ class TurnController {
   bufRef = ''
   interrupted = false
   lastStatusNote = ''
-  pendingInlineDiffs: string[] = []
   persistedToolLabels = new Set<string>()
   persistSpawnTree?: (subagents: SubagentProgress[], sessionId: null | string) => Promise<void>
   protocolWarned = false
@@ -79,7 +93,6 @@ class TurnController {
     this.activeTools = []
     this.streamTimer = clear(this.streamTimer)
     this.bufRef = ''
-    this.pendingInlineDiffs = []
     this.pendingSegmentTools = []
     this.segmentMessages = []
 
@@ -186,18 +199,35 @@ class TurnController {
     }, REASONING_PULSE_MS)
   }
 
-  queueInlineDiff(diffText: string) {
+  pushInlineDiffSegment(diffText: string) {
     // Strip CLI chrome the gateway emits before the unified diff (e.g. a
     // leading "┊ review diff" header written by `_emit_inline_diff` for the
     // terminal printer). That header only makes sense as stdout dressing,
     // not inside a markdown ```diff block.
-    const text = diffText.replace(/^\s*┊[^\n]*\n?/, '').trim()
+    const stripped = diffText.replace(/^\s*┊[^\n]*\n?/, '').trim()
 
-    if (!text || this.pendingInlineDiffs.includes(text)) {
+    if (!stripped) {
       return
     }
 
-    this.pendingInlineDiffs = [...this.pendingInlineDiffs, text]
+    // Flush any in-progress streaming text as its own segment first, so the
+    // diff lands BETWEEN the assistant narration that preceded the edit and
+    // whatever the agent streams afterwards — not glued onto the final
+    // message. This is the whole point of segment-anchored diffs: the diff
+    // renders where the edit actually happened.
+    this.flushStreamingSegment()
+
+    const block = `\`\`\`diff\n${stripped}\n\`\`\``
+
+    // Skip consecutive duplicates (same tool firing tool.complete twice, or
+    // two edits producing the same patch). Keeping this cheap — deeper
+    // dedupe against the final assistant text happens at message.complete.
+    if (this.segmentMessages.at(-1)?.text === block) {
+      return
+    }
+
+    this.segmentMessages = [...this.segmentMessages, { role: 'assistant', text: block }]
+    patchTurnState({ streamSegments: this.segmentMessages })
   }
 
   pushActivity(text: string, tone: ActivityItem['tone'] = 'info', replaceLabel?: string) {
@@ -234,7 +264,6 @@ class TurnController {
     this.idle()
     this.clearReasoning()
     this.clearStatusTimer()
-    this.pendingInlineDiffs = []
     this.pendingSegmentTools = []
     this.segmentMessages = []
     this.turnTools = []
@@ -245,31 +274,35 @@ class TurnController {
     const rawText = (payload.rendered ?? payload.text ?? this.bufRef).trimStart()
     const split = splitReasoning(rawText)
     const finalText = split.text
-    // Skip appending if the assistant already narrated the diff inside a
-    // markdown fence of its own — otherwise we render two stacked diff
-    // blocks for the same edit.
-    const assistantAlreadyHasDiff = /```(?:diff|patch)\b/i.test(finalText)
-
-    const remainingInlineDiffs = assistantAlreadyHasDiff
-      ? []
-      : this.pendingInlineDiffs.filter(diff => !finalText.includes(diff))
-
-    const inlineDiffBlock = remainingInlineDiffs.length
-      ? `\`\`\`diff\n${remainingInlineDiffs.join('\n\n')}\n\`\`\``
-      : ''
-
-    const mergedText = [finalText, inlineDiffBlock].filter(Boolean).join('\n\n')
     const existingReasoning = this.reasoningText.trim() || String(payload.reasoning ?? '').trim()
     const savedReasoning = [existingReasoning, existingReasoning ? '' : split.reasoning].filter(Boolean).join('\n\n')
     const savedReasoningTokens = savedReasoning ? estimateTokensRough(savedReasoning) : 0
     const savedToolTokens = this.toolTokenAcc
     const tools = this.pendingSegmentTools
-    const finalMessages = [...this.segmentMessages]
 
-    if (mergedText) {
+    // Drop diff-only segments the agent is about to narrate in the final
+    // reply. Without this, a closing "here's the diff …" message would
+    // render two stacked copies of the same patch. Only touches segments
+    // whose entire body is a ```diff``` fence emitted by pushInlineDiff-
+    // Segment — real assistant narration stays put.
+    const finalHasOwnDiffFence = /```(?:diff|patch)\b/i.test(finalText)
+
+    const segments = this.segmentMessages.filter(msg => {
+      const body = diffSegmentBody(msg)
+
+      if (body === null) {
+        return true
+      }
+
+      return !finalHasOwnDiffFence && !finalText.includes(body)
+    })
+
+    const finalMessages = [...segments]
+
+    if (finalText) {
       finalMessages.push({
         role: 'assistant',
-        text: mergedText,
+        text: finalText,
         thinking: savedReasoning || undefined,
         thinkingTokens: savedReasoning ? savedReasoningTokens : undefined,
         toolTokens: savedToolTokens || undefined,
@@ -300,7 +333,7 @@ class TurnController {
     this.bufRef = ''
     patchTurnState({ activity: [], outcome: '' })
 
-    return { finalMessages, finalText: mergedText, wasInterrupted }
+    return { finalMessages, finalText, wasInterrupted }
   }
 
   recordMessageDelta({ rendered, text }: { rendered?: string; text?: string }) {
@@ -406,7 +439,6 @@ class TurnController {
     this.bufRef = ''
     this.interrupted = false
     this.lastStatusNote = ''
-    this.pendingInlineDiffs = []
     this.pendingSegmentTools = []
     this.protocolWarned = false
     this.segmentMessages = []
@@ -452,7 +484,6 @@ class TurnController {
     this.endReasoningPhase()
     this.clearReasoning()
     this.activeTools = []
-    this.pendingInlineDiffs = []
     this.turnTools = []
     this.toolTokenAcc = 0
     this.persistedToolLabels.clear()