fix(tui): anchor inline_diff to the segment where the edit happened

Revisits #13729. That PR buffered each `tool.complete`'s inline_diff and merged them into the final assistant message body as a fenced ```diff block. The merge-at-end placement reads as "the agent wrote this after the summary", even when the edit fired mid-turn — which is both misleading and (per blitz feedback) feels like noise tacked onto the end of every task. Segment-anchored placement instead: - On tool.complete with inline_diff, `pushInlineDiffSegment` calls `flushStreamingSegment` first (so any in-progress narration lands as its own segment), then pushes the ```diff block as its own segment into segmentMessages. The diff is now anchored BETWEEN the narration that preceded the edit and whatever the agent streams afterwards, which is where the edit actually happened. - `recordMessageComplete` no longer merges buffered diffs. The only remaining dedupe is "drop diff-only segments whose body the final assistant text narrates verbatim, and drop every diff-only segment when the final text already contains its own ```diff or ```patch fence" — same tradeoff as before, kept so an agent that narrates its own diff doesn't render two stacked copies. - Drops `pendingInlineDiffs` and `queueInlineDiff` — the buffer + end-merge machinery is gone; segmentMessages is now the only source of truth. Side benefit: Ctrl+C interrupt (`interruptTurn`) iterates segmentMessages, so diff segments are now preserved in the transcript when the user cancels after an edit. Previously the pending buffer was silently dropped on interrupt. Reported by Teknium during blitz usage: "no diffs are ever at the end because it didn't make this file edit after the final message".
2026-04-25 00:51:20 +00:00 · 2026-04-23 19:02:44 -05:00 · 2026-04-23 19:02:44 -05:00 · 11b2942f16
commit 11b2942f16
parent c95c6bdb7c
3 changed files with 105 additions and 131 deletions
--- a/ui-tui/src/tests/createGatewayEventHandler.test.ts
+++ b/ui-tui/src/tests/createGatewayEventHandler.test.ts
@ -15,8 +15,7 @@ const buildCtx = (appended: Msg[]) =>
    composer: {
      dequeue: () => undefined,
      queueEditRef: ref<null | number>(null),
-      sendQueued: vi.fn(),
-      setInput: vi.fn()
+      sendQueued: vi.fn()
    },
    gateway: {
      gw: { request: vi.fn() },
@ -30,9 +29,6 @@ const buildCtx = (appended: Msg[]) =>
      resumeById: vi.fn(),
      setCatalog: vi.fn()
    },
-    submission: {
-      submitRef: { current: vi.fn() }
-    },
    system: {
      bellOnComplete: false,
      sys: vi.fn()
@ -42,11 +38,6 @@ const buildCtx = (appended: Msg[]) =>
      panel: (title: string, sections: any[]) =>
        appended.push({ kind: 'panel', panelData: { sections, title }, role: 'system', text: '' }),
      setHistoryItems: vi.fn()
-    },
-    voice: {
-      setProcessing: vi.fn(),
-      setRecording: vi.fn(),
-      setVoiceEnabled: vi.fn()
    }
  }) as any

@ -152,12 +143,16 @@ describe('createGatewayEventHandler', () => {
    expect(appended[0]?.thinkingTokens).toBe(estimateTokensRough(fromServer))
  })

-  it('attaches inline_diff to the assistant completion body', () => {
+  it('anchors inline_diff as its own segment where the edit happened', () => {
    const appended: Msg[] = []
    const onEvent = createGatewayEventHandler(buildCtx(appended))
    const diff = '\u001b[31m--- a/foo.ts\u001b[0m\n\u001b[32m+++ b/foo.ts\u001b[0m\n@@\n-old\n+new'
    const cleaned = '--- a/foo.ts\n+++ b/foo.ts\n@@\n-old\n+new'

+    // Narration → tool → tool-complete → more narration → message-complete.
+    // The diff MUST land between the two narration segments, not tacked
+    // onto the final one.
+    onEvent({ payload: { text: 'Editing the file' }, type: 'message.delta' } as any)
    onEvent({
      payload: { context: 'foo.ts', name: 'patch', tool_id: 'tool-1' },
      type: 'tool.start'
@ -167,24 +162,27 @@ describe('createGatewayEventHandler', () => {
      type: 'tool.complete'
    } as any)

-    // Diff is buffered for message.complete and sanitized (ANSI stripped).
+    // Diff is already committed to segmentMessages as its own segment —
+    // nothing is "pending" anymore. The pre-tool narration is also flushed.
    expect(appended).toHaveLength(0)
-    expect(turnController.pendingInlineDiffs).toEqual([cleaned])
+    expect(turnController.segmentMessages.map(m => m.text)).toEqual([
+      'Editing the file',
+      `\`\`\`diff\n${cleaned}\n\`\`\``
+    ])

-    onEvent({
-      payload: { text: 'patch applied' },
-      type: 'message.complete'
-    } as any)
+    onEvent({ payload: { text: 'patch applied' }, type: 'message.complete' } as any)

-    // Diff is rendered in the same assistant message body as the completion.
-    expect(appended).toHaveLength(1)
-    expect(appended[0]).toMatchObject({ role: 'assistant' })
-    expect(appended[0]?.text).toContain('patch applied')
-    expect(appended[0]?.text).toContain('```diff')
-    expect(appended[0]?.text).toContain(cleaned)
+    // Three messages in the transcript, in order: pre-tool narration →
+    // diff → post-tool narration. The final message does NOT contain
+    // `diff` content.
+    expect(appended).toHaveLength(3)
+    expect(appended[0]?.text).toBe('Editing the file')
+    expect(appended[1]?.text).toBe(`\`\`\`diff\n${cleaned}\n\`\`\``)
+    expect(appended[2]?.text).toBe('patch applied')
+    expect(appended[2]?.text).not.toContain('```diff')
  })

-  it('does not append inline_diff twice when assistant text already contains it', () => {
+  it('drops the diff segment when the final assistant text narrates the same diff', () => {
    const appended: Msg[] = []
    const onEvent = createGatewayEventHandler(buildCtx(appended))
    const cleaned = '--- a/foo.ts\n+++ b/foo.ts\n@@\n-old\n+new'
@ -194,17 +192,16 @@ describe('createGatewayEventHandler', () => {
      payload: { inline_diff: cleaned, summary: 'patched', tool_id: 'tool-1' },
      type: 'tool.complete'
    } as any)
-    onEvent({
-      payload: { text: assistantText },
-      type: 'message.complete'
-    } as any)
+    onEvent({ payload: { text: assistantText }, type: 'message.complete' } as any)

+    // Only the final message — diff-only segment dropped so we don't
+    // render two stacked copies of the same patch.
    expect(appended).toHaveLength(1)
    expect(appended[0]?.text).toBe(assistantText)
    expect((appended[0]?.text.match(/```diff/g) ?? []).length).toBe(1)
  })

-  it('strips the CLI "┊ review diff" header from queued inline diffs', () => {
+  it('strips the CLI "┊ review diff" header from inline diff segments', () => {
    const appended: Msg[] = []
    const onEvent = createGatewayEventHandler(buildCtx(appended))
    const raw = '  \u001b[33m┊ review diff\u001b[0m\n--- a/foo.ts\n+++ b/foo.ts\n@@\n-old\n+new'
@ -213,17 +210,16 @@ describe('createGatewayEventHandler', () => {
      payload: { inline_diff: raw, summary: 'patched', tool_id: 'tool-1' },
      type: 'tool.complete'
    } as any)
-    onEvent({
-      payload: { text: 'done' },
-      type: 'message.complete'
-    } as any)
+    onEvent({ payload: { text: 'done' }, type: 'message.complete' } as any)

-    expect(appended).toHaveLength(1)
+    // diff segment first, final narration second
+    expect(appended).toHaveLength(2)
    expect(appended[0]?.text).not.toContain('┊ review diff')
    expect(appended[0]?.text).toContain('--- a/foo.ts')
+    expect(appended[1]?.text).toBe('done')
  })

-  it('suppresses inline_diff when assistant already wrote a diff fence', () => {
+  it('drops the diff segment when assistant writes its own ```diff fence', () => {
    const appended: Msg[] = []
    const onEvent = createGatewayEventHandler(buildCtx(appended))
    const inlineDiff = '--- a/foo.ts\n+++ b/foo.ts\n@@\n-old\n+new'
@ -233,10 +229,7 @@ describe('createGatewayEventHandler', () => {
      payload: { inline_diff: inlineDiff, summary: 'patched', tool_id: 'tool-1' },
      type: 'tool.complete'
    } as any)
-    onEvent({
-      payload: { text: assistantText },
-      type: 'message.complete'
-    } as any)
+    onEvent({ payload: { text: assistantText }, type: 'message.complete' } as any)

    expect(appended).toHaveLength(1)
    expect(appended[0]?.text).toBe(assistantText)
@ -252,15 +245,16 @@ describe('createGatewayEventHandler', () => {
      payload: { inline_diff: diff, name: 'review_diff', summary: diff, tool_id: 'tool-1' },
      type: 'tool.complete'
    } as any)
-    onEvent({
-      payload: { text: 'done' },
-      type: 'message.complete'
-    } as any)
+    onEvent({ payload: { text: 'done' }, type: 'message.complete' } as any)

-    expect(appended).toHaveLength(1)
-    expect(appended[0]?.tools?.[0]).toContain('Review Diff')
-    expect(appended[0]?.tools?.[0]).not.toContain('--- a/foo.ts')
+    // Two segments: diff block (no tool row), final narration (tool row
+    // belongs here since pendingSegmentTools carries across the flush).
+    expect(appended).toHaveLength(2)
    expect(appended[0]?.text).toContain('```diff')
+    expect(appended[0]?.tools ?? []).toEqual([])
+    expect(appended[1]?.text).toBe('done')
+    expect(appended[1]?.tools?.[0]).toContain('Review Diff')
+    expect(appended[1]?.tools?.[0]).not.toContain('--- a/foo.ts')
  })

  it('shows setup panel for missing provider startup error', () => {
--- a/ui-tui/src/app/createGatewayEventHandler.ts
+++ b/ui-tui/src/app/createGatewayEventHandler.ts
@ -51,9 +51,6 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:
  const { STARTUP_RESUME_ID, newSession, resumeById, setCatalog } = ctx.session
  const { bellOnComplete, stdout, sys } = ctx.system
  const { appendMessage, panel, setHistoryItems } = ctx.transcript
-  const { setInput } = ctx.composer
-  const { submitRef } = ctx.submission
-  const { setProcessing: setVoiceProcessing, setRecording: setVoiceRecording, setVoiceEnabled } = ctx.voice

  let pendingThinkingStatus = ''
  let thinkingStatusTimer: null | ReturnType<typeof setTimeout> = null
@ -264,57 +261,6 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:
        return
      }

-      case 'voice.status': {
-        // Continuous VAD loop reports its internal state so the status bar
-        // can show listening / transcribing / idle without polling.
-        const state = String(ev.payload?.state ?? '')
-
-        if (state === 'listening') {
-          setVoiceRecording(true)
-          setVoiceProcessing(false)
-        } else if (state === 'transcribing') {
-          setVoiceRecording(false)
-          setVoiceProcessing(true)
-        } else {
-          setVoiceRecording(false)
-          setVoiceProcessing(false)
-        }
-
-        return
-      }
-
-      case 'voice.transcript': {
-        // CLI parity: the 3-strikes silence detector flipped off automatically.
-        // Mirror that on the UI side and tell the user why the mode is off.
-        if (ev.payload?.no_speech_limit) {
-          setVoiceEnabled(false)
-          setVoiceRecording(false)
-          setVoiceProcessing(false)
-          sys('voice: no speech detected 3 times, continuous mode stopped')
-
-          return
-        }
-
-        const text = String(ev.payload?.text ?? '').trim()
-
-        if (!text) {
-          return
-        }
-
-        // CLI parity: _pending_input.put(transcript) unconditionally feeds
-        // the transcript to the agent as its next turn — draft handling
-        // doesn't apply because voice-mode users are speaking, not typing.
-        //
-        // We can't branch on composer input from inside a setInput updater
-        // (React strict mode double-invokes it, duplicating the submit).
-        // Just clear + defer submit so the cleared input is committed before
-        // submit reads it.
-        setInput('')
-        setTimeout(() => submitRef.current(text), 0)
-
-        return
-      }
-
      case 'gateway.start_timeout': {
        const { cwd, python } = ev.payload ?? {}
        const trace = python || cwd ? ` · ${String(python || '')} ${String(cwd || '')}`.trim() : ''
@ -385,10 +331,13 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:
          return
        }

-        // Keep inline diffs attached to the assistant completion body so
-        // they render in the same message flow, not as a standalone system
-        // artifact that can look out-of-place around tool rows.
-        turnController.queueInlineDiff(inlineDiffText)
+        // Anchor the diff to the segment where the edit actually happened
+        // (between the narration that preceded the tool call and whatever
+        // the agent streams afterwards). The previous end-merge (#13729) put
+        // the diff at the bottom of the final message even when the edit
+        // fired mid-turn, which read as "the agent wrote this after saying
+        // that" — misleading, so the end-merge was dropped.
+        turnController.pushInlineDiffSegment(inlineDiffText)

        return
      }
--- a/ui-tui/src/app/turnController.ts
+++ b/ui-tui/src/app/turnController.ts
@ -19,6 +19,21 @@ const INTERRUPT_COOLDOWN_MS = 1500
 const ACTIVITY_LIMIT = 8
 const TRAIL_LIMIT = 8

+// Matches segments produced by pushInlineDiffSegment — a bare ```diff fence
+// wrapping the raw patch, no surrounding prose. Used at message.complete to
+// dedupe against final assistant text that narrates the same patch.
+const DIFF_SEGMENT_RE = /^```diff\n([\s\S]*?)\n```$/
+
+const diffSegmentBody = (msg: Msg): null | string => {
+  if (msg.role !== 'assistant' || msg.tools?.length) {
+    return null
+  }
+
+  const m = msg.text.match(DIFF_SEGMENT_RE)
+
+  return m ? m[1]! : null
+}
+
 export interface InterruptDeps {
  appendMessage: (msg: Msg) => void
  gw: { request: <T = unknown>(method: string, params?: Record<string, unknown>) => Promise<T> }
@ -40,7 +55,6 @@ class TurnController {
  bufRef = ''
  interrupted = false
  lastStatusNote = ''
-  pendingInlineDiffs: string[] = []
  persistedToolLabels = new Set<string>()
  persistSpawnTree?: (subagents: SubagentProgress[], sessionId: null | string) => Promise<void>
  protocolWarned = false
@ -79,7 +93,6 @@ class TurnController {
    this.activeTools = []
    this.streamTimer = clear(this.streamTimer)
    this.bufRef = ''
-    this.pendingInlineDiffs = []
    this.pendingSegmentTools = []
    this.segmentMessages = []

@ -186,18 +199,35 @@ class TurnController {
    }, REASONING_PULSE_MS)
  }

-  queueInlineDiff(diffText: string) {
+  pushInlineDiffSegment(diffText: string) {
    // Strip CLI chrome the gateway emits before the unified diff (e.g. a
    // leading "┊ review diff" header written by `_emit_inline_diff` for the
    // terminal printer). That header only makes sense as stdout dressing,
    // not inside a markdown ```diff block.
-    const text = diffText.replace(/^\s*┊[^\n]*\n?/, '').trim()
+    const stripped = diffText.replace(/^\s*┊[^\n]*\n?/, '').trim()

-    if (!text || this.pendingInlineDiffs.includes(text)) {
+    if (!stripped) {
      return
    }

-    this.pendingInlineDiffs = [...this.pendingInlineDiffs, text]
+    // Flush any in-progress streaming text as its own segment first, so the
+    // diff lands BETWEEN the assistant narration that preceded the edit and
+    // whatever the agent streams afterwards — not glued onto the final
+    // message. This is the whole point of segment-anchored diffs: the diff
+    // renders where the edit actually happened.
+    this.flushStreamingSegment()
+
+    const block = `\`\`\`diff\n${stripped}\n\`\`\``
+
+    // Skip consecutive duplicates (same tool firing tool.complete twice, or
+    // two edits producing the same patch). Keeping this cheap — deeper
+    // dedupe against the final assistant text happens at message.complete.
+    if (this.segmentMessages.at(-1)?.text === block) {
+      return
+    }
+
+    this.segmentMessages = [...this.segmentMessages, { role: 'assistant', text: block }]
+    patchTurnState({ streamSegments: this.segmentMessages })
  }

  pushActivity(text: string, tone: ActivityItem['tone'] = 'info', replaceLabel?: string) {
@ -234,7 +264,6 @@ class TurnController {
    this.idle()
    this.clearReasoning()
    this.clearStatusTimer()
-    this.pendingInlineDiffs = []
    this.pendingSegmentTools = []
    this.segmentMessages = []
    this.turnTools = []
@ -245,31 +274,35 @@ class TurnController {
    const rawText = (payload.rendered ?? payload.text ?? this.bufRef).trimStart()
    const split = splitReasoning(rawText)
    const finalText = split.text
-    // Skip appending if the assistant already narrated the diff inside a
-    // markdown fence of its own — otherwise we render two stacked diff
-    // blocks for the same edit.
-    const assistantAlreadyHasDiff = /```(?:diff|patch)\b/i.test(finalText)
-
-    const remainingInlineDiffs = assistantAlreadyHasDiff
-      ? []
-      : this.pendingInlineDiffs.filter(diff => !finalText.includes(diff))
-
-    const inlineDiffBlock = remainingInlineDiffs.length
-      ? `\`\`\`diff\n${remainingInlineDiffs.join('\n\n')}\n\`\`\``
-      : ''
-
-    const mergedText = [finalText, inlineDiffBlock].filter(Boolean).join('\n\n')
    const existingReasoning = this.reasoningText.trim() || String(payload.reasoning ?? '').trim()
    const savedReasoning = [existingReasoning, existingReasoning ? '' : split.reasoning].filter(Boolean).join('\n\n')
    const savedReasoningTokens = savedReasoning ? estimateTokensRough(savedReasoning) : 0
    const savedToolTokens = this.toolTokenAcc
    const tools = this.pendingSegmentTools
-    const finalMessages = [...this.segmentMessages]

-    if (mergedText) {
+    // Drop diff-only segments the agent is about to narrate in the final
+    // reply. Without this, a closing "here's the diff …" message would
+    // render two stacked copies of the same patch. Only touches segments
+    // whose entire body is a ```diff fence emitted by
+    // pushInlineDiffSegment — real assistant narration stays put.
+    const finalHasOwnDiffFence = /```(?:diff|patch)\b/i.test(finalText)
+
+    const segments = this.segmentMessages.filter(msg => {
+      const body = diffSegmentBody(msg)
+
+      if (body === null) {
+        return true
+      }
+
+      return !finalHasOwnDiffFence && !finalText.includes(body)
+    })
+
+    const finalMessages = [...segments]
+
+    if (finalText) {
      finalMessages.push({
        role: 'assistant',
-        text: mergedText,
+        text: finalText,
        thinking: savedReasoning || undefined,
        thinkingTokens: savedReasoning ? savedReasoningTokens : undefined,
        toolTokens: savedToolTokens || undefined,
@ -300,7 +333,7 @@ class TurnController {
    this.bufRef = ''
    patchTurnState({ activity: [], outcome: '' })

-    return { finalMessages, finalText: mergedText, wasInterrupted }
+    return { finalMessages, finalText, wasInterrupted }
  }

  recordMessageDelta({ rendered, text }: { rendered?: string; text?: string }) {
@ -406,7 +439,6 @@ class TurnController {
    this.bufRef = ''
    this.interrupted = false
    this.lastStatusNote = ''
-    this.pendingInlineDiffs = []
    this.pendingSegmentTools = []
    this.protocolWarned = false
    this.segmentMessages = []
@ -452,7 +484,6 @@ class TurnController {
    this.endReasoningPhase()
    this.clearReasoning()
    this.activeTools = []
-    this.pendingInlineDiffs = []
    this.turnTools = []
    this.toolTokenAcc = 0
    this.persistedToolLabels.clear()