chore(desktop): synthetic-stream perf harness + scripts

Drops the React `<Profiler>` approach (a no-op because Vite is currently serving the production React build) in favor of an externally observable measurement stack: rAF frame intervals, `PerformanceObserver({entryTypes: ['longtask']})`, and a `MutationObserver` on the live streaming message. Adds a synthetic stream driver — `window.__PERF_DRIVE__.stream({...})` — that pushes tokens through the live `$messages` atom at a controlled rate, so the assistant-ui runtime, incremental repository, and Streamdown markdown pipeline see the same workload they'd see during a real LLM stream, without the LLM cost. The driver lives in `src/app/chat/perf-probe.tsx`; `main.tsx` side-imports it under `import.meta.env.MODE !== 'production'` so it tree-shakes out of prod builds. (Using `MODE` rather than `DEV` because our Vite setup currently reports `DEV=false` even under `vite dev` — see the dev-build note in `profile-typing-lag.md`.) Scripts: `measure-synthetic-stream.mjs` — drive the synthetic stream and record frame/longtask/mutation data; `profile-synth-stream.mjs` — CPU profile + top self-time during the synthetic stream; `measure-real-stream.mjs` — same harness, real LLM stream; `profile-real-stream.mjs` — CPU profile bracketing the real stream window; `eval.mjs` / `reload.mjs` — small CDP helpers. A real-LLM measurement on Cloud Shadows (gpt-4o-mini, 39 s window) showed 12 longtasks in the same 75-127 ms range the synthetic stream predicted, so the synthetic stream is a faithful proxy.
2026-07-29 18:46:59 +00:00 · 2026-05-21 19:38:26 -05:00 · 2026-05-21 19:38:26 -05:00 · 99f2a9503c
commit 99f2a9503c
parent 5abf89ddd1
9 changed files with 1173 additions and 0 deletions
--- a/apps/desktop/scripts/eval.mjs
+++ b/apps/desktop/scripts/eval.mjs
@ -0,0 +1,21 @@
+// Simple eval helper — runs an expression and returns the result.value.
+const targets = await (await fetch('http://127.0.0.1:9222/json')).json()
+const t = targets.find((t) => t.url.includes('5174'))
+const ws = new WebSocket(t.webSocketDebuggerUrl)
+let id = 0
+const pending = new Map()
+ws.addEventListener('message', (ev) => {
+  const m = JSON.parse(ev.data)
+  if (pending.has(m.id)) { pending.get(m.id)(m); pending.delete(m.id) }
+})
+await new Promise((r) => ws.addEventListener('open', r))
+const send = (method, params) => new Promise((res) => { const i = ++id; pending.set(i, res); ws.send(JSON.stringify({ id: i, method, params })) })
+
+const expr = process.argv[2] || '1+1'
+const r = await send('Runtime.evaluate', { expression: expr, returnByValue: true, awaitPromise: true })
+if (r.result.exceptionDetails) {
+  console.error('EXCEPTION:', r.result.exceptionDetails.exception?.description)
+} else {
+  console.log(JSON.stringify(r.result.result.value, null, 2))
+}
+ws.close()
--- a/apps/desktop/scripts/measure-real-stream.mjs
+++ b/apps/desktop/scripts/measure-real-stream.mjs
@ -0,0 +1,252 @@
+// REAL streaming measurement — no React internals.
+//
+// Measures:
+//   1) rAF frame intervals during a verified live stream (long-frame histogram)
+//   2) MutationObserver: how often does the live assistant message mutate, what's the budget per mutation
+//   3) Text length growth rate (chars/sec)
+//   4) PerformanceObserver `longtask` entries (any task > 50ms blocks input)
+//
+// Detects REAL stream by waiting for assistant-message DOM count to grow past baseline.
+// Does NOT cancel — lets the stream run to completion or hits TIMEOUT_MS.
+
+// HTTP endpoint of the DevTools debugging port; `/json` on it lists the targets.
+const CDP_HTTP = 'http://127.0.0.1:9222'
+// Prompt typed into the composer. Override with the PROMPT env var.
+const PROMPT = process.env.PROMPT || 'count from 1 to 80, one number per line'
+// Upper bound (ms) on the length-sampling loop that runs once the stream has
+// started. Override with the TIMEOUT_MS env var.
+const TIMEOUT_MS = Number(process.env.TIMEOUT_MS || 60000)
+
+async function getTarget() {
+  const list = await (await fetch(`${CDP_HTTP}/json`)).json()
+  const t = list.find((t) => t.type === 'page' && /5174/.test(t.url))
+  if (!t) throw new Error('renderer not found')
+  return t
+}
+
+class CDP {
+  constructor(ws) { this.ws = ws; this.id = 0; this.pending = new Map() }
+  static async open(url) {
+    const ws = new WebSocket(url)
+    await new Promise((r, j) => {
+      ws.addEventListener('open', r, { once: true })
+      ws.addEventListener('error', (e) => j(e), { once: true })
+    })
+    const cdp = new CDP(ws)
+    ws.addEventListener('message', (event) => {
+      const m = JSON.parse(event.data.toString())
+      if (m.id != null && cdp.pending.has(m.id)) {
+        const { resolve, reject } = cdp.pending.get(m.id)
+        cdp.pending.delete(m.id)
+        if (m.error) reject(new Error(m.error.message))
+        else resolve(m.result)
+      }
+    })
+    return cdp
+  }
+  send(method, params) {
+    const id = ++this.id
+    return new Promise((res, rej) => {
+      this.pending.set(id, { resolve: res, reject: rej })
+      this.ws.send(JSON.stringify({ id, method, params }))
+    })
+  }
+  async eval(expr) {
+    const r = await this.send('Runtime.evaluate', { expression: expr, returnByValue: true, awaitPromise: true })
+    if (r.exceptionDetails) throw new Error(r.exceptionDetails.exception?.description || 'eval')
+    return r.result.value
+  }
+  close() { this.ws.close() }
+}
+
+async function main() {
+  const target = await getTarget()
+  const cdp = await CDP.open(target.webSocketDebuggerUrl)
+
+  // Install recorders.
+  await cdp.eval(`
+    (() => {
+      // rAF frame intervals
+      window.__FT__ = { times: [], stop: false }
+      let last = performance.now()
+      const tick = () => {
+        if (window.__FT__.stop) return
+        const now = performance.now()
+        window.__FT__.times.push(now - last)
+        last = now
+        requestAnimationFrame(tick)
+      }
+      requestAnimationFrame(tick)
+
+      // longtask observer
+      window.__LT__ = { entries: [], stop: false }
+      try {
+        const po = new PerformanceObserver((list) => {
+          if (window.__LT__.stop) return
+          for (const e of list.getEntries()) {
+            window.__LT__.entries.push({ name: e.name, duration: e.duration, startTime: e.startTime })
+          }
+        })
+        po.observe({ entryTypes: ['longtask'] })
+        window.__LT__.po = po
+      } catch {}
+
+      // mutation observer on streaming message
+      window.__MO__ = { mutations: [], stop: false, currentMsg: null }
+      const tryArm = () => {
+        const all = document.querySelectorAll('[data-slot="aui_assistant-message-root"]')
+        const last = all[all.length - 1]
+        if (!last || last === window.__MO__.currentMsg) return
+        window.__MO__.currentMsg = last
+        if (window.__MO__.obs) window.__MO__.obs.disconnect()
+        const obs = new MutationObserver((muts) => {
+          if (window.__MO__.stop) return
+          const t = performance.now()
+          window.__MO__.mutations.push({ t, count: muts.length, len: last.textContent.length })
+        })
+        obs.observe(last, { childList: true, subtree: true, characterData: true })
+        window.__MO__.obs = obs
+      }
+      window.__MO__.arm = tryArm
+      return 'recorders armed'
+    })()
+  `)
+
+  // Baseline
+  const base = JSON.parse(await cdp.eval(`
+    JSON.stringify({
+      assistantCount: document.querySelectorAll('[data-slot="aui_assistant-message-root"]').length,
+      busy: !!document.querySelector('[data-status="running"], [data-busy="true"]'),
+      hasComposer: !!document.querySelector('[contenteditable="true"]'),
+    })
+  `))
+  console.log('baseline:', base)
+  if (!base.hasComposer) { console.error('no composer'); cdp.close(); return }
+
+  // Type + submit
+  await cdp.eval(`
+    (() => {
+      const ed = document.querySelector('[contenteditable="true"]')
+      ed.focus()
+      document.execCommand('insertText', false, ${JSON.stringify(PROMPT)})
+      return 'typed'
+    })()
+  `)
+  const submitT0 = Date.now()
+  await cdp.eval(`
+    (() => {
+      const ed = document.querySelector('[contenteditable="true"]')
+      ed.dispatchEvent(new KeyboardEvent('keydown', { key: 'Enter', code: 'Enter', bubbles: true, cancelable: true }))
+      return 'submitted'
+    })()
+  `)
+
+  // Poll for REAL stream (assistant count > baseline). 30 seconds — accommodates
+  // slow first-token latencies on big providers.
+  let realStreamT = null
+  for (let i = 0; i < 600; i++) {
+    await new Promise((r) => setTimeout(r, 50))
+    const s = JSON.parse(await cdp.eval(`
+      JSON.stringify({
+        n: document.querySelectorAll('[data-slot="aui_assistant-message-root"]').length,
+        busy: !!document.querySelector('[data-status="running"], [data-busy="true"]'),
+        text: (() => { const a = document.querySelectorAll('[data-slot="aui_assistant-message-root"]'); return a.length ? a[a.length-1].textContent.length : 0 })()
+      })
+    `))
+    if (s.n > base.assistantCount) {
+      realStreamT = Date.now()
+      console.log('REAL stream started after', realStreamT - submitT0, 'ms — busy=', s.busy, 'text=', s.text)
+      // Arm mutation observer on the new message
+      await cdp.eval('window.__MO__.arm()')
+      break
+    }
+  }
+  if (!realStreamT) {
+    console.error('REAL STREAM NEVER STARTED')
+    cdp.close()
+    return
+  }
+
+  // Sample length growth, wait for completion or timeout
+  const samples = []
+  const start = Date.now()
+  while (Date.now() - start < TIMEOUT_MS) {
+    await new Promise((r) => setTimeout(r, 250))
+    const s = JSON.parse(await cdp.eval(`
+      JSON.stringify({
+        t: performance.now(),
+        len: (() => { const a = document.querySelectorAll('[data-slot="aui_assistant-message-root"]'); return a.length ? a[a.length-1].textContent.length : 0 })(),
+        busy: !!document.querySelector('[data-status="running"], [data-busy="true"]')
+      })
+    `))
+    samples.push(s)
+    if (!s.busy && samples.length > 4) {
+      await new Promise((r) => setTimeout(r, 300))
+      break
+    }
+  }
+
+  // Pull recordings
+  const data = JSON.parse(await cdp.eval(`
+    (() => {
+      window.__FT__.stop = true
+      window.__LT__.stop = true
+      window.__MO__.stop = true
+      try { window.__LT__.po && window.__LT__.po.disconnect() } catch {}
+      try { window.__MO__.obs && window.__MO__.obs.disconnect() } catch {}
+      return JSON.stringify({
+        frames: window.__FT__.times,
+        longtasks: window.__LT__.entries,
+        mutations: window.__MO__.mutations,
+      })
+    })()
+  `))
+
+  const { frames, longtasks, mutations } = data
+
+  // Frame histogram (filter to stream window)
+  const buckets = { '<=16.7': 0, '16.7-33': 0, '33-50': 0, '50-100': 0, '100-200': 0, '>200': 0 }
+  let frameTotal = 0
+  let maxFrame = 0
+  for (const f of frames) {
+    frameTotal += f
+    if (f > maxFrame) maxFrame = f
+    if (f <= 16.7) buckets['<=16.7']++
+    else if (f <= 33) buckets['16.7-33']++
+    else if (f <= 50) buckets['33-50']++
+    else if (f <= 100) buckets['50-100']++
+    else if (f <= 200) buckets['100-200']++
+    else buckets['>200']++
+  }
+  const avgFps = frames.length ? (frames.length / (frameTotal / 1000)).toFixed(1) : 'n/a'
+  const slowFrames = frames.filter((f) => f > 33).length
+  const veryslowFrames = frames.filter((f) => f > 100).length
+
+  // Longtask summary
+  const ltMs = longtasks.reduce((a, b) => a + b.duration, 0)
+  const ltMax = longtasks.length ? Math.max(...longtasks.map((e) => e.duration)) : 0
+
+  // Mutation rate
+  let mutTotal = mutations.length
+  let mutDurs = []
+  for (let i = 1; i < mutations.length; i++) {
+    mutDurs.push(mutations[i].t - mutations[i - 1].t)
+  }
+  mutDurs.sort((a, b) => a - b)
+  const mutP50 = mutDurs[Math.floor(mutDurs.length * 0.5)] ?? 0
+  const mutP95 = mutDurs[Math.floor(mutDurs.length * 0.95)] ?? 0
+
+  // Growth rate
+  const firstLen = samples[0]?.len ?? 0
+  const lastLen = samples[samples.length - 1]?.len ?? 0
+  const elapsedS = samples.length ? (samples[samples.length - 1].t - samples[0].t) / 1000 : 0
+  const charsPerSec = elapsedS ? ((lastLen - firstLen) / elapsedS).toFixed(1) : 'n/a'
+
+  console.log('\n=== STREAM RESULTS ===')
+  console.log('window:', (frameTotal / 1000).toFixed(1), 's | frames:', frames.length, '| avgFps:', avgFps, '| maxFrame:', maxFrame.toFixed(1), 'ms')
+  console.log('frame histogram:', buckets)
+  console.log('slow frames (>33ms):', slowFrames, '| very slow (>100ms):', veryslowFrames)
+  console.log('longtasks:', longtasks.length, 'total', ltMs.toFixed(0), 'ms — max', ltMax.toFixed(1), 'ms')
+  console.log('text grew', firstLen, '→', lastLen, 'chars (', charsPerSec, 'char/s )')
+  console.log('mutations on streaming msg:', mutTotal, '| inter-mutation p50:', mutP50.toFixed(1), 'ms', 'p95:', mutP95.toFixed(1), 'ms')
+
+  cdp.close()
+}
+
+main().catch((e) => { console.error(e); process.exit(1) })
--- a/apps/desktop/scripts/measure-synthetic-stream.mjs
+++ b/apps/desktop/scripts/measure-synthetic-stream.mjs
@ -0,0 +1,318 @@
+// Measure render cost of a synthetic stream driven through the live $messages atom.
+//
+// Why synthetic: the user's LLM credits are depleted; we can't fire a real stream.
+// The synthetic stream exercises the exact same React pipeline (assistant-ui runtime →
+// repository.addOrUpdateMessage → MessagePrimitive re-render → markdown reflow) as a
+// real stream. The only thing it does NOT exercise is the gateway → SSE → optimistic-
+// merge path, which is orthogonal to the rendering question.
+//
+// What we record:
+//   1) rAF frame intervals (long-frame histogram; >33ms = perceived jank, >100ms = bad)
+//   2) PerformanceObserver `longtask` entries (task >50ms blocks input)
+//   3) MutationObserver: per-message mutation count & inter-mutation latency
+//   4) Optional: typing latency overlay — typing into composer while streaming
+//
+// Output is plain text suitable for terminal + a JSON sidecar for diffing across runs.
+
+import { writeFileSync } from 'node:fs'
+
+// HTTP endpoint of the DevTools debugging port; `/json` on it lists the targets.
+const CDP_HTTP = 'http://127.0.0.1:9222'
+// Passed to the driver as `totalTokens` (env TOKENS).
+const TOKENS = Number(process.env.TOKENS || 300)
+// Passed to the driver as `intervalMs` (env INTERVAL_MS). The script waits
+// TOKENS * INTERVAL_MS + 1500 ms before pulling the recordings.
+const INTERVAL_MS = Number(process.env.INTERVAL_MS || 16)
+// Passed to the driver as `chunk` (env CHUNK).
+const CHUNK = process.env.CHUNK || 'lorem ipsum '
+// Set TYPE_WHILE_STREAMING=1 to also type into the composer during the stream.
+const TYPE_WHILE_STREAMING = process.env.TYPE_WHILE_STREAMING === '1'
+// Label stored in the result and used in the default output file name (env LABEL).
+const LABEL = process.env.LABEL || 'baseline'
+// Path of the JSON sidecar written at the end of the run (env OUT).
+const OUT = process.env.OUT || `frame-times-${LABEL}.json`
+
+async function getTarget() {
+  const list = await (await fetch(`${CDP_HTTP}/json`)).json()
+  const t = list.find((t) => t.type === 'page' && /5174/.test(t.url))
+  if (!t) throw new Error('renderer not found')
+  return t
+}
+
+class CDP {
+  constructor(ws) { this.ws = ws; this.id = 0; this.pending = new Map() }
+  static async open(url) {
+    const ws = new WebSocket(url)
+    await new Promise((r, j) => {
+      ws.addEventListener('open', r, { once: true })
+      ws.addEventListener('error', (e) => j(e), { once: true })
+    })
+    const cdp = new CDP(ws)
+    ws.addEventListener('message', (ev) => {
+      const m = JSON.parse(ev.data.toString())
+      if (m.id != null && cdp.pending.has(m.id)) {
+        const { resolve, reject } = cdp.pending.get(m.id)
+        cdp.pending.delete(m.id)
+        if (m.error) reject(new Error(m.error.message))
+        else resolve(m.result)
+      }
+    })
+    return cdp
+  }
+  send(method, params) {
+    const id = ++this.id
+    return new Promise((res, rej) => {
+      this.pending.set(id, { resolve: res, reject: rej })
+      this.ws.send(JSON.stringify({ id, method, params }))
+    })
+  }
+  async eval(expr) {
+    const r = await this.send('Runtime.evaluate', { expression: expr, returnByValue: true, awaitPromise: true })
+    if (r.exceptionDetails) throw new Error(r.exceptionDetails.exception?.description || 'eval')
+    return r.result.value
+  }
+  close() { this.ws.close() }
+}
+
+function pct(arr, p) {
+  if (!arr.length) return 0
+  const i = Math.min(arr.length - 1, Math.floor(arr.length * p))
+  return arr[i]
+}
+
+async function main() {
+  const target = await getTarget()
+  const cdp = await CDP.open(target.webSocketDebuggerUrl)
+
+  // Sanity check driver is loaded.
+  const probeOk = await cdp.eval('!!window.__PERF_DRIVE__ && !!window.__PERF_DRIVE__.stream')
+  if (!probeOk) {
+    console.error('__PERF_DRIVE__ not on window — did you reload the renderer after editing perf-probe.tsx?')
+    cdp.close()
+    process.exit(2)
+  }
+
+  // Install recorders.
+  await cdp.eval(`
+    (() => {
+      window.__FT__ = { times: [], stop: false }
+      let last = performance.now()
+      const tick = () => {
+        if (window.__FT__.stop) return
+        const now = performance.now()
+        window.__FT__.times.push(now - last)
+        last = now
+        requestAnimationFrame(tick)
+      }
+      requestAnimationFrame(tick)
+
+      window.__LT__ = { entries: [], stop: false }
+      try {
+        const po = new PerformanceObserver((list) => {
+          if (window.__LT__.stop) return
+          for (const e of list.getEntries()) {
+            window.__LT__.entries.push({ name: e.name, duration: e.duration, startTime: e.startTime })
+          }
+        })
+        po.observe({ entryTypes: ['longtask'] })
+        window.__LT__.po = po
+      } catch {}
+
+      window.__MO__ = { mutations: [], stop: false, currentMsg: null }
+      const arm = () => {
+        const all = document.querySelectorAll('[data-slot="aui_assistant-message-root"]')
+        const last = all[all.length - 1]
+        if (!last || last === window.__MO__.currentMsg) return
+        window.__MO__.currentMsg = last
+        if (window.__MO__.obs) window.__MO__.obs.disconnect()
+        const obs = new MutationObserver((muts) => {
+          if (window.__MO__.stop) return
+          const t = performance.now()
+          window.__MO__.mutations.push({ t, count: muts.length, len: last.textContent.length })
+        })
+        obs.observe(last, { childList: true, subtree: true, characterData: true })
+        window.__MO__.obs = obs
+      }
+      window.__MO__.arm = arm
+
+      // Optional: typing observer — fires keystroke timings if asked.
+      window.__TYP__ = { times: [], stop: false, lastKey: 0 }
+      return 'recorders armed'
+    })()
+  `)
+
+  // Baseline state.
+  const base = JSON.parse(await cdp.eval(`
+    JSON.stringify({
+      assistantCount: document.querySelectorAll('[data-slot="aui_assistant-message-root"]').length,
+      atomCount: window.__PERF_DRIVE__.snapshotMsgs()
+    })
+  `))
+  console.log('baseline:', base)
+
+  // Drive a synthetic stream.
+  const streamStart = Date.now()
+  await cdp.eval(`window.__PERF_DRIVE__.stream({ chunk: ${JSON.stringify(CHUNK)}, intervalMs: ${INTERVAL_MS}, totalTokens: ${TOKENS} })`)
+
+  // After the first paint, arm MO on the new message.
+  await new Promise((r) => setTimeout(r, 200))
+  await cdp.eval('window.__MO__.arm()')
+
+  // Optional: type while streaming.
+  if (TYPE_WHILE_STREAMING) {
+    await new Promise((r) => setTimeout(r, 400))
+    await cdp.eval(`(() => {
+      const ed = document.querySelector('[contenteditable="true"]')
+      ed.focus()
+      window.__TYP__.startedAt = performance.now()
+      const text = 'the quick brown fox jumps over the lazy dog '
+      let i = 0
+      const tick = () => {
+        if (i >= text.length) return
+        const t0 = performance.now()
+        document.execCommand('insertText', false, text[i])
+        // requestAnimationFrame to wait for next paint
+        requestAnimationFrame(() => {
+          window.__TYP__.times.push(performance.now() - t0)
+        })
+        i++
+        setTimeout(tick, 60)
+      }
+      tick()
+      return 'typing'
+    })()`)
+  }
+
+  // Wait for stream to complete + small grace.
+  const expectedMs = TOKENS * INTERVAL_MS + 1500
+  await new Promise((r) => setTimeout(r, expectedMs))
+
+  // Pull recordings.
+  const data = JSON.parse(await cdp.eval(`
+    (() => {
+      window.__FT__.stop = true
+      window.__LT__.stop = true
+      window.__MO__.stop = true
+      window.__TYP__.stop = true
+      try { window.__LT__.po && window.__LT__.po.disconnect() } catch {}
+      try { window.__MO__.obs && window.__MO__.obs.disconnect() } catch {}
+      return JSON.stringify({
+        frames: window.__FT__.times,
+        longtasks: window.__LT__.entries,
+        mutations: window.__MO__.mutations,
+        typing: window.__TYP__.times,
+        finalText: (() => { const a = document.querySelectorAll('[data-slot="aui_assistant-message-root"]'); return a.length ? a[a.length-1].textContent.length : 0 })()
+      })
+    })()
+  `))
+
+  // Reset DOM back to baseline so we don't accumulate fake messages.
+  await cdp.eval('window.__PERF_DRIVE__.reset()')
+
+  // Analysis (trim warm-up: drop frames before first mutation timestamp).
+  const firstMut = data.mutations[0]?.t
+  const frames = data.frames
+
+  // Sum durations to figure out when each frame happened (relative to recorder start).
+  const frameTimeline = []
+  let acc = 0
+  for (const f of frames) { acc += f; frameTimeline.push(acc) }
+
+  // Mutations are in performance.now() ms; frames started recording when we installed
+  // the recorder (before stream). To align: compute total stream window from frames
+  // after mutation activity began. Simpler heuristic: drop first 500ms of frames as warm-up.
+  const WARMUP_MS = 500
+  let dropIdx = 0
+  for (let i = 0; i < frames.length; i++) {
+    if (frameTimeline[i] >= WARMUP_MS) { dropIdx = i; break }
+  }
+  const streamFrames = frames.slice(dropIdx)
+
+  const buckets = { '<=16.7': 0, '16.7-33': 0, '33-50': 0, '50-100': 0, '100-200': 0, '>200': 0 }
+  let frameTotal = 0
+  let maxFrame = 0
+  for (const f of streamFrames) {
+    frameTotal += f
+    if (f > maxFrame) maxFrame = f
+    if (f <= 16.7) buckets['<=16.7']++
+    else if (f <= 33) buckets['16.7-33']++
+    else if (f <= 50) buckets['33-50']++
+    else if (f <= 100) buckets['50-100']++
+    else if (f <= 200) buckets['100-200']++
+    else buckets['>200']++
+  }
+  const sortedFrames = [...streamFrames].sort((a, b) => a - b)
+  const fAvgFps = streamFrames.length ? (streamFrames.length / (frameTotal / 1000)).toFixed(1) : 'n/a'
+  const fP50 = pct(sortedFrames, 0.5).toFixed(1)
+  const fP95 = pct(sortedFrames, 0.95).toFixed(1)
+  const fP99 = pct(sortedFrames, 0.99).toFixed(1)
+  const slowFrames = streamFrames.filter((f) => f > 33).length
+  const veryslowFrames = streamFrames.filter((f) => f > 100).length
+
+  const ltDur = data.longtasks.map((e) => e.duration).sort((a, b) => a - b)
+  const ltMs = ltDur.reduce((a, b) => a + b, 0)
+  const ltMax = ltDur.length ? ltDur[ltDur.length - 1] : 0
+  const ltP95 = pct(ltDur, 0.95)
+
+  // Mutation cadence.
+  const mutDurs = []
+  for (let i = 1; i < data.mutations.length; i++) mutDurs.push(data.mutations[i].t - data.mutations[i - 1].t)
+  mutDurs.sort((a, b) => a - b)
+  const mutP50 = pct(mutDurs, 0.5)
+  const mutP95 = pct(mutDurs, 0.95)
+  const mutMax = mutDurs.length ? mutDurs[mutDurs.length - 1] : 0
+
+  // Typing latency (optional).
+  let typingSummary = null
+  if (TYPE_WHILE_STREAMING && data.typing.length) {
+    const t = [...data.typing].sort((a, b) => a - b)
+    typingSummary = {
+      n: t.length,
+      p50: pct(t, 0.5).toFixed(1),
+      p95: pct(t, 0.95).toFixed(1),
+      max: t[t.length - 1].toFixed(1)
+    }
+  }
+
+  const result = {
+    label: LABEL,
+    timestamp: new Date().toISOString(),
+    config: { TOKENS, INTERVAL_MS, CHUNK, TYPE_WHILE_STREAMING },
+    streamWallMs: Date.now() - streamStart,
+    frames: {
+      total: streamFrames.length,
+      avgFps: fAvgFps,
+      windowS: (frameTotal / 1000).toFixed(1),
+      p50: fP50,
+      p95: fP95,
+      p99: fP99,
+      max: maxFrame.toFixed(1),
+      slow33: slowFrames,
+      veryslow100: veryslowFrames,
+      histogram: buckets
+    },
+    longtasks: {
+      n: data.longtasks.length,
+      totalMs: ltMs.toFixed(0),
+      maxMs: ltMax.toFixed(1),
+      p95Ms: ltP95.toFixed(1)
+    },
+    mutations: {
+      n: data.mutations.length,
+      finalTextLen: data.finalText,
+      interMutP50ms: mutP50.toFixed(1),
+      interMutP95ms: mutP95.toFixed(1),
+      interMutMaxMs: mutMax.toFixed(1)
+    },
+    typing: typingSummary
+  }
+
+  writeFileSync(OUT, JSON.stringify(result, null, 2))
+
+  console.log('\n=== SYNTHETIC STREAM RESULTS ===')
+  console.log('label:', LABEL, '| tokens:', TOKENS, '@', INTERVAL_MS, 'ms')
+  console.log('streamWallMs:', result.streamWallMs)
+  console.log('FRAMES: avgFps', fAvgFps, '| p50', fP50, 'ms | p95', fP95, 'ms | p99', fP99, 'ms | max', maxFrame.toFixed(1), 'ms')
+  console.log('FRAMES histogram:', buckets)
+  console.log('FRAMES slow(>33):', slowFrames, '/ veryslow(>100):', veryslowFrames, 'of', streamFrames.length)
+  console.log('LONGTASKS:', data.longtasks.length, '| total', ltMs.toFixed(0), 'ms | max', ltMax.toFixed(1), 'ms | p95', ltP95.toFixed(1), 'ms')
+  console.log('MUTATIONS:', data.mutations.length, '| finalLen', data.finalText, 'chars | inter p50', mutP50.toFixed(1), 'ms | p95', mutP95.toFixed(1), 'ms')
+  if (typingSummary) console.log('TYPING-WHILE-STREAMING latency: p50', typingSummary.p50, 'ms | p95', typingSummary.p95, 'ms | n=', typingSummary.n)
+  console.log('written to', OUT)
+
+  cdp.close()
+}
+
+main().catch((e) => { console.error(e); process.exit(1) })
--- a/apps/desktop/scripts/profile-real-stream.mjs
+++ b/apps/desktop/scripts/profile-real-stream.mjs
@ -0,0 +1,137 @@
+// CPU-profile during a real LLM stream — confirms or refutes whether the
+// synthetic stream's hotspots (Streamdown markdown re-parse, FadeText)
+// match real-world content.
+//
+// Run *after* model is set to something fast + cheap (gpt-4o-mini etc.).
+// Sends a prompt likely to produce markdown + a numbered list.
+
+import { writeFileSync } from 'node:fs'
+
+// HTTP endpoint of the DevTools debugging port; `/json` on it lists the targets.
+const CDP_HTTP = 'http://127.0.0.1:9222'
+// Prompt typed into the composer (env PROMPT).
+const PROMPT = process.env.PROMPT || 'Give me a numbered list of 8 useful bash one-liners. For each: a brief description, then the command in a code block. No preamble.'
+// Where the .cpuprofile is written (env OUT); the default is timestamped under /tmp.
+const OUT = process.env.OUT || `/tmp/real-stream-${Date.now()}.cpuprofile`
+// How long (ms) to wait for a new assistant message after submitting (env START_TIMEOUT).
+const START_TIMEOUT = Number(process.env.START_TIMEOUT || 45000)
+// Upper bound (ms) on the profiled stream window (env STREAM_TIMEOUT).
+const STREAM_TIMEOUT = Number(process.env.STREAM_TIMEOUT || 60000)
+
+class CDP {
+  constructor(ws) { this.ws = ws; this.id = 0; this.pending = new Map() }
+  static async open(url) {
+    const ws = new WebSocket(url)
+    await new Promise((r) => ws.addEventListener('open', r, { once: true }))
+    const cdp = new CDP(ws)
+    ws.addEventListener('message', (ev) => {
+      const m = JSON.parse(ev.data.toString())
+      if (m.id != null && cdp.pending.has(m.id)) {
+        const { resolve, reject } = cdp.pending.get(m.id)
+        cdp.pending.delete(m.id)
+        if (m.error) reject(new Error(m.error.message))
+        else resolve(m.result)
+      }
+    })
+    return cdp
+  }
+  send(method, params) {
+    const id = ++this.id
+    return new Promise((res, rej) => {
+      this.pending.set(id, { resolve: res, reject: rej })
+      this.ws.send(JSON.stringify({ id, method, params }))
+    })
+  }
+  async eval(expr) {
+    const r = await this.send('Runtime.evaluate', { expression: expr, returnByValue: true, awaitPromise: true })
+    if (r.exceptionDetails) throw new Error(r.exceptionDetails.exception?.description || 'eval')
+    return r.result.value
+  }
+  close() { this.ws.close() }
+}
+
+async function main() {
+  const list = await (await fetch(`${CDP_HTTP}/json`)).json()
+  const target = list.find((t) => t.type === 'page' && /5174/.test(t.url))
+  const cdp = await CDP.open(target.webSocketDebuggerUrl)
+
+  const baseCount = await cdp.eval('document.querySelectorAll("[data-slot=aui_assistant-message-root]").length')
+
+  // Submit prompt
+  await cdp.eval(`(() => {
+    const ed = document.querySelector('[contenteditable="true"]')
+    ed.focus()
+    document.execCommand('insertText', false, ${JSON.stringify(PROMPT)})
+    ed.dispatchEvent(new KeyboardEvent('keydown', { key: 'Enter', code: 'Enter', which: 13, keyCode: 13, bubbles: true, cancelable: true }))
+    return 'submitted'
+  })()`)
+
+  // Wait for real stream start (assistant count grows).
+  const submitT0 = Date.now()
+  let streamT = null
+  for (let i = 0; i < START_TIMEOUT / 50; i++) {
+    await new Promise((r) => setTimeout(r, 50))
+    const n = await cdp.eval('document.querySelectorAll("[data-slot=aui_assistant-message-root]").length')
+    if (n > baseCount) { streamT = Date.now(); break }
+  }
+  if (!streamT) {
+    console.error('stream never started within', START_TIMEOUT, 'ms')
+    cdp.close()
+    process.exit(2)
+  }
+  console.log('REAL stream started after', streamT - submitT0, 'ms — starting CPU profile NOW')
+
+  // Start CPU profile NOW, only during stream phase.
+  await cdp.send('Profiler.enable')
+  await cdp.send('Profiler.setSamplingInterval', { interval: 100 })
+  await cdp.send('Profiler.start')
+
+  // Wait until busy goes false + grace, or timeout.
+  const cutoff = Date.now() + STREAM_TIMEOUT
+  while (Date.now() < cutoff) {
+    await new Promise((r) => setTimeout(r, 500))
+    const busy = await cdp.eval('!!document.querySelector("[data-status=running], [data-busy=true]")')
+    if (!busy) {
+      await new Promise((r) => setTimeout(r, 500))
+      break
+    }
+  }
+
+  const { profile } = await cdp.send('Profiler.stop')
+  writeFileSync(OUT, JSON.stringify(profile))
+  console.log('wrote', OUT)
+
+  const samples = profile.samples || []
+  const timeDeltas = profile.timeDeltas || []
+  const nodes = new Map(profile.nodes.map((n) => [n.id, n]))
+  const selfTime = new Map()
+  for (let i = 0; i < samples.length; i++) {
+    const id = samples[i]
+    const dt = timeDeltas[i] ?? 0
+    selfTime.set(id, (selfTime.get(id) || 0) + dt)
+  }
+  const ranked = [...selfTime.entries()]
+    .map(([id, us]) => {
+      const n = nodes.get(id)
+      const cf = n?.callFrame || {}
+      return {
+        ms: us / 1000,
+        name: cf.functionName || '(anonymous)',
+        url: (cf.url || '').slice(-60),
+        line: cf.lineNumber
+      }
+    })
+    .filter((x) => !/\(root\)|\(idle\)|\(garbage collector\)|\(program\)/.test(x.name))
+    .sort((a, b) => b.ms - a.ms)
+    .slice(0, 25)
+
+  const finalText = await cdp.eval(`(() => {
+    const all = document.querySelectorAll('[data-slot="aui_assistant-message-root"]')
+    return all.length ? all[all.length-1].textContent.length : 0
+  })()`)
+  console.log('\nfinal assistant message length:', finalText, 'chars')
+
+  console.log('\n=== TOP 25 SELF TIME (ms) DURING REAL STREAM ===')
+  for (const r of ranked) {
+    console.log(`${r.ms.toFixed(1).padStart(7)}  ${r.name.padEnd(40)}  ${r.url}:${r.line}`)
+  }
+
+  cdp.close()
+}
+
+main().catch((e) => { console.error(e); process.exit(1) })
--- a/apps/desktop/scripts/profile-synth-stream.mjs
+++ b/apps/desktop/scripts/profile-synth-stream.mjs
@ -0,0 +1,103 @@
+// CPU-profile a synthetic stream — outputs a .cpuprofile and a top-self ranking.
+// Open the .cpuprofile in Chrome DevTools Performance panel for a flamegraph.
+
+import { writeFileSync } from 'node:fs'
+
+// HTTP endpoint of the Chrome DevTools Protocol server; `/json` on it lists
+// the debuggable targets. (Assumes the app was started with remote debugging
+// on port 9222 — confirm against the launch script.)
+const CDP_HTTP = 'http://127.0.0.1:9222'
+// Run parameters. Each one can be overridden through the environment variable
+// of the same name; an unset or empty variable falls back to the default.
+// TOKENS: number of chunks the synthetic driver appends (sent as `totalTokens`).
+const TOKENS = Number(process.env.TOKENS || 400)
+// INTERVAL_MS: delay between chunks (sent as `intervalMs`); 8 ms is ~125 chunks/s.
+const INTERVAL_MS = Number(process.env.INTERVAL_MS || 8)
+// CHUNK: text appended on every tick; the default deliberately contains
+// markdown syntax (bold, italic, inline code).
+const CHUNK = process.env.CHUNK || '**word** in _italic_ with `code` '
+// LABEL: only used to build the default output file name below.
+const LABEL = process.env.LABEL || 'profile'
+// OUT: path the resulting .cpuprofile is written to.
+const OUT = process.env.OUT || `synth-${LABEL}.cpuprofile`
+
+class CDP {
+  constructor(ws) { this.ws = ws; this.id = 0; this.pending = new Map() }
+  static async open(url) {
+    const ws = new WebSocket(url)
+    await new Promise((r) => ws.addEventListener('open', r, { once: true }))
+    const cdp = new CDP(ws)
+    ws.addEventListener('message', (ev) => {
+      const m = JSON.parse(ev.data.toString())
+      if (m.id != null && cdp.pending.has(m.id)) {
+        const { resolve, reject } = cdp.pending.get(m.id)
+        cdp.pending.delete(m.id)
+        if (m.error) reject(new Error(m.error.message))
+        else resolve(m.result)
+      }
+    })
+    return cdp
+  }
+  send(method, params) {
+    const id = ++this.id
+    return new Promise((res, rej) => {
+      this.pending.set(id, { resolve: res, reject: rej })
+      this.ws.send(JSON.stringify({ id, method, params }))
+    })
+  }
+  async eval(expr) {
+    const r = await this.send('Runtime.evaluate', { expression: expr, returnByValue: true, awaitPromise: true })
+    if (r.exceptionDetails) throw new Error(r.exceptionDetails.exception?.description || 'eval')
+    return r.result.value
+  }
+  close() { this.ws.close() }
+}
+
+async function main() {
+  const list = await (await fetch(`${CDP_HTTP}/json`)).json()
+  const target = list.find((t) => t.type === 'page' && /5174/.test(t.url))
+  const cdp = await CDP.open(target.webSocketDebuggerUrl)
+
+  if (!await cdp.eval('!!window.__PERF_DRIVE__')) {
+    console.error('no __PERF_DRIVE__')
+    cdp.close()
+    process.exit(2)
+  }
+
+  await cdp.send('Profiler.enable')
+  // High-resolution sampling: 100us
+  await cdp.send('Profiler.setSamplingInterval', { interval: 100 })
+  await cdp.send('Profiler.start')
+
+  await cdp.eval(`window.__PERF_DRIVE__.stream({ chunk: ${JSON.stringify(CHUNK)}, intervalMs: ${INTERVAL_MS}, totalTokens: ${TOKENS} })`)
+  await new Promise((r) => setTimeout(r, TOKENS * INTERVAL_MS + 1500))
+  await cdp.eval('window.__PERF_DRIVE__.reset()')
+
+  const { profile } = await cdp.send('Profiler.stop')
+  writeFileSync(OUT, JSON.stringify(profile))
+  console.log('wrote', OUT)
+
+  // Compute top self time per function.
+  const samples = profile.samples || []
+  const timeDeltas = profile.timeDeltas || []
+  const nodes = new Map(profile.nodes.map((n) => [n.id, n]))
+  const selfTime = new Map() // id -> microseconds
+  for (let i = 0; i < samples.length; i++) {
+    const id = samples[i]
+    const dt = timeDeltas[i] ?? 0
+    selfTime.set(id, (selfTime.get(id) || 0) + dt)
+  }
+  const ranked = [...selfTime.entries()]
+    .map(([id, us]) => {
+      const n = nodes.get(id)
+      const cf = n?.callFrame || {}
+      return {
+        us,
+        ms: us / 1000,
+        name: cf.functionName || '(anonymous)',
+        url: (cf.url || '').slice(-60),
+        line: cf.lineNumber
+      }
+    })
+    .filter((x) => !/\(root\)|\(idle\)|\(garbage collector\)|\(program\)/.test(x.name))
+    .sort((a, b) => b.us - a.us)
+    .slice(0, 30)
+
+  console.log('\n=== TOP 30 SELF TIME (ms) ===')
+  for (const r of ranked) {
+    console.log(`${r.ms.toFixed(1).padStart(7)}  ${r.name.padEnd(40)}  ${r.url}:${r.line}`)
+  }
+
+  cdp.close()
+}
+
+main().catch((e) => { console.error(e); process.exit(1) })
--- a/apps/desktop/scripts/profile-typing-lag.md
+++ b/apps/desktop/scripts/profile-typing-lag.md
@ -153,3 +153,133 @@ streaming. `scripts/measure-submit.mjs` measures
 `enter → composer-cleared → user-message-rendered → first-paint`. The
 script triggers a real prompt submission, so use it on a throwaway
 session. Not enabled in CI.
+
+## Streaming "5fps" investigation (May 21, 2026)
+
+User complaint: "the streaming must bring fps to like 5? lol" — felt
+hitches during assistant streaming on long threads.
+
+### Tooling added
+
+- **`src/app/chat/perf-probe.tsx`** — dev-only side-effect import (guarded by
+  `import.meta.env.MODE !== 'production'` in `main.tsx`). Attaches two
+  helpers to `window`:
+  - `__PERF_PROBE__` — React `<Profiler>` recorder. Currently inert because
+    Vite is serving the production React build (see "Vite dev-build issue"
+    below); kept for when that's fixed.
+  - `__PERF_DRIVE__` — synthetic stream driver. Pushes tokens through the
+    live `$messages` atom at a fixed cadence, so the assistant-ui runtime,
+    incremental repository, Streamdown markdown renderer, and React commit
+    pipeline all see the same workload they'd see from a real LLM stream —
+    but with no LLM call (and no credit cost).
+- **`scripts/measure-synthetic-stream.mjs`** — drives `__PERF_DRIVE__`,
+  records rAF frame intervals, `PerformanceObserver({entryTypes:['longtask']})`
+  entries, `MutationObserver` cadence on the live message, and optional
+  type-while-streaming keystroke latency.
+- **`scripts/profile-synth-stream.mjs`** — CPU profile during a synthetic
+  stream; writes a `.cpuprofile` (open in Chrome DevTools Performance panel)
+  and a top-30 self-time table.
+- **`scripts/measure-real-stream.mjs`** — same harness as the synthetic but
+  fires a real LLM prompt. Use when you have credits and want to confirm
+  the synthetic predictions hold.
+- **`scripts/profile-real-stream.mjs`** — CPU profile over the duration of
+  a real LLM stream.
+
+Helpers: `scripts/eval.mjs` (one-shot CDP eval), `scripts/reload.mjs`
+(hard reload renderer over CDP).
+
+### Findings
+
+Measured on the Cloud Shadows session (7 turns, ~11k px scrollHeight) and
+the 34 MB session `session_20260514_215353_fe0ac8.json` (110 FadeText
+instances, lots of historical tool calls).
+
+| metric | Cloud Shadows | 34 MB session |
+|---|---|---|
+| avgFps (60 tok/sec, 5s) | 60.0 | 58.6 |
+| frame p50 / p95 / p99 (ms) | 16.7 / 18.0 / 21.1 | 16.6 / 25.6 / 31.4 |
+| max frame (ms) | 31.1 | 97-127 (varies) |
+| longtasks per 5s window | 0 | 1-2, 75-127 ms |
+| type-while-stream p95 latency (ms) | 17 | — |
+
+A single real-LLM stream on Cloud Shadows (gpt-4o-mini, 39s window) saw
+12 longtasks totalling 1.26 s — same cadence the synthetic predicted
+(~1 hitch per 3.25 s, max 123 ms). So the **synthetic stream is a faithful
+proxy for the real one** and is fine for iterating on fixes without paying
+for tokens.
+
+### CPU profile during streaming (synthetic, markdown content)
+
+Top self-time costs (5 s window, 400 tokens at 125 tok/s, markdown chunks):
+
+| ms (self) | function | source |
+|---|---|---|
+| 260 | `bn$1` | `chunk-BO2N…js:20003` (micromark tokenize) |
+| 249 | `m$1` | `chunk-BO2N…js:19949` (micromark) |
+| 128 | `compile` | `chunk-BO2N…js:21884` (mdast → hast compile) |
+| 73 | FadeText body | `components/ui/fade-text.tsx` |
+| 62 | `parser` | `chunk-BO2N…js:22680` |
+| 49 | `fromThreadMessageLike` | `@assistant-ui/internal` |
+
+That `chunk-BO2N2NFS` is the vendored bundle containing `micromark`,
+`mdast-util-from-markdown`, `mdast-util-to-hast`, `rehype-raw`,
+`hast-util-sanitize`, etc. — i.e. **Streamdown's markdown pipeline,
+re-parsing the entire growing assistant message on every token append**.
+Cost scales linearly with message length.
+
+For comparison, with plain-text chunks (no markdown) the `chunk-BO2N…`
+entries drop out of the top 30 entirely and total work per 5 s window
+halves.
+
+### Fix landed: `FadeText` memo
+
+`FadeText` is used in `tool-fallback.tsx` (110 instances on a tool-heavy
+thread). Before: each parent re-render during streaming triggered a
+`useEffect([children])` that forced a `scrollWidth` layout read — even
+when the title text was unchanged. The `useResizeObserver` already covers
+the genuine resize case, so the effect was strictly redundant.
+
+After: wrapped in `React.memo` with a custom comparator that compares
+`children` (scalar fast-path), `className`, `fadeWidth`, and `style`
+field-by-field. Verified via temporary render counter:
+**122 renders during a 2 s synthetic stream vs ~11 000 without memo**
+(110 instances × ~100 stream updates). Doesn't move the longtask needle
+on its own — Streamdown dwarfs it — but eliminates a class of forced
+layouts and removes a steady CPU floor.
+
+### Not fixed: Streamdown markdown re-parse
+
+This is the dominant cost and the cause of the user's perceived hitches.
+The renderer re-parses the entire message buffer on every stream update.
+At ~3-5 k chars, each parse costs ~30 ms; when several pile into one
+frame the result is a 75-125 ms longtask = the "5 fps moment".
+
+Possible approaches (none implemented here):
+
+1. **Coalesce/throttle Streamdown updates** — render at most every 32 ms
+   instead of every set-state. Reduces parses but doesn't reduce
+   per-parse cost; trades latency for smoothness.
+2. **Memoize per-prefix** — diff the new text against the prior parsed
+   version; only re-parse the changed suffix.
+3. **Render in stable segments** — freeze already-completed paragraphs as
+   immutable React nodes; only the live tail goes through the markdown
+   pipeline on each token. Probably the highest-impact change but requires
+   forking or patching `@assistant-ui/react-streamdown`.
+4. **Move parsing to a Web Worker** — main thread no longer blocks on
+   markdown. Largest surgery; requires double-buffered hast.
+
+### Vite dev-build issue (separate)
+
+`http://127.0.0.1:5174/node_modules/.vite/deps/react.js` resolves to
+`react/cjs/react.production.js`, and `react-dom_client.js` →
+`react-dom-client.production.js`. As a result:
+
+- `<React.Profiler>` `onRender` is never called (`<Profiler>` is a no-op
+  in the production React build).
+- `import.meta.env.DEV` is `false`, `PROD` is `true` even under `vite dev`
+  (hence `MODE !== 'production'` as the workaround in `main.tsx`).
+- All the React 19 dev-only warnings/devtools backend hooks are absent.
+
+Root cause likely sits in `vite.config.ts` aliasing + dedupe + Vite 8's
+new `optimizeDeps` defaults. Worth a separate fix pass — when it's
+resolved, the `<PerfProbe>` blocks in `perf-probe.tsx` become useful
+(per-id commit timings) instead of inert.
--- a/apps/desktop/scripts/reload.mjs
+++ b/apps/desktop/scripts/reload.mjs
@ -0,0 +1,36 @@
+// Hard reload the Electron renderer over CDP. Vite-no-HMR mode means edits
+// don't auto-apply — call this after editing source.
+const targets = await (await fetch('http://127.0.0.1:9222/json')).json()
+const t = targets.find((t) => t.url.includes('5174'))
+if (!t) {
+  console.error('renderer not found')
+  process.exit(1)
+}
+const ws = new WebSocket(t.webSocketDebuggerUrl)
+let id = 0
+const pending = new Map()
+ws.addEventListener('message', (ev) => {
+  const m = JSON.parse(ev.data)
+  if (pending.has(m.id)) {
+    pending.get(m.id)(m)
+    pending.delete(m.id)
+  }
+})
+await new Promise((r) => ws.addEventListener('open', r))
+const send = (method, params = {}) =>
+  new Promise((res) => {
+    const i = ++id
+    pending.set(i, res)
+    ws.send(JSON.stringify({ id: i, method, params }))
+  })
+
+await send('Page.reload', { ignoreCache: true })
+console.log('reload sent')
+// Wait for new doc.
+await new Promise((r) => setTimeout(r, 2500))
+const r = await send('Runtime.evaluate', {
+  expression: 'JSON.stringify({ hasProbe: !!window.__PERF_PROBE__, composer: !!document.querySelector("[contenteditable=true]"), url: location.hash })',
+  returnByValue: true,
+})
+console.log(r.result.result.value)
+ws.close()
--- a/apps/desktop/src/app/chat/perf-probe.tsx
+++ b/apps/desktop/src/app/chat/perf-probe.tsx
@ -0,0 +1,167 @@
+import { Profiler, type ProfilerOnRenderCallback, type ReactNode } from 'react'
+
+import { $messages, setMessages, setBusy } from '@/store/session'
+
+/**
+ * One recorded React `<Profiler>` commit. The fields are stored verbatim from
+ * the arguments React passes to the `onRender` callback in this file; see the
+ * React `<Profiler>` documentation for their exact semantics.
+ */
+type Sample = {
+  id: string
+  phase: string
+  actualDuration: number
+  baseDuration: number
+  startTime: number
+  commitTime: number
+}
+
+/**
+ * Handle returned by `__PERF_DRIVE__.stream`. `stop()` cancels the pending
+ * tick and marks the synthetic message as no longer pending.
+ */
+type SyntheticDriverHandle = { stop: () => void }
+
+// Ambient typing for the two debugging helpers this module installs on
+// `window`. Both are optional because they exist only once this module has run.
+declare global {
+  interface Window {
+    /** Recorder fed by the `onRender` callback of `<PerfProbe>`. */
+    __PERF_PROBE__?: {
+      /** Recorded commits, oldest first. */
+      samples: Sample[]
+      /** Recording is off until this is set to `true`. */
+      enabled: boolean
+      /** Empties `samples` in place. */
+      clear: () => void
+      /** Per-`id:phase` statistics over `actualDuration`, rounded to two decimals. */
+      summary: () => Record<string, { count: number; total: number; max: number; p50: number; p95: number }>
+    }
+    /** Synthetic stream driver operating on the live message store. */
+    __PERF_DRIVE__?: {
+      /** Inject an assistant message and grow it by `chunk` every `intervalMs`. Returns a stop handle. */
+      stream: (opts?: { chunk?: string; intervalMs?: number; totalTokens?: number }) => SyntheticDriverHandle
+      /** Stops any active stream and restores the messages captured before the first stream. */
+      reset: () => void
+      /** Current number of messages in the store. */
+      snapshotMsgs: () => number
+    }
+  }
+}
+
+// Install the `<Profiler>` recorder once per page. Skipped outside a browser
+// and when a recorder is already present.
+if (typeof window !== 'undefined' && !window.__PERF_PROBE__) {
+  const samples: Sample[] = []
+  // Round to two decimals for readable console output.
+  const round2 = (value: number) => Math.round(value * 100) / 100
+
+  window.__PERF_PROBE__ = {
+    samples,
+    enabled: false,
+    clear: () => {
+      samples.length = 0
+    },
+    summary: () => {
+      // Bucket actualDuration by "<id>:<phase>".
+      const durations = new Map<string, number[]>()
+      for (const { id, phase, actualDuration } of samples) {
+        const key = `${id}:${phase}`
+        const bucket = durations.get(key)
+        if (bucket) bucket.push(actualDuration)
+        else durations.set(key, [actualDuration])
+      }
+
+      const report: Record<string, { count: number; total: number; max: number; p50: number; p95: number }> = {}
+      for (const [key, values] of durations) {
+        // Ascending order so max and the percentiles are plain index lookups.
+        values.sort((a, b) => a - b)
+        const count = values.length
+        let total = 0
+        for (const v of values) total += v
+        report[key] = {
+          count,
+          total: round2(total),
+          max: round2(values[count - 1]),
+          p50: round2(values[Math.floor(count * 0.5)]),
+          p95: round2(values[Math.floor(count * 0.95)]),
+        }
+      }
+      return report
+    },
+  }
+}
+
+/**
+ * `<Profiler>` callback: appends one sample to `window.__PERF_PROBE__` while
+ * recording is enabled, keeping only the most recent 5000 samples.
+ */
+const onRender: ProfilerOnRenderCallback = (id, phase, actualDuration, baseDuration, startTime, commitTime) => {
+  if (typeof window === 'undefined') return
+  const recorder = window.__PERF_PROBE__
+  if (!recorder?.enabled) return
+
+  const log = recorder.samples
+  log.push({ id, phase, actualDuration, baseDuration, startTime, commitTime })
+  // Drop the oldest entries once the cap is exceeded.
+  const excess = log.length - 5000
+  if (excess > 0) log.splice(0, excess)
+}
+
+if (typeof window !== 'undefined' && !window.__PERF_DRIVE__) {
+  // Synthetic stream driver — pushes tokens through the live $messages atom so the
+  // assistant-ui runtime + react tree sees them exactly as a real LLM stream would.
+  // Driven by scripts/measure-synthetic-stream.mjs and scripts/profile-synth-stream.mjs
+  // when no live LLM credit should be spent.
+
+  // Message list captured before the first synthetic stream; restored by reset().
+  let baseline: ReturnType<typeof $messages.get> | null = null
+  // Handle of the stream currently ticking, if any. At most one runs at a time.
+  let activeHandle: SyntheticDriverHandle | null = null
+
+  window.__PERF_DRIVE__ = {
+    snapshotMsgs: () => $messages.get().length,
+    reset: () => {
+      activeHandle?.stop()
+      if (baseline) setMessages(baseline)
+      baseline = null
+      setBusy(false)
+    },
+    stream: ({ chunk = 'word ', intervalMs = 16, totalTokens = 400 } = {}) => {
+      // Starting a new stream finalizes whichever one was running.
+      activeHandle?.stop()
+      const current = $messages.get()
+      if (!baseline) baseline = current
+      const msgId = `synthetic-${Date.now()}`
+      // Seed an empty assistant message — assistant-ui will see it grow.
+      setMessages([
+        ...current,
+        {
+          id: msgId,
+          role: 'assistant',
+          parts: [{ type: 'text', text: '' }],
+          timestamp: Date.now(),
+          pending: true
+        }
+      ])
+      setBusy(true)
+
+      let pushed = 0
+      let stopped = false
+      let timer: ReturnType<typeof setTimeout> | null = null
+      const handle: SyntheticDriverHandle = {
+        stop: () => {
+          // Idempotent: a caller may still hold the handle of a stream that
+          // was already finalized (e.g. superseded by a later stream()).
+          if (stopped) return
+          stopped = true
+          if (timer) clearTimeout(timer)
+          timer = null
+          // Mark message finalized.
+          setMessages(prev =>
+            prev.map(m =>
+              m.id === msgId
+                ? { ...m, pending: false }
+                : m
+            )
+          )
+          // Only the active stream owns the shared state; a stale handle must
+          // not cancel a newer stream or clear its busy flag.
+          if (activeHandle === handle) {
+            activeHandle = null
+            setBusy(false)
+          }
+        }
+      }
+      activeHandle = handle
+
+      const tick = () => {
+        if (stopped || activeHandle !== handle) return
+        if (pushed >= totalTokens) {
+          handle.stop()
+          return
+        }
+        pushed += 1
+        // Append `chunk` to the trailing text part of the synthetic message.
+        setMessages(prev =>
+          prev.map(m => {
+            if (m.id !== msgId) return m
+            const head = m.parts.slice(0, -1)
+            const last = m.parts.at(-1)
+            const lastText = last && last.type === 'text' ? last.text : ''
+            return {
+              ...m,
+              parts: [...head, { type: 'text', text: lastText + chunk }]
+            }
+          })
+        )
+        timer = setTimeout(tick, intervalMs)
+      }
+      timer = setTimeout(tick, intervalMs)
+      return handle
+    }
+  }
+}
+
+/**
+ * Wraps `children` in a React `<Profiler>` whose commits are recorded into
+ * `window.__PERF_PROBE__` under the given `id`.
+ */
+export function PerfProbe(props: { id: string; children: ReactNode }) {
+  const { id, children } = props
+  return <Profiler id={id} onRender={onRender}>{children}</Profiler>
+}
--- a/apps/desktop/src/main.tsx
+++ b/apps/desktop/src/main.tsx
@ -12,6 +12,15 @@ import { ThemeProvider } from './themes/context'

 installClipboardShim()

+// Dev-only: install __PERF_DRIVE__ + __PERF_PROBE__ on window so the
+// scripts/ harnesses can drive a synthetic stream + record render cost.
+// Tree-shaken out of production builds. (Uses MODE rather than DEV because
+// our Vite setup currently bundles with PROD=true even in `vite dev`; see
+// scripts/dev-no-hmr.mjs for the surrounding workarounds.)
+if (import.meta.env.MODE !== 'production') {
+  // Fire-and-forget: the probe is optional tooling, so a failed load is
+  // logged instead of surfacing as an unhandled promise rejection.
+  void import('./app/chat/perf-probe').catch((err: unknown) => {
+    console.warn('perf-probe failed to load', err)
+  })
+}
+
 const queryClient = new QueryClient({
  defaultOptions: {
    queries: {