chore(desktop): synthetic-stream perf harness + scripts

Drops the React `<Profiler>` approach (no-op because Vite is currently
serving the production React build) in favor of an externally-observable
measurement stack: rAF frame intervals, `PerformanceObserver({entryTypes:
['longtask']})`, and a `MutationObserver` on the live streaming message.

Adds a synthetic stream driver — `window.__PERF_DRIVE__.stream({...})` —
that pushes tokens through the live `$messages` atom at a controlled rate,
so the assistant-ui runtime, incremental repository, and Streamdown
markdown pipeline see the same workload they'd see during a real LLM
stream, without the LLM cost.

The driver lives in `src/app/chat/perf-probe.tsx`; `main.tsx` side-imports
it under `import.meta.env.MODE !== 'production'` so it tree-shakes out of
prod builds. (Using `MODE` rather than `DEV` because our Vite setup
currently reports `DEV=false` even under `vite dev` — see the dev-build
note in `profile-typing-lag.md`.)

Scripts:
  - measure-synthetic-stream.mjs  drive synthetic + record frame/longtask/mutation
  - profile-synth-stream.mjs      CPU profile + top self-time during synthetic
  - measure-real-stream.mjs       same harness, real LLM stream
  - profile-real-stream.mjs       CPU profile bracketing the real stream window
  - eval.mjs / reload.mjs         small CDP helpers

A real-LLM measurement on Cloud Shadows (gpt-4o-mini, 39 s window) showed
12 longtasks in the same 75-127 ms range the synthetic predicted, so the
synthetic is a faithful proxy.
This commit is contained in:
Brooklyn Nicholson 2026-05-21 19:38:26 -05:00
parent 5abf89ddd1
commit 99f2a9503c
9 changed files with 1173 additions and 0 deletions

View file

@ -0,0 +1,21 @@
// One-shot CDP eval helper.
//
// Evaluates the expression given as the first CLI argument (default '1+1') in
// the renderer page whose URL contains '5174' and prints the JSON-serialised
// `result.value`. Promises returned by the expression are awaited.
//
// Exit status: 2 when no matching target exists, 1 when the protocol call
// fails or the evaluated expression throws, 0 otherwise.
const targets = await (await fetch('http://127.0.0.1:9222/json')).json()
const t = targets.find((t) => t.url.includes('5174'))
if (!t) {
  // Without this guard the script dies with an opaque TypeError below.
  console.error('renderer not found')
  process.exit(2)
}
const ws = new WebSocket(t.webSocketDebuggerUrl)
let id = 0
// Request id -> resolver for the matching response message.
const pending = new Map()
ws.addEventListener('message', (ev) => {
  const m = JSON.parse(ev.data)
  if (pending.has(m.id)) { pending.get(m.id)(m); pending.delete(m.id) }
})
// Reject on a connection error instead of awaiting an 'open' that never comes.
await new Promise((resolve, reject) => {
  ws.addEventListener('open', resolve, { once: true })
  ws.addEventListener('error', () => reject(new Error('CDP socket failed to connect')), { once: true })
})
// Sends one CDP command and resolves with the raw response message.
const send = (method, params) => new Promise((res) => {
  const i = ++id
  pending.set(i, res)
  ws.send(JSON.stringify({ id: i, method, params }))
})
const expr = process.argv[2] || '1+1'
const r = await send('Runtime.evaluate', { expression: expr, returnByValue: true, awaitPromise: true })
if (r.error) {
  // Protocol-level failure: the response carries `error` and no `result`.
  console.error('CDP ERROR:', r.error.message)
  process.exitCode = 1
} else if (r.result.exceptionDetails) {
  console.error('EXCEPTION:', r.result.exceptionDetails.exception?.description)
  process.exitCode = 1
} else {
  console.log(JSON.stringify(r.result.result.value, null, 2))
}
ws.close()

View file

@ -0,0 +1,252 @@
// REAL streaming measurement — no React internals.
//
// Measures:
// 1) rAF frame intervals during a verified live stream (long-frame histogram)
// 2) MutationObserver: how often does the live assistant message mutate, what's the budget per mutation
// 3) Text length growth rate (chars/sec)
// 4) PerformanceObserver `longtask` entries (any task > 50ms blocks input)
//
// Detects REAL stream by waiting for assistant-message DOM count to grow past baseline.
// Does NOT cancel — lets the stream run to completion or hits TIMEOUT_MS.
// HTTP endpoint of the Chrome DevTools Protocol server exposed by the renderer.
const CDP_HTTP = 'http://127.0.0.1:9222'
// Prompt typed into the composer; override with PROMPT=...
const PROMPT = process.env.PROMPT || 'count from 1 to 80, one number per line'
// Upper bound (ms) for the sampling loop; override with TIMEOUT_MS=...
// An unset, empty, non-numeric or negative value falls back to 60000 so a typo
// cannot turn into NaN and silently skip the whole sampling loop.
const TIMEOUT_MS = (() => {
  const raw = process.env.TIMEOUT_MS
  const parsed = raw ? Number(raw) : NaN
  return Number.isFinite(parsed) && parsed >= 0 ? parsed : 60000
})()
async function getTarget() {
const list = await (await fetch(`${CDP_HTTP}/json`)).json()
const t = list.find((t) => t.type === 'page' && /5174/.test(t.url))
if (!t) throw new Error('renderer not found')
return t
}
class CDP {
constructor(ws) { this.ws = ws; this.id = 0; this.pending = new Map() }
static async open(url) {
const ws = new WebSocket(url)
await new Promise((r, j) => {
ws.addEventListener('open', r, { once: true })
ws.addEventListener('error', (e) => j(e), { once: true })
})
const cdp = new CDP(ws)
ws.addEventListener('message', (event) => {
const m = JSON.parse(event.data.toString())
if (m.id != null && cdp.pending.has(m.id)) {
const { resolve, reject } = cdp.pending.get(m.id)
cdp.pending.delete(m.id)
if (m.error) reject(new Error(m.error.message))
else resolve(m.result)
}
})
return cdp
}
send(method, params) {
const id = ++this.id
return new Promise((res, rej) => {
this.pending.set(id, { resolve: res, reject: rej })
this.ws.send(JSON.stringify({ id, method, params }))
})
}
async eval(expr) {
const r = await this.send('Runtime.evaluate', { expression: expr, returnByValue: true, awaitPromise: true })
if (r.exceptionDetails) throw new Error(r.exceptionDetails.exception?.description || 'eval')
return r.result.value
}
close() { this.ws.close() }
}
// Submits PROMPT through the live composer, waits for a real assistant stream
// to start, and prints frame / longtask / mutation / text-growth statistics.
//
// Frame and longtask data are reset when the stream is first detected, so
// the reported numbers cover the stream window only, not the wait for the
// first token. On failure (no composer, stream never starts) the in-page
// recorders are stopped and the process exit code is set to 2. The CDP socket
// is always closed.
async function main() {
  const target = await getTarget()
  const cdp = await CDP.open(target.webSocketDebuggerUrl)

  // Stops every in-page recorder and returns what they captured as JSON.
  // Also used on failure paths so the rAF loop and the observers do not keep
  // running in the renderer after this script has exited.
  const stopRecorders = () => cdp.eval(`
    (() => {
      window.__FT__.stop = true
      window.__LT__.stop = true
      window.__MO__.stop = true
      try { window.__LT__.po && window.__LT__.po.disconnect() } catch {}
      try { window.__MO__.obs && window.__MO__.obs.disconnect() } catch {}
      return JSON.stringify({
        frames: window.__FT__.times,
        longtasks: window.__LT__.entries,
        mutations: window.__MO__.mutations,
      })
    })()
  `)

  try {
    // Install recorders.
    await cdp.eval(`
      (() => {
        // rAF frame intervals
        window.__FT__ = { times: [], stop: false }
        let last = performance.now()
        const tick = () => {
          if (window.__FT__.stop) return
          const now = performance.now()
          window.__FT__.times.push(now - last)
          last = now
          requestAnimationFrame(tick)
        }
        requestAnimationFrame(tick)
        // longtask observer; 'since' is moved forward when the stream starts
        window.__LT__ = { entries: [], stop: false, since: 0 }
        try {
          const po = new PerformanceObserver((list) => {
            if (window.__LT__.stop) return
            for (const e of list.getEntries()) {
              // Entries are delivered asynchronously; skip pre-stream ones.
              if (e.startTime < window.__LT__.since) continue
              window.__LT__.entries.push({ name: e.name, duration: e.duration, startTime: e.startTime })
            }
          })
          po.observe({ entryTypes: ['longtask'] })
          window.__LT__.po = po
        } catch {}
        // mutation observer on streaming message
        window.__MO__ = { mutations: [], stop: false, currentMsg: null }
        const tryArm = () => {
          const all = document.querySelectorAll('[data-slot="aui_assistant-message-root"]')
          const last = all[all.length - 1]
          if (!last || last === window.__MO__.currentMsg) return
          window.__MO__.currentMsg = last
          if (window.__MO__.obs) window.__MO__.obs.disconnect()
          const obs = new MutationObserver((muts) => {
            if (window.__MO__.stop) return
            const t = performance.now()
            window.__MO__.mutations.push({ t, count: muts.length, len: last.textContent.length })
          })
          obs.observe(last, { childList: true, subtree: true, characterData: true })
          window.__MO__.obs = obs
        }
        window.__MO__.arm = tryArm
        return 'recorders armed'
      })()
    `)

    // Baseline
    const base = JSON.parse(await cdp.eval(`
      JSON.stringify({
        assistantCount: document.querySelectorAll('[data-slot="aui_assistant-message-root"]').length,
        busy: !!document.querySelector('[data-status="running"], [data-busy="true"]'),
        hasComposer: !!document.querySelector('[contenteditable="true"]'),
      })
    `))
    console.log('baseline:', base)
    if (!base.hasComposer) {
      console.error('no composer')
      await stopRecorders()
      process.exitCode = 2
      return
    }

    // Type + submit
    await cdp.eval(`
      (() => {
        const ed = document.querySelector('[contenteditable="true"]')
        ed.focus()
        document.execCommand('insertText', false, ${JSON.stringify(PROMPT)})
        return 'typed'
      })()
    `)
    const submitT0 = Date.now()
    await cdp.eval(`
      (() => {
        const ed = document.querySelector('[contenteditable="true"]')
        ed.dispatchEvent(new KeyboardEvent('keydown', { key: 'Enter', code: 'Enter', bubbles: true, cancelable: true }))
        return 'submitted'
      })()
    `)

    // Poll for REAL stream (assistant count > baseline). 600 polls x 50 ms is
    // about 30 seconds, which accommodates slow first-token latencies.
    let realStreamT = null
    for (let i = 0; i < 600; i++) {
      await new Promise((r) => setTimeout(r, 50))
      const s = JSON.parse(await cdp.eval(`
        JSON.stringify({
          n: document.querySelectorAll('[data-slot="aui_assistant-message-root"]').length,
          busy: !!document.querySelector('[data-status="running"], [data-busy="true"]'),
          text: (() => { const a = document.querySelectorAll('[data-slot="aui_assistant-message-root"]'); return a.length ? a[a.length-1].textContent.length : 0 })()
        })
      `))
      if (s.n > base.assistantCount) {
        realStreamT = Date.now()
        console.log('REAL stream started after', realStreamT - submitT0, 'ms — busy=', s.busy, 'text=', s.text)
        // Drop everything recorded while waiting for the first token, then
        // arm the mutation observer on the new message.
        await cdp.eval(`
          (() => {
            window.__FT__.times.length = 0
            window.__LT__.entries.length = 0
            window.__LT__.since = performance.now()
            window.__MO__.arm()
            return 'armed'
          })()
        `)
        break
      }
    }
    if (!realStreamT) {
      console.error('REAL STREAM NEVER STARTED')
      await stopRecorders()
      process.exitCode = 2
      return
    }

    // Sample length growth, wait for completion or timeout
    const samples = []
    const start = Date.now()
    while (Date.now() - start < TIMEOUT_MS) {
      await new Promise((r) => setTimeout(r, 250))
      const s = JSON.parse(await cdp.eval(`
        JSON.stringify({
          t: performance.now(),
          len: (() => { const a = document.querySelectorAll('[data-slot="aui_assistant-message-root"]'); return a.length ? a[a.length-1].textContent.length : 0 })(),
          busy: !!document.querySelector('[data-status="running"], [data-busy="true"]')
        })
      `))
      samples.push(s)
      // Require a few samples first so a not-yet-busy UI is not read as done.
      if (!s.busy && samples.length > 4) {
        await new Promise((r) => setTimeout(r, 300))
        break
      }
    }

    // Pull recordings
    const { frames, longtasks, mutations } = JSON.parse(await stopRecorders())

    // Frame histogram over the stream window
    const buckets = { '<=16.7': 0, '16.7-33': 0, '33-50': 0, '50-100': 0, '100-200': 0, '>200': 0 }
    let frameTotal = 0
    let maxFrame = 0
    for (const f of frames) {
      frameTotal += f
      if (f > maxFrame) maxFrame = f
      if (f <= 16.7) buckets['<=16.7']++
      else if (f <= 33) buckets['16.7-33']++
      else if (f <= 50) buckets['33-50']++
      else if (f <= 100) buckets['50-100']++
      else if (f <= 200) buckets['100-200']++
      else buckets['>200']++
    }
    const avgFps = frames.length ? (frames.length / (frameTotal / 1000)).toFixed(1) : 'n/a'
    const slowFrames = frames.filter((f) => f > 33).length
    const veryslowFrames = frames.filter((f) => f > 100).length

    // Longtask summary
    const ltMs = longtasks.reduce((a, b) => a + b.duration, 0)
    const ltMax = longtasks.length ? Math.max(...longtasks.map((e) => e.duration)) : 0

    // Mutation rate: gaps between consecutive observer callbacks
    const mutTotal = mutations.length
    const mutDurs = []
    for (let i = 1; i < mutations.length; i++) {
      mutDurs.push(mutations[i].t - mutations[i - 1].t)
    }
    mutDurs.sort((a, b) => a - b)
    const mutP50 = mutDurs[Math.floor(mutDurs.length * 0.5)] ?? 0
    const mutP95 = mutDurs[Math.floor(mutDurs.length * 0.95)] ?? 0

    // Growth rate
    const firstLen = samples[0]?.len ?? 0
    const lastLen = samples[samples.length - 1]?.len ?? 0
    const elapsedS = samples.length ? (samples[samples.length - 1].t - samples[0].t) / 1000 : 0
    const charsPerSec = elapsedS ? ((lastLen - firstLen) / elapsedS).toFixed(1) : 'n/a'

    console.log('\n=== STREAM RESULTS ===')
    console.log('window:', (frameTotal / 1000).toFixed(1), 's | frames:', frames.length, '| avgFps:', avgFps, '| maxFrame:', maxFrame.toFixed(1), 'ms')
    console.log('frame histogram:', buckets)
    console.log('slow frames (>33ms):', slowFrames, '| very slow (>100ms):', veryslowFrames)
    console.log('longtasks:', longtasks.length, 'total', ltMs.toFixed(0), 'ms — max', ltMax.toFixed(1), 'ms')
    console.log('text grew', firstLen, '→', lastLen, 'chars (', charsPerSec, 'char/s )')
    console.log('mutations on streaming msg:', mutTotal, '| inter-mutation p50:', mutP50.toFixed(1), 'ms', 'p95:', mutP95.toFixed(1), 'ms')
  } finally {
    cdp.close()
  }
}
main().catch((e) => { console.error(e); process.exit(1) })

View file

@ -0,0 +1,318 @@
// Measure render cost of a synthetic stream driven through the live $messages atom.
//
// Why synthetic: the user's LLM credits are depleted; we can't fire a real stream.
// The synthetic stream exercises the exact same React pipeline (assistant-ui runtime →
// repository.addOrUpdateMessage → MessagePrimitive re-render → markdown reflow) as a
// real stream. The only thing it does NOT exercise is the gateway → SSE → optimistic-
// merge path, which is orthogonal to the rendering question.
//
// What we record:
// 1) rAF frame intervals (long-frame histogram; >33ms = perceived jank, >100ms = bad)
// 2) PerformanceObserver `longtask` entries (task >50ms blocks input)
// 3) MutationObserver: per-message mutation count & inter-mutation latency
// 4) Optional: typing latency overlay — typing into composer while streaming
//
// Output is plain text suitable for terminal + a JSON sidecar for diffing across runs.
import { writeFileSync } from 'node:fs'
// HTTP endpoint of the Chrome DevTools Protocol server exposed by the renderer.
const CDP_HTTP = 'http://127.0.0.1:9222'
// Reads a non-negative number from the environment. Unset, empty, non-numeric
// or negative values yield `fallback`, so a typo cannot leak NaN into the page
// call or into the wait timer.
const envNumber = (name, fallback) => {
  const raw = process.env[name]
  const parsed = raw ? Number(raw) : NaN
  return Number.isFinite(parsed) && parsed >= 0 ? parsed : fallback
}
// Number of chunks pushed by the synthetic driver.
const TOKENS = envNumber('TOKENS', 300)
// Delay between chunks, in milliseconds.
const INTERVAL_MS = envNumber('INTERVAL_MS', 16)
// Text appended per token.
const CHUNK = process.env.CHUNK || 'lorem ipsum '
// Set TYPE_WHILE_STREAMING=1 to also measure keystroke latency during the stream.
const TYPE_WHILE_STREAMING = process.env.TYPE_WHILE_STREAMING === '1'
// Run label, stored in the JSON sidecar and used in its default file name.
const LABEL = process.env.LABEL || 'baseline'
const OUT = process.env.OUT || `frame-times-${LABEL}.json`
async function getTarget() {
const list = await (await fetch(`${CDP_HTTP}/json`)).json()
const t = list.find((t) => t.type === 'page' && /5174/.test(t.url))
if (!t) throw new Error('renderer not found')
return t
}
class CDP {
constructor(ws) { this.ws = ws; this.id = 0; this.pending = new Map() }
static async open(url) {
const ws = new WebSocket(url)
await new Promise((r, j) => {
ws.addEventListener('open', r, { once: true })
ws.addEventListener('error', (e) => j(e), { once: true })
})
const cdp = new CDP(ws)
ws.addEventListener('message', (ev) => {
const m = JSON.parse(ev.data.toString())
if (m.id != null && cdp.pending.has(m.id)) {
const { resolve, reject } = cdp.pending.get(m.id)
cdp.pending.delete(m.id)
if (m.error) reject(new Error(m.error.message))
else resolve(m.result)
}
})
return cdp
}
send(method, params) {
const id = ++this.id
return new Promise((res, rej) => {
this.pending.set(id, { resolve: res, reject: rej })
this.ws.send(JSON.stringify({ id, method, params }))
})
}
async eval(expr) {
const r = await this.send('Runtime.evaluate', { expression: expr, returnByValue: true, awaitPromise: true })
if (r.exceptionDetails) throw new Error(r.exceptionDetails.exception?.description || 'eval')
return r.result.value
}
close() { this.ws.close() }
}
// Returns the element of an ascending-sorted array at fraction `p` (0..1),
// using index floor(length * p) clamped to the last element.
// An empty array yields 0.
function pct(arr, p) {
  const size = arr.length
  if (!size) return 0
  const rank = Math.floor(size * p)
  return arr[Math.min(size - 1, rank)]
}
// Drives one synthetic stream through `window.__PERF_DRIVE__`, records frame
// intervals, longtasks, mutation cadence and (optionally) typing latency,
// prints a summary and writes the same data to the OUT JSON sidecar.
//
// Once the stream has been started, `__PERF_DRIVE__.reset()` is guaranteed to
// run even if a later step throws, so fake messages do not pile up in the
// renderer. The CDP socket is always closed. The exit code is 2 when the
// driver is not present on `window`.
async function main() {
  const target = await getTarget()
  const cdp = await CDP.open(target.webSocketDebuggerUrl)
  // True from just before the stream is started until reset() has succeeded.
  let needsReset = false
  try {
    // Sanity check driver is loaded.
    const probeOk = await cdp.eval('!!window.__PERF_DRIVE__ && !!window.__PERF_DRIVE__.stream')
    if (!probeOk) {
      console.error('__PERF_DRIVE__ not on window — did you reload the renderer after editing perf-probe.tsx?')
      process.exitCode = 2
      return
    }

    // Install recorders.
    await cdp.eval(`
      (() => {
        window.__FT__ = { times: [], stop: false }
        let last = performance.now()
        const tick = () => {
          if (window.__FT__.stop) return
          const now = performance.now()
          window.__FT__.times.push(now - last)
          last = now
          requestAnimationFrame(tick)
        }
        requestAnimationFrame(tick)
        window.__LT__ = { entries: [], stop: false }
        try {
          const po = new PerformanceObserver((list) => {
            if (window.__LT__.stop) return
            for (const e of list.getEntries()) {
              window.__LT__.entries.push({ name: e.name, duration: e.duration, startTime: e.startTime })
            }
          })
          po.observe({ entryTypes: ['longtask'] })
          window.__LT__.po = po
        } catch {}
        window.__MO__ = { mutations: [], stop: false, currentMsg: null }
        const arm = () => {
          const all = document.querySelectorAll('[data-slot="aui_assistant-message-root"]')
          const last = all[all.length - 1]
          if (!last || last === window.__MO__.currentMsg) return
          window.__MO__.currentMsg = last
          if (window.__MO__.obs) window.__MO__.obs.disconnect()
          const obs = new MutationObserver((muts) => {
            if (window.__MO__.stop) return
            const t = performance.now()
            window.__MO__.mutations.push({ t, count: muts.length, len: last.textContent.length })
          })
          obs.observe(last, { childList: true, subtree: true, characterData: true })
          window.__MO__.obs = obs
        }
        window.__MO__.arm = arm
        // Filled by the optional type-while-streaming loop below.
        window.__TYP__ = { times: [], stop: false, lastKey: 0 }
        return 'recorders armed'
      })()
    `)

    // Baseline state.
    const base = JSON.parse(await cdp.eval(`
      JSON.stringify({
        assistantCount: document.querySelectorAll('[data-slot="aui_assistant-message-root"]').length,
        atomCount: window.__PERF_DRIVE__.snapshotMsgs()
      })
    `))
    console.log('baseline:', base)

    // Drive a synthetic stream.
    const streamStart = Date.now()
    needsReset = true
    await cdp.eval(`window.__PERF_DRIVE__.stream({ chunk: ${JSON.stringify(CHUNK)}, intervalMs: ${INTERVAL_MS}, totalTokens: ${TOKENS} })`)
    // After the first paint, arm MO on the new message.
    await new Promise((r) => setTimeout(r, 200))
    await cdp.eval('window.__MO__.arm()')

    // Optional: type while streaming.
    if (TYPE_WHILE_STREAMING) {
      await new Promise((r) => setTimeout(r, 400))
      await cdp.eval(`(() => {
        const ed = document.querySelector('[contenteditable="true"]')
        if (!ed) return 'no composer'
        ed.focus()
        window.__TYP__.startedAt = performance.now()
        const text = 'the quick brown fox jumps over the lazy dog '
        let i = 0
        const tick = () => {
          // Stop once recordings have been pulled or the text is exhausted.
          if (window.__TYP__.stop || i >= text.length) return
          const t0 = performance.now()
          document.execCommand('insertText', false, text[i])
          // requestAnimationFrame to wait for next paint
          requestAnimationFrame(() => {
            window.__TYP__.times.push(performance.now() - t0)
          })
          i++
          setTimeout(tick, 60)
        }
        tick()
        return 'typing'
      })()`)
    }

    // Wait for stream to complete + small grace.
    const expectedMs = TOKENS * INTERVAL_MS + 1500
    await new Promise((r) => setTimeout(r, expectedMs))

    // Pull recordings.
    const data = JSON.parse(await cdp.eval(`
      (() => {
        window.__FT__.stop = true
        window.__LT__.stop = true
        window.__MO__.stop = true
        window.__TYP__.stop = true
        try { window.__LT__.po && window.__LT__.po.disconnect() } catch {}
        try { window.__MO__.obs && window.__MO__.obs.disconnect() } catch {}
        return JSON.stringify({
          frames: window.__FT__.times,
          longtasks: window.__LT__.entries,
          mutations: window.__MO__.mutations,
          typing: window.__TYP__.times,
          finalText: (() => { const a = document.querySelectorAll('[data-slot="aui_assistant-message-root"]'); return a.length ? a[a.length-1].textContent.length : 0 })()
        })
      })()
    `))

    // Reset DOM back to baseline so we don't accumulate fake messages.
    await cdp.eval('window.__PERF_DRIVE__.reset()')
    needsReset = false

    // Analysis. Frames are recorded from recorder install, before the stream
    // starts, so the first WARMUP_MS of cumulative frame time is dropped as
    // warm-up. When the whole recording is shorter than that, nothing is
    // dropped.
    const frames = data.frames
    const WARMUP_MS = 500
    let dropIdx = 0
    let elapsed = 0
    for (let i = 0; i < frames.length; i++) {
      elapsed += frames[i]
      if (elapsed >= WARMUP_MS) { dropIdx = i; break }
    }
    const streamFrames = frames.slice(dropIdx)

    const buckets = { '<=16.7': 0, '16.7-33': 0, '33-50': 0, '50-100': 0, '100-200': 0, '>200': 0 }
    let frameTotal = 0
    let maxFrame = 0
    for (const f of streamFrames) {
      frameTotal += f
      if (f > maxFrame) maxFrame = f
      if (f <= 16.7) buckets['<=16.7']++
      else if (f <= 33) buckets['16.7-33']++
      else if (f <= 50) buckets['33-50']++
      else if (f <= 100) buckets['50-100']++
      else if (f <= 200) buckets['100-200']++
      else buckets['>200']++
    }
    const sortedFrames = [...streamFrames].sort((a, b) => a - b)
    const fAvgFps = streamFrames.length ? (streamFrames.length / (frameTotal / 1000)).toFixed(1) : 'n/a'
    const fP50 = pct(sortedFrames, 0.5).toFixed(1)
    const fP95 = pct(sortedFrames, 0.95).toFixed(1)
    const fP99 = pct(sortedFrames, 0.99).toFixed(1)
    const slowFrames = streamFrames.filter((f) => f > 33).length
    const veryslowFrames = streamFrames.filter((f) => f > 100).length

    const ltDur = data.longtasks.map((e) => e.duration).sort((a, b) => a - b)
    const ltMs = ltDur.reduce((a, b) => a + b, 0)
    const ltMax = ltDur.length ? ltDur[ltDur.length - 1] : 0
    const ltP95 = pct(ltDur, 0.95)

    // Mutation cadence: gaps between consecutive observer callbacks.
    const mutDurs = []
    for (let i = 1; i < data.mutations.length; i++) mutDurs.push(data.mutations[i].t - data.mutations[i - 1].t)
    mutDurs.sort((a, b) => a - b)
    const mutP50 = pct(mutDurs, 0.5)
    const mutP95 = pct(mutDurs, 0.95)
    const mutMax = mutDurs.length ? mutDurs[mutDurs.length - 1] : 0

    // Typing latency (optional).
    let typingSummary = null
    if (TYPE_WHILE_STREAMING && data.typing.length) {
      const t = [...data.typing].sort((a, b) => a - b)
      typingSummary = {
        n: t.length,
        p50: pct(t, 0.5).toFixed(1),
        p95: pct(t, 0.95).toFixed(1),
        max: t[t.length - 1].toFixed(1)
      }
    }

    const result = {
      label: LABEL,
      timestamp: new Date().toISOString(),
      config: { TOKENS, INTERVAL_MS, CHUNK, TYPE_WHILE_STREAMING },
      streamWallMs: Date.now() - streamStart,
      frames: {
        total: streamFrames.length,
        avgFps: fAvgFps,
        windowS: (frameTotal / 1000).toFixed(1),
        p50: fP50,
        p95: fP95,
        p99: fP99,
        max: maxFrame.toFixed(1),
        slow33: slowFrames,
        veryslow100: veryslowFrames,
        histogram: buckets
      },
      longtasks: {
        n: data.longtasks.length,
        totalMs: ltMs.toFixed(0),
        maxMs: ltMax.toFixed(1),
        p95Ms: ltP95.toFixed(1)
      },
      mutations: {
        n: data.mutations.length,
        finalTextLen: data.finalText,
        interMutP50ms: mutP50.toFixed(1),
        interMutP95ms: mutP95.toFixed(1),
        interMutMaxMs: mutMax.toFixed(1)
      },
      typing: typingSummary
    }
    writeFileSync(OUT, JSON.stringify(result, null, 2))

    console.log('\n=== SYNTHETIC STREAM RESULTS ===')
    console.log('label:', LABEL, '| tokens:', TOKENS, '@', INTERVAL_MS, 'ms')
    console.log('streamWallMs:', result.streamWallMs)
    console.log('FRAMES: avgFps', fAvgFps, '| p50', fP50, 'ms | p95', fP95, 'ms | p99', fP99, 'ms | max', maxFrame.toFixed(1), 'ms')
    console.log('FRAMES histogram:', buckets)
    console.log('FRAMES slow(>33):', slowFrames, '/ veryslow(>100):', veryslowFrames, 'of', streamFrames.length)
    console.log('LONGTASKS:', data.longtasks.length, '| total', ltMs.toFixed(0), 'ms | max', ltMax.toFixed(1), 'ms | p95', ltP95.toFixed(1), 'ms')
    console.log('MUTATIONS:', data.mutations.length, '| finalLen', data.finalText, 'chars | inter p50', mutP50.toFixed(1), 'ms | p95', mutP95.toFixed(1), 'ms')
    if (typingSummary) console.log('TYPING-WHILE-STREAMING latency: p50', typingSummary.p50, 'ms | p95', typingSummary.p95, 'ms | n=', typingSummary.n)
    console.log('written to', OUT)
  } finally {
    if (needsReset) {
      // Best-effort cleanup after a failure; the original error still wins.
      try { await cdp.eval('window.__PERF_DRIVE__.reset()') } catch (e) { console.error('reset failed:', e) }
    }
    cdp.close()
  }
}
main().catch((e) => { console.error(e); process.exit(1) })

View file

@ -0,0 +1,137 @@
// CPU-profile during a real LLM stream — confirms or refutes whether the
// synthetic stream's hotspots (Streamdown markdown re-parse, FadeText)
// match real-world content.
//
// Run *after* model is set to something fast + cheap (gpt-4o-mini etc.).
// Sends a prompt likely to produce markdown + a numbered list.
import { writeFileSync } from 'node:fs'
// HTTP endpoint of the Chrome DevTools Protocol server exposed by the renderer.
const CDP_HTTP = 'http://127.0.0.1:9222'
// Prompt submitted to the composer; override with PROMPT=...
const PROMPT = process.env.PROMPT || 'Give me a numbered list of 8 useful bash one-liners. For each: a brief description, then the command in a code block. No preamble.'
// Destination of the raw .cpuprofile.
const OUT = process.env.OUT || `/tmp/real-stream-${Date.now()}.cpuprofile`
// Reads a non-negative number from the environment. Unset, empty, non-numeric
// or negative values yield `fallback`, so a typo cannot turn a timeout into NaN.
const envNumber = (name, fallback) => {
  const raw = process.env[name]
  const parsed = raw ? Number(raw) : NaN
  return Number.isFinite(parsed) && parsed >= 0 ? parsed : fallback
}
// How long (ms) to wait for the assistant message to appear.
const START_TIMEOUT = envNumber('START_TIMEOUT', 45000)
// How long (ms) to keep profiling before giving up on the stream finishing.
const STREAM_TIMEOUT = envNumber('STREAM_TIMEOUT', 60000)
class CDP {
constructor(ws) { this.ws = ws; this.id = 0; this.pending = new Map() }
static async open(url) {
const ws = new WebSocket(url)
await new Promise((r) => ws.addEventListener('open', r, { once: true }))
const cdp = new CDP(ws)
ws.addEventListener('message', (ev) => {
const m = JSON.parse(ev.data.toString())
if (m.id != null && cdp.pending.has(m.id)) {
const { resolve, reject } = cdp.pending.get(m.id)
cdp.pending.delete(m.id)
if (m.error) reject(new Error(m.error.message))
else resolve(m.result)
}
})
return cdp
}
send(method, params) {
const id = ++this.id
return new Promise((res, rej) => {
this.pending.set(id, { resolve: res, reject: rej })
this.ws.send(JSON.stringify({ id, method, params }))
})
}
async eval(expr) {
const r = await this.send('Runtime.evaluate', { expression: expr, returnByValue: true, awaitPromise: true })
if (r.exceptionDetails) throw new Error(r.exceptionDetails.exception?.description || 'eval')
return r.result.value
}
close() { this.ws.close() }
}
// Submits PROMPT, waits for a real assistant stream to start, CPU-profiles the
// renderer until the busy indicator clears (or STREAM_TIMEOUT elapses), writes
// the raw profile to OUT and prints the 25 functions with the most self time.
//
// Throws 'renderer not found' when no page target matches; sets exit code 2
// when the composer is missing or the stream never starts. The profiler is
// stopped and the socket closed even if a step fails part-way.
async function main() {
  const list = await (await fetch(`${CDP_HTTP}/json`)).json()
  const target = list.find((t) => t.type === 'page' && /5174/.test(t.url))
  if (!target) throw new Error('renderer not found')
  const cdp = await CDP.open(target.webSocketDebuggerUrl)
  // True while Profiler.start has been issued without a matching stop.
  let profiling = false
  try {
    const baseCount = await cdp.eval('document.querySelectorAll("[data-slot=aui_assistant-message-root]").length')

    // Submit prompt
    const submitted = await cdp.eval(`(() => {
      const ed = document.querySelector('[contenteditable="true"]')
      if (!ed) return 'no composer'
      ed.focus()
      document.execCommand('insertText', false, ${JSON.stringify(PROMPT)})
      ed.dispatchEvent(new KeyboardEvent('keydown', { key: 'Enter', code: 'Enter', which: 13, keyCode: 13, bubbles: true, cancelable: true }))
      return 'submitted'
    })()`)
    if (submitted !== 'submitted') {
      console.error('no composer')
      process.exitCode = 2
      return
    }

    // Wait for real stream start (assistant count grows).
    const submitT0 = Date.now()
    let streamT = null
    for (let i = 0; i < START_TIMEOUT / 50; i++) {
      await new Promise((r) => setTimeout(r, 50))
      const n = await cdp.eval('document.querySelectorAll("[data-slot=aui_assistant-message-root]").length')
      if (n > baseCount) { streamT = Date.now(); break }
    }
    if (!streamT) {
      console.error('stream never started within', START_TIMEOUT, 'ms')
      process.exitCode = 2
      return
    }
    console.log('REAL stream started after', streamT - submitT0, 'ms — starting CPU profile NOW')

    // Start CPU profile NOW, only during stream phase.
    await cdp.send('Profiler.enable')
    // Sampling interval is given in microseconds.
    await cdp.send('Profiler.setSamplingInterval', { interval: 100 })
    await cdp.send('Profiler.start')
    profiling = true

    // Wait until busy goes false + grace, or timeout.
    const cutoff = Date.now() + STREAM_TIMEOUT
    while (Date.now() < cutoff) {
      await new Promise((r) => setTimeout(r, 500))
      const busy = await cdp.eval('!!document.querySelector("[data-status=running], [data-busy=true]")')
      if (!busy) {
        await new Promise((r) => setTimeout(r, 500))
        break
      }
    }
    const { profile } = await cdp.send('Profiler.stop')
    profiling = false
    writeFileSync(OUT, JSON.stringify(profile))
    console.log('wrote', OUT)

    // Self time per profile node: each sample's time delta is credited to the
    // node that was on top of the stack for that sample.
    const samples = profile.samples || []
    const timeDeltas = profile.timeDeltas || []
    const nodes = new Map(profile.nodes.map((n) => [n.id, n]))
    const selfTime = new Map() // node id -> microseconds
    for (let i = 0; i < samples.length; i++) {
      const id = samples[i]
      const dt = timeDeltas[i] ?? 0
      selfTime.set(id, (selfTime.get(id) || 0) + dt)
    }
    const ranked = [...selfTime.entries()]
      .map(([id, us]) => {
        const n = nodes.get(id)
        const cf = n?.callFrame || {}
        return {
          ms: us / 1000,
          name: cf.functionName || '(anonymous)',
          url: (cf.url || '').slice(-60),
          // Printed as reported by the protocol, which counts lines from 0.
          line: cf.lineNumber
        }
      })
      // Drop the profiler's synthetic bookkeeping nodes.
      .filter((x) => !/\(root\)|\(idle\)|\(garbage collector\)|\(program\)/.test(x.name))
      .sort((a, b) => b.ms - a.ms)
      .slice(0, 25)

    const finalText = await cdp.eval(`(() => {
      const all = document.querySelectorAll('[data-slot="aui_assistant-message-root"]')
      return all.length ? all[all.length-1].textContent.length : 0
    })()`)
    console.log('\nfinal assistant message length:', finalText, 'chars')
    console.log('\n=== TOP 25 SELF TIME (ms) DURING REAL STREAM ===')
    for (const r of ranked) {
      console.log(`${r.ms.toFixed(1).padStart(7)} ${r.name.padEnd(40)} ${r.url}:${r.line}`)
    }
  } finally {
    if (profiling) {
      // Best-effort: do not leave the sampling profiler running in the renderer.
      try { await cdp.send('Profiler.stop') } catch {}
    }
    cdp.close()
  }
}
main().catch((e) => { console.error(e); process.exit(1) })

View file

@ -0,0 +1,103 @@
// CPU-profile a synthetic stream — outputs a .cpuprofile and a top-self ranking.
// Open the .cpuprofile in Chrome DevTools Performance panel for a flamegraph.
import { writeFileSync } from 'node:fs'
// HTTP endpoint of the Chrome DevTools Protocol server exposed by the renderer.
const CDP_HTTP = 'http://127.0.0.1:9222'
// Reads a non-negative number from the environment. Unset, empty, non-numeric
// or negative values yield `fallback`, so a typo cannot leak NaN into the page
// call or into the wait timer.
const envNumber = (name, fallback) => {
  const raw = process.env[name]
  const parsed = raw ? Number(raw) : NaN
  return Number.isFinite(parsed) && parsed >= 0 ? parsed : fallback
}
// Number of chunks pushed by the synthetic driver.
const TOKENS = envNumber('TOKENS', 400)
// Delay between chunks, in milliseconds.
const INTERVAL_MS = envNumber('INTERVAL_MS', 8)
// Text appended per token; the default contains inline markdown.
const CHUNK = process.env.CHUNK || '**word** in _italic_ with `code` '
// Run label, used in the default profile file name.
const LABEL = process.env.LABEL || 'profile'
const OUT = process.env.OUT || `synth-${LABEL}.cpuprofile`
class CDP {
constructor(ws) { this.ws = ws; this.id = 0; this.pending = new Map() }
static async open(url) {
const ws = new WebSocket(url)
await new Promise((r) => ws.addEventListener('open', r, { once: true }))
const cdp = new CDP(ws)
ws.addEventListener('message', (ev) => {
const m = JSON.parse(ev.data.toString())
if (m.id != null && cdp.pending.has(m.id)) {
const { resolve, reject } = cdp.pending.get(m.id)
cdp.pending.delete(m.id)
if (m.error) reject(new Error(m.error.message))
else resolve(m.result)
}
})
return cdp
}
send(method, params) {
const id = ++this.id
return new Promise((res, rej) => {
this.pending.set(id, { resolve: res, reject: rej })
this.ws.send(JSON.stringify({ id, method, params }))
})
}
async eval(expr) {
const r = await this.send('Runtime.evaluate', { expression: expr, returnByValue: true, awaitPromise: true })
if (r.exceptionDetails) throw new Error(r.exceptionDetails.exception?.description || 'eval')
return r.result.value
}
close() { this.ws.close() }
}
// CPU-profiles the renderer while `window.__PERF_DRIVE__` pushes a synthetic
// stream, writes the raw profile to OUT and prints the 30 functions with the
// most self time.
//
// The profiler is stopped BEFORE the driver is reset, so the reset's own work
// does not show up in the profile. Throws 'renderer not found' when no page
// target matches; sets exit code 2 when the driver is missing. Profiler stop,
// driver reset and socket close all still happen if a step fails part-way.
async function main() {
  const list = await (await fetch(`${CDP_HTTP}/json`)).json()
  const target = list.find((t) => t.type === 'page' && /5174/.test(t.url))
  if (!target) throw new Error('renderer not found')
  const cdp = await CDP.open(target.webSocketDebuggerUrl)
  // True while Profiler.start has been issued without a matching stop.
  let profiling = false
  // True from just before the stream is started until reset() has succeeded.
  let needsReset = false
  try {
    if (!await cdp.eval('!!window.__PERF_DRIVE__')) {
      console.error('no __PERF_DRIVE__')
      process.exitCode = 2
      return
    }
    await cdp.send('Profiler.enable')
    // High-resolution sampling: 100us
    await cdp.send('Profiler.setSamplingInterval', { interval: 100 })
    await cdp.send('Profiler.start')
    profiling = true

    needsReset = true
    await cdp.eval(`window.__PERF_DRIVE__.stream({ chunk: ${JSON.stringify(CHUNK)}, intervalMs: ${INTERVAL_MS}, totalTokens: ${TOKENS} })`)
    // Expected stream duration plus a grace period.
    await new Promise((r) => setTimeout(r, TOKENS * INTERVAL_MS + 1500))

    const { profile } = await cdp.send('Profiler.stop')
    profiling = false
    await cdp.eval('window.__PERF_DRIVE__.reset()')
    needsReset = false

    writeFileSync(OUT, JSON.stringify(profile))
    console.log('wrote', OUT)

    // Compute top self time per function: each sample's time delta is
    // credited to the node that was on top of the stack for that sample.
    const samples = profile.samples || []
    const timeDeltas = profile.timeDeltas || []
    const nodes = new Map(profile.nodes.map((n) => [n.id, n]))
    const selfTime = new Map() // id -> microseconds
    for (let i = 0; i < samples.length; i++) {
      const id = samples[i]
      const dt = timeDeltas[i] ?? 0
      selfTime.set(id, (selfTime.get(id) || 0) + dt)
    }
    const ranked = [...selfTime.entries()]
      .map(([id, us]) => {
        const n = nodes.get(id)
        const cf = n?.callFrame || {}
        return {
          us,
          ms: us / 1000,
          name: cf.functionName || '(anonymous)',
          url: (cf.url || '').slice(-60),
          // Printed as reported by the protocol, which counts lines from 0.
          line: cf.lineNumber
        }
      })
      // Drop the profiler's synthetic bookkeeping nodes.
      .filter((x) => !/\(root\)|\(idle\)|\(garbage collector\)|\(program\)/.test(x.name))
      .sort((a, b) => b.us - a.us)
      .slice(0, 30)

    console.log('\n=== TOP 30 SELF TIME (ms) ===')
    for (const r of ranked) {
      console.log(`${r.ms.toFixed(1).padStart(7)} ${r.name.padEnd(40)} ${r.url}:${r.line}`)
    }
  } finally {
    // Best-effort cleanup after a failure; the original error still wins.
    if (profiling) {
      try { await cdp.send('Profiler.stop') } catch {}
    }
    if (needsReset) {
      try { await cdp.eval('window.__PERF_DRIVE__.reset()') } catch (e) { console.error('reset failed:', e) }
    }
    cdp.close()
  }
}
main().catch((e) => { console.error(e); process.exit(1) })

View file

@ -153,3 +153,133 @@ streaming. `scripts/measure-submit.mjs` measures
`enter → composer-cleared → user-message-rendered → first-paint`. The
script triggers a real prompt submission, so use it on a throwaway
session. Not enabled in CI.
## Streaming "5fps" investigation (May 21, 2026)
User complaint: "the streaming must bring fps to like 5? lol" — felt
hitches during assistant streaming on long threads.
### Tooling added
- **`src/app/chat/perf-probe.tsx`** — dev-only side-effect import (guarded by
`import.meta.env.MODE !== 'production'` in `main.tsx`). Attaches two
helpers to `window`:
- `__PERF_PROBE__` — React `<Profiler>` recorder. Currently inert because
Vite is serving the production React build (see "Vite dev-build issue"
below); kept for when that's fixed.
- `__PERF_DRIVE__` — synthetic stream driver. Pushes tokens through the
live `$messages` atom at a fixed cadence, so the assistant-ui runtime,
incremental repository, Streamdown markdown renderer, and React commit
pipeline all see the same workload they'd see from a real LLM stream —
but with no LLM call (and no credit cost).
- **`scripts/measure-synthetic-stream.mjs`** — drives `__PERF_DRIVE__`,
records rAF frame intervals, `PerformanceObserver({entryTypes:['longtask']})`
entries, `MutationObserver` cadence on the live message, and optional
type-while-streaming keystroke latency.
- **`scripts/profile-synth-stream.mjs`** — CPU profile during a synthetic
stream; writes a `.cpuprofile` (open in Chrome DevTools Performance panel)
and a top-30 self-time table.
- **`scripts/measure-real-stream.mjs`** — same harness as the synthetic but
fires a real LLM prompt. Use when you have credits and want to confirm
the synthetic predictions hold.
- **`scripts/profile-real-stream.mjs`** — CPU profile over the duration of
a real LLM stream.
Helpers: `scripts/eval.mjs` (one-shot CDP eval), `scripts/reload.mjs`
(hard reload renderer over CDP).
### Findings
Measured on the Cloud Shadows session (7 turns, ~11k px scrollHeight) and
the 34 MB session `session_20260514_215353_fe0ac8.json` (110 FadeText
instances, lots of historical tool calls).
| metric | Cloud Shadows | 34 MB session |
|---|---|---|
| avgFps (60 tok/sec, 5s) | 60.0 | 58.6 |
| frame p50 / p95 / p99 (ms) | 16.7 / 18.0 / 21.1 | 16.6 / 25.6 / 31.4 |
| max frame (ms) | 31.1 | 97-127 (varies) |
| longtasks per 5s window | 0 | 1-2, 75-127 ms |
| type-while-stream p95 latency (ms) | 17 | — |
A single real-LLM stream on Cloud Shadows (gpt-4o-mini, 39s window) saw
12 longtasks totalling 1.26 s — same cadence the synthetic predicted
(~1 hitch per 3.25 s, max 123 ms). So the **synthetic stream is a faithful
proxy for the real one** and is fine for iterating on fixes without paying
for tokens.
### CPU profile during streaming (synthetic, markdown content)
Top self-time costs (5 s window, 400 tokens at 125 tok/s, markdown chunks):
| ms (self) | function | source |
|---|---|---|
| 260 | `bn$1` | `chunk-BO2N…js:20003` (micromark tokenize) |
| 249 | `m$1` | `chunk-BO2N…js:19949` (micromark) |
| 128 | `compile` | `chunk-BO2N…js:21884` (mdast → hast compile) |
| 73 | FadeText body | `components/ui/fade-text.tsx` |
| 62 | `parser` | `chunk-BO2N…js:22680` |
| 49 | `fromThreadMessageLike` | `@assistant-ui/internal` |
That `chunk-BO2N2NFS` is the vendored bundle containing `micromark`,
`mdast-util-from-markdown`, `mdast-util-to-hast`, `rehype-raw`,
`hast-util-sanitize`, etc. — i.e. **Streamdown's markdown pipeline,
re-parsing the entire growing assistant message on every token append**.
Cost scales linearly with message length.
Compare plain-text (no markdown) — the `chunk-BO2N…` entries drop out
of the top 30 entirely; total work per 5 s window halves.
### Fix landed: `FadeText` memo
`FadeText` is used in `tool-fallback.tsx` (110 instances on a tool-heavy
thread). Before: each parent re-render during streaming triggered a
`useEffect([children])` that forced a `scrollWidth` layout read — even
when the title text was unchanged. The `useResizeObserver` already covers
the genuine resize case, so the effect was strictly redundant.
After: wrapped in `React.memo` with a custom comparator that compares
`children` (scalar fast-path), `className`, `fadeWidth`, and `style`
field-by-field. Verified via temporary render counter:
**122 renders during a 2 s synthetic stream vs ~11 000 without memo**
(110 instances × ~100 stream updates). Doesn't move the longtask needle
on its own — Streamdown dwarfs it — but eliminates a class of forced
layouts and removes a steady CPU floor.
### Not fixed: Streamdown markdown re-parse
This is the dominant cost and the cause of the user's perceived hitches.
The renderer re-parses the entire message buffer on every stream update.
At ~3-5 k chars, each parse costs ~30 ms; when several pile into one
frame the result is a 75-125 ms longtask = the "5 fps moment".
Possible approaches (none implemented here):
1. **Coalesce/throttle Streamdown updates** — render at most every 32 ms
instead of every set-state. Reduces parses but doesn't reduce
per-parse cost; trades latency for smoothness.
2. **Memoize per-prefix** — diff the new text against the prior parsed
version; only re-parse the changed suffix.
3. **Render in stable segments** — freeze completed (historical) paragraphs
as immutable React nodes; only the live tail goes through markdown on
each token. Probably the highest-impact change but requires forking or
patching `@assistant-ui/react-streamdown`.
4. **Move parsing to a Web Worker** — main thread no longer blocks on
markdown. Largest surgery; requires double-buffered hast.
### Vite dev-build issue (separate)
`http://127.0.0.1:5174/node_modules/.vite/deps/react.js` resolves to
`react/cjs/react.production.js`, and `react-dom_client.js` resolves to
`react-dom-client.production.js`. As a result:
- `<React.Profiler>` `onRender` is never called (production build is a
no-op).
- `import.meta.env.DEV` is `false`, `PROD` is `true` even under `vite dev`
(hence `MODE !== 'production'` as the workaround in `main.tsx`).
- All the React 19 dev-only warnings/devtools backend hooks are absent.
Root cause likely sits in `vite.config.ts` aliasing + dedupe + Vite 8's
new `optimizeDeps` defaults. Worth a separate fix pass — when it's
resolved, the `<PerfProbe>` blocks in `perf-probe.tsx` become useful
(per-id commit timings) instead of inert.

View file

@ -0,0 +1,36 @@
// Hard reload the Electron renderer over CDP. Vite-no-HMR mode means edits
// don't auto-apply — call this after editing source.
//
// Flow: find the renderer target, open its debugger WebSocket, send
// Page.reload (bypassing the cache), wait a fixed 2.5 s for the new document,
// then print a small JSON status (probe installed, composer present, hash).
const targets = await (await fetch('http://127.0.0.1:9222/json')).json()
const t = targets.find((t) => t.url.includes('5174'))
if (!t) {
  console.error('renderer not found')
  process.exit(1)
}
const ws = new WebSocket(t.webSocketDebuggerUrl)
let id = 0
// Request id -> { resolve, reject } for commands still awaiting a reply.
const pending = new Map()
ws.addEventListener('message', (ev) => {
  const m = JSON.parse(ev.data)
  const waiter = pending.get(m.id)
  // Events and replies to unknown ids are ignored.
  if (!waiter) return
  pending.delete(m.id)
  // A CDP error reply carries `error` instead of `result`; surface it as a
  // rejection rather than letting callers trip over a missing `result`.
  if (m.error) waiter.reject(new Error(m.error.message || 'CDP error'))
  else waiter.resolve(m)
})
// Reject on a connection failure so the script cannot hang waiting for `open`.
await new Promise((resolve, reject) => {
  ws.addEventListener('open', resolve, { once: true })
  ws.addEventListener('error', () => reject(new Error('CDP websocket failed to open')), { once: true })
})
// Sends one CDP command and resolves with the full reply message.
const send = (method, params = {}) =>
  new Promise((resolve, reject) => {
    const i = ++id
    pending.set(i, { resolve, reject })
    ws.send(JSON.stringify({ id: i, method, params }))
  })
try {
  await send('Page.reload', { ignoreCache: true })
  console.log('reload sent')
  // Wait for new doc.
  await new Promise((r) => setTimeout(r, 2500))
  const r = await send('Runtime.evaluate', {
    expression: 'JSON.stringify({ hasProbe: !!window.__PERF_PROBE__, composer: !!document.querySelector("[contenteditable=true]"), url: location.hash })',
    returnByValue: true,
  })
  if (r.result.exceptionDetails) {
    throw new Error(r.result.exceptionDetails.exception?.description || 'eval')
  }
  console.log(r.result.result.value)
} finally {
  // Always release the socket, including when a command fails.
  ws.close()
}

View file

@ -0,0 +1,167 @@
import { Profiler, type ProfilerOnRenderCallback, type ReactNode } from 'react'
import { $messages, setMessages, setBusy } from '@/store/session'
/**
 * One recorded `<Profiler>` render callback. Each field holds the callback
 * argument of the same name, stored as-is by `onRender`.
 */
type Sample = {
id: string
phase: string
actualDuration: number
baseDuration: number
startTime: number
commitTime: number
}
/** Handle returned by the synthetic stream driver; `stop()` ends that stream. */
type SyntheticDriverHandle = { stop: () => void }
// Augments `Window` with the two perf helpers this module installs.
declare global {
interface Window {
// Recorder for `<Profiler>` samples.
__PERF_PROBE__?: {
// Recorded samples, oldest first.
samples: Sample[]
// Recording gate: `onRender` discards samples while this is false.
enabled: boolean
// Empties `samples` in place.
clear: () => void
// Per `id:phase` statistics over `actualDuration`, rounded to 2 decimals.
summary: () => Record<string, { count: number; total: number; max: number; p50: number; p95: number }>
}
// Synthetic stream driver.
__PERF_DRIVE__?: {
/** Inject an assistant message and grow it by `chunk` every `intervalMs`. Returns a stop handle. */
stream: (opts?: { chunk?: string; intervalMs?: number; totalTokens?: number }) => SyntheticDriverHandle
// Stops any active stream, restores the message list captured before the first stream, and clears busy.
reset: () => void
// Number of messages currently in the `$messages` store.
snapshotMsgs: () => number
}
}
}
// Install the `<Profiler>` sample recorder on `window`, once. Skipped when
// there is no `window` or when a recorder is already installed.
if (typeof window !== 'undefined' && !window.__PERF_PROBE__) {
  const samples: Sample[] = []
  // Round to two decimal places.
  const roundTo2 = (value: number) => Math.round(value * 100) / 100
  window.__PERF_PROBE__ = {
    samples,
    // Recording is off until a caller flips this.
    enabled: false,
    clear: () => {
      samples.length = 0
    },
    summary: () => {
      // Bucket actual durations under an `id:phase` key.
      const durationsByKey = new Map<string, number[]>()
      for (const { id, phase, actualDuration } of samples) {
        const key = `${id}:${phase}`
        const bucket = durationsByKey.get(key)
        if (bucket) {
          bucket.push(actualDuration)
        } else {
          durationsByKey.set(key, [actualDuration])
        }
      }
      const report: Record<string, { count: number; total: number; max: number; p50: number; p95: number }> = {}
      durationsByKey.forEach((durations, key) => {
        // Ascending order so max and the percentiles can be read by index.
        durations.sort((a, b) => a - b)
        const n = durations.length
        let total = 0
        for (const d of durations) total += d
        report[key] = {
          count: n,
          total: roundTo2(total),
          max: roundTo2(durations[n - 1]),
          p50: roundTo2(durations[Math.floor(n * 0.5)]),
          p95: roundTo2(durations[Math.floor(n * 0.95)])
        }
      })
      return report
    }
  }
}
/**
 * `<Profiler>` callback: appends one sample to `window.__PERF_PROBE__` while
 * the probe exists and is enabled, keeping only the newest 5000 samples.
 */
const onRender: ProfilerOnRenderCallback = (id, phase, actualDuration, baseDuration, startTime, commitTime) => {
  if (typeof window === 'undefined') return
  const recorder = window.__PERF_PROBE__
  if (!recorder?.enabled) return
  const { samples } = recorder
  samples.push({ id, phase, actualDuration, baseDuration, startTime, commitTime })
  // Drop the oldest entries once the buffer grows past 5000.
  const overflow = samples.length - 5000
  if (overflow > 0) samples.splice(0, overflow)
}
// Install the synthetic stream driver on `window`, once. Skipped when there is
// no `window` or when a driver is already installed.
if (typeof window !== 'undefined' && !window.__PERF_DRIVE__) {
  // Synthetic stream driver — pushes tokens through the live $messages atom so the
  // assistant-ui runtime + react tree sees them exactly as a real LLM stream would.
  // Used by scripts/measure-synthetic-stream.mjs and scripts/profile-synth-stream.mjs
  // to exercise streaming without a live LLM call.

  // Message list captured before the first synthetic stream; `reset` restores it.
  let baseline: ReturnType<typeof $messages.get> | null = null
  // Handle of the stream currently running, if any. At most one runs at a time.
  let activeHandle: SyntheticDriverHandle | null = null
  window.__PERF_DRIVE__ = {
    snapshotMsgs: () => $messages.get().length,
    reset: () => {
      activeHandle?.stop()
      if (baseline) setMessages(baseline)
      baseline = null
      setBusy(false)
    },
    stream: ({ chunk = 'word ', intervalMs = 16, totalTokens = 400 } = {}) => {
      // Starting a new stream supersedes any stream still running.
      activeHandle?.stop()
      const current = $messages.get()
      if (!baseline) baseline = current
      const msgId = `synthetic-${Date.now()}`
      // Seed an empty assistant message — assistant-ui will see it grow.
      setMessages([
        ...current,
        {
          id: msgId,
          role: 'assistant',
          parts: [{ type: 'text', text: '' }],
          timestamp: Date.now(),
          pending: true
        }
      ])
      setBusy(true)
      let pushed = 0
      let timer: ReturnType<typeof setTimeout> | null = null
      const handle: SyntheticDriverHandle = {
        stop: () => {
          if (timer) clearTimeout(timer)
          timer = null
          // Mark message finalized.
          setMessages(prev => prev.map(m => (m.id === msgId ? { ...m, pending: false } : m)))
          // Release the shared driver state only while this handle still owns
          // it. A stale handle from a superseded stream must not cancel the
          // newer stream or clear its busy flag.
          if (activeHandle === handle) {
            activeHandle = null
            setBusy(false)
          }
        }
      }
      activeHandle = handle
      const tick = () => {
        // Superseded or stopped: do not push further tokens.
        if (activeHandle !== handle) return
        if (pushed >= totalTokens) {
          handle.stop()
          return
        }
        pushed += 1
        // Append `chunk` to the text of the message's last part.
        setMessages(prev =>
          prev.map(m => {
            if (m.id !== msgId) return m
            const head = m.parts.slice(0, -1)
            const last = m.parts.at(-1)
            const lastText = last && last.type === 'text' ? last.text : ''
            return {
              ...m,
              parts: [...head, { type: 'text', text: lastText + chunk }]
            }
          })
        )
        timer = setTimeout(tick, intervalMs)
      }
      timer = setTimeout(tick, intervalMs)
      return handle
    }
  }
}
/**
 * Wraps `children` in a `<Profiler>` tagged with `id`, reporting each render
 * to this module's `onRender` recorder.
 */
export function PerfProbe(props: { id: string; children: ReactNode }) {
  const { id, children } = props
  return <Profiler id={id} onRender={onRender}>{children}</Profiler>
}

View file

@ -12,6 +12,15 @@ import { ThemeProvider } from './themes/context'
installClipboardShim()
// Dev-only: install __PERF_DRIVE__ + __PERF_PROBE__ on window so the
// scripts/ harnesses can drive a synthetic stream + record render cost.
// Tree-shaken out of production builds. (Uses MODE rather than DEV because
// our Vite setup currently bundles with PROD=true even in `vite dev`; see
// scripts/dev-no-hmr.mjs for the surrounding workarounds.)
if (import.meta.env.MODE !== 'production') {
  // Fire-and-forget side-effect import. A load failure is logged rather than
  // surfacing as an unhandled promise rejection.
  void import('./app/chat/perf-probe').catch((err: unknown) => {
    console.warn('perf-probe failed to load', err)
  })
}
const queryClient = new QueryClient({
defaultOptions: {
queries: {