From 99f2a9503c5106ccae03d53a714ab6f6345cb353 Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Thu, 21 May 2026 19:38:26 -0500 Subject: [PATCH] chore(desktop): synthetic-stream perf harness + scripts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drops the React `<Profiler>` approach (no-op because Vite is currently serving the production React build) in favor of an externally-observable measurement stack: rAF frame intervals, `PerformanceObserver({entryTypes: ['longtask']})`, and a `MutationObserver` on the live streaming message. Adds a synthetic stream driver — `window.__PERF_DRIVE__.stream({...})` — that pushes tokens through the live `$messages` atom at a controlled rate, so the assistant-ui runtime, incremental repository, and Streamdown markdown pipeline see the same workload they'd see during a real LLM stream, without the LLM cost. The driver lives in `src/app/chat/perf-probe.tsx`; `main.tsx` side-imports it under `import.meta.env.MODE !== 'production'` so it tree-shakes out of prod builds. (Using `MODE` rather than `DEV` because our Vite setup currently reports `DEV=false` even under `vite dev` — see the dev-build note in `profile-typing-lag.md`.) Scripts: - measure-synthetic-stream.mjs drive synthetic + record frame/longtask/mutation - profile-synth-stream.mjs CPU profile + top self-time during synthetic - measure-real-stream.mjs same harness, real LLM stream - profile-real-stream.mjs CPU profile bracketing the real stream window - eval.mjs / reload.mjs small CDP helpers A real-LLM measurement on Cloud Shadows (gpt-4o-mini, 39 s window) showed 12 longtasks in the same 75-127 ms range the synthetic predicted, so the synthetic is a faithful proxy. 
---
 apps/desktop/scripts/eval.mjs                 |  24 ++
 apps/desktop/scripts/measure-real-stream.mjs  | 252 ++++++++++++++
 .../scripts/measure-synthetic-stream.mjs      | 318 ++++++++++++++++++
 apps/desktop/scripts/profile-real-stream.mjs  | 137 ++++++++
 apps/desktop/scripts/profile-synth-stream.mjs | 103 ++++++
 apps/desktop/scripts/profile-typing-lag.md    | 130 +++++++
 apps/desktop/scripts/reload.mjs               |  36 ++
 apps/desktop/src/app/chat/perf-probe.tsx      | 167 +++++++++
 apps/desktop/src/main.tsx                     |   9 +
 9 files changed, 1176 insertions(+)
 create mode 100644 apps/desktop/scripts/eval.mjs
 create mode 100644 apps/desktop/scripts/measure-real-stream.mjs
 create mode 100644 apps/desktop/scripts/measure-synthetic-stream.mjs
 create mode 100644 apps/desktop/scripts/profile-real-stream.mjs
 create mode 100644 apps/desktop/scripts/profile-synth-stream.mjs
 create mode 100644 apps/desktop/scripts/reload.mjs
 create mode 100644 apps/desktop/src/app/chat/perf-probe.tsx

diff --git a/apps/desktop/scripts/eval.mjs b/apps/desktop/scripts/eval.mjs
new file mode 100644
index 00000000000..b7336315d29
--- /dev/null
+++ b/apps/desktop/scripts/eval.mjs
@@ -0,0 +1,24 @@
+// Simple eval helper — evaluates argv[2] in the renderer and prints result.value as JSON.
+const targets = await (await fetch('http://127.0.0.1:9222/json')).json()
+const t = targets.find((t) => t.type === 'page' && t.url.includes('5174'))
+if (!t) { console.error('renderer not found (no page target on :5174)'); process.exit(1) }
+const ws = new WebSocket(t.webSocketDebuggerUrl)
+let id = 0
+const pending = new Map()
+ws.addEventListener('message', (ev) => {
+  const m = JSON.parse(ev.data)
+  if (pending.has(m.id)) { pending.get(m.id)(m); pending.delete(m.id) }
+})
+// Reject on socket error so a refused/closed debugger port fails fast instead of hanging.
+await new Promise((res, rej) => { ws.addEventListener('open', res, { once: true }); ws.addEventListener('error', rej, { once: true }) })
+const send = (method, params) => new Promise((res) => { const i = ++id; pending.set(i, res); ws.send(JSON.stringify({ id: i, method, params })) })
+
+const expr = process.argv[2] || '1+1'
+const r = await send('Runtime.evaluate', { expression: expr, returnByValue: true, awaitPromise: true })
+// Protocol-level failures arrive as `error` with no `result`; in-page throws arrive as `exceptionDetails`.
+if (r.error || r.result.exceptionDetails) {
+  console.error('EXCEPTION:', r.error?.message ?? r.result.exceptionDetails.exception?.description)
+  process.exitCode = 1
+} else {
+  console.log(JSON.stringify(r.result.result.value, null, 2))
+}
+ws.close()
diff --git a/apps/desktop/scripts/measure-real-stream.mjs b/apps/desktop/scripts/measure-real-stream.mjs
new file mode 100644
index 00000000000..57eee502d12
--- /dev/null
+++ b/apps/desktop/scripts/measure-real-stream.mjs
@@ -0,0 +1,252 @@
+// REAL streaming measurement — no React internals.
+//
+// Measures:
+//   1) rAF frame intervals during a verified live stream (long-frame histogram)
+//   2) MutationObserver: how often does the live assistant message mutate, what's the budget per mutation
+//   3) Text length growth rate (chars/sec)
+//   4) PerformanceObserver `longtask` entries (any task > 50ms blocks input)
+//
+// Detects REAL stream by waiting for assistant-message DOM count to grow past baseline.
+// Does NOT cancel — lets the stream run to completion or hits TIMEOUT_MS.
+
+const CDP_HTTP = 'http://127.0.0.1:9222'
+const PROMPT = process.env.PROMPT || 'count from 1 to 80, one number per line'
+const TIMEOUT_MS = Number(process.env.TIMEOUT_MS || 60000)
+
+async function getTarget() {
+  const list = await (await fetch(`${CDP_HTTP}/json`)).json()
+  const t = list.find((t) => t.type === 'page' && /5174/.test(t.url))
+  if (!t) throw new Error('renderer not found')
+  return t
+}
+
+class CDP {
+  constructor(ws) { this.ws = ws; this.id = 0; this.pending = new Map() }
+  static async open(url) {
+    const ws = new WebSocket(url)
+    await new Promise((r, j) => {
+      ws.addEventListener('open', r, { once: true })
+      ws.addEventListener('error', (e) => j(e), { once: true })
+    })
+    const cdp = new CDP(ws)
+    ws.addEventListener('message', (event) => {
+      const m = JSON.parse(event.data.toString())
+      if (m.id != null && cdp.pending.has(m.id)) {
+        const { resolve, reject } = cdp.pending.get(m.id)
+        cdp.pending.delete(m.id)
+        if (m.error) reject(new Error(m.error.message))
+        else resolve(m.result)
+      }
+    })
+    return cdp
+  }
+  send(method, params) {
+    const id = ++this.id
+    return new Promise((res, rej) => {
+      this.pending.set(id, { resolve: res, reject: rej })
+      this.ws.send(JSON.stringify({ id, method, params }))
+    })
+  }
+  async eval(expr) {
+    const r = await this.send('Runtime.evaluate', { expression: expr, returnByValue: true, awaitPromise: true })
+    if (r.exceptionDetails) throw new Error(r.exceptionDetails.exception?.description || 'eval')
+    return r.result.value
+  }
+  close() { this.ws.close() }
+}
+
+async function main() {
+  const target = await getTarget()
+  const cdp = await CDP.open(target.webSocketDebuggerUrl)
+
+  // Install recorders (frames + longtasks start now; they are trimmed to the stream window below).
+  await cdp.eval(`
+    (() => {
+      // rAF frame intervals
+      window.__FT__ = { times: [], stop: false }
+      let last = performance.now()
+      const tick = () => {
+        if (window.__FT__.stop) return
+        const now = performance.now()
+        window.__FT__.times.push(now - last)
+        last = now
+        requestAnimationFrame(tick)
+      }
+      requestAnimationFrame(tick)
+
+      // longtask observer
+      window.__LT__ = { entries: [], stop: false }
+      try {
+        const po = new PerformanceObserver((list) => {
+          if (window.__LT__.stop) return
+          for (const e of list.getEntries()) {
+            window.__LT__.entries.push({ name: e.name, duration: e.duration, startTime: e.startTime })
+          }
+        })
+        po.observe({ entryTypes: ['longtask'] })
+        window.__LT__.po = po
+      } catch {}
+
+      // mutation observer on streaming message
+      window.__MO__ = { mutations: [], stop: false, currentMsg: null }
+      const tryArm = () => {
+        const all = document.querySelectorAll('[data-slot="aui_assistant-message-root"]')
+        const last = all[all.length - 1]
+        if (!last || last === window.__MO__.currentMsg) return
+        window.__MO__.currentMsg = last
+        if (window.__MO__.obs) window.__MO__.obs.disconnect()
+        const obs = new MutationObserver((muts) => {
+          if (window.__MO__.stop) return
+          const t = performance.now()
+          window.__MO__.mutations.push({ t, count: muts.length, len: last.textContent.length })
+        })
+        obs.observe(last, { childList: true, subtree: true, characterData: true })
+        window.__MO__.obs = obs
+      }
+      window.__MO__.arm = tryArm
+      return 'recorders armed'
+    })()
+  `)
+
+  // Baseline
+  const base = JSON.parse(await cdp.eval(`
+    JSON.stringify({
+      assistantCount: document.querySelectorAll('[data-slot="aui_assistant-message-root"]').length,
+      busy: !!document.querySelector('[data-status="running"], [data-busy="true"]'),
+      hasComposer: !!document.querySelector('[contenteditable="true"]'),
+    })
+  `))
+  console.log('baseline:', base)
+  if (!base.hasComposer) { console.error('no composer'); cdp.close(); process.exit(2) }
+
+  // Type + submit
+  await cdp.eval(`
+    (() => {
+      const ed = document.querySelector('[contenteditable="true"]')
+      ed.focus()
+      document.execCommand('insertText', false, ${JSON.stringify(PROMPT)})
+      return 'typed'
+    })()
+  `)
+  const submitT0 = Date.now()
+  await cdp.eval(`
+    (() => {
+      const ed = document.querySelector('[contenteditable="true"]')
+      ed.dispatchEvent(new KeyboardEvent('keydown', { key: 'Enter', code: 'Enter', bubbles: true, cancelable: true }))
+      return 'submitted'
+    })()
+  `)
+
+  // Poll for REAL stream (assistant count > baseline). 30 seconds — accommodates
+  // slow first-token latencies on big providers.
+  let realStreamT = null
+  for (let i = 0; i < 600; i++) {
+    await new Promise((r) => setTimeout(r, 50))
+    const s = JSON.parse(await cdp.eval(`
+      JSON.stringify({
+        n: document.querySelectorAll('[data-slot="aui_assistant-message-root"]').length,
+        busy: !!document.querySelector('[data-status="running"], [data-busy="true"]'),
+        text: (() => { const a = document.querySelectorAll('[data-slot="aui_assistant-message-root"]'); return a.length ? a[a.length-1].textContent.length : 0 })()
+      })
+    `))
+    if (s.n > base.assistantCount) {
+      realStreamT = Date.now()
+      console.log('REAL stream started after', realStreamT - submitT0, 'ms — busy=', s.busy, 'text=', s.text)
+      // Stream is live: drop pre-stream frame/longtask samples (idle first-token wait), then arm the MO.
+      await cdp.eval('window.__FT__.times.length = 0; window.__LT__.entries.length = 0; window.__MO__.arm()')
+      break
+    }
+  }
+  if (!realStreamT) {
+    console.error('REAL STREAM NEVER STARTED')
+    cdp.close()
+    process.exit(2)
+  }
+
+  // Sample length growth, wait for completion or timeout
+  const samples = []
+  const start = Date.now()
+  while (Date.now() - start < TIMEOUT_MS) {
+    await new Promise((r) => setTimeout(r, 250))
+    const s = JSON.parse(await cdp.eval(`
+      JSON.stringify({
+        t: performance.now(),
+        len: (() => { const a = document.querySelectorAll('[data-slot="aui_assistant-message-root"]'); return a.length ? a[a.length-1].textContent.length : 0 })(),
+        busy: !!document.querySelector('[data-status="running"], [data-busy="true"]')
+      })
+    `))
+    samples.push(s)
+    if (!s.busy && samples.length > 4) {
+      await new Promise((r) => setTimeout(r, 300))
+      break
+    }
+  }
+
+  // Pull recordings
+  const data = JSON.parse(await cdp.eval(`
+    (() => {
+      window.__FT__.stop = true
+      window.__LT__.stop = true
+      window.__MO__.stop = true
+      try { window.__LT__.po && window.__LT__.po.disconnect() } catch {}
+      try { window.__MO__.obs && window.__MO__.obs.disconnect() } catch {}
+      return JSON.stringify({
+        frames: window.__FT__.times,
+        longtasks: window.__LT__.entries,
+        mutations: window.__MO__.mutations,
+      })
+    })()
+  `))
+
+  const { frames, longtasks, mutations } = data
+
+  // Frame histogram (samples were reset at stream start, so this is the stream window only)
+  const buckets = { '<=16.7': 0, '16.7-33': 0, '33-50': 0, '50-100': 0, '100-200': 0, '>200': 0 }
+  let frameTotal = 0
+  let maxFrame = 0
+  for (const f of frames) {
+    frameTotal += f
+    if (f > maxFrame) maxFrame = f
+    if (f <= 16.7) buckets['<=16.7']++
+    else if (f <= 33) buckets['16.7-33']++
+    else if (f <= 50) buckets['33-50']++
+    else if (f <= 100) buckets['50-100']++
+    else if (f <= 200) buckets['100-200']++
+    else buckets['>200']++
+  }
+  const avgFps = frames.length ? (frames.length / (frameTotal / 1000)).toFixed(1) : 'n/a'
+  const slowFrames = frames.filter((f) => f > 33).length
+  const veryslowFrames = frames.filter((f) => f > 100).length
+
+  // Longtask summary
+  const ltMs = longtasks.reduce((a, b) => a + b.duration, 0)
+  const ltMax = longtasks.length ? Math.max(...longtasks.map((e) => e.duration)) : 0
+
+  // Mutation rate (gaps between consecutive observer callbacks, sorted for percentiles)
+  const mutTotal = mutations.length
+  const mutDurs = []
+  for (let i = 1; i < mutations.length; i++) {
+    mutDurs.push(mutations[i].t - mutations[i - 1].t)
+  }
+  mutDurs.sort((a, b) => a - b)
+  const mutP50 = mutDurs[Math.floor(mutDurs.length * 0.5)] ?? 0
+  const mutP95 = mutDurs[Math.floor(mutDurs.length * 0.95)] ?? 0
+
+  // Growth rate
+  const firstLen = samples[0]?.len ?? 0
+  const lastLen = samples[samples.length - 1]?.len ?? 0
+  const elapsedS = samples.length ? (samples[samples.length - 1].t - samples[0].t) / 1000 : 0
+  const charsPerSec = elapsedS ? ((lastLen - firstLen) / elapsedS).toFixed(1) : 'n/a'
+
+  console.log('\n=== STREAM RESULTS ===')
+  console.log('window:', (frameTotal / 1000).toFixed(1), 's | frames:', frames.length, '| avgFps:', avgFps, '| maxFrame:', maxFrame.toFixed(1), 'ms')
+  console.log('frame histogram:', buckets)
+  console.log('slow frames (>33ms):', slowFrames, '| very slow (>100ms):', veryslowFrames)
+  console.log('longtasks:', longtasks.length, 'total', ltMs.toFixed(0), 'ms — max', ltMax.toFixed(1), 'ms')
+  console.log('text grew', firstLen, '→', lastLen, 'chars (', charsPerSec, 'char/s )')
+  console.log('mutations on streaming msg:', mutTotal, '| inter-mutation p50:', mutP50.toFixed(1), 'ms', 'p95:', mutP95.toFixed(1), 'ms')
+
+  cdp.close()
+}
+
+main().catch((e) => { console.error(e); process.exit(1) })
diff --git a/apps/desktop/scripts/measure-synthetic-stream.mjs b/apps/desktop/scripts/measure-synthetic-stream.mjs
new file mode 100644
index 00000000000..cd513719faa
--- /dev/null
+++ b/apps/desktop/scripts/measure-synthetic-stream.mjs
@@ -0,0 +1,318 @@
+// Measure render cost of a synthetic stream driven through the live $messages atom.
+//
+// Why synthetic: the user's LLM credits are depleted; we can't fire a real stream.
+// The synthetic stream exercises the exact same React pipeline (assistant-ui runtime →
+// repository.addOrUpdateMessage → MessagePrimitive re-render → markdown reflow) as a
+// real stream. The only thing it does NOT exercise is the gateway → SSE → optimistic-
+// merge path, which is orthogonal to the rendering question.
+//
+// What we record:
+//   1) rAF frame intervals (long-frame histogram; >33ms = perceived jank, >100ms = bad)
+//   2) PerformanceObserver `longtask` entries (task >50ms blocks input)
+//   3) MutationObserver: per-message mutation count & inter-mutation latency
+//   4) Optional: typing latency overlay — typing into composer while streaming
+//
+// Output is plain text suitable for terminal + a JSON sidecar for diffing across runs.
+
+import { writeFileSync } from 'node:fs'
+
+const CDP_HTTP = 'http://127.0.0.1:9222'
+const TOKENS = Number(process.env.TOKENS || 300)
+const INTERVAL_MS = Number(process.env.INTERVAL_MS || 16)
+const CHUNK = process.env.CHUNK || 'lorem ipsum '
+const TYPE_WHILE_STREAMING = process.env.TYPE_WHILE_STREAMING === '1'
+const LABEL = process.env.LABEL || 'baseline'
+const OUT = process.env.OUT || `frame-times-${LABEL}.json`
+
+async function getTarget() {
+  const list = await (await fetch(`${CDP_HTTP}/json`)).json()
+  const t = list.find((t) => t.type === 'page' && /5174/.test(t.url))
+  if (!t) throw new Error('renderer not found')
+  return t
+}
+
+class CDP {
+  constructor(ws) { this.ws = ws; this.id = 0; this.pending = new Map() }
+  static async open(url) {
+    const ws = new WebSocket(url)
+    await new Promise((r, j) => {
+      ws.addEventListener('open', r, { once: true })
+      ws.addEventListener('error', (e) => j(e), { once: true })
+    })
+    const cdp = new CDP(ws)
+    ws.addEventListener('message', (ev) => {
+      const m = JSON.parse(ev.data.toString())
+      if (m.id != null && cdp.pending.has(m.id)) {
+        const { resolve, reject } = cdp.pending.get(m.id)
+        cdp.pending.delete(m.id)
+        if (m.error) reject(new Error(m.error.message))
+        else resolve(m.result)
+      }
+    })
+    return cdp
+  }
+  send(method, params) {
+    const id = ++this.id
+    return new Promise((res, rej) => {
+      this.pending.set(id, { resolve: res, reject: rej })
+      this.ws.send(JSON.stringify({ id, method, params }))
+    })
+  }
+  async eval(expr) {
+    const r = await this.send('Runtime.evaluate', { expression: expr, returnByValue: true, awaitPromise: true })
+    if (r.exceptionDetails) throw new Error(r.exceptionDetails.exception?.description || 'eval')
+    return r.result.value
+  }
+  close() { this.ws.close() }
+}
+
+function pct(arr, p) {
+  if (!arr.length) return 0
+  const i = Math.min(arr.length - 1, Math.floor(arr.length * p))
+  return arr[i]
+}
+
+async function main() {
+  const target = await getTarget()
+  const cdp = await CDP.open(target.webSocketDebuggerUrl)
+
+  // Sanity check driver is loaded.
+  const probeOk = await cdp.eval('!!window.__PERF_DRIVE__ && !!window.__PERF_DRIVE__.stream')
+  if (!probeOk) {
+    console.error('__PERF_DRIVE__ not on window — did you reload the renderer after editing perf-probe.tsx?')
+    cdp.close()
+    process.exit(2)
+  }
+
+  // Install recorders.
+  await cdp.eval(`
+    (() => {
+      window.__FT__ = { times: [], stop: false }
+      let last = performance.now()
+      const tick = () => {
+        if (window.__FT__.stop) return
+        const now = performance.now()
+        window.__FT__.times.push(now - last)
+        last = now
+        requestAnimationFrame(tick)
+      }
+      requestAnimationFrame(tick)
+
+      window.__LT__ = { entries: [], stop: false }
+      try {
+        const po = new PerformanceObserver((list) => {
+          if (window.__LT__.stop) return
+          for (const e of list.getEntries()) {
+            window.__LT__.entries.push({ name: e.name, duration: e.duration, startTime: e.startTime })
+          }
+        })
+        po.observe({ entryTypes: ['longtask'] })
+        window.__LT__.po = po
+      } catch {}
+
+      window.__MO__ = { mutations: [], stop: false, currentMsg: null }
+      const arm = () => {
+        const all = document.querySelectorAll('[data-slot="aui_assistant-message-root"]')
+        const last = all[all.length - 1]
+        if (!last || last === window.__MO__.currentMsg) return
+        window.__MO__.currentMsg = last
+        if (window.__MO__.obs) window.__MO__.obs.disconnect()
+        const obs = new MutationObserver((muts) => {
+          if (window.__MO__.stop) return
+          const t = performance.now()
+          window.__MO__.mutations.push({ t, count: muts.length, len: last.textContent.length })
+        })
+        obs.observe(last, { childList: true, subtree: true, characterData: true })
+        window.__MO__.obs = obs
+      }
+      window.__MO__.arm = arm
+
+      // Optional: typing recorder state — filled by the type-while-streaming overlay below.
+      window.__TYP__ = { times: [], stop: false, lastKey: 0 }
+      return 'recorders armed'
+    })()
+  `)
+
+  // Baseline state.
+  const base = JSON.parse(await cdp.eval(`
+    JSON.stringify({
+      assistantCount: document.querySelectorAll('[data-slot="aui_assistant-message-root"]').length,
+      atomCount: window.__PERF_DRIVE__.snapshotMsgs()
+    })
+  `))
+  console.log('baseline:', base)
+
+  // Drive a synthetic stream.
+  const streamStart = Date.now()
+  await cdp.eval(`window.__PERF_DRIVE__.stream({ chunk: ${JSON.stringify(CHUNK)}, intervalMs: ${INTERVAL_MS}, totalTokens: ${TOKENS} })`)
+
+  // After the first paint, arm MO on the new message.
+  await new Promise((r) => setTimeout(r, 200))
+  await cdp.eval('window.__MO__.arm()')
+
+  // Optional: type while streaming.
+  if (TYPE_WHILE_STREAMING) {
+    await new Promise((r) => setTimeout(r, 400))
+    await cdp.eval(`(() => {
+      const ed = document.querySelector('[contenteditable="true"]')
+      if (!ed) return 'no composer' // skip the overlay rather than throw and abort before reset()
+      ed.focus()
+      window.__TYP__.startedAt = performance.now()
+      const text = 'the quick brown fox jumps over the lazy dog '
+      let i = 0
+      const tick = () => {
+        if (i >= text.length || window.__TYP__.stop) return
+        const t0 = performance.now()
+        document.execCommand('insertText', false, text[i])
+        // requestAnimationFrame to wait for next paint
+        requestAnimationFrame(() => {
+          window.__TYP__.times.push(performance.now() - t0)
+        })
+        i++
+        setTimeout(tick, 60)
+      }
+      tick()
+      return 'typing'
+    })()`)
+  }
+
+  // Wait for stream to complete + small grace.
+  const expectedMs = TOKENS * INTERVAL_MS + 1500
+  await new Promise((r) => setTimeout(r, expectedMs))
+
+  // Pull recordings.
+  const data = JSON.parse(await cdp.eval(`
+    (() => {
+      window.__FT__.stop = true
+      window.__LT__.stop = true
+      window.__MO__.stop = true
+      window.__TYP__.stop = true
+      try { window.__LT__.po && window.__LT__.po.disconnect() } catch {}
+      try { window.__MO__.obs && window.__MO__.obs.disconnect() } catch {}
+      return JSON.stringify({
+        frames: window.__FT__.times,
+        longtasks: window.__LT__.entries,
+        mutations: window.__MO__.mutations,
+        typing: window.__TYP__.times,
+        finalText: (() => { const a = document.querySelectorAll('[data-slot="aui_assistant-message-root"]'); return a.length ? a[a.length-1].textContent.length : 0 })()
+      })
+    })()
+  `))
+
+  // Reset DOM back to baseline so we don't accumulate fake messages.
+  await cdp.eval('window.__PERF_DRIVE__.reset()')
+
+  // Analysis. Warm-up is trimmed with a fixed WARMUP_MS cut (below), not by mutation timestamps.
+  const frames = data.frames
+
+  // Sum durations to figure out when each frame happened (relative to recorder start).
+  const frameTimeline = []
+  let acc = 0
+  for (const f of frames) { acc += f; frameTimeline.push(acc) }
+
+  // Mutations are in performance.now() ms; frames started recording when we installed
+  // the recorder (before stream). To align: compute total stream window from frames
+  // after mutation activity began. Simpler heuristic: drop first 500ms of frames as warm-up.
+  const WARMUP_MS = 500
+  let dropIdx = 0
+  for (let i = 0; i < frames.length; i++) {
+    if (frameTimeline[i] >= WARMUP_MS) { dropIdx = i; break }
+  }
+  const streamFrames = frames.slice(dropIdx)
+
+  const buckets = { '<=16.7': 0, '16.7-33': 0, '33-50': 0, '50-100': 0, '100-200': 0, '>200': 0 }
+  let frameTotal = 0
+  let maxFrame = 0
+  for (const f of streamFrames) {
+    frameTotal += f
+    if (f > maxFrame) maxFrame = f
+    if (f <= 16.7) buckets['<=16.7']++
+    else if (f <= 33) buckets['16.7-33']++
+    else if (f <= 50) buckets['33-50']++
+    else if (f <= 100) buckets['50-100']++
+    else if (f <= 200) buckets['100-200']++
+    else buckets['>200']++
+  }
+  const sortedFrames = [...streamFrames].sort((a, b) => a - b)
+  const fAvgFps = streamFrames.length ? (streamFrames.length / (frameTotal / 1000)).toFixed(1) : 'n/a'
+  const fP50 = pct(sortedFrames, 0.5).toFixed(1)
+  const fP95 = pct(sortedFrames, 0.95).toFixed(1)
+  const fP99 = pct(sortedFrames, 0.99).toFixed(1)
+  const slowFrames = streamFrames.filter((f) => f > 33).length
+  const veryslowFrames = streamFrames.filter((f) => f > 100).length
+
+  const ltDur = data.longtasks.map((e) => e.duration).sort((a, b) => a - b)
+  const ltMs = ltDur.reduce((a, b) => a + b, 0)
+  const ltMax = ltDur.length ? ltDur[ltDur.length - 1] : 0
+  const ltP95 = pct(ltDur, 0.95)
+
+  // Mutation cadence.
+  const mutDurs = []
+  for (let i = 1; i < data.mutations.length; i++) mutDurs.push(data.mutations[i].t - data.mutations[i - 1].t)
+  mutDurs.sort((a, b) => a - b)
+  const mutP50 = pct(mutDurs, 0.5)
+  const mutP95 = pct(mutDurs, 0.95)
+  const mutMax = mutDurs.length ? mutDurs[mutDurs.length - 1] : 0
+
+  // Typing latency (optional).
+  let typingSummary = null
+  if (TYPE_WHILE_STREAMING && data.typing.length) {
+    const t = [...data.typing].sort((a, b) => a - b)
+    typingSummary = {
+      n: t.length,
+      p50: pct(t, 0.5).toFixed(1),
+      p95: pct(t, 0.95).toFixed(1),
+      max: t[t.length - 1].toFixed(1)
+    }
+  }
+
+  const result = {
+    label: LABEL,
+    timestamp: new Date().toISOString(),
+    config: { TOKENS, INTERVAL_MS, CHUNK, TYPE_WHILE_STREAMING },
+    streamWallMs: Date.now() - streamStart,
+    frames: {
+      total: streamFrames.length,
+      avgFps: fAvgFps,
+      windowS: (frameTotal / 1000).toFixed(1),
+      p50: fP50,
+      p95: fP95,
+      p99: fP99,
+      max: maxFrame.toFixed(1),
+      slow33: slowFrames,
+      veryslow100: veryslowFrames,
+      histogram: buckets
+    },
+    longtasks: {
+      n: data.longtasks.length,
+      totalMs: ltMs.toFixed(0),
+      maxMs: ltMax.toFixed(1),
+      p95Ms: ltP95.toFixed(1)
+    },
+    mutations: {
+      n: data.mutations.length,
+      finalTextLen: data.finalText,
+      interMutP50ms: mutP50.toFixed(1),
+      interMutP95ms: mutP95.toFixed(1),
+      interMutMaxMs: mutMax.toFixed(1)
+    },
+    typing: typingSummary
+  }
+
+  writeFileSync(OUT, JSON.stringify(result, null, 2))
+
+  console.log('\n=== SYNTHETIC STREAM RESULTS ===')
+  console.log('label:', LABEL, '| tokens:', TOKENS, '@', INTERVAL_MS, 'ms')
+  console.log('streamWallMs:', result.streamWallMs)
+  console.log('FRAMES: avgFps', fAvgFps, '| p50', fP50, 'ms | p95', fP95, 'ms | p99', fP99, 'ms | max', maxFrame.toFixed(1), 'ms')
+  console.log('FRAMES histogram:', buckets)
+  console.log('FRAMES slow(>33):', slowFrames, '/ veryslow(>100):', veryslowFrames, 'of', streamFrames.length)
+  console.log('LONGTASKS:', data.longtasks.length, '| total', ltMs.toFixed(0), 'ms | max', ltMax.toFixed(1), 'ms | p95', ltP95.toFixed(1), 'ms')
+  console.log('MUTATIONS:', data.mutations.length, '| finalLen', data.finalText, 'chars | inter p50', mutP50.toFixed(1), 'ms | p95', mutP95.toFixed(1), 'ms')
+  if (typingSummary) console.log('TYPING-WHILE-STREAMING latency: p50', typingSummary.p50, 'ms | p95', typingSummary.p95, 'ms | n=', typingSummary.n)
+  console.log('written to', OUT)
+
+  cdp.close()
+}
+
+main().catch((e) => { console.error(e); process.exit(1) })
diff --git a/apps/desktop/scripts/profile-real-stream.mjs b/apps/desktop/scripts/profile-real-stream.mjs
new file mode 100644
index 00000000000..cb5da652b33
--- /dev/null
+++ b/apps/desktop/scripts/profile-real-stream.mjs
@@ -0,0 +1,137 @@
+// CPU-profile during a real LLM stream — confirms or refutes whether the
+// synthetic stream's hotspots (Streamdown markdown re-parse, FadeText)
+// match real-world content.
+//
+// Run *after* model is set to something fast + cheap (gpt-4o-mini etc.).
+// Sends a prompt likely to produce markdown + a numbered list.
+
+import { writeFileSync } from 'node:fs'
+
+const CDP_HTTP = 'http://127.0.0.1:9222'
+const PROMPT = process.env.PROMPT || 'Give me a numbered list of 8 useful bash one-liners. For each: a brief description, then the command in a code block. No preamble.'
+const OUT = process.env.OUT || `/tmp/real-stream-${Date.now()}.cpuprofile`
+const START_TIMEOUT = Number(process.env.START_TIMEOUT || 45000)
+const STREAM_TIMEOUT = Number(process.env.STREAM_TIMEOUT || 60000)
+
+class CDP {
+  constructor(ws) { this.ws = ws; this.id = 0; this.pending = new Map() }
+  static async open(url) {
+    const ws = new WebSocket(url)
+    await new Promise((r, j) => { ws.addEventListener('open', r, { once: true }); ws.addEventListener('error', j, { once: true }) })
+    const cdp = new CDP(ws)
+    ws.addEventListener('message', (ev) => {
+      const m = JSON.parse(ev.data.toString())
+      if (m.id != null && cdp.pending.has(m.id)) {
+        const { resolve, reject } = cdp.pending.get(m.id)
+        cdp.pending.delete(m.id)
+        if (m.error) reject(new Error(m.error.message))
+        else resolve(m.result)
+      }
+    })
+    return cdp
+  }
+  send(method, params) {
+    const id = ++this.id
+    return new Promise((res, rej) => {
+      this.pending.set(id, { resolve: res, reject: rej })
+      this.ws.send(JSON.stringify({ id, method, params }))
+    })
+  }
+  async eval(expr) {
+    const r = await this.send('Runtime.evaluate', { expression: expr, returnByValue: true, awaitPromise: true })
+    if (r.exceptionDetails) throw new Error(r.exceptionDetails.exception?.description || 'eval')
+    return r.result.value
+  }
+  close() { this.ws.close() }
+}
+
+async function main() {
+  const list = await (await fetch(`${CDP_HTTP}/json`)).json()
+  const target = list.find((t) => t.type === 'page' && /5174/.test(t.url))
+  if (!target) throw new Error('renderer not found')
+  const cdp = await CDP.open(target.webSocketDebuggerUrl)
+  const baseCount = await cdp.eval('document.querySelectorAll("[data-slot=aui_assistant-message-root]").length')
+
+  // Submit prompt
+  await cdp.eval(`(() => {
+    const ed = document.querySelector('[contenteditable="true"]')
+    ed.focus()
+    document.execCommand('insertText', false, ${JSON.stringify(PROMPT)})
+    ed.dispatchEvent(new KeyboardEvent('keydown', { key: 'Enter', code: 'Enter', which: 13, keyCode: 13, bubbles: true, cancelable: true }))
+    return 'submitted'
+  })()`)
+
+  // Wait for real stream start (assistant count grows).
+  const submitT0 = Date.now()
+  let streamT = null
+  for (let i = 0; i < START_TIMEOUT / 50; i++) {
+    await new Promise((r) => setTimeout(r, 50))
+    const n = await cdp.eval('document.querySelectorAll("[data-slot=aui_assistant-message-root]").length')
+    if (n > baseCount) { streamT = Date.now(); break }
+  }
+  if (!streamT) {
+    console.error('stream never started within', START_TIMEOUT, 'ms')
+    cdp.close()
+    process.exit(2)
+  }
+  console.log('REAL stream started after', streamT - submitT0, 'ms — starting CPU profile NOW')
+
+  // Start CPU profile NOW, only during stream phase.
+  await cdp.send('Profiler.enable')
+  await cdp.send('Profiler.setSamplingInterval', { interval: 100 })
+  await cdp.send('Profiler.start')
+
+  // Wait until busy goes false + grace, or timeout.
+ const cutoff = Date.now() + STREAM_TIMEOUT + while (Date.now() < cutoff) { + await new Promise((r) => setTimeout(r, 500)) + const busy = await cdp.eval('!!document.querySelector("[data-status=running], [data-busy=true]")') + if (!busy) { + await new Promise((r) => setTimeout(r, 500)) + break + } + } + + const { profile } = await cdp.send('Profiler.stop') + writeFileSync(OUT, JSON.stringify(profile)) + console.log('wrote', OUT) + + const samples = profile.samples || [] + const timeDeltas = profile.timeDeltas || [] + const nodes = new Map(profile.nodes.map((n) => [n.id, n])) + const selfTime = new Map() + for (let i = 0; i < samples.length; i++) { + const id = samples[i] + const dt = timeDeltas[i] ?? 0 + selfTime.set(id, (selfTime.get(id) || 0) + dt) + } + const ranked = [...selfTime.entries()] + .map(([id, us]) => { + const n = nodes.get(id) + const cf = n?.callFrame || {} + return { + ms: us / 1000, + name: cf.functionName || '(anonymous)', + url: (cf.url || '').slice(-60), + line: cf.lineNumber + } + }) + .filter((x) => !/\(root\)|\(idle\)|\(garbage collector\)|\(program\)/.test(x.name)) + .sort((a, b) => b.ms - a.ms) + .slice(0, 25) + + const finalText = await cdp.eval(`(() => { + const all = document.querySelectorAll('[data-slot="aui_assistant-message-root"]') + return all.length ? 
all[all.length-1].textContent.length : 0 + })()`) + console.log('\nfinal assistant message length:', finalText, 'chars') + + console.log('\n=== TOP 25 SELF TIME (ms) DURING REAL STREAM ===') + for (const r of ranked) { + console.log(`${r.ms.toFixed(1).padStart(7)} ${r.name.padEnd(40)} ${r.url}:${r.line}`) + } + + cdp.close() +} + +main().catch((e) => { console.error(e); process.exit(1) }) diff --git a/apps/desktop/scripts/profile-synth-stream.mjs b/apps/desktop/scripts/profile-synth-stream.mjs new file mode 100644 index 00000000000..1cc395c1bab --- /dev/null +++ b/apps/desktop/scripts/profile-synth-stream.mjs @@ -0,0 +1,103 @@ +// CPU-profile a synthetic stream — outputs a .cpuprofile and a top-self ranking. +// Open the .cpuprofile in Chrome DevTools Performance panel for a flamegraph. + +import { writeFileSync } from 'node:fs' + +const CDP_HTTP = 'http://127.0.0.1:9222' +const TOKENS = Number(process.env.TOKENS || 400) +const INTERVAL_MS = Number(process.env.INTERVAL_MS || 8) +const CHUNK = process.env.CHUNK || '**word** in _italic_ with `code` ' +const LABEL = process.env.LABEL || 'profile' +const OUT = process.env.OUT || `synth-${LABEL}.cpuprofile` + +class CDP { + constructor(ws) { this.ws = ws; this.id = 0; this.pending = new Map() } + static async open(url) { + const ws = new WebSocket(url) + await new Promise((r, j) => { ws.addEventListener('open', r, { once: true }); ws.addEventListener('error', j, { once: true }) }) + const cdp = new CDP(ws) + ws.addEventListener('message', (ev) => { + const m = JSON.parse(ev.data.toString()) + if (m.id != null && cdp.pending.has(m.id)) { + const { resolve, reject } = cdp.pending.get(m.id) + cdp.pending.delete(m.id) + if (m.error) reject(new Error(m.error.message)) + else resolve(m.result) + } + }) + return cdp + } + send(method, params) { + const id = ++this.id + return new Promise((res, rej) => { + this.pending.set(id, { resolve: res, reject: rej }) + this.ws.send(JSON.stringify({ id, method, params })) + }) + } + async eval(expr) { + const r = await 
this.send('Runtime.evaluate', { expression: expr, returnByValue: true, awaitPromise: true }) + if (r.exceptionDetails) throw new Error(r.exceptionDetails.exception?.description || 'eval') + return r.result.value + } + close() { this.ws.close() } +} + +async function main() { + const list = await (await fetch(`${CDP_HTTP}/json`)).json() + const target = list.find((t) => t.type === 'page' && /5174/.test(t.url)) + if (!target) throw new Error('renderer not found') + const cdp = await CDP.open(target.webSocketDebuggerUrl) + if (!await cdp.eval('!!window.__PERF_DRIVE__')) { + console.error('no __PERF_DRIVE__') + cdp.close() + process.exit(2) + } + + await cdp.send('Profiler.enable') + // High-resolution sampling: 100us + await cdp.send('Profiler.setSamplingInterval', { interval: 100 }) + await cdp.send('Profiler.start') + + await cdp.eval(`window.__PERF_DRIVE__.stream({ chunk: ${JSON.stringify(CHUNK)}, intervalMs: ${INTERVAL_MS}, totalTokens: ${TOKENS} })`) + await new Promise((r) => setTimeout(r, TOKENS * INTERVAL_MS + 1500)) + // Stop the profiler before reset() so teardown of the synthetic messages is not sampled. + const { profile } = await cdp.send('Profiler.stop') + await cdp.eval('window.__PERF_DRIVE__.reset()') + writeFileSync(OUT, JSON.stringify(profile)) + console.log('wrote', OUT) + + // Compute top self time per function. + const samples = profile.samples || [] + const timeDeltas = profile.timeDeltas || [] + const nodes = new Map(profile.nodes.map((n) => [n.id, n])) + const selfTime = new Map() // id -> microseconds + for (let i = 0; i < samples.length; i++) { + const id = samples[i] + const dt = timeDeltas[i] ?? 
0 + selfTime.set(id, (selfTime.get(id) || 0) + dt) + } + const ranked = [...selfTime.entries()] + .map(([id, us]) => { + const n = nodes.get(id) + const cf = n?.callFrame || {} + return { + us, + ms: us / 1000, + name: cf.functionName || '(anonymous)', + url: (cf.url || '').slice(-60), + line: cf.lineNumber + } + }) + .filter((x) => !/\(root\)|\(idle\)|\(garbage collector\)|\(program\)/.test(x.name)) + .sort((a, b) => b.us - a.us) + .slice(0, 30) + + console.log('\n=== TOP 30 SELF TIME (ms) ===') + for (const r of ranked) { + console.log(`${r.ms.toFixed(1).padStart(7)} ${r.name.padEnd(40)} ${r.url}:${r.line}`) + } + + cdp.close() +} + +main().catch((e) => { console.error(e); process.exit(1) }) diff --git a/apps/desktop/scripts/profile-typing-lag.md b/apps/desktop/scripts/profile-typing-lag.md index ecbab9d5475..6ef5c616da4 100644 --- a/apps/desktop/scripts/profile-typing-lag.md +++ b/apps/desktop/scripts/profile-typing-lag.md @@ -153,3 +153,133 @@ streaming. `scripts/measure-submit.mjs` measures `enter → composer-cleared → user-message-rendered → first-paint`. The script triggers a real prompt submission, so use it on a throwaway session. Not enabled in CI. + +## Streaming "5fps" investigation (May 21, 2026) + +User complaint: "the streaming must bring fps to like 5? lol" — felt +hitches during assistant streaming on long threads. + +### Tooling added + +- **`src/app/chat/perf-probe.tsx`** — dev-only side-effect import (guarded by + `import.meta.env.MODE !== 'production'` in `main.tsx`). Attaches two + helpers to `window`: + - `__PERF_PROBE__` — React `<Profiler>` recorder. Currently inert because + Vite is serving the production React build (see "Vite dev-build issue" + below); kept for when that's fixed. + - `__PERF_DRIVE__` — synthetic stream driver.
Pushes tokens through the + live `$messages` atom at a fixed cadence, so the assistant-ui runtime, + incremental repository, Streamdown markdown renderer, and React commit + pipeline all see the same workload they'd see from a real LLM stream — + but with no LLM call (and no credit cost). +- **`scripts/measure-synthetic-stream.mjs`** — drives `__PERF_DRIVE__`, + records rAF frame intervals, `PerformanceObserver({entryTypes:['longtask']})` + entries, `MutationObserver` cadence on the live message, and optional + type-while-streaming keystroke latency. +- **`scripts/profile-synth-stream.mjs`** — CPU profile during a synthetic + stream; writes a `.cpuprofile` (open in Chrome DevTools Performance panel) + and a top-30 self-time table. +- **`scripts/measure-real-stream.mjs`** — same harness as the synthetic but + fires a real LLM prompt. Use when you have credits and want to confirm + the synthetic predictions hold. +- **`scripts/profile-real-stream.mjs`** — CPU profile over the duration of + a real LLM stream. + +Helpers: `scripts/eval.mjs` (one-shot CDP eval), `scripts/reload.mjs` +(hard reload renderer over CDP). + +### Findings + +Measured on the Cloud Shadows session (7 turns, ~11k px scrollHeight) and +the 34 MB session `session_20260514_215353_fe0ac8.json` (110 FadeText +instances, lots of historical tool calls). + +| metric | Cloud Shadows | 34 MB session | +|---|---|---| +| avgFps (60 tok/sec, 5s) | 60.0 | 58.6 | +| frame p50 / p95 / p99 (ms) | 16.7 / 18.0 / 21.1 | 16.6 / 25.6 / 31.4 | +| max frame (ms) | 31.1 | 97-127 (varies) | +| longtasks per 5s window | 0 | 1-2, 75-127 ms | +| type-while-stream p95 latency (ms) | 17 | — | + +A single real-LLM stream on Cloud Shadows (gpt-4o-mini, 39s window) saw +12 longtasks totalling 1.26 s — same cadence the synthetic predicted +(~1 hitch per 3.25 s, max 123 ms). So the **synthetic stream is a faithful +proxy for the real one** and is fine for iterating on fixes without paying +for tokens. 
+ +### CPU profile during streaming (synthetic, markdown content) + +Top self-time costs (5 s window, 400 tokens at 125 tok/s, markdown chunks): + +| ms (self) | function | source | +|---|---|---| +| 260 | `bn$1` | `chunk-BO2N…js:20003` (micromark tokenize) | +| 249 | `m$1` | `chunk-BO2N…js:19949` (micromark) | +| 128 | `compile` | `chunk-BO2N…js:21884` (mdast → hast compile) | +| 73 | FadeText body | `components/ui/fade-text.tsx` | +| 62 | `parser` | `chunk-BO2N…js:22680` | +| 49 | `fromThreadMessageLike` | `@assistant-ui/internal` | + +That `chunk-BO2N2NFS` is the vendored bundle containing `micromark`, +`mdast-util-from-markdown`, `mdast-util-to-hast`, `rehype-raw`, +`hast-util-sanitize`, etc. — i.e. **Streamdown's markdown pipeline, +re-parsing the entire growing assistant message on every token append**. +Cost scales linearly with message length. + +Compare plain-text (no markdown) — the `chunk-BO2N…` entries drop out +of the top 30 entirely; total work per 5 s window halves. + +### Fix landed: `FadeText` memo + +`FadeText` is used in `tool-fallback.tsx` (110 instances on a tool-heavy +thread). Before: each parent re-render during streaming triggered a +`useEffect([children])` that forced a `scrollWidth` layout read — even +when the title text was unchanged. The `useResizeObserver` already covers +the genuine resize case, so the effect was strictly redundant. + +After: wrapped in `React.memo` with a custom comparator that compares +`children` (scalar fast-path), `className`, `fadeWidth`, and `style` +field-by-field. Verified via temporary render counter: +**122 renders during a 2 s synthetic stream vs ~11 000 without memo** +(110 instances × ~100 stream updates). Doesn't move the longtask needle +on its own — Streamdown dwarfs it — but eliminates a class of forced +layouts and removes a steady CPU floor. + +### Not fixed: Streamdown markdown re-parse + +This is the dominant cost and the cause of the user's perceived hitches. 
+The renderer re-parses the entire message buffer on every stream update. +At ~3-5 k chars, each parse costs ~30 ms; when several pile into one +frame the result is a 75-125 ms longtask = the "5 fps moment". + +Possible approaches (none implemented here): + +1. **Coalesce/throttle Streamdown updates** — render at most every 32 ms + instead of every set-state. Reduces parses but doesn't reduce + per-parse cost; trades latency for smoothness. +2. **Memoize per-prefix** — diff the new text against the prior parsed + version; only re-parse the changed suffix. +3. **Render in stable segments** — close-form historical paragraphs as + immutable React nodes; only the live tail goes through markdown each + token. Probably the highest-impact change but requires forking or + patching `@assistant-ui/react-streamdown`. +4. **Move parsing to a Web Worker** — main thread no longer blocks on + markdown. Largest surgery; requires double-buffered hast. + +### Vite dev-build issue (separate) + +`http://127.0.0.1:5174/node_modules/.vite/deps/react.js` resolves to +`react/cjs/react.production.js`, and `react-dom_client.js` → +`react-dom-client.production.js`. As a result: + +- `<Profiler>` `onRender` is never called (production build is a + no-op). +- `import.meta.env.DEV` is `false`, `PROD` is `true` even under `vite dev` + (hence `MODE !== 'production'` as the workaround in `main.tsx`). +- All the React 19 dev-only warnings/devtools backend hooks are absent. + +Root cause likely sits in `vite.config.ts` aliasing + dedupe + Vite 8's +new `optimizeDeps` defaults. Worth a separate fix pass — when it's +resolved, the `<Profiler>` blocks in `perf-probe.tsx` become useful +(per-id commit timings) instead of inert. diff --git a/apps/desktop/scripts/reload.mjs b/apps/desktop/scripts/reload.mjs new file mode 100644 index 00000000000..b5f7684735f --- /dev/null +++ b/apps/desktop/scripts/reload.mjs @@ -0,0 +1,36 @@ +// Hard reload the Electron renderer over CDP.
Vite-no-HMR mode means edits +// don't auto-apply — call this after editing source. +const targets = await (await fetch('http://127.0.0.1:9222/json')).json() +const t = targets.find((t) => t.url.includes('5174')) +if (!t) { + console.error('renderer not found') + process.exit(1) +} +const ws = new WebSocket(t.webSocketDebuggerUrl) +let id = 0 +const pending = new Map() +ws.addEventListener('message', (ev) => { + const m = JSON.parse(ev.data) + if (pending.has(m.id)) { + pending.get(m.id)(m) + pending.delete(m.id) + } +}) +await new Promise((r) => ws.addEventListener('open', r)) +const send = (method, params = {}) => + new Promise((res) => { + const i = ++id + pending.set(i, res) + ws.send(JSON.stringify({ id: i, method, params })) + }) + +await send('Page.reload', { ignoreCache: true }) +console.log('reload sent') +// Wait for new doc. +await new Promise((r) => setTimeout(r, 2500)) +const r = await send('Runtime.evaluate', { + expression: 'JSON.stringify({ hasProbe: !!window.__PERF_PROBE__, composer: !!document.querySelector("[contenteditable=true]"), url: location.hash })', + returnByValue: true, +}) +console.log(r.result.result.value) +ws.close() diff --git a/apps/desktop/src/app/chat/perf-probe.tsx b/apps/desktop/src/app/chat/perf-probe.tsx new file mode 100644 index 00000000000..7e33ec2a986 --- /dev/null +++ b/apps/desktop/src/app/chat/perf-probe.tsx @@ -0,0 +1,167 @@ +import { Profiler, type ProfilerOnRenderCallback, type ReactNode } from 'react' + +import { $messages, setMessages, setBusy } from '@/store/session' + +type Sample = { + id: string + phase: string + actualDuration: number + baseDuration: number + startTime: number + commitTime: number +} + +type SyntheticDriverHandle = { stop: () => void } + +declare global { + interface Window { + __PERF_PROBE__?: { + samples: Sample[] + enabled: boolean + clear: () => void + summary: () => Record + } + __PERF_DRIVE__?: { + /** Inject an assistant message and grow it by `chunk` every `intervalMs`. 
Returns a stop handle. */ + stream: (opts?: { chunk?: string; intervalMs?: number; totalTokens?: number }) => SyntheticDriverHandle + reset: () => void + snapshotMsgs: () => number + } + } +} + +if (typeof window !== 'undefined' && !window.__PERF_PROBE__) { + const samples: Sample[] = [] + window.__PERF_PROBE__ = { + samples, + enabled: false, + clear: () => { + samples.length = 0 + }, + summary: () => { + const byId = new Map() + for (const s of samples) { + const k = `${s.id}:${s.phase}` + const arr = byId.get(k) ?? [] + arr.push(s.actualDuration) + byId.set(k, arr) + } + const out: Record = {} + for (const [k, arr] of byId) { + arr.sort((a, b) => a - b) + const total = arr.reduce((a, b) => a + b, 0) + out[k] = { + count: arr.length, + total: Math.round(total * 100) / 100, + max: Math.round(arr[arr.length - 1] * 100) / 100, + p50: Math.round(arr[Math.floor(arr.length * 0.5)] * 100) / 100, + p95: Math.round(arr[Math.floor(arr.length * 0.95)] * 100) / 100, + } + } + return out + }, + } +} + +const onRender: ProfilerOnRenderCallback = (id, phase, actualDuration, baseDuration, startTime, commitTime) => { + const probe = typeof window !== 'undefined' ? window.__PERF_PROBE__ : undefined + if (!probe || !probe.enabled) return + probe.samples.push({ id, phase, actualDuration, baseDuration, startTime, commitTime }) + if (probe.samples.length > 5000) probe.samples.splice(0, probe.samples.length - 5000) +} + +if (typeof window !== 'undefined' && !window.__PERF_DRIVE__) { + // Synthetic stream driver — pushes tokens through the live $messages atom so the + // assistant-ui runtime + react tree sees them exactly as a real LLM stream would. + // Used by scripts/measure-synthetic-stream.mjs when no live LLM credit is available.
+ let baseline: ReturnType<typeof $messages.get> | null = null + let activeHandle: SyntheticDriverHandle | null = null + + const stop = () => { + activeHandle = null + setBusy(false) + } + + window.__PERF_DRIVE__ = { + snapshotMsgs: () => $messages.get().length, + reset: () => { + activeHandle?.stop() + if (baseline) setMessages(baseline) + baseline = null + setBusy(false) + }, + stream: ({ chunk = 'word ', intervalMs = 16, totalTokens = 400 } = {}) => { + activeHandle?.stop() + const current = $messages.get() + if (!baseline) baseline = current + const msgId = `synthetic-${Date.now()}` + // Seed an empty assistant message — assistant-ui will see it grow. + setMessages([ + ...current, + { + id: msgId, + role: 'assistant', + parts: [{ type: 'text', text: '' }], + timestamp: Date.now(), + pending: true + } + ]) + setBusy(true) + + let pushed = 0 + let timer: ReturnType<typeof setTimeout> | null = null + const handle: SyntheticDriverHandle = { + stop: () => { + if (timer) clearTimeout(timer) + timer = null + activeHandle = null + // Mark message finalized. + setMessages(prev => + prev.map(m => + m.id === msgId + ? { ...m, pending: false } + : m + ) + ) + setBusy(false) + } + } + activeHandle = handle + + const tick = () => { + if (activeHandle !== handle) return + if (pushed >= totalTokens) { + handle.stop() + return + } + pushed += 1 + setMessages(prev => + prev.map(m => { + if (m.id !== msgId) return m + const head = m.parts.slice(0, -1) + const last = m.parts.at(-1) + const lastText = last && last.type === 'text' ? last.text : '' + return { + ...m, + parts: [...head, { type: 'text', text: lastText + chunk }] + } + }) + ) + timer = setTimeout(tick, intervalMs) + } + timer = setTimeout(tick, intervalMs) + return handle + } + } + + // Suppress dead-import warning.
+ void stop +} + +export function PerfProbe({ id, children }: { id: string; children: ReactNode }) { + return ( + <Profiler id={id} onRender={onRender}> + {children} + </Profiler> + ) +} diff --git a/apps/desktop/src/main.tsx b/apps/desktop/src/main.tsx index daf26132f15..f203e42d7ce 100644 --- a/apps/desktop/src/main.tsx +++ b/apps/desktop/src/main.tsx @@ -12,6 +12,15 @@ import { ThemeProvider } from './themes/context' installClipboardShim() +// Dev-only: install __PERF_DRIVE__ + __PERF_PROBE__ on window so the +// scripts/ harnesses can drive a synthetic stream + record render cost. +// Tree-shaken out of production builds. (Uses MODE rather than DEV because +// our Vite setup currently bundles with PROD=true even in `vite dev`; see +// scripts/dev-no-hmr.mjs for the surrounding workarounds.) +if (import.meta.env.MODE !== 'production') { + import('./app/chat/perf-probe') +} + const queryClient = new QueryClient({ defaultOptions: { queries: {