mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-13 09:01:54 +00:00
chore(desktop): synthetic-stream perf harness + scripts
Drops the React `<Profiler>` approach (no-op because Vite is currently
serving the production React build) in favor of an externally-observable
measurement stack: rAF frame intervals, `PerformanceObserver({entryTypes:
['longtask']})`, and a `MutationObserver` on the live streaming message.
Adds a synthetic stream driver — `window.__PERF_DRIVE__.stream({...})` —
that pushes tokens through the live `$messages` atom at a controlled rate,
so the assistant-ui runtime, incremental repository, and Streamdown
markdown pipeline see the same workload they'd see during a real LLM
stream, without the LLM cost.
The driver lives in `src/app/chat/perf-probe.tsx`; `main.tsx` side-imports
it under `import.meta.env.MODE !== 'production'` so it tree-shakes out of
prod builds. (Using `MODE` rather than `DEV` because our Vite setup
currently reports `DEV=false` even under `vite dev` — see the dev-build
note in `profile-typing-lag.md`.)
Scripts:
- measure-synthetic-stream.mjs — drive a synthetic stream and record frame/longtask/mutation metrics
- profile-synth-stream.mjs — CPU profile + top self-time during a synthetic stream
- measure-real-stream.mjs — same harness, real LLM stream
- profile-real-stream.mjs — CPU profile bracketing the real stream window
- eval.mjs / reload.mjs — small CDP helpers
A real-LLM measurement on Cloud Shadows (gpt-4o-mini, 39 s window) showed
12 longtasks in the same 75-127 ms range the synthetic predicted, so the
synthetic is a faithful proxy.
This commit is contained in:
parent
5abf89ddd1
commit
99f2a9503c
9 changed files with 1173 additions and 0 deletions
21
apps/desktop/scripts/eval.mjs
Normal file
21
apps/desktop/scripts/eval.mjs
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
// Simple eval helper — runs an expression in the renderer page and prints result.value as JSON.
// Usage: node eval.mjs '<expression>'   (defaults to 1+1)
// Exits non-zero when the renderer is missing, the socket fails, or the expression throws.
const targets = await (await fetch('http://127.0.0.1:9222/json')).json()
const t = targets.find((t) => t.url.includes('5174'))
if (!t) {
  // Without this guard the next line dies with an opaque "cannot read properties of undefined".
  console.error('renderer not found')
  process.exit(1)
}
const ws = new WebSocket(t.webSocketDebuggerUrl)
let id = 0
const pending = new Map()
ws.addEventListener('message', (ev) => {
  const m = JSON.parse(ev.data)
  // Only replies to our own calls carry a known id; protocol events are ignored.
  if (pending.has(m.id)) { pending.get(m.id)(m); pending.delete(m.id) }
})
// Wait for the socket, but fail instead of hanging forever if the connection is refused.
await new Promise((resolve, reject) => {
  ws.addEventListener('open', resolve, { once: true })
  ws.addEventListener('error', () => reject(new Error('CDP websocket connection failed')), { once: true })
})
// Resolves with the whole CDP reply ({ id, result } or { id, error }).
const send = (method, params) => new Promise((res) => { const i = ++id; pending.set(i, res); ws.send(JSON.stringify({ id: i, method, params })) })

const expr = process.argv[2] || '1+1'
const r = await send('Runtime.evaluate', { expression: expr, returnByValue: true, awaitPromise: true })
if (r.error) {
  // Protocol-level failure: the reply has no `result` member at all.
  console.error('CDP ERROR:', r.error.message)
  process.exitCode = 1
} else if (r.result.exceptionDetails) {
  console.error('EXCEPTION:', r.result.exceptionDetails.exception?.description)
  process.exitCode = 1
} else {
  console.log(JSON.stringify(r.result.result.value, null, 2))
}
ws.close()
|
||||
252
apps/desktop/scripts/measure-real-stream.mjs
Normal file
252
apps/desktop/scripts/measure-real-stream.mjs
Normal file
|
|
@ -0,0 +1,252 @@
|
|||
// REAL streaming measurement — no React internals.
|
||||
//
|
||||
// Measures:
|
||||
// 1) rAF frame intervals during a verified live stream (long-frame histogram)
|
||||
// 2) MutationObserver: how often does the live assistant message mutate, what's the budget per mutation
|
||||
// 3) Text length growth rate (chars/sec)
|
||||
// 4) PerformanceObserver `longtask` entries (any task > 50ms blocks input)
|
||||
//
|
||||
// Detects REAL stream by waiting for assistant-message DOM count to grow past baseline.
|
||||
// Does NOT cancel — lets the stream run to completion or hits TIMEOUT_MS.
|
||||
|
||||
const CDP_HTTP = 'http://127.0.0.1:9222'
|
||||
const PROMPT = process.env.PROMPT || 'count from 1 to 80, one number per line'
|
||||
const TIMEOUT_MS = Number(process.env.TIMEOUT_MS || 60000)
|
||||
|
||||
/**
 * Look up the renderer page in the CDP target list.
 *
 * Fetches `${CDP_HTTP}/json` and picks the first target of type `page`
 * whose URL contains "5174".
 *
 * @returns {Promise<object>} the matching CDP target descriptor
 * @throws {Error} 'renderer not found' when no target matches
 */
async function getTarget() {
  const response = await fetch(`${CDP_HTTP}/json`)
  const targets = await response.json()
  const renderer = targets.find((entry) => entry.type === 'page' && /5174/.test(entry.url))
  if (renderer) return renderer
  throw new Error('renderer not found')
}
|
||||
|
||||
/**
 * Minimal Chrome DevTools Protocol client over a single WebSocket.
 *
 * Each call gets an incrementing id; replies are matched back to the caller
 * through `pending` (id -> { resolve, reject }). Protocol events (messages
 * without a known id) are ignored.
 */
class CDP {
  constructor(ws) { this.ws = ws; this.id = 0; this.pending = new Map() }

  /**
   * Connect to a target's debugger socket.
   * @param {string} url webSocketDebuggerUrl of the target
   * @returns {Promise<CDP>} resolves once the socket is open
   * @throws {Error} when the socket errors before opening
   */
  static async open(url) {
    const ws = new WebSocket(url)
    await new Promise((resolve, reject) => {
      ws.addEventListener('open', resolve, { once: true })
      ws.addEventListener('error', () => reject(new Error(`CDP connection failed: ${url}`)), { once: true })
    })
    const cdp = new CDP(ws)
    ws.addEventListener('message', (event) => {
      const m = JSON.parse(event.data.toString())
      if (m.id != null && cdp.pending.has(m.id)) {
        const { resolve, reject } = cdp.pending.get(m.id)
        cdp.pending.delete(m.id)
        if (m.error) reject(new Error(m.error.message))
        else resolve(m.result)
      }
    })
    // If the socket drops mid-call, fail the in-flight calls. Otherwise their
    // promises never settle and the script ends silently with nothing reported.
    ws.addEventListener('close', () => {
      for (const { reject } of cdp.pending.values()) reject(new Error('CDP socket closed'))
      cdp.pending.clear()
    })
    return cdp
  }

  /**
   * Issue one protocol call.
   * @param {string} method e.g. 'Runtime.evaluate'
   * @param {object} [params]
   * @returns {Promise<object>} the reply's `result`; rejects on a protocol error or socket close
   */
  send(method, params) {
    const id = ++this.id
    return new Promise((resolve, reject) => {
      this.pending.set(id, { resolve, reject })
      try {
        this.ws.send(JSON.stringify({ id, method, params }))
      } catch (err) {
        // Nothing was sent, so no reply will ever arrive for this id.
        this.pending.delete(id)
        reject(err)
      }
    })
  }

  /**
   * Evaluate an expression in the page (promises awaited, returned by value).
   * @param {string} expr
   * @returns {Promise<*>} the evaluated value
   * @throws {Error} carrying the page-side exception description
   */
  async eval(expr) {
    const r = await this.send('Runtime.evaluate', { expression: expr, returnByValue: true, awaitPromise: true })
    if (r.exceptionDetails) throw new Error(r.exceptionDetails.exception?.description || 'eval')
    return r.result.value
  }

  close() { this.ws.close() }
}
|
||||
|
||||
/**
 * Measure renderer smoothness during one real LLM stream.
 *
 * Steps: install page-side recorders (rAF frame intervals, longtask observer,
 * mutation observer), type PROMPT into the composer and press Enter, wait for
 * a new assistant message to appear, sample text growth until the UI is no
 * longer busy (or TIMEOUT_MS elapses), then print a summary.
 *
 * Frame and longtask samples are reset at the moment the stream is detected,
 * so the reported numbers cover the stream window only and not the idle or
 * first-token wait before it.
 *
 * Sets process.exitCode = 2 when there is no composer or the stream never starts.
 */
async function main() {
  const target = await getTarget()
  const cdp = await CDP.open(target.webSocketDebuggerUrl)

  // Page-side teardown. Safe to run at any time; also used before installing so a
  // recorder left behind by an aborted run cannot keep writing into this run's arrays.
  const STOP_RECORDERS = `
    if (window.__FT__) window.__FT__.stop = true
    if (window.__LT__) {
      window.__LT__.stop = true
      try { window.__LT__.po && window.__LT__.po.disconnect() } catch {}
    }
    if (window.__MO__) {
      window.__MO__.stop = true
      try { window.__MO__.obs && window.__MO__.obs.disconnect() } catch {}
    }
  `

  let recordersLive = false
  try {
    // Install recorders. Each recorder closes over its own state object (not the
    // window global) so a stale loop can never observe a newer run's state.
    await cdp.eval(`
      (() => {
        ${STOP_RECORDERS}

        // rAF frame intervals
        const ft = { times: [], stop: false }
        window.__FT__ = ft
        let last = performance.now()
        const tick = () => {
          if (ft.stop) return
          const now = performance.now()
          ft.times.push(now - last)
          last = now
          requestAnimationFrame(tick)
        }
        requestAnimationFrame(tick)

        // longtask observer (best effort: observe() may throw if the entry type is unsupported)
        const lt = { entries: [], stop: false }
        window.__LT__ = lt
        try {
          const po = new PerformanceObserver((list) => {
            if (lt.stop) return
            for (const e of list.getEntries()) {
              lt.entries.push({ name: e.name, duration: e.duration, startTime: e.startTime })
            }
          })
          po.observe({ entryTypes: ['longtask'] })
          lt.po = po
        } catch {}

        // mutation observer on streaming message (armed later, once the message exists)
        const mo = { mutations: [], stop: false, currentMsg: null }
        window.__MO__ = mo
        const tryArm = () => {
          const all = document.querySelectorAll('[data-slot="aui_assistant-message-root"]')
          const last = all[all.length - 1]
          if (!last || last === mo.currentMsg) return
          mo.currentMsg = last
          if (mo.obs) mo.obs.disconnect()
          const obs = new MutationObserver((muts) => {
            if (mo.stop) return
            const t = performance.now()
            mo.mutations.push({ t, count: muts.length, len: last.textContent.length })
          })
          obs.observe(last, { childList: true, subtree: true, characterData: true })
          mo.obs = obs
        }
        mo.arm = tryArm
        return 'recorders armed'
      })()
    `)
    recordersLive = true

    // Baseline
    const base = JSON.parse(await cdp.eval(`
      JSON.stringify({
        assistantCount: document.querySelectorAll('[data-slot="aui_assistant-message-root"]').length,
        busy: !!document.querySelector('[data-status="running"], [data-busy="true"]'),
        hasComposer: !!document.querySelector('[contenteditable="true"]'),
      })
    `))
    console.log('baseline:', base)
    if (!base.hasComposer) {
      console.error('no composer')
      process.exitCode = 2
      return
    }

    // Type + submit
    await cdp.eval(`
      (() => {
        const ed = document.querySelector('[contenteditable="true"]')
        ed.focus()
        document.execCommand('insertText', false, ${JSON.stringify(PROMPT)})
        return 'typed'
      })()
    `)
    const submitT0 = Date.now()
    await cdp.eval(`
      (() => {
        const ed = document.querySelector('[contenteditable="true"]')
        ed.dispatchEvent(new KeyboardEvent('keydown', { key: 'Enter', code: 'Enter', bubbles: true, cancelable: true }))
        return 'submitted'
      })()
    `)

    // Poll for REAL stream (assistant count > baseline). 600 polls x 50 ms = 30 seconds —
    // accommodates slow first-token latencies on big providers.
    let realStreamT = null
    for (let i = 0; i < 600; i++) {
      await new Promise((r) => setTimeout(r, 50))
      const s = JSON.parse(await cdp.eval(`
        JSON.stringify({
          n: document.querySelectorAll('[data-slot="aui_assistant-message-root"]').length,
          busy: !!document.querySelector('[data-status="running"], [data-busy="true"]'),
          text: (() => { const a = document.querySelectorAll('[data-slot="aui_assistant-message-root"]'); return a.length ? a[a.length-1].textContent.length : 0 })()
        })
      `))
      if (s.n > base.assistantCount) {
        realStreamT = Date.now()
        console.log('REAL stream started after', realStreamT - submitT0, 'ms — busy=', s.busy, 'text=', s.text)
        // Drop everything recorded while idle / waiting for the first token, then
        // arm the mutation observer on the new message.
        await cdp.eval(`
          (() => {
            window.__FT__.times.length = 0
            window.__LT__.entries.length = 0
            window.__MO__.arm()
            return 'armed'
          })()
        `)
        break
      }
    }
    if (!realStreamT) {
      console.error('REAL STREAM NEVER STARTED')
      process.exitCode = 2
      return
    }

    // Sample length growth, wait for completion or timeout
    const samples = []
    const start = Date.now()
    while (Date.now() - start < TIMEOUT_MS) {
      await new Promise((r) => setTimeout(r, 250))
      const s = JSON.parse(await cdp.eval(`
        JSON.stringify({
          t: performance.now(),
          len: (() => { const a = document.querySelectorAll('[data-slot="aui_assistant-message-root"]'); return a.length ? a[a.length-1].textContent.length : 0 })(),
          busy: !!document.querySelector('[data-status="running"], [data-busy="true"]')
        })
      `))
      samples.push(s)
      // Require a few samples first so a not-yet-busy UI is not mistaken for "finished".
      if (!s.busy && samples.length > 4) {
        await new Promise((r) => setTimeout(r, 300))
        break
      }
    }

    // Pull recordings
    const data = JSON.parse(await cdp.eval(`
      (() => {
        ${STOP_RECORDERS}
        return JSON.stringify({
          frames: window.__FT__.times,
          longtasks: window.__LT__.entries,
          mutations: window.__MO__.mutations,
        })
      })()
    `))
    recordersLive = false

    const { frames, longtasks, mutations } = data

    // Frame histogram (stream window only — samples were reset at stream start)
    const buckets = { '<=16.7': 0, '16.7-33': 0, '33-50': 0, '50-100': 0, '100-200': 0, '>200': 0 }
    let frameTotal = 0
    let maxFrame = 0
    for (const f of frames) {
      frameTotal += f
      if (f > maxFrame) maxFrame = f
      if (f <= 16.7) buckets['<=16.7']++
      else if (f <= 33) buckets['16.7-33']++
      else if (f <= 50) buckets['33-50']++
      else if (f <= 100) buckets['50-100']++
      else if (f <= 200) buckets['100-200']++
      else buckets['>200']++
    }
    const avgFps = frames.length ? (frames.length / (frameTotal / 1000)).toFixed(1) : 'n/a'
    const slowFrames = frames.filter((f) => f > 33).length
    const veryslowFrames = frames.filter((f) => f > 100).length

    // Longtask summary
    const ltMs = longtasks.reduce((a, b) => a + b.duration, 0)
    const ltMax = longtasks.length ? Math.max(...longtasks.map((e) => e.duration)) : 0

    // Mutation rate: gaps between consecutive observer callbacks
    const mutTotal = mutations.length
    const mutDurs = []
    for (let i = 1; i < mutations.length; i++) {
      mutDurs.push(mutations[i].t - mutations[i - 1].t)
    }
    mutDurs.sort((a, b) => a - b)
    const mutP50 = mutDurs[Math.floor(mutDurs.length * 0.5)] ?? 0
    const mutP95 = mutDurs[Math.floor(mutDurs.length * 0.95)] ?? 0

    // Growth rate
    const firstLen = samples[0]?.len ?? 0
    const lastLen = samples[samples.length - 1]?.len ?? 0
    const elapsedS = samples.length ? (samples[samples.length - 1].t - samples[0].t) / 1000 : 0
    const charsPerSec = elapsedS ? ((lastLen - firstLen) / elapsedS).toFixed(1) : 'n/a'

    console.log('\n=== STREAM RESULTS ===')
    console.log('window:', (frameTotal / 1000).toFixed(1), 's | frames:', frames.length, '| avgFps:', avgFps, '| maxFrame:', maxFrame.toFixed(1), 'ms')
    console.log('frame histogram:', buckets)
    console.log('slow frames (>33ms):', slowFrames, '| very slow (>100ms):', veryslowFrames)
    console.log('longtasks:', longtasks.length, 'total', ltMs.toFixed(0), 'ms — max', ltMax.toFixed(1), 'ms')
    console.log('text grew', firstLen, '→', lastLen, 'chars (', charsPerSec, 'char/s )')
    console.log('mutations on streaming msg:', mutTotal, '| inter-mutation p50:', mutP50.toFixed(1), 'ms', 'p95:', mutP95.toFixed(1), 'ms')
  } finally {
    // On any early exit or exception, stop the page-side recorders (best effort —
    // the socket may already be gone) so they do not run on in the renderer.
    if (recordersLive) {
      await cdp.eval(`(() => { ${STOP_RECORDERS}; return 'stopped' })()`).catch(() => {})
    }
    cdp.close()
  }
}
|
||||
|
||||
main().catch((e) => { console.error(e); process.exit(1) })
|
||||
318
apps/desktop/scripts/measure-synthetic-stream.mjs
Normal file
318
apps/desktop/scripts/measure-synthetic-stream.mjs
Normal file
|
|
@ -0,0 +1,318 @@
|
|||
// Measure render cost of a synthetic stream driven through the live $messages atom.
|
||||
//
|
||||
// Why synthetic: the user's LLM credits are depleted; we can't fire a real stream.
|
||||
// The synthetic stream exercises the exact same React pipeline (assistant-ui runtime →
|
||||
// repository.addOrUpdateMessage → MessagePrimitive re-render → markdown reflow) as a
|
||||
// real stream. The only thing it does NOT exercise is the gateway → SSE → optimistic-
|
||||
// merge path, which is orthogonal to the rendering question.
|
||||
//
|
||||
// What we record:
|
||||
// 1) rAF frame intervals (long-frame histogram; >33ms = perceived jank, >100ms = bad)
|
||||
// 2) PerformanceObserver `longtask` entries (task >50ms blocks input)
|
||||
// 3) MutationObserver: per-message mutation count & inter-mutation latency
|
||||
// 4) Optional: typing latency overlay — typing into composer while streaming
|
||||
//
|
||||
// Output is plain text suitable for terminal + a JSON sidecar for diffing across runs.
|
||||
|
||||
import { writeFileSync } from 'node:fs'
|
||||
|
||||
const CDP_HTTP = 'http://127.0.0.1:9222'
|
||||
const TOKENS = Number(process.env.TOKENS || 300)
|
||||
const INTERVAL_MS = Number(process.env.INTERVAL_MS || 16)
|
||||
const CHUNK = process.env.CHUNK || 'lorem ipsum '
|
||||
const TYPE_WHILE_STREAMING = process.env.TYPE_WHILE_STREAMING === '1'
|
||||
const LABEL = process.env.LABEL || 'baseline'
|
||||
const OUT = process.env.OUT || `frame-times-${LABEL}.json`
|
||||
|
||||
/**
 * Look up the renderer page in the CDP target list.
 *
 * Fetches `${CDP_HTTP}/json` and picks the first target of type `page`
 * whose URL contains "5174".
 *
 * @returns {Promise<object>} the matching CDP target descriptor
 * @throws {Error} 'renderer not found' when no target matches
 */
async function getTarget() {
  const response = await fetch(`${CDP_HTTP}/json`)
  const targets = await response.json()
  const renderer = targets.find((entry) => entry.type === 'page' && /5174/.test(entry.url))
  if (renderer) return renderer
  throw new Error('renderer not found')
}
|
||||
|
||||
/**
 * Minimal Chrome DevTools Protocol client over a single WebSocket.
 *
 * Each call gets an incrementing id; replies are matched back to the caller
 * through `pending` (id -> { resolve, reject }). Protocol events (messages
 * without a known id) are ignored.
 */
class CDP {
  constructor(ws) { this.ws = ws; this.id = 0; this.pending = new Map() }

  /**
   * Connect to a target's debugger socket.
   * @param {string} url webSocketDebuggerUrl of the target
   * @returns {Promise<CDP>} resolves once the socket is open
   * @throws {Error} when the socket errors before opening
   */
  static async open(url) {
    const ws = new WebSocket(url)
    await new Promise((resolve, reject) => {
      ws.addEventListener('open', resolve, { once: true })
      ws.addEventListener('error', () => reject(new Error(`CDP connection failed: ${url}`)), { once: true })
    })
    const cdp = new CDP(ws)
    ws.addEventListener('message', (ev) => {
      const m = JSON.parse(ev.data.toString())
      if (m.id != null && cdp.pending.has(m.id)) {
        const { resolve, reject } = cdp.pending.get(m.id)
        cdp.pending.delete(m.id)
        if (m.error) reject(new Error(m.error.message))
        else resolve(m.result)
      }
    })
    // If the socket drops mid-call, fail the in-flight calls. Otherwise their
    // promises never settle and the script ends silently with nothing reported.
    ws.addEventListener('close', () => {
      for (const { reject } of cdp.pending.values()) reject(new Error('CDP socket closed'))
      cdp.pending.clear()
    })
    return cdp
  }

  /**
   * Issue one protocol call.
   * @param {string} method e.g. 'Runtime.evaluate'
   * @param {object} [params]
   * @returns {Promise<object>} the reply's `result`; rejects on a protocol error or socket close
   */
  send(method, params) {
    const id = ++this.id
    return new Promise((resolve, reject) => {
      this.pending.set(id, { resolve, reject })
      try {
        this.ws.send(JSON.stringify({ id, method, params }))
      } catch (err) {
        // Nothing was sent, so no reply will ever arrive for this id.
        this.pending.delete(id)
        reject(err)
      }
    })
  }

  /**
   * Evaluate an expression in the page (promises awaited, returned by value).
   * @param {string} expr
   * @returns {Promise<*>} the evaluated value
   * @throws {Error} carrying the page-side exception description
   */
  async eval(expr) {
    const r = await this.send('Runtime.evaluate', { expression: expr, returnByValue: true, awaitPromise: true })
    if (r.exceptionDetails) throw new Error(r.exceptionDetails.exception?.description || 'eval')
    return r.result.value
  }

  close() { this.ws.close() }
}
|
||||
|
||||
/**
 * Pick the element at fraction `p` of an ascending-sorted array.
 *
 * The index is floor(length * p), clamped to the last element; an empty
 * array yields 0.
 *
 * @param {number[]} arr values, already sorted ascending by the caller
 * @param {number} p fraction, e.g. 0.95
 * @returns {number}
 */
function pct(arr, p) {
  const n = arr.length
  if (n === 0) return 0
  return arr[Math.min(Math.floor(n * p), n - 1)]
}
|
||||
|
||||
/**
 * Drive one synthetic stream through window.__PERF_DRIVE__ and measure rendering cost.
 *
 * Steps: verify the driver is on window, install page-side recorders (rAF frame
 * intervals, longtask observer, mutation observer, typing timings), start the
 * stream, optionally type into the composer while it runs, wait
 * TOKENS * INTERVAL_MS + 1500 ms, pull the recordings, reset the driver, then
 * print a summary and write it as JSON to OUT.
 *
 * Cleanup (stop recorders, reset the driver, close the socket) runs even when a
 * step throws, so a failed run does not leave fake messages or live recorders
 * behind in the renderer.
 */
async function main() {
  const target = await getTarget()
  const cdp = await CDP.open(target.webSocketDebuggerUrl)

  // Sanity check driver is loaded.
  const probeOk = await cdp.eval('!!window.__PERF_DRIVE__ && !!window.__PERF_DRIVE__.stream')
  if (!probeOk) {
    console.error('__PERF_DRIVE__ not on window — did you reload the renderer after editing perf-probe.tsx?')
    cdp.close()
    process.exit(2)
  }

  // Page-side teardown. Safe to run at any time; also used before installing so a
  // recorder left behind by an aborted run cannot keep writing into this run's arrays.
  const STOP_RECORDERS = `
    if (window.__FT__) window.__FT__.stop = true
    if (window.__LT__) {
      window.__LT__.stop = true
      try { window.__LT__.po && window.__LT__.po.disconnect() } catch {}
    }
    if (window.__MO__) {
      window.__MO__.stop = true
      try { window.__MO__.obs && window.__MO__.obs.disconnect() } catch {}
    }
    if (window.__TYP__) window.__TYP__.stop = true
  `

  let recordersLive = false
  let streamStarted = false
  let streamStart = 0
  let data
  try {
    // Install recorders. Each recorder closes over its own state object (not the
    // window global) so a stale loop can never observe a newer run's state.
    await cdp.eval(`
      (() => {
        ${STOP_RECORDERS}

        const ft = { times: [], stop: false }
        window.__FT__ = ft
        let last = performance.now()
        const tick = () => {
          if (ft.stop) return
          const now = performance.now()
          ft.times.push(now - last)
          last = now
          requestAnimationFrame(tick)
        }
        requestAnimationFrame(tick)

        // Best effort: observe() may throw if the longtask entry type is unsupported.
        const lt = { entries: [], stop: false }
        window.__LT__ = lt
        try {
          const po = new PerformanceObserver((list) => {
            if (lt.stop) return
            for (const e of list.getEntries()) {
              lt.entries.push({ name: e.name, duration: e.duration, startTime: e.startTime })
            }
          })
          po.observe({ entryTypes: ['longtask'] })
          lt.po = po
        } catch {}

        const mo = { mutations: [], stop: false, currentMsg: null }
        window.__MO__ = mo
        const arm = () => {
          const all = document.querySelectorAll('[data-slot="aui_assistant-message-root"]')
          const last = all[all.length - 1]
          if (!last || last === mo.currentMsg) return
          mo.currentMsg = last
          if (mo.obs) mo.obs.disconnect()
          const obs = new MutationObserver((muts) => {
            if (mo.stop) return
            const t = performance.now()
            mo.mutations.push({ t, count: muts.length, len: last.textContent.length })
          })
          obs.observe(last, { childList: true, subtree: true, characterData: true })
          mo.obs = obs
        }
        mo.arm = arm

        // Optional: typing observer — fires keystroke timings if asked.
        window.__TYP__ = { times: [], stop: false, lastKey: 0 }
        return 'recorders armed'
      })()
    `)
    recordersLive = true

    // Baseline state.
    const base = JSON.parse(await cdp.eval(`
      JSON.stringify({
        assistantCount: document.querySelectorAll('[data-slot="aui_assistant-message-root"]').length,
        atomCount: window.__PERF_DRIVE__.snapshotMsgs()
      })
    `))
    console.log('baseline:', base)

    // Drive a synthetic stream.
    streamStart = Date.now()
    streamStarted = true
    await cdp.eval(`window.__PERF_DRIVE__.stream({ chunk: ${JSON.stringify(CHUNK)}, intervalMs: ${INTERVAL_MS}, totalTokens: ${TOKENS} })`)

    // After the first paint, arm MO on the new message.
    await new Promise((r) => setTimeout(r, 200))
    await cdp.eval('window.__MO__.arm()')

    // Optional: type while streaming.
    if (TYPE_WHILE_STREAMING) {
      await new Promise((r) => setTimeout(r, 400))
      await cdp.eval(`(() => {
        const ed = document.querySelector('[contenteditable="true"]')
        if (!ed) throw new Error('no composer to type into')
        ed.focus()
        const typ = window.__TYP__
        typ.startedAt = performance.now()
        const text = 'the quick brown fox jumps over the lazy dog '
        let i = 0
        const tick = () => {
          // Honor the stop flag so typing ends with the measurement window.
          if (typ.stop || i >= text.length) return
          const t0 = performance.now()
          document.execCommand('insertText', false, text[i])
          // requestAnimationFrame to wait for next paint
          requestAnimationFrame(() => {
            typ.times.push(performance.now() - t0)
          })
          i++
          setTimeout(tick, 60)
        }
        tick()
        return 'typing'
      })()`)
    }

    // Wait for stream to complete + small grace.
    const expectedMs = TOKENS * INTERVAL_MS + 1500
    await new Promise((r) => setTimeout(r, expectedMs))

    // Pull recordings.
    data = JSON.parse(await cdp.eval(`
      (() => {
        ${STOP_RECORDERS}
        return JSON.stringify({
          frames: window.__FT__.times,
          longtasks: window.__LT__.entries,
          mutations: window.__MO__.mutations,
          typing: window.__TYP__.times,
          finalText: (() => { const a = document.querySelectorAll('[data-slot="aui_assistant-message-root"]'); return a.length ? a[a.length-1].textContent.length : 0 })()
        })
      })()
    `))
    recordersLive = false
  } finally {
    // Best-effort cleanup on every path: the socket may already be gone.
    if (recordersLive) {
      await cdp.eval(`(() => { ${STOP_RECORDERS}; return 'stopped' })()`).catch(() => {})
    }
    // Reset DOM back to baseline so we don't accumulate fake messages.
    if (streamStarted) {
      await cdp.eval('window.__PERF_DRIVE__.reset()').catch((e) => console.error('reset failed:', e.message))
    }
    cdp.close()
  }

  const frames = data.frames

  // Trim warm-up: frames started recording when the recorder was installed (before
  // the stream), so drop everything before the frame at which the cumulative
  // recorded time first reaches WARMUP_MS. If it never does, keep all frames.
  const WARMUP_MS = 500
  let dropIdx = 0
  let elapsed = 0
  for (let i = 0; i < frames.length; i++) {
    elapsed += frames[i]
    if (elapsed >= WARMUP_MS) { dropIdx = i; break }
  }
  const streamFrames = frames.slice(dropIdx)

  const buckets = { '<=16.7': 0, '16.7-33': 0, '33-50': 0, '50-100': 0, '100-200': 0, '>200': 0 }
  let frameTotal = 0
  let maxFrame = 0
  for (const f of streamFrames) {
    frameTotal += f
    if (f > maxFrame) maxFrame = f
    if (f <= 16.7) buckets['<=16.7']++
    else if (f <= 33) buckets['16.7-33']++
    else if (f <= 50) buckets['33-50']++
    else if (f <= 100) buckets['50-100']++
    else if (f <= 200) buckets['100-200']++
    else buckets['>200']++
  }
  const sortedFrames = [...streamFrames].sort((a, b) => a - b)
  const fAvgFps = streamFrames.length ? (streamFrames.length / (frameTotal / 1000)).toFixed(1) : 'n/a'
  const fP50 = pct(sortedFrames, 0.5).toFixed(1)
  const fP95 = pct(sortedFrames, 0.95).toFixed(1)
  const fP99 = pct(sortedFrames, 0.99).toFixed(1)
  const slowFrames = streamFrames.filter((f) => f > 33).length
  const veryslowFrames = streamFrames.filter((f) => f > 100).length

  const ltDur = data.longtasks.map((e) => e.duration).sort((a, b) => a - b)
  const ltMs = ltDur.reduce((a, b) => a + b, 0)
  const ltMax = ltDur.length ? ltDur[ltDur.length - 1] : 0
  const ltP95 = pct(ltDur, 0.95)

  // Mutation cadence: gaps between consecutive observer callbacks.
  const mutDurs = []
  for (let i = 1; i < data.mutations.length; i++) mutDurs.push(data.mutations[i].t - data.mutations[i - 1].t)
  mutDurs.sort((a, b) => a - b)
  const mutP50 = pct(mutDurs, 0.5)
  const mutP95 = pct(mutDurs, 0.95)
  const mutMax = mutDurs.length ? mutDurs[mutDurs.length - 1] : 0

  // Typing latency (optional).
  let typingSummary = null
  if (TYPE_WHILE_STREAMING && data.typing.length) {
    const t = [...data.typing].sort((a, b) => a - b)
    typingSummary = {
      n: t.length,
      p50: pct(t, 0.5).toFixed(1),
      p95: pct(t, 0.95).toFixed(1),
      max: t[t.length - 1].toFixed(1)
    }
  }

  const result = {
    label: LABEL,
    timestamp: new Date().toISOString(),
    config: { TOKENS, INTERVAL_MS, CHUNK, TYPE_WHILE_STREAMING },
    streamWallMs: Date.now() - streamStart,
    frames: {
      total: streamFrames.length,
      avgFps: fAvgFps,
      windowS: (frameTotal / 1000).toFixed(1),
      p50: fP50,
      p95: fP95,
      p99: fP99,
      max: maxFrame.toFixed(1),
      slow33: slowFrames,
      veryslow100: veryslowFrames,
      histogram: buckets
    },
    longtasks: {
      n: data.longtasks.length,
      totalMs: ltMs.toFixed(0),
      maxMs: ltMax.toFixed(1),
      p95Ms: ltP95.toFixed(1)
    },
    mutations: {
      n: data.mutations.length,
      finalTextLen: data.finalText,
      interMutP50ms: mutP50.toFixed(1),
      interMutP95ms: mutP95.toFixed(1),
      interMutMaxMs: mutMax.toFixed(1)
    },
    typing: typingSummary
  }

  writeFileSync(OUT, JSON.stringify(result, null, 2))

  console.log('\n=== SYNTHETIC STREAM RESULTS ===')
  console.log('label:', LABEL, '| tokens:', TOKENS, '@', INTERVAL_MS, 'ms')
  console.log('streamWallMs:', result.streamWallMs)
  console.log('FRAMES: avgFps', fAvgFps, '| p50', fP50, 'ms | p95', fP95, 'ms | p99', fP99, 'ms | max', maxFrame.toFixed(1), 'ms')
  console.log('FRAMES histogram:', buckets)
  console.log('FRAMES slow(>33):', slowFrames, '/ veryslow(>100):', veryslowFrames, 'of', streamFrames.length)
  console.log('LONGTASKS:', data.longtasks.length, '| total', ltMs.toFixed(0), 'ms | max', ltMax.toFixed(1), 'ms | p95', ltP95.toFixed(1), 'ms')
  console.log('MUTATIONS:', data.mutations.length, '| finalLen', data.finalText, 'chars | inter p50', mutP50.toFixed(1), 'ms | p95', mutP95.toFixed(1), 'ms')
  if (typingSummary) console.log('TYPING-WHILE-STREAMING latency: p50', typingSummary.p50, 'ms | p95', typingSummary.p95, 'ms | n=', typingSummary.n)
  console.log('written to', OUT)
}
|
||||
|
||||
main().catch((e) => { console.error(e); process.exit(1) })
|
||||
137
apps/desktop/scripts/profile-real-stream.mjs
Normal file
137
apps/desktop/scripts/profile-real-stream.mjs
Normal file
|
|
@ -0,0 +1,137 @@
|
|||
// CPU-profile during a real LLM stream — confirms or refutes whether the
|
||||
// synthetic stream's hotspots (Streamdown markdown re-parse, FadeText)
|
||||
// match real-world content.
|
||||
//
|
||||
// Run *after* model is set to something fast + cheap (gpt-4o-mini etc.).
|
||||
// Sends a prompt likely to produce markdown + a numbered list.
|
||||
|
||||
import { writeFileSync } from 'node:fs'
|
||||
|
||||
const CDP_HTTP = 'http://127.0.0.1:9222'
|
||||
const PROMPT = process.env.PROMPT || 'Give me a numbered list of 8 useful bash one-liners. For each: a brief description, then the command in a code block. No preamble.'
|
||||
const OUT = process.env.OUT || `/tmp/real-stream-${Date.now()}.cpuprofile`
|
||||
const START_TIMEOUT = Number(process.env.START_TIMEOUT || 45000)
|
||||
const STREAM_TIMEOUT = Number(process.env.STREAM_TIMEOUT || 60000)
|
||||
|
||||
/**
 * Minimal Chrome DevTools Protocol client over a single WebSocket.
 *
 * Each call gets an incrementing id; replies are matched back to the caller
 * through `pending` (id -> { resolve, reject }). Protocol events (messages
 * without a known id) are ignored.
 */
class CDP {
  constructor(ws) { this.ws = ws; this.id = 0; this.pending = new Map() }

  /**
   * Connect to a target's debugger socket.
   * @param {string} url webSocketDebuggerUrl of the target
   * @returns {Promise<CDP>} resolves once the socket is open
   * @throws {Error} when the socket errors before opening
   */
  static async open(url) {
    const ws = new WebSocket(url)
    // Reject on a connection error instead of waiting forever for 'open'.
    await new Promise((resolve, reject) => {
      ws.addEventListener('open', resolve, { once: true })
      ws.addEventListener('error', () => reject(new Error(`CDP connection failed: ${url}`)), { once: true })
    })
    const cdp = new CDP(ws)
    ws.addEventListener('message', (ev) => {
      const m = JSON.parse(ev.data.toString())
      if (m.id != null && cdp.pending.has(m.id)) {
        const { resolve, reject } = cdp.pending.get(m.id)
        cdp.pending.delete(m.id)
        if (m.error) reject(new Error(m.error.message))
        else resolve(m.result)
      }
    })
    // If the socket drops mid-call, fail the in-flight calls. Otherwise their
    // promises never settle and the script ends silently with nothing reported.
    ws.addEventListener('close', () => {
      for (const { reject } of cdp.pending.values()) reject(new Error('CDP socket closed'))
      cdp.pending.clear()
    })
    return cdp
  }

  /**
   * Issue one protocol call.
   * @param {string} method e.g. 'Profiler.start'
   * @param {object} [params]
   * @returns {Promise<object>} the reply's `result`; rejects on a protocol error or socket close
   */
  send(method, params) {
    const id = ++this.id
    return new Promise((resolve, reject) => {
      this.pending.set(id, { resolve, reject })
      try {
        this.ws.send(JSON.stringify({ id, method, params }))
      } catch (err) {
        // Nothing was sent, so no reply will ever arrive for this id.
        this.pending.delete(id)
        reject(err)
      }
    })
  }

  /**
   * Evaluate an expression in the page (promises awaited, returned by value).
   * @param {string} expr
   * @returns {Promise<*>} the evaluated value
   * @throws {Error} carrying the page-side exception description
   */
  async eval(expr) {
    const r = await this.send('Runtime.evaluate', { expression: expr, returnByValue: true, awaitPromise: true })
    if (r.exceptionDetails) throw new Error(r.exceptionDetails.exception?.description || 'eval')
    return r.result.value
  }

  close() { this.ws.close() }
}
|
||||
|
||||
/**
 * CPU-profile the renderer while a real LLM stream is running.
 *
 * Steps: find the renderer target, submit PROMPT through the composer, wait up to
 * START_TIMEOUT ms for a new assistant message, run the CDP profiler until the UI
 * is no longer busy (or STREAM_TIMEOUT ms pass), write the .cpuprofile to OUT and
 * print the 25 functions with the highest self time.
 *
 * Sets process.exitCode = 2 when the stream never starts.
 */
async function main() {
  const list = await (await fetch(`${CDP_HTTP}/json`)).json()
  const target = list.find((t) => t.type === 'page' && /5174/.test(t.url))
  // Same failure mode and message as the measure-* scripts, rather than a TypeError below.
  if (!target) throw new Error('renderer not found')
  const cdp = await CDP.open(target.webSocketDebuggerUrl)

  try {
    const baseCount = await cdp.eval('document.querySelectorAll("[data-slot=aui_assistant-message-root]").length')

    // Submit prompt
    await cdp.eval(`(() => {
      const ed = document.querySelector('[contenteditable="true"]')
      if (!ed) throw new Error('no composer')
      ed.focus()
      document.execCommand('insertText', false, ${JSON.stringify(PROMPT)})
      ed.dispatchEvent(new KeyboardEvent('keydown', { key: 'Enter', code: 'Enter', which: 13, keyCode: 13, bubbles: true, cancelable: true }))
      return 'submitted'
    })()`)

    // Wait for real stream start (assistant count grows).
    const submitT0 = Date.now()
    let streamT = null
    for (let i = 0; i < START_TIMEOUT / 50; i++) {
      await new Promise((r) => setTimeout(r, 50))
      const n = await cdp.eval('document.querySelectorAll("[data-slot=aui_assistant-message-root]").length')
      if (n > baseCount) { streamT = Date.now(); break }
    }
    if (!streamT) {
      console.error('stream never started within', START_TIMEOUT, 'ms')
      process.exitCode = 2
      return
    }
    console.log('REAL stream started after', streamT - submitT0, 'ms — starting CPU profile NOW')

    // Start CPU profile NOW, only during stream phase.
    await cdp.send('Profiler.enable')
    await cdp.send('Profiler.setSamplingInterval', { interval: 100 })
    await cdp.send('Profiler.start')

    // Wait until busy goes false + grace, or timeout.
    const cutoff = Date.now() + STREAM_TIMEOUT
    while (Date.now() < cutoff) {
      await new Promise((r) => setTimeout(r, 500))
      const busy = await cdp.eval('!!document.querySelector("[data-status=running], [data-busy=true]")')
      if (!busy) {
        await new Promise((r) => setTimeout(r, 500))
        break
      }
    }

    const { profile } = await cdp.send('Profiler.stop')
    writeFileSync(OUT, JSON.stringify(profile))
    console.log('wrote', OUT)

    // Self time per node: add each sample's time delta (microseconds) to the sampled node.
    const samples = profile.samples || []
    const timeDeltas = profile.timeDeltas || []
    const nodes = new Map(profile.nodes.map((n) => [n.id, n]))
    const selfTime = new Map()
    for (let i = 0; i < samples.length; i++) {
      const id = samples[i]
      const dt = timeDeltas[i] ?? 0
      selfTime.set(id, (selfTime.get(id) || 0) + dt)
    }
    const ranked = [...selfTime.entries()]
      .map(([id, us]) => {
        const n = nodes.get(id)
        const cf = n?.callFrame || {}
        return {
          ms: us / 1000,
          name: cf.functionName || '(anonymous)',
          url: (cf.url || '').slice(-60),
          // CDP call-frame line numbers are 0-based; print the 1-based editor line.
          line: cf.lineNumber != null ? cf.lineNumber + 1 : '?'
        }
      })
      .filter((x) => !/\(root\)|\(idle\)|\(garbage collector\)|\(program\)/.test(x.name))
      .sort((a, b) => b.ms - a.ms)
      .slice(0, 25)

    const finalText = await cdp.eval(`(() => {
      const all = document.querySelectorAll('[data-slot="aui_assistant-message-root"]')
      return all.length ? all[all.length-1].textContent.length : 0
    })()`)
    console.log('\nfinal assistant message length:', finalText, 'chars')

    console.log('\n=== TOP 25 SELF TIME (ms) DURING REAL STREAM ===')
    for (const r of ranked) {
      console.log(`${r.ms.toFixed(1).padStart(7)} ${r.name.padEnd(40)} ${r.url}:${r.line}`)
    }
  } finally {
    // Always release the debugger socket, including on early exit or exception.
    cdp.close()
  }
}
|
||||
|
||||
main().catch((e) => { console.error(e); process.exit(1) })
|
||||
103
apps/desktop/scripts/profile-synth-stream.mjs
Normal file
103
apps/desktop/scripts/profile-synth-stream.mjs
Normal file
|
|
@ -0,0 +1,103 @@
|
|||
// CPU-profile a synthetic stream — outputs a .cpuprofile and a top-self ranking.
|
||||
// Open the .cpuprofile in Chrome DevTools Performance panel for a flamegraph.
|
||||
|
||||
import { writeFileSync } from 'node:fs'
|
||||
|
||||
const CDP_HTTP = 'http://127.0.0.1:9222'
|
||||
const TOKENS = Number(process.env.TOKENS || 400)
|
||||
const INTERVAL_MS = Number(process.env.INTERVAL_MS || 8)
|
||||
const CHUNK = process.env.CHUNK || '**word** in _italic_ with `code` '
|
||||
const LABEL = process.env.LABEL || 'profile'
|
||||
const OUT = process.env.OUT || `synth-${LABEL}.cpuprofile`
|
||||
|
||||
/**
 * Minimal Chrome DevTools Protocol client over one WebSocket.
 *
 * Every command gets an incrementing `id`; the reply carrying the same `id`
 * settles the promise returned by `send()`.
 */
class CDP {
  /** @param {WebSocket} ws An already-open debugger socket. */
  constructor(ws) {
    this.ws = ws
    this.id = 0
    // id -> { resolve, reject } for commands that have not been answered yet.
    this.pending = new Map()
  }

  /**
   * Connects to a target's `webSocketDebuggerUrl`.
   *
   * @param {string} url WebSocket debugger URL.
   * @returns {Promise<CDP>} Resolves once the socket is open.
   * @throws {Error} When the socket reports an error before opening.
   */
  static async open(url) {
    const ws = new WebSocket(url)
    await new Promise((resolve, reject) => {
      ws.addEventListener('open', resolve, { once: true })
      // Without this listener a refused connection would leave the promise
      // unsettled and the caller would never learn that the connect failed.
      ws.addEventListener('error', () => reject(new Error(`CDP connection failed: ${url}`)), { once: true })
    })
    const cdp = new CDP(ws)
    ws.addEventListener('message', (ev) => {
      const m = JSON.parse(ev.data.toString())
      // Messages without a known id are protocol events; nothing waits on them.
      if (m.id != null && cdp.pending.has(m.id)) {
        const { resolve, reject } = cdp.pending.get(m.id)
        cdp.pending.delete(m.id)
        if (m.error) reject(new Error(m.error.message))
        else resolve(m.result)
      }
    })
    ws.addEventListener('close', () => {
      // Fail commands still in flight so awaiting callers do not hang.
      for (const { reject } of cdp.pending.values()) {
        reject(new Error('CDP socket closed before a reply arrived'))
      }
      cdp.pending.clear()
    })
    return cdp
  }

  /**
   * Sends one protocol command.
   *
   * @param {string} method e.g. `'Profiler.start'`.
   * @param {object} [params] Command parameters.
   * @returns {Promise<object>} The reply's `result` payload.
   */
  send(method, params) {
    const id = ++this.id
    return new Promise((res, rej) => {
      this.pending.set(id, { resolve: res, reject: rej })
      this.ws.send(JSON.stringify({ id, method, params }))
    })
  }

  /**
   * Evaluates an expression in the page, awaiting any promise it returns.
   *
   * @param {string} expr JavaScript source to evaluate.
   * @returns {Promise<*>} The by-value result.
   * @throws {Error} When the evaluated expression throws.
   */
  async eval(expr) {
    const r = await this.send('Runtime.evaluate', { expression: expr, returnByValue: true, awaitPromise: true })
    if (r.exceptionDetails) {
      throw new Error(r.exceptionDetails.exception?.description || r.exceptionDetails.text || 'eval')
    }
    return r.result.value
  }

  /** Closes the underlying socket. */
  close() {
    this.ws.close()
  }
}
|
||||
|
||||
/**
 * CPU-profiles one synthetic stream in the renderer.
 *
 * Finds the page target whose URL contains `5174`, starts the CDP profiler,
 * drives `window.__PERF_DRIVE__.stream(...)`, waits for the stream to finish,
 * writes the profile to `OUT`, and prints the top 30 functions by self time.
 * Sets exit code 2 when the renderer or the driver is missing.
 */
async function main() {
  const list = await (await fetch(`${CDP_HTTP}/json`)).json()
  const target = list.find((t) => t.type === 'page' && /5174/.test(t.url))
  if (!target) {
    // Otherwise the next line dies with an opaque TypeError on `undefined`.
    console.error('renderer not found')
    process.exitCode = 2
    return
  }
  const cdp = await CDP.open(target.webSocketDebuggerUrl)

  try {
    if (!await cdp.eval('!!window.__PERF_DRIVE__')) {
      console.error('no __PERF_DRIVE__')
      process.exitCode = 2
      return
    }

    await cdp.send('Profiler.enable')
    // High-resolution sampling: 100us
    await cdp.send('Profiler.setSamplingInterval', { interval: 100 })
    await cdp.send('Profiler.start')

    await cdp.eval(`window.__PERF_DRIVE__.stream({ chunk: ${JSON.stringify(CHUNK)}, intervalMs: ${INTERVAL_MS}, totalTokens: ${TOKENS} })`)
    // The driver paces itself with timers in the page; wait out the nominal
    // stream duration plus a grace period before stopping the profiler.
    await new Promise((r) => setTimeout(r, TOKENS * INTERVAL_MS + 1500))
    await cdp.eval('window.__PERF_DRIVE__.reset()')

    const { profile } = await cdp.send('Profiler.stop')
    writeFileSync(OUT, JSON.stringify(profile))
    console.log('wrote', OUT)

    // Compute top self time per function.
    const samples = profile.samples || []
    const timeDeltas = profile.timeDeltas || []
    const nodes = new Map(profile.nodes.map((n) => [n.id, n]))
    const selfTime = new Map() // id -> microseconds
    for (let i = 0; i < samples.length; i++) {
      const id = samples[i]
      const dt = timeDeltas[i] ?? 0
      selfTime.set(id, (selfTime.get(id) || 0) + dt)
    }
    const ranked = [...selfTime.entries()]
      .map(([id, us]) => {
        const n = nodes.get(id)
        const cf = n?.callFrame || {}
        return {
          us,
          ms: us / 1000,
          name: cf.functionName || '(anonymous)',
          url: (cf.url || '').slice(-60),
          line: cf.lineNumber
        }
      })
      // Drop the profiler's synthetic bookkeeping nodes.
      .filter((x) => !/\(root\)|\(idle\)|\(garbage collector\)|\(program\)/.test(x.name))
      .sort((a, b) => b.us - a.us)
      .slice(0, 30)

    console.log('\n=== TOP 30 SELF TIME (ms) ===')
    for (const r of ranked) {
      console.log(`${r.ms.toFixed(1).padStart(7)} ${r.name.padEnd(40)} ${r.url}:${r.line}`)
    }
  } finally {
    // Release the debugger socket even when a step above throws.
    cdp.close()
  }
}
|
||||
|
||||
main().catch((e) => { console.error(e); process.exit(1) })
|
||||
|
|
@ -153,3 +153,133 @@ streaming. `scripts/measure-submit.mjs` measures
|
|||
`enter → composer-cleared → user-message-rendered → first-paint`. The
|
||||
script triggers a real prompt submission, so use it on a throwaway
|
||||
session. Not enabled in CI.
|
||||
|
||||
## Streaming "5fps" investigation (May 21, 2026)
|
||||
|
||||
User complaint: "the streaming must bring fps to like 5? lol" — felt
|
||||
hitches during assistant streaming on long threads.
|
||||
|
||||
### Tooling added
|
||||
|
||||
- **`src/app/chat/perf-probe.tsx`** — dev-only side-effect import (guarded by
|
||||
`import.meta.env.MODE !== 'production'` in `main.tsx`). Attaches two
|
||||
helpers to `window`:
|
||||
- `__PERF_PROBE__` — React `<Profiler>` recorder. Currently inert because
|
||||
Vite is serving the production React build (see "Vite dev-build issue"
|
||||
below); kept for when that's fixed.
|
||||
- `__PERF_DRIVE__` — synthetic stream driver. Pushes tokens through the
|
||||
live `$messages` atom at a fixed cadence, so the assistant-ui runtime,
|
||||
incremental repository, Streamdown markdown renderer, and React commit
|
||||
pipeline all see the same workload they'd see from a real LLM stream —
|
||||
but with no LLM call (and no credit cost).
|
||||
- **`scripts/measure-synthetic-stream.mjs`** — drives `__PERF_DRIVE__`,
|
||||
records rAF frame intervals, `PerformanceObserver({entryTypes:['longtask']})`
|
||||
entries, `MutationObserver` cadence on the live message, and optional
|
||||
type-while-streaming keystroke latency.
|
||||
- **`scripts/profile-synth-stream.mjs`** — CPU profile during a synthetic
|
||||
stream; writes a `.cpuprofile` (open in Chrome DevTools Performance panel)
|
||||
and a top-30 self-time table.
|
||||
- **`scripts/measure-real-stream.mjs`** — same harness as the synthetic but
|
||||
fires a real LLM prompt. Use when you have credits and want to confirm
|
||||
the synthetic predictions hold.
|
||||
- **`scripts/profile-real-stream.mjs`** — CPU profile over the duration of
|
||||
a real LLM stream.
|
||||
|
||||
Helpers: `scripts/eval.mjs` (one-shot CDP eval), `scripts/reload.mjs`
|
||||
(hard reload renderer over CDP).
|
||||
|
||||
### Findings
|
||||
|
||||
Measured on the Cloud Shadows session (7 turns, ~11k px scrollHeight) and
|
||||
the 34 MB session `session_20260514_215353_fe0ac8.json` (110 FadeText
|
||||
instances, lots of historical tool calls).
|
||||
|
||||
| metric | Cloud Shadows | 34 MB session |
|
||||
|---|---|---|
|
||||
| avgFps (60 tok/sec, 5s) | 60.0 | 58.6 |
|
||||
| frame p50 / p95 / p99 (ms) | 16.7 / 18.0 / 21.1 | 16.6 / 25.6 / 31.4 |
|
||||
| max frame (ms) | 31.1 | 97-127 (varies) |
|
||||
| longtasks per 5s window | 0 | 1-2, 75-127 ms |
|
||||
| type-while-stream p95 latency (ms) | 17 | — |
|
||||
|
||||
A single real-LLM stream on Cloud Shadows (gpt-4o-mini, 39s window) saw
|
||||
12 longtasks totalling 1.26 s — same cadence the synthetic predicted
|
||||
(~1 hitch per 3.25 s, max 123 ms). So the **synthetic stream is a faithful
|
||||
proxy for the real one** and is fine for iterating on fixes without paying
|
||||
for tokens.
|
||||
|
||||
### CPU profile during streaming (synthetic, markdown content)
|
||||
|
||||
Top self-time costs (5 s window, 400 tokens at 125 tok/s, markdown chunks):
|
||||
|
||||
| ms (self) | function | source |
|
||||
|---|---|---|
|
||||
| 260 | `bn$1` | `chunk-BO2N…js:20003` (micromark tokenize) |
|
||||
| 249 | `m$1` | `chunk-BO2N…js:19949` (micromark) |
|
||||
| 128 | `compile` | `chunk-BO2N…js:21884` (mdast → hast compile) |
|
||||
| 73 | FadeText body | `components/ui/fade-text.tsx` |
|
||||
| 62 | `parser` | `chunk-BO2N…js:22680` |
|
||||
| 49 | `fromThreadMessageLike` | `@assistant-ui/internal` |
|
||||
|
||||
That `chunk-BO2N2NFS` is the vendored bundle containing `micromark`,
|
||||
`mdast-util-from-markdown`, `mdast-util-to-hast`, `rehype-raw`,
|
||||
`hast-util-sanitize`, etc. — i.e. **Streamdown's markdown pipeline,
|
||||
re-parsing the entire growing assistant message on every token append**.
|
||||
Per-update cost scales linearly with message length, so the total cost of a stream grows roughly quadratically.
|
||||
|
||||
Compare plain-text (no markdown) — the `chunk-BO2N…` entries drop out
|
||||
of the top 30 entirely; total work per 5 s window halves.
|
||||
|
||||
### Fix landed: `FadeText` memo
|
||||
|
||||
`FadeText` is used in `tool-fallback.tsx` (110 instances on a tool-heavy
|
||||
thread). Before: each parent re-render during streaming triggered a
|
||||
`useEffect([children])` that forced a `scrollWidth` layout read — even
|
||||
when the title text was unchanged. The `useResizeObserver` already covers
|
||||
the genuine resize case, so the effect was strictly redundant.
|
||||
|
||||
After: wrapped in `React.memo` with a custom comparator that compares
|
||||
`children` (scalar fast-path), `className`, `fadeWidth`, and `style`
|
||||
field-by-field. Verified via temporary render counter:
|
||||
**122 renders during a 2 s synthetic stream vs ~11 000 without memo**
|
||||
(110 instances × ~100 stream updates). Doesn't move the longtask needle
|
||||
on its own — Streamdown dwarfs it — but eliminates a class of forced
|
||||
layouts and removes a steady CPU floor.
|
||||
|
||||
### Not fixed: Streamdown markdown re-parse
|
||||
|
||||
This is the dominant cost and the cause of the user's perceived hitches.
|
||||
The renderer re-parses the entire message buffer on every stream update.
|
||||
At ~3-5 k chars, each parse costs ~30 ms; when several pile into one
|
||||
frame the result is a 75-125 ms longtask = the "5 fps moment".
|
||||
|
||||
Possible approaches (none implemented here):
|
||||
|
||||
1. **Coalesce/throttle Streamdown updates** — render at most every 32 ms
|
||||
instead of every set-state. Reduces parses but doesn't reduce
|
||||
per-parse cost; trades latency for smoothness.
|
||||
2. **Memoize per-prefix** — diff the new text against the prior parsed
|
||||
version; only re-parse the changed suffix.
|
||||
3. **Render in stable segments** — freeze completed paragraphs as
|
||||
immutable React nodes; only the live tail goes through markdown each
|
||||
token. Probably the highest-impact change but requires forking or
|
||||
patching `@assistant-ui/react-streamdown`.
|
||||
4. **Move parsing to a Web Worker** — main thread no longer blocks on
|
||||
markdown. Largest surgery; requires double-buffered hast.
|
||||
|
||||
### Vite dev-build issue (separate)
|
||||
|
||||
`http://127.0.0.1:5174/node_modules/.vite/deps/react.js` resolves to
|
||||
`react/cjs/react.production.js`, and `react-dom_client.js` →
|
||||
`react-dom-client.production.js`. As a result:
|
||||
|
||||
- `<React.Profiler>` `onRender` is never called (production build is a
|
||||
no-op).
|
||||
- `import.meta.env.DEV` is `false`, `PROD` is `true` even under `vite dev`
|
||||
(hence `MODE !== 'production'` as the workaround in `main.tsx`).
|
||||
- All the React 19 dev-only warnings/devtools backend hooks are absent.
|
||||
|
||||
Root cause likely sits in `vite.config.ts` aliasing + dedupe + Vite 8's
|
||||
new `optimizeDeps` defaults. Worth a separate fix pass — when it's
|
||||
resolved, the `<PerfProbe>` blocks in `perf-probe.tsx` become useful
|
||||
(per-id commit timings) instead of inert.
|
||||
|
|
|
|||
36
apps/desktop/scripts/reload.mjs
Normal file
36
apps/desktop/scripts/reload.mjs
Normal file
|
|
@ -0,0 +1,36 @@
|
|||
// Hard reload the Electron renderer over CDP. Vite-no-HMR mode means edits
// don't auto-apply — call this after editing source.
const targets = await (await fetch('http://127.0.0.1:9222/json')).json()
const t = targets.find((t) => t.url.includes('5174'))
if (!t) {
  console.error('renderer not found')
  process.exit(1)
}
const ws = new WebSocket(t.webSocketDebuggerUrl)
let id = 0
// id -> { resolve, reject } for CDP commands still waiting on a reply.
const pending = new Map()
ws.addEventListener('message', (ev) => {
  const m = JSON.parse(ev.data)
  const waiter = pending.get(m.id)
  if (!waiter) return
  pending.delete(m.id)
  // Surface protocol errors instead of handing back a reply with no `result`.
  if (m.error) waiter.reject(new Error(m.error.message))
  else waiter.resolve(m)
})
// Without the error listener a refused connection would leave this await
// unsettled instead of reporting what went wrong.
await new Promise((resolve, reject) => {
  ws.addEventListener('open', resolve, { once: true })
  ws.addEventListener('error', () => reject(new Error('could not connect to the renderer debugger socket')), { once: true })
})
// Sends one CDP command; resolves with the full reply message ({ id, result }).
const send = (method, params = {}) =>
  new Promise((resolve, reject) => {
    const i = ++id
    pending.set(i, { resolve, reject })
    ws.send(JSON.stringify({ id: i, method, params }))
  })

await send('Page.reload', { ignoreCache: true })
console.log('reload sent')
// Wait for new doc.
await new Promise((r) => setTimeout(r, 2500))
const r = await send('Runtime.evaluate', {
  expression: 'JSON.stringify({ hasProbe: !!window.__PERF_PROBE__, composer: !!document.querySelector("[contenteditable=true]"), url: location.hash })',
  returnByValue: true,
})
if (r.result.exceptionDetails) {
  // The status expression itself threw in the page; report it rather than printing `undefined`.
  console.error(r.result.exceptionDetails.exception?.description || r.result.exceptionDetails.text)
  process.exitCode = 1
} else {
  console.log(r.result.result.value)
}
ws.close()
|
||||
167
apps/desktop/src/app/chat/perf-probe.tsx
Normal file
167
apps/desktop/src/app/chat/perf-probe.tsx
Normal file
|
|
@ -0,0 +1,167 @@
|
|||
import { Profiler, type ProfilerOnRenderCallback, type ReactNode } from 'react'
|
||||
|
||||
import { $messages, setMessages, setBusy } from '@/store/session'
|
||||
|
||||
type Sample = {
|
||||
id: string
|
||||
phase: string
|
||||
actualDuration: number
|
||||
baseDuration: number
|
||||
startTime: number
|
||||
commitTime: number
|
||||
}
|
||||
|
||||
type SyntheticDriverHandle = { stop: () => void }
|
||||
|
||||
declare global {
|
||||
interface Window {
|
||||
__PERF_PROBE__?: {
|
||||
samples: Sample[]
|
||||
enabled: boolean
|
||||
clear: () => void
|
||||
summary: () => Record<string, { count: number; total: number; max: number; p50: number; p95: number }>
|
||||
}
|
||||
__PERF_DRIVE__?: {
|
||||
/** Inject an assistant message and grow it by `chunk` every `intervalMs`. Returns a stop handle. */
|
||||
stream: (opts?: { chunk?: string; intervalMs?: number; totalTokens?: number }) => SyntheticDriverHandle
|
||||
reset: () => void
|
||||
snapshotMsgs: () => number
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Install the <Profiler> sample recorder once per window.
if (typeof window !== 'undefined' && !window.__PERF_PROBE__) {
  // Shared buffer: exposed on the probe as `samples`, emptied in place by clear().
  const samples: Sample[] = []

  // Round to two decimals for readable output.
  const round2 = (value: number) => Math.round(value * 100) / 100
  // Pick the element at fraction `q` of an ascending-sorted array.
  const pick = (sorted: number[], q: number) => sorted[Math.floor(sorted.length * q)]

  window.__PERF_PROBE__ = {
    samples,
    enabled: false,
    clear: () => {
      samples.length = 0
    },
    // Aggregate actualDuration per `${id}:${phase}` key.
    summary: () => {
      const durations = new Map<string, number[]>()

      for (const { id, phase, actualDuration } of samples) {
        const key = `${id}:${phase}`
        const bucket = durations.get(key)

        if (bucket) bucket.push(actualDuration)
        else durations.set(key, [actualDuration])
      }

      const report: Record<string, { count: number; total: number; max: number; p50: number; p95: number }> = {}

      durations.forEach((bucket, key) => {
        bucket.sort((a, b) => a - b)
        report[key] = {
          count: bucket.length,
          total: round2(bucket.reduce((sum, d) => sum + d, 0)),
          max: round2(bucket[bucket.length - 1]),
          p50: round2(pick(bucket, 0.5)),
          p95: round2(pick(bucket, 0.95)),
        }
      })

      return report
    },
  }
}
|
||||
|
||||
// <Profiler> callback: records one sample per commit while the probe is enabled,
// keeping only the most recent 5000 samples.
const onRender: ProfilerOnRenderCallback = (id, phase, actualDuration, baseDuration, startTime, commitTime) => {
  if (typeof window === 'undefined') return

  const probe = window.__PERF_PROBE__

  if (!probe?.enabled) return

  const { samples } = probe

  samples.push({ id, phase, actualDuration, baseDuration, startTime, commitTime })

  // Trim from the front so the buffer never exceeds the cap.
  const overflow = samples.length - 5000

  if (overflow > 0) samples.splice(0, overflow)
}
|
||||
|
||||
if (typeof window !== 'undefined' && !window.__PERF_DRIVE__) {
  // Synthetic stream driver — pushes tokens through the live $messages atom so the
  // assistant-ui runtime + react tree sees them exactly as a real LLM stream would.
  // Used by scripts/measure-synthetic-stream.mjs and scripts/profile-synth-stream.mjs
  // when no live LLM credit is available.

  // Messages as they were before the first synthetic stream; reset() restores them.
  let baseline: ReturnType<typeof $messages.get> | null = null
  // Handle of the stream currently running, if any.
  let activeHandle: SyntheticDriverHandle | null = null

  window.__PERF_DRIVE__ = {
    snapshotMsgs: () => $messages.get().length,
    // Stop any running stream and put the pre-stream messages back.
    reset: () => {
      activeHandle?.stop()

      if (baseline) setMessages(baseline)
      baseline = null
      setBusy(false)
    },
    stream: ({ chunk = 'word ', intervalMs = 16, totalTokens = 400 } = {}) => {
      // Only one synthetic stream runs at a time.
      activeHandle?.stop()

      const current = $messages.get()

      if (!baseline) baseline = current

      const msgId = `synthetic-${Date.now()}`

      // Seed an empty assistant message — assistant-ui will see it grow.
      setMessages([
        ...current,
        {
          id: msgId,
          role: 'assistant',
          parts: [{ type: 'text', text: '' }],
          timestamp: Date.now(),
          pending: true
        }
      ])
      setBusy(true)

      let pushed = 0
      let stopped = false
      let timer: ReturnType<typeof setTimeout> | null = null

      const handle: SyntheticDriverHandle = {
        stop: () => {
          // Idempotent: a second call on an old handle must not clear the busy
          // flag or the active-handle slot that a newer stream now owns.
          if (stopped) return
          stopped = true

          if (timer) clearTimeout(timer)
          timer = null

          if (activeHandle === handle) activeHandle = null

          // Mark message finalized.
          setMessages(prev => prev.map(m => (m.id === msgId ? { ...m, pending: false } : m)))
          setBusy(false)
        }
      }

      activeHandle = handle

      // Append one chunk to the synthetic message, then schedule the next tick.
      const tick = () => {
        if (stopped) return

        if (pushed >= totalTokens) {
          handle.stop()

          return
        }

        pushed += 1
        setMessages(prev =>
          prev.map(m => {
            if (m.id !== msgId) return m

            // Grow the trailing text part; earlier parts are kept as they are.
            const head = m.parts.slice(0, -1)
            const last = m.parts.at(-1)
            const lastText = last && last.type === 'text' ? last.text : ''

            return {
              ...m,
              parts: [...head, { type: 'text', text: lastText + chunk }]
            }
          })
        )
        timer = setTimeout(tick, intervalMs)
      }

      timer = setTimeout(tick, intervalMs)

      return handle
    }
  }
}
|
||||
|
||||
/**
 * Wraps `children` in a React `<Profiler>` tagged with `id`, reporting each
 * commit to this module's `onRender` callback.
 */
export function PerfProbe(props: { id: string; children: ReactNode }) {
  const { id, children } = props

  return <Profiler id={id} onRender={onRender}>{children}</Profiler>
}
|
||||
|
|
@ -12,6 +12,15 @@ import { ThemeProvider } from './themes/context'
|
|||
|
||||
installClipboardShim()
|
||||
|
||||
// Dev-only: install __PERF_DRIVE__ + __PERF_PROBE__ on window so the
|
||||
// scripts/ harnesses can drive a synthetic stream + record render cost.
|
||||
// Tree-shaken out of production builds. (Uses MODE rather than DEV because
|
||||
// our Vite setup currently bundles with PROD=true even in `vite dev`; see
|
||||
// scripts/dev-no-hmr.mjs for the surrounding workarounds.)
|
||||
if (import.meta.env.MODE !== 'production') {
|
||||
import('./app/chat/perf-probe')
|
||||
}
|
||||
|
||||
const queryClient = new QueryClient({
|
||||
defaultOptions: {
|
||||
queries: {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue