diff --git a/apps/desktop/scripts/profile-long-stream.mjs b/apps/desktop/scripts/profile-long-stream.mjs
new file mode 100644
index 00000000000..b0ae7922173
--- /dev/null
+++ b/apps/desktop/scripts/profile-long-stream.mjs
@@ -0,0 +1,226 @@
+#!/usr/bin/env node
+// Long-running stream profile + frame-rate timeline. Submits a prompt that
+// asks for 15 paragraphs of output, then captures both a CPU profile and a
+// frame count bucketed every 500ms so we can see if FPS sags as the message
+// grows.
+//
+// Usage: profile-long-stream.mjs [--port=9222] [--out=PATH_PREFIX] [--seconds=25]
+// Talks to the Chrome DevTools Protocol endpoint on 127.0.0.1:PORT and relies
+// on the runtime's global fetch and WebSocket.
+
+import { writeFileSync } from 'node:fs'
+
+// Parse --key=value flags into an object; a bare --key maps to true.
+const args = Object.fromEntries(
+  process.argv.slice(2).flatMap(s => {
+    const m = s.match(/^--([^=]+)(?:=(.*))?$/)
+    return m ? [[m[1], m[2] ?? true]] : []
+  })
+)
+const PORT = Number(args.port ?? 9222)
+const OUT = String(args.out ?? `/tmp/hermes-long-stream-${Date.now()}`)
+const STREAM_SEC = Number(args.seconds ?? 25)
+
+// Returns the first listed CDP target that is a page with an http(s) URL.
+// Throws when none is listed so the failure names the cause instead of
+// surfacing later as a TypeError on an undefined target.
+async function pickRenderer() {
+  const list = await (await fetch(`http://127.0.0.1:${PORT}/json/list`)).json()
+  const target = list.find(t => t.type === 'page' && t.url.startsWith('http'))
+  if (!target) throw new Error(`no http(s) page target listed on CDP port ${PORT}`)
+  return target
+}
+
+// Opens a WebSocket to `url` and resolves, once it is open, with
+// { send, close }. send(method, params) resolves with the reply's `result`,
+// and rejects on an error reply or if the socket closes before the reply.
+function connect(url) {
+  return new Promise((resolve, reject) => {
+    const ws = new WebSocket(url)
+    let id = 0
+    const pending = new Map()
+    ws.addEventListener('open', () =>
+      resolve({
+        send(method, params = {}) {
+          const myId = ++id
+          ws.send(JSON.stringify({ id: myId, method, params }))
+          return new Promise((res, rej) => pending.set(myId, { res, rej }))
+        },
+        close: () => ws.close()
+      })
+    )
+    // The listener receives an event object, not an Error; reject with an Error
+    // so the top-level handler has a stack/message to print.
+    ws.addEventListener('error', ev => reject(ev.error ?? new Error(ev.message || 'CDP socket error')))
+    // Fail in-flight requests when the socket closes so callers cannot hang.
+    ws.addEventListener('close', () => {
+      for (const { rej } of pending.values()) rej(new Error('CDP socket closed'))
+      pending.clear()
+    })
+    ws.addEventListener('message', ev => {
+      const m = JSON.parse(typeof ev.data === 'string' ? ev.data : ev.data.toString('utf8'))
+      if (m.id != null) {
+        const p = pending.get(m.id)
+        if (!p) return
+        pending.delete(m.id)
+        m.error ? p.rej(new Error(m.error.message)) : p.res(m.result)
+      }
+    })
+  })
+}
+
+// Evaluates `expr` in the page and returns the result by value. Throws when
+// the evaluation raised, preferring the exception's description over the
+// generic `text` field.
+async function evalP(cdp, expr) {
+  const r = await cdp.send('Runtime.evaluate', { expression: expr, returnByValue: true })
+  if (r.exceptionDetails) {
+    throw new Error(r.exceptionDetails.exception?.description ?? r.exceptionDetails.text)
+  }
+  return r.result.value
+}
+
+// One profiling run: types and submits the prompt, polls up to 100 times at
+// 100ms intervals for an assistant message, samples frames and CPU for
+// STREAM_SEC seconds, writes OUT.cpuprofile and OUT.fps.json, prints both
+// summaries, then clicks the first button whose aria-label contains "stop".
+async function main() {
+  const tgt = await pickRenderer()
+  console.log('target', tgt.url)
+  const cdp = await connect(tgt.webSocketDebuggerUrl)
+  await cdp.send('Runtime.enable')
+  await cdp.send('Profiler.enable')
+  await cdp.send('Performance.enable')
+
+  // Submit a long-form prompt
+  await evalP(
+    cdp,
+    `(() => {
+      const el = document.querySelector('[data-slot="composer-rich-input"]')
+      if (!el) throw new Error('composer input not found')
+      el.focus()
+      const r = document.createRange(); r.selectNodeContents(el); r.collapse(false)
+      window.getSelection().removeAllRanges(); window.getSelection().addRange(r)
+    })()`
+  )
+  const prompt = 'write 15 paragraphs about gpu memory bandwidth, memory hierarchies, roofline model, and how modern transformer inference benefits from these. include diagrams in ascii where relevant. no code. fully detailed.'
+  for (const c of prompt) {
+    await cdp.send('Input.dispatchKeyEvent', { type: 'char', text: c, unmodifiedText: c })
+    await new Promise(r => setTimeout(r, 5))
+  }
+  await new Promise(r => setTimeout(r, 200))
+  await cdp.send('Input.dispatchKeyEvent', {
+    type: 'rawKeyDown', windowsVirtualKeyCode: 13, key: 'Enter', code: 'Enter', text: '\r', unmodifiedText: '\r'
+  })
+  await cdp.send('Input.dispatchKeyEvent', { type: 'keyUp', windowsVirtualKeyCode: 13, key: 'Enter', code: 'Enter' })
+
+  console.log('waiting for assistant…')
+  let streaming = false
+  for (let i = 0; i < 100; i++) {
+    const c = await evalP(cdp, `document.querySelectorAll('[data-slot="aui_assistant-message-root"]').length`)
+    if (c > 0) { streaming = true; break }
+    await new Promise(r => setTimeout(r, 100))
+  }
+  if (!streaming) {
+    console.error('no assistant message')
+    cdp.close()
+    // A run that never started streaming is a failure; don't exit 0.
+    process.exitCode = 1
+    return
+  }
+
+  // Install a per-rAF frame counter
+  await evalP(
+    cdp,
+    `(() => {
+      window.__fpsSamples = []
+      window.__fpsT0 = performance.now()
+      window.__fpsLast = performance.now()
+      window.__fpsFrameCount = 0
+      window.__fpsHistogram = [] // {t, frames500ms, fps, contentLen, scrollTop, scrollHeight}
+      const tick = () => {
+        const now = performance.now()
+        const dt = now - window.__fpsLast
+        window.__fpsLast = now
+        window.__fpsFrameCount++
+        window.__fpsSamples.push({ t: now - window.__fpsT0, dt })
+        if (performance.now() - window.__fpsT0 < ${STREAM_SEC * 1000}) {
+          requestAnimationFrame(tick)
+        }
+      }
+      requestAnimationFrame(tick)
+      // Bucket fps every 500ms
+      window.__fpsBucket = setInterval(() => {
+        const now = performance.now()
+        const recentCount = window.__fpsSamples.filter(s => now - window.__fpsT0 - s.t < 500).length
+        const root = document.querySelector('[data-slot="aui_thread-content"]')
+        const len = root ? root.innerText.length : 0
+        const v = document.querySelector('[data-slot="aui_thread-viewport"]')
+        window.__fpsHistogram.push({
+          t: now - window.__fpsT0,
+          frames500ms: recentCount,
+          fps: recentCount * 2,
+          contentLen: len,
+          scrollTop: v?.scrollTop ?? 0,
+          scrollHeight: v?.scrollHeight ?? 0
+        })
+      }, 500)
+    })()`
+  )
+
+  // Start CPU profile (sampling interval is in microseconds: 1000 = 1ms)
+  await cdp.send('Profiler.setSamplingInterval', { interval: 1000 })
+  await cdp.send('Profiler.start')
+
+  await new Promise(r => setTimeout(r, STREAM_SEC * 1000))
+
+  const { profile } = await cdp.send('Profiler.stop')
+  await evalP(cdp, `clearInterval(window.__fpsBucket)`)
+
+  writeFileSync(`${OUT}.cpuprofile`, JSON.stringify(profile))
+  console.log(`cpu profile → ${OUT}.cpuprofile`)
+
+  // Pull fps histogram
+  const hist = JSON.parse(await evalP(cdp, `JSON.stringify(window.__fpsHistogram || [])`))
+  writeFileSync(`${OUT}.fps.json`, JSON.stringify(hist, null, 2))
+
+  console.log(`\n=== FPS over time ===`)
+  console.log(` ${'t(s)'.padStart(5)} ${'fps'.padStart(3)} ${'contentLen'.padStart(10)} scrollTop/scrollHeight`)
+  for (const h of hist) {
+    const bar = '█'.repeat(Math.min(40, Math.max(0, Math.round(h.fps / 2))))
+    console.log(` ${(h.t / 1000).toFixed(1).padStart(5)} ${String(h.fps).padStart(3)} ${String(h.contentLen).padStart(10)} ${h.scrollTop}/${h.scrollHeight} ${bar}`)
+  }
+
+  // Top self frames (profile timestamps are in microseconds; / 1000 gives ms)
+  const total = (profile.endTime - profile.startTime) / 1000
+  const intMs = total / Math.max(1, profile.samples?.length ?? 1)
+  const counts = new Map()
+  for (const s of profile.samples ?? []) counts.set(s, (counts.get(s) ?? 0) + 1)
+  const rows = profile.nodes
+    .map(n => ({ id: n.id, fn: n.callFrame.functionName || '(anon)', url: n.callFrame.url || '', line: n.callFrame.lineNumber, self: counts.get(n.id) ?? 0 }))
+    .sort((a, b) => b.self - a.self)
+    .slice(0, 25)
+  console.log(`\n=== ${total.toFixed(0)}ms wall, ${profile.samples?.length ?? 0} samples (${intMs.toFixed(2)}ms each) ===`)
+  for (const r of rows) {
+    if (r.self === 0) break
+    const url = r.url.replace(/^.*\/src\//, 'src/').replace(/\?.*$/, '').slice(0, 70)
+    console.log(` ${(r.self * intMs).toFixed(1).padStart(7)}ms (${String(r.self).padStart(4)} samp) ${r.fn.padEnd(45)} ${url}:${r.line}`)
+  }
+
+  // Click the first button whose aria-label contains "stop" (presumably the
+  // stop-generation control) so the stream does not keep running.
+  await evalP(cdp, `
+    (() => {
+      for (const b of document.querySelectorAll('button')) {
+        if ((b.getAttribute('aria-label') || '').toLowerCase().includes('stop')) { b.click(); return }
+      }
+    })()
+  `)
+
+  cdp.close()
+}
+
+main().catch(e => {
+  console.error('fatal:', e.stack ?? e.message)
+  process.exit(1)
+})
diff --git a/apps/desktop/src/components/assistant-ui/thread-virtualizer.tsx b/apps/desktop/src/components/assistant-ui/thread-virtualizer.tsx
index 21cfa66f6eb..2e6bbaf8ff7 100644
--- a/apps/desktop/src/components/assistant-ui/thread-virtualizer.tsx
+++ b/apps/desktop/src/components/assistant-ui/thread-virtualizer.tsx
@@ -195,12 +195,6 @@ function useThreadScrollAnchor({ enabled, groupCount, scrollerRef, sessionKey, v
   const prevSessionKeyRef = useRef(sessionKey)
   const prevGroupCountRef = useRef(0)
 
-  // Track repins-in-a-row to break runaway loops during rapid layout churn.
-  // In healthy paths this drains to zero between frames; we only need the
-  // ceiling for pathological streaming bursts where content height keeps
-  // growing every frame.
-  const inFlightPinDepthRef = useRef(0)
-
   const pinToBottom = useCallback(() => {
     const el = scrollerRef.current
 
@@ -247,41 +241,20 @@ function useThreadScrollAnchor({ enabled, groupCount, scrollerRef, sessionKey, v
       const top = el.scrollTop
 
       // If this scroll event is the consequence of `pinToBottom` writing
-      // `el.scrollTop`, treat it as ours: never disarm, just consume the
-      // gate. If we landed short of bottom (because content also grew in
-      // the same frame and the browser clamped our scrollTop = scrollHeight
-      // write to the now-stale scrollHeight - clientHeight), schedule
-      // another pin on the next frame. Without this the post-pin scrollTop
-      // gets misread as the user scrolling up, disarming sticky-bottom
-      // permanently and leaving the just-submitted message below the fold.
+      // `el.scrollTop`, treat it as ours: don't disarm. The RO + rAF pin
+      // loop will re-pin on the next frame if the browser clamped us
+      // short of bottom (because content grew in the same frame).
+      // Without this guard the post-pin scrollTop gets misread as the
+      // user scrolling up, disarming sticky-bottom permanently and
+      // leaving the just-submitted message below the fold.
       if (programmaticScrollPendingRef.current > 0) {
         programmaticScrollPendingRef.current -= 1
         lastTopRef.current = top
-        // Stay armed regardless — sticky-bottom should hold through clamp
-        // races.
+        // Always re-arm — sticky-bottom should hold through clamp races.
         armedRef.current = true
         const atBottom = el.scrollHeight - (top + el.clientHeight) <= AT_BOTTOM_THRESHOLD
         setThreadScrolledUp(!atBottom)
 
-        if (atBottom) {
-          inFlightPinDepthRef.current = 0
-        } else if (inFlightPinDepthRef.current < 8) {
-          // Re-pin synchronously: the browser already laid out for this
-          // scroll event, so reading scrollHeight now gives us the up-to-date
-          // value and writing scrollTop lands us at the actual bottom in the
-          // same frame. Doing this in a rAF causes a 1-frame visual flicker
-          // (distFromBottom briefly nonzero), so we accept one extra
-          // synchronous pin cycle (which goes back through this very
-          // handler with the counter incremented and arm preserved). The
-          // depth guard prevents pathological runaway loops if content
-          // height keeps growing every frame; 8 is generous for any
-          // realistic rendering pattern.
-          inFlightPinDepthRef.current += 1
-          pinToBottom()
-        } else {
-          inFlightPinDepthRef.current = 0
-        }
-
         return
       }
 
@@ -318,7 +291,11 @@ function useThreadScrollAnchor({ enabled, groupCount, scrollerRef, sessionKey, v
   }, [scrollerRef])
 
   // Follow content growth (streaming, item measurements, loading indicator)
-  // while armed.
+  // while armed. During fast streaming the ResizeObserver can fire many
+  // times per frame as Streamdown re-tokenizes; coalesce to one pin per
+  // animation frame so we don't run the scroll-event/re-pin chain
+  // (~20+ ms self in `Virtualizer.getMaxScrollOffset`) several times per
+  // token.
   useEffect(() => {
     if (!enabled) {
       return undefined
@@ -330,11 +307,21 @@ function useThreadScrollAnchor({ enabled, groupCount, scrollerRef, sessionKey, v
       return undefined
     }
 
-    const observer = new ResizeObserver(() => {
-      if (armedRef.current) {
-        pinToBottom()
+    let pinRafScheduled = false
+    const schedulePin = () => {
+      if (pinRafScheduled || !armedRef.current) {
+        return
       }
-    })
+      pinRafScheduled = true
+      requestAnimationFrame(() => {
+        pinRafScheduled = false
+        if (armedRef.current) {
+          pinToBottom()
+        }
+      })
+    }
+
+    const observer = new ResizeObserver(schedulePin)
 
     observer.observe(el)
 
@@ -366,6 +353,15 @@ function useThreadScrollAnchor({ enabled, groupCount, scrollerRef, sessionKey, v
   // mutation but before the browser paints. Without this, there's a ~50ms
   // visual window where the new message sits below the fold while we wait
   // for the ResizeObserver / scroll event chain to fire and re-pin.
+  //
+  // We pin TWICE in this critical path — once synchronously, then once on
+  // the next rAF. The second pin catches the case where React mounts the
+  // new message in the second commit (after our layout effect ran), which
+  // grows scrollHeight again; without the rAF pin the user briefly sees a
+  // ~15 px gap below the new message until the RO catches up. Streaming
+  // tokens use the rate-limited RO path only; only the group-count change
+  // (which fires once per user submit / new turn arrival) pays for the
+  // extra pin.
   const prevGroupCountForLayoutRef = useRef(groupCount)
   useLayoutEffect(() => {
     if (!enabled) {
@@ -373,6 +369,11 @@ function useThreadScrollAnchor({ enabled, groupCount, scrollerRef, sessionKey, v
     }
     if (groupCount > prevGroupCountForLayoutRef.current && armedRef.current) {
       pinToBottom()
+      requestAnimationFrame(() => {
+        if (armedRef.current) {
+          pinToBottom()
+        }
+      })
     }
     prevGroupCountForLayoutRef.current = groupCount
   }, [enabled, groupCount, pinToBottom])