diff --git a/apps/desktop/package.json b/apps/desktop/package.json index 08f1cc1aa0..6fed75f563 100644 --- a/apps/desktop/package.json +++ b/apps/desktop/package.json @@ -90,6 +90,7 @@ "react-router-dom": "^7.17.0", "react-shiki": "^0.9.3", "remark-math": "^6.0.0", + "remend": "^1.3.0", "shiki": "^4.0.2", "streamdown": "^2.5.0", "tailwind-merge": "^3.5.0", diff --git a/apps/desktop/src/app/chat/composer/index.tsx b/apps/desktop/src/app/chat/composer/index.tsx index 43074b5ce3..6ab2abf72f 100644 --- a/apps/desktop/src/app/chat/composer/index.tsx +++ b/apps/desktop/src/app/chat/composer/index.tsx @@ -174,7 +174,6 @@ export function ChatBar({ const queuedPromptsBySession = useStore($queuedPromptsBySession) const statusItemsBySession = useStore($statusItemsBySession) const scrolledUp = useStore($threadScrolledUp) - const sessionMessages = useStore($messages) const activeQueueSessionKey = queueSessionKey || sessionId || null const queuedPrompts = useMemo( @@ -866,7 +865,9 @@ export function ChatBar({ event.preventDefault() triggerKeyConsumedRef.current = true - const history = deriveUserHistory(sessionMessages, chatMessageText) + // $messages is read imperatively (not subscribed) so the composer + // doesn't re-render on every streaming delta flush. + const history = deriveUserHistory($messages.get(), chatMessageText) const entry = browseBackward(sessionId, currentDraft, history) if (entry !== null) { @@ -891,7 +892,7 @@ export function ChatBar({ event.preventDefault() triggerKeyConsumedRef.current = true - const history = deriveUserHistory(sessionMessages, chatMessageText) + const history = deriveUserHistory($messages.get(), chatMessageText) const result = browseForward(sessionId, history) if (result !== null) { diff --git a/apps/desktop/src/app/chat/index.tsx b/apps/desktop/src/app/chat/index.tsx index 725039620f..ab1213ef16 100644 --- a/apps/desktop/src/app/chat/index.tsx +++ b/apps/desktop/src/app/chat/index.tsx @@ -35,7 +35,9 @@ import { $gatewayState, $introPersonality, $introSeed, + $lastVisibleMessageIsUser, $messages, + $messagesEmpty, $selectedStoredSessionId, $sessions, sessionPinId @@ -55,7 +57,7 @@ import { type DroppedFile, partitionDroppedFiles } from './hooks/use-composer-ac import { useFileDropZone } from './hooks/use-file-drop-zone' import { ScrollToBottomButton } from './scroll-to-bottom-button' import { SessionActionsMenu } from './sidebar/session-actions-menu' -import { lastVisibleMessageIsUser, threadLoadingState } from './thread-loading' +import { threadLoadingState } from './thread-loading' interface ChatViewProps extends Omit, 'onSubmit'> { gateway: HermesGateway | null @@ -156,105 +158,35 @@ function ChatHeader({ ) } -export function ChatView({ - className, - gateway, - onToggleSelectedPin, - onDeleteSelectedSession, +interface ChatRuntimeBoundaryProps { + busy: boolean + children: React.ReactNode + onCancel: () => Promise | void + onEdit: (message: AppendMessage) => Promise + onReload: (parentId: string | null) => Promise + onThreadMessagesChange: (messages: readonly ThreadMessage[]) => void +} + +/** + * Owns the $messages subscription and the assistant-ui external-store runtime. + * + * Isolated from ChatView so the per-token delta flush (which replaces the + * $messages atom ~30×/s during streaming) only re-renders this component and + * the runtime provider. The children (Thread, ChatBar) are created by + * ChatView, whose render output is stable across flushes — so React bails out + * of re-rendering them by element identity and the stream's render cost stays + * confined to the streaming message's own subtree. + */ +function ChatRuntimeBoundary({ + busy, + children, onCancel, - onAddContextRef, - onAddUrl, - onAttachImageBlob, - onAttachDroppedItems, - onBranchInNewChat, - maxVoiceRecordingSeconds, - onPasteClipboardImage, - onPickFiles, - onPickFolders, - onPickImages, - onRemoveAttachment, - onSteer, - onSubmit, - onThreadMessagesChange, onEdit, onReload, - onRestoreToMessage, - onTranscribeAudio -}: ChatViewProps) { - const location = useLocation() - const activeSessionId = useStore($activeSessionId) - const awaitingResponse = useStore($awaitingResponse) - const busy = useStore($busy) - const contextSuggestions = useStore($contextSuggestions) - const currentCwd = useStore($currentCwd) - const currentModel = useStore($currentModel) - const currentProvider = useStore($currentProvider) - const freshDraftReady = useStore($freshDraftReady) - const gatewayState = useStore($gatewayState) - const gatewaySwapTarget = useStore($gatewaySwapTarget) - const gatewayOpen = gatewayState === 'open' - const introPersonality = useStore($introPersonality) - const introSeed = useStore($introSeed) + onThreadMessagesChange +}: ChatRuntimeBoundaryProps) { const messages = useStore($messages) - const selectedSessionId = useStore($selectedStoredSessionId) const runtimeMessageCacheRef = useRef(new WeakMap()) - const isRoutedSessionView = Boolean(routeSessionId(location.pathname)) - - const showIntro = - freshDraftReady && !isRoutedSessionView && !selectedSessionId && !activeSessionId && messages.length === 0 - - // Session is still loading if the route references a session we haven't - // resumed yet. Once `activeSessionId` is set (runtime has resumed), the - // session exists — even if it has zero messages (a brand-new routed - // session). The flicker where `busy` flips true briefly during hydrate - // is handled by `threadLoadingState`'s last-visible-user gate. - const loadingSession = isRoutedSessionView && messages.length === 0 && !activeSessionId - const threadLoading = threadLoadingState(loadingSession, busy, awaitingResponse, lastVisibleMessageIsUser(messages)) - const showChatBar = !loadingSession - const threadKey = selectedSessionId || activeSessionId || (isRoutedSessionView ? location.pathname : 'new') - - const modelOptionsQuery = useQuery({ - queryKey: ['model-options', activeSessionId || 'global'], - queryFn: () => { - if (!activeSessionId) { - return getGlobalModelOptions() - } - - if (!gateway) { - throw new Error('Hermes gateway unavailable') - } - - return gateway.request('model.options', { session_id: activeSessionId }) - }, - enabled: gatewayOpen - }) - - const quickModels = useMemo( - () => quickModelOptions(modelOptionsQuery.data, currentProvider, currentModel), - [currentModel, currentProvider, modelOptionsQuery.data] - ) - - const chatBarState = useMemo( - () => ({ - model: { - model: currentModel, - provider: currentProvider, - canSwitch: gatewayOpen, - loading: !gatewayOpen || (!currentModel && !currentProvider), - quickModels - }, - tools: { - enabled: true, - label: 'Add context', - suggestions: contextSuggestions - }, - voice: { - enabled: true, - active: false - } - }), - [contextSuggestions, currentModel, currentProvider, gatewayOpen, quickModels] - ) const runtimeMessageRepository = useMemo(() => { const items: { message: ThreadMessage; parentId: string | null }[] = [] @@ -304,6 +236,113 @@ export function ChatView({ onReload }) + return {children} +} + +export function ChatView({ + className, + gateway, + onToggleSelectedPin, + onDeleteSelectedSession, + onCancel, + onAddContextRef, + onAddUrl, + onAttachImageBlob, + onAttachDroppedItems, + onBranchInNewChat, + maxVoiceRecordingSeconds, + onPasteClipboardImage, + onPickFiles, + onPickFolders, + onPickImages, + onRemoveAttachment, + onSteer, + onSubmit, + onThreadMessagesChange, + onEdit, + onReload, + onRestoreToMessage, + onTranscribeAudio +}: ChatViewProps) { + const location = useLocation() + const activeSessionId = useStore($activeSessionId) + const awaitingResponse = useStore($awaitingResponse) + const busy = useStore($busy) + const contextSuggestions = useStore($contextSuggestions) + const currentCwd = useStore($currentCwd) + const currentModel = useStore($currentModel) + const currentProvider = useStore($currentProvider) + const freshDraftReady = useStore($freshDraftReady) + const gatewayState = useStore($gatewayState) + const gatewaySwapTarget = useStore($gatewaySwapTarget) + const gatewayOpen = gatewayState === 'open' + const introPersonality = useStore($introPersonality) + const introSeed = useStore($introSeed) + // PERF: ChatView must not subscribe to $messages — the atom is replaced on + // every streaming delta flush (~30×/s) and a subscription here re-renders + // the entire chat shell (header, chat bar, thread wrapper) per token. The + // runtime that DOES need the messages lives in ChatRuntimeBoundary below; + // this component only needs streaming-stable derivations. + const messagesEmpty = useStore($messagesEmpty) + const lastVisibleIsUser = useStore($lastVisibleMessageIsUser) + const selectedSessionId = useStore($selectedStoredSessionId) + const isRoutedSessionView = Boolean(routeSessionId(location.pathname)) + + const showIntro = freshDraftReady && !isRoutedSessionView && !selectedSessionId && !activeSessionId && messagesEmpty + + // Session is still loading if the route references a session we haven't + // resumed yet. Once `activeSessionId` is set (runtime has resumed), the + // session exists — even if it has zero messages (a brand-new routed + // session). The flicker where `busy` flips true briefly during hydrate + // is handled by `threadLoadingState`'s last-visible-user gate. + const loadingSession = isRoutedSessionView && messagesEmpty && !activeSessionId + const threadLoading = threadLoadingState(loadingSession, busy, awaitingResponse, lastVisibleIsUser) + const showChatBar = !loadingSession + const threadKey = selectedSessionId || activeSessionId || (isRoutedSessionView ? location.pathname : 'new') + + const modelOptionsQuery = useQuery({ + queryKey: ['model-options', activeSessionId || 'global'], + queryFn: () => { + if (!activeSessionId) { + return getGlobalModelOptions() + } + + if (!gateway) { + throw new Error('Hermes gateway unavailable') + } + + return gateway.request('model.options', { session_id: activeSessionId }) + }, + enabled: gatewayOpen + }) + + const quickModels = useMemo( + () => quickModelOptions(modelOptionsQuery.data, currentProvider, currentModel), + [currentModel, currentProvider, modelOptionsQuery.data] + ) + + const chatBarState = useMemo( + () => ({ + model: { + model: currentModel, + provider: currentProvider, + canSwitch: gatewayOpen, + loading: !gatewayOpen || (!currentModel && !currentProvider), + quickModels + }, + tools: { + enabled: true, + label: 'Add context', + suggestions: contextSuggestions + }, + voice: { + enabled: true, + active: false + } + }), + [contextSuggestions, currentModel, currentProvider, gatewayOpen, quickModels] + ) + // Drop files anywhere in the conversation area, not just on the composer // input. In-app drags (project tree / gutter) carry workspace-relative paths // the gateway resolves directly, so they stay inline `@file:` refs. OS/Finder @@ -356,7 +395,13 @@ export function ChatView({ className="relative min-h-0 max-w-full flex-1 overflow-hidden bg-(--ui-chat-surface-background) contain-[layout_paint]" {...dropHandlers} > - + )} - + {showChatBar && } diff --git a/apps/desktop/src/app/chat/thread-loading.ts b/apps/desktop/src/app/chat/thread-loading.ts index 97686c6550..05cfb08671 100644 --- a/apps/desktop/src/app/chat/thread-loading.ts +++ b/apps/desktop/src/app/chat/thread-loading.ts @@ -3,9 +3,14 @@ import type { ChatMessage } from '@/lib/chat-messages' export type ThreadLoadingState = 'response' | 'session' export function lastVisibleMessageIsUser(messages: ChatMessage[]): boolean { - const lastVisible = [...messages].reverse().find(message => !message.hidden) + // Allocation-free reverse scan — runs in a hot $messages computed. + for (let i = messages.length - 1; i >= 0; i -= 1) { + if (!messages[i].hidden) { + return messages[i].role === 'user' + } + } - return lastVisible?.role === 'user' + return false } export function threadLoadingState( diff --git a/apps/desktop/src/app/session/hooks/use-session-actions.ts b/apps/desktop/src/app/session/hooks/use-session-actions.ts index a4a2feaaac..4e19c63795 100644 --- a/apps/desktop/src/app/session/hooks/use-session-actions.ts +++ b/apps/desktop/src/app/session/hooks/use-session-actions.ts @@ -618,10 +618,26 @@ export function useSessionActions({ const watchWindow = isWatchWindow() let localSnapshot = $messages.get() + // REST transcript prefetch and the gateway resume RPC are independent + // — run them concurrently so a big session's wall time is + // max(prefetch, resume) instead of their sum. The prefetch paints the + // transcript as soon as it lands; the RPC binds the runtime id. + // Watch windows skip the prefetch — lazy resume attaches the live mirror. + const prefetchPromise = watchWindow ? null : getSessionMessages(storedSessionId, sessionProfile) + + const resumePromise = requestGateway('session.resume', { + session_id: storedSessionId, + cols: 96, + ...(watchWindow ? { lazy: true } : {}), + ...(sessionProfile ? { profile: sessionProfile } : {}) + }) + // The rejection is consumed by the `await` below; this guard only + // keeps it from surfacing as unhandled while the prefetch settles. + resumePromise.catch(() => undefined) + try { - // Watch windows skip REST prefetch — lazy resume attaches the live mirror. - if (!watchWindow) { - const storedMessages = await getSessionMessages(storedSessionId, sessionProfile) + if (prefetchPromise) { + const storedMessages = await prefetchPromise if (isCurrentResume()) { localSnapshot = preserveLocalAssistantErrors(toChatMessages(storedMessages.messages), $messages.get()) @@ -635,12 +651,7 @@ export function useSessionActions({ // Non-fatal: gateway resume below can still hydrate the session. } - const resumed = await requestGateway('session.resume', { - session_id: storedSessionId, - cols: 96, - ...(watchWindow ? { lazy: true } : {}), - ...(sessionProfile ? { profile: sessionProfile } : {}) - }) + const resumed = await resumePromise if (!isCurrentResume()) { return @@ -648,17 +659,22 @@ export function useSessionActions({ const currentMessages = $messages.get() - const resumedMessages = preserveLocalAssistantErrors( - reconcileResumeMessages(toChatMessages(resumed.messages), currentMessages), - currentMessages - ) - // Keep the local snapshot when resume would only reshuffle runtime projection. + // Keep the local snapshot when resume would only reshuffle runtime + // projection. When the REST prefetch already hydrated the transcript, + // skip converting/reconciling the resume payload entirely — on a + // 1000+-message session that second conversion plus the deep + // equivalence compare costs over a second of main-thread time. const preferredMessages = localSnapshot.length > 0 ? localSnapshot - : chatMessageArraysEquivalent(currentMessages, resumedMessages) - ? currentMessages - : resumedMessages + : (() => { + const resumedMessages = preserveLocalAssistantErrors( + reconcileResumeMessages(toChatMessages(resumed.messages), currentMessages), + currentMessages + ) + + return chatMessageArraysEquivalent(currentMessages, resumedMessages) ? currentMessages : resumedMessages + })() const messagesForView = preserveLocalAssistantErrors(preferredMessages, currentMessages) diff --git a/apps/desktop/src/components/assistant-ui/markdown-text.tsx b/apps/desktop/src/components/assistant-ui/markdown-text.tsx index 8ec734bf8b..1c50b65eab 100644 --- a/apps/desktop/src/components/assistant-ui/markdown-text.tsx +++ b/apps/desktop/src/components/assistant-ui/markdown-text.tsx @@ -2,6 +2,7 @@ import { TextMessagePartProvider, useMessagePartText } from '@assistant-ui/react' import { + parseMarkdownIntoBlocks, type StreamdownTextComponents, StreamdownTextPrimitive, type SyntaxHighlighterProps @@ -26,6 +27,7 @@ import { mediaStreamUrl } from '@/lib/media' import { previewTargetFromMarkdownHref } from '@/lib/preview-targets' +import { tailBoundedRemend } from '@/lib/remend-tail' import { cn } from '@/lib/utils' // Math rendering plugin (KaTeX). Configured once at module scope — the @@ -42,6 +44,51 @@ import { cn } from '@/lib/utils' // LLM convention). The default false-setting only accepts `$$...$$`. const mathPlugin = createMemoizedMathPlugin({ singleDollarTextMath: true }) +// Replaces Streamdown's `parseIncompleteMarkdown` (full-text remend per +// flush) with a tail-bounded repair — see lib/remend-tail.ts. Must stay +// module-scope so the prop identity is stable across renders. +function preprocessWithTailRepair(text: string): string { + return tailBoundedRemend(preprocessMarkdown(text)) +} + +// Memoized block splitter. Streamdown calls `parseMarkdownIntoBlocks` (a full +// `marked` lex of the entire message, ~1.6ms per 28KB) inside a useMemo keyed +// on the text — but the same text is re-lexed every time a message REMOUNTS +// (virtualizer scroll, session switch) and whenever multiple surfaces render +// the same content (deferred + smooth reveal republish). A small module-level +// LRU keyed by the exact source string removes all of those repeat parses +// with zero correctness risk (same input → same output). Streaming tail +// growth misses the cache by design (every flush is a new string) — that +// single lex is the irreducible cost. +const BLOCK_CACHE_MAX = 64 +const BLOCK_CACHE_MIN_LENGTH = 1024 +const blockCache = new Map() + +function parseMarkdownIntoBlocksCached(markdown: string): string[] { + if (markdown.length < BLOCK_CACHE_MIN_LENGTH) { + return parseMarkdownIntoBlocks(markdown) + } + + const hit = blockCache.get(markdown) + + if (hit) { + // Refresh recency (Map iteration order is insertion order). + blockCache.delete(markdown) + blockCache.set(markdown, hit) + + return hit + } + + const blocks = parseMarkdownIntoBlocks(markdown) + blockCache.set(markdown, blocks) + + if (blockCache.size > BLOCK_CACHE_MAX) { + blockCache.delete(blockCache.keys().next().value as string) + } + + return blocks +} + async function mediaSrc(path: string): Promise { if (/^(?:https?|data):/i.test(path)) { return path @@ -241,6 +288,13 @@ function MarkdownImage({ className, src, alt, ...props }: ComponentProps<'img'>) // keeps draining its tail instead of snapping. const REVEAL_DRAIN_MS = 500 const REVEAL_MAX_CHARS_PER_FRAME = 30 +// Floor between reveal commits. Each commit republishes the text context and +// re-runs the whole Streamdown pipeline (preprocess → remend → lex → micromark +// on the open block) over the full accumulated text — at raw rAF cadence +// that's 60 full parses/second and was the dominant streaming cost for +// reasoning text. ~33ms keeps the reveal visually fluid (2 frames) while +// halving the parse work. +const REVEAL_MIN_COMMIT_MS = 33 function useSmoothReveal(text: string, isRunning: boolean): string { const [displayed, setDisplayed] = useState(isRunning ? '' : text) @@ -273,10 +327,27 @@ function useSmoothReveal(text: string, isRunning: boolean): string { const tick = () => { const now = performance.now() const dt = now - lastTickRef.current + + // Skip this frame if the floor hasn't elapsed — the backlog math below + // is dt-proportional, so delayed commits reveal proportionally more. + if (dt < REVEAL_MIN_COMMIT_MS) { + frameRef.current = requestAnimationFrame(tick) + + return + } + lastTickRef.current = now const remaining = targetRef.current.length - shownRef.current.length - const add = Math.min(remaining, REVEAL_MAX_CHARS_PER_FRAME, Math.max(1, Math.ceil((remaining * dt) / REVEAL_DRAIN_MS))) + + const add = Math.min( + remaining, + // dt-scaled so the per-commit cap stays equivalent to the old + // per-frame cap at any commit cadence. + Math.ceil((REVEAL_MAX_CHARS_PER_FRAME * dt) / 16.7), + Math.max(1, Math.ceil((remaining * dt) / REVEAL_DRAIN_MS)) + ) + shownRef.current = targetRef.current.slice(0, shownRef.current.length + add) setDisplayed(shownRef.current) @@ -460,17 +531,20 @@ function MarkdownTextSurface({ containerClassName, containerProps }: MarkdownTex containerProps={containerProps} lineNumbers={false} mode="streaming" - // Always auto-close incomplete fences — even during streaming. - // Without this, an unclosed ```python ... ``` whose body contains - // `$` (very common: shell snippets, JS template strings, dollar - // amounts) leaks those dollars out to the math parser and they - // get rendered as broken inline math until the closing fence - // arrives. Shiki is independently deferred via `defer={isStreaming}` - // on the SyntaxHighlighter component, so we don't pay code-block - // tokenization on every token even with this set. - parseIncompleteMarkdown + // Incomplete-markdown repair is handled by `preprocessWithTailRepair` + // below (tail-bounded remend) instead of Streamdown's built-in pass, + // which re-runs remend over the ENTIRE message on every flush — ~18% + // of streaming script time on 50KB+ messages. The repair itself stays + // always-on (even between flushes / for completed messages): an + // unclosed ```python ... ``` whose body contains `$` (shell snippets, + // JS template strings, dollar amounts) would otherwise leak those + // dollars to the math parser and render broken inline math. Shiki is + // independently deferred via `defer={isStreaming}` on the + // SyntaxHighlighter component. + parseIncompleteMarkdown={false} + parseMarkdownIntoBlocksFn={parseMarkdownIntoBlocksCached} plugins={plugins} - preprocess={preprocessMarkdown} + preprocess={preprocessWithTailRepair} /> ) } diff --git a/apps/desktop/src/components/assistant-ui/thread.tsx b/apps/desktop/src/components/assistant-ui/thread.tsx index effeb38e79..f2a574d475 100644 --- a/apps/desktop/src/components/assistant-ui/thread.tsx +++ b/apps/desktop/src/components/assistant-ui/thread.tsx @@ -7,7 +7,8 @@ import { MessagePrimitive, type ToolCallMessagePartProps, useAui, - useAuiState + useAuiState, + useMessageRuntime } from '@assistant-ui/react' import { useStore } from '@nanostores/react' import { IconPlayerStopFilled } from '@tabler/icons-react' @@ -105,7 +106,11 @@ type ThreadLoadingState = 'response' | 'session' interface MessageActionProps { messageId: string - messageText: string + /** Lazy accessor — reads the live message text at action time. Passing the + * text itself as a prop forces the whole footer to re-render on every + * streaming delta flush (the text changes ~30×/s), which profiling showed + * was a large slice of per-token script time on long transcripts. */ + getMessageText: () => string onBranchInNewChat?: (messageId: string) => void } @@ -133,6 +138,28 @@ function messageContentText(content: unknown): string { return Array.isArray(content) ? content.map(partText).join('').trim() : '' } +// Cheap streaming-stable "does this message have visible text" check: returns +// on the first non-whitespace text part without concatenating the whole +// message. Used as a useAuiState selector so its boolean output stays stable +// across token flushes (flips false→true once per turn). +function contentHasVisibleText(content: unknown): boolean { + if (typeof content === 'string') { + return content.trim().length > 0 + } + + if (!Array.isArray(content)) { + return false + } + + for (const part of content) { + if (partText(part).trim().length > 0) { + return true + } + } + + return false +} + export const Thread: FC<{ clampToComposer?: boolean cwd?: string | null @@ -221,20 +248,39 @@ const CenteredThreadSpinner: FC = () => { const AssistantMessage: FC<{ onBranchInNewChat?: (messageId: string) => void }> = ({ onBranchInNewChat }) => { const messageId = useAuiState(s => s.message.id) - const content = useAuiState(s => s.message.content) - const messageText = messageContentText(content) + const messageRuntime = useMessageRuntime() + + // PERF: this component must NOT subscribe to the streaming text. Every + // selector here returns a value that stays referentially stable across + // token flushes (booleans, status strings, '' while running), so the + // 30 Hz delta stream only re-renders the markdown part and the tiny + // StreamStallIndicator leaf — not the footer/preview/root subtree. + const messageStatus = useAuiState(s => s.message.status?.type) + const isRunning = messageStatus === 'running' + const isPlaceholder = useAuiState(s => s.message.status?.type === 'running' && s.message.content.length === 0) + const hasVisibleText = useAuiState(s => contentHasVisibleText(s.message.content)) + + // Preview targets only materialize once the turn completes — while running + // the selector returns '' (stable), so per-token flushes skip the regex + // scan and the re-render it would cause. + const completedText = useAuiState(s => + s.message.status?.type === 'running' ? '' : messageContentText(s.message.content) + ) const previewTargets = useMemo(() => { - if (!messageText || !/(https?:\/\/|file:\/\/)/i.test(messageText)) { + if (!completedText || !/(https?:\/\/|file:\/\/)/i.test(completedText)) { return [] } - return pickPrimaryPreviewTarget(extractPreviewTargets(messageText)) - }, [messageText]) + return pickPrimaryPreviewTarget(extractPreviewTargets(completedText)) + }, [completedText]) - const messageStatus = useAuiState(s => s.message.status?.type) - const isPlaceholder = messageStatus === 'running' && content.length === 0 - const enterRef = useEnterAnimation(messageStatus === 'running', `assistant-message:${messageId}`) + const getMessageText = useCallback( + () => messageContentText(messageRuntime.getState().content), + [messageRuntime] + ) + + const enterRef = useEnterAnimation(isRunning, `assistant-message:${messageId}`) if (isPlaceholder) { return null @@ -245,7 +291,7 @@ const AssistantMessage: FC<{ onBranchInNewChat?: (messageId: string) => void }> className="group flex w-full min-w-0 max-w-full flex-col gap-0 self-start overflow-hidden" data-role="assistant" data-slot="aui_assistant-message-root" - data-streaming={messageStatus === 'running' ? 'true' : undefined} + data-streaming={isRunning ? 'true' : undefined} ref={enterRef} >
void }> > {/* Todos render in the composer status stack now, not inline. */} - {messageStatus === 'running' && } + {isRunning && } {previewTargets.length > 0 && (
{previewTargets.map(target => ( @@ -271,8 +317,8 @@ const AssistantMessage: FC<{ onBranchInNewChat?: (messageId: string) => void }>
- {messageText.trim().length > 0 && ( - + {hasVisibleText && ( + )} ) @@ -313,10 +359,28 @@ const STREAM_STALL_S = 2 // Tail "still thinking" indicator: the pre-first-token spinner goes away once // text flows, but if the stream then goes quiet mid-turn (tool think-time, -// provider stall) nothing signals that work continues. Watch a per-render +// provider stall) nothing signals that work continues. Watch a per-flush // activity signal; when it hasn't changed for STREAM_STALL_S, re-show the // dither + a timer counting from the last activity. -const StreamStallIndicator: FC<{ activity: string }> = ({ activity }) => { +// +// Subscribes to the activity signal ITSELF (rather than taking it as a prop) +// so that per-token updates re-render only this leaf, not the whole +// AssistantMessage subtree. +const StreamStallIndicator: FC = () => { + const activity = useAuiState(s => { + let textLength = 0 + + for (const part of s.message.content) { + const text = (part as { text?: unknown }).text + + if (typeof text === 'string') { + textLength += text.length + } + } + + return `${s.message.content.length}:${textLength}` + }) + const [stalled, setStalled] = useState(false) useEffect(() => { @@ -584,7 +648,7 @@ function formatMessageTimestamp( return SHORT_FMT.format(date) } -const AssistantActionBar: FC = ({ messageId, messageText, onBranchInNewChat }) => { +const AssistantActionBar: FC = ({ messageId, getMessageText, onBranchInNewChat }) => { const { t } = useI18n() const copy = t.assistant.thread const [menuOpen, setMenuOpen] = useState(false) @@ -605,7 +669,7 @@ const AssistantActionBar: FC = ({ messageId, messageText, on )} data-slot="aui_msg-actions" > - + triggerHaptic('submit')} tooltip={copy.refresh}> @@ -623,7 +687,7 @@ const AssistantActionBar: FC = ({ messageId, messageText, on {copy.branchNewChat} - + @@ -631,7 +695,7 @@ const AssistantActionBar: FC = ({ messageId, messageText, on ) } -const ReadAloudItem: FC<{ messageId: string; text: string }> = ({ messageId, text }) => { +const ReadAloudItem: FC<{ getText: () => string; messageId: string }> = ({ getText, messageId }) => { const { t } = useI18n() const copy = t.assistant.thread const voicePlayback = useStore($voicePlayback) @@ -645,6 +709,8 @@ const ReadAloudItem: FC<{ messageId: string; text: string }> = ({ messageId, tex const Icon = isPreparing ? Loader2Icon : isSpeaking ? VolumeXIcon : Volume2Icon const read = useCallback(async () => { + const text = getText() + if (!text || $voicePlayback.get().status !== 'idle') { return } @@ -654,11 +720,11 @@ const ReadAloudItem: FC<{ messageId: string; text: string }> = ({ messageId, tex } catch (error) { notifyError(error, copy.readAloudFailed) } - }, [copy.readAloudFailed, messageId, text]) + }, [copy.readAloudFailed, getText, messageId]) return ( { e.preventDefault() void (isSpeaking ? stopVoicePlayback() : read()) @@ -820,8 +886,10 @@ const UserMessage: FC<{ // changes, not on every frame while the outer max-height animates open. const clampInnerRef = useRef(null) const [bodyClamped, setBodyClamped] = useState(false) + const lastClampHeightRef = useRef(-1) + const lineHeightRef = useRef(0) - const measureClamp = useCallback(() => { + const measureClamp = useCallback((entries: readonly ResizeObserverEntry[]) => { const inner = clampInnerRef.current const outer = inner?.parentElement @@ -829,12 +897,28 @@ const UserMessage: FC<{ return } - const styles = getComputedStyle(inner) - const lineHeight = parseFloat(styles.lineHeight) || 1.5 * parseFloat(styles.fontSize) || 20 - const fullHeight = inner.scrollHeight + // Prefer the size the ResizeObserver already computed — reading + // `scrollHeight` outside RO timing forces a synchronous layout, and with + // many user bubbles observed at once those reads interleave with the + // style write below into a read-write-read reflow cascade. + const entryHeight = entries.find(entry => entry.target === inner)?.borderBoxSize?.[0]?.blockSize + const fullHeight = Math.ceil(entryHeight ?? inner.scrollHeight) + + if (fullHeight === lastClampHeightRef.current) { + return + } + + lastClampHeightRef.current = fullHeight + + // Line-height is stable for the life of the bubble (font settings don't + // change under it) — resolve the computed style once. + if (!lineHeightRef.current) { + const styles = getComputedStyle(inner) + lineHeightRef.current = parseFloat(styles.lineHeight) || 1.5 * parseFloat(styles.fontSize) || 20 + } outer.style.setProperty('--human-msg-full', `${fullHeight}px`) - setBodyClamped(fullHeight > lineHeight * 2 + 1) + setBodyClamped(fullHeight > lineHeightRef.current * 2 + 1) }, []) useResizeObserver(measureClamp, clampInnerRef) diff --git a/apps/desktop/src/components/haptics-provider.tsx b/apps/desktop/src/components/haptics-provider.tsx index e86e4428f6..233dc2f75c 100644 --- a/apps/desktop/src/components/haptics-provider.tsx +++ b/apps/desktop/src/components/haptics-provider.tsx @@ -15,5 +15,29 @@ export function HapticsProvider({ children }: { children: ReactNode }) { return () => registerHapticTrigger(null) }, [muted, trigger]) + // web-haptics builds its AudioContext lazily inside the first trigger(), and + // the process's first AudioContext pays the CoreAudio spin-up (~850ms stall + // in profiles) — which landed on the first streamStart haptic as the first + // token painted. Open/close a throwaway context at idle so the real one + // connects to an already-warm audio service in single-digit ms. + useEffect(() => { + if (typeof requestIdleCallback !== 'function' || typeof AudioContext === 'undefined') { + return undefined + } + + const id = requestIdleCallback( + () => { + try { + void new AudioContext().close().catch(() => undefined) + } catch { + // No audio device (headless CI) — nothing to warm. + } + }, + { timeout: 2000 } + ) + + return () => cancelIdleCallback(id) + }, []) + return <>{children} } diff --git a/apps/desktop/src/components/ui/fade-text.tsx b/apps/desktop/src/components/ui/fade-text.tsx index f80c32c213..b487d87f6f 100644 --- a/apps/desktop/src/components/ui/fade-text.tsx +++ b/apps/desktop/src/components/ui/fade-text.tsx @@ -34,14 +34,21 @@ function FadeTextImpl({ children, className, fadeWidth = '3rem', style, ...rest const ref = useRef(null) const [overflowing, setOverflowing] = useState(false) - const measureOverflow = useCallback(() => { + const measureOverflow = useCallback((entries: readonly ResizeObserverEntry[]) => { const el = ref.current if (!el) { return } - setOverflowing(el.scrollWidth - el.clientWidth > 1) + // `clientWidth` from the RO entry when available (already computed); + // `scrollWidth` is unavoidable — content width isn't part of the entry — + // but inside RO timing layout is already clean so the read is cheap. + const clientWidth = entries.find(entry => entry.target === el)?.contentRect?.width ?? el.clientWidth + + // setState is identity-stable: React bails out when the boolean doesn't + // change, so repeated RO fires with the same answer don't re-render. + setOverflowing(el.scrollWidth - clientWidth > 1) }, []) useResizeObserver(measureOverflow, ref) diff --git a/apps/desktop/src/hooks/use-resize-observer.ts b/apps/desktop/src/hooks/use-resize-observer.ts index b350a367d7..e9a0b0b50a 100644 --- a/apps/desktop/src/hooks/use-resize-observer.ts +++ b/apps/desktop/src/hooks/use-resize-observer.ts @@ -1,17 +1,26 @@ import { type RefObject, useLayoutEffect, useRef } from 'react' -export function useResizeObserver(onResize: () => void, ...refs: readonly RefObject[]) { +/** + * Observe element resizes. The callback receives the ResizeObserver entries + * (empty on the initial synchronous call and in non-RO environments) so + * callers can read the observed size off the entry instead of forcing a + * fresh layout read. + */ +export function useResizeObserver( + onResize: (entries: readonly ResizeObserverEntry[]) => void, + ...refs: readonly RefObject[] +) { const refsRef = useRef(refs) refsRef.current = refs useLayoutEffect(() => { if (typeof ResizeObserver === 'undefined') { - onResize() + onResize([]) return } - const observer = new ResizeObserver(() => onResize()) + const observer = new ResizeObserver(entries => onResize(entries)) let observed = false for (const ref of refsRef.current) { @@ -31,7 +40,7 @@ export function useResizeObserver(onResize: () => void, ...refs: readonly RefObj return } - onResize() + onResize([]) return () => observer.disconnect() }, [onResize]) diff --git a/apps/desktop/src/lib/remend-tail.test.ts b/apps/desktop/src/lib/remend-tail.test.ts new file mode 100644 index 0000000000..c730937356 --- /dev/null +++ b/apps/desktop/src/lib/remend-tail.test.ts @@ -0,0 +1,105 @@ +import { parseMarkdownIntoBlocks } from '@assistant-ui/react-streamdown' +import remend from 'remend' +import { describe, expect, it } from 'vitest' + +import { findRemendWindowStart, tailBoundedRemend } from './remend-tail' + +const CORPUS = `# Heading one + +Intro paragraph with **bold**, *italic*, \`inline code\`, and a [link](https://example.com). + +## Code + +\`\`\`python +def main(): + cost = "$5" + print(f"total: $\{cost}") +\`\`\` + +Some text after the fence with $x^2 + y^2$ inline math. + +$$ +\\int_0^1 f(x) dx +$$ + +- list item one with **bold** +- list item two + +| col a | col b | +| ----- | ----- | +| 1 | 2 | + +~~~js +const s = \`template \${value}\` +~~~ + +Final paragraph with ~~strike~~ and unfinished [link text](https://exa +` + +/** + * Render-equivalence oracle: full-text remend and tail-bounded remend may + * differ in raw string output ONLY in ways that cannot affect rendering — + * i.e. after block splitting, every block must be identical. (Streamdown + * renders blocks independently, so block-level equality IS render equality.) + */ +function blocksOf(text: string): string[] { + return parseMarkdownIntoBlocks(text) +} + +describe('tailBoundedRemend', () => { + it('matches full remend block output at every streaming prefix', () => { + for (let end = 1; end <= CORPUS.length; end++) { + const prefix = CORPUS.slice(0, end) + const full = blocksOf(remend(prefix)) + const tail = blocksOf(tailBoundedRemend(prefix)) + + expect(tail, `prefix length ${end}: ${JSON.stringify(prefix.slice(-60))}`).toEqual(full) + } + }) + + it('repairs an unclosed fence opened early in a long message', () => { + const text = `intro\n\n\`\`\`python\n${'x = 1\n'.repeat(500)}print("$dollar")` + const repaired = tailBoundedRemend(text) + + expect(blocksOf(repaired)).toEqual(blocksOf(remend(text))) + // the window must reach back to the fence opener + expect(findRemendWindowStart(text)).toBe(text.indexOf('```python')) + }) + + it('bounds the window to the tail paragraph when no fence is open', () => { + const text = `para one\n\npara two\n\npara three with **bold` + const start = findRemendWindowStart(text) + + expect(start).toBe(text.indexOf('para three')) + expect(tailBoundedRemend(text)).toBe(remend(text)) + }) + + it('widens the window across an open $$ math block', () => { + const text = `before\n\n$$\n\\frac{a}{b}` + const start = findRemendWindowStart(text) + + expect(start).toBeLessThanOrEqual(text.indexOf('$$')) + expect(blocksOf(tailBoundedRemend(text))).toEqual(blocksOf(remend(text))) + }) + + it('handles closed constructs without modification', () => { + const text = `done **bold** and \`code\`\n\n\`\`\`js\nconst a = 1\n\`\`\`\n\nlast line.` + + expect(tailBoundedRemend(text)).toBe(text) + }) + + it('intentionally diverges from full remend on cross-block dangling openers', () => { + // Full remend scans the whole document and appends `**` for an opener + // left dangling in an EARLIER block, dumping stray asterisks into the + // unrelated tail block ("|**"). Because Streamdown splits into blocks + // after the repair, that opener never renders as bold either way — the + // tail-bounded result is the cleaner of the two. This test documents + // the divergence so a future remend upgrade that changes the behavior + // gets noticed. + const text = `- item with **dangling\n- item two\n\n|` + + expect(remend(text).endsWith('|**')).toBe(true) + expect(tailBoundedRemend(text).endsWith('|')).toBe(true) + expect(tailBoundedRemend(text).endsWith('|**')).toBe(false) + }) +}) diff --git a/apps/desktop/src/lib/remend-tail.ts b/apps/desktop/src/lib/remend-tail.ts new file mode 100644 index 0000000000..683f7dc193 --- /dev/null +++ b/apps/desktop/src/lib/remend-tail.ts @@ -0,0 +1,108 @@ +import remend from 'remend' + +// Tail-bounded incomplete-markdown repair. +// +// Streamdown's built-in `parseIncompleteMarkdown` runs `remend` over the whole +// accumulated message on every streaming flush (~18% of script time on 50KB+ +// messages). But repairs only ever matter in the trailing block: inline +// constructs can't cross a blank line, and Streamdown splits into blocks AFTER +// the repair, so a dangling opener in an earlier block can't reach the tail. +// We run `remend` on just that block instead. + +const BACKTICK = 96 // ` +const TILDE = 126 // ~ +const SPACE = 32 +const TAB = 9 +const BACKSLASH = 92 + +const isSpace = (c: number) => c === SPACE || c === TAB + +/** + * Index of the last top-level block start — the char after the most recent + * blank line that sits outside any open code fence or `$$` math block. An + * unclosed fence/math always begins after that blank, so it stays wholly + * inside the window without separate tracking. One cheap char pass, no regex. + */ +export function findRemendWindowStart(text: string): number { + const n = text.length + let inFence = false + let fenceChar = 0 + let fenceRun = 0 + let inMath = false + let boundary = 0 + let pending = -1 // a blank line, committed to `boundary` once content follows + + for (let lineStart = 0; lineStart <= n; ) { + let lineEnd = text.indexOf('\n', lineStart) + + if (lineEnd === -1) { + lineEnd = n + } + + let i = lineStart + + while (i < lineEnd && isSpace(text.charCodeAt(i))) { + i += 1 + } + + const first = i < lineEnd ? text.charCodeAt(i) : -1 + let marker = false + + // Fence open/close (``` or ~~~, ≤3 spaces indent). + if ((first === BACKTICK || first === TILDE) && i - lineStart <= 3) { + let run = i + + while (run < lineEnd && text.charCodeAt(run) === first) { + run += 1 + } + + if (run - i >= 3) { + marker = true + + if (!inFence) { + inFence = true + fenceChar = first + fenceRun = run - i + } else if (first === fenceChar && run - i >= fenceRun && onlyWhitespace(text, run, lineEnd)) { + inFence = false + } + } + } + + // Toggle `$$` math state on plain lines ($$ inside a fence is literal). + if (!inFence && !marker) { + for (let s = text.indexOf('$$', lineStart); s !== -1 && s < lineEnd - 1; s = text.indexOf('$$', s + 2)) { + if (s === 0 || text.charCodeAt(s - 1) !== BACKSLASH) { + inMath = !inMath + } + } + } + + if (first === -1 && !inFence && !inMath) { + pending = lineEnd + 1 + } else if (pending !== -1) { + boundary = pending + pending = -1 + } + + lineStart = lineEnd + 1 + } + + return boundary +} + +function onlyWhitespace(text: string, from: number, to: number): boolean { + for (let i = from; i < to; i += 1) { + if (!isSpace(text.charCodeAt(i))) { + return false + } + } + + return true +} + +export function tailBoundedRemend(text: string): string { + const start = findRemendWindowStart(text) + + return start <= 0 ? remend(text) : text.slice(0, start) + remend(text.slice(start)) +} diff --git a/apps/desktop/src/store/session.ts b/apps/desktop/src/store/session.ts index dcf778c469..f1e1e2ee61 100644 --- a/apps/desktop/src/store/session.ts +++ b/apps/desktop/src/store/session.ts @@ -1,5 +1,6 @@ -import { atom } from 'nanostores' +import { atom, computed } from 'nanostores' +import { lastVisibleMessageIsUser } from '@/app/chat/thread-loading' import type { ContextSuggestion } from '@/app/types' import type { HermesConnection } from '@/global' import type { ChatMessage } from '@/lib/chat-messages' @@ -195,6 +196,15 @@ export const $workingSessionIds = atom([]) export const $activeSessionId = atom(null) export const $selectedStoredSessionId = atom(null) export const $messages = atom([]) + +// Streaming-stable derivations of $messages. During a token stream the array +// is replaced ~30×/s; components that only care about coarse facts (is the +// thread empty? is the tail a user message?) subscribe to these instead of +// $messages so per-token flushes don't re-render them — nanostores' `computed` +// only notifies when the derived VALUE changes. +export const $messagesEmpty = computed($messages, messages => messages.length === 0) +export const $lastVisibleMessageIsUser = computed($messages, lastVisibleMessageIsUser) + export const $freshDraftReady = atom(false) export const $busy = atom(false) export const $awaitingResponse = atom(false) diff --git a/nix/lib.nix b/nix/lib.nix index 1e6ad96a43..da5762ad44 100644 --- a/nix/lib.nix +++ b/nix/lib.nix @@ -21,7 +21,7 @@ let # Single npm deps fetch from the workspace root lockfile. # All workspace packages share this derivation. - npmDepsHash = "sha256-BfTSh6J2VZ/07tq2DYnKgUViZCgRhW1sC2uj18H65SE="; + npmDepsHash = "sha256-dFUlWvIIsCqvtGkoobs0qUzFlSdejuffI/uLoQxhW8Q="; npmDeps = pkgs.fetchNpmDeps { inherit src; diff --git a/package-lock.json b/package-lock.json index 018074f302..717f7a12c2 100644 --- a/package-lock.json +++ b/package-lock.json @@ -119,6 +119,7 @@ "react-router-dom": "^7.17.0", "react-shiki": "^0.9.3", "remark-math": "^6.0.0", + "remend": "^1.3.0", "shiki": "^4.0.2", "streamdown": "^2.5.0", "tailwind-merge": "^3.5.0",