mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-14 09:11:54 +00:00
perf(desktop): cut GUI streaming & interaction lag (#45343)
* perf(desktop): isolate streaming re-renders & cut layout thrash During a token stream $messages is replaced ~30x/s. Subscribing the whole chat view to it re-rendered the composer, runtime boundary, and every message on every delta. - Derive coarse facts (empty thread? tail is user?) via nanostores `computed` atoms so per-token flushes don't re-render their consumers. - Move the $messages subscription + runtime wiring into a dedicated ChatRuntimeBoundary; the composer reads $messages imperatively. - Drive message rows off stable useAuiState selectors and a lazy getMessageText getter instead of eagerly materialized text. - Feed ResizeObserver entry sizes into measureClamp / FadeText and dedupe the style writes, killing the read-write-read reflow cascade. * perf(desktop): incremental markdown rendering during streams Re-parsing the full message markdown every reveal frame is O(N^2) over a long answer and dominated stream CPU. - Throttle useSmoothReveal commits to ~1 frame (REVEAL_MIN_COMMIT_MS). - Memoize block parsing with an LRU keyed on source text so only changed blocks re-parse. - Replace Streamdown's full-text parseIncompleteMarkdown with a tail-bounded remend: scan to the last top-level boundary outside fences/math and repair only the trailing open block. New remend-tail.ts is proven render-equivalent to full remend at every streaming prefix (remend-tail.test.ts), minus an intentional, documented divergence on cross-block dangling openers. * perf(desktop): faster session resume & warm AudioContext at idle - Resume: fire the REST transcript prefetch and the session.resume RPC in parallel, and skip the redundant message conversion + reconciliation when the prefetch already hydrated the transcript. - Haptics: web-haptics builds its AudioContext lazily on first trigger, paying the ~850ms CoreAudio spin-up on the first streamStart haptic as the first token paints. Open/close a throwaway context at idle so the real one connects to an already-warm audio service.
This commit is contained in:
commit
492c402774
15 changed files with 655 additions and 165 deletions
|
|
@ -90,6 +90,7 @@
|
|||
"react-router-dom": "^7.17.0",
|
||||
"react-shiki": "^0.9.3",
|
||||
"remark-math": "^6.0.0",
|
||||
"remend": "^1.3.0",
|
||||
"shiki": "^4.0.2",
|
||||
"streamdown": "^2.5.0",
|
||||
"tailwind-merge": "^3.5.0",
|
||||
|
|
|
|||
|
|
@ -174,7 +174,6 @@ export function ChatBar({
|
|||
const queuedPromptsBySession = useStore($queuedPromptsBySession)
|
||||
const statusItemsBySession = useStore($statusItemsBySession)
|
||||
const scrolledUp = useStore($threadScrolledUp)
|
||||
const sessionMessages = useStore($messages)
|
||||
const activeQueueSessionKey = queueSessionKey || sessionId || null
|
||||
|
||||
const queuedPrompts = useMemo(
|
||||
|
|
@ -866,7 +865,9 @@ export function ChatBar({
|
|||
event.preventDefault()
|
||||
triggerKeyConsumedRef.current = true
|
||||
|
||||
const history = deriveUserHistory(sessionMessages, chatMessageText)
|
||||
// $messages is read imperatively (not subscribed) so the composer
|
||||
// doesn't re-render on every streaming delta flush.
|
||||
const history = deriveUserHistory($messages.get(), chatMessageText)
|
||||
const entry = browseBackward(sessionId, currentDraft, history)
|
||||
|
||||
if (entry !== null) {
|
||||
|
|
@ -891,7 +892,7 @@ export function ChatBar({
|
|||
event.preventDefault()
|
||||
triggerKeyConsumedRef.current = true
|
||||
|
||||
const history = deriveUserHistory(sessionMessages, chatMessageText)
|
||||
const history = deriveUserHistory($messages.get(), chatMessageText)
|
||||
const result = browseForward(sessionId, history)
|
||||
|
||||
if (result !== null) {
|
||||
|
|
|
|||
|
|
@ -35,7 +35,9 @@ import {
|
|||
$gatewayState,
|
||||
$introPersonality,
|
||||
$introSeed,
|
||||
$lastVisibleMessageIsUser,
|
||||
$messages,
|
||||
$messagesEmpty,
|
||||
$selectedStoredSessionId,
|
||||
$sessions,
|
||||
sessionPinId
|
||||
|
|
@ -55,7 +57,7 @@ import { type DroppedFile, partitionDroppedFiles } from './hooks/use-composer-ac
|
|||
import { useFileDropZone } from './hooks/use-file-drop-zone'
|
||||
import { ScrollToBottomButton } from './scroll-to-bottom-button'
|
||||
import { SessionActionsMenu } from './sidebar/session-actions-menu'
|
||||
import { lastVisibleMessageIsUser, threadLoadingState } from './thread-loading'
|
||||
import { threadLoadingState } from './thread-loading'
|
||||
|
||||
interface ChatViewProps extends Omit<React.ComponentProps<'div'>, 'onSubmit'> {
|
||||
gateway: HermesGateway | null
|
||||
|
|
@ -156,105 +158,35 @@ function ChatHeader({
|
|||
)
|
||||
}
|
||||
|
||||
export function ChatView({
|
||||
className,
|
||||
gateway,
|
||||
onToggleSelectedPin,
|
||||
onDeleteSelectedSession,
|
||||
interface ChatRuntimeBoundaryProps {
|
||||
busy: boolean
|
||||
children: React.ReactNode
|
||||
onCancel: () => Promise<void> | void
|
||||
onEdit: (message: AppendMessage) => Promise<void>
|
||||
onReload: (parentId: string | null) => Promise<void>
|
||||
onThreadMessagesChange: (messages: readonly ThreadMessage[]) => void
|
||||
}
|
||||
|
||||
/**
|
||||
* Owns the $messages subscription and the assistant-ui external-store runtime.
|
||||
*
|
||||
* Isolated from ChatView so the per-token delta flush (which replaces the
|
||||
* $messages atom ~30×/s during streaming) only re-renders this component and
|
||||
* the runtime provider. The children (Thread, ChatBar) are created by
|
||||
* ChatView, whose render output is stable across flushes — so React bails out
|
||||
* of re-rendering them by element identity and the stream's render cost stays
|
||||
* confined to the streaming message's own subtree.
|
||||
*/
|
||||
function ChatRuntimeBoundary({
|
||||
busy,
|
||||
children,
|
||||
onCancel,
|
||||
onAddContextRef,
|
||||
onAddUrl,
|
||||
onAttachImageBlob,
|
||||
onAttachDroppedItems,
|
||||
onBranchInNewChat,
|
||||
maxVoiceRecordingSeconds,
|
||||
onPasteClipboardImage,
|
||||
onPickFiles,
|
||||
onPickFolders,
|
||||
onPickImages,
|
||||
onRemoveAttachment,
|
||||
onSteer,
|
||||
onSubmit,
|
||||
onThreadMessagesChange,
|
||||
onEdit,
|
||||
onReload,
|
||||
onRestoreToMessage,
|
||||
onTranscribeAudio
|
||||
}: ChatViewProps) {
|
||||
const location = useLocation()
|
||||
const activeSessionId = useStore($activeSessionId)
|
||||
const awaitingResponse = useStore($awaitingResponse)
|
||||
const busy = useStore($busy)
|
||||
const contextSuggestions = useStore($contextSuggestions)
|
||||
const currentCwd = useStore($currentCwd)
|
||||
const currentModel = useStore($currentModel)
|
||||
const currentProvider = useStore($currentProvider)
|
||||
const freshDraftReady = useStore($freshDraftReady)
|
||||
const gatewayState = useStore($gatewayState)
|
||||
const gatewaySwapTarget = useStore($gatewaySwapTarget)
|
||||
const gatewayOpen = gatewayState === 'open'
|
||||
const introPersonality = useStore($introPersonality)
|
||||
const introSeed = useStore($introSeed)
|
||||
onThreadMessagesChange
|
||||
}: ChatRuntimeBoundaryProps) {
|
||||
const messages = useStore($messages)
|
||||
const selectedSessionId = useStore($selectedStoredSessionId)
|
||||
const runtimeMessageCacheRef = useRef(new WeakMap<ChatMessage, ThreadMessage>())
|
||||
const isRoutedSessionView = Boolean(routeSessionId(location.pathname))
|
||||
|
||||
const showIntro =
|
||||
freshDraftReady && !isRoutedSessionView && !selectedSessionId && !activeSessionId && messages.length === 0
|
||||
|
||||
// Session is still loading if the route references a session we haven't
|
||||
// resumed yet. Once `activeSessionId` is set (runtime has resumed), the
|
||||
// session exists — even if it has zero messages (a brand-new routed
|
||||
// session). The flicker where `busy` flips true briefly during hydrate
|
||||
// is handled by `threadLoadingState`'s last-visible-user gate.
|
||||
const loadingSession = isRoutedSessionView && messages.length === 0 && !activeSessionId
|
||||
const threadLoading = threadLoadingState(loadingSession, busy, awaitingResponse, lastVisibleMessageIsUser(messages))
|
||||
const showChatBar = !loadingSession
|
||||
const threadKey = selectedSessionId || activeSessionId || (isRoutedSessionView ? location.pathname : 'new')
|
||||
|
||||
const modelOptionsQuery = useQuery<ModelOptionsResponse>({
|
||||
queryKey: ['model-options', activeSessionId || 'global'],
|
||||
queryFn: () => {
|
||||
if (!activeSessionId) {
|
||||
return getGlobalModelOptions()
|
||||
}
|
||||
|
||||
if (!gateway) {
|
||||
throw new Error('Hermes gateway unavailable')
|
||||
}
|
||||
|
||||
return gateway.request<ModelOptionsResponse>('model.options', { session_id: activeSessionId })
|
||||
},
|
||||
enabled: gatewayOpen
|
||||
})
|
||||
|
||||
const quickModels = useMemo(
|
||||
() => quickModelOptions(modelOptionsQuery.data, currentProvider, currentModel),
|
||||
[currentModel, currentProvider, modelOptionsQuery.data]
|
||||
)
|
||||
|
||||
const chatBarState = useMemo<ChatBarState>(
|
||||
() => ({
|
||||
model: {
|
||||
model: currentModel,
|
||||
provider: currentProvider,
|
||||
canSwitch: gatewayOpen,
|
||||
loading: !gatewayOpen || (!currentModel && !currentProvider),
|
||||
quickModels
|
||||
},
|
||||
tools: {
|
||||
enabled: true,
|
||||
label: 'Add context',
|
||||
suggestions: contextSuggestions
|
||||
},
|
||||
voice: {
|
||||
enabled: true,
|
||||
active: false
|
||||
}
|
||||
}),
|
||||
[contextSuggestions, currentModel, currentProvider, gatewayOpen, quickModels]
|
||||
)
|
||||
|
||||
const runtimeMessageRepository = useMemo(() => {
|
||||
const items: { message: ThreadMessage; parentId: string | null }[] = []
|
||||
|
|
@ -304,6 +236,113 @@ export function ChatView({
|
|||
onReload
|
||||
})
|
||||
|
||||
return <AssistantRuntimeProvider runtime={runtime}>{children}</AssistantRuntimeProvider>
|
||||
}
|
||||
|
||||
export function ChatView({
|
||||
className,
|
||||
gateway,
|
||||
onToggleSelectedPin,
|
||||
onDeleteSelectedSession,
|
||||
onCancel,
|
||||
onAddContextRef,
|
||||
onAddUrl,
|
||||
onAttachImageBlob,
|
||||
onAttachDroppedItems,
|
||||
onBranchInNewChat,
|
||||
maxVoiceRecordingSeconds,
|
||||
onPasteClipboardImage,
|
||||
onPickFiles,
|
||||
onPickFolders,
|
||||
onPickImages,
|
||||
onRemoveAttachment,
|
||||
onSteer,
|
||||
onSubmit,
|
||||
onThreadMessagesChange,
|
||||
onEdit,
|
||||
onReload,
|
||||
onRestoreToMessage,
|
||||
onTranscribeAudio
|
||||
}: ChatViewProps) {
|
||||
const location = useLocation()
|
||||
const activeSessionId = useStore($activeSessionId)
|
||||
const awaitingResponse = useStore($awaitingResponse)
|
||||
const busy = useStore($busy)
|
||||
const contextSuggestions = useStore($contextSuggestions)
|
||||
const currentCwd = useStore($currentCwd)
|
||||
const currentModel = useStore($currentModel)
|
||||
const currentProvider = useStore($currentProvider)
|
||||
const freshDraftReady = useStore($freshDraftReady)
|
||||
const gatewayState = useStore($gatewayState)
|
||||
const gatewaySwapTarget = useStore($gatewaySwapTarget)
|
||||
const gatewayOpen = gatewayState === 'open'
|
||||
const introPersonality = useStore($introPersonality)
|
||||
const introSeed = useStore($introSeed)
|
||||
// PERF: ChatView must not subscribe to $messages — the atom is replaced on
|
||||
// every streaming delta flush (~30×/s) and a subscription here re-renders
|
||||
// the entire chat shell (header, chat bar, thread wrapper) per token. The
|
||||
// runtime that DOES need the messages lives in ChatRuntimeBoundary below;
|
||||
// this component only needs streaming-stable derivations.
|
||||
const messagesEmpty = useStore($messagesEmpty)
|
||||
const lastVisibleIsUser = useStore($lastVisibleMessageIsUser)
|
||||
const selectedSessionId = useStore($selectedStoredSessionId)
|
||||
const isRoutedSessionView = Boolean(routeSessionId(location.pathname))
|
||||
|
||||
const showIntro = freshDraftReady && !isRoutedSessionView && !selectedSessionId && !activeSessionId && messagesEmpty
|
||||
|
||||
// Session is still loading if the route references a session we haven't
|
||||
// resumed yet. Once `activeSessionId` is set (runtime has resumed), the
|
||||
// session exists — even if it has zero messages (a brand-new routed
|
||||
// session). The flicker where `busy` flips true briefly during hydrate
|
||||
// is handled by `threadLoadingState`'s last-visible-user gate.
|
||||
const loadingSession = isRoutedSessionView && messagesEmpty && !activeSessionId
|
||||
const threadLoading = threadLoadingState(loadingSession, busy, awaitingResponse, lastVisibleIsUser)
|
||||
const showChatBar = !loadingSession
|
||||
const threadKey = selectedSessionId || activeSessionId || (isRoutedSessionView ? location.pathname : 'new')
|
||||
|
||||
const modelOptionsQuery = useQuery<ModelOptionsResponse>({
|
||||
queryKey: ['model-options', activeSessionId || 'global'],
|
||||
queryFn: () => {
|
||||
if (!activeSessionId) {
|
||||
return getGlobalModelOptions()
|
||||
}
|
||||
|
||||
if (!gateway) {
|
||||
throw new Error('Hermes gateway unavailable')
|
||||
}
|
||||
|
||||
return gateway.request<ModelOptionsResponse>('model.options', { session_id: activeSessionId })
|
||||
},
|
||||
enabled: gatewayOpen
|
||||
})
|
||||
|
||||
const quickModels = useMemo(
|
||||
() => quickModelOptions(modelOptionsQuery.data, currentProvider, currentModel),
|
||||
[currentModel, currentProvider, modelOptionsQuery.data]
|
||||
)
|
||||
|
||||
const chatBarState = useMemo<ChatBarState>(
|
||||
() => ({
|
||||
model: {
|
||||
model: currentModel,
|
||||
provider: currentProvider,
|
||||
canSwitch: gatewayOpen,
|
||||
loading: !gatewayOpen || (!currentModel && !currentProvider),
|
||||
quickModels
|
||||
},
|
||||
tools: {
|
||||
enabled: true,
|
||||
label: 'Add context',
|
||||
suggestions: contextSuggestions
|
||||
},
|
||||
voice: {
|
||||
enabled: true,
|
||||
active: false
|
||||
}
|
||||
}),
|
||||
[contextSuggestions, currentModel, currentProvider, gatewayOpen, quickModels]
|
||||
)
|
||||
|
||||
// Drop files anywhere in the conversation area, not just on the composer
|
||||
// input. In-app drags (project tree / gutter) carry workspace-relative paths
|
||||
// the gateway resolves directly, so they stay inline `@file:` refs. OS/Finder
|
||||
|
|
@ -356,7 +395,13 @@ export function ChatView({
|
|||
className="relative min-h-0 max-w-full flex-1 overflow-hidden bg-(--ui-chat-surface-background) contain-[layout_paint]"
|
||||
{...dropHandlers}
|
||||
>
|
||||
<AssistantRuntimeProvider runtime={runtime}>
|
||||
<ChatRuntimeBoundary
|
||||
busy={busy}
|
||||
onCancel={onCancel}
|
||||
onEdit={onEdit}
|
||||
onReload={onReload}
|
||||
onThreadMessagesChange={onThreadMessagesChange}
|
||||
>
|
||||
<Thread
|
||||
clampToComposer={showChatBar}
|
||||
cwd={currentCwd}
|
||||
|
|
@ -397,7 +442,7 @@ export function ChatView({
|
|||
/>
|
||||
</Suspense>
|
||||
)}
|
||||
</AssistantRuntimeProvider>
|
||||
</ChatRuntimeBoundary>
|
||||
{showChatBar && <ScrollToBottomButton />}
|
||||
<ChatDropOverlay kind={dragKind} />
|
||||
<ChatSwapOverlay profile={gatewaySwapTarget} />
|
||||
|
|
|
|||
|
|
@ -3,9 +3,14 @@ import type { ChatMessage } from '@/lib/chat-messages'
|
|||
export type ThreadLoadingState = 'response' | 'session'
|
||||
|
||||
/**
 * Reports whether the last non-hidden message in `messages` has the `'user'` role.
 *
 * The array is walked from its end by index, so no reversed copy is built;
 * the original note on this function says it runs inside a hot `$messages`
 * computed, which is why the scan avoids allocating.
 *
 * @param messages - Messages to inspect; entries with a truthy `hidden` flag are skipped.
 * @returns `true` when the last visible entry has role `'user'`; `false` otherwise,
 *   including when the array is empty or every entry is hidden.
 */
export function lastVisibleMessageIsUser(messages: readonly ChatMessage[]): boolean {
  for (let i = messages.length - 1; i >= 0; i -= 1) {
    const message = messages[i]

    // The first visible entry found from the tail decides the answer.
    if (message && !message.hidden) {
      return message.role === 'user'
    }
  }

  return false
}
|
||||
|
||||
export function threadLoadingState(
|
||||
|
|
|
|||
|
|
@ -618,10 +618,26 @@ export function useSessionActions({
|
|||
const watchWindow = isWatchWindow()
|
||||
let localSnapshot = $messages.get()
|
||||
|
||||
// REST transcript prefetch and the gateway resume RPC are independent
|
||||
// — run them concurrently so a big session's wall time is
|
||||
// max(prefetch, resume) instead of their sum. The prefetch paints the
|
||||
// transcript as soon as it lands; the RPC binds the runtime id.
|
||||
// Watch windows skip the prefetch — lazy resume attaches the live mirror.
|
||||
const prefetchPromise = watchWindow ? null : getSessionMessages(storedSessionId, sessionProfile)
|
||||
|
||||
const resumePromise = requestGateway<SessionResumeResponse>('session.resume', {
|
||||
session_id: storedSessionId,
|
||||
cols: 96,
|
||||
...(watchWindow ? { lazy: true } : {}),
|
||||
...(sessionProfile ? { profile: sessionProfile } : {})
|
||||
})
|
||||
// The rejection is consumed by the `await` below; this guard only
|
||||
// keeps it from surfacing as unhandled while the prefetch settles.
|
||||
resumePromise.catch(() => undefined)
|
||||
|
||||
try {
|
||||
// Watch windows skip REST prefetch — lazy resume attaches the live mirror.
|
||||
if (!watchWindow) {
|
||||
const storedMessages = await getSessionMessages(storedSessionId, sessionProfile)
|
||||
if (prefetchPromise) {
|
||||
const storedMessages = await prefetchPromise
|
||||
|
||||
if (isCurrentResume()) {
|
||||
localSnapshot = preserveLocalAssistantErrors(toChatMessages(storedMessages.messages), $messages.get())
|
||||
|
|
@ -635,12 +651,7 @@ export function useSessionActions({
|
|||
// Non-fatal: gateway resume below can still hydrate the session.
|
||||
}
|
||||
|
||||
const resumed = await requestGateway<SessionResumeResponse>('session.resume', {
|
||||
session_id: storedSessionId,
|
||||
cols: 96,
|
||||
...(watchWindow ? { lazy: true } : {}),
|
||||
...(sessionProfile ? { profile: sessionProfile } : {})
|
||||
})
|
||||
const resumed = await resumePromise
|
||||
|
||||
if (!isCurrentResume()) {
|
||||
return
|
||||
|
|
@ -648,17 +659,22 @@ export function useSessionActions({
|
|||
|
||||
const currentMessages = $messages.get()
|
||||
|
||||
const resumedMessages = preserveLocalAssistantErrors(
|
||||
reconcileResumeMessages(toChatMessages(resumed.messages), currentMessages),
|
||||
currentMessages
|
||||
)
|
||||
// Keep the local snapshot when resume would only reshuffle runtime projection.
|
||||
// Keep the local snapshot when resume would only reshuffle runtime
|
||||
// projection. When the REST prefetch already hydrated the transcript,
|
||||
// skip converting/reconciling the resume payload entirely — on a
|
||||
// 1000+-message session that second conversion plus the deep
|
||||
// equivalence compare costs over a second of main-thread time.
|
||||
const preferredMessages =
|
||||
localSnapshot.length > 0
|
||||
? localSnapshot
|
||||
: chatMessageArraysEquivalent(currentMessages, resumedMessages)
|
||||
? currentMessages
|
||||
: resumedMessages
|
||||
: (() => {
|
||||
const resumedMessages = preserveLocalAssistantErrors(
|
||||
reconcileResumeMessages(toChatMessages(resumed.messages), currentMessages),
|
||||
currentMessages
|
||||
)
|
||||
|
||||
return chatMessageArraysEquivalent(currentMessages, resumedMessages) ? currentMessages : resumedMessages
|
||||
})()
|
||||
|
||||
const messagesForView = preserveLocalAssistantErrors(preferredMessages, currentMessages)
|
||||
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
import { TextMessagePartProvider, useMessagePartText } from '@assistant-ui/react'
|
||||
import {
|
||||
parseMarkdownIntoBlocks,
|
||||
type StreamdownTextComponents,
|
||||
StreamdownTextPrimitive,
|
||||
type SyntaxHighlighterProps
|
||||
|
|
@ -26,6 +27,7 @@ import {
|
|||
mediaStreamUrl
|
||||
} from '@/lib/media'
|
||||
import { previewTargetFromMarkdownHref } from '@/lib/preview-targets'
|
||||
import { tailBoundedRemend } from '@/lib/remend-tail'
|
||||
import { cn } from '@/lib/utils'
|
||||
|
||||
// Math rendering plugin (KaTeX). Configured once at module scope — the
|
||||
|
|
@ -42,6 +44,51 @@ import { cn } from '@/lib/utils'
|
|||
// LLM convention). The default false-setting only accepts `$$...$$`.
|
||||
const mathPlugin = createMemoizedMathPlugin({ singleDollarTextMath: true })
|
||||
|
||||
// Replaces Streamdown's `parseIncompleteMarkdown` (full-text remend per
|
||||
// flush) with a tail-bounded repair — see lib/remend-tail.ts. Must stay
|
||||
// module-scope so the prop identity is stable across renders.
|
||||
/**
 * Preprocess hook handed to the Streamdown text primitive: runs the regular
 * `preprocessMarkdown` pass first, then `tailBoundedRemend` on its output.
 *
 * @param text - Raw markdown source for the message part.
 * @returns The preprocessed markdown after the tail-bounded repair pass.
 */
function preprocessWithTailRepair(text: string): string {
  const preprocessed = preprocessMarkdown(text)

  return tailBoundedRemend(preprocessed)
}
|
||||
|
||||
// Memoized block splitter. Streamdown calls `parseMarkdownIntoBlocks` (a full
|
||||
// `marked` lex of the entire message, ~1.6ms per 28KB) inside a useMemo keyed
|
||||
// on the text — but the same text is re-lexed every time a message REMOUNTS
|
||||
// (virtualizer scroll, session switch) and whenever multiple surfaces render
|
||||
// the same content (deferred + smooth reveal republish). A small module-level
|
||||
// LRU keyed by the exact source string removes all of those repeat parses
|
||||
// with zero correctness risk (same input → same output). Streaming tail
|
||||
// growth misses the cache by design (every flush is a new string) — that
|
||||
// single lex is the irreducible cost.
|
||||
const BLOCK_CACHE_MAX = 64
|
||||
const BLOCK_CACHE_MIN_LENGTH = 1024
|
||||
const blockCache = new Map<string, string[]>()
|
||||
|
||||
/**
 * `parseMarkdownIntoBlocks` with a small module-level LRU in front of it.
 *
 * Inputs shorter than `BLOCK_CACHE_MIN_LENGTH` are parsed directly and never
 * cached. Longer inputs are keyed by the exact source string; a hit is moved
 * to the most-recent position, and a miss that pushes the cache past
 * `BLOCK_CACHE_MAX` entries evicts the oldest one.
 *
 * @param markdown - Full markdown source to split into blocks.
 * @returns The block strings for `markdown`; cached calls return the same array instance.
 */
function parseMarkdownIntoBlocksCached(markdown: string): string[] {
  if (markdown.length < BLOCK_CACHE_MIN_LENGTH) {
    return parseMarkdownIntoBlocks(markdown)
  }

  const cached = blockCache.get(markdown)

  if (!cached) {
    const parsed = parseMarkdownIntoBlocks(markdown)

    blockCache.set(markdown, parsed)

    // Map iterates in insertion order, so the first key is the least recently used.
    if (blockCache.size > BLOCK_CACHE_MAX) {
      blockCache.delete(blockCache.keys().next().value as string)
    }

    return parsed
  }

  // Re-insert so this entry becomes the most recently used one.
  blockCache.delete(markdown)
  blockCache.set(markdown, cached)

  return cached
}
|
||||
|
||||
async function mediaSrc(path: string): Promise<string> {
|
||||
if (/^(?:https?|data):/i.test(path)) {
|
||||
return path
|
||||
|
|
@ -241,6 +288,13 @@ function MarkdownImage({ className, src, alt, ...props }: ComponentProps<'img'>)
|
|||
// keeps draining its tail instead of snapping.
|
||||
const REVEAL_DRAIN_MS = 500
|
||||
const REVEAL_MAX_CHARS_PER_FRAME = 30
|
||||
// Floor between reveal commits. Each commit republishes the text context and
|
||||
// re-runs the whole Streamdown pipeline (preprocess → remend → lex → micromark
|
||||
// on the open block) over the full accumulated text — at raw rAF cadence
|
||||
// that's 60 full parses/second and was the dominant streaming cost for
|
||||
// reasoning text. ~33ms keeps the reveal visually fluid (2 frames) while
|
||||
// halving the parse work.
|
||||
const REVEAL_MIN_COMMIT_MS = 33
|
||||
|
||||
function useSmoothReveal(text: string, isRunning: boolean): string {
|
||||
const [displayed, setDisplayed] = useState(isRunning ? '' : text)
|
||||
|
|
@ -273,10 +327,27 @@ function useSmoothReveal(text: string, isRunning: boolean): string {
|
|||
const tick = () => {
|
||||
const now = performance.now()
|
||||
const dt = now - lastTickRef.current
|
||||
|
||||
// Skip this frame if the floor hasn't elapsed — the backlog math below
|
||||
// is dt-proportional, so delayed commits reveal proportionally more.
|
||||
if (dt < REVEAL_MIN_COMMIT_MS) {
|
||||
frameRef.current = requestAnimationFrame(tick)
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
lastTickRef.current = now
|
||||
|
||||
const remaining = targetRef.current.length - shownRef.current.length
|
||||
const add = Math.min(remaining, REVEAL_MAX_CHARS_PER_FRAME, Math.max(1, Math.ceil((remaining * dt) / REVEAL_DRAIN_MS)))
|
||||
|
||||
const add = Math.min(
|
||||
remaining,
|
||||
// dt-scaled so the per-commit cap stays equivalent to the old
|
||||
// per-frame cap at any commit cadence.
|
||||
Math.ceil((REVEAL_MAX_CHARS_PER_FRAME * dt) / 16.7),
|
||||
Math.max(1, Math.ceil((remaining * dt) / REVEAL_DRAIN_MS))
|
||||
)
|
||||
|
||||
shownRef.current = targetRef.current.slice(0, shownRef.current.length + add)
|
||||
setDisplayed(shownRef.current)
|
||||
|
||||
|
|
@ -460,17 +531,20 @@ function MarkdownTextSurface({ containerClassName, containerProps }: MarkdownTex
|
|||
containerProps={containerProps}
|
||||
lineNumbers={false}
|
||||
mode="streaming"
|
||||
// Always auto-close incomplete fences — even during streaming.
|
||||
// Without this, an unclosed ```python ... ``` whose body contains
|
||||
// `$` (very common: shell snippets, JS template strings, dollar
|
||||
// amounts) leaks those dollars out to the math parser and they
|
||||
// get rendered as broken inline math until the closing fence
|
||||
// arrives. Shiki is independently deferred via `defer={isStreaming}`
|
||||
// on the SyntaxHighlighter component, so we don't pay code-block
|
||||
// tokenization on every token even with this set.
|
||||
parseIncompleteMarkdown
|
||||
// Incomplete-markdown repair is handled by `preprocessWithTailRepair`
|
||||
// below (tail-bounded remend) instead of Streamdown's built-in pass,
|
||||
// which re-runs remend over the ENTIRE message on every flush — ~18%
|
||||
// of streaming script time on 50KB+ messages. The repair itself stays
|
||||
// always-on (even between flushes / for completed messages): an
|
||||
// unclosed ```python ... ``` whose body contains `$` (shell snippets,
|
||||
// JS template strings, dollar amounts) would otherwise leak those
|
||||
// dollars to the math parser and render broken inline math. Shiki is
|
||||
// independently deferred via `defer={isStreaming}` on the
|
||||
// SyntaxHighlighter component.
|
||||
parseIncompleteMarkdown={false}
|
||||
parseMarkdownIntoBlocksFn={parseMarkdownIntoBlocksCached}
|
||||
plugins={plugins}
|
||||
preprocess={preprocessMarkdown}
|
||||
preprocess={preprocessWithTailRepair}
|
||||
/>
|
||||
)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,7 +7,8 @@ import {
|
|||
MessagePrimitive,
|
||||
type ToolCallMessagePartProps,
|
||||
useAui,
|
||||
useAuiState
|
||||
useAuiState,
|
||||
useMessageRuntime
|
||||
} from '@assistant-ui/react'
|
||||
import { useStore } from '@nanostores/react'
|
||||
import { IconPlayerStopFilled } from '@tabler/icons-react'
|
||||
|
|
@ -105,7 +106,11 @@ type ThreadLoadingState = 'response' | 'session'
|
|||
|
||||
interface MessageActionProps {
|
||||
messageId: string
|
||||
messageText: string
|
||||
/** Lazy accessor — reads the live message text at action time. Passing the
|
||||
* text itself as a prop forces the whole footer to re-render on every
|
||||
* streaming delta flush (the text changes ~30×/s), which profiling showed
|
||||
* was a large slice of per-token script time on long transcripts. */
|
||||
getMessageText: () => string
|
||||
onBranchInNewChat?: (messageId: string) => void
|
||||
}
|
||||
|
||||
|
|
@ -133,6 +138,28 @@ function messageContentText(content: unknown): string {
|
|||
return Array.isArray(content) ? content.map(partText).join('').trim() : ''
|
||||
}
|
||||
|
||||
// Cheap streaming-stable "does this message have visible text" check: returns
|
||||
// on the first non-whitespace text part without concatenating the whole
|
||||
// message. Used as a useAuiState selector so its boolean output stays stable
|
||||
// across token flushes (flips false→true once per turn).
|
||||
/**
 * Tells whether message content contains any non-whitespace text.
 *
 * @param content - A string, an array of message parts, or anything else.
 * @returns `true` for a string with non-whitespace characters, or for an array
 *   where some part's `partText` has non-whitespace characters; `false` for
 *   every other input.
 */
function contentHasVisibleText(content: unknown): boolean {
  if (Array.isArray(content)) {
    // Stop at the first part with visible text; later parts are never joined or read.
    for (let index = 0; index < content.length; index += 1) {
      const visible = partText(content[index]).trim()

      if (visible.length > 0) {
        return true
      }
    }

    return false
  }

  return typeof content === 'string' && content.trim().length > 0
}
|
||||
|
||||
export const Thread: FC<{
|
||||
clampToComposer?: boolean
|
||||
cwd?: string | null
|
||||
|
|
@ -221,20 +248,39 @@ const CenteredThreadSpinner: FC = () => {
|
|||
|
||||
const AssistantMessage: FC<{ onBranchInNewChat?: (messageId: string) => void }> = ({ onBranchInNewChat }) => {
|
||||
const messageId = useAuiState(s => s.message.id)
|
||||
const content = useAuiState(s => s.message.content)
|
||||
const messageText = messageContentText(content)
|
||||
const messageRuntime = useMessageRuntime()
|
||||
|
||||
// PERF: this component must NOT subscribe to the streaming text. Every
|
||||
// selector here returns a value that stays referentially stable across
|
||||
// token flushes (booleans, status strings, '' while running), so the
|
||||
// 30 Hz delta stream only re-renders the markdown part and the tiny
|
||||
// StreamStallIndicator leaf — not the footer/preview/root subtree.
|
||||
const messageStatus = useAuiState(s => s.message.status?.type)
|
||||
const isRunning = messageStatus === 'running'
|
||||
const isPlaceholder = useAuiState(s => s.message.status?.type === 'running' && s.message.content.length === 0)
|
||||
const hasVisibleText = useAuiState(s => contentHasVisibleText(s.message.content))
|
||||
|
||||
// Preview targets only materialize once the turn completes — while running
|
||||
// the selector returns '' (stable), so per-token flushes skip the regex
|
||||
// scan and the re-render it would cause.
|
||||
const completedText = useAuiState(s =>
|
||||
s.message.status?.type === 'running' ? '' : messageContentText(s.message.content)
|
||||
)
|
||||
|
||||
const previewTargets = useMemo(() => {
|
||||
if (!messageText || !/(https?:\/\/|file:\/\/)/i.test(messageText)) {
|
||||
if (!completedText || !/(https?:\/\/|file:\/\/)/i.test(completedText)) {
|
||||
return []
|
||||
}
|
||||
|
||||
return pickPrimaryPreviewTarget(extractPreviewTargets(messageText))
|
||||
}, [messageText])
|
||||
return pickPrimaryPreviewTarget(extractPreviewTargets(completedText))
|
||||
}, [completedText])
|
||||
|
||||
const messageStatus = useAuiState(s => s.message.status?.type)
|
||||
const isPlaceholder = messageStatus === 'running' && content.length === 0
|
||||
const enterRef = useEnterAnimation(messageStatus === 'running', `assistant-message:${messageId}`)
|
||||
const getMessageText = useCallback(
|
||||
() => messageContentText(messageRuntime.getState().content),
|
||||
[messageRuntime]
|
||||
)
|
||||
|
||||
const enterRef = useEnterAnimation(isRunning, `assistant-message:${messageId}`)
|
||||
|
||||
if (isPlaceholder) {
|
||||
return null
|
||||
|
|
@ -245,7 +291,7 @@ const AssistantMessage: FC<{ onBranchInNewChat?: (messageId: string) => void }>
|
|||
className="group flex w-full min-w-0 max-w-full flex-col gap-0 self-start overflow-hidden"
|
||||
data-role="assistant"
|
||||
data-slot="aui_assistant-message-root"
|
||||
data-streaming={messageStatus === 'running' ? 'true' : undefined}
|
||||
data-streaming={isRunning ? 'true' : undefined}
|
||||
ref={enterRef}
|
||||
>
|
||||
<div
|
||||
|
|
@ -254,7 +300,7 @@ const AssistantMessage: FC<{ onBranchInNewChat?: (messageId: string) => void }>
|
|||
>
|
||||
{/* Todos render in the composer status stack now, not inline. */}
|
||||
<MessagePrimitive.Parts components={MESSAGE_PARTS_COMPONENTS} />
|
||||
{messageStatus === 'running' && <StreamStallIndicator activity={`${content.length}:${messageText.length}`} />}
|
||||
{isRunning && <StreamStallIndicator />}
|
||||
{previewTargets.length > 0 && (
|
||||
<div className="mt-3 flex flex-wrap gap-2">
|
||||
{previewTargets.map(target => (
|
||||
|
|
@ -271,8 +317,8 @@ const AssistantMessage: FC<{ onBranchInNewChat?: (messageId: string) => void }>
|
|||
</ErrorPrimitive.Root>
|
||||
</MessagePrimitive.Error>
|
||||
</div>
|
||||
{messageText.trim().length > 0 && (
|
||||
<AssistantFooter messageId={messageId} messageText={messageText} onBranchInNewChat={onBranchInNewChat} />
|
||||
{hasVisibleText && (
|
||||
<AssistantFooter getMessageText={getMessageText} messageId={messageId} onBranchInNewChat={onBranchInNewChat} />
|
||||
)}
|
||||
</MessagePrimitive.Root>
|
||||
)
|
||||
|
|
@ -313,10 +359,28 @@ const STREAM_STALL_S = 2
|
|||
|
||||
// Tail "still thinking" indicator: the pre-first-token spinner goes away once
|
||||
// text flows, but if the stream then goes quiet mid-turn (tool think-time,
|
||||
// provider stall) nothing signals that work continues. Watch a per-render
|
||||
// provider stall) nothing signals that work continues. Watch a per-flush
|
||||
// activity signal; when it hasn't changed for STREAM_STALL_S, re-show the
|
||||
// dither + a timer counting from the last activity.
|
||||
const StreamStallIndicator: FC<{ activity: string }> = ({ activity }) => {
|
||||
//
|
||||
// Subscribes to the activity signal ITSELF (rather than taking it as a prop)
|
||||
// so that per-token updates re-render only this leaf, not the whole
|
||||
// AssistantMessage subtree.
|
||||
const StreamStallIndicator: FC = () => {
|
||||
const activity = useAuiState(s => {
|
||||
let textLength = 0
|
||||
|
||||
for (const part of s.message.content) {
|
||||
const text = (part as { text?: unknown }).text
|
||||
|
||||
if (typeof text === 'string') {
|
||||
textLength += text.length
|
||||
}
|
||||
}
|
||||
|
||||
return `${s.message.content.length}:${textLength}`
|
||||
})
|
||||
|
||||
const [stalled, setStalled] = useState(false)
|
||||
|
||||
useEffect(() => {
|
||||
|
|
@ -584,7 +648,7 @@ function formatMessageTimestamp(
|
|||
return SHORT_FMT.format(date)
|
||||
}
|
||||
|
||||
const AssistantActionBar: FC<MessageActionProps> = ({ messageId, messageText, onBranchInNewChat }) => {
|
||||
const AssistantActionBar: FC<MessageActionProps> = ({ messageId, getMessageText, onBranchInNewChat }) => {
|
||||
const { t } = useI18n()
|
||||
const copy = t.assistant.thread
|
||||
const [menuOpen, setMenuOpen] = useState(false)
|
||||
|
|
@ -605,7 +669,7 @@ const AssistantActionBar: FC<MessageActionProps> = ({ messageId, messageText, on
|
|||
)}
|
||||
data-slot="aui_msg-actions"
|
||||
>
|
||||
<CopyButton appearance="icon" buttonSize="icon" disabled={!messageText} label={copy.copy} text={messageText} />
|
||||
<CopyButton appearance="icon" buttonSize="icon" label={copy.copy} text={getMessageText} />
|
||||
<ActionBarPrimitive.Reload asChild>
|
||||
<TooltipIconButton onClick={() => triggerHaptic('submit')} tooltip={copy.refresh}>
|
||||
<Codicon name="refresh" />
|
||||
|
|
@ -623,7 +687,7 @@ const AssistantActionBar: FC<MessageActionProps> = ({ messageId, messageText, on
|
|||
<GitBranchIcon />
|
||||
{copy.branchNewChat}
|
||||
</DropdownMenuItem>
|
||||
<ReadAloudItem messageId={messageId} text={messageText} />
|
||||
<ReadAloudItem getText={getMessageText} messageId={messageId} />
|
||||
</DropdownMenuContent>
|
||||
</DropdownMenu>
|
||||
</ActionBarPrimitive.Root>
|
||||
|
|
@ -631,7 +695,7 @@ const AssistantActionBar: FC<MessageActionProps> = ({ messageId, messageText, on
|
|||
)
|
||||
}
|
||||
|
||||
const ReadAloudItem: FC<{ messageId: string; text: string }> = ({ messageId, text }) => {
|
||||
const ReadAloudItem: FC<{ getText: () => string; messageId: string }> = ({ getText, messageId }) => {
|
||||
const { t } = useI18n()
|
||||
const copy = t.assistant.thread
|
||||
const voicePlayback = useStore($voicePlayback)
|
||||
|
|
@ -645,6 +709,8 @@ const ReadAloudItem: FC<{ messageId: string; text: string }> = ({ messageId, tex
|
|||
const Icon = isPreparing ? Loader2Icon : isSpeaking ? VolumeXIcon : Volume2Icon
|
||||
|
||||
const read = useCallback(async () => {
|
||||
const text = getText()
|
||||
|
||||
if (!text || $voicePlayback.get().status !== 'idle') {
|
||||
return
|
||||
}
|
||||
|
|
@ -654,11 +720,11 @@ const ReadAloudItem: FC<{ messageId: string; text: string }> = ({ messageId, tex
|
|||
} catch (error) {
|
||||
notifyError(error, copy.readAloudFailed)
|
||||
}
|
||||
}, [copy.readAloudFailed, messageId, text])
|
||||
}, [copy.readAloudFailed, getText, messageId])
|
||||
|
||||
return (
|
||||
<DropdownMenuItem
|
||||
disabled={isPreparing || (!isSpeaking && (anyPlaybackActive || !text))}
|
||||
disabled={isPreparing || (!isSpeaking && anyPlaybackActive)}
|
||||
onSelect={e => {
|
||||
e.preventDefault()
|
||||
void (isSpeaking ? stopVoicePlayback() : read())
|
||||
|
|
@ -820,8 +886,10 @@ const UserMessage: FC<{
|
|||
// changes, not on every frame while the outer max-height animates open.
|
||||
const clampInnerRef = useRef<HTMLDivElement | null>(null)
|
||||
const [bodyClamped, setBodyClamped] = useState(false)
|
||||
const lastClampHeightRef = useRef(-1)
|
||||
const lineHeightRef = useRef(0)
|
||||
|
||||
const measureClamp = useCallback(() => {
|
||||
const measureClamp = useCallback((entries: readonly ResizeObserverEntry[]) => {
|
||||
const inner = clampInnerRef.current
|
||||
const outer = inner?.parentElement
|
||||
|
||||
|
|
@ -829,12 +897,28 @@ const UserMessage: FC<{
|
|||
return
|
||||
}
|
||||
|
||||
const styles = getComputedStyle(inner)
|
||||
const lineHeight = parseFloat(styles.lineHeight) || 1.5 * parseFloat(styles.fontSize) || 20
|
||||
const fullHeight = inner.scrollHeight
|
||||
// Prefer the size the ResizeObserver already computed — reading
|
||||
// `scrollHeight` outside RO timing forces a synchronous layout, and with
|
||||
// many user bubbles observed at once those reads interleave with the
|
||||
// style write below into a read-write-read reflow cascade.
|
||||
const entryHeight = entries.find(entry => entry.target === inner)?.borderBoxSize?.[0]?.blockSize
|
||||
const fullHeight = Math.ceil(entryHeight ?? inner.scrollHeight)
|
||||
|
||||
if (fullHeight === lastClampHeightRef.current) {
|
||||
return
|
||||
}
|
||||
|
||||
lastClampHeightRef.current = fullHeight
|
||||
|
||||
// Line-height is stable for the life of the bubble (font settings don't
|
||||
// change under it) — resolve the computed style once.
|
||||
if (!lineHeightRef.current) {
|
||||
const styles = getComputedStyle(inner)
|
||||
lineHeightRef.current = parseFloat(styles.lineHeight) || 1.5 * parseFloat(styles.fontSize) || 20
|
||||
}
|
||||
|
||||
outer.style.setProperty('--human-msg-full', `${fullHeight}px`)
|
||||
setBodyClamped(fullHeight > lineHeight * 2 + 1)
|
||||
setBodyClamped(fullHeight > lineHeightRef.current * 2 + 1)
|
||||
}, [])
|
||||
|
||||
useResizeObserver(measureClamp, clampInnerRef)
|
||||
|
|
|
|||
|
|
@ -15,5 +15,29 @@ export function HapticsProvider({ children }: { children: ReactNode }) {
|
|||
return () => registerHapticTrigger(null)
|
||||
}, [muted, trigger])
|
||||
|
||||
// web-haptics builds its AudioContext lazily inside the first trigger(), and
|
||||
// the process's first AudioContext pays the CoreAudio spin-up (~850ms stall
|
||||
// in profiles) — which landed on the first streamStart haptic as the first
|
||||
// token painted. Open/close a throwaway context at idle so the real one
|
||||
// connects to an already-warm audio service in single-digit ms.
|
||||
useEffect(() => {
|
||||
if (typeof requestIdleCallback !== 'function' || typeof AudioContext === 'undefined') {
|
||||
return undefined
|
||||
}
|
||||
|
||||
const id = requestIdleCallback(
|
||||
() => {
|
||||
try {
|
||||
void new AudioContext().close().catch(() => undefined)
|
||||
} catch {
|
||||
// No audio device (headless CI) — nothing to warm.
|
||||
}
|
||||
},
|
||||
{ timeout: 2000 }
|
||||
)
|
||||
|
||||
return () => cancelIdleCallback(id)
|
||||
}, [])
|
||||
|
||||
return <>{children}</>
|
||||
}
|
||||
|
|
|
|||
|
|
@ -34,14 +34,21 @@ function FadeTextImpl({ children, className, fadeWidth = '3rem', style, ...rest
|
|||
const ref = useRef<HTMLSpanElement>(null)
|
||||
const [overflowing, setOverflowing] = useState(false)
|
||||
|
||||
const measureOverflow = useCallback(() => {
|
||||
const measureOverflow = useCallback((entries: readonly ResizeObserverEntry[]) => {
|
||||
const el = ref.current
|
||||
|
||||
if (!el) {
|
||||
return
|
||||
}
|
||||
|
||||
setOverflowing(el.scrollWidth - el.clientWidth > 1)
|
||||
// `clientWidth` from the RO entry when available (already computed);
|
||||
// `scrollWidth` is unavoidable — content width isn't part of the entry —
|
||||
// but inside RO timing layout is already clean so the read is cheap.
|
||||
const clientWidth = entries.find(entry => entry.target === el)?.contentRect?.width ?? el.clientWidth
|
||||
|
||||
// setState is identity-stable: React bails out when the boolean doesn't
|
||||
// change, so repeated RO fires with the same answer don't re-render.
|
||||
setOverflowing(el.scrollWidth - clientWidth > 1)
|
||||
}, [])
|
||||
|
||||
useResizeObserver(measureOverflow, ref)
|
||||
|
|
|
|||
|
|
@ -1,17 +1,26 @@
|
|||
import { type RefObject, useLayoutEffect, useRef } from 'react'
|
||||
|
||||
export function useResizeObserver(onResize: () => void, ...refs: readonly RefObject<Element | null>[]) {
|
||||
/**
|
||||
* Observe element resizes. The callback receives the ResizeObserver entries
|
||||
* (empty on the initial synchronous call and in non-RO environments) so
|
||||
* callers can read the observed size off the entry instead of forcing a
|
||||
* fresh layout read.
|
||||
*/
|
||||
export function useResizeObserver(
|
||||
onResize: (entries: readonly ResizeObserverEntry[]) => void,
|
||||
...refs: readonly RefObject<Element | null>[]
|
||||
) {
|
||||
const refsRef = useRef(refs)
|
||||
refsRef.current = refs
|
||||
|
||||
useLayoutEffect(() => {
|
||||
if (typeof ResizeObserver === 'undefined') {
|
||||
onResize()
|
||||
onResize([])
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
const observer = new ResizeObserver(() => onResize())
|
||||
const observer = new ResizeObserver(entries => onResize(entries))
|
||||
let observed = false
|
||||
|
||||
for (const ref of refsRef.current) {
|
||||
|
|
@ -31,7 +40,7 @@ export function useResizeObserver(onResize: () => void, ...refs: readonly RefObj
|
|||
return
|
||||
}
|
||||
|
||||
onResize()
|
||||
onResize([])
|
||||
|
||||
return () => observer.disconnect()
|
||||
}, [onResize])
|
||||
|
|
|
|||
105
apps/desktop/src/lib/remend-tail.test.ts
Normal file
105
apps/desktop/src/lib/remend-tail.test.ts
Normal file
|
|
@ -0,0 +1,105 @@
|
|||
import { parseMarkdownIntoBlocks } from '@assistant-ui/react-streamdown'
|
||||
import remend from 'remend'
|
||||
import { describe, expect, it } from 'vitest'
|
||||
|
||||
import { findRemendWindowStart, tailBoundedRemend } from './remend-tail'
|
||||
|
||||
const CORPUS = `# Heading one
|
||||
|
||||
Intro paragraph with **bold**, *italic*, \`inline code\`, and a [link](https://example.com).
|
||||
|
||||
## Code
|
||||
|
||||
\`\`\`python
|
||||
def main():
|
||||
cost = "$5"
|
||||
print(f"total: $\{cost}")
|
||||
\`\`\`
|
||||
|
||||
Some text after the fence with $x^2 + y^2$ inline math.
|
||||
|
||||
$$
|
||||
\\int_0^1 f(x) dx
|
||||
$$
|
||||
|
||||
- list item one with **bold**
|
||||
- list item two
|
||||
|
||||
| col a | col b |
|
||||
| ----- | ----- |
|
||||
| 1 | 2 |
|
||||
|
||||
~~~js
|
||||
const s = \`template \${value}\`
|
||||
~~~
|
||||
|
||||
Final paragraph with ~~strike~~ and unfinished [link text](https://exa
|
||||
`
|
||||
|
||||
/**
 * Render-equivalence oracle used by the assertions below.
 *
 * Full-text remend and tail-bounded remend are allowed to produce different
 * raw strings, as long as the difference cannot show up on screen. The
 * comparison is therefore made after block splitting: every resulting block
 * must be identical. (Streamdown renders blocks independently, so equality at
 * block level IS equality of the rendered output.)
 */
function blocksOf(text: string): string[] {
  const blocks = parseMarkdownIntoBlocks(text)

  return blocks
}
|
||||
|
||||
describe('tailBoundedRemend', () => {
  // Exhaustive sweep: every prefix of the corpus is a state the renderer can
  // observe mid-stream, so each one must split into the same blocks.
  it('matches full remend block output at every streaming prefix', () => {
    let end = 1

    while (end <= CORPUS.length) {
      const prefix = CORPUS.slice(0, end)
      const expected = blocksOf(remend(prefix))
      const actual = blocksOf(tailBoundedRemend(prefix))

      expect(actual, `prefix length ${end}: ${JSON.stringify(prefix.slice(-60))}`).toEqual(expected)

      end += 1
    }
  })

  it('repairs an unclosed fence opened early in a long message', () => {
    const text = `intro\n\n\`\`\`python\n${'x = 1\n'.repeat(500)}print("$dollar")`
    const fenceOpener = text.indexOf('```python')

    expect(blocksOf(tailBoundedRemend(text))).toEqual(blocksOf(remend(text)))
    // the window must reach back to the fence opener
    expect(findRemendWindowStart(text)).toBe(fenceOpener)
  })

  it('bounds the window to the tail paragraph when no fence is open', () => {
    const text = `para one\n\npara two\n\npara three with **bold`
    const tailParagraph = text.indexOf('para three')

    expect(findRemendWindowStart(text)).toBe(tailParagraph)
    expect(tailBoundedRemend(text)).toBe(remend(text))
  })

  it('widens the window across an open $$ math block', () => {
    const text = `before\n\n$$\n\\frac{a}{b}`
    const mathOpener = text.indexOf('$$')

    expect(findRemendWindowStart(text)).toBeLessThanOrEqual(mathOpener)
    expect(blocksOf(tailBoundedRemend(text))).toEqual(blocksOf(remend(text)))
  })

  it('handles closed constructs without modification', () => {
    const text = `done **bold** and \`code\`\n\n\`\`\`js\nconst a = 1\n\`\`\`\n\nlast line.`
    const repaired = tailBoundedRemend(text)

    expect(repaired).toBe(text)
  })

  it('intentionally diverges from full remend on cross-block dangling openers', () => {
    // Full remend looks at the whole document, so an opener left dangling in
    // an EARLIER block makes it append `**` to the unrelated tail block
    // ("|**"). Streamdown splits into blocks after the repair, which means
    // that opener never renders as bold in either case — the tail-bounded
    // output is simply the cleaner one. Pinning both results here makes a
    // future remend upgrade that changes this behavior show up as a failure.
    const text = `- item with **dangling\n- item two\n\n|`
    const fullRepair = remend(text)
    const tailRepair = tailBoundedRemend(text)

    expect(fullRepair.endsWith('|**')).toBe(true)
    expect(tailRepair.endsWith('|')).toBe(true)
    expect(tailRepair.endsWith('|**')).toBe(false)
  })
})
|
||||
108
apps/desktop/src/lib/remend-tail.ts
Normal file
108
apps/desktop/src/lib/remend-tail.ts
Normal file
|
|
@ -0,0 +1,108 @@
|
|||
import remend from 'remend'
|
||||
|
||||
// Tail-bounded incomplete-markdown repair.
|
||||
//
|
||||
// Streamdown's built-in `parseIncompleteMarkdown` runs `remend` over the whole
|
||||
// accumulated message on every streaming flush (~18% of script time on 50KB+
|
||||
// messages). But repairs only ever matter in the trailing block: inline
|
||||
// constructs can't cross a blank line, and Streamdown splits into blocks AFTER
|
||||
// the repair, so a dangling opener in an earlier block can't reach the tail.
|
||||
// We run `remend` on just that block instead.
|
||||
|
||||
// Char codes used by the line scanner below (compared against charCodeAt).
const BACKTICK = 96 // `
const TILDE = 126 // ~
const SPACE = 32
const TAB = 9
const BACKSLASH = 92

/** True for the horizontal whitespace the scanner skips: space or tab. */
const isSpace = (c: number) => c === SPACE || c === TAB

/**
 * Index of the last top-level block start — the char after the most recent
 * blank line that sits outside any open code fence or `$$` math block. An
 * unclosed fence/math always begins after that blank, so it stays wholly
 * inside the window without separate tracking.
 *
 * The scan is a single linear pass with no regex: lines are walked once and
 * the position of the next `$$` is cached across lines, so a document with no
 * (or few) `$$` markers is not re-searched to its end on every line.
 *
 * @param text The accumulated (possibly incomplete) markdown source.
 * @returns Offset into `text` where the trailing block starts; 0 when there is
 *   no qualifying blank line followed by content.
 */
export function findRemendWindowStart(text: string): number {
  const n = text.length
  let inFence = false
  let fenceChar = 0
  let fenceRun = 0
  let inMath = false
  let boundary = 0
  let pending = -1 // a blank line, committed to `boundary` once content follows
  // Cached index of the next `$$`: -2 = not searched yet, -1 = none remain.
  // Every search starts at or before the line being scanned, so a cached hit
  // at or past `lineStart` is exactly what a fresh search would return.
  let nextMath = -2

  for (let lineStart = 0; lineStart <= n; ) {
    let lineEnd = text.indexOf('\n', lineStart)

    if (lineEnd === -1) {
      lineEnd = n
    }

    let i = lineStart

    while (i < lineEnd && isSpace(text.charCodeAt(i))) {
      i += 1
    }

    // First non-whitespace char of the line, or -1 for a blank line.
    const first = i < lineEnd ? text.charCodeAt(i) : -1
    let marker = false

    // Fence open/close (``` or ~~~, ≤3 spaces indent).
    if ((first === BACKTICK || first === TILDE) && i - lineStart <= 3) {
      let run = i

      while (run < lineEnd && text.charCodeAt(run) === first) {
        run += 1
      }

      if (run - i >= 3) {
        marker = true

        if (!inFence) {
          inFence = true
          fenceChar = first
          fenceRun = run - i
        } else if (first === fenceChar && run - i >= fenceRun && onlyWhitespace(text, run, lineEnd)) {
          // A closer must use the same char, be at least as long as the
          // opener, and carry nothing but whitespace after the run.
          inFence = false
        }
      }
    }

    // Toggle `$$` math state on plain lines ($$ inside a fence is literal).
    if (!inFence && !marker) {
      // Refresh the cache only when it points behind this line (e.g. at a
      // `$$` that sat inside a skipped fence line) or was never filled.
      if (nextMath !== -1 && nextMath < lineStart) {
        nextMath = text.indexOf('$$', lineStart)
      }

      while (nextMath !== -1 && nextMath < lineEnd - 1) {
        // `\$$` is an escaped dollar, not a math delimiter.
        if (nextMath === 0 || text.charCodeAt(nextMath - 1) !== BACKSLASH) {
          inMath = !inMath
        }

        nextMath = text.indexOf('$$', nextMath + 2)
      }
    }

    if (first === -1 && !inFence && !inMath) {
      pending = lineEnd + 1
    } else if (pending !== -1) {
      boundary = pending
      pending = -1
    }

    lineStart = lineEnd + 1
  }

  return boundary
}

/** True when every char of `text` in `[from, to)` is a space or tab. */
function onlyWhitespace(text: string, from: number, to: number): boolean {
  for (let i = from; i < to; i += 1) {
    if (!isSpace(text.charCodeAt(i))) {
      return false
    }
  }

  return true
}
|
||||
|
||||
/**
 * Repair incomplete markdown by running `remend` on the trailing top-level
 * block only. Everything before the window start is passed through untouched;
 * when the window starts at the beginning, the whole text goes to `remend`.
 */
export function tailBoundedRemend(text: string): string {
  const windowStart = findRemendWindowStart(text)

  if (windowStart <= 0) {
    return remend(text)
  }

  const settled = text.slice(0, windowStart)
  const tail = text.slice(windowStart)

  return settled + remend(tail)
}
|
||||
|
|
@ -1,5 +1,6 @@
|
|||
import { atom } from 'nanostores'
|
||||
import { atom, computed } from 'nanostores'
|
||||
|
||||
import { lastVisibleMessageIsUser } from '@/app/chat/thread-loading'
|
||||
import type { ContextSuggestion } from '@/app/types'
|
||||
import type { HermesConnection } from '@/global'
|
||||
import type { ChatMessage } from '@/lib/chat-messages'
|
||||
|
|
@ -195,6 +196,15 @@ export const $workingSessionIds = atom<string[]>([])
|
|||
export const $activeSessionId = atom<string | null>(null)
|
||||
export const $selectedStoredSessionId = atom<string | null>(null)
|
||||
export const $messages = atom<ChatMessage[]>([])
|
||||
|
||||
// Streaming-stable derivations of $messages. During a token stream the array
|
||||
// is replaced ~30×/s; components that only care about coarse facts (is the
|
||||
// thread empty? is the tail a user message?) subscribe to these instead of
|
||||
// $messages so per-token flushes don't re-render them — nanostores' `computed`
|
||||
// only notifies when the derived VALUE changes.
|
||||
export const $messagesEmpty = computed($messages, messages => messages.length === 0)
|
||||
export const $lastVisibleMessageIsUser = computed($messages, lastVisibleMessageIsUser)
|
||||
|
||||
export const $freshDraftReady = atom(false)
|
||||
export const $busy = atom(false)
|
||||
export const $awaitingResponse = atom(false)
|
||||
|
|
|
|||
|
|
@ -21,7 +21,7 @@ let
|
|||
|
||||
# Single npm deps fetch from the workspace root lockfile.
|
||||
# All workspace packages share this derivation.
|
||||
npmDepsHash = "sha256-BfTSh6J2VZ/07tq2DYnKgUViZCgRhW1sC2uj18H65SE=";
|
||||
npmDepsHash = "sha256-dFUlWvIIsCqvtGkoobs0qUzFlSdejuffI/uLoQxhW8Q=";
|
||||
|
||||
npmDeps = pkgs.fetchNpmDeps {
|
||||
inherit src;
|
||||
|
|
|
|||
1
package-lock.json
generated
1
package-lock.json
generated
|
|
@ -119,6 +119,7 @@
|
|||
"react-router-dom": "^7.17.0",
|
||||
"react-shiki": "^0.9.3",
|
||||
"remark-math": "^6.0.0",
|
||||
"remend": "^1.3.0",
|
||||
"shiki": "^4.0.2",
|
||||
"streamdown": "^2.5.0",
|
||||
"tailwind-merge": "^3.5.0",
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue