fix(desktop): avoid stack overflow on embedded image replay

Replace the giant embedded-image regex with a bounded scanner so opening sessions with multi-megabyte data URLs does not crash the renderer.
2026-06-23 10:42:00 +00:00 · 2026-06-22 18:19:36 -05:00 · 2026-06-22 18:19:36 -05:00 · a6b670d4a2
commit a6b670d4a2
parent 87c4a5ebb8
2 changed files with 121 additions and 13 deletions
--- a/apps/desktop/src/lib/embedded-images.test.ts
+++ b/apps/desktop/src/lib/embedded-images.test.ts
@ -32,4 +32,13 @@ describe('extractEmbeddedImages', () => {
    expect(result.cleanedText).toBe('first  mid  tail')
    expect(result.images).toEqual([SAMPLE_PNG_DATA_URL, second])
  })
+
+  it('handles multi-megabyte data URLs without overflowing the JS stack', () => {
+    const hugeDataUrl = 'data:image/png;base64,' + 'A'.repeat(8_000_000)
+    const result = extractEmbeddedImages(`describe this ${hugeDataUrl} thanks`)
+
+    expect(result.cleanedText).toBe('describe this  thanks')
+    expect(result.images).toHaveLength(1)
+    expect(result.images[0]).toHaveLength(hugeDataUrl.length)
+  })
 })
--- a/apps/desktop/src/lib/embedded-images.ts
+++ b/apps/desktop/src/lib/embedded-images.ts
@ -1,7 +1,11 @@
-const EMBEDDED_IMAGE_RE =
-  /(\{\s*"type"\s*:\s*"image_url"\s*,\s*"image_url"\s*:\s*\{\s*"url"\s*:\s*")?(data:image\/[\w.+-]+;base64,[A-Za-z0-9+/=]{64,})("\s*\}\s*\})?/g
-
 const DATA_URL_RE = /^data:([\w./+-]+);base64,(.*)$/i
+const DATA_IMAGE_PREFIX = 'data:image/'
+const BASE64_MARKER = ';base64,'
+const MIN_EMBEDDED_IMAGE_BASE64_LENGTH = 64
+const JSON_IMAGE_OPEN_RE = /\{\s*"type"\s*:\s*"image_url"\s*,\s*"image_url"\s*:\s*\{\s*"url"\s*:\s*"$/
+const JSON_IMAGE_CLOSE_RE = /^"\s*\}\s*\}/
+const JSON_IMAGE_OPEN_MAX = 96
+const JSON_IMAGE_CLOSE_MAX = 16

 export const DATA_IMAGE_URL_RE = /^data:image\/[\w.+-]+;base64,/i

@ -31,24 +35,119 @@ export function dataUrlToBlob(dataUrl: string): Blob | null {
  }
 }

+function isImageMimeCode(code: number): boolean {
+  return (
+    (code >= 48 && code <= 57) ||
+    (code >= 65 && code <= 90) ||
+    (code >= 97 && code <= 122) ||
+    code === 43 ||
+    code === 45 ||
+    code === 46 ||
+    code === 95
+  )
+}
+
+function isBase64Code(code: number): boolean {
+  return (
+    (code >= 48 && code <= 57) ||
+    (code >= 65 && code <= 90) ||
+    (code >= 97 && code <= 122) ||
+    code === 43 ||
+    code === 47 ||
+    code === 61
+  )
+}
+
+function readDataImageUrl(text: string, start: number): { end: number; url: string } | null {
+  if (!text.startsWith(DATA_IMAGE_PREFIX, start)) {
+    return null
+  }
+
+  let cursor = start + DATA_IMAGE_PREFIX.length
+
+  while (cursor < text.length && isImageMimeCode(text.charCodeAt(cursor))) {
+    cursor += 1
+  }
+
+  if (cursor === start + DATA_IMAGE_PREFIX.length || !text.startsWith(BASE64_MARKER, cursor)) {
+    return null
+  }
+
+  cursor += BASE64_MARKER.length
+  const base64Start = cursor
+
+  while (cursor < text.length && isBase64Code(text.charCodeAt(cursor))) {
+    cursor += 1
+  }
+
+  if (cursor - base64Start < MIN_EMBEDDED_IMAGE_BASE64_LENGTH) {
+    return null
+  }
+
+  return { end: cursor, url: text.slice(start, cursor) }
+}
+
+function embeddedImageRemovalRange(text: string, dataStart: number, dataEnd: number): { end: number; start: number } {
+  let start = dataStart
+  let end = dataEnd
+  const openSearchStart = Math.max(0, dataStart - JSON_IMAGE_OPEN_MAX)
+  const openMatch = text.slice(openSearchStart, dataStart).match(JSON_IMAGE_OPEN_RE)
+
+  if (openMatch?.index !== undefined) {
+    const close = text.slice(dataEnd, dataEnd + JSON_IMAGE_CLOSE_MAX).match(JSON_IMAGE_CLOSE_RE)
+
+    if (close) {
+      start = openSearchStart + openMatch.index
+      end = dataEnd + close[0].length
+    }
+  }
+
+  return { end, start }
+}
+
+function normalizeCleanedText(text: string): string {
+  return text.replace(/[ \t]+\n/g, '\n').replace(/\n{3,}/g, '\n\n').trim()
+}
+
 export function extractEmbeddedImages(text: string): EmbeddedImageExtraction {
-  if (!text || !text.includes('data:image/')) {
+  if (!text || !text.includes(DATA_IMAGE_PREFIX)) {
    return { cleanedText: text, images: [] }
  }

  const images: string[] = []
+  const pieces: string[] = []
+  let appendCursor = 0
+  let searchCursor = 0

-  const cleanedText = text
-    .replace(EMBEDDED_IMAGE_RE, (_match, _open, dataUrl: string) => {
-      images.push(dataUrl)
+  while (searchCursor < text.length) {
+    const dataStart = text.indexOf(DATA_IMAGE_PREFIX, searchCursor)

-      return ''
-    })
-    .replace(/[ \t]+\n/g, '\n')
-    .replace(/\n{3,}/g, '\n\n')
-    .trim()
+    if (dataStart === -1) {
+      break
+    }

-  return { cleanedText, images }
+    const dataUrl = readDataImageUrl(text, dataStart)
+
+    if (!dataUrl) {
+      searchCursor = dataStart + DATA_IMAGE_PREFIX.length
+
+      continue
+    }
+
+    const range = embeddedImageRemovalRange(text, dataStart, dataUrl.end)
+    pieces.push(text.slice(appendCursor, range.start))
+    images.push(dataUrl.url)
+    appendCursor = range.end
+    searchCursor = range.end
+  }
+
+  if (!images.length) {
+    return { cleanedText: text, images: [] }
+  }
+
+  pieces.push(text.slice(appendCursor))
+
+  return { cleanedText: normalizeCleanedText(pieces.join('')), images }
 }

 export function embeddedImageUrls(text: string): string[] {