fix(desktop): avoid stack overflow on embedded image replay

Replace the giant embedded-image regex with a bounded scanner so opening sessions with multi-megabyte data URLs does not crash the renderer.
This commit is contained in:
Brooklyn Nicholson 2026-06-22 18:19:36 -05:00
parent 87c4a5ebb8
commit a6b670d4a2
2 changed files with 121 additions and 13 deletions

View file

@ -32,4 +32,13 @@ describe('extractEmbeddedImages', () => {
expect(result.cleanedText).toBe('first mid tail')
expect(result.images).toEqual([SAMPLE_PNG_DATA_URL, second])
})
it('handles multi-megabyte data URLs without overflowing the JS stack', () => {
const hugeDataUrl = 'data:image/png;base64,' + 'A'.repeat(8_000_000)
const result = extractEmbeddedImages(`describe this ${hugeDataUrl} thanks`)
expect(result.cleanedText).toBe('describe this thanks')
expect(result.images).toHaveLength(1)
expect(result.images[0]).toHaveLength(hugeDataUrl.length)
})
})

View file

@ -1,7 +1,11 @@
const EMBEDDED_IMAGE_RE =
/(\{\s*"type"\s*:\s*"image_url"\s*,\s*"image_url"\s*:\s*\{\s*"url"\s*:\s*")?(data:image\/[\w.+-]+;base64,[A-Za-z0-9+/=]{64,})("\s*\}\s*\})?/g
const DATA_URL_RE = /^data:([\w./+-]+);base64,(.*)$/i
const DATA_IMAGE_PREFIX = 'data:image/'
const BASE64_MARKER = ';base64,'
const MIN_EMBEDDED_IMAGE_BASE64_LENGTH = 64
const JSON_IMAGE_OPEN_RE = /\{\s*"type"\s*:\s*"image_url"\s*,\s*"image_url"\s*:\s*\{\s*"url"\s*:\s*"$/
const JSON_IMAGE_CLOSE_RE = /^"\s*\}\s*\}/
const JSON_IMAGE_OPEN_MAX = 96
const JSON_IMAGE_CLOSE_MAX = 16
export const DATA_IMAGE_URL_RE = /^data:image\/[\w.+-]+;base64,/i
@ -31,24 +35,119 @@ export function dataUrlToBlob(dataUrl: string): Blob | null {
}
}
function isImageMimeCode(code: number): boolean {
return (
(code >= 48 && code <= 57) ||
(code >= 65 && code <= 90) ||
(code >= 97 && code <= 122) ||
code === 43 ||
code === 45 ||
code === 46 ||
code === 95
)
}
function isBase64Code(code: number): boolean {
return (
(code >= 48 && code <= 57) ||
(code >= 65 && code <= 90) ||
(code >= 97 && code <= 122) ||
code === 43 ||
code === 47 ||
code === 61
)
}
function readDataImageUrl(text: string, start: number): { end: number; url: string } | null {
if (!text.startsWith(DATA_IMAGE_PREFIX, start)) {
return null
}
let cursor = start + DATA_IMAGE_PREFIX.length
while (cursor < text.length && isImageMimeCode(text.charCodeAt(cursor))) {
cursor += 1
}
if (cursor === start + DATA_IMAGE_PREFIX.length || !text.startsWith(BASE64_MARKER, cursor)) {
return null
}
cursor += BASE64_MARKER.length
const base64Start = cursor
while (cursor < text.length && isBase64Code(text.charCodeAt(cursor))) {
cursor += 1
}
if (cursor - base64Start < MIN_EMBEDDED_IMAGE_BASE64_LENGTH) {
return null
}
return { end: cursor, url: text.slice(start, cursor) }
}
function embeddedImageRemovalRange(text: string, dataStart: number, dataEnd: number): { end: number; start: number } {
let start = dataStart
let end = dataEnd
const openSearchStart = Math.max(0, dataStart - JSON_IMAGE_OPEN_MAX)
const openMatch = text.slice(openSearchStart, dataStart).match(JSON_IMAGE_OPEN_RE)
if (openMatch?.index !== undefined) {
const close = text.slice(dataEnd, dataEnd + JSON_IMAGE_CLOSE_MAX).match(JSON_IMAGE_CLOSE_RE)
if (close) {
start = openSearchStart + openMatch.index
end = dataEnd + close[0].length
}
}
return { end, start }
}
function normalizeCleanedText(text: string): string {
return text.replace(/[ \t]+\n/g, '\n').replace(/\n{3,}/g, '\n\n').trim()
}
export function extractEmbeddedImages(text: string): EmbeddedImageExtraction {
if (!text || !text.includes('data:image/')) {
if (!text || !text.includes(DATA_IMAGE_PREFIX)) {
return { cleanedText: text, images: [] }
}
const images: string[] = []
const pieces: string[] = []
let appendCursor = 0
let searchCursor = 0
const cleanedText = text
.replace(EMBEDDED_IMAGE_RE, (_match, _open, dataUrl: string) => {
images.push(dataUrl)
while (searchCursor < text.length) {
const dataStart = text.indexOf(DATA_IMAGE_PREFIX, searchCursor)
return ''
})
.replace(/[ \t]+\n/g, '\n')
.replace(/\n{3,}/g, '\n\n')
.trim()
if (dataStart === -1) {
break
}
return { cleanedText, images }
const dataUrl = readDataImageUrl(text, dataStart)
if (!dataUrl) {
searchCursor = dataStart + DATA_IMAGE_PREFIX.length
continue
}
const range = embeddedImageRemovalRange(text, dataStart, dataUrl.end)
pieces.push(text.slice(appendCursor, range.start))
images.push(dataUrl.url)
appendCursor = range.end
searchCursor = range.end
}
if (!images.length) {
return { cleanedText: text, images: [] }
}
pieces.push(text.slice(appendCursor))
return { cleanedText: normalizeCleanedText(pieces.join('')), images }
}
export function embeddedImageUrls(text: string): string[] {