refactor(hermes-ink): delete now-dead SGR mouse fragment recovery

With the tokenizer reassembling split CSI sequences across a flush (prior
commit), no SGR mouse fragment can reach a text token anymore — terminals
write a mouse report as one atomic sequence, and any read/flush split now
re-joins in the tokenizer buffer instead of leaking. That makes the whole
downstream recovery layer dead code:

- SGR_MOUSE_FRAGMENT_RE, MOUSE_BURST_NOISE_RE, MOUSE_BURST_RESIDUE_RE
- parseTextWithSgrMouseFragments / parseSgrMouseFragment /
  normalizeSgrMouseFragment
- the whole-text mouse-burst noise fast path in parseMultipleKeypresses

Remove all of it (~185 lines) and the tests that only exercised it. The
narrow legacy X10 wheel-tail resynth stays (distinct mechanism, kept with
its own test). This retires the #17701 → #18113 → #26781 → #28463 → #35512
regex hardening chain in favor of the one correct parser fix.
This commit is contained in:
Brooklyn Nicholson 2026-06-03 19:29:42 -05:00
parent de124800a2
commit 6efc7eda57
2 changed files with 20 additions and 205 deletions

View file

@ -97,76 +97,6 @@ describe('mouse wheel modifier decoding', () => {
})
})
// Exercises the text-token recovery path in parseMultipleKeypresses: SGR mouse
// reports that reached the parser as plain text (missing some or all of the
// `ESC [ <` introducer) must be re-synthesized as mouse events or swallowed,
// while ordinary prose that merely resembles a report must be left alone.
describe('fragmented SGR mouse recovery', () => {
it('re-synthesizes bracket-only SGR mouse tails as mouse events', () => {
// Only the leading ESC is missing.
const [[mouse]] = parseMultipleKeypresses(INITIAL_STATE, '[<35;159;11M')
expect(mouse).toMatchObject({ kind: 'mouse', button: 35, col: 159, row: 11, action: 'press' })
})
it('re-synthesizes angle-only SGR mouse tails as mouse events', () => {
// Both ESC and `[` are missing.
const [[mouse]] = parseMultipleKeypresses(INITIAL_STATE, '<35;159;11M')
expect(mouse).toMatchObject({ kind: 'mouse', button: 35, col: 159, row: 11, action: 'press' })
})
it('re-synthesizes degraded SGR mouse bursts without leaking prompt text', () => {
// Four back-to-back fragments (one of them carrying a `<` prefix) followed by
// real typed text: the fragments become mouse events, the text stays a key.
const [events] = parseMultipleKeypresses(INITIAL_STATE, '5;142;11M<35;159;11M35;124;26M35;119;26Mtyped')
expect(events.slice(0, 4)).toEqual([
expect.objectContaining({ kind: 'mouse', button: 5, col: 142, row: 11 }),
expect.objectContaining({ kind: 'mouse', button: 35, col: 159, row: 11 }),
expect.objectContaining({ kind: 'mouse', button: 35, col: 124, row: 26 }),
expect.objectContaining({ kind: 'mouse', button: 35, col: 119, row: 26 })
])
expect(events[4]).toMatchObject({ kind: 'key', sequence: 'typed' })
})
it('keeps isolated semicolon text that only resembles a prefixless mouse report', () => {
// A single prefixless match that is not part of a contiguous burst is not
// treated as a mouse fragment, so the whole string stays one key event.
const [[key]] = parseMultipleKeypresses(INITIAL_STATE, 'see 1;2;3M for details')
expect(key).toMatchObject({ kind: 'key', sequence: 'see 1;2;3M for details' })
})
it('does not match prefixless fragments inside longer digit runs', () => {
// `1234;56;78M` must not match starting part-way through `1234`; the
// remaining `9;10;11M` is then a lone prefixless match and is kept as text.
const [[key]] = parseMultipleKeypresses(INITIAL_STATE, '1234;56;78M9;10;11M')
expect(key).toMatchObject({ kind: 'key', sequence: '1234;56;78M9;10;11M' })
})
it('swallows a fully degraded mouse-burst noise blob without leaking prompt text', () => {
// Captured from Windows Terminal during a heavy tool-call render: the event
// loop blocked past App's 50ms flush timer, so a long burst of SGR mouse
// reports (mode 1003 any-motion) arrived as text with prefixes AND
// coordinate digits chewed off. The surviving shards are
// too degraded for SGR_MOUSE_FRAGMENT_RE (1- and 2-param remnants, a
// stray focus-in `[I`), so without the whole-text noise fast path the entire
// blob types into the composer and locks the user out.
const blob =
'M6M35;220;56M6M35;218;56M169;48M;157;47M;44M20;43M79;40M78;40M0M7M35;49;41M48;41M;47;40M9;15;32M[I;31M5;211;26M35;211;25M7M;220;1MM0M09;25M24M23M3;22MM18M99;26M32MM38M63;44M47MM1;51M M4M54M'
const [events] = parseMultipleKeypresses(INITIAL_STATE, blob)
expect(events).toEqual([])
})
it('keeps plain prose that only contains scattered M and m letters', () => {
// No digit and no `;`, so the noise fast path must not fire.
const [[key]] = parseMultipleKeypresses(INITIAL_STATE, 'Mmm MMM mmm yummy')
expect(key).toMatchObject({ kind: 'key', sequence: 'Mmm MMM mmm yummy' })
})
it('swallows noise wholesale even when it contains intact recoverable fragments', () => {
// A noise blob can carry a few intact `<b;c;r M` fragments amid the chewed
// shards. The whole-text noise check must run BEFORE fragment recovery —
// otherwise parseTextWithSgrMouseFragments returns non-null and emits a
// pile of recovered mouse events instead of dropping the blob wholesale.
const blob = '<35;159;11M;44M20;43M0M7M<35;124;26M;47;40M9;15;32M5M2M'
const [events] = parseMultipleKeypresses(INITIAL_STATE, blob)
expect(events).toEqual([])
})
})
describe('flush-boundary SGR mouse reassembly', () => {
it('reassembles a report split by a mid-sequence watchdog flush into one mouse event', () => {
// chunk 1: heavy render stalls the loop, only the prefix is read
@ -191,4 +121,13 @@ describe('flush-boundary SGR mouse reassembly', () => {
expect(keys).toEqual([])
expect(state.incomplete).toBe('')
})
it('re-synthesizes an orphaned X10 wheel tail (legacy mouse) into a scroll key', () => {
// X10 wheel-up = ESC[M + (0x40+32) + col + row. If the ESC was flushed as a
// lone Escape and the `[M…` payload arrives as text, resynthesize it.
const tail = '[M' + String.fromCharCode(0x60) + '!!'
const [[key]] = parseMultipleKeypresses(INITIAL_STATE, tail)
expect(key).toMatchObject({ name: 'wheelup' })
})
})

View file

@ -63,35 +63,6 @@ const XTVERSION_RE = /^\x1bP>\|(.*?)(?:\x07|\x1b\\)$/s
// Button 32=left-drag (0x20 | motion-bit). Plain 0/1/2 = left/mid/right click.
// eslint-disable-next-line no-control-regex
const SGR_MOUSE_RE = /^\x1b\[<(\d+);(\d+);(\d+)([Mm])$/
// Finds SGR mouse reports that reached the parser as plain text after losing
// some or all of the `ESC [ <` introducer. A match is: an optional surviving
// `[<` or `<` prefix, a button code in 0–255 with no leading zero, two further
// `;`-separated numeric parameters, and an `M`/`m` terminator. The `(?<!\d)`
// lookbehind stops a match from starting part-way through a longer digit run.
// The regex is global (`g`) so every fragment in a text token can be collected.
const SGR_MOUSE_FRAGMENT_RE = /(?<!\d)(?:\[<|<)?(?:[0-9]|[1-9][0-9]|1\d{2}|2[0-4]\d|25[0-5]);\d+;\d+[Mm]/g
// Whole-text mouse-burst noise fast path. When a heavy render blocks the event
// loop past App's 50ms flush watchdog, a long burst of SGR mouse reports (mode
// 1003 any-motion / 1006 SGR) can arrive as a single text token with prefixes
// AND coordinate digits chewed off across many partial reads. The surviving
// shards (1- and 2-param remnants, stray focus-in `[I`, lone `M`/`m`
// terminators) are too degraded for SGR_MOUSE_FRAGMENT_RE, so the leftover
// tail leaks into the composer and locks the user out (they can't type or exit).
//
// If the ENTIRE text token is drawn only from the mouse-leak alphabet
// (`[ ] < ; I M m`, digits, and the stray spaces a burst can carry) AND it
// carries the structural signature of mouse coordinates — ≥3 `M`/`m`
// terminators, at least one digit, and at least one `;` separator — swallow it
// wholesale. All three constraints together preserve real prose: `Mmm MMM mmm`
// has no digit and no `;`, `see 1;2;3M for details` contains disqualifying
// letters, and `1234;56;78M9;10;11M` has only two terminators.
// Note: the character class also admits a raw ESC byte (`\x1b`), hence the
// no-control-regex suppression below.
// eslint-disable-next-line no-control-regex
const MOUSE_BURST_NOISE_RE = /^(?=[\s\S]*\d)(?=[\s\S]*;)(?=(?:[^Mm]*[Mm]){3})[\d;<\[\]IMm \x1b]+$/
// Residual-shard variant for the gaps BETWEEN / AFTER recovered fragments
// inside parseTextWithSgrMouseFragments. A real recovery run leaves degraded
// remnants (e.g. `M6M`, `7M;220;1MM0M`, lone `;157;47M`) that are pure
// mouse-leak alphabet but too short to satisfy the ≥3-terminator whole-text
// rule. Swallow such a residue only when it is pure alphabet AND carries a
// digit AND at least one `M`/`m` — a prose gap like ` for details ` contains
// disqualifying letters and never matches.
// Uses the same character class as MOUSE_BURST_NOISE_RE (including raw ESC),
// but requires no `;` and only a single terminator.
// eslint-disable-next-line no-control-regex
const MOUSE_BURST_RESIDUE_RE = /^(?=[^\d]*\d)(?=[^Mm]*[Mm])[\d;<\[\]IMm \x1b]+$/
function createPasteKey(content: string): ParsedKey {
return {
@ -296,32 +267,18 @@ export function parseMultipleKeypresses(
} else if (token.type === 'text') {
if (inPaste) {
pasteBuffer += token.value
} else if (MOUSE_BURST_NOISE_RE.test(token.value)) {
// Fully degraded mouse-burst noise — a heavy render (e.g. a sudo /
// secret prompt repaint) blocked the event loop past App's 50ms flush
// watchdog, so a long burst of SGR mouse reports arrived as text with
// prefixes AND coordinate digits chewed off. Checked BEFORE fragment
// recovery: a noise blob can still contain a few intact `<b;c;r M`
// fragments, and parseTextWithSgrMouseFragments would then return
// non-null and emit a pile of recovered mouse events instead of
// dropping the blob wholesale. Swallow it here so it never leaks into
// the composer (and we skip the extra fragment-recovery work mid-stall).
} else if (/^\[M[\x60-\x7f][\x20-\uffff]{2}$/.test(token.value)) {
// Orphaned X10 wheel tail (legacy 1000/1002 terminals, fullscreen
// only). If the buffered ESC was flushed as a lone Escape and the X10
// payload (`[M` + 3 bytes) arrived as the next text token, re-synthesize
// with ESC so the scroll event still fires instead of leaking. SGR mouse
// reports no longer reach this branch — the tokenizer keeps an
// incomplete CSI buffered across a flush and reassembles it (see
// termio/tokenize.ts), so the old fragment/burst recovery is gone.
const resynthesized = '\x1b' + token.value
keys.push(parseKeypress(resynthesized))
} else {
const mouseFragments = parseTextWithSgrMouseFragments(token.value)
if (mouseFragments) {
keys.push(...mouseFragments)
} else if (/^\[M[\x60-\x7f][\x20-\uffff]{2}$/.test(token.value)) {
// Orphaned X10 wheel tail (fullscreen only — mouse tracking is off
// otherwise). A heavy render blocked the event loop past App's 50ms
// flush timer, so the buffered ESC was flushed as a lone Escape and
// the continuation arrived as text. Re-synthesize with ESC so the
// scroll event still fires instead of leaking into the prompt.
const resynthesized = '\x1b' + token.value
keys.push(parseKeypress(resynthesized))
} else {
keys.push(parseKeypress(token.value))
}
keys.push(parseKeypress(token.value))
}
}
}
@ -663,87 +620,6 @@ function parseMouseEvent(s: string): ParsedMouse | null {
}
}
/**
 * Rebuilds a full SGR mouse sequence from a fragment that lost part (or all)
 * of its `ESC [ <` introducer.
 *
 * @param fragment - Text beginning with `[<`, with `<`, or with neither.
 * @returns The fragment with exactly the missing leading bytes prepended.
 */
function normalizeSgrMouseFragment(fragment: string): string {
  // Assume the whole introducer is gone, then shrink the restored part
  // according to whatever prefix survived.
  let missing = '\x1b[<'
  if (fragment.startsWith('[<')) {
    missing = '\x1b'
  } else if (fragment.startsWith('<')) {
    missing = '\x1b['
  }
  return missing + fragment
}
/**
 * Parses a leaked SGR mouse fragment.
 *
 * The fragment is first restored to a full escape sequence via
 * normalizeSgrMouseFragment. If parseMouseEvent recognizes the restored
 * sequence, that result is returned; otherwise the restored sequence is handed
 * to parseKeypress.
 */
function parseSgrMouseFragment(fragment: string): ParsedInput {
  const restored = normalizeSgrMouseFragment(fragment)
  const asMouse = parseMouseEvent(restored)
  if (asMouse != null) {
    return asMouse
  }
  return parseKeypress(restored)
}
/**
 * Recovers SGR mouse fragments embedded in a plain-text token.
 *
 * Matches of SGR_MOUSE_FRAGMENT_RE are grouped into runs of back-to-back
 * fragments. A run is recovered only if at least one fragment in it still
 * carries a `<` / `[<` prefix, or the run holds more than one fragment; a
 * lone prefixless match is left as ordinary text. Text between and after
 * recovered runs is emitted through parseKeypress unless it matches
 * MOUSE_BURST_RESIDUE_RE, in which case it is dropped.
 *
 * @param text - The raw value of a text token.
 * @returns The parsed inputs in source order, or `null` when nothing was
 *   recovered (the caller should then treat `text` as plain input).
 */
function parseTextWithSgrMouseFragments(text: string): ParsedInput[] | null {
  SGR_MOUSE_FRAGMENT_RE.lastIndex = 0
  const hits = [...text.matchAll(SGR_MOUSE_FRAGMENT_RE)]
  if (hits.length === 0) {
    return null
  }

  // Pass 1: fold matches that touch end-to-start into contiguous runs.
  type FragmentRun = { start: number; end: number; fragments: string[] }
  const runs: FragmentRun[] = []
  for (const hit of hits) {
    const start = hit.index!
    const end = start + hit[0].length
    const previous = runs[runs.length - 1]
    if (previous !== undefined && previous.end === start) {
      previous.fragments.push(hit[0])
      previous.end = end
    } else {
      runs.push({ start, end, fragments: [hit[0]] })
    }
  }

  const events: ParsedInput[] = []
  let consumedUpTo = 0

  // Non-fragment text becomes a keypress unless it is pure mouse-leak residue.
  const emitUnlessResidue = (chunk: string): void => {
    if (!MOUSE_BURST_RESIDUE_RE.test(chunk)) {
      events.push(parseKeypress(chunk))
    }
  }

  // Pass 2: emit gap text and recovered fragments, in source order.
  for (const run of runs) {
    const carriesPrefix = run.fragments.some(piece => piece.startsWith('<') || piece.startsWith('[<'))
    if (!carriesPrefix && run.fragments.length === 1) {
      // Lone prefixless look-alike: leave it in place so it is emitted as part
      // of the surrounding text (the consumed position is not advanced).
      continue
    }

    if (run.start > consumedUpTo) {
      emitUnlessResidue(text.slice(consumedUpTo, run.start))
    }
    for (const piece of run.fragments) {
      events.push(parseSgrMouseFragment(piece))
    }
    consumedUpTo = run.end
  }

  // Every recovered run contributes at least one event, so an empty list means
  // no run qualified.
  if (events.length === 0) {
    return null
  }

  if (consumedUpTo < text.length) {
    emitUnlessResidue(text.slice(consumedUpTo))
  }

  return events
}
function parseKeypress(s: string = ''): ParsedKey {
let parts