From f3543235475a7edb7a44adc883d97cdebf818cfb Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Wed, 3 Jun 2026 19:24:28 -0500 Subject: [PATCH] fix(hermes-ink): reassemble split mouse sequences at the tokenizer; drop the regex sink MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Root-cause fix for the SGR mouse fragment leak (`46M35;40M...` typed into the prompt). The leak was never really about the fragments — it was the flush emitting them. When App's 50ms watchdog fires mid-CSI during a render stall, the tokenizer was force-emitting the buffered partial as a token and resetting to ground, so both the prefix and the ESC-less remainder surfaced as unparseable input. Make the flush state-aware (xterm.js discipline): a bare ESC still flushes to the Escape key (the legitimate ESCDELAY case), but a buffer still inside a multi-byte control sequence (csi/osc/dcs/apc/ss3/intermediate) is NOT emitted — it's kept so the continuation reassembles on the next feed. A one-tick truncation valve in createTokenizer.flush() drops a partial that survives a second flush with no progress, so a genuinely truncated write can't fuse into the next keypress. With partials never entering the input stream, the downstream scrubber is dead code: remove the SGR fragment guard from input-event.ts (both the original `/^\[<\d+;\d+;\d+[Mm]/` and the consolidated form added earlier in this PR). The parse-keypress burst-recovery regexes (MOUSE_BURST_*) are now also redundant but left in place as a safety net for one release; they can be removed in a follow-up once this soaks. Tests: tokenize.test.ts proves a mid-CSI flush keeps/reassembles and that a stale partial is dropped after a second flush and a bare ESC still emits; parse-keypress.test.ts adds the end-to-end split-then-reassemble case yielding a single clean mouse event with no leaked key. Supersedes #29337. --- .../hermes-ink/src/ink/events/input-event.ts | 39 ++--------- .../hermes-ink/src/ink/parse-keypress.test.ts | 27 ++++++++ .../src/ink/termio/tokenize.test.ts | 65 +++++++++++++++++++ .../hermes-ink/src/ink/termio/tokenize.ts | 40 +++++++++++- 4 files changed, 133 insertions(+), 38 deletions(-) create mode 100644 ui-tui/packages/hermes-ink/src/ink/termio/tokenize.test.ts diff --git a/ui-tui/packages/hermes-ink/src/ink/events/input-event.ts b/ui-tui/packages/hermes-ink/src/ink/events/input-event.ts index f88146be942..900f0042c0b 100644 --- a/ui-tui/packages/hermes-ink/src/ink/events/input-event.ts +++ b/ui-tui/packages/hermes-ink/src/ink/events/input-event.ts @@ -5,32 +5,6 @@ import { Event } from './event.js' const inputForSpecialSequence = (name: string): string => name === 'space' ? ' ' : name === 'return' || name === 'escape' ? '' : name -// SGR mouse-report fragment that leaked into a nameless text/sequence token. -// In alt-screen Ink enables MOUSE_ANY (DEC 1003), so every pixel of motion -// emits a CSI mouse report (ESC[ { // blob types into the composer and locks the user out. const blob = 'M6M35;220;56M6M35;218;56M169;48M;157;47M;44M20;43M79;40M78;40M0M7M35;49;41M48;41M;47;40M9;15;32M[I;31M5;211;26M35;211;25M7M;220;1MM0M09;25M24M23M3;22MM18M99;26M32MM38M63;44M47MM1;51M M4M54M' + const [events] = parseMultipleKeypresses(INITIAL_STATE, blob) expect(events).toEqual([]) @@ -165,3 +166,29 @@ describe('fragmented SGR mouse recovery', () => { expect(events).toEqual([]) }) }) + +describe('flush-boundary SGR mouse reassembly', () => { + it('reassembles a report split by a mid-sequence watchdog flush into one mouse event', () => { + // chunk 1: heavy render stalls the loop, only the prefix is read + let [keys, state] = parseMultipleKeypresses(INITIAL_STATE, '\x1b[<0;35;') + expect(keys).toEqual([]) + + // App's 50ms watchdog flushes (input=null) — must NOT emit the partial + ;[keys, state] = parseMultipleKeypresses(state, null) + expect(keys).toEqual([]) + + // continuation arrives; the whole report reassembles, nothing leaks + ;[keys, state] = parseMultipleKeypresses(state, '46M') + expect(keys).toEqual([expect.objectContaining({ kind: 'mouse', button: 0, col: 35, row: 46, action: 'press' })]) + }) + + it('drops a truncated mouse prefix after a second flush instead of leaking it', () => { + let [keys, state] = parseMultipleKeypresses(INITIAL_STATE, '\x1b[<0;35;') + + ;[keys, state] = parseMultipleKeypresses(state, null) // first flush keeps it + ;[keys, state] = parseMultipleKeypresses(state, null) // second flush drops it + + expect(keys).toEqual([]) + expect(state.incomplete).toBe('') + }) +}) diff --git a/ui-tui/packages/hermes-ink/src/ink/termio/tokenize.test.ts b/ui-tui/packages/hermes-ink/src/ink/termio/tokenize.test.ts new file mode 100644 index 00000000000..4d73cfcddad --- /dev/null +++ b/ui-tui/packages/hermes-ink/src/ink/termio/tokenize.test.ts @@ -0,0 +1,65 @@ +import { describe, expect, it } from 'vitest' + +import { createTokenizer } from './tokenize.js' + +describe('tokenizer escape-sequence boundaries', () => { + it('reassembles a CSI mouse sequence split across two feeds', () => { + const t = createTokenizer({ x10Mouse: true }) + + expect(t.feed('\x1b[<0;35;')).toEqual([]) + expect(t.feed('46M')).toEqual([{ type: 'sequence', value: '\x1b[<0;35;46M' }]) + expect(t.buffer()).toBe('') + }) +}) + +describe('tokenizer state-aware flush', () => { + it('does not emit an incomplete CSI on flush — it keeps it for reassembly', () => { + const t = createTokenizer({ x10Mouse: true }) + + // A render stall lets App's watchdog flush mid-sequence. The buffered CSI + // prefix must NOT be emitted (that is the `46M…` leak); it stays buffered. + expect(t.feed('\x1b[<0;35;')).toEqual([]) + expect(t.flush()).toEqual([]) + expect(t.buffer()).toBe('\x1b[<0;35;') + + // The continuation arrives on the next feed and the whole report + // reassembles into a single clean sequence token — nothing leaked. + expect(t.feed('46M')).toEqual([{ type: 'sequence', value: '\x1b[<0;35;46M' }]) + expect(t.buffer()).toBe('') + }) + + it('drops a partial control sequence that survives a second flush (truncation)', () => { + const t = createTokenizer({ x10Mouse: true }) + + expect(t.feed('\x1b[<0;35;')).toEqual([]) + expect(t.flush()).toEqual([]) // first flush keeps the buffer + expect(t.buffer()).toBe('\x1b[<0;35;') + + // Continuation never arrived: the next flush sees the same buffer and + // drops it so it can't fuse with the next keypress's bytes. + expect(t.flush()).toEqual([]) + expect(t.buffer()).toBe('') + }) + + it('still emits a bare ESC on flush so the Escape key works', () => { + const t = createTokenizer({ x10Mouse: true }) + + expect(t.feed('\x1b')).toEqual([]) + expect(t.flush()).toEqual([{ type: 'sequence', value: '\x1b' }]) + expect(t.buffer()).toBe('') + }) + + it('reassembles even when a flush fires between every byte of the report', () => { + const t = createTokenizer({ x10Mouse: true }) + + // Pathological stall: a flush between each chunk. As long as the + // continuation eventually arrives, no fragment is ever emitted as input. + for (const chunk of ['\x1b[', '<', '0;', '35;', '46']) { + expect(t.feed(chunk)).toEqual([]) + expect(t.flush()).toEqual([]) + } + + expect(t.feed('M')).toEqual([{ type: 'sequence', value: '\x1b[<0;35;46M' }]) + expect(t.buffer()).toBe('') + }) +}) diff --git a/ui-tui/packages/hermes-ink/src/ink/termio/tokenize.ts b/ui-tui/packages/hermes-ink/src/ink/termio/tokenize.ts index 40ba7e21435..03f99cf2f4a 100644 --- a/ui-tui/packages/hermes-ink/src/ink/termio/tokenize.ts +++ b/ui-tui/packages/hermes-ink/src/ink/termio/tokenize.ts @@ -47,10 +47,18 @@ type TokenizerOptions = { export function createTokenizer(options?: TokenizerOptions): Tokenizer { let currentState: State = 'ground' let currentBuffer = '' + // The control-sequence buffer kept across the previous flush, if any. Used + // as a one-tick truncation valve: a partial CSI mouse report normally + // reassembles on the very next feed, so if a flush sees the exact same + // buffer it kept last time (the continuation never arrived), we drop it. + let lastFlushedBuffer = '' const x10Mouse = options?.x10Mouse ?? false return { feed(input: string): Token[] { + // Real bytes arrived — any kept partial is no longer stale. + lastFlushedBuffer = '' + const result = tokenize(input, currentState, currentBuffer, false, x10Mouse) currentState = result.state.state @@ -64,12 +72,25 @@ export function createTokenizer(options?: TokenizerOptions): Tokenizer { currentState = result.state.state currentBuffer = result.state.buffer + // tokenize() keeps (doesn't emit) an incomplete control sequence on + // flush. If two consecutive flushes see the same buffer with no feed in + // between, the continuation is never coming (truncated write / killed + // process) — drop it so it can't fuse with the next keypress's bytes. + if (currentBuffer && currentBuffer === lastFlushedBuffer) { + currentState = 'ground' + currentBuffer = '' + lastFlushedBuffer = '' + } else { + lastFlushedBuffer = currentBuffer + } + return result.tokens }, reset(): void { currentState = 'ground' currentBuffer = '' + lastFlushedBuffer = '' }, buffer(): string { @@ -298,8 +319,10 @@ function tokenize( // Handle end of input if (result.state === 'ground') { flushText() - } else if (flush) { - // Force output incomplete sequence + } else if (flush && result.state === 'escape') { + // A bare ESC with nothing after it is the Escape key — the one incomplete + // state a flush should turn into input (the classic ESCDELAY lone-ESC + // disambiguation: ESC alone vs. ESC as a sequence/meta prefix). const remaining = data.slice(seqStart) if (remaining) { @@ -308,7 +331,18 @@ function tokenize( result.state = 'ground' } else { - // Buffer incomplete sequence for next call + // Buffer the incomplete sequence. Two paths land here: + // - streaming (flush=false): normal carry-over to the next feed. + // - flush=true while still inside a multi-byte control sequence + // (csi/osc/dcs/apc/ss3/escapeIntermediate): we deliberately do NOT + // emit it. A half-arrived CSI mouse report (ESC[