fix(tui): restrict fast-echo bypass to ASCII so Vietnamese/CJK/IME input renders correctly (#26011)

* fix(tui): restrict fast-echo bypass to ASCII so Vietnamese/CJK/IME input renders correctly

The composer's fast-echo path (canFastAppend / canFastBackspace) writes
characters straight to stdout to skip an Ink re-render on the hot
typing path. The previous guard only checked
'stringWidth(text) === text.length', which lets a lot of non-ASCII
through:

  - Vietnamese precomposed letters (ề, ắ, ờ, ự, ...) report width 1 and
    length 1, but a Vietnamese Telex / IME stack produces them across
    multiple keystrokes; the intermediate composition state must be
    drawn by Ink so the rendered cell, the stored value, and the
    cursor column stay in lockstep when the final commit replaces the
    preview.
  - NFD combining marks (U+0300..U+036F) are zero-width but length 1,
    so even a passing equality lets them slip and silently desync the
    cell column.
  - CJK/East-Asian wide and emoji rejected only because their length
    differs, but the boundary was shape-shaped, not intent-shaped.

User-visible bug from the original report:
  Example: eê noiói nge neène
  -> the bypass committed the IME preview char before the diacritic
     replaced it, leaving doubled letters on screen.

Fix: gate fast-echo on pure printable ASCII (0x20-0x7e). The
performance-critical English typing path is unchanged; everything else
goes through the normal Ink render path so layout stays accurate.

Also extracts the shape preconditions as pure exported helpers
(canFastAppendShape / canFastBackspaceShape) so the regression matrix
is testable without spinning up a TextInput.

Tests: ui-tui/src/__tests__/textInputFastEcho.test.ts adds 20 cases
covering ASCII still works, Vietnamese precomposed + NFD, CJK, emoji,
NBSP / Latin-1, ANSI / control bytes, multi-line, and end-of-line
preconditions. Verified RED on the previous guard (11 of 20 fail) and
GREEN on the new guard.

Refs: #5221, #7443, #17602, #17603 (similar wide-char rendering bugs).

* docs(tui): clarify Vietnamese char terminology in regression comment

Address Copilot review: 'single byte width' implied UTF-8 byte semantics,
but the relevant property is JS code units (`text.length === 1`) and
display width (`stringWidth === 1`). Reworded to match.
This commit is contained in:
brooklyn! 2026-05-15 07:41:50 -07:00 committed by GitHub
parent d5416284f1
commit 9fb40e6a3d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 218 additions and 19 deletions

View file

@ -0,0 +1,136 @@
import { describe, expect, it } from 'vitest'
import { canFastAppendShape, canFastBackspaceShape } from '../components/textInput.js'
// The fast-echo path bypasses Ink and writes characters directly to stdout
// for the common case of typing plain English at the end of the line. These
// tests pin the shape preconditions that make that bypass safe.
//
// Regression intent: any non-ASCII text — Vietnamese precomposed letters
// (one grapheme, `text.length === 1`, `stringWidth === 1`, but produced
// via IME composition across multiple keystrokes), combining marks
// (zero width), CJK (double width), emoji (variable width), or anything
// that could be produced by an in-flight IME composition — must NOT
// take the bypass. Closes:
// - "TUI is experiencing font errors when using Unicode to type Vietnamese"
// - #5221 TUI input box renders incorrectly for CJK / East-Asian wide
// - #7443 CLI TUI renders and deletes Chinese characters incorrectly
// - #17602 / #17603 Chinese text scattering / ghosting
describe('canFastAppendShape', () => {
const COLS = 40
it('accepts plain ASCII appended at end of single-line input', () => {
expect(canFastAppendShape('hello', 5, 'x', COLS, 5)).toBe(true)
expect(canFastAppendShape('hello', 5, ' world', COLS, 5)).toBe(true)
})
it('rejects when cursor is not at end of line', () => {
expect(canFastAppendShape('hello', 3, 'x', COLS, 5)).toBe(false)
})
it('rejects when current is empty (placeholder render path needed)', () => {
expect(canFastAppendShape('', 0, 'x', COLS, 0)).toBe(false)
})
it('rejects when current contains a newline (multi-line layout)', () => {
expect(canFastAppendShape('hi\nthere', 8, 'x', COLS, 5)).toBe(false)
})
it('rejects when appending would hit the wrap column', () => {
// Reaching cols on append must trigger a wrap, which the bypass
// cannot draw. Stay strictly below cols.
expect(canFastAppendShape('hello', 5, 'x', 6, 5)).toBe(false)
})
// -- Regression coverage: Vietnamese / combining marks / IME --
it('rejects Vietnamese precomposed letter ề (U+1EC1) — IME composition path', () => {
// 'ề' is one grapheme, length 1, width 1, but Vietnamese Telex/IME
// produces it via a multi-key composition. Fast-echo would commit the
// intermediate state to stdout and desync once the final commit
// arrives.
expect(canFastAppendShape('hello', 5, 'ề', COLS, 5)).toBe(false)
})
it('rejects Vietnamese tone marks ă, ơ, ư (Latin-Extended-A/B)', () => {
for (const ch of ['ă', 'ắ', 'ơ', 'ờ', 'ư', 'ự']) {
expect(canFastAppendShape('hello', 5, ch, COLS, 5)).toBe(false)
}
})
it('rejects NFD combining marks (U+0300 grave, U+0301 acute, U+0302 circumflex)', () => {
// Decomposed Vietnamese: 'e' + combining circumflex + combining grave
// = 'ề'. Each combining mark is zero-width but length 1; without the
// ASCII guard the second/third keypress would be fast-echoed and
// desync the cell column.
expect(canFastAppendShape('hello', 5, '\u0300', COLS, 5)).toBe(false)
expect(canFastAppendShape('hello', 5, '\u0301', COLS, 5)).toBe(false)
expect(canFastAppendShape('hello', 5, '\u0302', COLS, 5)).toBe(false)
})
it('rejects CJK (East-Asian wide) characters', () => {
expect(canFastAppendShape('hello', 5, '你', COLS, 5)).toBe(false)
expect(canFastAppendShape('hello', 5, '日本', COLS, 5)).toBe(false)
})
it('rejects emoji', () => {
expect(canFastAppendShape('hello', 5, '🙂', COLS, 5)).toBe(false)
})
it('rejects ANSI-bearing or control text', () => {
expect(canFastAppendShape('hello', 5, '\x1b[31m', COLS, 5)).toBe(false)
expect(canFastAppendShape('hello', 5, '\t', COLS, 5)).toBe(false)
expect(canFastAppendShape('hello', 5, '\x7f', COLS, 5)).toBe(false)
})
it('rejects NBSP and Latin-1 letters that would change the line shape', () => {
expect(canFastAppendShape('hello', 5, '\u00a0', COLS, 5)).toBe(false)
expect(canFastAppendShape('hello', 5, 'é', COLS, 5)).toBe(false)
expect(canFastAppendShape('hello', 5, 'ñ', COLS, 5)).toBe(false)
})
})
describe('canFastBackspaceShape', () => {
it('accepts deleting the last ASCII char', () => {
expect(canFastBackspaceShape('hello', 5)).toBe(true)
})
it('rejects when cursor is not at end', () => {
expect(canFastBackspaceShape('hello', 3)).toBe(false)
})
it('rejects when there is nothing to delete', () => {
expect(canFastBackspaceShape('', 0)).toBe(false)
expect(canFastBackspaceShape('hello', 0)).toBe(false)
})
it('rejects when value contains a newline', () => {
expect(canFastBackspaceShape('hi\nthere', 8)).toBe(false)
})
it('rejects deleting Vietnamese precomposed letter ề', () => {
// The "\b \b" shortcut clears one terminal cell; that's fine for a
// 1-cell ASCII char but if the previous grapheme is a Vietnamese
// letter that the IME may still be holding open, we want Ink to
// re-render so composition state stays consistent.
expect(canFastBackspaceShape('helloề', 'helloề'.length)).toBe(false)
})
it('rejects deleting a CJK character (2 cells)', () => {
expect(canFastBackspaceShape('hi你', 'hi你'.length)).toBe(false)
})
it('rejects deleting a NFD-composed grapheme with combining marks', () => {
// 'e' + U+0302 (circumflex) + U+0300 (grave) — final grapheme is one
// cluster but the previous-grapheme slice is multi-codepoint. Width
// is 1 but the bypass would be unsafe because the rendered cell
// already contained the combined glyph.
const s = 'hello' + 'e\u0302\u0300'
expect(canFastBackspaceShape(s, s.length)).toBe(false)
})
it('rejects deleting an emoji', () => {
expect(canFastBackspaceShape('hi🙂', 'hi🙂'.length)).toBe(false)
})
})

View file

@ -179,6 +179,84 @@ export function lineNav(s: string, p: number, dir: -1 | 1): null | number {
export { offsetFromPosition }
const ASCII_PRINTABLE_RE = /^[\x20-\x7e]+$/
/**
* Pure shape-only precondition for the fast-echo append path.
*
* The fast-echo path bypasses Ink's renderer and writes text directly to
* stdout, so the stored value, the rendered terminal cells, and the cursor
* column must all stay in sync without any layout work. We only allow it
* when the inserted text is pure printable ASCII so that:
*
* - `text.length` matches the number of grapheme clusters (no combining
* marks, no surrogate pairs, no precomposed CJK / Latin-Extended
* letters that an IME might still be holding open as a composition),
* - terminal width is exactly 1 cell per character (no East-Asian wide,
* no zero-width, no ambiguous-width fonts),
* - input methods (Vietnamese Telex, IME, dead-keys) cannot leak
* intermediate composition bytes through the bypass before the final
* commit arrives those always go through the normal Ink render path
* and stay layout-accurate (closes #5221, #7443, #17602/#17603).
*
* We deliberately do NOT just check `stringWidth(text) === text.length`:
* Vietnamese precomposed letters like "ề" (U+1EC1) report width 1 and
* length 1 but are still produced by IME compositions and must not be
* fast-echoed.
*/
export function canFastAppendShape(
current: string,
cursor: number,
text: string,
columns: number,
currentLineWidth: number
): boolean {
if (cursor !== current.length) {
return false
}
if (current.length === 0) {
return false
}
if (current.includes('\n')) {
return false
}
if (!ASCII_PRINTABLE_RE.test(text)) {
return false
}
return currentLineWidth + text.length < Math.max(1, columns)
}
/**
* Pure shape-only precondition for the fast-echo backspace path.
*
* Same reasoning as canFastAppendShape only allow the direct
* "\b \b" stdout shortcut when the deleted grapheme is pure printable
* ASCII. Anything else (combining marks, IME compositions, wide chars,
* tabs, ANSI fragments) goes through the normal render path so Ink can
* recompute cell widths.
*/
export function canFastBackspaceShape(current: string, cursor: number): boolean {
if (cursor !== current.length) {
return false
}
if (cursor <= 0) {
return false
}
if (current.includes('\n')) {
return false
}
const removed = current.slice(prevPos(current, cursor), cursor)
return ASCII_PRINTABLE_RE.test(removed)
}
function renderWithCursor(value: string, cursor: number) {
const pos = Math.max(0, Math.min(cursor, value.length))
@ -444,26 +522,11 @@ export function TextInput({
const canFastEchoBase = () => focus && termFocus && !selected && !mask && !!stdout?.isTTY
const canFastAppend = (current: string, cursor: number, text: string) => {
const sw = stringWidth(text)
const canFastAppend = (current: string, cursor: number, text: string) =>
canFastEchoBase() && canFastAppendShape(current, cursor, text, columns, lineWidthRef.current)
return (
canFastEchoBase() &&
cursor === current.length &&
current.length > 0 &&
!current.includes('\n') &&
sw === text.length &&
lineWidthRef.current + sw < Math.max(1, columns)
)
}
const canFastBackspace = (current: string, cursor: number) => {
if (!canFastEchoBase() || cursor !== current.length || cursor <= 0 || current.includes('\n')) {
return false
}
return stringWidth(current.slice(prevPos(current, cursor), cursor)) === 1
}
const canFastBackspace = (current: string, cursor: number) =>
canFastEchoBase() && canFastBackspaceShape(current, cursor)
const commit = (
next: string,