mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-18 04:41:56 +00:00
fix(tui): restrict fast-echo bypass to ASCII so Vietnamese/CJK/IME input renders correctly (#26011)
* fix(tui): restrict fast-echo bypass to ASCII so Vietnamese/CJK/IME input renders correctly
The composer's fast-echo path (canFastAppend / canFastBackspace) writes
characters straight to stdout to skip an Ink re-render on the hot
typing path. The previous guard only checked
'stringWidth(text) === text.length', which lets a lot of non-ASCII
through:
- Vietnamese precomposed letters (ề, ắ, ờ, ự, ...) report width 1 and
length 1, but a Vietnamese Telex / IME stack produces them across
multiple keystrokes; the intermediate composition state must be
drawn by Ink so the rendered cell, the stored value, and the
cursor column stay in lockstep when the final commit replaces the
preview.
- NFD combining marks (U+0300..U+036F) are zero-width but length 1,
so even a passing equality lets them slip and silently desync the
cell column.
- CJK/East-Asian wide and emoji rejected only because their length
differs, but the boundary was shape-shaped, not intent-shaped.
User-visible bug from the original report:
Example: eê noiói nge neène
-> the bypass committed the IME preview char before the diacritic
replaced it, leaving doubled letters on screen.
Fix: gate fast-echo on pure printable ASCII (0x20-0x7e). The
performance-critical English typing path is unchanged; everything else
goes through the normal Ink render path so layout stays accurate.
Also extracts the shape preconditions as pure exported helpers
(canFastAppendShape / canFastBackspaceShape) so the regression matrix
is testable without spinning up a TextInput.
Tests: ui-tui/src/__tests__/textInputFastEcho.test.ts adds 20 cases
covering ASCII still works, Vietnamese precomposed + NFD, CJK, emoji,
NBSP / Latin-1, ANSI / control bytes, multi-line, and end-of-line
preconditions. Verified RED on the previous guard (11 of 20 fail) and
GREEN on the new guard.
Refs: #5221, #7443, #17602, #17603 (similar wide-char rendering bugs).
* docs(tui): clarify Vietnamese char terminology in regression comment
Address Copilot review: 'single byte width' implied UTF-8 byte semantics,
but the relevant property is JS code units (`text.length === 1`) and
display width (`stringWidth === 1`). Reworded to match.
This commit is contained in:
parent
d5416284f1
commit
9fb40e6a3d
2 changed files with 218 additions and 19 deletions
136
ui-tui/src/__tests__/textInputFastEcho.test.ts
Normal file
136
ui-tui/src/__tests__/textInputFastEcho.test.ts
Normal file
|
|
@ -0,0 +1,136 @@
|
|||
import { describe, expect, it } from 'vitest'
|
||||
|
||||
import { canFastAppendShape, canFastBackspaceShape } from '../components/textInput.js'
|
||||
|
||||
// The fast-echo path bypasses Ink and writes characters directly to stdout
|
||||
// for the common case of typing plain English at the end of the line. These
|
||||
// tests pin the shape preconditions that make that bypass safe.
|
||||
//
|
||||
// Regression intent: any non-ASCII text — Vietnamese precomposed letters
|
||||
// (one grapheme, `text.length === 1`, `stringWidth === 1`, but produced
|
||||
// via IME composition across multiple keystrokes), combining marks
|
||||
// (zero width), CJK (double width), emoji (variable width), or anything
|
||||
// that could be produced by an in-flight IME composition — must NOT
|
||||
// take the bypass. Closes:
|
||||
// - "TUI is experiencing font errors when using Unicode to type Vietnamese"
|
||||
// - #5221 TUI input box renders incorrectly for CJK / East-Asian wide
|
||||
// - #7443 CLI TUI renders and deletes Chinese characters incorrectly
|
||||
// - #17602 / #17603 Chinese text scattering / ghosting
|
||||
|
||||
describe('canFastAppendShape', () => {
|
||||
const COLS = 40
|
||||
|
||||
it('accepts plain ASCII appended at end of single-line input', () => {
|
||||
expect(canFastAppendShape('hello', 5, 'x', COLS, 5)).toBe(true)
|
||||
expect(canFastAppendShape('hello', 5, ' world', COLS, 5)).toBe(true)
|
||||
})
|
||||
|
||||
it('rejects when cursor is not at end of line', () => {
|
||||
expect(canFastAppendShape('hello', 3, 'x', COLS, 5)).toBe(false)
|
||||
})
|
||||
|
||||
it('rejects when current is empty (placeholder render path needed)', () => {
|
||||
expect(canFastAppendShape('', 0, 'x', COLS, 0)).toBe(false)
|
||||
})
|
||||
|
||||
it('rejects when current contains a newline (multi-line layout)', () => {
|
||||
expect(canFastAppendShape('hi\nthere', 8, 'x', COLS, 5)).toBe(false)
|
||||
})
|
||||
|
||||
it('rejects when appending would hit the wrap column', () => {
|
||||
// Reaching cols on append must trigger a wrap, which the bypass
|
||||
// cannot draw. Stay strictly below cols.
|
||||
expect(canFastAppendShape('hello', 5, 'x', 6, 5)).toBe(false)
|
||||
})
|
||||
|
||||
// -- Regression coverage: Vietnamese / combining marks / IME --
|
||||
|
||||
it('rejects Vietnamese precomposed letter ề (U+1EC1) — IME composition path', () => {
|
||||
// 'ề' is one grapheme, length 1, width 1, but Vietnamese Telex/IME
|
||||
// produces it via a multi-key composition. Fast-echo would commit the
|
||||
// intermediate state to stdout and desync once the final commit
|
||||
// arrives.
|
||||
expect(canFastAppendShape('hello', 5, 'ề', COLS, 5)).toBe(false)
|
||||
})
|
||||
|
||||
it('rejects Vietnamese tone marks ă, ơ, ư (Latin-Extended-A/B)', () => {
|
||||
for (const ch of ['ă', 'ắ', 'ơ', 'ờ', 'ư', 'ự']) {
|
||||
expect(canFastAppendShape('hello', 5, ch, COLS, 5)).toBe(false)
|
||||
}
|
||||
})
|
||||
|
||||
it('rejects NFD combining marks (U+0300 grave, U+0301 acute, U+0302 circumflex)', () => {
|
||||
// Decomposed Vietnamese: 'e' + combining circumflex + combining grave
|
||||
// = 'ề'. Each combining mark is zero-width but length 1; without the
|
||||
// ASCII guard the second/third keypress would be fast-echoed and
|
||||
// desync the cell column.
|
||||
expect(canFastAppendShape('hello', 5, '\u0300', COLS, 5)).toBe(false)
|
||||
expect(canFastAppendShape('hello', 5, '\u0301', COLS, 5)).toBe(false)
|
||||
expect(canFastAppendShape('hello', 5, '\u0302', COLS, 5)).toBe(false)
|
||||
})
|
||||
|
||||
it('rejects CJK (East-Asian wide) characters', () => {
|
||||
expect(canFastAppendShape('hello', 5, '你', COLS, 5)).toBe(false)
|
||||
expect(canFastAppendShape('hello', 5, '日本', COLS, 5)).toBe(false)
|
||||
})
|
||||
|
||||
it('rejects emoji', () => {
|
||||
expect(canFastAppendShape('hello', 5, '🙂', COLS, 5)).toBe(false)
|
||||
})
|
||||
|
||||
it('rejects ANSI-bearing or control text', () => {
|
||||
expect(canFastAppendShape('hello', 5, '\x1b[31m', COLS, 5)).toBe(false)
|
||||
expect(canFastAppendShape('hello', 5, '\t', COLS, 5)).toBe(false)
|
||||
expect(canFastAppendShape('hello', 5, '\x7f', COLS, 5)).toBe(false)
|
||||
})
|
||||
|
||||
it('rejects NBSP and Latin-1 letters that would change the line shape', () => {
|
||||
expect(canFastAppendShape('hello', 5, '\u00a0', COLS, 5)).toBe(false)
|
||||
expect(canFastAppendShape('hello', 5, 'é', COLS, 5)).toBe(false)
|
||||
expect(canFastAppendShape('hello', 5, 'ñ', COLS, 5)).toBe(false)
|
||||
})
|
||||
})
|
||||
|
||||
describe('canFastBackspaceShape', () => {
|
||||
it('accepts deleting the last ASCII char', () => {
|
||||
expect(canFastBackspaceShape('hello', 5)).toBe(true)
|
||||
})
|
||||
|
||||
it('rejects when cursor is not at end', () => {
|
||||
expect(canFastBackspaceShape('hello', 3)).toBe(false)
|
||||
})
|
||||
|
||||
it('rejects when there is nothing to delete', () => {
|
||||
expect(canFastBackspaceShape('', 0)).toBe(false)
|
||||
expect(canFastBackspaceShape('hello', 0)).toBe(false)
|
||||
})
|
||||
|
||||
it('rejects when value contains a newline', () => {
|
||||
expect(canFastBackspaceShape('hi\nthere', 8)).toBe(false)
|
||||
})
|
||||
|
||||
it('rejects deleting Vietnamese precomposed letter ề', () => {
|
||||
// The "\b \b" shortcut clears one terminal cell; that's fine for a
|
||||
// 1-cell ASCII char but if the previous grapheme is a Vietnamese
|
||||
// letter that the IME may still be holding open, we want Ink to
|
||||
// re-render so composition state stays consistent.
|
||||
expect(canFastBackspaceShape('helloề', 'helloề'.length)).toBe(false)
|
||||
})
|
||||
|
||||
it('rejects deleting a CJK character (2 cells)', () => {
|
||||
expect(canFastBackspaceShape('hi你', 'hi你'.length)).toBe(false)
|
||||
})
|
||||
|
||||
it('rejects deleting a NFD-composed grapheme with combining marks', () => {
|
||||
// 'e' + U+0302 (circumflex) + U+0300 (grave) — final grapheme is one
|
||||
// cluster but the previous-grapheme slice is multi-codepoint. Width
|
||||
// is 1 but the bypass would be unsafe because the rendered cell
|
||||
// already contained the combined glyph.
|
||||
const s = 'hello' + 'e\u0302\u0300'
|
||||
expect(canFastBackspaceShape(s, s.length)).toBe(false)
|
||||
})
|
||||
|
||||
it('rejects deleting an emoji', () => {
|
||||
expect(canFastBackspaceShape('hi🙂', 'hi🙂'.length)).toBe(false)
|
||||
})
|
||||
})
|
||||
|
|
@ -179,6 +179,84 @@ export function lineNav(s: string, p: number, dir: -1 | 1): null | number {
|
|||
|
||||
export { offsetFromPosition }
|
||||
|
||||
const ASCII_PRINTABLE_RE = /^[\x20-\x7e]+$/
|
||||
|
||||
/**
|
||||
* Pure shape-only precondition for the fast-echo append path.
|
||||
*
|
||||
* The fast-echo path bypasses Ink's renderer and writes text directly to
|
||||
* stdout, so the stored value, the rendered terminal cells, and the cursor
|
||||
* column must all stay in sync without any layout work. We only allow it
|
||||
* when the inserted text is pure printable ASCII so that:
|
||||
*
|
||||
* - `text.length` matches the number of grapheme clusters (no combining
|
||||
* marks, no surrogate pairs, no precomposed CJK / Latin-Extended
|
||||
* letters that an IME might still be holding open as a composition),
|
||||
* - terminal width is exactly 1 cell per character (no East-Asian wide,
|
||||
* no zero-width, no ambiguous-width fonts),
|
||||
* - input methods (Vietnamese Telex, IME, dead-keys) cannot leak
|
||||
* intermediate composition bytes through the bypass before the final
|
||||
* commit arrives — those always go through the normal Ink render path
|
||||
* and stay layout-accurate (closes #5221, #7443, #17602/#17603).
|
||||
*
|
||||
* We deliberately do NOT just check `stringWidth(text) === text.length`:
|
||||
* Vietnamese precomposed letters like "ề" (U+1EC1) report width 1 and
|
||||
* length 1 but are still produced by IME compositions and must not be
|
||||
* fast-echoed.
|
||||
*/
|
||||
export function canFastAppendShape(
|
||||
current: string,
|
||||
cursor: number,
|
||||
text: string,
|
||||
columns: number,
|
||||
currentLineWidth: number
|
||||
): boolean {
|
||||
if (cursor !== current.length) {
|
||||
return false
|
||||
}
|
||||
|
||||
if (current.length === 0) {
|
||||
return false
|
||||
}
|
||||
|
||||
if (current.includes('\n')) {
|
||||
return false
|
||||
}
|
||||
|
||||
if (!ASCII_PRINTABLE_RE.test(text)) {
|
||||
return false
|
||||
}
|
||||
|
||||
return currentLineWidth + text.length < Math.max(1, columns)
|
||||
}
|
||||
|
||||
/**
|
||||
* Pure shape-only precondition for the fast-echo backspace path.
|
||||
*
|
||||
* Same reasoning as canFastAppendShape — only allow the direct
|
||||
* "\b \b" stdout shortcut when the deleted grapheme is pure printable
|
||||
* ASCII. Anything else (combining marks, IME compositions, wide chars,
|
||||
* tabs, ANSI fragments) goes through the normal render path so Ink can
|
||||
* recompute cell widths.
|
||||
*/
|
||||
export function canFastBackspaceShape(current: string, cursor: number): boolean {
|
||||
if (cursor !== current.length) {
|
||||
return false
|
||||
}
|
||||
|
||||
if (cursor <= 0) {
|
||||
return false
|
||||
}
|
||||
|
||||
if (current.includes('\n')) {
|
||||
return false
|
||||
}
|
||||
|
||||
const removed = current.slice(prevPos(current, cursor), cursor)
|
||||
|
||||
return ASCII_PRINTABLE_RE.test(removed)
|
||||
}
|
||||
|
||||
function renderWithCursor(value: string, cursor: number) {
|
||||
const pos = Math.max(0, Math.min(cursor, value.length))
|
||||
|
||||
|
|
@ -444,26 +522,11 @@ export function TextInput({
|
|||
|
||||
const canFastEchoBase = () => focus && termFocus && !selected && !mask && !!stdout?.isTTY
|
||||
|
||||
const canFastAppend = (current: string, cursor: number, text: string) => {
|
||||
const sw = stringWidth(text)
|
||||
const canFastAppend = (current: string, cursor: number, text: string) =>
|
||||
canFastEchoBase() && canFastAppendShape(current, cursor, text, columns, lineWidthRef.current)
|
||||
|
||||
return (
|
||||
canFastEchoBase() &&
|
||||
cursor === current.length &&
|
||||
current.length > 0 &&
|
||||
!current.includes('\n') &&
|
||||
sw === text.length &&
|
||||
lineWidthRef.current + sw < Math.max(1, columns)
|
||||
)
|
||||
}
|
||||
|
||||
const canFastBackspace = (current: string, cursor: number) => {
|
||||
if (!canFastEchoBase() || cursor !== current.length || cursor <= 0 || current.includes('\n')) {
|
||||
return false
|
||||
}
|
||||
|
||||
return stringWidth(current.slice(prevPos(current, cursor), cursor)) === 1
|
||||
}
|
||||
const canFastBackspace = (current: string, cursor: number) =>
|
||||
canFastEchoBase() && canFastBackspaceShape(current, cursor)
|
||||
|
||||
const commit = (
|
||||
next: string,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue