mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-18 04:41:56 +00:00
fix(cli,tui): align CJK / wide-char markdown tables (#23863)
CJK and emoji glyphs render as two terminal cells but JS String#length and the model's own padding count them as one, so any markdown table with Chinese / Japanese / Korean cells drifts right per row when a real terminal renders it. Both surfaces fix this with a display-cell width measurement (wcswidth on the Python side, stringWidth on the TUI side).

Changes:
- agent/markdown_tables.py: new helper. realign_markdown_tables(text) detects markdown table blocks (header + |---| divider) and rewrites the row padding using wcwidth.wcswidth so every pipe and dash lines up across rows. No-op on text without tables.
- cli.py: hook the helper into _render_final_assistant_content for strip / render modes (raw passes through untouched), and into the streaming line emitter so live token-by-token rendering also produces aligned tables. A small two-buffer state machine in _emit_stream_text holds table rows until the block ends, then flushes them through the realigner so all rows pad to a single per-column width.
- ui-tui/src/components/markdown.tsx: renderTable now uses stringWidth (Bun.stringWidth fast path + East-Asian-width-aware fallback, already memoised in @hermes/ink) instead of UTF-16 String#length for both column-width measurement and per-cell padding. Drops the comment that documented the bug as a deliberate limitation.

Validation:
- New tests/agent/test_markdown_tables.py (11): every rebuilt block shares pipe column offsets across rows for pure CJK, mixed CJK+emoji, ragged-row, and multi-table inputs.
- Updated tests/cli/test_cli_markdown_rendering.py: the existing strip-mode test asserted exact whitespace; rewritten to assert the alignment contract (cell content survives + every rendered row shares pipe offsets).
- New ui-tui markdown.test.ts case (1): rendered column-2 start offset is identical for the header + every body row, including the CJK row that drifted before the fix.
- Live: hermes chat -q with the user-reported screenshot prompt now produces a perfectly aligned table on the wire (header, divider, 4 body rows including '通义千问', all pipes at identical columns).
This commit is contained in:
parent
657874460f
commit
1d00716754
6 changed files with 559 additions and 15 deletions
|
|
@ -217,3 +217,50 @@ describe('Md wrapping', () => {
|
|||
expect(lines.some(line => line.startsWith(' hi ok'))).toBe(true)
|
||||
})
|
||||
})
|
||||
|
||||
describe('renderTable CJK width alignment', () => {
|
||||
it('column starts share the same display offset across CJK rows', async () => {
|
||||
const { stringWidth } = await import('@hermes/ink')
|
||||
|
||||
const md = [
|
||||
'| 配置 | Config | 状态 |',
|
||||
'|------|--------|------|',
|
||||
'| Vicuna (report) | dense | × |',
|
||||
'| ChatGLM | chat | ✓ |',
|
||||
'| 通义千问 | qwen | × |'
|
||||
].join('\n')
|
||||
|
||||
// Pre-fix bug: ` `.repeat(w - stripInlineMarkup(...).length) measured
|
||||
// UTF-16 code units, so each CJK char was counted as one cell even
|
||||
// though it renders as two, drifting subsequent columns right by 1
|
||||
// cell per CJK char in the row.
|
||||
//
|
||||
// Post-fix contract: the prefix preceding the start of column N
|
||||
// has the same display width across the header and every body row
|
||||
// (deduped to skip the divider, which renders independently).
|
||||
const lines = renderPlain(
|
||||
React.createElement(Box, null, React.createElement(Md, { compact: true, t: DEFAULT_THEME, text: md }))
|
||||
).filter(line => line.trim().length > 0)
|
||||
|
||||
// Heuristic: a "data row" line either contains 'Config' (header)
|
||||
// or one of the body labels; a divider is all box-drawing. Use
|
||||
// the substring 'Config' / 'dense' / 'chat' / 'qwen' as the
|
||||
// unique anchor for column 2's start position on each row.
|
||||
const colStarts = (line: string, anchor: string): number => {
|
||||
const idx = line.indexOf(anchor)
|
||||
return idx < 0 ? -1 : stringWidth(line.slice(0, idx))
|
||||
}
|
||||
|
||||
const headerCol2 = lines.map(l => colStarts(l, 'Config')).find(v => v >= 0)
|
||||
const denseCol2 = lines.map(l => colStarts(l, 'dense')).find(v => v >= 0)
|
||||
const chatCol2 = lines.map(l => colStarts(l, 'chat')).find(v => v >= 0)
|
||||
const qwenCol2 = lines.map(l => colStarts(l, 'qwen')).find(v => v >= 0)
|
||||
|
||||
expect(headerCol2).toBeDefined()
|
||||
expect(denseCol2).toBe(headerCol2)
|
||||
expect(chatCol2).toBe(headerCol2)
|
||||
// The CJK row is the one that drifted before the fix. It must
|
||||
// align with the rest now.
|
||||
expect(qwenCol2).toBe(headerCol2)
|
||||
})
|
||||
})
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
import { Box, Link, Text } from '@hermes/ink'
|
||||
import { Box, Link, stringWidth, Text } from '@hermes/ink'
|
||||
import { Fragment, memo, type ReactNode, useMemo } from 'react'
|
||||
|
||||
import { ensureEmojiPresentation } from '../lib/emoji.js'
|
||||
|
|
@ -170,16 +170,22 @@ export const stripInlineMarkup = (v: string) =>
|
|||
.replace(/\\\(([^\n]+?)\\\)/g, '$1')
|
||||
|
||||
const renderTable = (k: number, rows: string[][], t: Theme) => {
|
||||
const widths = rows[0]!.map((_, ci) => Math.max(...rows.map(r => stripInlineMarkup(r[ci] ?? '').length)))
|
||||
// Column widths in *display cells*, not UTF-16 code units. CJK
|
||||
// glyphs and most emoji render as two cells but `String#length`
|
||||
// counts them as one, which collapses Chinese / Japanese / Korean
|
||||
// tables into drift across rows. `stringWidth` (Bun.stringWidth
|
||||
// fast path + an East-Asian-width-aware fallback, memoised in
|
||||
// @hermes/ink) returns the actual cell count.
|
||||
const cellWidth = (raw: string) => stringWidth(stripInlineMarkup(raw))
|
||||
|
||||
const widths = rows[0]!.map((_, ci) => Math.max(...rows.map(r => cellWidth(r[ci] ?? ''))))
|
||||
|
||||
// Thin divider under the header. Without it tables look like prose
|
||||
// with extra spacing because the header is just accent-coloured text
|
||||
// (#15534). We avoid full borders on purpose — column widths come
|
||||
// from `stripInlineMarkup(...).length` (UTF-16 code units, not
|
||||
// display width), so a real outline often misaligns on emoji and
|
||||
// East-Asian wide characters; one dim solid rule (`─`) under row 0
|
||||
// plus tab-style column gaps reads cleanly on every terminal we
|
||||
// tested.
|
||||
// from `stringWidth(...)`, so the dividers and the row content stay
|
||||
// in sync on CJK / emoji tables; tab-style column gaps still read
|
||||
// cleanly without the boxed look.
|
||||
const sep = widths.map(w => '─'.repeat(Math.max(1, w))).join(' ')
|
||||
|
||||
return (
|
||||
|
|
@ -190,7 +196,7 @@ const renderTable = (k: number, rows: string[][], t: Theme) => {
|
|||
{widths.map((w, ci) => (
|
||||
<Text bold={ri === 0} color={ri === 0 ? t.color.accent : undefined} key={ci}>
|
||||
<MdInline t={t} text={row[ci] ?? ''} />
|
||||
{' '.repeat(Math.max(0, w - stripInlineMarkup(row[ci] ?? '').length))}
|
||||
{' '.repeat(Math.max(0, w - cellWidth(row[ci] ?? '')))}
|
||||
{ci < widths.length - 1 ? ' ' : ''}
|
||||
</Text>
|
||||
))}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue