fix(cli,tui): align CJK / wide-char markdown tables (#23863)

CJK and emoji glyphs render as two terminal cells but JS String#length
and the model's own padding count them as one, so any markdown table
with Chinese / Japanese / Korean cells drifts right per row when a
real terminal renders it. Both surfaces fix this with a display-cell
width measurement (wcswidth on the Python side, stringWidth on the
TUI side).

Changes:
- agent/markdown_tables.py: new helper. realign_markdown_tables(text)
  detects markdown table blocks (header + |---| divider) and
  rewrites the row padding using wcwidth.wcswidth so every pipe and
  dash lines up across rows. No-op on text without tables.
- cli.py: hook the helper into _render_final_assistant_content for
  strip / render modes (raw passes through untouched), and into the
  streaming line emitter so live token-by-token rendering also
  produces aligned tables. A small two-buffer state machine in
  _emit_stream_text holds table rows until the block ends, then
  flushes them through the realigner so all rows pad to a single
  per-column width.
- ui-tui/src/components/markdown.tsx: renderTable now uses
  stringWidth (Bun.stringWidth fast path + East-Asian-width-aware
  fallback, already memoised in @hermes/ink) instead of UTF-16
  String#length for both column-width measurement and per-cell
  padding. Drops the comment that documented the bug as a deliberate
  limitation.

Validation:
- New tests/agent/test_markdown_tables.py (11 tests): every rebuilt
  block shares pipe column offsets across rows for pure CJK, mixed
  CJK+emoji, ragged-row, and multi-table inputs.
- Updated tests/cli/test_cli_markdown_rendering.py: the existing
  strip-mode test asserted exact whitespace; rewritten to assert the
  alignment contract (cell content survives + every rendered row
  shares pipe offsets).
- One new ui-tui markdown.test.ts case: the rendered column-2 start
  offset is identical for the header and every body row, including
  the CJK row that drifted before the fix.
- Live: hermes chat -q with the user-reported screenshot prompt now
  produces a perfectly aligned table on the wire (header, divider,
  4 body rows including '通义千问', all pipes at identical columns).
This commit is contained in:
Teknium 2026-05-11 11:13:06 -07:00 committed by GitHub
parent 657874460f
commit 1d00716754
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 559 additions and 15 deletions

94
cli.py
View file

@ -87,6 +87,11 @@ from agent.usage_pricing import (
format_duration_compact,
format_token_count_compact,
)
from agent.markdown_tables import (
is_table_divider,
looks_like_table_row,
realign_markdown_tables,
)
# NOTE: `from agent.account_usage import ...` is deliberately NOT at module
# top — it transitively pulls the OpenAI SDK chain (~230 ms cold) and is only
# needed when the user runs `/limits`. Lazy-imported inside the handler below.
@ -1355,12 +1360,21 @@ def _render_final_assistant_content(text: str, mode: str = "render"):
normalized_mode = str(mode or "render").strip().lower()
if normalized_mode == "strip":
return _RichText(_strip_markdown_syntax(text))
# Strip first — inline markdown inside cells (`code`, **bold**, ~~strike~~)
# changes cell display width — then re-align so the column padding
# reflects the final visible text, not the marker-decorated source.
return _RichText(realign_markdown_tables(_strip_markdown_syntax(text)))
if normalized_mode == "raw":
return _rich_text_from_ansi(text or "")
# `render` mode: Rich's Markdown renderer handles CJK width via wcwidth
# internally, so a pre-pass through realign_markdown_tables would just
# rewrite already-correct padding. But on the way in we still want to
# normalise model-emitted under-padded tables so that mid-render fallbacks
# (narrow panels, etc.) at least see consistent input.
plain = _rich_text_from_ansi(text or "").plain
plain = _preserve_windows_dot_segments_for_markdown(plain)
plain = realign_markdown_tables(plain)
return Markdown(plain)
@ -2331,6 +2345,12 @@ class HermesCLI:
self._stream_started = False # True once first delta arrives
self._stream_box_opened = False # True once the response box header is printed
self._reasoning_preview_buf = "" # Coalesce tiny reasoning chunks for [thinking] output
# Table-row buffer. When a streamed line looks like it could be
# part of a markdown table, hold it here until the block ends so
# we can re-pad with wcwidth-aware widths. Empty by default;
# populated only while `_in_stream_table` is True.
self._stream_table_buf: list[str] = []
self._in_stream_table = False
self._pending_edit_snapshots = {}
self._last_input_mode_recovery = 0.0
self._input_mode_recovery_notice_shown = False
@ -3624,11 +3644,51 @@ class HermesCLI:
# Emit complete lines, keep partial remainder in buffer
_tc = getattr(self, "_stream_text_ansi", "")
def _emit_one(printed_line: str) -> None:
    """Print one streamed line behind the stream pad.

    When a stream text colour (`_tc`) is set, the line is wrapped in that
    colour and followed by the reset sequence; otherwise it is printed
    with the pad only.
    """
    if _tc:
        _cprint(f"{_STREAM_PAD}{_tc}{printed_line}{_RST}")
    else:
        _cprint(f"{_STREAM_PAD}{printed_line}")
def _flush_table_buf() -> None:
    """Emit the held table rows as one realigned block and reset table state.

    The table buffer and the in-table flag are always cleared, even when
    nothing was buffered; in that case nothing is printed.
    """
    pending_rows, self._stream_table_buf = self._stream_table_buf, []
    self._in_stream_table = False
    if pending_rows:
        table_text = "\n".join(pending_rows)
        # In strip mode, remove cell-level markdown (`code`, **bold**,
        # ~~strike~~) BEFORE realigning, so padding is computed from the
        # text that is left after stripping rather than from the
        # marker-decorated source. Other modes realign the rows as-is.
        if self.final_response_markdown == "strip":
            table_text = _strip_markdown_syntax(table_text)
        for aligned_row in realign_markdown_tables(table_text).split("\n"):
            _emit_one(aligned_row)
while "\n" in self._stream_buf:
line, self._stream_buf = self._stream_buf.split("\n", 1)
# Hold table-shaped lines in a side-buffer so we can re-pad
# the whole block once it ends. Streaming line-by-line, we
# cannot re-align mid-table without reflowing already-printed
# rows; the cost is that the user sees the table appear in a
# single batch when the block closes instead of row-by-row.
if self._in_stream_table:
if looks_like_table_row(line) or is_table_divider(line):
self._stream_table_buf.append(line)
continue
# Block ended — flush the realigned table, then fall
# through to print the current (non-table) line.
_flush_table_buf()
elif looks_like_table_row(line):
self._stream_table_buf.append(line)
self._in_stream_table = True
continue
if self.final_response_markdown == "strip":
line = _strip_markdown_syntax(line)
_cprint(f"{_STREAM_PAD}{_tc}{line}{_RST}" if _tc else f"{_STREAM_PAD}{line}")
_emit_one(line)
def _flush_stream(self) -> None:
"""Emit any remaining partial line from the stream buffer and close the box."""
@ -3643,8 +3703,34 @@ class HermesCLI:
# Close reasoning box if still open (in case no content tokens arrived)
self._close_reasoning_box()
_tc = getattr(self, "_stream_text_ansi", "")
# If the stream buffer has a trailing partial line that looks like
# a table row, fold it into the table buffer so the whole block
# gets re-aligned together. Otherwise the final row prints raw
# (with the model's original under-padded spacing) while the rows
# above it are aligned.
if (
self._stream_buf
and getattr(self, "_in_stream_table", False)
and (looks_like_table_row(self._stream_buf) or is_table_divider(self._stream_buf))
):
self._stream_table_buf.append(self._stream_buf)
self._stream_buf = ""
# Flush any buffered table rows first so their padding is
# finalised before the stream remainder lands.
if getattr(self, "_stream_table_buf", None):
joined = "\n".join(self._stream_table_buf)
self._stream_table_buf = []
self._in_stream_table = False
if self.final_response_markdown == "strip":
joined = _strip_markdown_syntax(joined)
block = realign_markdown_tables(joined)
for ln in block.split("\n"):
_cprint(f"{_STREAM_PAD}{_tc}{ln}{_RST}" if _tc else f"{_STREAM_PAD}{ln}")
if self._stream_buf:
_tc = getattr(self, "_stream_text_ansi", "")
line = _strip_markdown_syntax(self._stream_buf) if self.final_response_markdown == "strip" else self._stream_buf
_cprint(f"{_STREAM_PAD}{_tc}{line}{_RST}" if _tc else f"{_STREAM_PAD}{line}")
self._stream_buf = ""
@ -3667,6 +3753,8 @@ class HermesCLI:
self._reasoning_buf = ""
self._reasoning_preview_buf = ""
self._deferred_content = ""
self._stream_table_buf = []
self._in_stream_table = False
def _slow_command_status(self, command: str) -> str:
"""Return a user-facing status message for slower slash commands."""