mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-18 04:41:56 +00:00
fix(cli,tui): align CJK / wide-char markdown tables (#23863)
CJK and emoji glyphs render as two terminal cells but JS String#length and the model's own padding count them as one, so any markdown table with Chinese / Japanese / Korean cells drifts right per row when a real terminal renders it. Both surfaces fix this with a display-cell width measurement (wcswidth on the Python side, stringWidth on the TUI side). Changes: - agent/markdown_tables.py: new helper. realign_markdown_tables(text) detects markdown table blocks (header + |---| divider) and rewrites the row padding using wcwidth.wcswidth so every pipe and dash lines up across rows. No-op on text without tables. - cli.py: hook the helper into _render_final_assistant_content for strip / render modes (raw passes through untouched), and into the streaming line emitter so live token-by-token rendering also produces aligned tables. A small two-buffer state machine in _emit_stream_text holds table rows until the block ends, then flushes them through the realigner so all rows pad to a single per-column width. - ui-tui/src/components/markdown.tsx: renderTable now uses stringWidth (Bun.stringWidth fast path + East-Asian-width-aware fallback, already memoised in @hermes/ink) instead of UTF-16 String#length for both column-width measurement and per-cell padding. Drops the comment that documented the bug as a deliberate limitation. Validation: - New tests/agent/test_markdown_tables.py (11): every rebuilt block shares pipe column offsets across rows for pure CJK, mixed CJK+emoji, ragged-row, and multi-table inputs. - Updated tests/cli/test_cli_markdown_rendering.py: the existing strip-mode test asserted exact whitespace; rewritten to assert the alignment contract (cell content survives + every rendered row shares pipe offsets). - New ui-tui markdown.test.ts case (1): rendered column-2 start offset is identical for the header + every body row, including the CJK row that drifted before the fix. - Live: hermes chat -q with the user-reported screenshot prompt now produces a perfectly aligned table on the wire (header, divider, 4 body rows including '通义千问', all pipes at identical columns).
This commit is contained in:
parent
657874460f
commit
1d00716754
6 changed files with 559 additions and 15 deletions
170
agent/markdown_tables.py
Normal file
170
agent/markdown_tables.py
Normal file
|
|
@ -0,0 +1,170 @@
|
|||
"""CJK/wide-character-aware re-alignment of model-emitted markdown tables.
|
||||
|
||||
Models pad markdown tables assuming each character occupies one terminal
|
||||
cell. CJK glyphs and most emoji render as two cells, so the model's
|
||||
spacing collapses into drift the moment a table reaches a real terminal —
|
||||
header pipes line up, every body row drifts right by N cells per CJK
|
||||
char.
|
||||
|
||||
This module rebuilds row padding using ``wcwidth.wcswidth`` (display
|
||||
columns), preserving the table's pipes and dashes so it still reads as a
|
||||
plain-text table in ``strip`` / unrendered display modes. Standard Rich
|
||||
markdown rendering already aligns CJK correctly inside a wide enough
|
||||
panel; this helper is for the paths that print the model's text more or
|
||||
less verbatim.
|
||||
|
||||
The helper is deliberately conservative:
|
||||
|
||||
* Only contiguous ``| ... |`` blocks with a divider line are rewritten.
|
||||
* Anything that does not look like a table is passed through unchanged.
|
||||
* Single-line / mid-stream fragments are left alone — callers buffer
|
||||
table rows and flush them once the block is complete.
|
||||
|
||||
There is a small, intentional caveat: ``wcwidth`` returns ``-1`` for some
|
||||
emoji-with-variation-selector sequences (e.g. ``⚠️``); we clamp those to
|
||||
0 so they do not corrupt the column width math. The 1-cell drift on
|
||||
those specific glyphs is preferable to silently widening every table
|
||||
that contains one.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from typing import List
|
||||
|
||||
from wcwidth import wcswidth
|
||||
|
||||
__all__ = [
|
||||
"is_table_divider",
|
||||
"looks_like_table_row",
|
||||
"realign_markdown_tables",
|
||||
"split_table_row",
|
||||
]
|
||||
|
||||
|
||||
_DIVIDER_CELL_RE = re.compile(r"^\s*:?-{3,}:?\s*$")
|
||||
_MIN_COL_WIDTH = 3 # matches the divider's minimum dash run.
|
||||
|
||||
|
||||
def _disp_width(s: str) -> int:
|
||||
"""``wcswidth`` clamped to a non-negative integer.
|
||||
|
||||
``wcswidth`` returns ``-1`` when it encounters a control char or an
|
||||
unknown sequence; treat those as zero-width rather than letting a
|
||||
negative number flow into ``max`` and break the column-width math.
|
||||
"""
|
||||
|
||||
w = wcswidth(s)
|
||||
return w if w > 0 else 0
|
||||
|
||||
|
||||
def _pad_to_width(s: str, target: int) -> str:
|
||||
return s + " " * max(0, target - _disp_width(s))
|
||||
|
||||
|
||||
def split_table_row(row: str) -> List[str]:
|
||||
"""Split ``| a | b | c |`` into ``["a", "b", "c"]`` with trims."""
|
||||
|
||||
s = row.strip()
|
||||
if s.startswith("|"):
|
||||
s = s[1:]
|
||||
if s.endswith("|"):
|
||||
s = s[:-1]
|
||||
return [c.strip() for c in s.split("|")]
|
||||
|
||||
|
||||
def is_table_divider(row: str) -> bool:
|
||||
"""True when ``row`` is a markdown table separator line."""
|
||||
|
||||
cells = split_table_row(row)
|
||||
return len(cells) > 1 and all(_DIVIDER_CELL_RE.match(c) for c in cells)
|
||||
|
||||
|
||||
def looks_like_table_row(row: str) -> bool:
|
||||
"""True when ``row`` could plausibly be a markdown table row.
|
||||
|
||||
Used by streaming callers to decide whether to buffer an in-flight
|
||||
line. We are intentionally permissive here — the realigner itself
|
||||
only rewrites blocks that are accompanied by a divider, so a false
|
||||
positive here at most delays the print of one line.
|
||||
"""
|
||||
|
||||
if "|" not in row:
|
||||
return False
|
||||
stripped = row.strip()
|
||||
if not stripped:
|
||||
return False
|
||||
# A leading pipe is the strongest signal; without it we still allow
|
||||
# rows with at least two pipes so models that omit the leading pipe
|
||||
# don't slip past us.
|
||||
if stripped.startswith("|"):
|
||||
return True
|
||||
return stripped.count("|") >= 2
|
||||
|
||||
|
||||
def _render_block(rows: List[List[str]]) -> List[str]:
|
||||
"""Render ``rows`` (header + body, divider implied) at uniform widths."""
|
||||
|
||||
ncols = max(len(r) for r in rows)
|
||||
rows = [r + [""] * (ncols - len(r)) for r in rows]
|
||||
|
||||
widths = [
|
||||
max(_MIN_COL_WIDTH, *(_disp_width(r[c]) for r in rows))
|
||||
for c in range(ncols)
|
||||
]
|
||||
|
||||
def _row(cells: List[str]) -> str:
|
||||
return (
|
||||
"| "
|
||||
+ " | ".join(_pad_to_width(c, widths[k]) for k, c in enumerate(cells))
|
||||
+ " |"
|
||||
)
|
||||
|
||||
out = [_row(rows[0])]
|
||||
out.append("|" + "|".join("-" * (w + 2) for w in widths) + "|")
|
||||
for r in rows[1:]:
|
||||
out.append(_row(r))
|
||||
return out
|
||||
|
||||
|
||||
def realign_markdown_tables(text: str) -> str:
|
||||
"""Rewrite every ``| ... |`` + divider block with wcwidth-aware padding.
|
||||
|
||||
Lines that are not part of a recognised table are returned verbatim,
|
||||
so this is safe to apply to arbitrary assistant prose.
|
||||
"""
|
||||
|
||||
if "|" not in text:
|
||||
return text
|
||||
|
||||
lines = text.split("\n")
|
||||
out: List[str] = []
|
||||
i = 0
|
||||
n = len(lines)
|
||||
|
||||
while i < n:
|
||||
line = lines[i]
|
||||
# A table starts with a header row whose next line is a divider.
|
||||
if (
|
||||
"|" in line
|
||||
and i + 1 < n
|
||||
and is_table_divider(lines[i + 1])
|
||||
):
|
||||
header = split_table_row(line)
|
||||
body: List[List[str]] = []
|
||||
j = i + 2
|
||||
while j < n and "|" in lines[j] and lines[j].strip():
|
||||
if is_table_divider(lines[j]):
|
||||
j += 1
|
||||
continue
|
||||
body.append(split_table_row(lines[j]))
|
||||
j += 1
|
||||
|
||||
if any(c for c in header) or body:
|
||||
out.extend(_render_block([header] + body))
|
||||
i = j
|
||||
continue
|
||||
out.append(line)
|
||||
i += 1
|
||||
|
||||
return "\n".join(out)
|
||||
94
cli.py
94
cli.py
|
|
@ -87,6 +87,11 @@ from agent.usage_pricing import (
|
|||
format_duration_compact,
|
||||
format_token_count_compact,
|
||||
)
|
||||
from agent.markdown_tables import (
|
||||
is_table_divider,
|
||||
looks_like_table_row,
|
||||
realign_markdown_tables,
|
||||
)
|
||||
# NOTE: `from agent.account_usage import ...` is deliberately NOT at module
|
||||
# top — it transitively pulls the OpenAI SDK chain (~230 ms cold) and is only
|
||||
# needed when the user runs `/limits`. Lazy-imported inside the handler below.
|
||||
|
|
@ -1355,12 +1360,21 @@ def _render_final_assistant_content(text: str, mode: str = "render"):
|
|||
|
||||
normalized_mode = str(mode or "render").strip().lower()
|
||||
if normalized_mode == "strip":
|
||||
return _RichText(_strip_markdown_syntax(text))
|
||||
# Strip first — inline markdown inside cells (`code`, **bold**, ~~strike~~)
|
||||
# changes cell display width — then re-align so the column padding
|
||||
# reflects the final visible text, not the marker-decorated source.
|
||||
return _RichText(realign_markdown_tables(_strip_markdown_syntax(text)))
|
||||
if normalized_mode == "raw":
|
||||
return _rich_text_from_ansi(text or "")
|
||||
|
||||
# `render` mode: Rich's Markdown renderer handles CJK width via wcwidth
|
||||
# internally, so a pre-pass through realign_markdown_tables would just
|
||||
# rewrite already-correct padding. But on the way in we still want to
|
||||
# normalise model-emitted under-padded tables so that mid-render fallbacks
|
||||
# (narrow panels, etc.) at least see consistent input.
|
||||
plain = _rich_text_from_ansi(text or "").plain
|
||||
plain = _preserve_windows_dot_segments_for_markdown(plain)
|
||||
plain = realign_markdown_tables(plain)
|
||||
return Markdown(plain)
|
||||
|
||||
|
||||
|
|
@ -2331,6 +2345,12 @@ class HermesCLI:
|
|||
self._stream_started = False # True once first delta arrives
|
||||
self._stream_box_opened = False # True once the response box header is printed
|
||||
self._reasoning_preview_buf = "" # Coalesce tiny reasoning chunks for [thinking] output
|
||||
# Table-row buffer. When a streamed line looks like it could be
|
||||
# part of a markdown table, hold it here until the block ends so
|
||||
# we can re-pad with wcwidth-aware widths. Empty by default;
|
||||
# populated only while `_in_stream_table` is True.
|
||||
self._stream_table_buf: list[str] = []
|
||||
self._in_stream_table = False
|
||||
self._pending_edit_snapshots = {}
|
||||
self._last_input_mode_recovery = 0.0
|
||||
self._input_mode_recovery_notice_shown = False
|
||||
|
|
@ -3624,11 +3644,51 @@ class HermesCLI:
|
|||
|
||||
# Emit complete lines, keep partial remainder in buffer
|
||||
_tc = getattr(self, "_stream_text_ansi", "")
|
||||
|
||||
def _emit_one(printed_line: str) -> None:
|
||||
_cprint(f"{_STREAM_PAD}{_tc}{printed_line}{_RST}" if _tc else f"{_STREAM_PAD}{printed_line}")
|
||||
|
||||
def _flush_table_buf() -> None:
|
||||
buf = self._stream_table_buf
|
||||
self._stream_table_buf = []
|
||||
self._in_stream_table = False
|
||||
if not buf:
|
||||
return
|
||||
# Strip cell-level markdown (`code`, **bold**, ~~strike~~) FIRST
|
||||
# so the realigner pads to the final visible cell width, not
|
||||
# the marker-decorated source width. Otherwise a body row
|
||||
# like `` | Bold | `**bold**` | `` lands narrower than its
|
||||
# header column once the markers are removed.
|
||||
joined = "\n".join(buf)
|
||||
if self.final_response_markdown == "strip":
|
||||
joined = _strip_markdown_syntax(joined)
|
||||
block = realign_markdown_tables(joined)
|
||||
for ln in block.split("\n"):
|
||||
_emit_one(ln)
|
||||
|
||||
while "\n" in self._stream_buf:
|
||||
line, self._stream_buf = self._stream_buf.split("\n", 1)
|
||||
|
||||
# Hold table-shaped lines in a side-buffer so we can re-pad
|
||||
# the whole block once it ends. Streaming line-by-line, we
|
||||
# cannot re-align mid-table without reflowing already-printed
|
||||
# rows; the cost is that the user sees the table appear in a
|
||||
# single batch when the block closes instead of row-by-row.
|
||||
if self._in_stream_table:
|
||||
if looks_like_table_row(line) or is_table_divider(line):
|
||||
self._stream_table_buf.append(line)
|
||||
continue
|
||||
# Block ended — flush the realigned table, then fall
|
||||
# through to print the current (non-table) line.
|
||||
_flush_table_buf()
|
||||
elif looks_like_table_row(line):
|
||||
self._stream_table_buf.append(line)
|
||||
self._in_stream_table = True
|
||||
continue
|
||||
|
||||
if self.final_response_markdown == "strip":
|
||||
line = _strip_markdown_syntax(line)
|
||||
_cprint(f"{_STREAM_PAD}{_tc}{line}{_RST}" if _tc else f"{_STREAM_PAD}{line}")
|
||||
_emit_one(line)
|
||||
|
||||
def _flush_stream(self) -> None:
|
||||
"""Emit any remaining partial line from the stream buffer and close the box."""
|
||||
|
|
@ -3643,8 +3703,34 @@ class HermesCLI:
|
|||
# Close reasoning box if still open (in case no content tokens arrived)
|
||||
self._close_reasoning_box()
|
||||
|
||||
_tc = getattr(self, "_stream_text_ansi", "")
|
||||
|
||||
# If the stream buffer has a trailing partial line that looks like
|
||||
# a table row, fold it into the table buffer so the whole block
|
||||
# gets re-aligned together. Otherwise the final row prints raw
|
||||
# (with the model's original under-padded spacing) while the rows
|
||||
# above it are aligned.
|
||||
if (
|
||||
self._stream_buf
|
||||
and getattr(self, "_in_stream_table", False)
|
||||
and (looks_like_table_row(self._stream_buf) or is_table_divider(self._stream_buf))
|
||||
):
|
||||
self._stream_table_buf.append(self._stream_buf)
|
||||
self._stream_buf = ""
|
||||
|
||||
# Flush any buffered table rows first so their padding is
|
||||
# finalised before the stream remainder lands.
|
||||
if getattr(self, "_stream_table_buf", None):
|
||||
joined = "\n".join(self._stream_table_buf)
|
||||
self._stream_table_buf = []
|
||||
self._in_stream_table = False
|
||||
if self.final_response_markdown == "strip":
|
||||
joined = _strip_markdown_syntax(joined)
|
||||
block = realign_markdown_tables(joined)
|
||||
for ln in block.split("\n"):
|
||||
_cprint(f"{_STREAM_PAD}{_tc}{ln}{_RST}" if _tc else f"{_STREAM_PAD}{ln}")
|
||||
|
||||
if self._stream_buf:
|
||||
_tc = getattr(self, "_stream_text_ansi", "")
|
||||
line = _strip_markdown_syntax(self._stream_buf) if self.final_response_markdown == "strip" else self._stream_buf
|
||||
_cprint(f"{_STREAM_PAD}{_tc}{line}{_RST}" if _tc else f"{_STREAM_PAD}{line}")
|
||||
self._stream_buf = ""
|
||||
|
|
@ -3667,6 +3753,8 @@ class HermesCLI:
|
|||
self._reasoning_buf = ""
|
||||
self._reasoning_preview_buf = ""
|
||||
self._deferred_content = ""
|
||||
self._stream_table_buf = []
|
||||
self._in_stream_table = False
|
||||
|
||||
def _slow_command_status(self, command: str) -> str:
|
||||
"""Return a user-facing status message for slower slash commands."""
|
||||
|
|
|
|||
210
tests/agent/test_markdown_tables.py
Normal file
210
tests/agent/test_markdown_tables.py
Normal file
|
|
@ -0,0 +1,210 @@
|
|||
"""Tests for `agent.markdown_tables.realign_markdown_tables`.
|
||||
|
||||
These cover the alignment guarantee on CJK / wide-character tables and
|
||||
the conservative no-op behaviour on non-table input.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from textwrap import dedent
|
||||
|
||||
from wcwidth import wcswidth
|
||||
|
||||
from agent.markdown_tables import (
|
||||
is_table_divider,
|
||||
looks_like_table_row,
|
||||
realign_markdown_tables,
|
||||
split_table_row,
|
||||
)
|
||||
|
||||
|
||||
def _column_offsets(line: str) -> list[int]:
|
||||
"""Return the display-cell index of every ``|`` in ``line``."""
|
||||
|
||||
cells: list[int] = []
|
||||
width = 0
|
||||
for ch in line:
|
||||
if ch == "|":
|
||||
cells.append(width)
|
||||
# wcswidth on a single char; clamp negatives.
|
||||
w = wcswidth(ch)
|
||||
width += w if w > 0 else 1
|
||||
return cells
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# split_table_row / is_table_divider / looks_like_table_row
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_split_strips_outer_pipes_and_trims():
|
||||
assert split_table_row("| a | b | c |") == ["a", "b", "c"]
|
||||
assert split_table_row("|配置|状态|") == ["配置", "状态"]
|
||||
assert split_table_row("a | b | c") == ["a", "b", "c"]
|
||||
|
||||
|
||||
def test_is_table_divider_handles_alignment_colons():
|
||||
assert is_table_divider("|---|---|")
|
||||
assert is_table_divider("| :--- | ---: | :---: |")
|
||||
assert not is_table_divider("| - | - |") # 1 dash is not a divider
|
||||
assert not is_table_divider("| a | b |")
|
||||
assert not is_table_divider("---") # single column, no pipes
|
||||
|
||||
|
||||
def test_looks_like_table_row():
|
||||
assert looks_like_table_row("| a | b |")
|
||||
assert looks_like_table_row("a | b | c") # no leading pipe, ≥2 pipes
|
||||
assert not looks_like_table_row("not a table")
|
||||
assert not looks_like_table_row("a | b") # one pipe, no leading pipe
|
||||
assert not looks_like_table_row("")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# realign_markdown_tables
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_no_op_on_text_without_tables():
|
||||
text = "Hello world\nThis has no | pipes table.\n"
|
||||
assert realign_markdown_tables(text) == text
|
||||
|
||||
|
||||
def test_no_op_when_pipes_but_no_divider():
|
||||
text = "echo a | grep b\necho c | wc -l\n"
|
||||
assert realign_markdown_tables(text) == text
|
||||
|
||||
|
||||
def test_cjk_table_pipes_align_across_rows():
|
||||
# Model-emitted (under-padded for CJK) input.
|
||||
src = dedent(
|
||||
"""\
|
||||
| 配置 | Config | 论文 (%) | 复现 (%) | 差值 | 状态 |
|
||||
|------|--------|---------|---------|------|------|
|
||||
| Vicuna (report) | dense | 79.30 | 未完成 | - | × |
|
||||
| ChatGLM | chat | 37.60 | 37.82 | +0.22 | ✓ |
|
||||
| 通义千问 | qwen | (无) | 报错 | - | × |
|
||||
"""
|
||||
)
|
||||
|
||||
out = realign_markdown_tables(src).rstrip("\n").split("\n")
|
||||
|
||||
# All rows in the rebuilt block must have pipes at identical display
|
||||
# columns — that's the alignment guarantee.
|
||||
offsets = [_column_offsets(row) for row in out]
|
||||
assert all(o == offsets[0] for o in offsets), (
|
||||
"rebuilt table rows do not share pipe column offsets:\n"
|
||||
+ "\n".join(out)
|
||||
)
|
||||
# And we expect 7 pipes per row (6 columns + outer borders).
|
||||
assert len(offsets[0]) == 7
|
||||
|
||||
|
||||
def test_emoji_with_cjk_table_aligns():
|
||||
src = dedent(
|
||||
"""\
|
||||
| 模型 | 状态 | 备注 |
|
||||
|------|------|------|
|
||||
| 千问 | ✅ | 通过 |
|
||||
| Claude | ✅ | 推理强 |
|
||||
| 文心一言 | ❌ | 报错 |
|
||||
"""
|
||||
)
|
||||
|
||||
out = realign_markdown_tables(src).rstrip("\n").split("\n")
|
||||
offsets = [_column_offsets(row) for row in out]
|
||||
# The emoji-with-variation-selector case (⚠️) intentionally tolerates
|
||||
# 1-cell drift; bare emoji like ✅ / ❌ have stable wcwidth and must
|
||||
# align. Use bare emoji here so the assertion is hard.
|
||||
assert all(o == offsets[0] for o in offsets), (
|
||||
"emoji+CJK rows do not share pipe column offsets:\n" + "\n".join(out)
|
||||
)
|
||||
|
||||
|
||||
def test_already_aligned_ascii_table_remains_aligned():
|
||||
src = dedent(
|
||||
"""\
|
||||
| a | b |
|
||||
|-----|-----|
|
||||
| 1 | 2 |
|
||||
| foo | bar |
|
||||
"""
|
||||
)
|
||||
out = realign_markdown_tables(src).rstrip("\n").split("\n")
|
||||
offsets = [_column_offsets(row) for row in out]
|
||||
assert all(o == offsets[0] for o in offsets)
|
||||
|
||||
|
||||
def test_passes_non_table_lines_through_around_a_table():
|
||||
src = dedent(
|
||||
"""\
|
||||
Here is a comparison:
|
||||
|
||||
| 模型 | 状态 |
|
||||
|------|------|
|
||||
| 千问 | 通过 |
|
||||
|
||||
And some prose after.
|
||||
"""
|
||||
)
|
||||
|
||||
out = realign_markdown_tables(src)
|
||||
assert out.startswith("Here is a comparison:\n")
|
||||
assert out.endswith("And some prose after.\n")
|
||||
# And the table lines are aligned.
|
||||
block = [ln for ln in out.split("\n") if "|" in ln]
|
||||
offsets = [_column_offsets(row) for row in block]
|
||||
assert all(o == offsets[0] for o in offsets)
|
||||
|
||||
|
||||
def test_handles_ragged_rows_by_padding_short_rows():
|
||||
src = dedent(
|
||||
"""\
|
||||
| a | b | c |
|
||||
|---|---|---|
|
||||
| 1 | 2 |
|
||||
| x | y | z |
|
||||
"""
|
||||
)
|
||||
out = realign_markdown_tables(src).rstrip("\n").split("\n")
|
||||
offsets = [_column_offsets(row) for row in out]
|
||||
# Short rows must be padded out so they have the same pipe count
|
||||
# and column positions as the header.
|
||||
assert all(len(o) == len(offsets[0]) for o in offsets)
|
||||
assert all(o == offsets[0] for o in offsets)
|
||||
|
||||
|
||||
def test_multiple_tables_in_one_text():
|
||||
src = dedent(
|
||||
"""\
|
||||
First:
|
||||
|
||||
| 配置 | 值 |
|
||||
|------|----|
|
||||
| 通义 | 1 |
|
||||
|
||||
Second:
|
||||
|
||||
| model | n |
|
||||
|-------|---|
|
||||
| gpt | 2 |
|
||||
"""
|
||||
)
|
||||
out = realign_markdown_tables(src)
|
||||
# Each table block individually aligns.
|
||||
blocks: list[list[str]] = []
|
||||
current: list[str] = []
|
||||
for line in out.split("\n"):
|
||||
if "|" in line:
|
||||
current.append(line)
|
||||
elif current:
|
||||
blocks.append(current)
|
||||
current = []
|
||||
if current:
|
||||
blocks.append(current)
|
||||
|
||||
assert len(blocks) == 2
|
||||
for block in blocks:
|
||||
offsets = [_column_offsets(row) for row in block]
|
||||
assert all(o == offsets[0] for o in offsets), (
|
||||
f"block did not align:\n" + "\n".join(block)
|
||||
)
|
||||
|
|
@ -118,14 +118,37 @@ def test_strip_mode_preserves_table_structure_while_cleaning_cell_markdown():
|
|||
)
|
||||
|
||||
output = _render_to_text(renderable)
|
||||
assert "| Syntax | Example |" in output
|
||||
assert "|---|---|" in output
|
||||
assert "| Bold | bold |" in output
|
||||
assert "| Strike | strike |" in output
|
||||
|
||||
# Inline cell markdown is stripped (the contract this test enforces).
|
||||
assert "**" not in output
|
||||
assert "~~" not in output
|
||||
assert "`" not in output
|
||||
|
||||
# Cell *content* survives, even if the surrounding whitespace was
|
||||
# rewritten by the wcwidth-aware re-aligner. Asserting on bare
|
||||
# cell text keeps this test focused on the strip behaviour rather
|
||||
# than snapshotting incidental column padding (which is what the
|
||||
# CJK-alignment fix changes).
|
||||
assert "Syntax" in output
|
||||
assert "Example" in output
|
||||
assert "Bold" in output and "bold" in output
|
||||
assert "Strike" in output and "strike" in output
|
||||
|
||||
# Structural sanity: the table still renders as pipe-bordered rows
|
||||
# (header + divider + 2 body rows).
|
||||
body_rows = [ln for ln in output.splitlines() if ln.strip().startswith("|")]
|
||||
assert len(body_rows) == 4
|
||||
|
||||
# Every rendered table row shares the same pipe column offsets — the
|
||||
# alignment guarantee from realign_markdown_tables.
|
||||
pipe_cols = [
|
||||
[i for i, ch in enumerate(row) if ch == "|"] for row in body_rows
|
||||
]
|
||||
assert all(p == pipe_cols[0] for p in pipe_cols), (
|
||||
"table rows misaligned after strip-mode rendering:\n"
|
||||
+ "\n".join(body_rows)
|
||||
)
|
||||
|
||||
|
||||
def test_final_assistant_content_can_leave_markdown_raw():
|
||||
renderable = _render_final_assistant_content("***Bold italic***", mode="raw")
|
||||
|
|
|
|||
|
|
@ -217,3 +217,50 @@ describe('Md wrapping', () => {
|
|||
expect(lines.some(line => line.startsWith(' hi ok'))).toBe(true)
|
||||
})
|
||||
})
|
||||
|
||||
describe('renderTable CJK width alignment', () => {
|
||||
it('column starts share the same display offset across CJK rows', async () => {
|
||||
const { stringWidth } = await import('@hermes/ink')
|
||||
|
||||
const md = [
|
||||
'| 配置 | Config | 状态 |',
|
||||
'|------|--------|------|',
|
||||
'| Vicuna (report) | dense | × |',
|
||||
'| ChatGLM | chat | ✓ |',
|
||||
'| 通义千问 | qwen | × |'
|
||||
].join('\n')
|
||||
|
||||
// Pre-fix bug: ` `.repeat(w - stripInlineMarkup(...).length) used
|
||||
// UTF-16 code units, so a CJK header cell padded to 2 cells while
|
||||
// the body cell padded to 4, drifting subsequent columns by 2
|
||||
// cells per CJK char.
|
||||
//
|
||||
// Post-fix contract: the prefix preceding the start of column N
|
||||
// has the same display width across the header and every body row
|
||||
// (deduped to skip the divider, which renders independently).
|
||||
const lines = renderPlain(
|
||||
React.createElement(Box, null, React.createElement(Md, { compact: true, t: DEFAULT_THEME, text: md }))
|
||||
).filter(line => line.trim().length > 0)
|
||||
|
||||
// Heuristic: a "data row" line either contains 'Config' (header)
|
||||
// or one of the body labels; a divider is all box-drawing. Use
|
||||
// the substring 'Config' / 'dense' / 'chat' / 'qwen' as the
|
||||
// unique anchor for column 2's start position on each row.
|
||||
const colStarts = (line: string, anchor: string): number => {
|
||||
const idx = line.indexOf(anchor)
|
||||
return idx < 0 ? -1 : stringWidth(line.slice(0, idx))
|
||||
}
|
||||
|
||||
const headerCol2 = lines.map(l => colStarts(l, 'Config')).find(v => v >= 0)
|
||||
const denseCol2 = lines.map(l => colStarts(l, 'dense')).find(v => v >= 0)
|
||||
const chatCol2 = lines.map(l => colStarts(l, 'chat')).find(v => v >= 0)
|
||||
const qwenCol2 = lines.map(l => colStarts(l, 'qwen')).find(v => v >= 0)
|
||||
|
||||
expect(headerCol2).toBeDefined()
|
||||
expect(denseCol2).toBe(headerCol2)
|
||||
expect(chatCol2).toBe(headerCol2)
|
||||
// The CJK row is the one that drifted before the fix. It must
|
||||
// align with the rest now.
|
||||
expect(qwenCol2).toBe(headerCol2)
|
||||
})
|
||||
})
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
import { Box, Link, Text } from '@hermes/ink'
|
||||
import { Box, Link, stringWidth, Text } from '@hermes/ink'
|
||||
import { Fragment, memo, type ReactNode, useMemo } from 'react'
|
||||
|
||||
import { ensureEmojiPresentation } from '../lib/emoji.js'
|
||||
|
|
@ -170,16 +170,22 @@ export const stripInlineMarkup = (v: string) =>
|
|||
.replace(/\\\(([^\n]+?)\\\)/g, '$1')
|
||||
|
||||
const renderTable = (k: number, rows: string[][], t: Theme) => {
|
||||
const widths = rows[0]!.map((_, ci) => Math.max(...rows.map(r => stripInlineMarkup(r[ci] ?? '').length)))
|
||||
// Column widths in *display cells*, not UTF-16 code units. CJK
|
||||
// glyphs and most emoji render as two cells but `String#length`
|
||||
// counts them as one, which collapses Chinese / Japanese / Korean
|
||||
// tables into drift across rows. `stringWidth` (Bun.stringWidth
|
||||
// fast path + an East-Asian-width-aware fallback, memoised in
|
||||
// @hermes/ink) returns the actual cell count.
|
||||
const cellWidth = (raw: string) => stringWidth(stripInlineMarkup(raw))
|
||||
|
||||
const widths = rows[0]!.map((_, ci) => Math.max(...rows.map(r => cellWidth(r[ci] ?? ''))))
|
||||
|
||||
// Thin divider under the header. Without it tables look like prose
|
||||
// with extra spacing because the header is just accent-coloured text
|
||||
// (#15534). We avoid full borders on purpose — column widths come
|
||||
// from `stripInlineMarkup(...).length` (UTF-16 code units, not
|
||||
// display width), so a real outline often misaligns on emoji and
|
||||
// East-Asian wide characters; one dim solid rule (`─`) under row 0
|
||||
// plus tab-style column gaps reads cleanly on every terminal we
|
||||
// tested.
|
||||
// from `stringWidth(...)`, so the dividers and the row content stay
|
||||
// in sync on CJK / emoji tables; tab-style column gaps still read
|
||||
// cleanly without the boxed look.
|
||||
const sep = widths.map(w => '─'.repeat(Math.max(1, w))).join(' ')
|
||||
|
||||
return (
|
||||
|
|
@ -190,7 +196,7 @@ const renderTable = (k: number, rows: string[][], t: Theme) => {
|
|||
{widths.map((w, ci) => (
|
||||
<Text bold={ri === 0} color={ri === 0 ? t.color.accent : undefined} key={ci}>
|
||||
<MdInline t={t} text={row[ci] ?? ''} />
|
||||
{' '.repeat(Math.max(0, w - stripInlineMarkup(row[ci] ?? '').length))}
|
||||
{' '.repeat(Math.max(0, w - cellWidth(row[ci] ?? '')))}
|
||||
{ci < widths.length - 1 ? ' ' : ''}
|
||||
</Text>
|
||||
))}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue