hermes-agent/tests/agent/test_markdown_tables.py
Teknium ea1d0462cf
fix(cli): vertical fallback for markdown tables wider than terminal (#23948)
Follow-up to #23863 (CJK table alignment). The realigner was
correctly padding pipes to identical column offsets, but when a
table's natural width exceeds terminal cells it produced lines that
the terminal soft-wrapped mid-cell, destroying column alignment
visually even though the bytes were perfectly padded. Reported as
'columns are not aligned' on tables containing one long row alongside
several short rows.

Approach mirrors Claude Code's MarkdownTable.tsx narrow-terminal
fallback: when realign_markdown_tables is given an available_width
budget and the rebuilt horizontal table exceeds it, render each body
row as 'Header: value' lines separated by a thin ─ rule. Word-wraps
oversize values at the budget with a 2-space continuation indent.

- agent/markdown_tables.py: realign_markdown_tables(text, available_width=None);
  threshold check at the top of _render_block flips into a new
  _render_vertical fallback. Includes _wrap_to_width with hard-break
  for tokens longer than the budget.
- cli.py: helper _terminal_width_for_streaming() returns
  shutil.get_terminal_size().columns minus _STREAM_PAD and a 2-cell
  safety margin; passed to all three realign call sites
  (_render_final_assistant_content for strip+render Panel paths, and
  the streaming flushers in _emit_stream_text / _flush_stream).
- tests/agent/test_markdown_tables.py: 4 new tests covering the
  overflow-vertical fallback for ASCII + CJK content, the
  'fits → keep horizontal' case, and the long-cell wrap with indent.

Live-verified: with COLUMNS=100, the user's reported 'long row in
ASCII table' case now renders as vertical key-value rows that all fit
the panel; the 6-column CJK comparison table still renders as an
aligned horizontal table because it fits inside 100 cols.
2026-05-11 16:49:13 -07:00

312 lines
9.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""Tests for `agent.markdown_tables.realign_markdown_tables`.
These cover the alignment guarantee on CJK / wide-character tables and
the conservative no-op behaviour on non-table input.
"""
from __future__ import annotations
from textwrap import dedent
from wcwidth import wcswidth
from agent.markdown_tables import (
is_table_divider,
looks_like_table_row,
realign_markdown_tables,
split_table_row,
)
def _column_offsets(line: str) -> list[int]:
"""Return the display-cell index of every ``|`` in ``line``."""
cells: list[int] = []
width = 0
for ch in line:
if ch == "|":
cells.append(width)
# wcswidth on a single char; clamp negatives.
w = wcswidth(ch)
width += w if w > 0 else 1
return cells
# ---------------------------------------------------------------------------
# split_table_row / is_table_divider / looks_like_table_row
# ---------------------------------------------------------------------------
def test_split_strips_outer_pipes_and_trims():
assert split_table_row("| a | b | c |") == ["a", "b", "c"]
assert split_table_row("|配置|状态|") == ["配置", "状态"]
assert split_table_row("a | b | c") == ["a", "b", "c"]
def test_is_table_divider_handles_alignment_colons():
assert is_table_divider("|---|---|")
assert is_table_divider("| :--- | ---: | :---: |")
assert not is_table_divider("| - | - |") # 1 dash is not a divider
assert not is_table_divider("| a | b |")
assert not is_table_divider("---") # single column, no pipes
def test_looks_like_table_row():
assert looks_like_table_row("| a | b |")
assert looks_like_table_row("a | b | c") # no leading pipe, ≥2 pipes
assert not looks_like_table_row("not a table")
assert not looks_like_table_row("a | b") # one pipe, no leading pipe
assert not looks_like_table_row("")
# ---------------------------------------------------------------------------
# realign_markdown_tables
# ---------------------------------------------------------------------------
def test_no_op_on_text_without_tables():
text = "Hello world\nThis has no | pipes table.\n"
assert realign_markdown_tables(text) == text
def test_no_op_when_pipes_but_no_divider():
text = "echo a | grep b\necho c | wc -l\n"
assert realign_markdown_tables(text) == text
def test_cjk_table_pipes_align_across_rows():
# Model-emitted (under-padded for CJK) input.
src = dedent(
"""\
| 配置 | Config | 论文 (%) | 复现 (%) | 差值 | 状态 |
|------|--------|---------|---------|------|------|
| Vicuna (report) | dense | 79.30 | 未完成 | - | × |
| ChatGLM | chat | 37.60 | 37.82 | +0.22 | ✓ |
| 通义千问 | qwen | (无) | 报错 | - | × |
"""
)
out = realign_markdown_tables(src).rstrip("\n").split("\n")
# All rows in the rebuilt block must have pipes at identical display
# columns — that's the alignment guarantee.
offsets = [_column_offsets(row) for row in out]
assert all(o == offsets[0] for o in offsets), (
"rebuilt table rows do not share pipe column offsets:\n"
+ "\n".join(out)
)
# And we expect 7 pipes per row (6 columns + outer borders).
assert len(offsets[0]) == 7
def test_emoji_with_cjk_table_aligns():
src = dedent(
"""\
| 模型 | 状态 | 备注 |
|------|------|------|
| 千问 | ✅ | 通过 |
| Claude | ✅ | 推理强 |
| 文心一言 | ❌ | 报错 |
"""
)
out = realign_markdown_tables(src).rstrip("\n").split("\n")
offsets = [_column_offsets(row) for row in out]
# The emoji-with-variation-selector case (⚠️) intentionally tolerates
# 1-cell drift; bare emoji like ✅ / ❌ have stable wcwidth and must
# align. Use bare emoji here so the assertion is hard.
assert all(o == offsets[0] for o in offsets), (
"emoji+CJK rows do not share pipe column offsets:\n" + "\n".join(out)
)
def test_already_aligned_ascii_table_remains_aligned():
src = dedent(
"""\
| a | b |
|-----|-----|
| 1 | 2 |
| foo | bar |
"""
)
out = realign_markdown_tables(src).rstrip("\n").split("\n")
offsets = [_column_offsets(row) for row in out]
assert all(o == offsets[0] for o in offsets)
def test_passes_non_table_lines_through_around_a_table():
src = dedent(
"""\
Here is a comparison:
| 模型 | 状态 |
|------|------|
| 千问 | 通过 |
And some prose after.
"""
)
out = realign_markdown_tables(src)
assert out.startswith("Here is a comparison:\n")
assert out.endswith("And some prose after.\n")
# And the table lines are aligned.
block = [ln for ln in out.split("\n") if "|" in ln]
offsets = [_column_offsets(row) for row in block]
assert all(o == offsets[0] for o in offsets)
# ---------------------------------------------------------------------------
# Vertical fallback for tables wider than the terminal
# ---------------------------------------------------------------------------
def test_overflow_falls_back_to_vertical_when_table_wider_than_terminal():
"""A horizontal table that would exceed the available width must
drop to vertical key-value rendering so the terminal does not
soft-wrap mid-cell (which destroys column alignment visually)."""
src = dedent(
"""\
| Item | Description | Notes |
|------|-------------|-------|
| a | short | ok |
| b | this is a much longer description that stretches the column wider than the others by a lot | fine |
| c | tiny | - |
"""
)
out = realign_markdown_tables(src, available_width=100)
# No horizontal pipe-bordered rows: vertical mode emits "Header: value"
# lines and a ─ separator instead.
assert "|" not in out
assert "Item: a" in out
assert "Description: short" in out
assert "Notes: ok" in out
# Body rows separated by ─ rule
assert "──" in out
# Every emitted line fits the available width.
for line in out.split("\n"):
assert wcswidth(line) <= 100, f"line wider than budget: {line!r}"
def test_horizontal_kept_when_table_fits():
"""A table that fits the terminal must keep the horizontal
pipe-bordered rendering — vertical fallback only kicks in when
soft-wrap is unavoidable."""
src = dedent(
"""\
| Name | Age |
|------|-----|
| Alice | 30 |
| Bob | 25 |
"""
)
out = realign_markdown_tables(src, available_width=100)
# Pipe-bordered rendering survives.
body_rows = [ln for ln in out.split("\n") if ln.strip().startswith("|")]
assert len(body_rows) == 4
offsets = [_column_offsets(r) for r in body_rows]
assert all(o == offsets[0] for o in offsets)
def test_vertical_fallback_wraps_long_cell_text_with_indent():
src = dedent(
"""\
| Key | Value |
|-----|-------|
| x | this value is long enough that wrapping the value to fit a narrow terminal width is required even in vertical mode |
"""
)
out = realign_markdown_tables(src, available_width=60)
lines = out.split("\n")
assert lines[0].startswith("Key: x")
# First "Value:" line + at least one continuation indented by 2 spaces.
value_idx = next(i for i, l in enumerate(lines) if l.startswith("Value:"))
assert lines[value_idx + 1].startswith(" ")
# Every line still fits the budget.
for line in lines:
assert wcswidth(line) <= 60
def test_overflow_falls_back_to_vertical_for_cjk_too():
"""CJK content can also push a table over the terminal budget;
the vertical fallback should kick in regardless of script."""
src = dedent(
"""\
| 模型 | 描述 | 备注 |
|------|------|------|
| 千问 | 一个相当长的描述用于把列宽撑得超过可用终端宽度从而触发竖排回退 | 通过 |
| 文心 | 短 | × |
"""
)
out = realign_markdown_tables(src, available_width=50)
assert "|" not in out
assert "模型: 千问" in out
assert "模型: 文心" in out
for line in out.split("\n"):
assert wcswidth(line) <= 50, f"line wider than budget: {line!r}"
def test_handles_ragged_rows_by_padding_short_rows():
src = dedent(
"""\
| a | b | c |
|---|---|---|
| 1 | 2 |
| x | y | z |
"""
)
out = realign_markdown_tables(src).rstrip("\n").split("\n")
offsets = [_column_offsets(row) for row in out]
# Short rows must be padded out so they have the same pipe count
# and column positions as the header.
assert all(len(o) == len(offsets[0]) for o in offsets)
assert all(o == offsets[0] for o in offsets)
def test_multiple_tables_in_one_text():
src = dedent(
"""\
First:
| 配置 | 值 |
|------|----|
| 通义 | 1 |
Second:
| model | n |
|-------|---|
| gpt | 2 |
"""
)
out = realign_markdown_tables(src)
# Each table block individually aligns.
blocks: list[list[str]] = []
current: list[str] = []
for line in out.split("\n"):
if "|" in line:
current.append(line)
elif current:
blocks.append(current)
current = []
if current:
blocks.append(current)
assert len(blocks) == 2
for block in blocks:
offsets = [_column_offsets(row) for row in block]
assert all(o == offsets[0] for o in offsets), (
f"block did not align:\n" + "\n".join(block)
)