feat(telegram): auto-wrap markdown tables in code blocks (#11794)

Telegram's MarkdownV2 has no table syntax — pipes get backslash-escaped
and tables render as noisy unaligned text.  format_message now detects
GFM-style pipe tables (header row + delimiter row + optional body) and
wraps them in ``` fences before the existing MarkdownV2 conversion runs.
Telegram renders fenced code blocks as monospace preformatted text with
columns intact.

Tables already inside an existing code block are left alone.  Plain
prose with pipes, lone '---' horizontal rules, and non-table content
are unaffected.

Addresses the recurring community request: users no longer have to ask
the agent to re-render tables as code blocks manually.
This commit is contained in:
Teknium 2026-04-17 14:27:26 -07:00 committed by GitHub
parent 3207b9bda0
commit 036dacf659
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 236 additions and 1 deletions

View file

@ -118,6 +118,84 @@ def _strip_mdv2(text: str) -> str:
return cleaned
# ---------------------------------------------------------------------------
# Markdown table → code block conversion
# ---------------------------------------------------------------------------
# Telegram's MarkdownV2 has no table syntax — '|' is just an escaped literal,
# so pipe tables render as noisy backslash-pipe text with no alignment.
# Wrapping the table in a fenced code block makes Telegram render it as
# monospace preformatted text with columns intact.
# Matches a GFM table delimiter row: optional outer pipes, cells containing
# only dashes (with optional leading/trailing colons for alignment) separated
# by '|'. Requires at least one internal '|' so lone '---' horizontal rules
# are NOT matched.
#
# The pattern is anchored with '^' and '$', so the whole line has to be a
# delimiter row; whitespace is tolerated at both ends and around each cell.
# Because the '|'-prefixed cell group must occur at least once, a delimiter
# with a single cell is rejected as well.
#
#   matches:         "|---|---|"    "--- | ---"    "|:--|--:|"
#   does not match:  "---"          "| - |"
_TABLE_SEPARATOR_RE = re.compile(
    r'^\s*\|?\s*:?-+:?\s*(?:\|\s*:?-+:?\s*){1,}\|?\s*$'
)
def _is_table_row(line: str) -> bool:
"""Return True if *line* could plausibly be a table data row."""
stripped = line.strip()
return bool(stripped) and '|' in stripped
def _wrap_markdown_tables(text: str) -> str:
"""Wrap GFM-style pipe tables in ``` fences so Telegram renders them.
Detected by a row containing '|' immediately followed by a delimiter
row matching :data:`_TABLE_SEPARATOR_RE`. Subsequent pipe-containing
non-blank lines are consumed as the table body and included in the
wrapped block. Tables inside existing fenced code blocks are left
alone.
"""
if '|' not in text or '-' not in text:
return text
lines = text.split('\n')
out: list[str] = []
in_fence = False
i = 0
while i < len(lines):
line = lines[i]
stripped = line.lstrip()
# Track existing fenced code blocks — never touch content inside.
if stripped.startswith('```'):
in_fence = not in_fence
out.append(line)
i += 1
continue
if in_fence:
out.append(line)
i += 1
continue
# Look for a header row (contains '|') immediately followed by a
# delimiter row.
if (
'|' in line
and i + 1 < len(lines)
and _TABLE_SEPARATOR_RE.match(lines[i + 1])
):
table_block = [line, lines[i + 1]]
j = i + 2
while j < len(lines) and _is_table_row(lines[j]):
table_block.append(lines[j])
j += 1
out.append('```')
out.extend(table_block)
out.append('```')
i = j
continue
out.append(line)
i += 1
return '\n'.join(out)
class TelegramAdapter(BasePlatformAdapter):
"""
Telegram bot adapter.
@ -1916,6 +1994,12 @@ class TelegramAdapter(BasePlatformAdapter):
text = content
# 0) Pre-wrap GFM-style pipe tables in ``` fences. Telegram can't
# render tables natively, but fenced code blocks render as
# monospace preformatted text with columns intact. The wrapped
# tables then flow through step (1) below as protected regions.
text = _wrap_markdown_tables(text)
# 1) Protect fenced code blocks (``` ... ```)
# Per MarkdownV2 spec, \ and ` inside pre/code must be escaped.
def _protect_fenced(m):

View file

@ -34,7 +34,12 @@ def _ensure_telegram_mock():
_ensure_telegram_mock()
from gateway.platforms.telegram import TelegramAdapter, _escape_mdv2, _strip_mdv2 # noqa: E402
from gateway.platforms.telegram import ( # noqa: E402
TelegramAdapter,
_escape_mdv2,
_strip_mdv2,
_wrap_markdown_tables,
)
# ---------------------------------------------------------------------------
@ -535,6 +540,152 @@ class TestStripMdv2:
assert _strip_mdv2("||hidden text||") == "hidden text"
# =========================================================================
# Markdown table auto-wrap
# =========================================================================
class TestWrapMarkdownTables:
    """Unit tests for ``_wrap_markdown_tables``.

    The helper should enclose GFM pipe tables in ``` fences, so Telegram
    shows them as monospace preformatted text rather than the noisy
    backslash-pipe output MarkdownV2 escaping yields, and should hand
    back every other kind of input unchanged.
    """

    def test_basic_table_wrapped(self):
        """A table between two prose paragraphs is fenced; prose is kept."""
        source = (
            "Scores:\n\n"
            "| Player | Score |\n"
            "|--------|-------|\n"
            "| Alice | 150 |\n"
            "| Bob | 120 |\n"
            "\nEnd."
        )
        wrapped = _wrap_markdown_tables(source)
        # Opening fence directly above the header, closing fence directly
        # below the last body row.
        assert "```\n| Player | Score |" in wrapped
        assert "| Bob | 120 |\n```" in wrapped
        # Text before and after the table is untouched.
        assert wrapped.startswith("Scores:")
        assert wrapped.endswith("End.")

    def test_bare_pipe_table_wrapped(self):
        """Tables written without outer pipes are detected too."""
        source = "head1 | head2\n--- | ---\na | b\nc | d"
        wrapped = _wrap_markdown_tables(source)
        assert wrapped.startswith("```\n")
        assert wrapped.rstrip().endswith("```")
        assert "head1 | head2" in wrapped

    def test_alignment_separators(self):
        """Delimiter cells of the form :--- / ---: / :---: are accepted."""
        source = (
            "| Name | Age | City |\n"
            "|:-----|----:|:----:|\n"
            "| Ada | 30 | NYC |"
        )
        wrapped = _wrap_markdown_tables(source)
        assert wrapped.count("```") == 2

    def test_two_consecutive_tables_wrapped_separately(self):
        """Tables separated by a blank line each get their own fence pair."""
        source = (
            "| A | B |\n"
            "|---|---|\n"
            "| 1 | 2 |\n"
            "\n"
            "| X | Y |\n"
            "|---|---|\n"
            "| 9 | 8 |"
        )
        wrapped = _wrap_markdown_tables(source)
        # Two tables -> two opening and two closing fences.
        assert wrapped.count("```") == 4

    def test_plain_text_with_pipes_not_wrapped(self):
        """A pipe inside ordinary prose does not trigger wrapping."""
        prose = "Use the | pipe operator to chain commands."
        assert _wrap_markdown_tables(prose) == prose

    def test_horizontal_rule_not_wrapped(self):
        """A lone '---' horizontal rule is not treated as a delimiter row."""
        document = "Section A\n\n---\n\nSection B"
        assert _wrap_markdown_tables(document) == document

    def test_existing_code_block_with_pipes_left_alone(self):
        """A table that is already fenced is returned as-is."""
        already_fenced = (
            "```\n"
            "| a | b |\n"
            "|---|---|\n"
            "| 1 | 2 |\n"
            "```"
        )
        assert _wrap_markdown_tables(already_fenced) == already_fenced

    def test_no_pipe_character_short_circuits(self):
        """Input without any '|' comes back unchanged."""
        prose = "Plain **bold** text with no table."
        assert _wrap_markdown_tables(prose) == prose

    def test_no_dash_short_circuits(self):
        """Input with pipes but no '-' (so no delimiter row) is unchanged."""
        rows_only = "a | b\nc | d"
        assert _wrap_markdown_tables(rows_only) == rows_only

    def test_single_column_separator_not_matched(self):
        """Single-column tables are deliberately not detected: the
        delimiter row must contain at least one internal pipe, which
        avoids false positives on formatting rules."""
        single_column = "| a |\n| - |\n| b |"
        assert _wrap_markdown_tables(single_column) == single_column
class TestFormatMessageTables:
    """End-to-end checks through ``adapter.format_message``.

    A pipe table in the input should come out inside a fenced block with
    its pipes and dashes unescaped, while text outside the table still
    receives the usual MarkdownV2 conversion.  ``adapter`` is a pytest
    fixture that is not shown in this part of the file.
    """

    def test_table_rendered_as_code_block(self, adapter):
        """Table content inside the fence is not backslash-escaped."""
        message = (
            "Data:\n\n"
            "| Col1 | Col2 |\n"
            "|------|------|\n"
            "| A | B |\n"
        )
        formatted = adapter.format_message(message)
        # The header row directly follows an opening fence, pipes intact.
        assert "```\n| Col1 | Col2 |" in formatted
        # Segment between the first and second fence marker.
        fenced_body = formatted.split("```")[1]
        assert "\\|" not in fenced_body
        # The delimiter row's dashes are left alone as well.
        assert "\\-" not in fenced_body

    def test_text_after_table_still_formatted(self, adapter):
        """Prose following a table is still converted and escaped."""
        message = (
            "| A | B |\n"
            "|---|---|\n"
            "| 1 | 2 |\n"
            "\n"
            "Nice **work** team!"
        )
        formatted = adapter.format_message(message)
        # '**work**' becomes MarkdownV2 bold outside the table.
        assert "*work*" in formatted
        # The '!' outside the fence gets escaped.
        assert "\\!" in formatted

    def test_multiple_tables_in_single_message(self, adapter):
        """Each table in a message gets its own fenced block."""
        message = (
            "First:\n"
            "| A | B |\n"
            "|---|---|\n"
            "| 1 | 2 |\n"
            "\n"
            "Second:\n"
            "| X | Y |\n"
            "|---|---|\n"
            "| 9 | 8 |\n"
        )
        formatted = adapter.format_message(message)
        # Two fenced blocks -> four fence markers.
        assert formatted.count("```") == 4
@pytest.mark.asyncio
async def test_send_escapes_chunk_indicator_for_markdownv2(adapter):
adapter.MAX_MESSAGE_LENGTH = 80