From 036dacf6592dac36a57a3d26187039fb3b1a37a0 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Fri, 17 Apr 2026 14:27:26 -0700 Subject: [PATCH] feat(telegram): auto-wrap markdown tables in code blocks (#11794) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Telegram's MarkdownV2 has no table syntax — pipes get backslash-escaped and tables render as noisy unaligned text. format_message now detects GFM-style pipe tables (header row + delimiter row + optional body) and wraps them in ``` fences before the existing MarkdownV2 conversion runs. Telegram renders fenced code blocks as monospace preformatted text with columns intact. Tables already inside an existing code block are left alone. Plain prose with pipes, lone '---' horizontal rules, and non-table content are unaffected. Closes the recurring community request to stop having to ask the agent to re-render tables as code blocks manually. --- gateway/platforms/telegram.py | 84 ++++++++++++++ tests/gateway/test_telegram_format.py | 153 +++++++++++++++++++++++++- 2 files changed, 236 insertions(+), 1 deletion(-) diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py index 2f4ec9329..5b1fef133 100644 --- a/gateway/platforms/telegram.py +++ b/gateway/platforms/telegram.py @@ -118,6 +118,84 @@ def _strip_mdv2(text: str) -> str: return cleaned +# --------------------------------------------------------------------------- +# Markdown table → code block conversion +# --------------------------------------------------------------------------- +# Telegram's MarkdownV2 has no table syntax — '|' is just an escaped literal, +# so pipe tables render as noisy backslash-pipe text with no alignment. +# Wrapping the table in a fenced code block makes Telegram render it as +# monospace preformatted text with columns intact. 
# Matches a GFM table delimiter row: optional outer pipes, cells containing
# only dashes (with optional leading/trailing colons for alignment) separated
# by '|'.  Requires at least one internal '|' so lone '---' horizontal rules
# are NOT matched.
_TABLE_SEPARATOR_RE = re.compile(
    r'^\s*\|?\s*:?-+:?\s*(?:\|\s*:?-+:?\s*){1,}\|?\s*$'
)

# Marker that opens and closes a fenced code block.
_CODE_FENCE = '```'


def _starts_fence(line: str) -> bool:
    """Return True if *line* begins (after indentation) with a code fence."""
    return line.lstrip().startswith(_CODE_FENCE)


def _is_table_row(line: str) -> bool:
    """Return True if *line* could plausibly be a table data row.

    A row is any non-blank line that contains at least one '|'.
    """
    stripped = line.strip()
    return bool(stripped) and '|' in stripped


def _wrap_markdown_tables(text: str) -> str:
    """Wrap GFM-style pipe tables in ``` fences.

    A table is detected as a line containing '|' that is immediately
    followed by a delimiter row matching :data:`_TABLE_SEPARATOR_RE`.
    The following non-blank, pipe-containing lines are consumed as the
    table body and placed inside the same fence.

    Lines inside an existing fenced code block are passed through
    untouched.  A line that starts with a code fence is never treated as a
    table row, even if it contains '|': pulling it into the generated
    fence would end that fence early and leave the real code block
    unbalanced.

    Args:
        text: Message text, possibly containing Markdown tables.

    Returns:
        *text* with every detected table enclosed in its own ``` fence.
        The input is returned unchanged when it contains no '|' or no '-'.
    """
    # Cheap pre-check: a table needs both pipes and a dashed delimiter row.
    if '|' not in text or '-' not in text:
        return text

    lines = text.split('\n')
    total = len(lines)
    out: list[str] = []
    in_fence = False
    i = 0
    while i < total:
        line = lines[i]

        # Track existing fenced code blocks; fence lines are copied as-is.
        if _starts_fence(line):
            in_fence = not in_fence
            out.append(line)
            i += 1
            continue

        is_header = (
            not in_fence
            and '|' in line
            and i + 1 < total
            and _TABLE_SEPARATOR_RE.match(lines[i + 1]) is not None
        )
        if not is_header:
            out.append(line)
            i += 1
            continue

        # Header + delimiter found: extend over the body rows, stopping at
        # the first blank/pipe-less line or at the start of a code fence.
        end = i + 2
        while (
            end < total
            and _is_table_row(lines[end])
            and not _starts_fence(lines[end])
        ):
            end += 1

        out.append(_CODE_FENCE)
        out.extend(lines[i:end])
        out.append(_CODE_FENCE)
        i = end

    return '\n'.join(out)
@@ -1916,6 +1994,12 @@ class TelegramAdapter(BasePlatformAdapter): text = content + # 0) Pre-wrap GFM-style pipe tables in ``` fences. Telegram can't + # render tables natively, but fenced code blocks render as + # monospace preformatted text with columns intact. The wrapped + # tables then flow through step (1) below as protected regions. + text = _wrap_markdown_tables(text) + # 1) Protect fenced code blocks (``` ... ```) # Per MarkdownV2 spec, \ and ` inside pre/code must be escaped. def _protect_fenced(m): diff --git a/tests/gateway/test_telegram_format.py b/tests/gateway/test_telegram_format.py index 1bd889b7c..ce7e02a47 100644 --- a/tests/gateway/test_telegram_format.py +++ b/tests/gateway/test_telegram_format.py @@ -34,7 +34,12 @@ def _ensure_telegram_mock(): _ensure_telegram_mock() -from gateway.platforms.telegram import TelegramAdapter, _escape_mdv2, _strip_mdv2 # noqa: E402 +from gateway.platforms.telegram import ( # noqa: E402 + TelegramAdapter, + _escape_mdv2, + _strip_mdv2, + _wrap_markdown_tables, +) # --------------------------------------------------------------------------- @@ -535,6 +540,152 @@ class TestStripMdv2: assert _strip_mdv2("||hidden text||") == "hidden text" +# ========================================================================= +# Markdown table auto-wrap +# ========================================================================= + + +class TestWrapMarkdownTables: + """_wrap_markdown_tables wraps GFM pipe tables in ``` fences so + Telegram renders them as monospace preformatted text instead of the + noisy backslash-pipe mess MarkdownV2 produces.""" + + def test_basic_table_wrapped(self): + text = ( + "Scores:\n\n" + "| Player | Score |\n" + "|--------|-------|\n" + "| Alice | 150 |\n" + "| Bob | 120 |\n" + "\nEnd." 
+ ) + out = _wrap_markdown_tables(text) + # Table is now wrapped in a fence + assert "```\n| Player | Score |" in out + assert "| Bob | 120 |\n```" in out + # Surrounding prose is preserved + assert out.startswith("Scores:") + assert out.endswith("End.") + + def test_bare_pipe_table_wrapped(self): + """Tables without outer pipes (GFM allows this) are still detected.""" + text = "head1 | head2\n--- | ---\na | b\nc | d" + out = _wrap_markdown_tables(text) + assert out.startswith("```\n") + assert out.rstrip().endswith("```") + assert "head1 | head2" in out + + def test_alignment_separators(self): + """Separator rows with :--- / ---: / :---: alignment markers match.""" + text = ( + "| Name | Age | City |\n" + "|:-----|----:|:----:|\n" + "| Ada | 30 | NYC |" + ) + out = _wrap_markdown_tables(text) + assert out.count("```") == 2 + + def test_two_consecutive_tables_wrapped_separately(self): + text = ( + "| A | B |\n" + "|---|---|\n" + "| 1 | 2 |\n" + "\n" + "| X | Y |\n" + "|---|---|\n" + "| 9 | 8 |" + ) + out = _wrap_markdown_tables(text) + # Four fences total — one opening + closing per table + assert out.count("```") == 4 + + def test_plain_text_with_pipes_not_wrapped(self): + """A bare pipe in prose must NOT trigger wrapping.""" + text = "Use the | pipe operator to chain commands." + assert _wrap_markdown_tables(text) == text + + def test_horizontal_rule_not_wrapped(self): + """A lone '---' horizontal rule must not be mistaken for a separator.""" + text = "Section A\n\n---\n\nSection B" + assert _wrap_markdown_tables(text) == text + + def test_existing_code_block_with_pipes_left_alone(self): + """A table already inside a fenced code block must not be re-wrapped.""" + text = ( + "```\n" + "| a | b |\n" + "|---|---|\n" + "| 1 | 2 |\n" + "```" + ) + assert _wrap_markdown_tables(text) == text + + def test_no_pipe_character_short_circuits(self): + text = "Plain **bold** text with no table." 
+ assert _wrap_markdown_tables(text) == text + + def test_no_dash_short_circuits(self): + text = "a | b\nc | d" # has pipes but no '-' separator row + assert _wrap_markdown_tables(text) == text + + def test_single_column_separator_not_matched(self): + """Single-column tables (rare) are not detected — we require at + least one internal pipe in the separator row to avoid false + positives on formatting rules.""" + text = "| a |\n| - |\n| b |" + assert _wrap_markdown_tables(text) == text + + +class TestFormatMessageTables: + """End-to-end: a pipe table passes through format_message with its + pipes and dashes left alone inside the fence, not mangled by MarkdownV2 + escaping.""" + + def test_table_rendered_as_code_block(self, adapter): + text = ( + "Data:\n\n" + "| Col1 | Col2 |\n" + "|------|------|\n" + "| A | B |\n" + ) + out = adapter.format_message(text) + # Pipes inside the fenced block are NOT escaped + assert "```\n| Col1 | Col2 |" in out + assert "\\|" not in out.split("```")[1] + # Dashes in separator not escaped inside fence + assert "\\-" not in out.split("```")[1] + + def test_text_after_table_still_formatted(self, adapter): + text = ( + "| A | B |\n" + "|---|---|\n" + "| 1 | 2 |\n" + "\n" + "Nice **work** team!" + ) + out = adapter.format_message(text) + # MarkdownV2 bold conversion still happens outside the table + assert "*work*" in out + # Exclamation outside fence is escaped + assert "\\!" in out + + def test_multiple_tables_in_single_message(self, adapter): + text = ( + "First:\n" + "| A | B |\n" + "|---|---|\n" + "| 1 | 2 |\n" + "\n" + "Second:\n" + "| X | Y |\n" + "|---|---|\n" + "| 9 | 8 |\n" + ) + out = adapter.format_message(text) + # Two separate fenced blocks in the output + assert out.count("```") == 4 + + @pytest.mark.asyncio async def test_send_escapes_chunk_indicator_for_markdownv2(adapter): adapter.MAX_MESSAGE_LENGTH = 80