diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py
index 2f4ec93294..5b1fef1337 100644
--- a/gateway/platforms/telegram.py
+++ b/gateway/platforms/telegram.py
@@ -118,6 +118,84 @@ def _strip_mdv2(text: str) -> str:
     return cleaned
 
 
+# ---------------------------------------------------------------------------
+# Markdown table → code block conversion
+# ---------------------------------------------------------------------------
+# Telegram's MarkdownV2 has no table syntax — '|' is just an escaped literal,
+# so pipe tables render as noisy backslash-pipe text with no alignment.
+# Wrapping the table in a fenced code block makes Telegram render it as
+# monospace preformatted text with columns intact.
+
+# Matches a GFM table delimiter row: optional outer pipes, cells containing
+# only dashes (with optional leading/trailing colons for alignment) separated
+# by '|'.  Requires at least one internal '|' so lone '---' horizontal rules
+# are NOT matched.
+_TABLE_SEPARATOR_RE = re.compile(
+    r'^\s*\|?\s*:?-+:?\s*(?:\|\s*:?-+:?\s*){1,}\|?\s*$'
+)
+
+
+def _is_table_row(line: str) -> bool:
+    """Return True if *line* is non-blank and contains a '|' character."""
+    stripped = line.strip()
+    return bool(stripped) and '|' in stripped
+
+
+def _wrap_markdown_tables(text: str) -> str:
+    """Return *text* with GFM-style pipe tables wrapped in ``` fences.
+
+    A table starts at a line containing '|' whose next line matches
+    :data:`_TABLE_SEPARATOR_RE`; following non-blank lines that contain
+    '|' are taken as its body.  Lines between existing ``` fence markers
+    are copied through untouched.  *text* is returned unchanged when it
+    has no '|' or no '-' at all.
+    """
+    if '|' not in text or '-' not in text:
+        return text  # a header needs '|' and a delimiter row needs '-'
+
+    lines = text.split('\n')
+    out: list[str] = []
+    in_fence = False
+    i = 0
+    while i < len(lines):
+        line = lines[i]
+        stripped = line.lstrip()
+
+        # Track existing fenced code blocks — never touch content inside.
+        if stripped.startswith('```'):
+            in_fence = not in_fence  # opening and closing markers both toggle
+            out.append(line)
+            i += 1
+            continue
+        if in_fence:
+            out.append(line)
+            i += 1
+            continue
+
+        # Look for a header row (contains '|') immediately followed by a
+        # delimiter row.
+        if (
+            '|' in line
+            and i + 1 < len(lines)
+            and _TABLE_SEPARATOR_RE.match(lines[i + 1])
+        ):
+            table_block = [line, lines[i + 1]]
+            j = i + 2  # first candidate body row, just past the delimiter
+            while j < len(lines) and _is_table_row(lines[j]):
+                table_block.append(lines[j])
+                j += 1
+            out.append('```')
+            out.extend(table_block)
+            out.append('```')
+            i = j  # resume scanning at the first line after the table
+            continue
+
+        out.append(line)
+        i += 1
+
+    return '\n'.join(out)
+
+
 class TelegramAdapter(BasePlatformAdapter):
     """
     Telegram bot adapter.
@@ -1916,6 +1994,12 @@ class TelegramAdapter(BasePlatformAdapter):
 
         text = content
 
+        # 0) Pre-wrap GFM-style pipe tables in ``` fences.  Telegram can't
+        #    render tables natively, but fenced code blocks render as
+        #    monospace preformatted text with columns intact.  The wrapped
+        #    tables then flow through step (1) below as protected regions.
+        text = _wrap_markdown_tables(text)
+
         # 1) Protect fenced code blocks (``` ... ```)
         #    Per MarkdownV2 spec, \ and ` inside pre/code must be escaped.
         def _protect_fenced(m):
diff --git a/tests/gateway/test_telegram_format.py b/tests/gateway/test_telegram_format.py
index 1bd889b7c8..ce7e02a474 100644
--- a/tests/gateway/test_telegram_format.py
+++ b/tests/gateway/test_telegram_format.py
@@ -34,7 +34,12 @@ def _ensure_telegram_mock():
 
 _ensure_telegram_mock()
 
-from gateway.platforms.telegram import TelegramAdapter, _escape_mdv2, _strip_mdv2  # noqa: E402
+from gateway.platforms.telegram import (  # noqa: E402
+    TelegramAdapter,
+    _escape_mdv2,
+    _strip_mdv2,
+    _wrap_markdown_tables,
+)
 
 
 # ---------------------------------------------------------------------------
@@ -535,6 +540,152 @@ class TestStripMdv2:
         assert _strip_mdv2("||hidden text||") == "hidden text"
 
 
+# =========================================================================
+# Markdown table auto-wrap
+# =========================================================================
+
+
+class TestWrapMarkdownTables:
+    """_wrap_markdown_tables wraps GFM pipe tables in ``` fences so
+    Telegram renders them as monospace preformatted text instead of the
+    noisy backslash-pipe mess MarkdownV2 produces."""
+
+    def test_basic_table_wrapped(self):
+        text = (
+            "Scores:\n\n"
+            "| Player | Score |\n"
+            "|--------|-------|\n"
+            "| Alice | 150 |\n"
+            "| Bob | 120 |\n"
+            "\nEnd."
+        )
+        out = _wrap_markdown_tables(text)
+        # Table is now wrapped in a fence
+        assert "```\n| Player | Score |" in out
+        assert "| Bob | 120 |\n```" in out
+        # Surrounding prose is preserved
+        assert out.startswith("Scores:")
+        assert out.endswith("End.")
+
+    def test_bare_pipe_table_wrapped(self):
+        """Tables without outer pipes (GFM allows this) are still detected."""
+        text = "head1 | head2\n--- | ---\na | b\nc | d"
+        out = _wrap_markdown_tables(text)
+        assert out.startswith("```\n")
+        assert out.rstrip().endswith("```")
+        assert "head1 | head2" in out
+
+    def test_alignment_separators(self):
+        """Separator rows with :--- / ---: / :---: alignment markers match."""
+        text = (
+            "| Name | Age | City |\n"
+            "|:-----|----:|:----:|\n"
+            "| Ada | 30 | NYC |"
+        )
+        out = _wrap_markdown_tables(text)
+        assert out.count("```") == 2
+
+    def test_two_consecutive_tables_wrapped_separately(self):
+        text = (
+            "| A | B |\n"
+            "|---|---|\n"
+            "| 1 | 2 |\n"
+            "\n"
+            "| X | Y |\n"
+            "|---|---|\n"
+            "| 9 | 8 |"
+        )
+        out = _wrap_markdown_tables(text)
+        # Four fences total — one opening + closing per table
+        assert out.count("```") == 4
+
+    def test_plain_text_with_pipes_not_wrapped(self):
+        """A bare pipe in prose must NOT trigger wrapping."""
+        text = "Use the | pipe operator to chain commands."
+        assert _wrap_markdown_tables(text) == text
+
+    def test_horizontal_rule_not_wrapped(self):
+        """A lone '---' horizontal rule must not be mistaken for a separator."""
+        text = "Section A\n\n---\n\nSection B"
+        assert _wrap_markdown_tables(text) == text
+
+    def test_existing_code_block_with_pipes_left_alone(self):
+        """A table already inside a fenced code block must not be re-wrapped."""
+        text = (
+            "```\n"
+            "| a | b |\n"
+            "|---|---|\n"
+            "| 1 | 2 |\n"
+            "```"
+        )
+        assert _wrap_markdown_tables(text) == text
+
+    def test_no_pipe_character_short_circuits(self):
+        text = "Plain **bold** text with no table."
+        assert _wrap_markdown_tables(text) == text
+
+    def test_no_dash_short_circuits(self):
+        text = "a | b\nc | d"  # has pipes but no '-' separator row
+        assert _wrap_markdown_tables(text) == text
+
+    def test_single_column_separator_not_matched(self):
+        """Single-column tables (rare) are not detected — we require at
+        least one internal pipe in the separator row to avoid false
+        positives on formatting rules."""
+        text = "| a |\n| - |\n| b |"
+        assert _wrap_markdown_tables(text) == text
+
+
+class TestFormatMessageTables:
+    """End-to-end: a pipe table passes through format_message with its
+    pipes and dashes left alone inside the fence, not mangled by MarkdownV2
+    escaping."""
+
+    def test_table_rendered_as_code_block(self, adapter):
+        text = (
+            "Data:\n\n"
+            "| Col1 | Col2 |\n"
+            "|------|------|\n"
+            "| A | B |\n"
+        )
+        out = adapter.format_message(text)
+        # Pipes inside the fenced block are NOT escaped
+        assert "```\n| Col1 | Col2 |" in out
+        assert "\\|" not in out.split("```")[1]
+        # Dashes in separator not escaped inside fence
+        assert "\\-" not in out.split("```")[1]
+
+    def test_text_after_table_still_formatted(self, adapter):
+        text = (
+            "| A | B |\n"
+            "|---|---|\n"
+            "| 1 | 2 |\n"
+            "\n"
+            "Nice **work** team!"
+        )
+        out = adapter.format_message(text)
+        # MarkdownV2 bold conversion still happens outside the table
+        assert "*work*" in out
+        # Exclamation outside fence is escaped
+        assert "\\!" in out
+
+    def test_multiple_tables_in_single_message(self, adapter):
+        text = (
+            "First:\n"
+            "| A | B |\n"
+            "|---|---|\n"
+            "| 1 | 2 |\n"
+            "\n"
+            "Second:\n"
+            "| X | Y |\n"
+            "|---|---|\n"
+            "| 9 | 8 |\n"
+        )
+        out = adapter.format_message(text)
+        # Two separate fenced blocks in the output
+        assert out.count("```") == 4
+
+
 @pytest.mark.asyncio
 async def test_send_escapes_chunk_indicator_for_markdownv2(adapter):
     adapter.MAX_MESSAGE_LENGTH = 80