From 70c834a740ed8a71895f57d7c164890b4ecb03cd Mon Sep 17 00:00:00 2001 From: Yashiel Sookdeo Date: Sat, 13 Jun 2026 21:15:54 +0200 Subject: [PATCH] =?UTF-8?q?refactor:=20extract=20shared=20GFM=20table?= =?UTF-8?q?=E2=86=92bullet=20helpers=20into=20helpers.py?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move table-detection regex, row-splitting, and table-to-bullet conversion into gateway/platforms/helpers.py so both Discord and Telegram adapters can share them. Co-authored-by: Yashiel Sookdeo --- gateway/platforms/helpers.py | 125 +++++++++++++++++++++++++ tests/gateway/test_table_helpers.py | 137 ++++++++++++++++++++++++++++ 2 files changed, 262 insertions(+) create mode 100644 tests/gateway/test_table_helpers.py diff --git a/gateway/platforms/helpers.py b/gateway/platforms/helpers.py index a3704bf50cf..7af36280cf4 100644 --- a/gateway/platforms/helpers.py +++ b/gateway/platforms/helpers.py @@ -276,3 +276,128 @@ def redact_phone(phone: str) -> str: if len(phone) <= 8: return phone[:2] + "****" + phone[-2:] if len(phone) > 4 else "****" return phone[:4] + "****" + phone[-4:] + + +# ─── GFM Markdown Table → Bullet Conversion ───────────────────────────────── +# Shared by Discord and Telegram adapters. Discord calls +# convert_table_to_bullets() directly; Telegram imports the primitives +# but keeps its own MarkdownV2-aware renderer. + + +# Matches a GFM table delimiter row: optional outer pipes, cells of dashes +# (with optional alignment colons) separated by '|'. +# Requires at least one internal '|' so lone '---' rules are NOT matched. +TABLE_SEPARATOR_RE = re.compile( + r'^\s*\|?\s*:?-+:?\s*(?:\|\s*:?-+:?\s*){1,}\|?\s*$' +) + + +def is_table_row(line: str) -> bool: + """Return True if *line* could plausibly be a table data row.""" + stripped = line.strip() + return bool(stripped) and '|' in stripped + + +def split_markdown_table_row(line: str) -> list[str]: + """Split a GFM table row into stripped cell values.""" + stripped = line.strip() + if stripped.startswith("|"): + stripped = stripped[1:] + if stripped.endswith("|"): + stripped = stripped[:-1] + return [cell.strip() for cell in stripped.split("|")] + + +def _render_table_block(table_block: list[str]) -> str: + """Render a detected GFM table as bold-heading + bullet groups. + + Uses the same alignment logic as Telegram's renderer: for non-row-label + tables, ``data_cells = cells`` (the full row) and the bullet whose value + duplicates the heading is skipped. This keeps header→value alignment + correct. + """ + if len(table_block) < 3: + return "\n".join(table_block) + + headers = split_markdown_table_row(table_block[0]) + if len(headers) < 2: + return "\n".join(table_block) + + first_data_row = ( + split_markdown_table_row(table_block[2]) + if len(table_block) > 2 + else [] + ) + has_row_label_col = len(first_data_row) == len(headers) + 1 + + rendered_groups: list[str] = [] + for index, row in enumerate(table_block[2:], start=1): + cells = split_markdown_table_row(row) + if has_row_label_col: + heading = cells[0] if cells and cells[0] else f"Row {index}" + data_cells = cells[1:] + else: + heading = next((cell for cell in cells if cell), f"Row {index}") + data_cells = cells + + if len(data_cells) < len(headers): + data_cells.extend([""] * (len(headers) - len(data_cells))) + elif len(data_cells) > len(headers): + data_cells = data_cells[: len(headers)] + + bullets: list[str] = [] + for header, value in zip(headers, data_cells): + if not has_row_label_col and value == heading: + continue + bullets.append(f"• {header}: {value}") + + group_lines = [f"**{heading}**", *bullets] + rendered_groups.append("\n".join(group_lines)) + + return "\n\n".join(rendered_groups) + + +def convert_table_to_bullets(text: str) -> str: + """Rewrite GFM pipe tables into bold-heading + bullet groups. + + Tables inside fenced code blocks are left alone. + """ + if '|' not in text or '-' not in text: + return text + + lines = text.split('\n') + out: list[str] = [] + in_fence = False + i = 0 + while i < len(lines): + line = lines[i] + stripped = line.lstrip() + + if stripped.startswith('```'): + in_fence = not in_fence + out.append(line) + i += 1 + continue + if in_fence: + out.append(line) + i += 1 + continue + + if ( + '|' in line + and i + 1 < len(lines) + and TABLE_SEPARATOR_RE.match(lines[i + 1]) + ): + table_block = [line, lines[i + 1]] + j = i + 2 + while j < len(lines) and is_table_row(lines[j]): + table_block.append(lines[j]) + j += 1 + out.append(_render_table_block(table_block)) + i = j + continue + + out.append(line) + i += 1 + + return '\n'.join(out) diff --git a/tests/gateway/test_table_helpers.py b/tests/gateway/test_table_helpers.py new file mode 100644 index 00000000000..b3048a921dc --- /dev/null +++ b/tests/gateway/test_table_helpers.py @@ -0,0 +1,137 @@ +"""Shared GFM table → bullet conversion helpers.""" + +from gateway.platforms.helpers import ( + TABLE_SEPARATOR_RE, + is_table_row, + split_markdown_table_row, + convert_table_to_bullets, +) + + +class TestTablePrimitives: + + def test_separator_re_matches_basic(self): + assert TABLE_SEPARATOR_RE.match("|---|---|") + + def test_separator_re_matches_alignment(self): + assert TABLE_SEPARATOR_RE.match("|:-----|----:|:----:|") + + def test_separator_re_rejects_lone_rule(self): + assert not TABLE_SEPARATOR_RE.match("---") + + def test_is_table_row_with_pipe(self): + assert is_table_row("| Alice | 150 |") + + def test_is_table_row_blank(self): + assert not is_table_row("") + + def test_split_row_strips_outer_pipes(self): + assert split_markdown_table_row("| a | b | c |") == ["a", "b", "c"] + + def test_split_row_no_outer_pipes(self): + assert split_markdown_table_row("a | b | c") == ["a", "b", "c"] + + +class TestConvertTableToBullets: + + def test_basic_table(self): + text = ( + "| Player | Score |\n" + "|--------|-------|\n" + "| Alice | 150 |\n" + "| Bob | 120 |" + ) + out = convert_table_to_bullets(text) + assert "**Alice**" in out + assert "• Score: 150" in out + assert "**Bob**" in out + assert "• Score: 120" in out + assert "• Player: Alice" not in out + + def test_three_column_table(self): + text = ( + "| Name | Age | City |\n" + "|:-----|----:|:----:|\n" + "| Ada | 30 | NYC |" + ) + out = convert_table_to_bullets(text) + assert "**Ada**" in out + assert "• Name: Ada" not in out + assert "• Age: 30" in out + assert "• City: NYC" in out + assert "**Ada**\n• Age: 30\n• City: NYC" in out + + def test_row_label_column(self): + text = ( + "| | Score | Rank |\n" + "|--------|-------|------|\n" + "| Alice | 150 | 1 |\n" + "| Bob | 120 | 2 |" + ) + out = convert_table_to_bullets(text) + assert "**Alice**" in out + assert "• Score: 150" in out + assert "• Rank: 1" in out + assert "**Alice**\n• Score: 150\n• Rank: 1" in out + + def test_bare_pipe_table(self): + text = "head1 | head2\n--- | ---\na | b\nc | d" + out = convert_table_to_bullets(text) + assert "**a**" in out + assert "• head1: a" not in out + assert "• head2: b" in out + + def test_two_consecutive_tables(self): + text = ( + "| A | B |\n" + "|---|---|\n" + "| 1 | 2 |\n" + "\n" + "| X | Y |\n" + "|---|---|\n" + "| 9 | 8 |" + ) + out = convert_table_to_bullets(text) + assert out.count("**1**") == 1 + assert out.count("**9**") == 1 + assert "• B: 2" in out + assert "• Y: 8" in out + + def test_surrounding_prose_preserved(self): + text = ( + "Scores:\n\n" + "| Player | Score |\n" + "|--------|-------|\n" + "| Alice | 150 |\n" + "\nEnd." + ) + out = convert_table_to_bullets(text) + assert out.startswith("Scores:") + assert out.endswith("End.") + + def test_table_inside_code_fence_untouched(self): + text = "```\n| a | b |\n|---|---|\n| 1 | 2 |\n```" + assert convert_table_to_bullets(text) == text + + def test_plain_text_with_pipes_untouched(self): + text = "Use the | pipe operator to chain." + assert convert_table_to_bullets(text) == text + + def test_horizontal_rule_not_matched(self): + text = "Section A\n\n---\n\nSection B" + assert convert_table_to_bullets(text) == text + + def test_no_pipe_short_circuits(self): + text = "Plain **bold** text." + assert convert_table_to_bullets(text) == text + + def test_row_groups_separated_by_blank_line(self): + text = ( + "| A | B |\n" + "|---|---|\n" + "| x | 1 |\n" + "| y | 2 |" + ) + out = convert_table_to_bullets(text) + assert "• B: 1\n\n**y**" in out + assert "\n\n• " not in out