refactor: extract shared GFM table→bullet helpers into helpers.py

Move table-detection regex, row-splitting, and table-to-bullet
conversion into gateway/platforms/helpers.py so both Discord and
Telegram adapters can share them.

Co-authored-by: Yashiel Sookdeo <yashiel@skyner.co.za>
This commit is contained in:
Yashiel Sookdeo 2026-06-13 21:15:54 +02:00 committed by kshitij
parent 9c9b28a2b3
commit 70c834a740
2 changed files with 262 additions and 0 deletions

View file

@ -276,3 +276,128 @@ def redact_phone(phone: str) -> str:
if len(phone) <= 8:
return phone[:2] + "****" + phone[-2:] if len(phone) > 4 else "****"
return phone[:4] + "****" + phone[-4:]
# ─── GFM Markdown Table → Bullet Conversion ─────────────────────────────────
# Shared by Discord and Telegram adapters. Discord calls
# convert_table_to_bullets() directly; Telegram imports the primitives
# but keeps its own MarkdownV2-aware renderer.
# Matches a GFM table delimiter row (the line between the header row and the
# data rows), e.g. ``|---|:--:|`` or ``--- | ---``.  Pattern, piece by piece:
#   ^\s*\|?\s*               optional indentation and optional leading pipe
#   :?-+:?\s*                first cell: one or more dashes, optional
#                            alignment colons on either side
#   (?:\|\s*:?-+:?\s*){1,}   one or more further cells, each introduced by '|'
#   \|?\s*$                  optional trailing pipe and trailing whitespace
# Requiring at least one internal '|' means a lone '---' horizontal rule is
# NOT matched; as a consequence a single-column delimiter such as '|---|'
# is not matched either.
TABLE_SEPARATOR_RE = re.compile(
r'^\s*\|?\s*:?-+:?\s*(?:\|\s*:?-+:?\s*){1,}\|?\s*$'
)
def is_table_row(line: str) -> bool:
    """Tell whether *line* could plausibly be a table data row.

    A line qualifies when it is not blank (after trimming whitespace) and
    contains at least one pipe character.  No further structure is checked.
    """
    content = line.strip()
    if not content:
        # Blank or whitespace-only lines end a table.
        return False
    return '|' in content
def split_markdown_table_row(line: str) -> list[str]:
    """Break a GFM table row into its cell texts.

    Surrounding whitespace is trimmed, at most one leading and one trailing
    pipe are discarded, and the remainder is split on every ``|``.  Each
    resulting cell is stripped of whitespace; empty cells are kept as ``""``.
    """
    row = line.strip()
    # Drop a single outer pipe on the left, if present.
    left = 1 if row.startswith("|") else 0
    row = row[left:]
    # Then a single outer pipe on the right of what remains.
    right = len(row) - 1 if row.endswith("|") else len(row)
    return [piece.strip() for piece in row[:right].split("|")]
def _render_table_block(table_block: list[str]) -> str:
    """Render a detected GFM table as bold-heading + bullet groups.

    Every data row becomes one group: a ``**heading**`` line followed by one
    ``• header: value`` line per column.  Groups are joined by a blank line.

    Two layouts are recognised:

    * Row-label layout: the first data row has exactly one more cell than
      the header row.  The first cell of each row is the heading and the
      remaining cells are paired with the headers.
    * Otherwise the first non-empty cell of the row is the heading and the
      full row is paired with the headers, which keeps header/value
      alignment correct.  Only the cell that supplied the heading is left
      out of the bullets, so a later column that happens to hold the same
      text is still rendered.

    Rows with no usable heading fall back to ``Row N`` (1-based).  Rows are
    truncated or padded with empty strings to the header width.

    Args:
        table_block: The header line, the delimiter line, then the data rows.

    Returns:
        The rendered text.  When there is no data row, or the header has
        fewer than two cells, the input lines are returned joined unchanged.
    """
    # Need header + delimiter + at least one data row.
    if len(table_block) < 3:
        return "\n".join(table_block)

    headers = split_markdown_table_row(table_block[0])
    if len(headers) < 2:
        return "\n".join(table_block)

    data_rows = table_block[2:]
    # The layout is decided once, from the first data row.
    first_row_width = len(split_markdown_table_row(data_rows[0]))
    has_row_label_col = first_row_width == len(headers) + 1

    rendered_groups: list[str] = []
    for index, row in enumerate(data_rows, start=1):
        cells = split_markdown_table_row(row)

        # Column position (within data_cells) that supplied the heading;
        # None when the heading is not one of the rendered columns.
        heading_pos = None
        if has_row_label_col:
            heading = cells[0] if cells and cells[0] else f"Row {index}"
            data_cells = cells[1:]
        else:
            heading_pos = next(
                (pos for pos, cell in enumerate(cells) if cell), None
            )
            heading = (
                cells[heading_pos] if heading_pos is not None
                else f"Row {index}"
            )
            data_cells = cells

        # Normalise the row to the header width without mutating ``cells``.
        data_cells = data_cells[: len(headers)]
        data_cells = data_cells + [""] * (len(headers) - len(data_cells))

        bullets = [
            f"• {header}: {value}"
            for pos, (header, value) in enumerate(zip(headers, data_cells))
            if pos != heading_pos
        ]
        rendered_groups.append("\n".join([f"**{heading}**", *bullets]))

    return "\n\n".join(rendered_groups)
def convert_table_to_bullets(text: str) -> str:
    """Replace GFM pipe tables in *text* with bold-heading + bullet groups.

    A table starts at a line containing ``|`` that is immediately followed
    by a delimiter row; it extends over every following line that still
    looks like a table row.  Lines between triple-backtick fences are copied
    through verbatim, so tables inside fenced code blocks are left alone.
    All other lines are preserved as-is.
    """
    # Cheap pre-check: a table needs both pipes and dashes somewhere.
    if not ('|' in text and '-' in text):
        return text

    source = text.split('\n')
    total = len(source)
    result: list[str] = []
    inside_fence = False
    pos = 0

    while pos < total:
        current = source[pos]

        # A fence marker flips the state and is itself copied through.
        if current.lstrip().startswith('```'):
            inside_fence = not inside_fence
            result.append(current)
            pos += 1
            continue

        starts_table = (
            not inside_fence
            and '|' in current
            and pos + 1 < total
            and TABLE_SEPARATOR_RE.match(source[pos + 1])
        )
        if not starts_table:
            result.append(current)
            pos += 1
            continue

        # Header and delimiter are at pos / pos + 1; absorb the data rows.
        end = pos + 2
        while end < total and is_table_row(source[end]):
            end += 1
        result.append(_render_table_block(source[pos:end]))
        pos = end

    return '\n'.join(result)

View file

@ -0,0 +1,137 @@
"""Shared GFM table → bullet conversion helpers."""
from gateway.platforms.helpers import (
TABLE_SEPARATOR_RE,
is_table_row,
split_markdown_table_row,
convert_table_to_bullets,
)
class TestTablePrimitives:
    """Checks for the low-level table helpers shared by the adapters."""

    def test_separator_re_matches_basic(self):
        """A plain two-column delimiter row is recognised."""
        delimiter = "|---|---|"
        assert TABLE_SEPARATOR_RE.match(delimiter)

    def test_separator_re_matches_alignment(self):
        """Alignment colons on either side of the dashes are accepted."""
        delimiter = "|:-----|----:|:----:|"
        assert TABLE_SEPARATOR_RE.match(delimiter)

    def test_separator_re_rejects_lone_rule(self):
        """A bare horizontal rule has no internal pipe and must not match."""
        rule = "---"
        assert not TABLE_SEPARATOR_RE.match(rule)

    def test_is_table_row_with_pipe(self):
        """Any non-blank line containing a pipe counts as a row."""
        row = "| Alice | 150 |"
        assert is_table_row(row)

    def test_is_table_row_blank(self):
        """An empty line is never a row."""
        blank = ""
        assert not is_table_row(blank)

    def test_split_row_strips_outer_pipes(self):
        """Outer pipes and cell padding are removed."""
        expected = ["a", "b", "c"]
        assert split_markdown_table_row("| a | b | c |") == expected

    def test_split_row_no_outer_pipes(self):
        """Rows written without outer pipes split the same way."""
        expected = ["a", "b", "c"]
        assert split_markdown_table_row("a | b | c") == expected
class TestConvertTableToBullets:
    """End-to-end checks for ``convert_table_to_bullets``."""

    def test_basic_table(self):
        """First column becomes the heading and is not repeated as a bullet."""
        text = (
            "| Player | Score |\n"
            "|--------|-------|\n"
            "| Alice | 150 |\n"
            "| Bob | 120 |"
        )
        out = convert_table_to_bullets(text)
        assert "**Alice**" in out
        assert "• Score: 150" in out
        assert "**Bob**" in out
        assert "• Score: 120" in out
        assert "• Player: Alice" not in out

    def test_three_column_table(self):
        """Remaining columns keep their header/value pairing and order."""
        text = (
            "| Name | Age | City |\n"
            "|:-----|----:|:----:|\n"
            "| Ada | 30 | NYC |"
        )
        out = convert_table_to_bullets(text)
        assert "**Ada**" in out
        assert "• Name: Ada" not in out
        assert "• Age: 30" in out
        assert "• City: NYC" in out
        assert "**Ada**\n• Age: 30\n• City: NYC" in out

    def test_row_label_column(self):
        """An empty first header cell still yields a label-style heading."""
        text = (
            "| | Score | Rank |\n"
            "|--------|-------|------|\n"
            "| Alice | 150 | 1 |\n"
            "| Bob | 120 | 2 |"
        )
        out = convert_table_to_bullets(text)
        assert "**Alice**" in out
        assert "• Score: 150" in out
        assert "• Rank: 1" in out
        assert "**Alice**\n• Score: 150\n• Rank: 1" in out

    def test_bare_pipe_table(self):
        """Tables written without outer pipes are converted too."""
        text = "head1 | head2\n--- | ---\na | b\nc | d"
        out = convert_table_to_bullets(text)
        assert "**a**" in out
        assert "• head1: a" not in out
        assert "• head2: b" in out

    def test_two_consecutive_tables(self):
        """A blank line ends one table; the next is converted on its own."""
        text = (
            "| A | B |\n"
            "|---|---|\n"
            "| 1 | 2 |\n"
            "\n"
            "| X | Y |\n"
            "|---|---|\n"
            "| 9 | 8 |"
        )
        out = convert_table_to_bullets(text)
        assert out.count("**1**") == 1
        assert out.count("**9**") == 1
        assert "• B: 2" in out
        assert "• Y: 8" in out

    def test_surrounding_prose_preserved(self):
        """Text before and after the table is left in place."""
        text = (
            "Scores:\n\n"
            "| Player | Score |\n"
            "|--------|-------|\n"
            "| Alice | 150 |\n"
            "\nEnd."
        )
        out = convert_table_to_bullets(text)
        assert out.startswith("Scores:")
        assert out.endswith("End.")

    def test_table_inside_code_fence_untouched(self):
        """Fenced code blocks are copied through verbatim."""
        text = "```\n| a | b |\n|---|---|\n| 1 | 2 |\n```"
        assert convert_table_to_bullets(text) == text

    def test_plain_text_with_pipes_untouched(self):
        """A pipe in ordinary prose does not trigger conversion."""
        text = "Use the | pipe operator to chain."
        assert convert_table_to_bullets(text) == text

    def test_horizontal_rule_not_matched(self):
        """A lone ``---`` rule is not mistaken for a delimiter row."""
        text = "Section A\n\n---\n\nSection B"
        assert convert_table_to_bullets(text) == text

    def test_no_pipe_short_circuits(self):
        """Text without any pipe is returned unchanged."""
        text = "Plain **bold** text."
        assert convert_table_to_bullets(text) == text

    def test_row_groups_separated_by_blank_line(self):
        """Row groups are separated by exactly one blank line."""
        text = (
            "| A | B |\n"
            "|---|---|\n"
            "| x | 1 |\n"
            "| y | 2 |"
        )
        out = convert_table_to_bullets(text)
        assert "• B: 1\n\n**y**" in out
        # One blank line between groups, never two: the previous check on
        # "\n\n" contradicted the assertion above and could not pass.
        assert "\n\n\n" not in out