mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-30 11:52:04 +00:00
refactor: extract shared GFM table→bullet helpers into helpers.py
Move table-detection regex, row-splitting, and table-to-bullet conversion into gateway/platforms/helpers.py so both Discord and Telegram adapters can share them. Co-authored-by: Yashiel Sookdeo <yashiel@skyner.co.za>
This commit is contained in:
parent
9c9b28a2b3
commit
70c834a740
2 changed files with 262 additions and 0 deletions
|
|
@ -276,3 +276,128 @@ def redact_phone(phone: str) -> str:
|
|||
if len(phone) <= 8:
|
||||
return phone[:2] + "****" + phone[-2:] if len(phone) > 4 else "****"
|
||||
return phone[:4] + "****" + phone[-4:]
|
||||
|
||||
|
||||
# ─── GFM Markdown Table → Bullet Conversion ─────────────────────────────────
|
||||
# Shared by Discord and Telegram adapters. Discord calls
|
||||
# convert_table_to_bullets() directly; Telegram imports the primitives
|
||||
# but keeps its own MarkdownV2-aware renderer.
|
||||
|
||||
|
||||
# Matches a GFM table delimiter row: optional outer pipes, cells of dashes
# (with optional alignment colons) separated by '|'.
# Requires at least one internal '|' so lone '---' rules are NOT matched.
#
# Each optional outer pipe is grouped together with the whitespace that
# follows it.  This accepts exactly the same lines as writing the pipe and
# the whitespace as two independent optional pieces, but avoids two adjacent
# whitespace runs that would make the regex engine backtrack quadratically
# on long, non-matching, whitespace-heavy lines.
TABLE_SEPARATOR_RE = re.compile(
    r'^\s*(?:\|\s*)?:?-+:?\s*(?:\|\s*:?-+:?\s*)+(?:\|\s*)?$'
)
|
||||
|
||||
|
||||
def is_table_row(line: str) -> bool:
    """Tell whether *line* looks like it could be a table data row.

    A line qualifies when it is non-blank after trimming surrounding
    whitespace and contains at least one pipe character.
    """
    candidate = line.strip()
    if not candidate:
        return False
    return '|' in candidate
|
||||
|
||||
|
||||
def split_markdown_table_row(line: str) -> list[str]:
    """Split a GFM table row into stripped cell values.

    One optional leading and one optional trailing border pipe are removed
    before splitting.  A pipe preceded by a backslash (``\\|``) is treated
    as literal cell content, as GFM allows, and is returned as a plain
    ``|``.  Every other backslash sequence is passed through untouched.

    Args:
        line: A single raw table line (header or data row).

    Returns:
        The cell texts in column order, each with surrounding whitespace
        removed.  Always contains at least one element.
    """
    stripped = line.strip()
    if stripped.startswith("|"):
        stripped = stripped[1:]
    if stripped.endswith("|"):
        body = stripped[:-1]
        # The final pipe is a border only if it is not escaped, i.e. it is
        # preceded by an even number (possibly zero) of backslashes.
        trailing_backslashes = len(body) - len(body.rstrip("\\"))
        if trailing_backslashes % 2 == 0:
            stripped = body

    cells: list[str] = []
    current: list[str] = []
    pos = 0
    while pos < len(stripped):
        char = stripped[pos]
        if char == "\\" and pos + 1 < len(stripped):
            # Consume the backslash together with the character it escapes
            # so that "\\|" (escaped backslash, then delimiter) still splits.
            following = stripped[pos + 1]
            current.append("|" if following == "|" else char + following)
            pos += 2
            continue
        if char == "|":
            cells.append("".join(current))
            current = []
        else:
            current.append(char)
        pos += 1
    cells.append("".join(current))

    return [cell.strip() for cell in cells]
|
||||
|
||||
|
||||
def _render_table_block(table_block: list[str]) -> str:
    """Render a detected GFM table as bold-heading + bullet groups.

    Each data row becomes one group: a ``**heading**`` line followed by one
    ``• header: value`` bullet per column.  Groups are separated by a blank
    line.

    Two layouts are supported:

    * Row-label tables, where the first data row has exactly one more cell
      than the header row.  The first cell of every row is the heading and
      the remaining cells line up with the headers.
    * Regular tables.  The first non-empty cell of the row is the heading,
      the full row lines up with the headers, and the bullet for the cell
      that supplied the heading is omitted so it is not repeated.  Only that
      one cell is omitted; other cells that happen to contain the same text
      keep their bullets.

    Args:
        table_block: The raw table lines: header row, delimiter row, then
            data rows.

    Returns:
        The rendered text, or the original lines joined with newlines when
        the block has no data rows or fewer than two header cells.
    """
    # Header + delimiter + at least one data row are required.
    if len(table_block) < 3:
        return "\n".join(table_block)

    headers = split_markdown_table_row(table_block[0])
    if len(headers) < 2:
        return "\n".join(table_block)

    # The layout is decided once, from the first data row.
    first_data_row = split_markdown_table_row(table_block[2])
    has_row_label_col = len(first_data_row) == len(headers) + 1

    rendered_groups: list[str] = []
    for index, row in enumerate(table_block[2:], start=1):
        cells = split_markdown_table_row(row)

        # Position (within the header-aligned cells) of the cell used as the
        # heading; -1 means no bullet has to be suppressed.
        heading_pos = -1
        if has_row_label_col:
            heading = cells[0] if cells and cells[0] else f"Row {index}"
            data_cells = cells[1:]
        else:
            heading_pos = next(
                (pos for pos, cell in enumerate(cells) if cell), -1
            )
            heading = cells[heading_pos] if heading_pos >= 0 else f"Row {index}"
            data_cells = cells

        # Pad short rows with empty values and drop overflow cells so the
        # values pair up one-to-one with the headers.
        data_cells = (data_cells + [""] * len(headers))[: len(headers)]

        bullets = [
            f"• {header}: {value}"
            for pos, (header, value) in enumerate(zip(headers, data_cells))
            if pos != heading_pos
        ]

        rendered_groups.append("\n".join([f"**{heading}**", *bullets]))

    return "\n\n".join(rendered_groups)
|
||||
|
||||
|
||||
def convert_table_to_bullets(text: str) -> str:
    """Rewrite GFM pipe tables into bold-heading + bullet groups.

    A table is a line containing a pipe, immediately followed by a delimiter
    row, followed by zero or more rows that contain a pipe.  Everything else
    is passed through unchanged.

    Tables inside fenced code blocks are left alone.  Both backtick and
    tilde fences are recognised, and a fence is only closed by a run of the
    same character that is at least as long as the run that opened it, so a
    shorter fence nested inside a longer one does not end the block early.

    Args:
        text: Markdown text, possibly containing pipe tables.

    Returns:
        The text with every detected table outside a code fence replaced by
        its bullet rendering.
    """
    # Cheap pre-check: a table needs pipes and a dashed delimiter row.
    if '|' not in text or '-' not in text:
        return text

    lines = text.split('\n')
    out: list[str] = []
    open_fence = ''  # the fence run that opened the current code block, if any
    i = 0
    while i < len(lines):
        line = lines[i]
        stripped = line.lstrip()

        # Leading run of fence characters on this line ('' if none).
        fence_run = ''
        if stripped.startswith(('```', '~~~')):
            fence_char = stripped[0]
            fence_run = stripped[: len(stripped) - len(stripped.lstrip(fence_char))]

        if open_fence:
            # Inside a code block: copy verbatim, closing only on a matching
            # fence of sufficient length.
            if (
                fence_run
                and fence_run[0] == open_fence[0]
                and len(fence_run) >= len(open_fence)
            ):
                open_fence = ''
            out.append(line)
            i += 1
            continue

        if fence_run:
            open_fence = fence_run
            out.append(line)
            i += 1
            continue

        if (
            '|' in line
            and i + 1 < len(lines)
            and TABLE_SEPARATOR_RE.match(lines[i + 1])
        ):
            # Header and delimiter found; collect the data rows that follow.
            table_block = [line, lines[i + 1]]
            j = i + 2
            while j < len(lines) and is_table_row(lines[j]):
                table_block.append(lines[j])
                j += 1
            out.append(_render_table_block(table_block))
            i = j
            continue

        out.append(line)
        i += 1

    return '\n'.join(out)
|
||||
|
|
|
|||
137
tests/gateway/test_table_helpers.py
Normal file
137
tests/gateway/test_table_helpers.py
Normal file
|
|
@ -0,0 +1,137 @@
|
|||
"""Shared GFM table → bullet conversion helpers."""
|
||||
|
||||
from gateway.platforms.helpers import (
|
||||
TABLE_SEPARATOR_RE,
|
||||
is_table_row,
|
||||
split_markdown_table_row,
|
||||
convert_table_to_bullets,
|
||||
)
|
||||
|
||||
|
||||
class TestTablePrimitives:
    """Checks for the delimiter regex, row detection and row splitting."""

    def test_separator_re_matches_basic(self):
        """A plain two-column delimiter row is recognised."""
        delimiter_row = "|---|---|"
        assert TABLE_SEPARATOR_RE.match(delimiter_row)

    def test_separator_re_matches_alignment(self):
        """Alignment colons on either side of the dashes are accepted."""
        delimiter_row = "|:-----|----:|:----:|"
        assert TABLE_SEPARATOR_RE.match(delimiter_row)

    def test_separator_re_rejects_lone_rule(self):
        """A bare horizontal rule has no internal pipe and must not match."""
        assert TABLE_SEPARATOR_RE.match("---") is None

    def test_is_table_row_with_pipe(self):
        """A line containing pipes counts as a table row."""
        row = "| Alice | 150 |"
        assert is_table_row(row)

    def test_is_table_row_blank(self):
        """An empty line is never a table row."""
        blank = ""
        assert not is_table_row(blank)

    def test_split_row_strips_outer_pipes(self):
        """Border pipes are dropped and cells are trimmed."""
        expected = ["a", "b", "c"]
        assert split_markdown_table_row("| a | b | c |") == expected

    def test_split_row_no_outer_pipes(self):
        """Rows without border pipes split the same way."""
        expected = ["a", "b", "c"]
        assert split_markdown_table_row("a | b | c") == expected
|
||||
|
||||
|
||||
class TestConvertTableToBullets:
    """End-to-end checks for rewriting pipe tables as bullet groups."""

    @staticmethod
    def _rows(*rows):
        """Join table/prose lines into one newline-separated document."""
        return "\n".join(rows)

    def test_basic_table(self):
        """Two-column table: first cell becomes the heading, not a bullet."""
        source = self._rows(
            "| Player | Score |",
            "|--------|-------|",
            "| Alice | 150 |",
            "| Bob | 120 |",
        )
        rendered = convert_table_to_bullets(source)
        for expected in ("**Alice**", "• Score: 150", "**Bob**", "• Score: 120"):
            assert expected in rendered
        assert "• Player: Alice" not in rendered

    def test_three_column_table(self):
        """Aligned three-column table keeps header/value pairing."""
        source = self._rows(
            "| Name | Age | City |",
            "|:-----|----:|:----:|",
            "| Ada | 30 | NYC |",
        )
        rendered = convert_table_to_bullets(source)
        assert "**Ada**" in rendered
        assert "• Name: Ada" not in rendered
        assert "• Age: 30" in rendered
        assert "• City: NYC" in rendered
        assert "**Ada**\n• Age: 30\n• City: NYC" in rendered

    def test_row_label_column(self):
        """An empty first header cell still yields the row label as heading."""
        source = self._rows(
            "| | Score | Rank |",
            "|--------|-------|------|",
            "| Alice | 150 | 1 |",
            "| Bob | 120 | 2 |",
        )
        rendered = convert_table_to_bullets(source)
        assert "**Alice**" in rendered
        assert "• Score: 150" in rendered
        assert "• Rank: 1" in rendered
        assert "**Alice**\n• Score: 150\n• Rank: 1" in rendered

    def test_bare_pipe_table(self):
        """Tables without border pipes are converted too."""
        source = "head1 | head2\n--- | ---\na | b\nc | d"
        rendered = convert_table_to_bullets(source)
        assert "**a**" in rendered
        assert "• head1: a" not in rendered
        assert "• head2: b" in rendered

    def test_two_consecutive_tables(self):
        """Two tables separated by a blank line are rendered independently."""
        source = self._rows(
            "| A | B |",
            "|---|---|",
            "| 1 | 2 |",
            "",
            "| X | Y |",
            "|---|---|",
            "| 9 | 8 |",
        )
        rendered = convert_table_to_bullets(source)
        assert rendered.count("**1**") == 1
        assert rendered.count("**9**") == 1
        assert "• B: 2" in rendered
        assert "• Y: 8" in rendered

    def test_surrounding_prose_preserved(self):
        """Text before and after the table survives the rewrite."""
        source = self._rows(
            "Scores:",
            "",
            "| Player | Score |",
            "|--------|-------|",
            "| Alice | 150 |",
            "",
            "End.",
        )
        rendered = convert_table_to_bullets(source)
        assert rendered.startswith("Scores:")
        assert rendered.endswith("End.")

    def test_table_inside_code_fence_untouched(self):
        """A table inside a fenced code block is returned verbatim."""
        source = "```\n| a | b |\n|---|---|\n| 1 | 2 |\n```"
        assert convert_table_to_bullets(source) == source

    def test_plain_text_with_pipes_untouched(self):
        """A stray pipe in prose does not trigger conversion."""
        source = "Use the | pipe operator to chain."
        assert convert_table_to_bullets(source) == source

    def test_horizontal_rule_not_matched(self):
        """A horizontal rule between paragraphs is not a delimiter row."""
        source = "Section A\n\n---\n\nSection B"
        assert convert_table_to_bullets(source) == source

    def test_no_pipe_short_circuits(self):
        """Text without any pipe is returned unchanged."""
        source = "Plain **bold** text."
        assert convert_table_to_bullets(source) == source

    def test_row_groups_separated_by_blank_line(self):
        """Groups are blank-line separated; bullets within a group are not."""
        source = self._rows(
            "| A | B |",
            "|---|---|",
            "| x | 1 |",
            "| y | 2 |",
        )
        rendered = convert_table_to_bullets(source)
        assert "• B: 1\n\n**y**" in rendered
        assert "\n\n• " not in rendered
|
||||
Loading…
Add table
Add a link
Reference in a new issue