fix(telegram): keep chunk markers outside code fences

When truncate_message appends a (N/M) chunk indicator to a chunk that
had to close an in-progress fenced code block, the marker lands on the
closing fence line (``` \(1/2\) after MarkdownV2 escaping). Telegram
does not treat that as a clean closing fence and rejects the MarkdownV2,
falling back to plain text. Move the indicator onto its own line right
after the closing fence at all three legacy-send call sites.

Fixes #48517
This commit is contained in:
miha 2026-06-20 23:50:46 -07:00 committed by Teknium
parent 5aec00f7a9
commit 796f618f99
2 changed files with 62 additions and 3 deletions

View file

@ -196,6 +196,24 @@ def _strip_mdv2(text: str) -> str:
return cleaned
_CHUNK_INDICATOR_ON_FENCE_RE = re.compile(
r'(?m)^``` (?P<indicator>(?:\\)?\(\d+/\d+(?:\\)?\))$'
)
def _separate_chunk_indicator_from_fence(text: str) -> str:
"""Move ``(N/M)`` chunk markers off Telegram code-fence lines.
``truncate_message()`` appends chunk indicators to the end of a chunk. When
the chunk had to close an in-progress fenced code block, that creates a
line like ````` \\(1/2\\)`` after MarkdownV2 escaping. Telegram does not
treat that as a clean closing fence, so it can reject MarkdownV2 and fall
back to plain text. Put the indicator on its own line immediately after the
closing fence.
"""
return _CHUNK_INDICATOR_ON_FENCE_RE.sub(r'```\n\g<indicator>', text)
# ---------------------------------------------------------------------------
# Markdown table → Telegram-friendly row groups
# ---------------------------------------------------------------------------
@ -2436,7 +2454,9 @@ class TelegramAdapter(BasePlatformAdapter):
# MarkdownV2-special parentheses so Telegram doesn't reject the
# chunk and fall back to plain text.
chunks = [
re.sub(r" \((\d+)/(\d+)\)$", r" \\(\1/\2\\)", chunk)
_separate_chunk_indicator_from_fence(
re.sub(r" \((\d+)/(\d+)\)$", r" \\(\1/\2\\)", chunk)
)
for chunk in chunks
]
@ -2910,7 +2930,9 @@ class TelegramAdapter(BasePlatformAdapter):
if finalize:
# Use format_message + parse_mode for the final chunk;
# mirror edit_message's main happy-path.
formatted = self.format_message(first_chunk)
formatted = _separate_chunk_indicator_from_fence(
self.format_message(first_chunk)
)
try:
await self._bot.edit_message_text(
chat_id=int(chat_id),
@ -2971,7 +2993,9 @@ class TelegramAdapter(BasePlatformAdapter):
for use_markdown in (True, False) if finalize else (False,):
try:
if use_markdown:
text = self.format_message(chunk)
text = _separate_chunk_indicator_from_fence(
self.format_message(chunk)
)
else:
# Plain attempt: on finalize the MarkdownV2 attempt
# failed, so degrade to clean stripped text, never

View file

@ -178,6 +178,41 @@ class TestFormatMessageCodeBlocks:
assert r"`\\\\server\\share`" in result
@pytest.mark.asyncio
async def test_legacy_send_keeps_chunk_indicators_outside_fenced_code_lines(adapter):
    """No sent chunk may carry its ``(N/M)`` marker on a code-fence line.

    A closing fence followed by trailing text is malformed MarkdownV2 for
    Telegram, and the bot then falls back to plain text, which users see as
    a duplicate/malformed preview. The message below is long enough, given
    the 120-character limit set here, that the test expects more than one
    send and checks every line of every sent chunk.
    """
    # Stub the bot so each send_message call yields a distinct message id.
    bot = MagicMock()
    adapter._bot = bot
    bot.send_message = AsyncMock(
        side_effect=[SimpleNamespace(message_id=i) for i in range(1, 20)]
    )
    bot.send_chat_action = AsyncMock()

    # Small limit plus the non-rich path, so the message is sent in chunks.
    object.__setattr__(adapter, "MAX_MESSAGE_LENGTH", 120)
    adapter._rich_messages_enabled = False

    code_lines = "~/.hermes/skills/github/hermes-contribution-workflow/SKILL.md\n" * 8
    content = "".join(
        [
            "Intro before code block\n",
            "```text\n",
            code_lines,
            "```\n",
            "After.",
        ]
    )

    result = await adapter.send("12345", content, metadata={"expect_edits": True})
    assert result.success is True

    sent_texts = [
        sent.kwargs["text"] for sent in adapter._bot.send_message.await_args_list
    ]
    assert len(sent_texts) > 1

    # Fence line followed by a marker, in escaped or unescaped form.
    escaped_or_plain = r"^```\s+\\?\(\d+/\d+\\?\)$"
    plain_only = r"^```\s+\(\d+/\d+\)$"
    for payload in sent_texts:
        for row in payload.splitlines():
            assert re.match(escaped_or_plain, row) is None, payload
            assert re.match(plain_only, row) is None, payload
# =========================================================================
# format_message - bold and italic
# =========================================================================