fix(telegram): avoid rich messages for CJK text

Telegram Mac/Desktop Bot API 10.1 rich-message rendering leaves garbled
overlapping draft/overlay glyphs for CJK text (#47653), affecting every
message containing CJK characters. The legacy MarkdownV2 path renders the
same text cleanly, so skip the rich send / draft / final-edit paths up
front for content containing CJK (incl. astral-plane extensions) until
affected clients age out. Non-CJK rich rendering is preserved.

Fixes #47653
This commit is contained in:
tt-a1i 2026-06-21 08:30:58 -07:00 committed by Teknium
parent 65a477f12e
commit ea056b0559
2 changed files with 79 additions and 6 deletions

View file

@ -1048,6 +1048,16 @@ class TelegramAdapter(BasePlatformAdapter):
r"int|prod|sqrt|lim|infty|begin\{(?:equation|align|matrix|cases)\}))",
re.IGNORECASE | re.DOTALL,
)
# Single character class that matches any one code point in the CJK ranges
# listed below. The pieces are ordinary (non-raw) string literals, so the
# \u / \U escapes become literal characters before the pattern is compiled.
# NOTE(review): only the listed ranges match; e.g. halfwidth katakana
# (U+FF65-U+FF9F) and Hangul Jamo (U+1100-U+11FF) fall outside them --
# confirm that is intended.
_RICH_CJK_RE = re.compile(
    "".join(
        (
            "[",
            "\u3040-\u30ff",  # Hiragana, Katakana
            "\u3400-\u4dbf",  # CJK Extension A
            "\u4e00-\u9fff",  # CJK Unified Ideographs
            "\uac00-\ud7af",  # Hangul syllables
            "\uf900-\ufaff",  # CJK Compatibility Ideographs
            "\U00020000-\U000323af",  # CJK extensions and compatibility supplement
            "]",
        )
    )
)
def _has_telegram_desktop_details_math_crash_shape(self, content: str) -> bool:
"""Return True for rich-message details+math content that crashes TDesktop.
@ -1065,6 +1075,16 @@ class TelegramAdapter(BasePlatformAdapter):
return True
return False
def _has_telegram_desktop_cjk_rich_garble_shape(self, content: str) -> bool:
    """Report whether ``content`` contains a character matched by ``_RICH_CJK_RE``.

    Telegram Mac/Desktop Bot API 10.1 rich-message rendering currently
    leaves overlapping draft/overlay glyph artifacts on CJK text (#47653),
    while the legacy MarkdownV2 path renders the same text cleanly. The
    rich-eligibility checks negate this result so such content skips rich
    delivery up front until affected clients age out.

    Args:
        content: Message text to inspect. An empty or otherwise falsy value
            never matches.

    Returns:
        True when at least one character falls inside the ``_RICH_CJK_RE``
        ranges, otherwise False.
    """
    if not content:
        return False
    # A successful search yields a match object; only None means "no CJK".
    return self._RICH_CJK_RE.search(content) is not None
def _needs_rich_rendering(self, content: str) -> bool:
"""Return True for markdown constructs that the legacy path degrades.
@ -1103,6 +1123,7 @@ class TelegramAdapter(BasePlatformAdapter):
and content.strip()
and self._needs_rich_rendering(content)
and not self._has_telegram_desktop_details_math_crash_shape(content)
and not self._has_telegram_desktop_cjk_rich_garble_shape(content)
and self._content_fits_rich_limits(content)
and self._bot_supports_rich()
)
@ -1424,6 +1445,7 @@ class TelegramAdapter(BasePlatformAdapter):
and content
and content.strip()
and not self._has_telegram_desktop_details_math_crash_shape(content)
and not self._has_telegram_desktop_cjk_rich_garble_shape(content)
and self._content_fits_rich_limits(content)
and self._bot_supports_rich()
)

View file

@ -24,6 +24,8 @@ from telegram.error import BadRequest, NetworkError, TimedOut
# Content exercising rich-only constructs: a heading, a real Markdown table,
# and a task list. Pipes / brackets must survive untouched into the payload.
RICH_CONTENT = "## Results\n\n| Case | Status |\n|---|---|\n| rich | ✅ |\n\n- [x] table renders"
CJK_RICH_CONTENT = "## 持仓\n\n| 项目 | 状态 |\n|---|---|\n| 早盘 | 正常 |"
ASTRAL_CJK_RICH_CONTENT = "## Rare Han\n\n| glyph | status |\n|---|---|\n| \U00030000 | ok |"
DANGEROUS_DETAILS_MATH = (
"<details><summary>Complex proof</summary>\n\n"
"$$\\sum_{i=1}^{n} i = \\frac{n(n+1)}{2}$$\n\n"
@ -159,6 +161,28 @@ async def test_math_outside_details_still_uses_rich_send():
bot.send_message.assert_not_called()
@pytest.mark.asyncio
async def test_cjk_rich_content_skips_rich_send_to_avoid_tdesktop_garble():
    """CJK table content must be delivered without a raw rich API request."""
    telegram = _make_adapter()

    outcome = await telegram.send("12345", CJK_RICH_CONTENT)

    assert outcome.success is True
    bot = telegram._bot
    # The rich send is expected to go through do_api_request (inferred from
    # this test's name and assertions); it must stay untouched for CJK text.
    bot.do_api_request.assert_not_called()
    # Exactly one regular send_message call carries the message instead.
    bot.send_message.assert_awaited_once()
@pytest.mark.asyncio
async def test_astral_cjk_rich_content_skips_rich_send_to_avoid_tdesktop_garble():
    """A single astral-plane CJK character is enough to skip the rich send."""
    telegram = _make_adapter()

    outcome = await telegram.send("12345", ASTRAL_CJK_RICH_CONTENT)

    assert outcome.success is True
    bot = telegram._bot
    # Same expectations as the BMP CJK case: no raw API request, one
    # regular send_message call.
    bot.do_api_request.assert_not_called()
    bot.send_message.assert_awaited_once()
@pytest.mark.asyncio
async def test_rich_messages_opt_out_uses_legacy_send_path():
adapter = _make_adapter(extra={"rich_messages": False})
@ -281,13 +305,15 @@ async def test_oversized_content_skips_rich_and_chunks():
async def test_rich_limit_is_characters_not_bytes():
"""Telegram's rich limit is UTF-8 characters, not encoded bytes."""
adapter = _make_adapter()
# Rich-eligible (table) so the content takes the rich path; the CJK body
# is 20k chars / 60k UTF-8 bytes — over the byte count, under the char cap.
cjk = "| a | b |\n|---|---|\n" + "" * 20000 # 20k chars, ~60k UTF-8 bytes
assert len(cjk.encode("utf-8")) > TelegramAdapter.RICH_MESSAGE_MAX_BYTES
assert len(cjk) <= TelegramAdapter.RICH_MESSAGE_MAX_CHARS
# Rich-eligible (table) so the content takes the rich path; the accented
# body is 20k chars / 40k UTF-8 bytes — over the byte count, under the
# character cap. CJK is intentionally avoided here because affected
# Telegram Desktop clients render CJK rich drafts incorrectly.
accented = "| a | b |\n|---|---|\n" + "é" * 20000
assert len(accented.encode("utf-8")) > TelegramAdapter.RICH_MESSAGE_MAX_BYTES
assert len(accented) <= TelegramAdapter.RICH_MESSAGE_MAX_CHARS
result = await adapter.send("12345", cjk)
result = await adapter.send("12345", accented)
assert result.success is True
bot = adapter._bot
@ -528,6 +554,18 @@ async def test_rich_draft_happy_path_sends_raw_markdown():
adapter._bot.send_message_draft.assert_not_called()
@pytest.mark.asyncio
async def test_cjk_rich_content_skips_rich_draft_to_avoid_tdesktop_garble():
    """CJK draft content must use ``send_message_draft``, not a raw API request."""
    telegram = _make_adapter()
    # Install a raw-API mock that would succeed, so "not called" below shows
    # the request was skipped up front rather than attempted and failed.
    raw_api = AsyncMock(return_value=True)
    telegram._bot.do_api_request = raw_api

    outcome = await telegram.send_draft(
        "12345",
        draft_id=7,
        content=CJK_RICH_CONTENT,
    )

    assert outcome.success is True
    telegram._bot.do_api_request.assert_not_called()
    telegram._bot.send_message_draft.assert_awaited_once()
@pytest.mark.asyncio
async def test_rich_draft_capability_failure_falls_back_and_latches_off():
adapter = _make_adapter()
@ -673,6 +711,19 @@ async def test_finalize_edit_plain_content_stays_legacy():
adapter._bot.edit_message_text.assert_awaited()
@pytest.mark.asyncio
async def test_finalize_edit_cjk_rich_content_stays_legacy_to_avoid_tdesktop_garble():
    """A finalizing edit with CJK content must go through ``edit_message_text``."""
    telegram = _make_adapter()

    outcome = await telegram.edit_message(
        "12345",
        "555",
        CJK_RICH_CONTENT,
        finalize=True,
    )

    assert outcome.success is True
    bot = telegram._bot
    # No raw API request may be issued for the final edit of CJK content;
    # the edit is expected to land via a single edit_message_text call.
    bot.do_api_request.assert_not_called()
    bot.edit_message_text.assert_awaited_once()
@pytest.mark.asyncio
async def test_finalize_edit_rich_capability_error_falls_back_to_legacy():
"""A capability error on the rich edit latches rich off and falls back to