From ea056b05598cab8330555defe095988c3a7928f9 Mon Sep 17 00:00:00 2001 From: tt-a1i <53142663+tt-a1i@users.noreply.github.com> Date: Sun, 21 Jun 2026 08:30:58 -0700 Subject: [PATCH] fix(telegram): avoid rich messages for CJK text Telegram Mac/Desktop Bot API 10.1 rich-message rendering leaves garbled overlapping draft/overlay glyphs for CJK text (#47653), affecting every message containing CJK characters. The legacy MarkdownV2 path renders the same text cleanly, so skip the rich send / draft / final-edit paths up front for content containing CJK (incl. astral-plane extensions) until affected clients age out. Non-CJK rich rendering is preserved. Fixes #47653 --- plugins/platforms/telegram/adapter.py | 22 +++++++ tests/gateway/test_telegram_rich_messages.py | 63 ++++++++++++++++++-- 2 files changed, 79 insertions(+), 6 deletions(-) diff --git a/plugins/platforms/telegram/adapter.py b/plugins/platforms/telegram/adapter.py index 92f9e174afa..2c11c82fa39 100644 --- a/plugins/platforms/telegram/adapter.py +++ b/plugins/platforms/telegram/adapter.py @@ -1048,6 +1048,16 @@ class TelegramAdapter(BasePlatformAdapter): r"int|prod|sqrt|lim|infty|begin\{(?:equation|align|matrix|cases)\}))", re.IGNORECASE | re.DOTALL, ) + _RICH_CJK_RE = re.compile( + "[" + "\u3040-\u30ff" # Hiragana, Katakana + "\u3400-\u4dbf" # CJK Extension A + "\u4e00-\u9fff" # CJK Unified Ideographs + "\uac00-\ud7af" # Hangul syllables + "\uf900-\ufaff" # CJK Compatibility Ideographs + "\U00020000-\U000323af" # CJK extensions and compatibility supplement + "]" + ) def _has_telegram_desktop_details_math_crash_shape(self, content: str) -> bool: """Return True for rich-message details+math content that crashes TDesktop. @@ -1065,6 +1075,16 @@ class TelegramAdapter(BasePlatformAdapter): return True return False + def _has_telegram_desktop_cjk_rich_garble_shape(self, content: str) -> bool: + """Return True for CJK content that current TDesktop rich drafts garble. + + Telegram Mac/Desktop Bot API 10.1 rich-message rendering currently + leaves overlapping draft/overlay glyph artifacts for CJK text (#47653). + The legacy MarkdownV2 path renders the same text cleanly, so skip rich + delivery up front until affected clients age out. + """ + return bool(content and self._RICH_CJK_RE.search(content)) + def _needs_rich_rendering(self, content: str) -> bool: """Return True for markdown constructs that the legacy path degrades. @@ -1103,6 +1123,7 @@ class TelegramAdapter(BasePlatformAdapter): and content.strip() and self._needs_rich_rendering(content) and not self._has_telegram_desktop_details_math_crash_shape(content) + and not self._has_telegram_desktop_cjk_rich_garble_shape(content) and self._content_fits_rich_limits(content) and self._bot_supports_rich() ) @@ -1424,6 +1445,7 @@ class TelegramAdapter(BasePlatformAdapter): and content and content.strip() and not self._has_telegram_desktop_details_math_crash_shape(content) + and not self._has_telegram_desktop_cjk_rich_garble_shape(content) and self._content_fits_rich_limits(content) and self._bot_supports_rich() ) diff --git a/tests/gateway/test_telegram_rich_messages.py b/tests/gateway/test_telegram_rich_messages.py index a7c4e9c1eaf..d667b8af912 100644 --- a/tests/gateway/test_telegram_rich_messages.py +++ b/tests/gateway/test_telegram_rich_messages.py @@ -24,6 +24,8 @@ from telegram.error import BadRequest, NetworkError, TimedOut # Content exercising rich-only constructs: a heading, a real Markdown table, # and a task list. Pipes / brackets must survive untouched into the payload. RICH_CONTENT = "## Results\n\n| Case | Status |\n|---|---|\n| rich | ✅ |\n\n- [x] table renders" +CJK_RICH_CONTENT = "## 持仓\n\n| 项目 | 状态 |\n|---|---|\n| 早盘 | 正常 |" +ASTRAL_CJK_RICH_CONTENT = "## Rare Han\n\n| glyph | status |\n|---|---|\n| \U00030000 | ok |" DANGEROUS_DETAILS_MATH = ( "
Complex proof\n\n" "$$\\sum_{i=1}^{n} i = \\frac{n(n+1)}{2}$$\n\n" @@ -159,6 +161,28 @@ async def test_math_outside_details_still_uses_rich_send(): bot.send_message.assert_not_called() +@pytest.mark.asyncio +async def test_cjk_rich_content_skips_rich_send_to_avoid_tdesktop_garble(): + adapter = _make_adapter() + + result = await adapter.send("12345", CJK_RICH_CONTENT) + + assert result.success is True + adapter._bot.do_api_request.assert_not_called() + adapter._bot.send_message.assert_awaited_once() + + +@pytest.mark.asyncio +async def test_astral_cjk_rich_content_skips_rich_send_to_avoid_tdesktop_garble(): + adapter = _make_adapter() + + result = await adapter.send("12345", ASTRAL_CJK_RICH_CONTENT) + + assert result.success is True + adapter._bot.do_api_request.assert_not_called() + adapter._bot.send_message.assert_awaited_once() + + @pytest.mark.asyncio async def test_rich_messages_opt_out_uses_legacy_send_path(): adapter = _make_adapter(extra={"rich_messages": False}) @@ -281,13 +305,15 @@ async def test_oversized_content_skips_rich_and_chunks(): async def test_rich_limit_is_characters_not_bytes(): """Telegram's rich limit is UTF-8 characters, not encoded bytes.""" adapter = _make_adapter() - # Rich-eligible (table) so the content takes the rich path; the CJK body - # is 20k chars / 60k UTF-8 bytes — over the byte count, under the char cap. - cjk = "| a | b |\n|---|---|\n" + "测" * 20000 # 20k chars, ~60k UTF-8 bytes - assert len(cjk.encode("utf-8")) > TelegramAdapter.RICH_MESSAGE_MAX_BYTES - assert len(cjk) <= TelegramAdapter.RICH_MESSAGE_MAX_CHARS + # Rich-eligible (table) so the content takes the rich path; the accented + # body is 20k chars / 40k UTF-8 bytes — over the byte count, under the + # character cap. CJK is intentionally avoided here because affected + # Telegram Desktop clients render CJK rich drafts incorrectly. + accented = "| a | b |\n|---|---|\n" + "é" * 20000 + assert len(accented.encode("utf-8")) > TelegramAdapter.RICH_MESSAGE_MAX_BYTES + assert len(accented) <= TelegramAdapter.RICH_MESSAGE_MAX_CHARS - result = await adapter.send("12345", cjk) + result = await adapter.send("12345", accented) assert result.success is True bot = adapter._bot @@ -528,6 +554,18 @@ async def test_rich_draft_happy_path_sends_raw_markdown(): adapter._bot.send_message_draft.assert_not_called() +@pytest.mark.asyncio +async def test_cjk_rich_content_skips_rich_draft_to_avoid_tdesktop_garble(): + adapter = _make_adapter() + adapter._bot.do_api_request = AsyncMock(return_value=True) + + result = await adapter.send_draft("12345", draft_id=7, content=CJK_RICH_CONTENT) + + assert result.success is True + adapter._bot.do_api_request.assert_not_called() + adapter._bot.send_message_draft.assert_awaited_once() + + @pytest.mark.asyncio async def test_rich_draft_capability_failure_falls_back_and_latches_off(): adapter = _make_adapter() @@ -673,6 +711,19 @@ async def test_finalize_edit_plain_content_stays_legacy(): adapter._bot.edit_message_text.assert_awaited() +@pytest.mark.asyncio +async def test_finalize_edit_cjk_rich_content_stays_legacy_to_avoid_tdesktop_garble(): + adapter = _make_adapter() + + result = await adapter.edit_message( + "12345", "555", CJK_RICH_CONTENT, finalize=True, + ) + + assert result.success is True + adapter._bot.do_api_request.assert_not_called() + adapter._bot.edit_message_text.assert_awaited_once() + + @pytest.mark.asyncio async def test_finalize_edit_rich_capability_error_falls_back_to_legacy(): """A capability error on the rich edit latches rich off and falls back to