From e330112aa8dcbe15a0b84cb18c59a7f3d2a547a1 Mon Sep 17 00:00:00 2001 From: Teknium Date: Sun, 19 Apr 2026 22:51:56 -0700 Subject: [PATCH] refactor(telegram): use entity-only mention detection Replaces the word-boundary regex scan with pure MessageEntity-based detection. Telegram's server emits MENTION entities for real @username mentions and TEXT_MENTION entities for @FirstName mentions; the text- scanning fallback was both redundant (entities are always present for real mentions) and broken (matched raw substrings like email addresses, URLs, code-block contents, and forwarded literal text). Entity-only detection: - Closes bug #12545 ("foo@hermes_bot.example" false positive). - Also fixes edge cases the regex fix would still miss: @handles inside URLs and code blocks, where Telegram does not emit mention entities. Tests rewritten to exercise realistic Telegram payloads (real mentions carry entities; substring false positives don't). --- gateway/platforms/telegram.py | 14 +- .../test_telegram_mention_boundaries.py | 209 ++++++++++++------ 2 files changed, 151 insertions(+), 72 deletions(-) diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py index fbfb942ec..16c207019 100644 --- a/gateway/platforms/telegram.py +++ b/gateway/platforms/telegram.py @@ -2258,23 +2258,27 @@ class TelegramAdapter(BasePlatformAdapter): bot_username = (getattr(self._bot, "username", None) or "").lstrip("@").lower() bot_id = getattr(self._bot, "id", None) + expected = f"@{bot_username}" if bot_username else None def _iter_sources(): yield getattr(message, "text", None) or "", getattr(message, "entities", None) or [] yield getattr(message, "caption", None) or "", getattr(message, "caption_entities", None) or [] + # Telegram parses mentions server-side and emits MessageEntity objects + # (type=mention for @username, type=text_mention for @FirstName targeting + # a user without a public username). Only those entities are authoritative — + # raw substring matches like "foo@hermes_bot.example" are not mentions + # (bug #12545). Entities also correctly handle @handles inside URLs, code + # blocks, and quoted text, where a regex scan would over-match. for source_text, entities in _iter_sources(): - if bot_username: - if re.search(rf'(? user without a public handle.""" adapter = _make_adapter() - msg = _group_message(None, caption="foo@hermes_bot.example") + msg = _message(text="hey you", entities=[_text_mention_entity(4, 3, user_id=999)]) + assert adapter._message_mentions_bot(msg) is True + + +class TestSubstringFalsePositivesAreRejected: + """Bare `@bot_username` substrings without a MENTION entity must NOT match. + + These are all inputs where the OLD substring check returned True incorrectly. + A word-boundary regex would still over-match some of these (code blocks, + URLs). Entity-based detection handles them all correctly because Telegram's + parser does not emit mention entities for non-mention contexts. + """ + + def test_email_like_substring(self): + """bug #12545 exact repro: 'foo@hermes_bot.example'.""" + adapter = _make_adapter() + msg = _message(text="email me at foo@hermes_bot.example") assert adapter._message_mentions_bot(msg) is False + + def test_hostname_substring(self): + adapter = _make_adapter() + msg = _message(text="contact user@hermes_bot.domain.com") + assert adapter._message_mentions_bot(msg) is False + + def test_superstring_username(self): + """`@hermes_botx` is a different username; Telegram would emit a mention + entity for `@hermes_botx`, not `@hermes_bot`.""" + adapter = _make_adapter() + msg = _message(text="@hermes_botx hello") + assert adapter._message_mentions_bot(msg) is False + + def test_underscore_suffix_substring(self): + adapter = _make_adapter() + msg = _message(text="see @hermes_bot_admin for help") + assert adapter._message_mentions_bot(msg) is False + + def test_substring_inside_url_without_entity(self): + """@handle inside a URL produces a URL entity, not a MENTION entity.""" + adapter = _make_adapter() + msg = _message(text="see https://example.com/@hermes_bot for details") + assert adapter._message_mentions_bot(msg) is False + + def test_substring_inside_code_block_without_entity(self): + """Telegram doesn't emit mention entities inside code/pre entities.""" + adapter = _make_adapter() + msg = _message(text="use the string `@hermes_bot` in config") + assert adapter._message_mentions_bot(msg) is False + + def test_plain_text_with_no_at_sign(self): + adapter = _make_adapter() + msg = _message(text="just a normal group message") + assert adapter._message_mentions_bot(msg) is False + + def test_email_substring_in_caption(self): + adapter = _make_adapter() + msg = _message(caption="foo@hermes_bot.example") + assert adapter._message_mentions_bot(msg) is False + + +class TestEntityEdgeCases: + """Malformed or mismatched entities should not crash or over-match.""" + + def test_mention_entity_for_different_username(self): + adapter = _make_adapter() + text = "@someone_else hi" + msg = _message(text=text, entities=[_mention_entity(text, mention="@someone_else")]) + assert adapter._message_mentions_bot(msg) is False + + def test_text_mention_entity_for_different_user(self): + adapter = _make_adapter() + msg = _message(text="hi there", entities=[_text_mention_entity(0, 2, user_id=12345)]) + assert adapter._message_mentions_bot(msg) is False + + def test_malformed_entity_with_negative_offset(self): + adapter = _make_adapter() + msg = _message(text="@hermes_bot hi", + entities=[SimpleNamespace(type="mention", offset=-1, length=11)]) + assert adapter._message_mentions_bot(msg) is False + + def test_malformed_entity_with_zero_length(self): + adapter = _make_adapter() + msg = _message(text="@hermes_bot hi", + entities=[SimpleNamespace(type="mention", offset=0, length=0)]) + assert adapter._message_mentions_bot(msg) is False + + +class TestCaseInsensitivity: + """Telegram usernames are case-insensitive; the slice-compare normalizes both sides.""" + + def test_uppercase_mention(self): + adapter = _make_adapter() + text = "hi @HERMES_BOT" + msg = _message(text=text, entities=[_mention_entity(text, mention="@HERMES_BOT")]) + assert adapter._message_mentions_bot(msg) is True + + def test_mixed_case_mention(self): + adapter = _make_adapter() + text = "hi @Hermes_Bot" + msg = _message(text=text, entities=[_mention_entity(text, mention="@Hermes_Bot")]) + assert adapter._message_mentions_bot(msg) is True