refactor(telegram): use entity-only mention detection

Replaces the word-boundary regex scan with pure MessageEntity-based detection. Telegram's server emits MENTION entities for real @username mentions and TEXT_MENTION entities for @FirstName mentions; the text- scanning fallback was both redundant (entities are always present for real mentions) and broken (matched raw substrings like email addresses, URLs, code-block contents, and forwarded literal text). Entity-only detection: - Closes bug #12545 ("foo@hermes_bot.example" false positive). - Also fixes edge cases the regex fix would still miss: @handles inside URLs and code blocks, where Telegram does not emit mention entities. Tests rewritten to exercise realistic Telegram payloads (real mentions carry entities; substring false positives don't).
2026-04-25 00:51:20 +00:00 · 2026-04-19 22:51:56 -07:00 · 2026-04-19 22:51:56 -07:00 · e330112aa8
commit e330112aa8
parent 1e18e0503f
2 changed files with 151 additions and 72 deletions
--- a/gateway/platforms/telegram.py
+++ b/gateway/platforms/telegram.py
@ -2258,23 +2258,27 @@ class TelegramAdapter(BasePlatformAdapter):
        bot_username = (getattr(self._bot, "username", None) or "").lstrip("@").lower()
        bot_id = getattr(self._bot, "id", None)
        expected = f"@{bot_username}" if bot_username else None
        def _iter_sources():
            yield getattr(message, "text", None) or "", getattr(message, "entities", None) or []
            yield getattr(message, "caption", None) or "", getattr(message, "caption_entities", None) or []
        # Telegram parses mentions server-side and emits MessageEntity objects
        # (type=mention for @username, type=text_mention for @FirstName targeting
        # a user without a public username). Only those entities are authoritative —
        # raw substring matches like "foo@hermes_bot.example" are not mentions
        # (bug #12545). Entities also correctly handle @handles inside URLs, code
        # blocks, and quoted text, where a regex scan would over-match.
        for source_text, entities in _iter_sources():
            if bot_username:
                if re.search(rf'(?<!\w)@{re.escape(bot_username)}(?!\w)', source_text, re.IGNORECASE):
                    return True
            for entity in entities:
                entity_type = str(getattr(entity, "type", "")).split(".")[-1].lower()
-                if entity_type == "mention" and bot_username:
+                if entity_type == "mention" and expected:
                    offset = int(getattr(entity, "offset", -1))
                    length = int(getattr(entity, "length", 0))
                    if offset < 0 or length <= 0:
                        continue
-                    if source_text[offset:offset + length].strip().lower() == f"@{bot_username}":
+                    if source_text[offset:offset + length].strip().lower() == expected:
                        return True
                elif entity_type == "text_mention":
                    user = getattr(entity, "user", None)
--- a/tests/gateway/test_telegram_mention_boundaries.py
+++ b/tests/gateway/test_telegram_mention_boundaries.py
@ -1,19 +1,23 @@
-"""Tests for Telegram bot mention word-boundary detection (bug #12545).
+"""Tests for Telegram bot mention detection (bug #12545).
-The old implementation used a naive substring check (`f"@{bot_username}" in text.lower()`),
+The old implementation used a naive substring check
-which incorrectly matched partial substrings like 'foo@hermes_bot.example'.
+(`f"@{bot_username}" in text.lower()`), which incorrectly matched partial
 substrings like 'foo@hermes_bot.example'.
-These tests verify that the regex-based word-boundary check correctly delimits mentions.
+Detection now relies entirely on the MessageEntity objects Telegram's server
 emits for real mentions. A bare `@username` substring in message text without
 a corresponding `MENTION` entity is NOT a mention — this correctly ignores
@handles that appear inside URLs, code blocks, email-like strings, or quoted
 text, because Telegram's parser does not emit mention entities for any of
 those contexts.
 """
 from types import SimpleNamespace
 from gateway.config import Platform, PlatformConfig
 from gateway.platforms.telegram import TelegramAdapter
 def _make_adapter():
    """Build a minimal TelegramAdapter with a mocked bot."""
    from gateway.config import Platform, PlatformConfig
    adapter = object.__new__(TelegramAdapter)
    adapter.platform = Platform.TELEGRAM
    adapter.config = PlatformConfig(enabled=True, token="***", extra={})
@ -21,8 +25,23 @@ def _make_adapter():
    return adapter
-def _group_message(text, entities=None, caption=None, caption_entities=None):
+def _mention_entity(text, mention="@hermes_bot"):
-    """Produce a minimal group-message-like SimpleNamespace."""
+    """Build a MENTION entity pointing at a literal `@username` in `text`."""
    offset = text.index(mention)
    return SimpleNamespace(type="mention", offset=offset, length=len(mention))
 def _text_mention_entity(offset, length, user_id):
    """Build a TEXT_MENTION entity (used when the target user has no public @handle)."""
    return SimpleNamespace(
        type="text_mention",
        offset=offset,
        length=length,
        user=SimpleNamespace(id=user_id),
    )
 def _message(text=None, caption=None, entities=None, caption_entities=None):
    return SimpleNamespace(
        text=text,
        caption=caption,
@ -34,77 +53,133 @@ def _group_message(text, entities=None, caption=None, caption_entities=None):
    )
-class TestTelegramMentionBoundaries:
+class TestRealMentionsAreDetected:
-    """Test that _message_mentions_bot correctly respects word boundaries."""
+    """A real Telegram mention always comes with a MENTION entity — detect those."""
-    def test_exact_mention_is_recognized(self):
+    def test_mention_at_start_of_message(self):
        """'@hermes_bot' at any position should be detected."""
        adapter = _make_adapter()
-        msg = _group_message("hello @hermes_bot")
+        text = "@hermes_bot hello world"
        msg = _message(text=text, entities=[_mention_entity(text)])
        assert adapter._message_mentions_bot(msg) is True
-    def test_mention_at_start_of_string(self):
+    def test_mention_mid_sentence(self):
        """'@hermes_bot hello' should be detected."""
        adapter = _make_adapter()
-        msg = _group_message("@hermes_bot hello world")
+        text = "hey @hermes_bot, can you help?"
        msg = _message(text=text, entities=[_mention_entity(text)])
        assert adapter._message_mentions_bot(msg) is True
-    def test_mention_followed_by_punctuation(self):
+    def test_mention_at_end_of_message(self):
        """'@hermes_bot,' should be detected."""
        adapter = _make_adapter()
-        msg = _group_message("@hermes_bot, how are you?")
+        text = "thanks for looking @hermes_bot"
        msg = _message(text=text, entities=[_mention_entity(text)])
        assert adapter._message_mentions_bot(msg) is True
    def test_mention_in_subdomain_is_not_recognized(self):
        """'foo@hermes_bot.example' should NOT match (bug #12545)."""
        adapter = _make_adapter()
        msg = _group_message("foo@hermes_bot.example")
        assert adapter._message_mentions_bot(msg) is False
    def test_mention_in_longer_hostname_is_not_recognized(self):
        """'email me at user@hermes_bot.domain.com' should NOT match."""
        adapter = _make_adapter()
        msg = _group_message("email me at user@hermes_bot.domain.com")
        assert adapter._message_mentions_bot(msg) is False
    def test_superstring_username_is_not_recognized(self):
        """'@hermes_botx' should NOT match (different username)."""
        adapter = _make_adapter()
        msg = _group_message("@hermes_botx hello")
        assert adapter._message_mentions_bot(msg) is False
    def test_prefixed_superstring_is_not_recognized(self):
        """'foo@hermes_bot_bar' should NOT match."""
        adapter = _make_adapter()
        msg = _group_message("foo@hermes_bot_bar")
        assert adapter._message_mentions_bot(msg) is False
    def test_mention_case_insensitive(self):
        """'@HERMES_BOT' should be detected (case-insensitive)."""
        adapter = _make_adapter()
        msg = _group_message("@HERMES_BOT hello")
        assert adapter._message_mentions_bot(msg) is True
    def test_mention_mixed_case(self):
        """'@Hermes_Bot' should be detected."""
        adapter = _make_adapter()
        msg = _group_message("@Hermes_Bot hello")
        assert adapter._message_mentions_bot(msg) is True
    def test_no_mention_returns_false(self):
        """Plain text with no mention should return False."""
        adapter = _make_adapter()
        msg = _group_message("just a regular message in the group")
        assert adapter._message_mentions_bot(msg) is False
    def test_mention_in_caption(self):
        """Mention in caption should be detected."""
        adapter = _make_adapter()
-        msg = _group_message(None, caption="check this out @hermes_bot")
+        caption = "photo for @hermes_bot"
        msg = _message(caption=caption, caption_entities=[_mention_entity(caption)])
        assert adapter._message_mentions_bot(msg) is True
-    def test_subdomain_mention_in_caption_not_recognized(self):
+    def test_text_mention_entity_targets_bot(self):
-        """'foo@hermes_bot.example' in caption should NOT match."""
+        """TEXT_MENTION is Telegram's entity type for @FirstName -> user without a public handle."""
        adapter = _make_adapter()
-        msg = _group_message(None, caption="foo@hermes_bot.example")
+        msg = _message(text="hey you", entities=[_text_mention_entity(4, 3, user_id=999)])
        assert adapter._message_mentions_bot(msg) is True
 class TestSubstringFalsePositivesAreRejected:
    """Bare `@bot_username` substrings without a MENTION entity must NOT match.
    These are all inputs where the OLD substring check returned True incorrectly.
    A word-boundary regex would still over-match some of these (code blocks,
    URLs). Entity-based detection handles them all correctly because Telegram's
    parser does not emit mention entities for non-mention contexts.
    """
    def test_email_like_substring(self):
        """bug #12545 exact repro: 'foo@hermes_bot.example'."""
        adapter = _make_adapter()
        msg = _message(text="email me at foo@hermes_bot.example")
        assert adapter._message_mentions_bot(msg) is False
    def test_hostname_substring(self):
        adapter = _make_adapter()
        msg = _message(text="contact user@hermes_bot.domain.com")
        assert adapter._message_mentions_bot(msg) is False
    def test_superstring_username(self):
        """`@hermes_botx` is a different username; Telegram would emit a mention
        entity for `@hermes_botx`, not `@hermes_bot`."""
        adapter = _make_adapter()
        msg = _message(text="@hermes_botx hello")
        assert adapter._message_mentions_bot(msg) is False
    def test_underscore_suffix_substring(self):
        adapter = _make_adapter()
        msg = _message(text="see @hermes_bot_admin for help")
        assert adapter._message_mentions_bot(msg) is False
    def test_substring_inside_url_without_entity(self):
        """@handle inside a URL produces a URL entity, not a MENTION entity."""
        adapter = _make_adapter()
        msg = _message(text="see https://example.com/@hermes_bot for details")
        assert adapter._message_mentions_bot(msg) is False
    def test_substring_inside_code_block_without_entity(self):
        """Telegram doesn't emit mention entities inside code/pre entities."""
        adapter = _make_adapter()
        msg = _message(text="use the string `@hermes_bot` in config")
        assert adapter._message_mentions_bot(msg) is False
    def test_plain_text_with_no_at_sign(self):
        adapter = _make_adapter()
        msg = _message(text="just a normal group message")
        assert adapter._message_mentions_bot(msg) is False
    def test_email_substring_in_caption(self):
        adapter = _make_adapter()
        msg = _message(caption="foo@hermes_bot.example")
        assert adapter._message_mentions_bot(msg) is False
 class TestEntityEdgeCases:
    """Malformed or mismatched entities should not crash or over-match."""
    def test_mention_entity_for_different_username(self):
        adapter = _make_adapter()
        text = "@someone_else hi"
        msg = _message(text=text, entities=[_mention_entity(text, mention="@someone_else")])
        assert adapter._message_mentions_bot(msg) is False
    def test_text_mention_entity_for_different_user(self):
        adapter = _make_adapter()
        msg = _message(text="hi there", entities=[_text_mention_entity(0, 2, user_id=12345)])
        assert adapter._message_mentions_bot(msg) is False
    def test_malformed_entity_with_negative_offset(self):
        adapter = _make_adapter()
        msg = _message(text="@hermes_bot hi",
                       entities=[SimpleNamespace(type="mention", offset=-1, length=11)])
        assert adapter._message_mentions_bot(msg) is False
    def test_malformed_entity_with_zero_length(self):
        adapter = _make_adapter()
        msg = _message(text="@hermes_bot hi",
                       entities=[SimpleNamespace(type="mention", offset=0, length=0)])
        assert adapter._message_mentions_bot(msg) is False
 class TestCaseInsensitivity:
    """Telegram usernames are case-insensitive; the slice-compare normalizes both sides."""
    def test_uppercase_mention(self):
        adapter = _make_adapter()
        text = "hi @HERMES_BOT"
        msg = _message(text=text, entities=[_mention_entity(text, mention="@HERMES_BOT")])
        assert adapter._message_mentions_bot(msg) is True
    def test_mixed_case_mention(self):
        adapter = _make_adapter()
        text = "hi @Hermes_Bot"
        msg = _message(text=text, entities=[_mention_entity(text, mention="@Hermes_Bot")])
        assert adapter._message_mentions_bot(msg) is True