mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
refactor(telegram): use entity-only mention detection
Replaces the word-boundary regex scan with pure MessageEntity-based detection. Telegram's server emits MENTION entities for real @username mentions and TEXT_MENTION entities for @FirstName mentions; the text- scanning fallback was both redundant (entities are always present for real mentions) and broken (matched raw substrings like email addresses, URLs, code-block contents, and forwarded literal text). Entity-only detection: - Closes bug #12545 ("foo@hermes_bot.example" false positive). - Also fixes edge cases the regex fix would still miss: @handles inside URLs and code blocks, where Telegram does not emit mention entities. Tests rewritten to exercise realistic Telegram payloads (real mentions carry entities; substring false positives don't).
This commit is contained in:
parent
1e18e0503f
commit
e330112aa8
2 changed files with 151 additions and 72 deletions
|
|
@ -2258,23 +2258,27 @@ class TelegramAdapter(BasePlatformAdapter):
|
|||
|
||||
bot_username = (getattr(self._bot, "username", None) or "").lstrip("@").lower()
|
||||
bot_id = getattr(self._bot, "id", None)
|
||||
expected = f"@{bot_username}" if bot_username else None
|
||||
|
||||
def _iter_sources():
|
||||
yield getattr(message, "text", None) or "", getattr(message, "entities", None) or []
|
||||
yield getattr(message, "caption", None) or "", getattr(message, "caption_entities", None) or []
|
||||
|
||||
# Telegram parses mentions server-side and emits MessageEntity objects
|
||||
# (type=mention for @username, type=text_mention for @FirstName targeting
|
||||
# a user without a public username). Only those entities are authoritative —
|
||||
# raw substring matches like "foo@hermes_bot.example" are not mentions
|
||||
# (bug #12545). Entities also correctly handle @handles inside URLs, code
|
||||
# blocks, and quoted text, where a regex scan would over-match.
|
||||
for source_text, entities in _iter_sources():
|
||||
if bot_username:
|
||||
if re.search(rf'(?<!\w)@{re.escape(bot_username)}(?!\w)', source_text, re.IGNORECASE):
|
||||
return True
|
||||
for entity in entities:
|
||||
entity_type = str(getattr(entity, "type", "")).split(".")[-1].lower()
|
||||
if entity_type == "mention" and bot_username:
|
||||
if entity_type == "mention" and expected:
|
||||
offset = int(getattr(entity, "offset", -1))
|
||||
length = int(getattr(entity, "length", 0))
|
||||
if offset < 0 or length <= 0:
|
||||
continue
|
||||
if source_text[offset:offset + length].strip().lower() == f"@{bot_username}":
|
||||
if source_text[offset:offset + length].strip().lower() == expected:
|
||||
return True
|
||||
elif entity_type == "text_mention":
|
||||
user = getattr(entity, "user", None)
|
||||
|
|
|
|||
|
|
@ -1,19 +1,23 @@
|
|||
"""Tests for Telegram bot mention word-boundary detection (bug #12545).
|
||||
"""Tests for Telegram bot mention detection (bug #12545).
|
||||
|
||||
The old implementation used a naive substring check (`f"@{bot_username}" in text.lower()`),
|
||||
which incorrectly matched partial substrings like 'foo@hermes_bot.example'.
|
||||
The old implementation used a naive substring check
|
||||
(`f"@{bot_username}" in text.lower()`), which incorrectly matched partial
|
||||
substrings like 'foo@hermes_bot.example'.
|
||||
|
||||
These tests verify that the regex-based word-boundary check correctly delimits mentions.
|
||||
Detection now relies entirely on the MessageEntity objects Telegram's server
|
||||
emits for real mentions. A bare `@username` substring in message text without
|
||||
a corresponding `MENTION` entity is NOT a mention — this correctly ignores
|
||||
@handles that appear inside URLs, code blocks, email-like strings, or quoted
|
||||
text, because Telegram's parser does not emit mention entities for any of
|
||||
those contexts.
|
||||
"""
|
||||
from types import SimpleNamespace
|
||||
|
||||
from gateway.config import Platform, PlatformConfig
|
||||
from gateway.platforms.telegram import TelegramAdapter
|
||||
|
||||
|
||||
def _make_adapter():
|
||||
"""Build a minimal TelegramAdapter with a mocked bot."""
|
||||
from gateway.config import Platform, PlatformConfig
|
||||
|
||||
adapter = object.__new__(TelegramAdapter)
|
||||
adapter.platform = Platform.TELEGRAM
|
||||
adapter.config = PlatformConfig(enabled=True, token="***", extra={})
|
||||
|
|
@ -21,8 +25,23 @@ def _make_adapter():
|
|||
return adapter
|
||||
|
||||
|
||||
def _group_message(text, entities=None, caption=None, caption_entities=None):
|
||||
"""Produce a minimal group-message-like SimpleNamespace."""
|
||||
def _mention_entity(text, mention="@hermes_bot"):
|
||||
"""Build a MENTION entity pointing at a literal `@username` in `text`."""
|
||||
offset = text.index(mention)
|
||||
return SimpleNamespace(type="mention", offset=offset, length=len(mention))
|
||||
|
||||
|
||||
def _text_mention_entity(offset, length, user_id):
|
||||
"""Build a TEXT_MENTION entity (used when the target user has no public @handle)."""
|
||||
return SimpleNamespace(
|
||||
type="text_mention",
|
||||
offset=offset,
|
||||
length=length,
|
||||
user=SimpleNamespace(id=user_id),
|
||||
)
|
||||
|
||||
|
||||
def _message(text=None, caption=None, entities=None, caption_entities=None):
|
||||
return SimpleNamespace(
|
||||
text=text,
|
||||
caption=caption,
|
||||
|
|
@ -34,77 +53,133 @@ def _group_message(text, entities=None, caption=None, caption_entities=None):
|
|||
)
|
||||
|
||||
|
||||
class TestTelegramMentionBoundaries:
|
||||
"""Test that _message_mentions_bot correctly respects word boundaries."""
|
||||
class TestRealMentionsAreDetected:
|
||||
"""A real Telegram mention always comes with a MENTION entity — detect those."""
|
||||
|
||||
def test_exact_mention_is_recognized(self):
|
||||
"""'@hermes_bot' at any position should be detected."""
|
||||
def test_mention_at_start_of_message(self):
|
||||
adapter = _make_adapter()
|
||||
msg = _group_message("hello @hermes_bot")
|
||||
text = "@hermes_bot hello world"
|
||||
msg = _message(text=text, entities=[_mention_entity(text)])
|
||||
assert adapter._message_mentions_bot(msg) is True
|
||||
|
||||
def test_mention_at_start_of_string(self):
|
||||
"""'@hermes_bot hello' should be detected."""
|
||||
def test_mention_mid_sentence(self):
|
||||
adapter = _make_adapter()
|
||||
msg = _group_message("@hermes_bot hello world")
|
||||
text = "hey @hermes_bot, can you help?"
|
||||
msg = _message(text=text, entities=[_mention_entity(text)])
|
||||
assert adapter._message_mentions_bot(msg) is True
|
||||
|
||||
def test_mention_followed_by_punctuation(self):
|
||||
"""'@hermes_bot,' should be detected."""
|
||||
def test_mention_at_end_of_message(self):
|
||||
adapter = _make_adapter()
|
||||
msg = _group_message("@hermes_bot, how are you?")
|
||||
text = "thanks for looking @hermes_bot"
|
||||
msg = _message(text=text, entities=[_mention_entity(text)])
|
||||
assert adapter._message_mentions_bot(msg) is True
|
||||
|
||||
def test_mention_in_subdomain_is_not_recognized(self):
|
||||
"""'foo@hermes_bot.example' should NOT match (bug #12545)."""
|
||||
adapter = _make_adapter()
|
||||
msg = _group_message("foo@hermes_bot.example")
|
||||
assert adapter._message_mentions_bot(msg) is False
|
||||
|
||||
def test_mention_in_longer_hostname_is_not_recognized(self):
|
||||
"""'email me at user@hermes_bot.domain.com' should NOT match."""
|
||||
adapter = _make_adapter()
|
||||
msg = _group_message("email me at user@hermes_bot.domain.com")
|
||||
assert adapter._message_mentions_bot(msg) is False
|
||||
|
||||
def test_superstring_username_is_not_recognized(self):
|
||||
"""'@hermes_botx' should NOT match (different username)."""
|
||||
adapter = _make_adapter()
|
||||
msg = _group_message("@hermes_botx hello")
|
||||
assert adapter._message_mentions_bot(msg) is False
|
||||
|
||||
def test_prefixed_superstring_is_not_recognized(self):
|
||||
"""'foo@hermes_bot_bar' should NOT match."""
|
||||
adapter = _make_adapter()
|
||||
msg = _group_message("foo@hermes_bot_bar")
|
||||
assert adapter._message_mentions_bot(msg) is False
|
||||
|
||||
def test_mention_case_insensitive(self):
|
||||
"""'@HERMES_BOT' should be detected (case-insensitive)."""
|
||||
adapter = _make_adapter()
|
||||
msg = _group_message("@HERMES_BOT hello")
|
||||
assert adapter._message_mentions_bot(msg) is True
|
||||
|
||||
def test_mention_mixed_case(self):
|
||||
"""'@Hermes_Bot' should be detected."""
|
||||
adapter = _make_adapter()
|
||||
msg = _group_message("@Hermes_Bot hello")
|
||||
assert adapter._message_mentions_bot(msg) is True
|
||||
|
||||
def test_no_mention_returns_false(self):
|
||||
"""Plain text with no mention should return False."""
|
||||
adapter = _make_adapter()
|
||||
msg = _group_message("just a regular message in the group")
|
||||
assert adapter._message_mentions_bot(msg) is False
|
||||
|
||||
def test_mention_in_caption(self):
|
||||
"""Mention in caption should be detected."""
|
||||
adapter = _make_adapter()
|
||||
msg = _group_message(None, caption="check this out @hermes_bot")
|
||||
caption = "photo for @hermes_bot"
|
||||
msg = _message(caption=caption, caption_entities=[_mention_entity(caption)])
|
||||
assert adapter._message_mentions_bot(msg) is True
|
||||
|
||||
def test_subdomain_mention_in_caption_not_recognized(self):
|
||||
"""'foo@hermes_bot.example' in caption should NOT match."""
|
||||
def test_text_mention_entity_targets_bot(self):
|
||||
"""TEXT_MENTION is Telegram's entity type for @FirstName -> user without a public handle."""
|
||||
adapter = _make_adapter()
|
||||
msg = _group_message(None, caption="foo@hermes_bot.example")
|
||||
msg = _message(text="hey you", entities=[_text_mention_entity(4, 3, user_id=999)])
|
||||
assert adapter._message_mentions_bot(msg) is True
|
||||
|
||||
|
||||
class TestSubstringFalsePositivesAreRejected:
|
||||
"""Bare `@bot_username` substrings without a MENTION entity must NOT match.
|
||||
|
||||
These are all inputs where the OLD substring check returned True incorrectly.
|
||||
A word-boundary regex would still over-match some of these (code blocks,
|
||||
URLs). Entity-based detection handles them all correctly because Telegram's
|
||||
parser does not emit mention entities for non-mention contexts.
|
||||
"""
|
||||
|
||||
def test_email_like_substring(self):
|
||||
"""bug #12545 exact repro: 'foo@hermes_bot.example'."""
|
||||
adapter = _make_adapter()
|
||||
msg = _message(text="email me at foo@hermes_bot.example")
|
||||
assert adapter._message_mentions_bot(msg) is False
|
||||
|
||||
def test_hostname_substring(self):
|
||||
adapter = _make_adapter()
|
||||
msg = _message(text="contact user@hermes_bot.domain.com")
|
||||
assert adapter._message_mentions_bot(msg) is False
|
||||
|
||||
def test_superstring_username(self):
|
||||
"""`@hermes_botx` is a different username; Telegram would emit a mention
|
||||
entity for `@hermes_botx`, not `@hermes_bot`."""
|
||||
adapter = _make_adapter()
|
||||
msg = _message(text="@hermes_botx hello")
|
||||
assert adapter._message_mentions_bot(msg) is False
|
||||
|
||||
def test_underscore_suffix_substring(self):
|
||||
adapter = _make_adapter()
|
||||
msg = _message(text="see @hermes_bot_admin for help")
|
||||
assert adapter._message_mentions_bot(msg) is False
|
||||
|
||||
def test_substring_inside_url_without_entity(self):
|
||||
"""@handle inside a URL produces a URL entity, not a MENTION entity."""
|
||||
adapter = _make_adapter()
|
||||
msg = _message(text="see https://example.com/@hermes_bot for details")
|
||||
assert adapter._message_mentions_bot(msg) is False
|
||||
|
||||
def test_substring_inside_code_block_without_entity(self):
|
||||
"""Telegram doesn't emit mention entities inside code/pre entities."""
|
||||
adapter = _make_adapter()
|
||||
msg = _message(text="use the string `@hermes_bot` in config")
|
||||
assert adapter._message_mentions_bot(msg) is False
|
||||
|
||||
def test_plain_text_with_no_at_sign(self):
|
||||
adapter = _make_adapter()
|
||||
msg = _message(text="just a normal group message")
|
||||
assert adapter._message_mentions_bot(msg) is False
|
||||
|
||||
def test_email_substring_in_caption(self):
|
||||
adapter = _make_adapter()
|
||||
msg = _message(caption="foo@hermes_bot.example")
|
||||
assert adapter._message_mentions_bot(msg) is False
|
||||
|
||||
|
||||
class TestEntityEdgeCases:
|
||||
"""Malformed or mismatched entities should not crash or over-match."""
|
||||
|
||||
def test_mention_entity_for_different_username(self):
|
||||
adapter = _make_adapter()
|
||||
text = "@someone_else hi"
|
||||
msg = _message(text=text, entities=[_mention_entity(text, mention="@someone_else")])
|
||||
assert adapter._message_mentions_bot(msg) is False
|
||||
|
||||
def test_text_mention_entity_for_different_user(self):
|
||||
adapter = _make_adapter()
|
||||
msg = _message(text="hi there", entities=[_text_mention_entity(0, 2, user_id=12345)])
|
||||
assert adapter._message_mentions_bot(msg) is False
|
||||
|
||||
def test_malformed_entity_with_negative_offset(self):
|
||||
adapter = _make_adapter()
|
||||
msg = _message(text="@hermes_bot hi",
|
||||
entities=[SimpleNamespace(type="mention", offset=-1, length=11)])
|
||||
assert adapter._message_mentions_bot(msg) is False
|
||||
|
||||
def test_malformed_entity_with_zero_length(self):
|
||||
adapter = _make_adapter()
|
||||
msg = _message(text="@hermes_bot hi",
|
||||
entities=[SimpleNamespace(type="mention", offset=0, length=0)])
|
||||
assert adapter._message_mentions_bot(msg) is False
|
||||
|
||||
|
||||
class TestCaseInsensitivity:
|
||||
"""Telegram usernames are case-insensitive; the slice-compare normalizes both sides."""
|
||||
|
||||
def test_uppercase_mention(self):
|
||||
adapter = _make_adapter()
|
||||
text = "hi @HERMES_BOT"
|
||||
msg = _message(text=text, entities=[_mention_entity(text, mention="@HERMES_BOT")])
|
||||
assert adapter._message_mentions_bot(msg) is True
|
||||
|
||||
def test_mixed_case_mention(self):
|
||||
adapter = _make_adapter()
|
||||
text = "hi @Hermes_Bot"
|
||||
msg = _message(text=text, entities=[_mention_entity(text, mention="@Hermes_Bot")])
|
||||
assert adapter._message_mentions_bot(msg) is True
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue