mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix(gateway): canonicalize WhatsApp identity in session keys
Hermes' WhatsApp bridge routinely surfaces the same person under either a phone-format JID (60123456789@s.whatsapp.net) or a LID (…@lid), and may flip between the two for a single human within the same conversation. Before this change, build_session_key used the raw identifier verbatim, so the bridge reshuffling an alias form produced two distinct session keys for the same person — in two places: 1. DM chat_id — a user's DM sessions split in half, transcripts and per-sender state diverge. 2. Group participant_id (with group_sessions_per_user enabled) — a member's per-user session inside a group splits in half for the same reason. Add a canonicalizer that walks the bridge's lid-mapping-*.json files and picks the shortest/numeric-preferred alias as the stable identity. build_session_key now routes both the DM chat_id and the group participant_id through this helper when the platform is WhatsApp. All other platforms and chat types are untouched. Expose canonical_whatsapp_identifier and normalize_whatsapp_identifier as public helpers. Plugins that need per-sender behaviour (role-based routing, per-contact authorization, policy gating) need the same identity resolution Hermes uses internally; without a public helper, each plugin would have to re-implement the walker against the bridge's internal on-disk format. Keeping this alongside build_session_key makes it authoritative and one refactor away if the bridge ever changes shape. _expand_whatsapp_aliases stays private — it's an implementation detail of how the mapping files are walked, not a contract callers should depend on.
This commit is contained in:
parent
1143f234e3
commit
10deb1b87d
3 changed files with 234 additions and 9 deletions
|
|
@ -60,6 +60,7 @@ from .config import (
|
|||
SessionResetPolicy, # noqa: F401 — re-exported via gateway/__init__.py
|
||||
HomeChannel,
|
||||
)
|
||||
from hermes_constants import get_hermes_home
|
||||
|
||||
|
||||
@dataclass
|
||||
|
|
@ -518,15 +519,24 @@ def build_session_key(
|
|||
"""
|
||||
platform = source.platform.value
|
||||
if source.chat_type == "dm":
|
||||
if source.chat_id:
|
||||
dm_chat_id = source.chat_id
|
||||
if source.platform == Platform.WHATSAPP:
|
||||
dm_chat_id = canonical_whatsapp_identifier(source.chat_id)
|
||||
|
||||
if dm_chat_id:
|
||||
if source.thread_id:
|
||||
return f"agent:main:{platform}:dm:{source.chat_id}:{source.thread_id}"
|
||||
return f"agent:main:{platform}:dm:{source.chat_id}"
|
||||
return f"agent:main:{platform}:dm:{dm_chat_id}:{source.thread_id}"
|
||||
return f"agent:main:{platform}:dm:{dm_chat_id}"
|
||||
if source.thread_id:
|
||||
return f"agent:main:{platform}:dm:{source.thread_id}"
|
||||
return f"agent:main:{platform}:dm"
|
||||
|
||||
participant_id = source.user_id_alt or source.user_id
|
||||
if participant_id and source.platform == Platform.WHATSAPP:
|
||||
# Same JID/LID-flip bug as the DM case: without canonicalisation, a
|
||||
# single group member gets two isolated per-user sessions when the
|
||||
# bridge reshuffles alias forms.
|
||||
participant_id = canonical_whatsapp_identifier(str(participant_id)) or participant_id
|
||||
key_parts = ["agent:main", platform, source.chat_type]
|
||||
|
||||
if source.chat_id:
|
||||
|
|
@ -547,6 +557,95 @@ def build_session_key(
|
|||
return ":".join(key_parts)
|
||||
|
||||
|
||||
def normalize_whatsapp_identifier(value: str) -> str:
    """Reduce a WhatsApp JID/LID to the bare identifier used for comparisons.

    The WhatsApp bridge can deliver one identity in several shapes:
    ``"60123456789@s.whatsapp.net"``, ``"60123456789:47@s.whatsapp.net"``,
    ``"60123456789@lid"``, or a plain ``"+60123456789"`` / ``"60123456789"``.
    Every one of these collapses to ``"60123456789"``.

    Intended for plugins that compare sender IDs against user-supplied
    configuration (for example phone numbers in ``config.yaml``) and should
    not care which variant the bridge happened to emit.

    Falsy input (``""`` or ``None``) yields ``""``.
    """
    text = str(value or "").strip()
    # Drop a single "+" (the international-prefix marker).
    text = text.replace("+", "", 1)
    # Keep what precedes the first ":" (device suffix), then what precedes
    # the first "@" (server / LID suffix).
    before_device, _, _ = text.partition(":")
    bare, _, _ = before_device.partition("@")
    return bare
|
||||
|
||||
|
||||
def _expand_whatsapp_aliases(identifier: str) -> set[str]:
    """Collect every known alias of a WhatsApp identity from bridge mapping files.

    Starting from the normalized ``identifier``, looks under
    ``<hermes home>/whatsapp/session`` for ``lid-mapping-<alias>.json`` and
    ``lid-mapping-<alias>_reverse.json``, follows the identifier stored in
    each file, and repeats until no new alias turns up.

    Args:
        identifier: Any identifier shape accepted by
            :func:`normalize_whatsapp_identifier`.

    Returns:
        The set of normalized aliases reachable from ``identifier``. The
        normalized input itself is always a member. The set is empty only
        when the input normalizes to an empty string.

    The lookup is best-effort: mapping files that are missing, unreadable,
    not valid JSON, or that hold something other than a single identifier
    are skipped rather than raised.
    """
    normalized = normalize_whatsapp_identifier(identifier)
    if not normalized:
        return set()

    session_dir = get_hermes_home() / "whatsapp" / "session"
    resolved: set[str] = set()
    # The result is a set, so visit order is irrelevant; a stack gives O(1)
    # pops where popping from the front of a list would be O(n).
    pending = [normalized]

    while pending:
        current = pending.pop()
        if not current or current in resolved:
            continue
        resolved.add(current)

        # The identifier is interpolated into a filename below. A value that
        # carries a path separator could steer the read outside
        # ``session_dir``, so it is kept as an alias but never looked up.
        if "/" in current or "\\" in current:
            continue

        for suffix in ("", "_reverse"):
            mapping_path = session_dir / f"lid-mapping-{current}{suffix}.json"
            try:
                payload = json.loads(mapping_path.read_text(encoding="utf-8"))
            except (OSError, ValueError, RecursionError):
                # Missing/unreadable file, bad encoding, malformed or
                # pathologically nested JSON: ignore this mapping.
                continue

            # Only a scalar identifier is meaningful here. Stringifying a
            # bool, float, list or dict would inject a bogus alias (e.g.
            # "True") that could then win the shortest-alias selection.
            # bool is excluded explicitly because it subclasses int.
            if isinstance(payload, bool) or not isinstance(payload, (str, int)):
                continue

            mapped = normalize_whatsapp_identifier(payload)
            if mapped and mapped not in resolved:
                pending.append(mapped)

    return resolved
|
||||
|
||||
|
||||
def canonical_whatsapp_identifier(identifier: str) -> str:
    """Map any phone-JID/LID variant of a WhatsApp user to one stable identity.

    The same person can show up as a phone-format JID
    (``60123456789@s.whatsapp.net``) or as a LID (``1234567890@lid``), and
    the bridge may switch between the two for one human. That holds for a
    DM ``chat_id`` as well as for the ``participant_id`` of a group member,
    since both denote a user.

    The bridge's ``whatsapp/session/lid-mapping-*.json`` files are followed
    transitively to gather all aliases of the identity; the canonical one is
    the shortest alias, with ties broken by ordinary string ordering.
    :func:`build_session_key` applies this to WhatsApp DM chat_ids and
    WhatsApp group participant_ids, so callers obtain the same identity
    Hermes uses in its session keys.

    Plugins with per-sender behaviour (role-based routing, authorization,
    per-contact policy) should resolve identities through this helper so
    their bookkeeping stays aligned with Hermes' sessions when the bridge
    reshuffles aliases.

    Returns ``""`` when ``identifier`` normalizes to an empty string. When
    no mapping files exist yet (fresh bridge install), the normalized input
    is returned as-is.
    """
    bare = normalize_whatsapp_identifier(identifier)
    if not bare:
        return ""

    def _preference(alias: str) -> tuple[int, str]:
        # Shorter aliases win; equal lengths fall back to string order.
        return len(alias), alias

    # The alias set always contains ``bare`` itself, so with no mapping
    # files present the selection below simply yields ``bare``.
    candidates = _expand_whatsapp_aliases(bare)
    return min(candidates, key=_preference)
|
||||
|
||||
|
||||
class SessionStore:
|
||||
"""
|
||||
Manages session storage and retrieval.
|
||||
|
|
|
|||
|
|
@ -11,6 +11,8 @@ from gateway.session import (
|
|||
build_session_context,
|
||||
build_session_context_prompt,
|
||||
build_session_key,
|
||||
canonical_whatsapp_identifier,
|
||||
normalize_whatsapp_identifier,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -626,9 +628,9 @@ class TestSessionStoreSwitchSession:
|
|||
db.close()
|
||||
|
||||
|
||||
class TestWhatsAppDMSessionKeyConsistency:
|
||||
"""Regression: all session-key construction must go through build_session_key
|
||||
so DMs are isolated by chat_id across platforms."""
|
||||
class TestWhatsAppSessionKeyConsistency:
|
||||
"""Regression: WhatsApp session keys must collapse JID/LID aliases to a
|
||||
single stable identity for both DM chat_ids and group participant_ids."""
|
||||
|
||||
@pytest.fixture()
|
||||
def store(self, tmp_path):
|
||||
|
|
@ -639,7 +641,7 @@ class TestWhatsAppDMSessionKeyConsistency:
|
|||
s._loaded = True
|
||||
return s
|
||||
|
||||
def test_whatsapp_dm_includes_chat_id(self):
|
||||
def test_whatsapp_dm_uses_canonical_identifier(self):
|
||||
source = SessionSource(
|
||||
platform=Platform.WHATSAPP,
|
||||
chat_id="15551234567@s.whatsapp.net",
|
||||
|
|
@ -647,7 +649,80 @@ class TestWhatsAppDMSessionKeyConsistency:
|
|||
user_name="Phone User",
|
||||
)
|
||||
key = build_session_key(source)
|
||||
assert key == "agent:main:whatsapp:dm:15551234567@s.whatsapp.net"
|
||||
assert key == "agent:main:whatsapp:dm:15551234567"
|
||||
|
||||
def test_whatsapp_dm_aliases_share_one_session_key(self, tmp_path, monkeypatch):
    """A LID-addressed DM and its mapped phone-JID DM yield the same key."""
    hermes_home = tmp_path / "hermes-home"
    session_dir = hermes_home / "whatsapp" / "session"
    session_dir.mkdir(parents=True, exist_ok=True)
    # Bridge mapping file: LID 999999999999999 -> phone JID.
    mapping_file = session_dir / "lid-mapping-999999999999999.json"
    mapping_file.write_text(
        json.dumps("15551234567@s.whatsapp.net"),
        encoding="utf-8",
    )
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))

    expected_key = "agent:main:whatsapp:dm:15551234567"
    for chat_id in ("999999999999999@lid", "15551234567@s.whatsapp.net"):
        source = SessionSource(
            platform=Platform.WHATSAPP,
            chat_id=chat_id,
            chat_type="dm",
            user_name="Phone User",
        )
        assert build_session_key(source) == expected_key
|
||||
|
||||
def test_whatsapp_group_participant_aliases_share_session_key(self, tmp_path, monkeypatch):
    """Per-user group sessions must not split on a JID/LID flip.

    With ``group_sessions_per_user`` enabled, one human appearing first as
    a LID and then as a phone JID inside the same group has to land in a
    single per-user session.
    """
    hermes_home = tmp_path / "hermes-home"
    session_dir = hermes_home / "whatsapp" / "session"
    session_dir.mkdir(parents=True, exist_ok=True)
    # Bridge mapping file: LID 999999999999999 -> phone JID.
    mapping_file = session_dir / "lid-mapping-999999999999999.json"
    mapping_file.write_text(
        json.dumps("15551234567@s.whatsapp.net"),
        encoding="utf-8",
    )
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))

    expected_key = "agent:main:whatsapp:group:120363000000000000@g.us:15551234567"
    for member_id in ("999999999999999@lid", "15551234567@s.whatsapp.net"):
        source = SessionSource(
            platform=Platform.WHATSAPP,
            chat_id="120363000000000000@g.us",
            chat_type="group",
            user_id=member_id,
            user_name="Group Member",
        )
        assert build_session_key(source, group_sessions_per_user=True) == expected_key
|
||||
|
||||
def test_whatsapp_group_shared_sessions_untouched_by_canonicalisation(self):
    """Shared group sessions carry no participant part in the key.

    With ``group_sessions_per_user`` set to False the participant_id is
    left out of the key entirely, so canonicalisation has nothing to act on.
    """
    group_source = SessionSource(
        platform=Platform.WHATSAPP,
        chat_id="120363000000000000@g.us",
        chat_type="group",
        user_id="999999999999999@lid",
        user_name="Group Member",
    )
    shared_key = build_session_key(group_source, group_sessions_per_user=False)
    assert shared_key == "agent:main:whatsapp:group:120363000000000000@g.us"
|
||||
|
||||
def test_store_delegates_to_build_session_key(self, store):
|
||||
"""SessionStore._generate_session_key must produce the same result."""
|
||||
|
|
@ -866,6 +941,57 @@ class TestWhatsAppDMSessionKeyConsistency:
|
|||
assert key == "agent:main:telegram:dm:99:topic-1"
|
||||
|
||||
|
||||
class TestWhatsAppIdentifierPublicHelpers:
    """Pin the behaviour of the public WhatsApp identifier helpers.

    Plugins that need WhatsApp identity awareness rely on these helpers as
    public API, so a change that breaks one of these tests is a breaking
    change for downstream plugins.
    """

    def test_normalize_strips_jid_suffix(self):
        result = normalize_whatsapp_identifier("60123456789@s.whatsapp.net")
        assert result == "60123456789"

    def test_normalize_strips_lid_suffix(self):
        result = normalize_whatsapp_identifier("999999999999999@lid")
        assert result == "999999999999999"

    def test_normalize_strips_device_suffix(self):
        result = normalize_whatsapp_identifier("60123456789:47@s.whatsapp.net")
        assert result == "60123456789"

    def test_normalize_strips_leading_plus(self):
        result = normalize_whatsapp_identifier("+60123456789")
        assert result == "60123456789"

    def test_normalize_handles_bare_numeric(self):
        result = normalize_whatsapp_identifier("60123456789")
        assert result == "60123456789"

    def test_normalize_handles_empty_and_none(self):
        for blank in ("", None):
            assert normalize_whatsapp_identifier(blank) == ""  # type: ignore[arg-type]

    def test_canonical_without_mapping_returns_normalized(self, tmp_path, monkeypatch):
        """Without any bridge mapping file the normalized input comes back."""
        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        resolved = canonical_whatsapp_identifier("60123456789@lid")
        assert resolved == "60123456789"

    def test_canonical_walks_lid_mapping(self, tmp_path, monkeypatch):
        """A LID resolves to its paired phone identity through lid-mapping files."""
        session_dir = tmp_path / "whatsapp" / "session"
        session_dir.mkdir(parents=True, exist_ok=True)
        mapping_file = session_dir / "lid-mapping-999999999999999.json"
        mapping_file.write_text(
            json.dumps("15551234567@s.whatsapp.net"),
            encoding="utf-8",
        )
        monkeypatch.setenv("HERMES_HOME", str(tmp_path))

        # Both the LID and the phone JID must settle on the phone identity.
        for alias in ("999999999999999@lid", "15551234567@s.whatsapp.net"):
            assert canonical_whatsapp_identifier(alias) == "15551234567"

    def test_canonical_empty_input(self, tmp_path, monkeypatch):
        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        resolved = canonical_whatsapp_identifier("")
        assert resolved == ""
|
||||
|
||||
|
||||
class TestSessionStoreEntriesAttribute:
|
||||
"""Regression: /reset must access _entries, not _sessions."""
|
||||
|
||||
|
|
|
|||
|
|
@ -326,7 +326,7 @@ On messaging platforms, sessions are keyed by a deterministic session key built
|
|||
|-----------|--------------------|----------|
|
||||
| Telegram DM | `agent:main:telegram:dm:<chat_id>` | One session per DM chat |
|
||||
| Discord DM | `agent:main:discord:dm:<chat_id>` | One session per DM chat |
|
||||
| WhatsApp DM | `agent:main:whatsapp:dm:<chat_id>` | One session per DM chat |
|
||||
| WhatsApp DM | `agent:main:whatsapp:dm:<canonical_identifier>` | One session per DM user (LID/phone aliases collapse to one identity when mapping exists) |
|
||||
| Group chat | `agent:main:<platform>:group:<chat_id>:<user_id>` | Per-user inside the group when the platform exposes a user ID |
|
||||
| Group thread/topic | `agent:main:<platform>:group:<chat_id>:<thread_id>` | Shared session for all thread participants (default). Per-user with `thread_sessions_per_user: true`. |
|
||||
| Channel | `agent:main:<platform>:channel:<chat_id>:<user_id>` | Per-user inside the channel when the platform exposes a user ID |
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue