mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix(gateway): canonicalize WhatsApp identity in session keys
Hermes' WhatsApp bridge routinely surfaces the same person under either a phone-format JID (60123456789@s.whatsapp.net) or a LID (…@lid), and may flip between the two for a single human within the same conversation. Before this change, build_session_key used the raw identifier verbatim, so the bridge reshuffling an alias form produced two distinct session keys for the same person — in two places:

1. DM chat_id — a user's DM sessions split in half; transcripts and per-sender state diverge.
2. Group participant_id (with group_sessions_per_user enabled) — a member's per-user session inside a group splits in half for the same reason.

Add a canonicalizer that walks the bridge's lid-mapping-*.json files and picks the shortest/numeric-preferred alias as the stable identity. build_session_key now routes both the DM chat_id and the group participant_id through this helper when the platform is WhatsApp. All other platforms and chat types are untouched.

Expose canonical_whatsapp_identifier and normalize_whatsapp_identifier as public helpers. Plugins that need per-sender behaviour (role-based routing, per-contact authorization, policy gating) need the same identity resolution Hermes uses internally; without a public helper, each plugin would have to re-implement the walker against the bridge's internal on-disk format. Keeping this alongside build_session_key makes it authoritative and one refactor away if the bridge ever changes shape.

_expand_whatsapp_aliases stays private — it's an implementation detail of how the mapping files are walked, not a contract callers should depend on.
This commit is contained in:
parent
1143f234e3
commit
10deb1b87d
3 changed files with 234 additions and 9 deletions
|
|
@ -60,6 +60,7 @@ from .config import (
|
||||||
SessionResetPolicy, # noqa: F401 — re-exported via gateway/__init__.py
|
SessionResetPolicy, # noqa: F401 — re-exported via gateway/__init__.py
|
||||||
HomeChannel,
|
HomeChannel,
|
||||||
)
|
)
|
||||||
|
from hermes_constants import get_hermes_home
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
|
|
@ -518,15 +519,24 @@ def build_session_key(
|
||||||
"""
|
"""
|
||||||
platform = source.platform.value
|
platform = source.platform.value
|
||||||
if source.chat_type == "dm":
|
if source.chat_type == "dm":
|
||||||
if source.chat_id:
|
dm_chat_id = source.chat_id
|
||||||
|
if source.platform == Platform.WHATSAPP:
|
||||||
|
dm_chat_id = canonical_whatsapp_identifier(source.chat_id)
|
||||||
|
|
||||||
|
if dm_chat_id:
|
||||||
if source.thread_id:
|
if source.thread_id:
|
||||||
return f"agent:main:{platform}:dm:{source.chat_id}:{source.thread_id}"
|
return f"agent:main:{platform}:dm:{dm_chat_id}:{source.thread_id}"
|
||||||
return f"agent:main:{platform}:dm:{source.chat_id}"
|
return f"agent:main:{platform}:dm:{dm_chat_id}"
|
||||||
if source.thread_id:
|
if source.thread_id:
|
||||||
return f"agent:main:{platform}:dm:{source.thread_id}"
|
return f"agent:main:{platform}:dm:{source.thread_id}"
|
||||||
return f"agent:main:{platform}:dm"
|
return f"agent:main:{platform}:dm"
|
||||||
|
|
||||||
participant_id = source.user_id_alt or source.user_id
|
participant_id = source.user_id_alt or source.user_id
|
||||||
|
if participant_id and source.platform == Platform.WHATSAPP:
|
||||||
|
# Same JID/LID-flip bug as the DM case: without canonicalisation, a
|
||||||
|
# single group member gets two isolated per-user sessions when the
|
||||||
|
# bridge reshuffles alias forms.
|
||||||
|
participant_id = canonical_whatsapp_identifier(str(participant_id)) or participant_id
|
||||||
key_parts = ["agent:main", platform, source.chat_type]
|
key_parts = ["agent:main", platform, source.chat_type]
|
||||||
|
|
||||||
if source.chat_id:
|
if source.chat_id:
|
||||||
|
|
@ -547,6 +557,95 @@ def build_session_key(
|
||||||
return ":".join(key_parts)
|
return ":".join(key_parts)
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_whatsapp_identifier(value: str) -> str:
    """Strip WhatsApp JID/LID syntax down to its stable numeric identifier.

    Accepts any of the identifier shapes the WhatsApp bridge may emit:
    ``"60123456789@s.whatsapp.net"``, ``"60123456789:47@s.whatsapp.net"``,
    ``"60123456789@lid"``, or a bare ``"+60123456789"`` / ``"60123456789"``.
    Returns just the numeric identifier (``"60123456789"``) suitable for
    equality comparisons.

    Useful for plugins that want to match sender IDs against
    user-supplied config (phone numbers in ``config.yaml``) without
    worrying about which variant the bridge happens to deliver.

    Args:
        value: Raw identifier. Falsy values (``None``, ``""``) are accepted
            and normalize to the empty string.

    Returns:
        The part of ``value`` before any ``":"`` device suffix and ``"@"``
        server suffix, without a leading ``"+"`` and without surrounding
        whitespace.
    """
    text = str(value or "").strip()
    # Only a *leading* "+" is phone-number syntax. Removing a "+" from the
    # middle of the string would make two different identifiers compare equal.
    if text.startswith("+"):
        text = text[1:]
    # Drop the device suffix (":47") first, then the server part ("@...").
    # The trailing strip() removes whitespace that sat directly before the
    # separator, so "601 @lid" and "601@lid" normalize identically.
    return text.split(":", 1)[0].split("@", 1)[0].strip()
|
||||||
|
|
||||||
|
|
||||||
|
def _expand_whatsapp_aliases(identifier: str) -> set[str]:
    """Resolve WhatsApp phone/LID aliases using bridge session mapping files.

    Starting from the normalized form of ``identifier``, looks up
    ``lid-mapping-<alias>.json`` and ``lid-mapping-<alias>_reverse.json``
    under ``<hermes home>/whatsapp/session`` and follows the identifier each
    file contains, repeating until no new alias turns up.

    The lookup is best-effort: mapping files that are missing, unreadable,
    not valid JSON, or that hold something other than a scalar identifier
    are skipped rather than raised.

    Args:
        identifier: Any identifier shape accepted by
            :func:`normalize_whatsapp_identifier`.

    Returns:
        Every alias reached, including the normalized input itself. Empty
        when ``identifier`` normalizes to an empty string.
    """
    from collections import deque  # function-scope: only this walker needs it

    normalized = normalize_whatsapp_identifier(identifier)
    if not normalized:
        return set()

    session_dir = get_hermes_home() / "whatsapp" / "session"
    resolved: set[str] = set()
    queue = deque([normalized])

    while queue:
        current = queue.popleft()
        if not current or current in resolved:
            continue

        resolved.add(current)
        for suffix in ("", "_reverse"):
            mapping_path = session_dir / f"lid-mapping-{current}{suffix}.json"
            # Identifiers come from the network and from file contents. Never
            # let one introduce extra path components and steer the read
            # outside the session directory.
            if mapping_path.parent != session_dir:
                continue
            try:
                # Read directly instead of exists()-then-read: a missing file
                # is simply an OSError and there is no check/use race.
                payload = json.loads(mapping_path.read_text(encoding="utf-8"))
            except (OSError, ValueError, RecursionError):
                # OSError: missing/unreadable file. ValueError: bad UTF-8 or
                # invalid JSON. RecursionError: pathologically nested JSON.
                continue
            # A mapping is expected to hold one identifier. Stringifying a
            # list or dict would manufacture a bogus alias, so skip those.
            if isinstance(payload, bool) or not isinstance(payload, (str, int)):
                continue
            mapped = normalize_whatsapp_identifier(payload)
            if mapped and mapped not in resolved:
                queue.append(mapped)

    return resolved
|
||||||
|
|
||||||
|
|
||||||
|
def canonical_whatsapp_identifier(identifier: str) -> str:
    """Return a stable WhatsApp sender identity across phone-JID/LID variants.

    WhatsApp may surface the same person under either a phone-format JID
    (``60123456789@s.whatsapp.net``) or a LID (``1234567890@lid``). This
    applies to a DM ``chat_id`` *and* to the ``participant_id`` of a
    member inside a group chat — both represent a user identity, and the
    bridge may flip between the two for the same human.

    This helper reads the bridge's ``whatsapp/session/lid-mapping-*.json``
    files, walks the mapping transitively, and picks the shortest alias
    (ties broken by plain string ordering) as the canonical identity.
    :func:`build_session_key` uses this for both WhatsApp DM chat_ids and
    WhatsApp group participant_ids, so callers get the same session-key
    identity Hermes itself uses.

    Plugins that need per-sender behaviour (role-based routing,
    authorization, per-contact policy) should use this so their
    bookkeeping lines up with Hermes' session bookkeeping even when
    the bridge reshuffles aliases.

    Args:
        identifier: Any identifier shape accepted by
            :func:`normalize_whatsapp_identifier`.

    Returns:
        An empty string if ``identifier`` normalizes to empty. If no
        mapping files exist yet (fresh bridge install), the normalized
        input unchanged. Otherwise the chosen alias.
    """
    normalized = normalize_whatsapp_identifier(identifier)
    if not normalized:
        return ""

    aliases = _expand_whatsapp_aliases(normalized)
    # The alias walker normalizes its input again. For odd inputs that second
    # pass can collapse to nothing and yield an empty set, so fall back to
    # `normalized` rather than letting min() raise on an empty iterable.
    return min(
        aliases,
        key=lambda candidate: (len(candidate), candidate),
        default=normalized,
    )
|
||||||
|
|
||||||
|
|
||||||
class SessionStore:
|
class SessionStore:
|
||||||
"""
|
"""
|
||||||
Manages session storage and retrieval.
|
Manages session storage and retrieval.
|
||||||
|
|
|
||||||
|
|
@ -11,6 +11,8 @@ from gateway.session import (
|
||||||
build_session_context,
|
build_session_context,
|
||||||
build_session_context_prompt,
|
build_session_context_prompt,
|
||||||
build_session_key,
|
build_session_key,
|
||||||
|
canonical_whatsapp_identifier,
|
||||||
|
normalize_whatsapp_identifier,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -626,9 +628,9 @@ class TestSessionStoreSwitchSession:
|
||||||
db.close()
|
db.close()
|
||||||
|
|
||||||
|
|
||||||
class TestWhatsAppDMSessionKeyConsistency:
|
class TestWhatsAppSessionKeyConsistency:
|
||||||
"""Regression: all session-key construction must go through build_session_key
|
"""Regression: WhatsApp session keys must collapse JID/LID aliases to a
|
||||||
so DMs are isolated by chat_id across platforms."""
|
single stable identity for both DM chat_ids and group participant_ids."""
|
||||||
|
|
||||||
@pytest.fixture()
|
@pytest.fixture()
|
||||||
def store(self, tmp_path):
|
def store(self, tmp_path):
|
||||||
|
|
@ -639,7 +641,7 @@ class TestWhatsAppDMSessionKeyConsistency:
|
||||||
s._loaded = True
|
s._loaded = True
|
||||||
return s
|
return s
|
||||||
|
|
||||||
def test_whatsapp_dm_includes_chat_id(self):
|
def test_whatsapp_dm_uses_canonical_identifier(self):
|
||||||
source = SessionSource(
|
source = SessionSource(
|
||||||
platform=Platform.WHATSAPP,
|
platform=Platform.WHATSAPP,
|
||||||
chat_id="15551234567@s.whatsapp.net",
|
chat_id="15551234567@s.whatsapp.net",
|
||||||
|
|
@ -647,7 +649,80 @@ class TestWhatsAppDMSessionKeyConsistency:
|
||||||
user_name="Phone User",
|
user_name="Phone User",
|
||||||
)
|
)
|
||||||
key = build_session_key(source)
|
key = build_session_key(source)
|
||||||
assert key == "agent:main:whatsapp:dm:15551234567@s.whatsapp.net"
|
assert key == "agent:main:whatsapp:dm:15551234567"
|
||||||
|
|
||||||
|
def test_whatsapp_dm_aliases_share_one_session_key(self, tmp_path, monkeypatch):
    """A LID-form and a phone-form DM chat_id for one person yield one key."""
    hermes_home = tmp_path / "hermes-home"
    session_dir = hermes_home / "whatsapp" / "session"
    session_dir.mkdir(parents=True, exist_ok=True)
    lid_file = session_dir / "lid-mapping-999999999999999.json"
    lid_file.write_text(
        json.dumps("15551234567@s.whatsapp.net"),
        encoding="utf-8",
    )
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))

    # Check the LID form first, then the phone form, against the same key.
    expected_key = "agent:main:whatsapp:dm:15551234567"
    for chat_id in ("999999999999999@lid", "15551234567@s.whatsapp.net"):
        source = SessionSource(
            platform=Platform.WHATSAPP,
            chat_id=chat_id,
            chat_type="dm",
            user_name="Phone User",
        )
        assert build_session_key(source) == expected_key
|
||||||
|
|
||||||
|
def test_whatsapp_group_participant_aliases_share_session_key(self, tmp_path, monkeypatch):
    """Per-user group sessions must not split when a member's ID flips
    between the phone-JID form and the LID form."""
    hermes_home = tmp_path / "hermes-home"
    session_dir = hermes_home / "whatsapp" / "session"
    session_dir.mkdir(parents=True, exist_ok=True)
    lid_file = session_dir / "lid-mapping-999999999999999.json"
    lid_file.write_text(
        json.dumps("15551234567@s.whatsapp.net"),
        encoding="utf-8",
    )
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))

    group_id = "120363000000000000@g.us"
    expected_key = "agent:main:whatsapp:group:120363000000000000@g.us:15551234567"

    # Same group, same human: LID form first, then phone form.
    for member_id in ("999999999999999@lid", "15551234567@s.whatsapp.net"):
        source = SessionSource(
            platform=Platform.WHATSAPP,
            chat_id=group_id,
            chat_type="group",
            user_id=member_id,
            user_name="Group Member",
        )
        assert build_session_key(source, group_sessions_per_user=True) == expected_key
|
||||||
|
|
||||||
|
def test_whatsapp_group_shared_sessions_untouched_by_canonicalisation(self):
    """With group_sessions_per_user=False the expected key carries only the
    group chat_id, so the participant's alias form cannot influence it."""
    shared_group_source = SessionSource(
        platform=Platform.WHATSAPP,
        chat_id="120363000000000000@g.us",
        chat_type="group",
        user_id="999999999999999@lid",
        user_name="Group Member",
    )
    key = build_session_key(shared_group_source, group_sessions_per_user=False)
    assert key == "agent:main:whatsapp:group:120363000000000000@g.us"
|
||||||
|
|
||||||
def test_store_delegates_to_build_session_key(self, store):
|
def test_store_delegates_to_build_session_key(self, store):
|
||||||
"""SessionStore._generate_session_key must produce the same result."""
|
"""SessionStore._generate_session_key must produce the same result."""
|
||||||
|
|
@ -866,6 +941,57 @@ class TestWhatsAppDMSessionKeyConsistency:
|
||||||
assert key == "agent:main:telegram:dm:99:topic-1"
|
assert key == "agent:main:telegram:dm:99:topic-1"
|
||||||
|
|
||||||
|
|
||||||
|
class TestWhatsAppIdentifierPublicHelpers:
    """Pin the behaviour of the public WhatsApp identifier helpers.

    Plugins that need WhatsApp identity awareness rely on these helpers,
    so a change that breaks one of these expectations is a breaking
    change for downstream plugins.
    """

    def test_normalize_strips_jid_suffix(self):
        result = normalize_whatsapp_identifier("60123456789@s.whatsapp.net")
        assert result == "60123456789"

    def test_normalize_strips_lid_suffix(self):
        result = normalize_whatsapp_identifier("999999999999999@lid")
        assert result == "999999999999999"

    def test_normalize_strips_device_suffix(self):
        result = normalize_whatsapp_identifier("60123456789:47@s.whatsapp.net")
        assert result == "60123456789"

    def test_normalize_strips_leading_plus(self):
        result = normalize_whatsapp_identifier("+60123456789")
        assert result == "60123456789"

    def test_normalize_handles_bare_numeric(self):
        result = normalize_whatsapp_identifier("60123456789")
        assert result == "60123456789"

    def test_normalize_handles_empty_and_none(self):
        empty_result = normalize_whatsapp_identifier("")
        none_result = normalize_whatsapp_identifier(None)  # type: ignore[arg-type]
        assert empty_result == ""
        assert none_result == ""

    def test_canonical_without_mapping_returns_normalized(self, tmp_path, monkeypatch):
        """No mapping files on disk: the result is just the normalized input."""
        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        result = canonical_whatsapp_identifier("60123456789@lid")
        assert result == "60123456789"

    def test_canonical_walks_lid_mapping(self, tmp_path, monkeypatch):
        """A LID with a mapping file resolves to the phone identity it maps to."""
        session_dir = tmp_path / "whatsapp" / "session"
        session_dir.mkdir(parents=True, exist_ok=True)
        lid_file = session_dir / "lid-mapping-999999999999999.json"
        lid_file.write_text(
            json.dumps("15551234567@s.whatsapp.net"),
            encoding="utf-8",
        )
        monkeypatch.setenv("HERMES_HOME", str(tmp_path))

        from_lid = canonical_whatsapp_identifier("999999999999999@lid")
        assert from_lid == "15551234567"
        from_phone = canonical_whatsapp_identifier("15551234567@s.whatsapp.net")
        assert from_phone == "15551234567"

    def test_canonical_empty_input(self, tmp_path, monkeypatch):
        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        result = canonical_whatsapp_identifier("")
        assert result == ""
|
||||||
|
|
||||||
|
|
||||||
class TestSessionStoreEntriesAttribute:
|
class TestSessionStoreEntriesAttribute:
|
||||||
"""Regression: /reset must access _entries, not _sessions."""
|
"""Regression: /reset must access _entries, not _sessions."""
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -326,7 +326,7 @@ On messaging platforms, sessions are keyed by a deterministic session key built
|
||||||
|-----------|--------------------|----------|
|
|-----------|--------------------|----------|
|
||||||
| Telegram DM | `agent:main:telegram:dm:<chat_id>` | One session per DM chat |
|
| Telegram DM | `agent:main:telegram:dm:<chat_id>` | One session per DM chat |
|
||||||
| Discord DM | `agent:main:discord:dm:<chat_id>` | One session per DM chat |
|
| Discord DM | `agent:main:discord:dm:<chat_id>` | One session per DM chat |
|
||||||
| WhatsApp DM | `agent:main:whatsapp:dm:<chat_id>` | One session per DM chat |
|
| WhatsApp DM | `agent:main:whatsapp:dm:<canonical_identifier>` | One session per DM user (LID/phone aliases collapse to one identity when mapping exists) |
|
||||||
| Group chat | `agent:main:<platform>:group:<chat_id>:<user_id>` | Per-user inside the group when the platform exposes a user ID |
|
| Group chat | `agent:main:<platform>:group:<chat_id>:<user_id>` | Per-user inside the group when the platform exposes a user ID |
|
||||||
| Group thread/topic | `agent:main:<platform>:group:<chat_id>:<thread_id>` | Shared session for all thread participants (default). Per-user with `thread_sessions_per_user: true`. |
|
| Group thread/topic | `agent:main:<platform>:group:<chat_id>:<thread_id>` | Shared session for all thread participants (default). Per-user with `thread_sessions_per_user: true`. |
|
||||||
| Channel | `agent:main:<platform>:channel:<chat_id>:<user_id>` | Per-user inside the channel when the platform exposes a user ID |
|
| Channel | `agent:main:<platform>:channel:<chat_id>:<user_id>` | Per-user inside the channel when the platform exposes a user ID |
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue