mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix(gateway): canonicalize WhatsApp identity in session keys
Hermes' WhatsApp bridge routinely surfaces the same person under either a phone-format JID (60123456789@s.whatsapp.net) or a LID (…@lid), and may flip between the two for a single human within the same conversation. Before this change, build_session_key used the raw identifier verbatim, so the bridge reshuffling an alias form produced two distinct session keys for the same person — in two places: 1. DM chat_id — a user's DM sessions split in half, transcripts and per-sender state diverge. 2. Group participant_id (with group_sessions_per_user enabled) — a member's per-user session inside a group splits in half for the same reason. Add a canonicalizer that walks the bridge's lid-mapping-*.json files and picks the shortest/numeric-preferred alias as the stable identity. build_session_key now routes both the DM chat_id and the group participant_id through this helper when the platform is WhatsApp. All other platforms and chat types are untouched. Expose canonical_whatsapp_identifier and normalize_whatsapp_identifier as public helpers. Plugins that need per-sender behaviour (role-based routing, per-contact authorization, policy gating) need the same identity resolution Hermes uses internally; without a public helper, each plugin would have to re-implement the walker against the bridge's internal on-disk format. Keeping this alongside build_session_key makes it authoritative and one refactor away if the bridge ever changes shape. _expand_whatsapp_aliases stays private — it's an implementation detail of how the mapping files are walked, not a contract callers should depend on.
This commit is contained in:
parent
1143f234e3
commit
10deb1b87d
3 changed files with 234 additions and 9 deletions
|
|
@ -60,6 +60,7 @@ from .config import (
|
|||
SessionResetPolicy, # noqa: F401 — re-exported via gateway/__init__.py
|
||||
HomeChannel,
|
||||
)
|
||||
from hermes_constants import get_hermes_home
|
||||
|
||||
|
||||
@dataclass
|
||||
|
|
@ -518,15 +519,24 @@ def build_session_key(
|
|||
"""
|
||||
platform = source.platform.value
|
||||
if source.chat_type == "dm":
|
||||
if source.chat_id:
|
||||
dm_chat_id = source.chat_id
|
||||
if source.platform == Platform.WHATSAPP:
|
||||
dm_chat_id = canonical_whatsapp_identifier(source.chat_id)
|
||||
|
||||
if dm_chat_id:
|
||||
if source.thread_id:
|
||||
return f"agent:main:{platform}:dm:{source.chat_id}:{source.thread_id}"
|
||||
return f"agent:main:{platform}:dm:{source.chat_id}"
|
||||
return f"agent:main:{platform}:dm:{dm_chat_id}:{source.thread_id}"
|
||||
return f"agent:main:{platform}:dm:{dm_chat_id}"
|
||||
if source.thread_id:
|
||||
return f"agent:main:{platform}:dm:{source.thread_id}"
|
||||
return f"agent:main:{platform}:dm"
|
||||
|
||||
participant_id = source.user_id_alt or source.user_id
|
||||
if participant_id and source.platform == Platform.WHATSAPP:
|
||||
# Same JID/LID-flip bug as the DM case: without canonicalisation, a
|
||||
# single group member gets two isolated per-user sessions when the
|
||||
# bridge reshuffles alias forms.
|
||||
participant_id = canonical_whatsapp_identifier(str(participant_id)) or participant_id
|
||||
key_parts = ["agent:main", platform, source.chat_type]
|
||||
|
||||
if source.chat_id:
|
||||
|
|
@ -547,6 +557,95 @@ def build_session_key(
|
|||
return ":".join(key_parts)
|
||||
|
||||
|
||||
def normalize_whatsapp_identifier(value: str) -> str:
    """Strip WhatsApp JID/LID syntax down to its stable numeric identifier.

    Accepts any of the identifier shapes the WhatsApp bridge may emit:
    ``"60123456789@s.whatsapp.net"``, ``"60123456789:47@s.whatsapp.net"``,
    ``"60123456789@lid"``, or a bare ``"+60123456789"`` / ``"60123456789"``.
    Returns just the numeric identifier (``"60123456789"``) suitable for
    equality comparisons.

    Useful for plugins that want to match sender IDs against
    user-supplied config (phone numbers in ``config.yaml``) without
    worrying about which variant the bridge happens to deliver.

    The function is idempotent: normalizing an already-normalized value
    returns it unchanged. Falsy input (``None``, ``""``) yields ``""``.
    """
    text = str(value or "")
    # Keep only what precedes the first ":" or "@"; everything after is the
    # ``:47``-style suffix and/or the server part (``s.whatsapp.net``, ``lid``).
    user_part = text.partition(":")[0].partition("@")[0]
    # Strip *after* cutting the suffix so whitespace that sat next to the
    # separator cannot survive, and drop every "+" so a second pass over the
    # result is a no-op.
    return user_part.replace("+", "").strip()
|
||||
|
||||
|
||||
def _expand_whatsapp_aliases(identifier: str) -> set[str]:
    """Resolve WhatsApp phone/LID aliases using bridge session mapping files.

    Starting from the normalized form of ``identifier``, follows
    ``lid-mapping-<alias>.json`` and ``lid-mapping-<alias>_reverse.json``
    files under ``<hermes home>/whatsapp/session`` transitively and
    collects every alias reached.

    Returns the set of normalized aliases, including the normalized input
    itself. Returns an empty set if ``identifier`` normalizes to empty.
    Missing, unreadable or malformed mapping files are skipped; this is a
    best-effort lookup and does not raise for them.
    """
    normalized = normalize_whatsapp_identifier(identifier)
    if not normalized:
        return set()

    session_dir = get_hermes_home() / "whatsapp" / "session"
    resolved: set[str] = set()
    # The result is a transitive closure, so visit order is irrelevant and a
    # plain list used as a stack is enough.
    pending = [normalized]

    while pending:
        current = pending.pop()
        if current in resolved:
            continue
        resolved.add(current)

        # The alias is interpolated into a file name below. Refuse anything
        # carrying a path separator so a crafted identifier cannot make the
        # lookup leave the session directory.
        if "/" in current or "\\" in current:
            continue

        for suffix in ("", "_reverse"):
            mapping_path = session_dir / f"lid-mapping-{current}{suffix}.json"
            try:
                # Read directly instead of exists()+read: no race if the
                # bridge rewrites or removes the file in between.
                payload = json.loads(mapping_path.read_text(encoding="utf-8"))
            except (OSError, ValueError):
                # Missing/unreadable file, bad encoding, or invalid JSON.
                continue

            # Only a scalar identifier is meaningful here. Stringifying a
            # dict/list/bool would fabricate a bogus (and often very short)
            # alias that could then be picked as the canonical identity.
            if isinstance(payload, bool) or not isinstance(payload, (str, int)):
                continue

            mapped = normalize_whatsapp_identifier(str(payload))
            if mapped and mapped not in resolved:
                pending.append(mapped)

    return resolved
|
||||
|
||||
|
||||
def canonical_whatsapp_identifier(identifier: str) -> str:
    """Return a stable WhatsApp sender identity across phone-JID/LID variants.

    WhatsApp may surface the same person under either a phone-format JID
    (``60123456789@s.whatsapp.net``) or a LID (``1234567890@lid``). This
    applies to a DM ``chat_id`` *and* to the ``participant_id`` of a
    member inside a group chat — both represent a user identity, and the
    bridge may flip between the two for the same human.

    This helper reads the bridge's ``whatsapp/session/lid-mapping-*.json``
    files, walks the mapping transitively, and picks the shortest alias as
    the canonical identity; aliases of equal length are ordered by plain
    string comparison. :func:`build_session_key` uses this for both
    WhatsApp DM chat_ids and WhatsApp group participant_ids, so callers get
    the same session-key identity Hermes itself uses.

    Plugins that need per-sender behaviour (role-based routing,
    authorization, per-contact policy) should use this so their
    bookkeeping lines up with Hermes' session bookkeeping even when
    the bridge reshuffles aliases.

    Returns an empty string if ``identifier`` normalizes to empty. If no
    mapping files exist yet (fresh bridge install), or alias expansion
    yields nothing, returns the normalized input unchanged.
    """
    normalized = normalize_whatsapp_identifier(identifier)
    if not normalized:
        return ""

    aliases = _expand_whatsapp_aliases(normalized)
    # The expansion normally contains `normalized` itself, but it applies
    # normalization again and may come back empty for degenerate input;
    # `default` keeps min() from raising ValueError in that case.
    return min(
        aliases,
        key=lambda candidate: (len(candidate), candidate),
        default=normalized,
    )
|
||||
|
||||
|
||||
class SessionStore:
|
||||
"""
|
||||
Manages session storage and retrieval.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue