refactor(gateway): extract WhatsApp identity helpers into shared module

Follow-up to the canonical-identity session-key fix: pull the
JID/LID normalize/expand/canonical helpers into gateway/whatsapp_identity.py
instead of living in two places. gateway/session.py (session-key build) and
gateway/run.py (authorisation allowlist) now both import from the shared
module, so the two resolution paths can't drift apart.

Also switches the auth path from module-level _hermes_home (cached at
import time) to dynamic get_hermes_home() lookup, which matches the
session-key path and correctly reflects HERMES_HOME env overrides. The
lone test that monkeypatched gateway.run._hermes_home for the WhatsApp
auth path is updated to set HERMES_HOME env var instead; all other
tests that monkeypatch _hermes_home for unrelated paths (update,
restart drain, shutdown marker, etc.) still work — the module-level
_hermes_home is untouched.
This commit is contained in:
Teknium 2026-04-24 07:42:00 -07:00 committed by Teknium
parent 10deb1b87d
commit 62c14d5513
4 changed files with 146 additions and 132 deletions

View file

@ -298,50 +298,16 @@ from gateway.restart import (
) )
def _normalize_whatsapp_identifier(value: str) -> str: from gateway.whatsapp_identity import (
"""Strip WhatsApp JID/LID syntax down to its stable numeric identifier.""" canonical_whatsapp_identifier as _canonical_whatsapp_identifier, # noqa: F401
return ( expand_whatsapp_aliases as _expand_whatsapp_auth_aliases,
str(value or "") normalize_whatsapp_identifier as _normalize_whatsapp_identifier,
.strip() )
.replace("+", "", 1)
.split(":", 1)[0]
.split("@", 1)[0]
)
def _expand_whatsapp_auth_aliases(identifier: str) -> set:
"""Resolve WhatsApp phone/LID aliases using bridge session mapping files."""
normalized = _normalize_whatsapp_identifier(identifier)
if not normalized:
return set()
session_dir = _hermes_home / "whatsapp" / "session"
resolved = set()
queue = [normalized]
while queue:
current = queue.pop(0)
if not current or current in resolved:
continue
resolved.add(current)
for suffix in ("", "_reverse"):
mapping_path = session_dir / f"lid-mapping-{current}{suffix}.json"
if not mapping_path.exists():
continue
try:
mapped = _normalize_whatsapp_identifier(
json.loads(mapping_path.read_text(encoding="utf-8"))
)
except Exception:
continue
if mapped and mapped not in resolved:
queue.append(mapped)
return resolved
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# Sentinel placed into _running_agents immediately when a session starts # Sentinel placed into _running_agents immediately when a session starts
# processing, *before* any await. Prevents a second message for the same # processing, *before* any await. Prevents a second message for the same
# session from bypassing the "already running" guard during the async gap # session from bypassing the "already running" guard during the async gap

View file

@ -60,7 +60,10 @@ from .config import (
SessionResetPolicy, # noqa: F401 — re-exported via gateway/__init__.py SessionResetPolicy, # noqa: F401 — re-exported via gateway/__init__.py
HomeChannel, HomeChannel,
) )
from hermes_constants import get_hermes_home from .whatsapp_identity import (
canonical_whatsapp_identifier,
normalize_whatsapp_identifier,
)
@dataclass @dataclass
@ -557,95 +560,6 @@ def build_session_key(
return ":".join(key_parts) return ":".join(key_parts)
def normalize_whatsapp_identifier(value: str) -> str:
"""Strip WhatsApp JID/LID syntax down to its stable numeric identifier.
Accepts any of the identifier shapes the WhatsApp bridge may emit:
``"60123456789@s.whatsapp.net"``, ``"60123456789:47@s.whatsapp.net"``,
``"60123456789@lid"``, or a bare ``"+60123456789"`` / ``"60123456789"``.
Returns just the numeric identifier (``"60123456789"``) suitable for
equality comparisons.
Useful for plugins that want to match sender IDs against
user-supplied config (phone numbers in ``config.yaml``) without
worrying about which variant the bridge happens to deliver.
"""
return (
str(value or "")
.strip()
.replace("+", "", 1)
.split(":", 1)[0]
.split("@", 1)[0]
)
def _expand_whatsapp_aliases(identifier: str) -> set[str]:
"""Resolve WhatsApp phone/LID aliases using bridge session mapping files."""
normalized = normalize_whatsapp_identifier(identifier)
if not normalized:
return set()
session_dir = get_hermes_home() / "whatsapp" / "session"
resolved: set[str] = set()
queue = [normalized]
while queue:
current = queue.pop(0)
if not current or current in resolved:
continue
resolved.add(current)
for suffix in ("", "_reverse"):
mapping_path = session_dir / f"lid-mapping-{current}{suffix}.json"
if not mapping_path.exists():
continue
try:
mapped = normalize_whatsapp_identifier(
json.loads(mapping_path.read_text(encoding="utf-8"))
)
except Exception:
continue
if mapped and mapped not in resolved:
queue.append(mapped)
return resolved
def canonical_whatsapp_identifier(identifier: str) -> str:
"""Return a stable WhatsApp sender identity across phone-JID/LID variants.
WhatsApp may surface the same person under either a phone-format JID
(``60123456789@s.whatsapp.net``) or a LID (``1234567890@lid``). This
applies to a DM ``chat_id`` *and* to the ``participant_id`` of a
member inside a group chat both represent a user identity, and the
bridge may flip between the two for the same human.
This helper reads the bridge's ``whatsapp/session/lid-mapping-*.json``
files, walks the mapping transitively, and picks the shortest
(numeric-preferred) alias as the canonical identity. :func:`build_session_key`
uses this for both WhatsApp DM chat_ids and WhatsApp group participant_ids,
so callers get the same session-key identity Hermes itself uses.
Plugins that need per-sender behaviour (role-based routing,
authorization, per-contact policy) should use this so their
bookkeeping lines up with Hermes' session bookkeeping even when
the bridge reshuffles aliases.
Returns an empty string if ``identifier`` normalizes to empty. If no
mapping files exist yet (fresh bridge install), returns the
normalized input unchanged.
"""
normalized = normalize_whatsapp_identifier(identifier)
if not normalized:
return ""
# _expand_whatsapp_aliases always includes `normalized` itself in the
# returned set, so the min() below degrades gracefully to `normalized`
# when no lid-mapping files are present.
aliases = _expand_whatsapp_aliases(normalized)
return min(aliases, key=lambda candidate: (len(candidate), candidate))
class SessionStore: class SessionStore:
""" """
Manages session storage and retrieval. Manages session storage and retrieval.

View file

@ -0,0 +1,135 @@
"""Shared helpers for canonicalising WhatsApp sender identity.
WhatsApp's bridge can surface the same human under two different JID shapes
within a single conversation:
- LID form: ``999999999999999@lid``
- Phone form: ``15551234567@s.whatsapp.net``
Both the authorisation path (:mod:`gateway.run`) and the session-key path
(:mod:`gateway.session`) need to collapse these aliases to a single stable
identity. This module is the single source of truth for that resolution so
the two paths can never drift apart.
Public helpers:
- :func:`normalize_whatsapp_identifier` strip JID/LID/device/plus syntax
down to the bare numeric identifier.
- :func:`canonical_whatsapp_identifier` walk the bridge's
``lid-mapping-*.json`` files and return a stable canonical identity
across phone/LID variants.
- :func:`expand_whatsapp_aliases` return the full alias set for an
identifier. Used by authorisation code that needs to match any known
form of a sender against an allow-list.
Plugins that need per-sender behaviour on WhatsApp (role-based routing,
per-contact authorisation, policy gating in a gateway hook) should use
``canonical_whatsapp_identifier`` so their bookkeeping lines up with
Hermes' own session keys.
"""
from __future__ import annotations
import json
from typing import Set
from hermes_constants import get_hermes_home
def normalize_whatsapp_identifier(value: str) -> str:
"""Strip WhatsApp JID/LID syntax down to its stable numeric identifier.
Accepts any of the identifier shapes the WhatsApp bridge may emit:
``"60123456789@s.whatsapp.net"``, ``"60123456789:47@s.whatsapp.net"``,
``"60123456789@lid"``, or a bare ``"+601****6789"`` / ``"60123456789"``.
Returns just the numeric identifier (``"60123456789"``) suitable for
equality comparisons.
Useful for plugins that want to match sender IDs against
user-supplied config (phone numbers in ``config.yaml``) without
worrying about which variant the bridge happens to deliver.
"""
return (
str(value or "")
.strip()
.replace("+", "", 1)
.split(":", 1)[0]
.split("@", 1)[0]
)
def expand_whatsapp_aliases(identifier: str) -> Set[str]:
"""Resolve WhatsApp phone/LID aliases via bridge session mapping files.
Returns the set of all identifiers transitively reachable through the
bridge's ``$HERMES_HOME/whatsapp/session/lid-mapping-*.json`` files,
starting from ``identifier``. The result always includes the
normalized input itself, so callers can safely ``in`` check against
the return value without a separate fallback branch.
Returns an empty set if ``identifier`` normalizes to empty.
"""
normalized = normalize_whatsapp_identifier(identifier)
if not normalized:
return set()
session_dir = get_hermes_home() / "whatsapp" / "session"
resolved: Set[str] = set()
queue = [normalized]
while queue:
current = queue.pop(0)
if not current or current in resolved:
continue
resolved.add(current)
for suffix in ("", "_reverse"):
mapping_path = session_dir / f"lid-mapping-{current}{suffix}.json"
if not mapping_path.exists():
continue
try:
mapped = normalize_whatsapp_identifier(
json.loads(mapping_path.read_text(encoding="utf-8"))
)
except Exception:
continue
if mapped and mapped not in resolved:
queue.append(mapped)
return resolved
def canonical_whatsapp_identifier(identifier: str) -> str:
"""Return a stable WhatsApp sender identity across phone-JID/LID variants.
WhatsApp may surface the same person under either a phone-format JID
(``60123456789@s.whatsapp.net``) or a LID (``1234567890@lid``). This
applies to a DM ``chat_id`` *and* to the ``participant_id`` of a
member inside a group chat both represent a user identity, and the
bridge may flip between the two for the same human.
This helper reads the bridge's ``whatsapp/session/lid-mapping-*.json``
files, walks the mapping transitively, and picks the shortest
(numeric-preferred) alias as the canonical identity.
:func:`gateway.session.build_session_key` uses this for both WhatsApp
DM chat_ids and WhatsApp group participant_ids, so callers get the
same session-key identity Hermes itself uses.
Plugins that need per-sender behaviour (role-based routing,
authorisation, per-contact policy) should use this so their
bookkeeping lines up with Hermes' session bookkeeping even when
the bridge reshuffles aliases.
Returns an empty string if ``identifier`` normalizes to empty. If no
mapping files exist yet (fresh bridge install), returns the
normalized input unchanged.
"""
normalized = normalize_whatsapp_identifier(identifier)
if not normalized:
return ""
# expand_whatsapp_aliases always includes `normalized` itself in the
# returned set, so the min() below degrades gracefully to `normalized`
# when no lid-mapping files are present.
aliases = expand_whatsapp_aliases(normalized)
return min(aliases, key=lambda candidate: (len(candidate), candidate))

View file

@ -3,7 +3,6 @@ from unittest.mock import AsyncMock, MagicMock
import pytest import pytest
import gateway.run as gateway_run
from gateway.config import GatewayConfig, Platform, PlatformConfig from gateway.config import GatewayConfig, Platform, PlatformConfig
from gateway.platforms.base import MessageEvent from gateway.platforms.base import MessageEvent
from gateway.session import SessionSource from gateway.session import SessionSource
@ -76,7 +75,7 @@ def _make_runner(platform: Platform, config: GatewayConfig):
def test_whatsapp_lid_user_matches_phone_allowlist_via_session_mapping(monkeypatch, tmp_path): def test_whatsapp_lid_user_matches_phone_allowlist_via_session_mapping(monkeypatch, tmp_path):
_clear_auth_env(monkeypatch) _clear_auth_env(monkeypatch)
monkeypatch.setenv("WHATSAPP_ALLOWED_USERS", "15550000001") monkeypatch.setenv("WHATSAPP_ALLOWED_USERS", "15550000001")
monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path) monkeypatch.setenv("HERMES_HOME", str(tmp_path))
session_dir = tmp_path / "whatsapp" / "session" session_dir = tmp_path / "whatsapp" / "session"
session_dir.mkdir(parents=True) session_dir.mkdir(parents=True)