diff --git a/gateway/run.py b/gateway/run.py index ecc6d17bc..e0d213b0c 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -298,50 +298,16 @@ from gateway.restart import ( ) -def _normalize_whatsapp_identifier(value: str) -> str: - """Strip WhatsApp JID/LID syntax down to its stable numeric identifier.""" - return ( - str(value or "") - .strip() - .replace("+", "", 1) - .split(":", 1)[0] - .split("@", 1)[0] - ) +from gateway.whatsapp_identity import ( + canonical_whatsapp_identifier as _canonical_whatsapp_identifier, # noqa: F401 + expand_whatsapp_aliases as _expand_whatsapp_auth_aliases, + normalize_whatsapp_identifier as _normalize_whatsapp_identifier, +) -def _expand_whatsapp_auth_aliases(identifier: str) -> set: - """Resolve WhatsApp phone/LID aliases using bridge session mapping files.""" - normalized = _normalize_whatsapp_identifier(identifier) - if not normalized: - return set() - - session_dir = _hermes_home / "whatsapp" / "session" - resolved = set() - queue = [normalized] - - while queue: - current = queue.pop(0) - if not current or current in resolved: - continue - - resolved.add(current) - for suffix in ("", "_reverse"): - mapping_path = session_dir / f"lid-mapping-{current}{suffix}.json" - if not mapping_path.exists(): - continue - try: - mapped = _normalize_whatsapp_identifier( - json.loads(mapping_path.read_text(encoding="utf-8")) - ) - except Exception: - continue - if mapped and mapped not in resolved: - queue.append(mapped) - - return resolved - logger = logging.getLogger(__name__) + # Sentinel placed into _running_agents immediately when a session starts # processing, *before* any await. 
Prevents a second message for the same # session from bypassing the "already running" guard during the async gap diff --git a/gateway/session.py b/gateway/session.py index 662cba64e..0584cd7ac 100644 --- a/gateway/session.py +++ b/gateway/session.py @@ -60,7 +60,10 @@ from .config import ( SessionResetPolicy, # noqa: F401 — re-exported via gateway/__init__.py HomeChannel, ) -from hermes_constants import get_hermes_home +from .whatsapp_identity import ( + canonical_whatsapp_identifier, + normalize_whatsapp_identifier, +) @dataclass @@ -557,95 +560,6 @@ def build_session_key( return ":".join(key_parts) -def normalize_whatsapp_identifier(value: str) -> str: - """Strip WhatsApp JID/LID syntax down to its stable numeric identifier. - - Accepts any of the identifier shapes the WhatsApp bridge may emit: - ``"60123456789@s.whatsapp.net"``, ``"60123456789:47@s.whatsapp.net"``, - ``"60123456789@lid"``, or a bare ``"+60123456789"`` / ``"60123456789"``. - Returns just the numeric identifier (``"60123456789"``) suitable for - equality comparisons. - - Useful for plugins that want to match sender IDs against - user-supplied config (phone numbers in ``config.yaml``) without - worrying about which variant the bridge happens to deliver. 
- """ - return ( - str(value or "") - .strip() - .replace("+", "", 1) - .split(":", 1)[0] - .split("@", 1)[0] - ) - - -def _expand_whatsapp_aliases(identifier: str) -> set[str]: - """Resolve WhatsApp phone/LID aliases using bridge session mapping files.""" - normalized = normalize_whatsapp_identifier(identifier) - if not normalized: - return set() - - session_dir = get_hermes_home() / "whatsapp" / "session" - resolved: set[str] = set() - queue = [normalized] - - while queue: - current = queue.pop(0) - if not current or current in resolved: - continue - - resolved.add(current) - for suffix in ("", "_reverse"): - mapping_path = session_dir / f"lid-mapping-{current}{suffix}.json" - if not mapping_path.exists(): - continue - try: - mapped = normalize_whatsapp_identifier( - json.loads(mapping_path.read_text(encoding="utf-8")) - ) - except Exception: - continue - if mapped and mapped not in resolved: - queue.append(mapped) - - return resolved - - -def canonical_whatsapp_identifier(identifier: str) -> str: - """Return a stable WhatsApp sender identity across phone-JID/LID variants. - - WhatsApp may surface the same person under either a phone-format JID - (``60123456789@s.whatsapp.net``) or a LID (``1234567890@lid``). This - applies to a DM ``chat_id`` *and* to the ``participant_id`` of a - member inside a group chat — both represent a user identity, and the - bridge may flip between the two for the same human. - - This helper reads the bridge's ``whatsapp/session/lid-mapping-*.json`` - files, walks the mapping transitively, and picks the shortest - (numeric-preferred) alias as the canonical identity. :func:`build_session_key` - uses this for both WhatsApp DM chat_ids and WhatsApp group participant_ids, - so callers get the same session-key identity Hermes itself uses. 
- - Plugins that need per-sender behaviour (role-based routing, - authorization, per-contact policy) should use this so their - bookkeeping lines up with Hermes' session bookkeeping even when - the bridge reshuffles aliases. - - Returns an empty string if ``identifier`` normalizes to empty. If no - mapping files exist yet (fresh bridge install), returns the - normalized input unchanged. - """ - normalized = normalize_whatsapp_identifier(identifier) - if not normalized: - return "" - - # _expand_whatsapp_aliases always includes `normalized` itself in the - # returned set, so the min() below degrades gracefully to `normalized` - # when no lid-mapping files are present. - aliases = _expand_whatsapp_aliases(normalized) - return min(aliases, key=lambda candidate: (len(candidate), candidate)) - - class SessionStore: """ Manages session storage and retrieval. diff --git a/gateway/whatsapp_identity.py b/gateway/whatsapp_identity.py new file mode 100644 index 000000000..b0792daf7 --- /dev/null +++ b/gateway/whatsapp_identity.py @@ -0,0 +1,135 @@ +"""Shared helpers for canonicalising WhatsApp sender identity. + +WhatsApp's bridge can surface the same human under two different JID shapes +within a single conversation: + +- LID form: ``999999999999999@lid`` +- Phone form: ``15551234567@s.whatsapp.net`` + +Both the authorisation path (:mod:`gateway.run`) and the session-key path +(:mod:`gateway.session`) need to collapse these aliases to a single stable +identity. This module is the single source of truth for that resolution so +the two paths can never drift apart. + +Public helpers: + +- :func:`normalize_whatsapp_identifier` — strip JID/LID/device/plus syntax + down to the bare numeric identifier. +- :func:`canonical_whatsapp_identifier` — walk the bridge's + ``lid-mapping-*.json`` files and return a stable canonical identity + across phone/LID variants. +- :func:`expand_whatsapp_aliases` — return the full alias set for an + identifier. 
Used by authorisation code that needs to match any known + form of a sender against an allow-list. + +Plugins that need per-sender behaviour on WhatsApp (role-based routing, +per-contact authorisation, policy gating in a gateway hook) should use +``canonical_whatsapp_identifier`` so their bookkeeping lines up with +Hermes' own session keys. +""" + +from __future__ import annotations + +import json +from typing import Set + +from hermes_constants import get_hermes_home + + +def normalize_whatsapp_identifier(value: str) -> str: + """Strip WhatsApp JID/LID syntax down to its stable numeric identifier. + + Accepts any of the identifier shapes the WhatsApp bridge may emit: + ``"60123456789@s.whatsapp.net"``, ``"60123456789:47@s.whatsapp.net"``, + ``"60123456789@lid"``, or a bare ``"+60123456789"`` / ``"60123456789"``. + Returns just the numeric identifier (``"60123456789"``) suitable for + equality comparisons. + + Useful for plugins that want to match sender IDs against + user-supplied config (phone numbers in ``config.yaml``) without + worrying about which variant the bridge happens to deliver. + """ + return ( + str(value or "") + .strip() + .replace("+", "", 1) + .split(":", 1)[0] + .split("@", 1)[0] + ) + + +def expand_whatsapp_aliases(identifier: str) -> Set[str]: + """Resolve WhatsApp phone/LID aliases via bridge session mapping files. + + Returns the set of all identifiers transitively reachable through the + bridge's ``$HERMES_HOME/whatsapp/session/lid-mapping-*.json`` files, + starting from ``identifier``. The result always includes the + normalized input itself, so callers can safely ``in`` check against + the return value without a separate fallback branch. + + Returns an empty set if ``identifier`` normalizes to empty. 
+ """ + normalized = normalize_whatsapp_identifier(identifier) + if not normalized: + return set() + + session_dir = get_hermes_home() / "whatsapp" / "session" + resolved: Set[str] = set() + queue = [normalized] + + while queue: + current = queue.pop(0) + if not current or current in resolved: + continue + + resolved.add(current) + for suffix in ("", "_reverse"): + mapping_path = session_dir / f"lid-mapping-{current}{suffix}.json" + if not mapping_path.exists(): + continue + try: + mapped = normalize_whatsapp_identifier( + json.loads(mapping_path.read_text(encoding="utf-8")) + ) + except Exception: + continue + if mapped and mapped not in resolved: + queue.append(mapped) + + return resolved + + +def canonical_whatsapp_identifier(identifier: str) -> str: + """Return a stable WhatsApp sender identity across phone-JID/LID variants. + + WhatsApp may surface the same person under either a phone-format JID + (``60123456789@s.whatsapp.net``) or a LID (``1234567890@lid``). This + applies to a DM ``chat_id`` *and* to the ``participant_id`` of a + member inside a group chat — both represent a user identity, and the + bridge may flip between the two for the same human. + + This helper reads the bridge's ``whatsapp/session/lid-mapping-*.json`` + files, walks the mapping transitively, and picks the shortest + (numeric-preferred) alias as the canonical identity. + :func:`gateway.session.build_session_key` uses this for both WhatsApp + DM chat_ids and WhatsApp group participant_ids, so callers get the + same session-key identity Hermes itself uses. + + Plugins that need per-sender behaviour (role-based routing, + authorisation, per-contact policy) should use this so their + bookkeeping lines up with Hermes' session bookkeeping even when + the bridge reshuffles aliases. + + Returns an empty string if ``identifier`` normalizes to empty. If no + mapping files exist yet (fresh bridge install), returns the + normalized input unchanged. 
+ """ + normalized = normalize_whatsapp_identifier(identifier) + if not normalized: + return "" + + # expand_whatsapp_aliases always includes `normalized` itself in the + # returned set, so the min() below degrades gracefully to `normalized` + # when no lid-mapping files are present. + aliases = expand_whatsapp_aliases(normalized) + return min(aliases, key=lambda candidate: (len(candidate), candidate)) diff --git a/tests/gateway/test_unauthorized_dm_behavior.py b/tests/gateway/test_unauthorized_dm_behavior.py index 23c06cc33..9571f3f4e 100644 --- a/tests/gateway/test_unauthorized_dm_behavior.py +++ b/tests/gateway/test_unauthorized_dm_behavior.py @@ -3,7 +3,6 @@ from unittest.mock import AsyncMock, MagicMock import pytest -import gateway.run as gateway_run from gateway.config import GatewayConfig, Platform, PlatformConfig from gateway.platforms.base import MessageEvent from gateway.session import SessionSource @@ -76,7 +75,7 @@ def _make_runner(platform: Platform, config: GatewayConfig): def test_whatsapp_lid_user_matches_phone_allowlist_via_session_mapping(monkeypatch, tmp_path): _clear_auth_env(monkeypatch) monkeypatch.setenv("WHATSAPP_ALLOWED_USERS", "15550000001") - monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path) + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) session_dir = tmp_path / "whatsapp" / "session" session_dir.mkdir(parents=True)