From 84f350efe06be9f0c2e9b05471acad5db91b0e7b Mon Sep 17 00:00:00 2001 From: keiravoss94 Date: Mon, 27 Apr 2026 16:00:09 +0800 Subject: [PATCH] feat(whatsapp): opt-in forwarding of owner-typed messages in bot mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In `WHATSAPP_MODE=bot` the bridge currently drops every fromMe inbound message — they are all assumed to be echoes of our own /send calls. That makes it impossible for plugins / agents to detect when a human owner has typed directly into a customer chat from the same WhatsApp Business account (e.g. via a linked phone or WhatsApp Web). This adds an opt-in `WHATSAPP_FORWARD_OWNER_MESSAGES` env var. When true, the bridge classifies fromMe inbound by looking up `key.id` in a bounded LRU of recently-sent message IDs (the existing 50-entry echo suppressor, bumped to 512 and extracted to a testable `outbound_ids.js` helper). Hits in the LRU are still dropped (echoes); misses are forwarded to the Python adapter with `fromOwner: true`. The Python adapter lifts that flag onto `MessageEvent.metadata["whatsapp_from_owner"]`. `metadata` is a new free-form dict on the event so future per-platform signals don't each need their own field. Default behaviour is unchanged: with the env flag unset, bot mode still drops every fromMe message exactly as before. Use cases for downstream consumers: - Implicit handover activation when the owner replies manually - Sliding TTL on owner activity (keep an active session alive while the owner is engaged) - Audit trails of owner interventions - Analytics on human-vs-bot reply ratios Heuristic limitation (documented in code): the LRU is in-memory. After a bridge restart, in-flight delivery receipts of pre-restart sends will briefly look like owner-typed for a few seconds until the set is repopulated. Persisting isn't worth the disk churn — downstream consumers should treat the flag as best-effort. Tests: - tests/gateway/test_whatsapp_from_owner.py (new): adapter sets the metadata flag iff the bridge payload has `fromOwner: true`; absent otherwise. - scripts/whatsapp-bridge/outbound_ids.test.mjs (new): LRU bounds, eviction order, falsy-id handling. Backwards compatibility: with the env flag unset, every code path is identical to before. No existing deployment is affected. --- gateway/platforms/base.py | 7 ++ plugins/platforms/whatsapp/adapter.py | 13 +++ scripts/whatsapp-bridge/bridge.js | 80 ++++++++++++----- scripts/whatsapp-bridge/outbound_ids.js | 39 ++++++++ scripts/whatsapp-bridge/outbound_ids.test.mjs | 62 +++++++++++++ tests/gateway/test_whatsapp_from_owner.py | 90 +++++++++++++++++++ 6 files changed, 268 insertions(+), 23 deletions(-) create mode 100644 scripts/whatsapp-bridge/outbound_ids.js create mode 100644 scripts/whatsapp-bridge/outbound_ids.test.mjs create mode 100644 tests/gateway/test_whatsapp_from_owner.py diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py index d34ad36a403..eb393a7a611 100644 --- a/gateway/platforms/base.py +++ b/gateway/platforms/base.py @@ -1691,6 +1691,13 @@ class MessageEvent: # completion notifications) that must bypass user authorization checks. internal: bool = False + # Free-form per-event metadata. Adapters may set platform-specific + # signals here (e.g. WhatsApp sets ``whatsapp_from_owner=True`` when + # the bridge is configured to forward owner-typed messages). Plugins + # consume via ``event.metadata.get(...)`` and must not rely on any + # particular key existing. + metadata: Dict[str, Any] = field(default_factory=dict) + # Timestamps timestamp: datetime = field(default_factory=datetime.now) diff --git a/plugins/platforms/whatsapp/adapter.py b/plugins/platforms/whatsapp/adapter.py index cebb129e399..997dc65ee6d 100644 --- a/plugins/platforms/whatsapp/adapter.py +++ b/plugins/platforms/whatsapp/adapter.py @@ -1309,6 +1309,18 @@ class WhatsAppAdapter(WhatsAppBehaviorMixin, BasePlatformAdapter): except Exception as e: print(f"[{self.name}] Failed to read document text: {e}", flush=True) + metadata: Dict[str, Any] = {} + # The bridge sets ``fromOwner: true`` on inbound fromMe messages + # that look owner-typed (linked-device send, not echoed from our + # own /send). Surfaced under a platform-prefixed key so plugins + # can detect "owner just replied in this customer chat" without + # having to peek at raw_message. Gated by + # ``WHATSAPP_FORWARD_OWNER_MESSAGES`` at the bridge layer; the + # propagation here is unconditional so a future producer can set + # the flag without us having to touch this code path again. + if data.get("fromOwner"): + metadata["whatsapp_from_owner"] = True + return MessageEvent( text=body, message_type=msg_type, @@ -1317,6 +1329,7 @@ class WhatsAppAdapter(WhatsAppBehaviorMixin, BasePlatformAdapter): message_id=data.get("messageId"), media_urls=cached_urls, media_types=media_types, + metadata=metadata, ) except Exception as e: print(f"[{self.name}] Error building event: {e}") diff --git a/scripts/whatsapp-bridge/bridge.js b/scripts/whatsapp-bridge/bridge.js index 736715d693a..b3b256d76c8 100644 --- a/scripts/whatsapp-bridge/bridge.js +++ b/scripts/whatsapp-bridge/bridge.js @@ -30,6 +30,7 @@ import { execSync } from 'child_process'; import { tmpdir } from 'os'; import qrcode from 'qrcode-terminal'; import { matchesAllowedUser, parseAllowedUsers } from './allowlist.js'; +import { createOutboundIdTracker } from './outbound_ids.js'; // Parse CLI args const args = process.argv.slice(2); @@ -44,6 +45,23 @@ const WHATSAPP_DEBUG = typeof process.env.WHATSAPP_DEBUG === 'string' && ['1', 'true', 'yes', 'on'].includes(process.env.WHATSAPP_DEBUG.toLowerCase()); +// Opt-in: when true (and WHATSAPP_MODE === 'bot'), fromMe inbound messages +// that are NOT echoes of our own /send or /send-media calls are forwarded +// to the Python adapter with `fromOwner: true`. This lets plugins detect +// "owner just typed in this customer chat" — needed for handover / sliding +// TTL flows. Default OFF: existing deployments see no behavior change. +// +// Heuristic limitation: we distinguish bot-API-sent from owner-typed by +// looking up `key.id` in `recentlySentIds` (populated when /send returns). +// On bridge restart that set is empty, so a few in-flight bot replies may +// briefly look like owner-typed until they age out. Acceptable; we don't +// persist the set. +const FORWARD_OWNER_MESSAGES = + typeof process !== 'undefined' && + process.env && + typeof process.env.WHATSAPP_FORWARD_OWNER_MESSAGES === 'string' && + ['1', 'true', 'yes', 'on'].includes(process.env.WHATSAPP_FORWARD_OWNER_MESSAGES.toLowerCase()); + const PORT = parseInt(getArg('port', '3000'), 10); const SESSION_DIR = getArg('session', path.join(process.env.HOME || '~', '.hermes', 'whatsapp', 'session')); // Cache directories: the Python gateway passes the profile-aware paths via @@ -146,12 +164,7 @@ function splitLongMessage(message, maxLength = MAX_MESSAGE_LENGTH) { } function trackSentMessageId(sent) { - if (sent?.key?.id) { - recentlySentIds.add(sent.key.id); - if (recentlySentIds.size > MAX_RECENT_IDS) { - recentlySentIds.delete(recentlySentIds.values().next().value); - } - } + rememberSentId(sent?.key?.id); } function normalizeWhatsAppId(value) { @@ -205,9 +218,18 @@ const logger = pino({ level: 'warn' }); const messageQueue = []; const MAX_QUEUE_SIZE = 100; -// Track recently sent message IDs to prevent echo-back loops with media -const recentlySentIds = new Set(); -const MAX_RECENT_IDS = 50; +// Track recently sent message IDs. Two purposes: +// 1. Prevent echo-back loops with media in self-chat mode. +// 2. (When WHATSAPP_FORWARD_OWNER_MESSAGES=true) distinguish our own +// bot-API outbound messages from owner-typed messages on the linked +// device so we can forward only the latter. +// Capacity bounded (see outbound_ids.js) to keep memory flat under +// sustained sending. +const recentlySentIds = createOutboundIdTracker(512); + +function rememberSentId(id) { + recentlySentIds.remember(id); +} let sock = null; let connectionState = 'disconnected'; @@ -300,23 +322,33 @@ async function startSocket() { const senderNumber = senderId.replace(/@.*/, ''); // Handle fromMe messages based on mode + let fromOwner = false; if (msg.key.fromMe) { if (isGroup || chatId.includes('status')) continue; if (WHATSAPP_MODE === 'bot') { - // Bot mode: separate number. ALL fromMe are echo-backs of our own replies — skip. - continue; + // Bot mode: separate bot number. fromMe inbound is either + // (a) an echo of our own /send (recentlySentIds will catch it), or + // (b) a message the owner typed from their own phone using the + // linked-device session. + // + // We always drop (a). We drop (b) too unless the operator opts in + // via WHATSAPP_FORWARD_OWNER_MESSAGES so existing deployments see + // no behavior change. + if (recentlySentIds.has(msg.key.id)) continue; + if (!FORWARD_OWNER_MESSAGES) continue; + fromOwner = true; + } else { + // Self-chat mode: only allow messages in the user's own self-chat. + // WhatsApp now uses LID (Linked Identity Device) format: 67427329167522@lid + // AND classic format: 34652029134@s.whatsapp.net + // sock.user has both: { id: "number:10@s.whatsapp.net", lid: "lid_number:10@lid" } + const myNumber = (sock.user?.id || '').replace(/:.*@/, '@').replace(/@.*/, ''); + const myLid = (sock.user?.lid || '').replace(/:.*@/, '@').replace(/@.*/, ''); + const chatNumber = chatId.replace(/@.*/, ''); + const isSelfChat = (myNumber && chatNumber === myNumber) || (myLid && chatNumber === myLid); + if (!isSelfChat) continue; } - - // Self-chat mode: only allow messages in the user's own self-chat - // WhatsApp now uses LID (Linked Identity Device) format: 67427329167522@lid - // AND classic format: 34652029134@s.whatsapp.net - // sock.user has both: { id: "number:10@s.whatsapp.net", lid: "lid_number:10@lid" } - const myNumber = (sock.user?.id || '').replace(/:.*@/, '@').replace(/@.*/, ''); - const myLid = (sock.user?.lid || '').replace(/:.*@/, '@').replace(/@.*/, ''); - const chatNumber = chatId.replace(/@.*/, ''); - const isSelfChat = (myNumber && chatNumber === myNumber) || (myLid && chatNumber === myLid); - if (!isSelfChat) continue; } // Handle !fromMe messages (from other people) based on mode. @@ -473,6 +505,7 @@ async function startSocket() { hasQuotedMessage, botIds, timestamp: msg.messageTimestamp, + fromOwner, }; messageQueue.push(event); @@ -678,9 +711,7 @@ app.post('/send-media', async (req, res) => { } const sent = await sendWithTimeout(chatId, msgPayload); - trackSentMessageId(sent); - res.json({ success: true, messageId: sent?.key?.id }); } catch (err) { res.status(500).json({ error: err.message }); @@ -759,6 +790,9 @@ if (PAIR_ONLY) { console.log(` Set WHATSAPP_ALLOWED_USERS= to authorize specific users,`); console.log(` or WHATSAPP_ALLOWED_USERS=* for an explicit open bot.`); } + if (WHATSAPP_MODE === 'bot' && FORWARD_OWNER_MESSAGES) { + console.log(`👤 WHATSAPP_FORWARD_OWNER_MESSAGES=true — owner-typed messages will be forwarded with fromOwner:true`); + } console.log(); startSocket(); }); diff --git a/scripts/whatsapp-bridge/outbound_ids.js b/scripts/whatsapp-bridge/outbound_ids.js new file mode 100644 index 00000000000..cb2b95fdcbb --- /dev/null +++ b/scripts/whatsapp-bridge/outbound_ids.js @@ -0,0 +1,39 @@ +/** + * Bounded LRU set of outbound message IDs. + * + * Used by the WhatsApp bridge to distinguish "echo of our own /send" from + * "owner-typed message on the linked device" when forwarding `fromMe` + * inbound events back to the Python adapter. + * + * Heuristic limitation (intentional, documented for future debugging): + * the set is in-memory only. On bridge restart it is empty, so for the + * brief window between restart and the first new outbound, any in-flight + * delivery receipts of pre-restart sends would be classified as + * owner-typed. The TTL on owner-driven plugin actions (e.g. handover + * sliding TTL) bounds blast radius; persisting would not be worth the + * extra complexity / disk churn. + */ + +export function createOutboundIdTracker(maxSize = 512) { + const ids = new Set(); + + function remember(id) { + if (!id) return; + ids.add(id); + while (ids.size > maxSize) { + // Set iteration order is insertion order, so values().next() is the + // oldest entry — drop it to keep memory flat under sustained sending. + ids.delete(ids.values().next().value); + } + } + + function has(id) { + return Boolean(id) && ids.has(id); + } + + function size() { + return ids.size; + } + + return { remember, has, size }; +} diff --git a/scripts/whatsapp-bridge/outbound_ids.test.mjs b/scripts/whatsapp-bridge/outbound_ids.test.mjs new file mode 100644 index 00000000000..7c88d5e58ae --- /dev/null +++ b/scripts/whatsapp-bridge/outbound_ids.test.mjs @@ -0,0 +1,62 @@ +import test from 'node:test'; +import assert from 'node:assert/strict'; + +import { createOutboundIdTracker } from './outbound_ids.js'; + +test('remembers and recognises an outbound id', () => { + const tracker = createOutboundIdTracker(); + tracker.remember('msg-1'); + assert.equal(tracker.has('msg-1'), true); + assert.equal(tracker.has('msg-2'), false); +}); + +test('ignores empty / falsy ids', () => { + const tracker = createOutboundIdTracker(); + tracker.remember(undefined); + tracker.remember(''); + tracker.remember(null); + assert.equal(tracker.size(), 0); + assert.equal(tracker.has(''), false); + assert.equal(tracker.has(undefined), false); +}); + +test('evicts oldest entry once the cap is exceeded', () => { + const tracker = createOutboundIdTracker(3); + tracker.remember('a'); + tracker.remember('b'); + tracker.remember('c'); + tracker.remember('d'); // cap=3 → 'a' should be evicted + assert.equal(tracker.has('a'), false); + assert.equal(tracker.has('b'), true); + assert.equal(tracker.has('c'), true); + assert.equal(tracker.has('d'), true); + assert.equal(tracker.size(), 3); +}); + +test('cap holds across many inserts (bounded memory)', () => { + const tracker = createOutboundIdTracker(8); + for (let i = 0; i < 100; i += 1) { + tracker.remember(`id-${i}`); + } + assert.equal(tracker.size(), 8); + // Oldest (id-0..id-91) should be gone, latest 8 retained. + assert.equal(tracker.has('id-0'), false); + assert.equal(tracker.has('id-91'), false); + assert.equal(tracker.has('id-92'), true); + assert.equal(tracker.has('id-99'), true); +}); + +test('re-remembering an existing id refreshes its position', () => { + // Insertion-order semantics: re-adding doesn't move it forward in + // Set iteration order. This is intentional — we don't need recency, + // just bounded membership. Pin the actual behaviour so future + // refactors don't accidentally introduce LRU semantics. + const tracker = createOutboundIdTracker(2); + tracker.remember('a'); + tracker.remember('b'); + tracker.remember('a'); // no-op for ordering + tracker.remember('c'); // evicts 'a' (oldest by insertion) + assert.equal(tracker.has('a'), false); + assert.equal(tracker.has('b'), true); + assert.equal(tracker.has('c'), true); +}); diff --git a/tests/gateway/test_whatsapp_from_owner.py b/tests/gateway/test_whatsapp_from_owner.py new file mode 100644 index 00000000000..3b3b197fe0d --- /dev/null +++ b/tests/gateway/test_whatsapp_from_owner.py @@ -0,0 +1,90 @@ +"""Tests for WhatsApp owner-message metadata propagation. + +The Node bridge sets ``fromOwner: true`` on inbound `fromMe` messages that +look owner-typed (linked-device send, not echoed from /send) when the +operator opts into ``WHATSAPP_FORWARD_OWNER_MESSAGES``. These tests pin +the adapter's responsibility: lift that flag onto +``MessageEvent.metadata["whatsapp_from_owner"]`` and otherwise leave it +absent. The env-var gate itself lives in the bridge — the adapter just +trusts the payload. +""" + +from __future__ import annotations + +import asyncio +from unittest.mock import AsyncMock, MagicMock + +import pytest + +from gateway.config import Platform, PlatformConfig +from gateway.platforms.whatsapp import WhatsAppAdapter + + +def _make_adapter(): + adapter = WhatsAppAdapter.__new__(WhatsAppAdapter) + adapter.platform = Platform.WHATSAPP + adapter.config = PlatformConfig(enabled=True) + adapter._message_handler = AsyncMock() + adapter._dm_policy = "open" + adapter._allow_from = set() + adapter._group_policy = "open" + adapter._group_allow_from = set() + adapter._mention_patterns = [] + adapter._free_response_chats = set() + adapter._whatsapp_free_response_chats = lambda: set() + return adapter + + +def _dm_payload(**overrides): + payload = { + "messageId": "M1", + "chatId": "6281234567890@s.whatsapp.net", + "senderId": "6281234567890@s.whatsapp.net", + "senderName": "Customer", + "chatName": "Customer", + "isGroup": False, + "body": "hi from the linked phone", + "hasMedia": False, + "mediaType": "", + "mediaUrls": [], + "mentionedIds": [], + "quotedParticipant": "", + "botIds": [], + "timestamp": 0, + } + payload.update(overrides) + return payload + + +def test_metadata_flag_set_when_payload_has_from_owner(): + adapter = _make_adapter() + payload = _dm_payload(fromOwner=True) + + event = asyncio.run(adapter._build_message_event(payload)) + + assert event is not None + assert event.metadata.get("whatsapp_from_owner") is True + + +def test_metadata_flag_absent_by_default(): + """Default bridge payload (env flag off → field never present) must not + leak the metadata key. Plugins use ``.get(...)`` and rely on absence.""" + adapter = _make_adapter() + payload = _dm_payload() + + event = asyncio.run(adapter._build_message_event(payload)) + + assert event is not None + assert "whatsapp_from_owner" not in event.metadata + + +def test_metadata_flag_absent_when_explicitly_false(): + """Explicit fromOwner=false must not set the metadata key — plugins + test for truthiness, but absence is the canonical "not owner" state.""" + adapter = _make_adapter() + payload = _dm_payload(fromOwner=False) + + event = asyncio.run(adapter._build_message_event(payload)) + + assert event is not None + assert "whatsapp_from_owner" not in event.metadata