mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-07-04 12:33:08 +00:00
feat(whatsapp): opt-in forwarding of owner-typed messages in bot mode
In `WHATSAPP_MODE=bot` the bridge currently drops every fromMe inbound message — they are all assumed to be echoes of our own /send calls. That makes it impossible for plugins / agents to detect when a human owner has typed directly into a customer chat from the same WhatsApp Business account (e.g. via a linked phone or WhatsApp Web). This adds an opt-in `WHATSAPP_FORWARD_OWNER_MESSAGES` env var. When true, the bridge classifies fromMe inbound by looking up `key.id` in a bounded LRU of recently-sent message IDs (the existing 50-entry echo suppressor, bumped to 512 and extracted to a testable `outbound_ids.js` helper). Hits in the LRU are still dropped (echoes); misses are forwarded to the Python adapter with `fromOwner: true`. The Python adapter lifts that flag onto `MessageEvent.metadata["whatsapp_from_owner"]`. `metadata` is a new free-form dict on the event so future per-platform signals don't each need their own field. Default behaviour is unchanged: with the env flag unset, bot mode still drops every fromMe message exactly as before. Use cases for downstream consumers: - Implicit handover activation when the owner replies manually - Sliding TTL on owner activity (keep an active session alive while the owner is engaged) - Audit trails of owner interventions - Analytics on human-vs-bot reply ratios Heuristic limitation (documented in code): the LRU is in-memory. After a bridge restart, in-flight delivery receipts of pre-restart sends will briefly look like owner-typed for a few seconds until the set is repopulated. Persisting isn't worth the disk churn — downstream consumers should treat the flag as best-effort. Tests: - tests/gateway/test_whatsapp_from_owner.py (new): adapter sets the metadata flag iff the bridge payload has `fromOwner: true`; absent otherwise. - scripts/whatsapp-bridge/outbound_ids.test.mjs (new): LRU bounds, eviction order, falsy-id handling. Backwards compatibility: with the env flag unset, every code path is identical to before. No existing deployment is affected.
This commit is contained in:
parent
1366f376d6
commit
84f350efe0
6 changed files with 268 additions and 23 deletions
|
|
@ -1691,6 +1691,13 @@ class MessageEvent:
|
|||
# completion notifications) that must bypass user authorization checks.
|
||||
internal: bool = False
|
||||
|
||||
# Free-form per-event metadata. Adapters may set platform-specific
|
||||
# signals here (e.g. WhatsApp sets ``whatsapp_from_owner=True`` when
|
||||
# the bridge is configured to forward owner-typed messages). Plugins
|
||||
# consume via ``event.metadata.get(...)`` and must not rely on any
|
||||
# particular key existing.
|
||||
metadata: Dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
# Timestamps
|
||||
timestamp: datetime = field(default_factory=datetime.now)
|
||||
|
||||
|
|
|
|||
|
|
@ -1309,6 +1309,18 @@ class WhatsAppAdapter(WhatsAppBehaviorMixin, BasePlatformAdapter):
|
|||
except Exception as e:
|
||||
print(f"[{self.name}] Failed to read document text: {e}", flush=True)
|
||||
|
||||
metadata: Dict[str, Any] = {}
|
||||
# The bridge sets ``fromOwner: true`` on inbound fromMe messages
|
||||
# that look owner-typed (linked-device send, not echoed from our
|
||||
# own /send). Surfaced under a platform-prefixed key so plugins
|
||||
# can detect "owner just replied in this customer chat" without
|
||||
# having to peek at raw_message. Gated by
|
||||
# ``WHATSAPP_FORWARD_OWNER_MESSAGES`` at the bridge layer; the
|
||||
# propagation here is unconditional so a future producer can set
|
||||
# the flag without us having to touch this code path again.
|
||||
if data.get("fromOwner"):
|
||||
metadata["whatsapp_from_owner"] = True
|
||||
|
||||
return MessageEvent(
|
||||
text=body,
|
||||
message_type=msg_type,
|
||||
|
|
@ -1317,6 +1329,7 @@ class WhatsAppAdapter(WhatsAppBehaviorMixin, BasePlatformAdapter):
|
|||
message_id=data.get("messageId"),
|
||||
media_urls=cached_urls,
|
||||
media_types=media_types,
|
||||
metadata=metadata,
|
||||
)
|
||||
except Exception as e:
|
||||
print(f"[{self.name}] Error building event: {e}")
|
||||
|
|
|
|||
|
|
@ -30,6 +30,7 @@ import { execSync } from 'child_process';
|
|||
import { tmpdir } from 'os';
|
||||
import qrcode from 'qrcode-terminal';
|
||||
import { matchesAllowedUser, parseAllowedUsers } from './allowlist.js';
|
||||
import { createOutboundIdTracker } from './outbound_ids.js';
|
||||
|
||||
// Parse CLI args
|
||||
const args = process.argv.slice(2);
|
||||
|
|
@ -44,6 +45,23 @@ const WHATSAPP_DEBUG =
|
|||
typeof process.env.WHATSAPP_DEBUG === 'string' &&
|
||||
['1', 'true', 'yes', 'on'].includes(process.env.WHATSAPP_DEBUG.toLowerCase());
|
||||
|
||||
// Opt-in: when true (and WHATSAPP_MODE === 'bot'), fromMe inbound messages
|
||||
// that are NOT echoes of our own /send or /send-media calls are forwarded
|
||||
// to the Python adapter with `fromOwner: true`. This lets plugins detect
|
||||
// "owner just typed in this customer chat" — needed for handover / sliding
|
||||
// TTL flows. Default OFF: existing deployments see no behavior change.
|
||||
//
|
||||
// Heuristic limitation: we distinguish bot-API-sent from owner-typed by
|
||||
// looking up `key.id` in `recentlySentIds` (populated when /send returns).
|
||||
// On bridge restart that set is empty, so a few in-flight bot replies may
|
||||
// briefly look like owner-typed until they age out. Acceptable; we don't
|
||||
// persist the set.
|
||||
const FORWARD_OWNER_MESSAGES =
|
||||
typeof process !== 'undefined' &&
|
||||
process.env &&
|
||||
typeof process.env.WHATSAPP_FORWARD_OWNER_MESSAGES === 'string' &&
|
||||
['1', 'true', 'yes', 'on'].includes(process.env.WHATSAPP_FORWARD_OWNER_MESSAGES.toLowerCase());
|
||||
|
||||
const PORT = parseInt(getArg('port', '3000'), 10);
|
||||
const SESSION_DIR = getArg('session', path.join(process.env.HOME || '~', '.hermes', 'whatsapp', 'session'));
|
||||
// Cache directories: the Python gateway passes the profile-aware paths via
|
||||
|
|
@ -146,12 +164,7 @@ function splitLongMessage(message, maxLength = MAX_MESSAGE_LENGTH) {
|
|||
}
|
||||
|
||||
function trackSentMessageId(sent) {
|
||||
if (sent?.key?.id) {
|
||||
recentlySentIds.add(sent.key.id);
|
||||
if (recentlySentIds.size > MAX_RECENT_IDS) {
|
||||
recentlySentIds.delete(recentlySentIds.values().next().value);
|
||||
}
|
||||
}
|
||||
rememberSentId(sent?.key?.id);
|
||||
}
|
||||
|
||||
function normalizeWhatsAppId(value) {
|
||||
|
|
@ -205,9 +218,18 @@ const logger = pino({ level: 'warn' });
|
|||
const messageQueue = [];
|
||||
const MAX_QUEUE_SIZE = 100;
|
||||
|
||||
// Track recently sent message IDs to prevent echo-back loops with media
|
||||
const recentlySentIds = new Set();
|
||||
const MAX_RECENT_IDS = 50;
|
||||
// Track recently sent message IDs. Two purposes:
|
||||
// 1. Prevent echo-back loops with media in self-chat mode.
|
||||
// 2. (When WHATSAPP_FORWARD_OWNER_MESSAGES=true) distinguish our own
|
||||
// bot-API outbound messages from owner-typed messages on the linked
|
||||
// device so we can forward only the latter.
|
||||
// Capacity bounded (see outbound_ids.js) to keep memory flat under
|
||||
// sustained sending.
|
||||
const recentlySentIds = createOutboundIdTracker(512);
|
||||
|
||||
function rememberSentId(id) {
|
||||
recentlySentIds.remember(id);
|
||||
}
|
||||
|
||||
let sock = null;
|
||||
let connectionState = 'disconnected';
|
||||
|
|
@ -300,23 +322,33 @@ async function startSocket() {
|
|||
const senderNumber = senderId.replace(/@.*/, '');
|
||||
|
||||
// Handle fromMe messages based on mode
|
||||
let fromOwner = false;
|
||||
if (msg.key.fromMe) {
|
||||
if (isGroup || chatId.includes('status')) continue;
|
||||
|
||||
if (WHATSAPP_MODE === 'bot') {
|
||||
// Bot mode: separate number. ALL fromMe are echo-backs of our own replies — skip.
|
||||
continue;
|
||||
// Bot mode: separate bot number. fromMe inbound is either
|
||||
// (a) an echo of our own /send (recentlySentIds will catch it), or
|
||||
// (b) a message the owner typed from their own phone using the
|
||||
// linked-device session.
|
||||
//
|
||||
// We always drop (a). We drop (b) too unless the operator opts in
|
||||
// via WHATSAPP_FORWARD_OWNER_MESSAGES so existing deployments see
|
||||
// no behavior change.
|
||||
if (recentlySentIds.has(msg.key.id)) continue;
|
||||
if (!FORWARD_OWNER_MESSAGES) continue;
|
||||
fromOwner = true;
|
||||
} else {
|
||||
// Self-chat mode: only allow messages in the user's own self-chat.
|
||||
// WhatsApp now uses LID (Linked Identity Device) format: 67427329167522@lid
|
||||
// AND classic format: 34652029134@s.whatsapp.net
|
||||
// sock.user has both: { id: "number:10@s.whatsapp.net", lid: "lid_number:10@lid" }
|
||||
const myNumber = (sock.user?.id || '').replace(/:.*@/, '@').replace(/@.*/, '');
|
||||
const myLid = (sock.user?.lid || '').replace(/:.*@/, '@').replace(/@.*/, '');
|
||||
const chatNumber = chatId.replace(/@.*/, '');
|
||||
const isSelfChat = (myNumber && chatNumber === myNumber) || (myLid && chatNumber === myLid);
|
||||
if (!isSelfChat) continue;
|
||||
}
|
||||
|
||||
// Self-chat mode: only allow messages in the user's own self-chat
|
||||
// WhatsApp now uses LID (Linked Identity Device) format: 67427329167522@lid
|
||||
// AND classic format: 34652029134@s.whatsapp.net
|
||||
// sock.user has both: { id: "number:10@s.whatsapp.net", lid: "lid_number:10@lid" }
|
||||
const myNumber = (sock.user?.id || '').replace(/:.*@/, '@').replace(/@.*/, '');
|
||||
const myLid = (sock.user?.lid || '').replace(/:.*@/, '@').replace(/@.*/, '');
|
||||
const chatNumber = chatId.replace(/@.*/, '');
|
||||
const isSelfChat = (myNumber && chatNumber === myNumber) || (myLid && chatNumber === myLid);
|
||||
if (!isSelfChat) continue;
|
||||
}
|
||||
|
||||
// Handle !fromMe messages (from other people) based on mode.
|
||||
|
|
@ -473,6 +505,7 @@ async function startSocket() {
|
|||
hasQuotedMessage,
|
||||
botIds,
|
||||
timestamp: msg.messageTimestamp,
|
||||
fromOwner,
|
||||
};
|
||||
|
||||
messageQueue.push(event);
|
||||
|
|
@ -678,9 +711,7 @@ app.post('/send-media', async (req, res) => {
|
|||
}
|
||||
|
||||
const sent = await sendWithTimeout(chatId, msgPayload);
|
||||
|
||||
trackSentMessageId(sent);
|
||||
|
||||
res.json({ success: true, messageId: sent?.key?.id });
|
||||
} catch (err) {
|
||||
res.status(500).json({ error: err.message });
|
||||
|
|
@ -759,6 +790,9 @@ if (PAIR_ONLY) {
|
|||
console.log(` Set WHATSAPP_ALLOWED_USERS=<phone> to authorize specific users,`);
|
||||
console.log(` or WHATSAPP_ALLOWED_USERS=* for an explicit open bot.`);
|
||||
}
|
||||
if (WHATSAPP_MODE === 'bot' && FORWARD_OWNER_MESSAGES) {
|
||||
console.log(`👤 WHATSAPP_FORWARD_OWNER_MESSAGES=true — owner-typed messages will be forwarded with fromOwner:true`);
|
||||
}
|
||||
console.log();
|
||||
startSocket();
|
||||
});
|
||||
|
|
|
|||
39
scripts/whatsapp-bridge/outbound_ids.js
Normal file
39
scripts/whatsapp-bridge/outbound_ids.js
Normal file
|
|
@ -0,0 +1,39 @@
|
|||
/**
|
||||
* Bounded LRU set of outbound message IDs.
|
||||
*
|
||||
* Used by the WhatsApp bridge to distinguish "echo of our own /send" from
|
||||
* "owner-typed message on the linked device" when forwarding `fromMe`
|
||||
* inbound events back to the Python adapter.
|
||||
*
|
||||
* Heuristic limitation (intentional, documented for future debugging):
|
||||
* the set is in-memory only. On bridge restart it is empty, so for the
|
||||
* brief window between restart and the first new outbound, any in-flight
|
||||
* delivery receipts of pre-restart sends would be classified as
|
||||
* owner-typed. The TTL on owner-driven plugin actions (e.g. handover
|
||||
* sliding TTL) bounds blast radius; persisting would not be worth the
|
||||
* extra complexity / disk churn.
|
||||
*/
|
||||
|
||||
export function createOutboundIdTracker(maxSize = 512) {
|
||||
const ids = new Set();
|
||||
|
||||
function remember(id) {
|
||||
if (!id) return;
|
||||
ids.add(id);
|
||||
while (ids.size > maxSize) {
|
||||
// Set iteration order is insertion order, so values().next() is the
|
||||
// oldest entry — drop it to keep memory flat under sustained sending.
|
||||
ids.delete(ids.values().next().value);
|
||||
}
|
||||
}
|
||||
|
||||
function has(id) {
|
||||
return Boolean(id) && ids.has(id);
|
||||
}
|
||||
|
||||
function size() {
|
||||
return ids.size;
|
||||
}
|
||||
|
||||
return { remember, has, size };
|
||||
}
|
||||
62
scripts/whatsapp-bridge/outbound_ids.test.mjs
Normal file
62
scripts/whatsapp-bridge/outbound_ids.test.mjs
Normal file
|
|
@ -0,0 +1,62 @@
|
|||
import test from 'node:test';
|
||||
import assert from 'node:assert/strict';
|
||||
|
||||
import { createOutboundIdTracker } from './outbound_ids.js';
|
||||
|
||||
test('remembers and recognises an outbound id', () => {
|
||||
const tracker = createOutboundIdTracker();
|
||||
tracker.remember('msg-1');
|
||||
assert.equal(tracker.has('msg-1'), true);
|
||||
assert.equal(tracker.has('msg-2'), false);
|
||||
});
|
||||
|
||||
test('ignores empty / falsy ids', () => {
|
||||
const tracker = createOutboundIdTracker();
|
||||
tracker.remember(undefined);
|
||||
tracker.remember('');
|
||||
tracker.remember(null);
|
||||
assert.equal(tracker.size(), 0);
|
||||
assert.equal(tracker.has(''), false);
|
||||
assert.equal(tracker.has(undefined), false);
|
||||
});
|
||||
|
||||
test('evicts oldest entry once the cap is exceeded', () => {
|
||||
const tracker = createOutboundIdTracker(3);
|
||||
tracker.remember('a');
|
||||
tracker.remember('b');
|
||||
tracker.remember('c');
|
||||
tracker.remember('d'); // cap=3 → 'a' should be evicted
|
||||
assert.equal(tracker.has('a'), false);
|
||||
assert.equal(tracker.has('b'), true);
|
||||
assert.equal(tracker.has('c'), true);
|
||||
assert.equal(tracker.has('d'), true);
|
||||
assert.equal(tracker.size(), 3);
|
||||
});
|
||||
|
||||
test('cap holds across many inserts (bounded memory)', () => {
|
||||
const tracker = createOutboundIdTracker(8);
|
||||
for (let i = 0; i < 100; i += 1) {
|
||||
tracker.remember(`id-${i}`);
|
||||
}
|
||||
assert.equal(tracker.size(), 8);
|
||||
// Oldest (id-0..id-91) should be gone, latest 8 retained.
|
||||
assert.equal(tracker.has('id-0'), false);
|
||||
assert.equal(tracker.has('id-91'), false);
|
||||
assert.equal(tracker.has('id-92'), true);
|
||||
assert.equal(tracker.has('id-99'), true);
|
||||
});
|
||||
|
||||
test('re-remembering an existing id refreshes its position', () => {
|
||||
// Insertion-order semantics: re-adding doesn't move it forward in
|
||||
// Set iteration order. This is intentional — we don't need recency,
|
||||
// just bounded membership. Pin the actual behaviour so future
|
||||
// refactors don't accidentally introduce LRU semantics.
|
||||
const tracker = createOutboundIdTracker(2);
|
||||
tracker.remember('a');
|
||||
tracker.remember('b');
|
||||
tracker.remember('a'); // no-op for ordering
|
||||
tracker.remember('c'); // evicts 'a' (oldest by insertion)
|
||||
assert.equal(tracker.has('a'), false);
|
||||
assert.equal(tracker.has('b'), true);
|
||||
assert.equal(tracker.has('c'), true);
|
||||
});
|
||||
90
tests/gateway/test_whatsapp_from_owner.py
Normal file
90
tests/gateway/test_whatsapp_from_owner.py
Normal file
|
|
@ -0,0 +1,90 @@
|
|||
"""Tests for WhatsApp owner-message metadata propagation.
|
||||
|
||||
The Node bridge sets ``fromOwner: true`` on inbound `fromMe` messages that
|
||||
look owner-typed (linked-device send, not echoed from /send) when the
|
||||
operator opts into ``WHATSAPP_FORWARD_OWNER_MESSAGES``. These tests pin
|
||||
the adapter's responsibility: lift that flag onto
|
||||
``MessageEvent.metadata["whatsapp_from_owner"]`` and otherwise leave it
|
||||
absent. The env-var gate itself lives in the bridge — the adapter just
|
||||
trusts the payload.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from unittest.mock import AsyncMock, MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from gateway.config import Platform, PlatformConfig
|
||||
from gateway.platforms.whatsapp import WhatsAppAdapter
|
||||
|
||||
|
||||
def _make_adapter():
|
||||
adapter = WhatsAppAdapter.__new__(WhatsAppAdapter)
|
||||
adapter.platform = Platform.WHATSAPP
|
||||
adapter.config = PlatformConfig(enabled=True)
|
||||
adapter._message_handler = AsyncMock()
|
||||
adapter._dm_policy = "open"
|
||||
adapter._allow_from = set()
|
||||
adapter._group_policy = "open"
|
||||
adapter._group_allow_from = set()
|
||||
adapter._mention_patterns = []
|
||||
adapter._free_response_chats = set()
|
||||
adapter._whatsapp_free_response_chats = lambda: set()
|
||||
return adapter
|
||||
|
||||
|
||||
def _dm_payload(**overrides):
|
||||
payload = {
|
||||
"messageId": "M1",
|
||||
"chatId": "6281234567890@s.whatsapp.net",
|
||||
"senderId": "6281234567890@s.whatsapp.net",
|
||||
"senderName": "Customer",
|
||||
"chatName": "Customer",
|
||||
"isGroup": False,
|
||||
"body": "hi from the linked phone",
|
||||
"hasMedia": False,
|
||||
"mediaType": "",
|
||||
"mediaUrls": [],
|
||||
"mentionedIds": [],
|
||||
"quotedParticipant": "",
|
||||
"botIds": [],
|
||||
"timestamp": 0,
|
||||
}
|
||||
payload.update(overrides)
|
||||
return payload
|
||||
|
||||
|
||||
def test_metadata_flag_set_when_payload_has_from_owner():
|
||||
adapter = _make_adapter()
|
||||
payload = _dm_payload(fromOwner=True)
|
||||
|
||||
event = asyncio.run(adapter._build_message_event(payload))
|
||||
|
||||
assert event is not None
|
||||
assert event.metadata.get("whatsapp_from_owner") is True
|
||||
|
||||
|
||||
def test_metadata_flag_absent_by_default():
|
||||
"""Default bridge payload (env flag off → field never present) must not
|
||||
leak the metadata key. Plugins use ``.get(...)`` and rely on absence."""
|
||||
adapter = _make_adapter()
|
||||
payload = _dm_payload()
|
||||
|
||||
event = asyncio.run(adapter._build_message_event(payload))
|
||||
|
||||
assert event is not None
|
||||
assert "whatsapp_from_owner" not in event.metadata
|
||||
|
||||
|
||||
def test_metadata_flag_absent_when_explicitly_false():
|
||||
"""Explicit fromOwner=false must not set the metadata key — plugins
|
||||
test for truthiness, but absence is the canonical "not owner" state."""
|
||||
adapter = _make_adapter()
|
||||
payload = _dm_payload(fromOwner=False)
|
||||
|
||||
event = asyncio.run(adapter._build_message_event(payload))
|
||||
|
||||
assert event is not None
|
||||
assert "whatsapp_from_owner" not in event.metadata
|
||||
Loading…
Add table
Add a link
Reference in a new issue