From 984e6cb5b8bbfc7f0b1c18a3ec3c599ad98614cb Mon Sep 17 00:00:00 2001 From: emozilla Date: Sat, 23 May 2026 01:07:01 -0400 Subject: [PATCH] feat(whatsapp): add WhatsApp Business Cloud API adapter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add an official, production-grade WhatsApp integration via Meta's Business Cloud API as a complement to the existing Baileys bridge. No bridge subprocess, no QR codes, no account-ban risk — at the cost of a Meta Business account and a public HTTPS webhook URL. Setup is fully wizard-driven: 'hermes whatsapp-cloud' walks through every credential with paste-time validation (catches the #1 trap of pasting a phone number into the Phone Number ID field), generates a verify token, and ends with copy-paste instructions for the cloudflared / Meta-dashboard / Business Manager pieces that can't be automated. The wizard also points users at Meta's Business Manager for setting the bot's display name and profile picture. Feature set: - Inbound: text, images (with native-vision routing), voice notes (STT), documents (small text inlined, larger cached), reply context. - Outbound: text with WhatsApp-flavored markdown conversion, images, videos, documents, opus voice notes via ffmpeg with MP3 fallback. - Native interactive buttons for clarify, dangerous-command approval, and slash-command confirmation flows — matches the Telegram / Discord UX, degrades gracefully to plain text. - Read receipts (blue double-checkmarks) and typing indicator, using Meta's combined endpoint so they fire in a single API call. - Webhook security: X-Hub-Signature-256 HMAC verification (raw body, constant-time), wamid deduplication, group-shaped-message refusal (groups deferred to v2 — Baileys still covers them). - Full integration with the gateway's session, cron, display-tier, prompt-hint, and auth-allowlist systems. Cloud and Baileys can run side-by-side against different phone numbers. 
Also wires STT (speech-to-text) through Nous's managed audio gateway for Nous subscribers — previously the default stt.provider=local required a separate faster-whisper install. New subscribers now get voice-note transcription out of the box. Docs: 418-line user guide at website/docs/user-guide/messaging/ whatsapp-cloud.md, sidebar entry, environment-variables reference, ADDING_A_PLATFORM.md updated with the optional interactive-UX contract for future adapter authors. Tests: 100 dedicated tests for the adapter, 32 for the setup wizard, 20 for the Nous subscription STT wiring, plus regression coverage across display_config, prompt_builder, and the cron scheduler. Known limitations (deferred until clear demand signal): - Group chats — use the Baileys bridge if you need them. - Message templates for 24-hour-window outside-conversation sends — reactive chat is unaffected; cron / delegate_task with gaps > 24h will fail with a clear error. The agent's system prompt warns the model about this so it knows to mention it when scheduling delayed messages. 
--- agent/prompt_builder.py | 21 +- cron/scheduler.py | 1 + gateway/config.py | 59 + gateway/display_config.py | 6 + gateway/platforms/ADDING_A_PLATFORM.md | 29 + gateway/platforms/whatsapp.py | 280 +- gateway/platforms/whatsapp_cloud.py | 1869 ++++++++++++++ gateway/platforms/whatsapp_common.py | 351 +++ gateway/run.py | 20 +- hermes_cli/main.py | 37 +- hermes_cli/nous_subscription.py | 134 +- hermes_cli/platforms.py | 1 + hermes_cli/setup_whatsapp_cloud.py | 530 ++++ hermes_cli/status.py | 2 +- tests/agent/test_prompt_builder.py | 21 +- tests/cron/test_scheduler.py | 23 + tests/gateway/test_display_config.py | 16 +- tests/gateway/test_whatsapp_cloud.py | 2250 +++++++++++++++++ tests/hermes_cli/test_nous_subscription.py | 156 +- .../hermes_cli/test_status_model_provider.py | 1 + tests/hermes_cli/test_whatsapp_cloud_setup.py | 406 +++ .../docs/reference/environment-variables.md | 13 + website/docs/user-guide/messaging/index.md | 2 + .../user-guide/messaging/whatsapp-cloud.md | 418 +++ website/docs/user-guide/messaging/whatsapp.md | 8 + website/sidebars.ts | 1 + 26 files changed, 6368 insertions(+), 287 deletions(-) create mode 100644 gateway/platforms/whatsapp_cloud.py create mode 100644 gateway/platforms/whatsapp_common.py create mode 100644 hermes_cli/setup_whatsapp_cloud.py create mode 100644 tests/gateway/test_whatsapp_cloud.py create mode 100644 tests/hermes_cli/test_whatsapp_cloud_setup.py create mode 100644 website/docs/user-guide/messaging/whatsapp-cloud.md diff --git a/agent/prompt_builder.py b/agent/prompt_builder.py index 9c36d205ac5..ea1e598ff4a 100644 --- a/agent/prompt_builder.py +++ b/agent/prompt_builder.py @@ -428,6 +428,23 @@ PLATFORM_HINTS = { "files arrive as downloadable documents. You can also include image " "URLs in markdown format ![alt](url) and they will be sent as photos." ), + "whatsapp_cloud": ( + "You are on a text messaging communication platform, WhatsApp " + "(via Meta's official Business Cloud API). 
Standard markdown " + "(**bold**, ~~strike~~, # headers, [links](url)) is auto-converted " + "to WhatsApp's native syntax (*bold*, ~strike~, etc.) — feel free " + "to write in markdown. Tables are NOT supported — prefer bullet " + "lists or labeled key:value pairs. " + "You can send media files natively: include MEDIA:/absolute/path/to/file " + "in your response. Images (.jpg, .png) become photo attachments, " + "videos (.mp4) play inline, audio (.mp3, .ogg) sends as voice/audio " + "messages, other files arrive as documents. Image URLs in markdown " + "format ![alt](url) also work. " + "IMPORTANT: this platform has a 24-hour conversation window — if the " + "user hasn't messaged in 24h, free-form replies are refused by Meta " + "(error 131047). This rarely matters for live chat, but is worth " + "knowing if you're scheduling a delayed message." + ), "telegram": ( "You are on a text messaging communication platform, Telegram. " "Standard markdown is automatically converted to Telegram format. " @@ -1279,13 +1296,13 @@ def build_nous_subscription_prompt(valid_tool_names: "set[str] | None" = None) - lines = [ "# Nous Subscription", - "Nous subscription includes managed web tools (Firecrawl), image generation (FAL), OpenAI TTS, and browser automation (Browser Use) by default. Modal execution is optional.", + "Nous subscription includes managed web tools (Firecrawl), image generation (FAL), OpenAI TTS, OpenAI Whisper STT, and browser automation (Browser Use) by default. 
Modal execution is optional.", "Current capability status:", ] lines.extend(_status_line(feature) for feature in features.items()) lines.extend( [ - "When a Nous-managed feature is active, do not ask the user for Firecrawl, FAL, OpenAI TTS, or Browser-Use API keys.", + "When a Nous-managed feature is active, do not ask the user for Firecrawl, FAL, OpenAI TTS, OpenAI Whisper, or Browser-Use API keys.", "If the user is not subscribed and asks for a capability that Nous subscription would unlock or simplify, suggest Nous subscription as one option alongside direct setup or local alternatives.", "Do not mention subscription unless the user asks about it or it directly solves the current missing capability.", "Useful commands: hermes setup, hermes setup tools, hermes setup terminal, hermes status.", diff --git a/cron/scheduler.py b/cron/scheduler.py index e76f67064cf..a591e376588 100644 --- a/cron/scheduler.py +++ b/cron/scheduler.py @@ -114,6 +114,7 @@ _HOME_TARGET_ENV_VARS = { "bluebubbles": "BLUEBUBBLES_HOME_CHANNEL", "qqbot": "QQBOT_HOME_CHANNEL", "whatsapp": "WHATSAPP_HOME_CHANNEL", + "whatsapp_cloud": "WHATSAPP_CLOUD_HOME_CHANNEL", } # Legacy env var names kept for back-compat. 
Each entry is the current diff --git a/gateway/config.py b/gateway/config.py index 83326975249..cdd06d6e28a 100644 --- a/gateway/config.py +++ b/gateway/config.py @@ -109,6 +109,7 @@ class Platform(Enum): TELEGRAM = "telegram" DISCORD = "discord" WHATSAPP = "whatsapp" + WHATSAPP_CLOUD = "whatsapp_cloud" SLACK = "slack" SIGNAL = "signal" MATTERMOST = "mattermost" @@ -419,6 +420,9 @@ _PLATFORM_CONNECTED_CHECKERS: dict[Platform, Callable[[PlatformConfig], bool]] = cfg.extra.get("account_id") and (cfg.token or cfg.extra.get("token")) ), Platform.WHATSAPP: lambda cfg: True, # bridge handles auth + Platform.WHATSAPP_CLOUD: lambda cfg: bool( + cfg.extra.get("phone_number_id") and cfg.extra.get("access_token") + ), Platform.SIGNAL: lambda cfg: bool(cfg.extra.get("http_url")), Platform.EMAIL: lambda cfg: bool(cfg.extra.get("address")), Platform.SMS: lambda cfg: bool(os.getenv("TWILIO_ACCOUNT_SID")), @@ -1367,6 +1371,61 @@ def _apply_env_overrides(config: GatewayConfig) -> None: thread_id=os.getenv("WHATSAPP_HOME_CHANNEL_THREAD_ID") or None, ) + # WhatsApp Cloud API (official Business Platform via Meta). + # Distinct from the Baileys bridge: pure HTTP graph.facebook.com calls + # outbound, public webhook inbound. Both adapters can run in parallel + # against different phone numbers. 
+ whatsapp_cloud_phone_id = os.getenv("WHATSAPP_CLOUD_PHONE_NUMBER_ID") + whatsapp_cloud_token = os.getenv("WHATSAPP_CLOUD_ACCESS_TOKEN") + if whatsapp_cloud_phone_id and whatsapp_cloud_token: + if Platform.WHATSAPP_CLOUD not in config.platforms: + config.platforms[Platform.WHATSAPP_CLOUD] = PlatformConfig() + config.platforms[Platform.WHATSAPP_CLOUD].enabled = True + config.platforms[Platform.WHATSAPP_CLOUD].extra.update({ + "phone_number_id": whatsapp_cloud_phone_id, + "access_token": whatsapp_cloud_token, + }) + # Optional: app_id / app_secret (signature verification) + wa_cloud_app_id = os.getenv("WHATSAPP_CLOUD_APP_ID") + if wa_cloud_app_id: + config.platforms[Platform.WHATSAPP_CLOUD].extra["app_id"] = wa_cloud_app_id + wa_cloud_app_secret = os.getenv("WHATSAPP_CLOUD_APP_SECRET") + if wa_cloud_app_secret: + config.platforms[Platform.WHATSAPP_CLOUD].extra["app_secret"] = wa_cloud_app_secret + # Optional: WABA id (analytics, future use) + wa_cloud_waba_id = os.getenv("WHATSAPP_CLOUD_WABA_ID") + if wa_cloud_waba_id: + config.platforms[Platform.WHATSAPP_CLOUD].extra["waba_id"] = wa_cloud_waba_id + # Webhook verify token — Meta hub.verify_token shared secret + wa_cloud_verify_token = os.getenv("WHATSAPP_CLOUD_VERIFY_TOKEN") + if wa_cloud_verify_token: + config.platforms[Platform.WHATSAPP_CLOUD].extra["verify_token"] = wa_cloud_verify_token + # Webhook server bind config (defaults baked into the adapter) + wa_cloud_host = os.getenv("WHATSAPP_CLOUD_WEBHOOK_HOST") + if wa_cloud_host: + config.platforms[Platform.WHATSAPP_CLOUD].extra["webhook_host"] = wa_cloud_host + wa_cloud_port = os.getenv("WHATSAPP_CLOUD_WEBHOOK_PORT") + if wa_cloud_port: + try: + config.platforms[Platform.WHATSAPP_CLOUD].extra["webhook_port"] = int(wa_cloud_port) + except ValueError: + pass + wa_cloud_path = os.getenv("WHATSAPP_CLOUD_WEBHOOK_PATH") + if wa_cloud_path: + config.platforms[Platform.WHATSAPP_CLOUD].extra["webhook_path"] = wa_cloud_path + # Graph API version override (rarely needed) + 
wa_cloud_api_version = os.getenv("WHATSAPP_CLOUD_API_VERSION") + if wa_cloud_api_version: + config.platforms[Platform.WHATSAPP_CLOUD].extra["api_version"] = wa_cloud_api_version + whatsapp_cloud_home = os.getenv("WHATSAPP_CLOUD_HOME_CHANNEL") + if whatsapp_cloud_home and Platform.WHATSAPP_CLOUD in config.platforms: + config.platforms[Platform.WHATSAPP_CLOUD].home_channel = HomeChannel( + platform=Platform.WHATSAPP_CLOUD, + chat_id=whatsapp_cloud_home, + name=os.getenv("WHATSAPP_CLOUD_HOME_CHANNEL_NAME", "Home"), + thread_id=os.getenv("WHATSAPP_CLOUD_HOME_CHANNEL_THREAD_ID") or None, + ) + # Slack slack_token = os.getenv("SLACK_BOT_TOKEN") if slack_token: diff --git a/gateway/display_config.py b/gateway/display_config.py index eab6bebc783..7f273b7bbab 100644 --- a/gateway/display_config.py +++ b/gateway/display_config.py @@ -95,6 +95,12 @@ _PLATFORM_DEFAULTS: dict[str, dict[str, Any]] = { # Tier 3 — no edit support, progress messages are permanent "signal": _TIER_LOW, "whatsapp": _TIER_MEDIUM, # Baileys bridge supports /edit + # WhatsApp Cloud API: Meta added message editing in 2023 but the + # Hermes Cloud adapter doesn't implement edit_message yet, so we + # stay on TIER_LOW (tool_progress off) to avoid spamming each + # status update as a separate message. Promote to TIER_MEDIUM once + # Cloud's edit_message lands. + "whatsapp_cloud": _TIER_LOW, "bluebubbles": _TIER_LOW, "weixin": _TIER_LOW, "wecom": _TIER_LOW, diff --git a/gateway/platforms/ADDING_A_PLATFORM.md b/gateway/platforms/ADDING_A_PLATFORM.md index c373b9fa0b9..e3b84fecaeb 100644 --- a/gateway/platforms/ADDING_A_PLATFORM.md +++ b/gateway/platforms/ADDING_A_PLATFORM.md @@ -52,6 +52,22 @@ for the full pattern (Template Buttons postback at 45s, `RequestCache` state machine, `interrupt_session_activity` override for `/stop` orphans) and the developer-guide page for the prose walkthrough. 
+**Sibling adapters that share behavior.** When a single platform has +two transport modes the user picks between — unofficial vs official +APIs, polling vs websocket, library A vs library B — the right +structure is two adapters that share a behavior mixin. WhatsApp does +this: `gateway/platforms/whatsapp.py` (Baileys bridge) and +`gateway/platforms/whatsapp_cloud.py` (Meta Cloud API) both inherit +from `WhatsAppBehaviorMixin` in `gateway/platforms/whatsapp_common.py`. +The mixin owns gating, allow-lists, mention parsing, broadcast +filters, and the WhatsApp-flavored markdown conversion — everything +that's platform-protocol-agnostic. Each adapter owns its transport. +Both register distinct `Platform.*` enum values so the gateway can run +both simultaneously against different phone numbers. The mixin must +come **first** in the bases list — `class WhatsAppAdapter(Mixin, +BasePlatformAdapter)` — so the mixin's `format_message` overrides +`BasePlatformAdapter`'s generic default. + See `plugins/platforms/irc/`, `plugins/platforms/teams/`, and `plugins/platforms/google_chat/` for complete working examples, and `website/docs/developer-guide/adding-platform-adapters.md` for the full @@ -94,6 +110,19 @@ The adapter is a subclass of `BasePlatformAdapter` from `gateway/platforms/base. | `send_animation(chat_id, path, caption)` | Send a GIF/animation | | `send_image_file(chat_id, path, caption)` | Send image from local file | +### Interactive UX (recommended if your platform supports tappable buttons) + +If your platform supports interactive button/menu messages, implement these for a more polished agent experience. They all degrade gracefully to plain text when not overridden: + +| Method | Purpose | +|--------|---------| +| `send_clarify(chat_id, question, choices, clarify_id, session_key, ...)` | Render the `clarify` tool's multi-choice question as tappable buttons. Pair with inbound dispatch that routes button taps to `tools.clarify_gateway.resolve_gateway_clarify`. 
| +| `send_exec_approval(chat_id, command, session_key, description, ...)` | Render dangerous-command approval as Approve/Deny buttons. Inbound dispatch routes to `tools.approval.resolve_gateway_approval`. | +| `send_slash_confirm(chat_id, title, message, session_key, confirm_id, ...)` | Render slash-command confirmations (e.g. `/reload-mcp`) as Once/Always/Cancel buttons. Inbound dispatch routes to `tools.slash_confirm.resolve`. | +| `send_model_picker(...)` | Interactive `/model` picker. Used by Telegram and Discord. | + +See `gateway/platforms/telegram.py`, `discord.py`, and `whatsapp_cloud.py` for reference implementations. The button-callback id convention (`cl::`, `appr::`, `sc::`) is shared across adapters — match it so the gateway-side resolvers work without modification. + ### Required function ```python diff --git a/gateway/platforms/whatsapp.py b/gateway/platforms/whatsapp.py index 0ca3d41fabb..90d04a5e964 100644 --- a/gateway/platforms/whatsapp.py +++ b/gateway/platforms/whatsapp.py @@ -16,11 +16,9 @@ with different backends via a bridge pattern. """ import asyncio -import json import logging import os import platform -import re import shutil import signal import subprocess @@ -180,6 +178,7 @@ import sys sys.path.insert(0, str(Path(__file__).resolve().parents[2])) from gateway.config import Platform, PlatformConfig +from gateway.platforms.whatsapp_common import WhatsAppBehaviorMixin from gateway.platforms.base import ( BasePlatformAdapter, MessageEvent, @@ -215,7 +214,7 @@ def check_whatsapp_requirements() -> bool: return False -class WhatsAppAdapter(BasePlatformAdapter): +class WhatsAppAdapter(WhatsAppBehaviorMixin, BasePlatformAdapter): """ WhatsApp adapter. 
@@ -237,13 +236,12 @@ class WhatsAppAdapter(BasePlatformAdapter): - allow_from: List of sender IDs allowed in DMs (when dm_policy="allowlist") - group_policy: "open" | "allowlist" | "disabled" — which groups are processed (default: "open") - group_allow_from: List of group JIDs allowed (when group_policy="allowlist") + + Behavior (gating, mention parsing, markdown conversion, chunking) is + provided by ``WhatsAppBehaviorMixin`` so the Cloud API adapter can + share it. Only transport-specific code lives here. """ - - # WhatsApp message limits — practical UX limit, not protocol max. - # WhatsApp allows ~65K but long messages are unreadable on mobile. - MAX_MESSAGE_LENGTH = 4096 - DEFAULT_REPLY_PREFIX = "⚕ *Hermes Agent*\n────────────\n" - + # Default bridge location relative to the hermes-agent install _DEFAULT_BRIDGE_DIR = Path(__file__).resolve().parents[2] / "scripts" / "whatsapp-bridge" @@ -278,213 +276,6 @@ class WhatsAppAdapter(BasePlatformAdapter): # notification before the normal "✓ whatsapp disconnected" fires. self._shutting_down: bool = False - def _effective_reply_prefix(self) -> str: - """Return the prefix the Node bridge will add in self-chat mode.""" - whatsapp_mode = os.getenv("WHATSAPP_MODE", "self-chat") - if whatsapp_mode != "self-chat": - return "" - if self._reply_prefix is not None: - return self._reply_prefix.replace("\\n", "\n") - env_prefix = os.getenv("WHATSAPP_REPLY_PREFIX") - if env_prefix is not None: - return env_prefix.replace("\\n", "\n") - return self.DEFAULT_REPLY_PREFIX - - def _outgoing_chunk_limit(self) -> int: - """Reserve room for the bridge-side prefix so final WhatsApp text fits.""" - prefix_len = len(self._effective_reply_prefix()) - # Keep enough space for truncate_message's pagination indicator and - # code-fence repair even if a user configures a very long prefix. 
- return max(1024, self.MAX_MESSAGE_LENGTH - prefix_len) - - def _whatsapp_require_mention(self) -> bool: - configured = self.config.extra.get("require_mention") - if configured is not None: - if isinstance(configured, str): - return configured.lower() in {"true", "1", "yes", "on"} - return bool(configured) - return os.getenv("WHATSAPP_REQUIRE_MENTION", "false").lower() in {"true", "1", "yes", "on"} - - def _whatsapp_free_response_chats(self) -> set[str]: - raw = self.config.extra.get("free_response_chats") - if raw is None: - raw = os.getenv("WHATSAPP_FREE_RESPONSE_CHATS", "") - if isinstance(raw, list): - return {str(part).strip() for part in raw if str(part).strip()} - return {part.strip() for part in str(raw).split(",") if part.strip()} - - @staticmethod - def _coerce_allow_list(raw) -> set[str]: - """Parse allow_from / group_allow_from from config or env var.""" - if raw is None: - return set() - if isinstance(raw, list): - return {str(part).strip() for part in raw if str(part).strip()} - return {part.strip() for part in str(raw).split(",") if part.strip()} - - @staticmethod - def _is_broadcast_chat(chat_id: str) -> bool: - """True for WhatsApp pseudo-chats that aren't real conversations. - - Covers Status updates (Stories) and Channel/Newsletter broadcasts. - These show up as inbound messages on Baileys but the agent should - never reply — answering a Story update spams the contact's status - feed, and Channel posts aren't addressable in the first place. - """ - if not chat_id: - return False - cid = chat_id.strip().lower() - if cid == "status@broadcast": - return True - # @broadcast suffix covers status@broadcast plus any future - # broadcast-list variants. @newsletter is the Channel JID suffix. 
- if cid.endswith("@broadcast") or cid.endswith("@newsletter"): - return True - return False - - def _is_dm_allowed(self, sender_id: str) -> bool: - """Check whether a DM from the given sender should be processed.""" - if self._dm_policy == "disabled": - return False - if self._dm_policy == "allowlist": - return sender_id in self._allow_from - # "open" — all DMs allowed - return True - - def _is_group_allowed(self, chat_id: str) -> bool: - """Check whether a group chat should be processed.""" - if self._group_policy == "disabled": - return False - if self._group_policy == "allowlist": - return chat_id in self._group_allow_from - # "open" — all groups allowed - return True - - def _compile_mention_patterns(self): - patterns = self.config.extra.get("mention_patterns") - if patterns is None: - raw = os.getenv("WHATSAPP_MENTION_PATTERNS", "").strip() - if raw: - try: - patterns = json.loads(raw) - except Exception: - patterns = [part.strip() for part in raw.splitlines() if part.strip()] - if not patterns: - patterns = [part.strip() for part in raw.split(",") if part.strip()] - if patterns is None: - return [] - if isinstance(patterns, str): - patterns = [patterns] - if not isinstance(patterns, list): - logger.warning("[%s] whatsapp mention_patterns must be a list or string; got %s", self.name, type(patterns).__name__) - return [] - - compiled = [] - for pattern in patterns: - if not isinstance(pattern, str) or not pattern.strip(): - continue - try: - compiled.append(re.compile(pattern, re.IGNORECASE)) - except re.error as exc: - logger.warning("[%s] Invalid WhatsApp mention pattern %r: %s", self.name, pattern, exc) - if compiled: - logger.info("[%s] Loaded %d WhatsApp mention pattern(s)", self.name, len(compiled)) - return compiled - - @staticmethod - def _normalize_whatsapp_id(value: Optional[str]) -> str: - if not value: - return "" - normalized = str(value).strip() - if ":" in normalized and "@" in normalized: - normalized = normalized.replace(":", "@", 1) - return 
normalized - - def _bot_ids_from_message(self, data: Dict[str, Any]) -> set[str]: - bot_ids = set() - for candidate in data.get("botIds") or []: - normalized = self._normalize_whatsapp_id(candidate) - if normalized: - bot_ids.add(normalized) - return bot_ids - - def _message_is_reply_to_bot(self, data: Dict[str, Any]) -> bool: - quoted_participant = self._normalize_whatsapp_id(data.get("quotedParticipant")) - if not quoted_participant: - return False - return quoted_participant in self._bot_ids_from_message(data) - - def _message_mentions_bot(self, data: Dict[str, Any]) -> bool: - bot_ids = self._bot_ids_from_message(data) - if not bot_ids: - return False - mentioned_ids = { - nid - for candidate in (data.get("mentionedIds") or []) - if (nid := self._normalize_whatsapp_id(candidate)) - } - if mentioned_ids & bot_ids: - return True - - body = str(data.get("body") or "") - lower_body = body.lower() - for bot_id in bot_ids: - bare_id = bot_id.split("@", 1)[0].lower() - if bare_id and (f"@{bare_id}" in lower_body or bare_id in lower_body): - return True - return False - - def _message_matches_mention_patterns(self, data: Dict[str, Any]) -> bool: - if not self._mention_patterns: - return False - body = str(data.get("body") or "") - return any(pattern.search(body) for pattern in self._mention_patterns) - - def _clean_bot_mention_text(self, text: str, data: Dict[str, Any]) -> str: - if not text: - return text - bot_ids = self._bot_ids_from_message(data) - cleaned = text - for bot_id in bot_ids: - bare_id = bot_id.split("@", 1)[0] - if bare_id: - cleaned = re.sub(rf"@{re.escape(bare_id)}\b[,:\-]*\s*", "", cleaned) - return cleaned.strip() or text - - def _should_process_message(self, data: Dict[str, Any]) -> bool: - chat_id_raw = str(data.get("chatId") or "") - # WhatsApp uses pseudo-chats for Status updates (Stories) and - # Channel/Newsletter broadcasts. 
These are not real conversations - # and the agent should never reply to them — even in self-chat mode - # where the bridge may surface them as "fromMe" events. - if self._is_broadcast_chat(chat_id_raw): - return False - is_group = data.get("isGroup", False) - if is_group: - chat_id = chat_id_raw - if not self._is_group_allowed(chat_id): - return False - else: - sender_id = str(data.get("senderId") or data.get("from") or "") - if not self._is_dm_allowed(sender_id): - return False - # DMs that pass the policy gate are always processed - return True - # Group messages: check mention / free-response settings - chat_id = str(data.get("chatId") or "") - if chat_id in self._whatsapp_free_response_chats(): - return True - if not self._whatsapp_require_mention(): - return True - body = str(data.get("body") or "").strip() - if body.startswith("/"): - return True - if self._message_is_reply_to_bot(data): - return True - if self._message_mentions_bot(data): - return True - return self._message_matches_mention_patterns(data) - async def connect(self) -> bool: """ Start the WhatsApp bridge. @@ -808,63 +599,6 @@ class WhatsAppAdapter(BasePlatformAdapter): self._close_bridge_log() print(f"[{self.name}] Disconnected") - def format_message(self, content: str) -> str: - """Convert standard markdown to WhatsApp-compatible formatting. - - WhatsApp supports: *bold*, _italic_, ~strikethrough~, ```code```, - and monospaced `inline`. Standard markdown uses different syntax - for bold/italic/strikethrough, so we convert here. - - Code blocks (``` fenced) and inline code (`) are protected from - conversion via placeholder substitution. - """ - if not content: - return content - - # --- 1. Protect fenced code blocks from formatting changes --- - _FENCE_PH = "\x00FENCE" - fences: list[str] = [] - - def _save_fence(m: re.Match) -> str: - fences.append(m.group(0)) - return f"{_FENCE_PH}{len(fences) - 1}\x00" - - result = re.sub(r"```[\s\S]*?```", _save_fence, content) - - # --- 2. 
Protect inline code --- - _CODE_PH = "\x00CODE" - codes: list[str] = [] - - def _save_code(m: re.Match) -> str: - codes.append(m.group(0)) - return f"{_CODE_PH}{len(codes) - 1}\x00" - - result = re.sub(r"`[^`\n]+`", _save_code, result) - - # --- 3. Convert markdown formatting to WhatsApp syntax --- - # Bold: **text** or __text__ → *text* - result = re.sub(r"\*\*(.+?)\*\*", r"*\1*", result) - result = re.sub(r"__(.+?)__", r"*\1*", result) - # Strikethrough: ~~text~~ → ~text~ - result = re.sub(r"~~(.+?)~~", r"~\1~", result) - # Italic: *text* is already WhatsApp italic — leave as-is - # _text_ is already WhatsApp italic — leave as-is - - # --- 4. Convert markdown headers to bold text --- - # # Header → *Header* - result = re.sub(r"^#{1,6}\s+(.+)$", r"*\1*", result, flags=re.MULTILINE) - - # --- 5. Convert markdown links: [text](url) → text (url) --- - result = re.sub(r"\[([^\]]+)\]\(([^)]+)\)", r"\1 (\2)", result) - - # --- 6. Restore protected sections --- - for i, fence in enumerate(fences): - result = result.replace(f"{_FENCE_PH}{i}\x00", fence) - for i, code in enumerate(codes): - result = result.replace(f"{_CODE_PH}{i}\x00", code) - - return result - async def send( self, chat_id: str, diff --git a/gateway/platforms/whatsapp_cloud.py b/gateway/platforms/whatsapp_cloud.py new file mode 100644 index 00000000000..7a2337e367e --- /dev/null +++ b/gateway/platforms/whatsapp_cloud.py @@ -0,0 +1,1869 @@ +""" +WhatsApp Cloud API adapter — official Meta WhatsApp Business Platform. + +This adapter is a *complement* to ``whatsapp.py`` (the Baileys bridge), not +a replacement. The two are independent: + +- ``whatsapp.py`` — unofficial Baileys bridge, personal accounts, no + public URL needed, account-ban risk. +- ``whatsapp_cloud.py`` (this file) — official Meta Cloud API, Business + account required, public webhook URL required, + token-based auth. + +Both share gating / mention / formatting behavior via ``WhatsAppBehaviorMixin``. 
+ +Phase scope (this file evolves across phases): +- Phase 2 — outbound text via Graph API + webhook server with verify-token + handshake. +- Phase 3 — X-Hub-Signature-256 HMAC verification (raw body, constant-time) + + wamid replay protection + dispatch via handle_message. Phase 3 + adapter is end-to-end usable for text DMs. +- Phase 4 — media upload + send (image/video/audio/document), inbound + media download via the Graph media endpoint, voice-note opus + conversion via ffmpeg with graceful MP3 fallback when ffmpeg + isn't on PATH. Document text injection for readable types. +- Phase 5 — 24-hour conversation window + template fallback. + +Required env vars to enable the adapter: +- WHATSAPP_CLOUD_PHONE_NUMBER_ID (the Graph URL path component) +- WHATSAPP_CLOUD_ACCESS_TOKEN (System User permanent token) + +Optional / Phase-3+: +- WHATSAPP_CLOUD_APP_ID +- WHATSAPP_CLOUD_APP_SECRET (HMAC key for X-Hub-Signature-256) +- WHATSAPP_CLOUD_WABA_ID (analytics / future use) +- WHATSAPP_CLOUD_VERIFY_TOKEN (hub.verify_token shared secret) +- WHATSAPP_CLOUD_WEBHOOK_HOST (default 0.0.0.0) +- WHATSAPP_CLOUD_WEBHOOK_PORT (default 8090) +- WHATSAPP_CLOUD_WEBHOOK_PATH (default /whatsapp/webhook) +- WHATSAPP_CLOUD_API_VERSION (default v20.0) +""" + +from __future__ import annotations + +import asyncio +import hashlib +import hmac +import logging +import mimetypes +import os +import shutil +import uuid +from collections import OrderedDict +from pathlib import Path +from typing import Any, Dict, Optional + +try: + from aiohttp import web + + AIOHTTP_AVAILABLE = True +except ImportError: + AIOHTTP_AVAILABLE = False + web = None # type: ignore[assignment] + +try: + import httpx + + HTTPX_AVAILABLE = True +except ImportError: + HTTPX_AVAILABLE = False + httpx = None # type: ignore[assignment] + +from gateway.config import Platform, PlatformConfig +from gateway.platforms.base import ( + BasePlatformAdapter, + MessageEvent, + MessageType, + SendResult, + SUPPORTED_DOCUMENT_TYPES, +) 
+from gateway.platforms.whatsapp_common import WhatsAppBehaviorMixin +from hermes_constants import get_hermes_dir + +logger = logging.getLogger(__name__) + + +DEFAULT_API_VERSION = "v20.0" +DEFAULT_WEBHOOK_HOST = "0.0.0.0" +DEFAULT_WEBHOOK_PORT = 8090 +DEFAULT_WEBHOOK_PATH = "/whatsapp/webhook" +GRAPH_API_BASE = "https://graph.facebook.com" +# Meta retries failed webhooks for up to 7 days. We don't need to remember +# every wamid for the full retry window — the practical risk is duplicate +# delivery within minutes, not days. 5000 entries with FIFO eviction is +# plenty for normal traffic and bounds memory. +WAMID_DEDUP_CACHE_SIZE = 5000 + +# Per-type size caps documented by Meta for the Cloud API /media endpoint. +# These are the hard limits; we refuse uploads above them with a clean +# error instead of round-tripping to Graph just to be rejected. +# https://developers.facebook.com/docs/whatsapp/cloud-api/reference/media +_MEDIA_SIZE_LIMITS = { + "image": 5 * 1024 * 1024, # 5 MB (JPEG, PNG) + "video": 16 * 1024 * 1024, # 16 MB + "audio": 16 * 1024 * 1024, # 16 MB (MP3, AAC, AMR, OGG opus) + "document": 100 * 1024 * 1024, # 100 MB + "sticker": 100 * 1024, # 100 KB static (Meta allows 500 KB for animated) +} + +# Default mime types when we can't guess from the path's extension. +_DEFAULT_MIME = { + "image": "image/jpeg", + "video": "video/mp4", + "audio": "audio/mpeg", + "document": "application/octet-stream", + "sticker": "image/webp", +} + +# ffmpeg location at import time. ``shutil.which`` honours PATHEXT on +# Windows so a user's ``ffmpeg.exe`` is picked up. None means MP3 voice +# falls back to "audio file attachment" rendering in WhatsApp. +_FFMPEG_PATH = shutil.which("ffmpeg") + +# Python's mimetypes module returns RFC-correct but real-world-uncommon +# extensions for some types (audio/ogg → .oga since RFC 5334; audio/mp4 +# → .mp4 instead of the de-facto .m4a for voice notes). 
Our downstream +# STT pipeline whitelists the common-in-the-wild extensions, so override +# the few Meta sends that don't match those defaults. +_WHATSAPP_MIME_EXTENSION_OVERRIDES: Dict[str, str] = { + # WhatsApp voice notes — opus codec inside an Ogg container. + "audio/ogg": ".ogg", + "audio/x-opus+ogg": ".ogg", + "audio/opus": ".ogg", + # iOS voice memos — AAC inside an MP4 container; STT tools expect .m4a. + "audio/mp4": ".m4a", + "audio/x-m4a": ".m4a", + # Image — mimetypes occasionally returns .jpe (legacy IANA) instead + # of .jpg, which trips up tools that switch on extension. + "image/jpeg": ".jpg", +} + + +def _ext_for_mime(mime: str) -> Optional[str]: + """Resolve a mime type to the file extension we want on disk. + + Consults the override map first so types like ``audio/ogg`` produce + the extension downstream tools actually accept (``.ogg``, not the + technically-correct-but-broken ``.oga``). Falls back to Python's + ``mimetypes.guess_extension`` for anything we haven't pinned. + """ + if not mime: + return None + primary = mime.split(";")[0].strip().lower() + override = _WHATSAPP_MIME_EXTENSION_OVERRIDES.get(primary) + if override: + return override + return mimetypes.guess_extension(primary) or None + + +# Inbound media cache lives under the user's hermes dir so it survives +# restarts and gateway reloads — same convention the Baileys bridge uses. +_INBOUND_MEDIA_CACHE = Path(get_hermes_dir("platforms/whatsapp_cloud/media", "whatsapp_cloud/media")) + + +def check_whatsapp_cloud_requirements() -> bool: + """Return whether transport dependencies are available. + + aiohttp is needed for the webhook server (inbound). httpx is needed + for Graph API calls (outbound). Both ship with hermes-agent's default + dependency set, so this should always be True in normal installs. + """ + return AIOHTTP_AVAILABLE and HTTPX_AVAILABLE + + +class WhatsAppCloudAdapter(WhatsAppBehaviorMixin, BasePlatformAdapter): + """WhatsApp Business Cloud API adapter. 
def __init__(self, config: PlatformConfig):
    """Populate adapter state from ``config.extra``; performs no I/O.

    Recognised ``extra`` keys: ``phone_number_id``, ``access_token``,
    ``app_id``, ``app_secret``, ``waba_id``, ``verify_token``,
    ``webhook_host``, ``webhook_port``, ``webhook_path``, ``health_path``,
    ``api_version``, ``reply_prefix``, ``dm_policy``, ``allow_from`` /
    ``allowFrom``, ``group_policy`` and ``group_allow_from`` /
    ``groupAllowFrom``.

    A key that is present but ``None`` (e.g. an empty YAML value or an
    unset environment variable copied into the config) is treated like a
    missing key. Previously ``str(None)`` produced the truthy string
    ``"None"``, which passed the "is it configured?" checks and ended up
    in Graph URLs and auth headers; ``int(None)`` crashed for the port.

    Raises:
        ValueError: if ``webhook_port`` is set to something ``int()``
            cannot parse.
    """
    super().__init__(config, Platform.WHATSAPP_CLOUD)
    extra = config.extra or {}

    def _setting(key: str, default: Any = "") -> Any:
        """Return ``extra[key]``, falling back to ``default`` when the key
        is missing or explicitly ``None``."""
        value = extra.get(key)
        return default if value is None else value

    # Required
    self._phone_number_id: str = str(_setting("phone_number_id")).strip()
    self._access_token: str = str(_setting("access_token")).strip()

    # Optional / used in later phases
    self._app_id: str = str(_setting("app_id")).strip()
    self._app_secret: str = str(_setting("app_secret")).strip()
    self._waba_id: str = str(_setting("waba_id")).strip()
    self._verify_token: str = str(_setting("verify_token")).strip()

    # Webhook server config
    self._webhook_host: str = str(_setting("webhook_host", DEFAULT_WEBHOOK_HOST))
    self._webhook_port: int = int(_setting("webhook_port", DEFAULT_WEBHOOK_PORT))
    self._webhook_path: str = self._normalize_path(
        _setting("webhook_path", DEFAULT_WEBHOOK_PATH)
    )
    self._health_path: str = self._normalize_path(
        _setting("health_path", "/health")
    )

    # Graph API
    self._api_version: str = str(_setting("api_version", DEFAULT_API_VERSION))

    # Behavior-mixin contract: these names are read by the mixin's
    # gating methods. Derived from env / config the same way the
    # Baileys adapter derives them.
    import os

    self._reply_prefix: Optional[str] = extra.get("reply_prefix")
    self._dm_policy: str = str(
        extra.get("dm_policy") or os.getenv("WHATSAPP_DM_POLICY", "open")
    ).strip().lower()
    self._allow_from: set[str] = self._coerce_allow_list(
        extra.get("allow_from") or extra.get("allowFrom")
    )
    self._group_policy: str = str(
        extra.get("group_policy") or os.getenv("WHATSAPP_GROUP_POLICY", "open")
    ).strip().lower()
    self._group_allow_from: set[str] = self._coerce_allow_list(
        extra.get("group_allow_from") or extra.get("groupAllowFrom")
    )
    self._mention_patterns = self._compile_mention_patterns()

    # Webhook dedup state — wamid → True. OrderedDict gives O(1) FIFO
    # eviction. In-memory only; Phase 5 may promote to SessionDB if we
    # decide we need replay protection across gateway restarts.
    self._seen_wamids: "OrderedDict[str, bool]" = OrderedDict()
    self._duplicate_count: int = 0
    self._accepted_count: int = 0
    self._rejected_signature_count: int = 0

    # One-shot flags for warnings that would otherwise spam the log.
    self._warned_no_ffmpeg: bool = False

    # Per-chat cache of the latest inbound wamid. Meta's typing
    # indicator + read-receipt API requires a specific message_id
    # to attach to (typically "the latest message in the
    # conversation"). We refresh this on every accepted inbound
    # message so ``send_typing`` always has a valid target without
    # threading an extra kwarg through the gateway's base contract.
    # In-memory only; on gateway restart the next inbound message
    # repopulates it.
    self._last_inbound_wamid_by_chat: Dict[str, str] = {}

    # Interactive-button state. Each maps a short id (embedded in the
    # outbound button payload) → the session/correlation key needed
    # by the gateway's resolver. See ``_handle_interactive_reply`` for
    # the dispatch table.
    #   _clarify_state:       clarify_id  → session_key (resolves via
    #                         tools.clarify_gateway.resolve_gateway_clarify)
    #   _exec_approval_state: approval_id → session_key (resolves via
    #                         tools.approval.resolve_gateway_approval)
    #   _slash_confirm_state: confirm_id  → session_key (resolves via
    #                         tools.slash_confirm.resolve)
    self._clarify_state: Dict[str, str] = {}
    self._exec_approval_state: Dict[str, str] = {}
    self._slash_confirm_state: Dict[str, str] = {}

    # Runtime handles, created in ``connect`` and torn down in ``disconnect``.
    self._runner = None
    self._http_client: Optional["httpx.AsyncClient"] = None
Tighter keepalive matches other platform + # adapters so idle CLOSE_WAIT drains promptly (#18451). + from gateway.platforms._http_client_limits import platform_httpx_limits + + self._http_client = httpx.AsyncClient( + timeout=30.0, limits=platform_httpx_limits() + ) + + # Inbound webhook server. + app = web.Application() + app.router.add_get(self._health_path, self._handle_health) + app.router.add_get(self._webhook_path, self._handle_verify) + app.router.add_post(self._webhook_path, self._handle_webhook) + + self._runner = web.AppRunner(app) + await self._runner.setup() + site = web.TCPSite(self._runner, self._webhook_host, self._webhook_port) + await site.start() + + self._mark_connected() + logger.info( + "[whatsapp_cloud] Listening on %s:%d%s (Graph %s, phone_id=%s)", + self._webhook_host, + self._webhook_port, + self._webhook_path, + self._api_version, + self._phone_number_id, + ) + if not self._verify_token: + logger.warning( + "[whatsapp_cloud] WHATSAPP_CLOUD_VERIFY_TOKEN is not set — " + "the GET subscription handshake will fail until it is." + ) + if not self._app_secret: + logger.warning( + "[whatsapp_cloud] WHATSAPP_CLOUD_APP_SECRET is not set — " + "incoming webhook POSTs will be refused with 503. Set " + "the app secret to enable inbound message delivery." 
async def disconnect(self) -> None:
    """Tear down the webhook server and the outbound HTTP client.

    Each resource is closed only if it exists; a failure while closing is
    logged (with traceback) and does not stop the remaining teardown. The
    handle is reset to ``None`` either way, and the adapter is marked
    disconnected at the end. Safe to call when nothing is connected.
    """
    # (attribute holding the resource, its async close method, log line on failure)
    teardown_steps = (
        ("_runner", "cleanup", "[whatsapp_cloud] webhook server cleanup failed"),
        ("_http_client", "aclose", "[whatsapp_cloud] http client close failed"),
    )
    for attr_name, closer_name, failure_log in teardown_steps:
        resource = getattr(self, attr_name)
        if resource is None:
            continue
        try:
            await getattr(resource, closer_name)()
        except Exception:
            logger.exception(failure_log)
        setattr(self, attr_name, None)
    self._mark_disconnected()
async def send_typing(self, chat_id: str, metadata=None) -> None:
    """Mark the chat's latest inbound message as read and show "typing…".

    Both effects come from one POST to the ``messages`` endpoint
    (``status: "read"`` plus ``typing_indicator``), addressed to the wamid
    cached for ``chat_id`` in ``_last_inbound_wamid_by_chat``.

    Strictly best-effort: returns silently when the adapter is not
    connected, when no wamid is cached for the chat, or when the request
    raises. A non-200 response is only logged, never raised. ``metadata``
    is accepted for signature compatibility and not used.
    """
    client = self._http_client
    if client is None:
        return
    latest_wamid = self._last_inbound_wamid_by_chat.get(chat_id)
    if not latest_wamid:
        # Nothing to attach the receipt to yet (first contact or restart).
        return

    endpoint = self._graph_url("messages")
    request_headers = {
        "Authorization": f"Bearer {self._access_token}",
        "Content-Type": "application/json",
    }
    receipt = {
        "messaging_product": "whatsapp",
        "status": "read",
        "message_id": latest_wamid,
        "typing_indicator": {"type": "text"},
    }
    try:
        response = await client.post(endpoint, headers=request_headers, json=receipt)
    except Exception:
        # Transport failure: typing UX must never block message dispatch.
        return

    if response.status_code == 200:
        return

    # Rejected: pull out Graph's error code (if any) purely for logging.
    try:
        parsed = response.json()
        error_code = ((parsed or {}).get("error") or {}).get("code")
    except Exception:
        error_code = None

    if error_code == 131009:
        # "Parameter value is not valid" — typically a wamid older than
        # 30 days after a long-quiet conversation; expected, so info level.
        logger.info(
            "[whatsapp_cloud] typing/read indicator rejected: "
            "wamid %s likely older than 30 days", latest_wamid,
        )
        return
    logger.debug(
        "[whatsapp_cloud] typing/read indicator returned %d (%s)",
        response.status_code, error_code,
    )
+ # * ``interactive.type=list`` — a single "Tap to choose" button + # that opens a sheet with up to 10 rows. Used for clarify with + # >3 choices and the model picker. + # + # Unlike utility templates these are FREE-FORM and need no Meta-side + # approval. They only work *inside* the 24-hour conversation window — + # which is fine because all five senders below fire in direct response + # to a user message (clarify mid-conversation, approval mid-tool-call, + # etc.) so we're always inside the window when they're invoked. + + async def _post_interactive( + self, + chat_id: str, + interactive_body: Dict[str, Any], + reply_to: Optional[str] = None, + ) -> SendResult: + """Low-level POST for an ``interactive`` message payload. + + ``interactive_body`` is the inner ``interactive: {...}`` dict — + the caller supplies ``type``, ``body``, and ``action``. This + wrapper handles auth, error mapping, and message_id extraction so + each send_* method stays focused on its own button shape. + """ + if self._http_client is None: + return SendResult(success=False, error="Not connected") + + url = self._graph_url("messages") + headers = { + "Authorization": f"Bearer {self._access_token}", + "Content-Type": "application/json", + } + payload: Dict[str, Any] = { + "messaging_product": "whatsapp", + "recipient_type": "individual", + "to": chat_id, + "type": "interactive", + "interactive": interactive_body, + } + if reply_to: + payload["context"] = {"message_id": reply_to} + + try: + resp = await self._http_client.post(url, headers=headers, json=payload) + except Exception as exc: + logger.exception("[whatsapp_cloud] interactive send failed") + return SendResult(success=False, error=str(exc)) + + if resp.status_code != 200: + try: + body = resp.json() + except Exception: + body = {"raw": resp.text[:500]} + error_msg = self._format_graph_error(body, resp.status_code) + logger.warning( + "[whatsapp_cloud] interactive rejected (status=%d): %s", + resp.status_code, error_msg, + ) + return 
async def send_clarify(
    self,
    chat_id: str,
    question: str,
    choices: Optional[list],
    clarify_id: str,
    session_key: str,
    metadata: Optional[Dict[str, Any]] = None,
) -> "SendResult":
    """Render a clarify prompt as native WhatsApp interactive buttons.

    - 1–3 choices → ``interactive.type=button`` (inline pill buttons).
    - 4+ choices → ``interactive.type=list`` (tap-to-open sheet). A list
      holds at most 10 rows and the last row is always the
      "Other (type answer)" escape hatch, so at most 9 choices are shown.
    - No usable choices (``None``, empty, or only blank strings) → plain
      text question sent via ``send``.

    Button / row ids are ``cl:{clarify_id}:{index}`` where ``index`` is the
    zero-based position in the cleaned choice list, or
    ``cl:{clarify_id}:other`` for the escape-hatch row.

    On a successful interactive send, ``clarify_id → session_key`` is
    recorded in ``_clarify_state``. ``metadata["reply_to_message_id"]``,
    when present, quotes that message.

    Returns the ``SendResult`` of the underlying send.
    """
    if self._http_client is None:
        return SendResult(success=False, error="Not connected")

    max_buttons = 3        # quick-reply buttons per message
    max_list_rows = 10     # rows per list message, "Other" row included

    question = (question or "").strip()
    reply_to = (metadata or {}).get("reply_to_message_id")

    # Consider at most 10 raw entries, drop blanks. NOTE(review): indices in
    # the ids refer to this cleaned list — confirm the resolver maps them
    # the same way if blank choices can ever reach this method.
    choices_list = [
        text
        for text in (str(c).strip() for c in (choices or [])[:max_list_rows])
        if text
    ]

    # Open-ended (or nothing usable left) → plain question. An interactive
    # message with zero buttons would be rejected.
    if not choices_list:
        return await self.send(chat_id, f"❓ {question}", reply_to=reply_to)

    use_buttons = len(choices_list) <= max_buttons
    if not use_buttons:
        # Keep one row free for "Other" so the list never exceeds the
        # 10-row limit (previously 10 choices + "Other" produced 11 rows).
        choices_list = choices_list[: max_list_rows - 1]

    # Full choice text goes in the body so long options are not lost to
    # the 20/24-char label caps; built after capping so the numbering
    # matches the buttons/rows actually offered.
    option_lines = "\n".join(
        f"{i + 1}. {c}" for i, c in enumerate(choices_list)
    )
    body_text = self._truncate_body(f"❓ {question}\n\n{option_lines}")

    if use_buttons:
        buttons = [
            {
                "type": "reply",
                "reply": {
                    "id": f"cl:{clarify_id}:{idx}",
                    "title": self._truncate_button_label(str(idx + 1)),
                },
            }
            for idx in range(len(choices_list))
        ]
        interactive: Dict[str, Any] = {
            "type": "button",
            "body": {"text": body_text},
            "action": {"buttons": buttons},
        }
    else:
        # List mode: each row needs id + title (≤24 chars). The
        # description (≤72 chars) renders under the title and carries the
        # truncated choice text for skimmability.
        rows = [
            {
                "id": f"cl:{clarify_id}:{idx}",
                "title": self._truncate_button_label(f"{idx + 1}", limit=24),
                "description": self._truncate_button_label(choice_text, limit=72),
            }
            for idx, choice_text in enumerate(choices_list)
        ]
        rows.append({
            "id": f"cl:{clarify_id}:other",
            "title": "✏️ Other",
            "description": "Type your own answer",
        })
        interactive = {
            "type": "list",
            "body": {"text": body_text},
            "action": {
                "button": "Choose",
                "sections": [{"title": "Options", "rows": rows}],
            },
        }

    result = await self._post_interactive(chat_id, interactive, reply_to=reply_to)
    if result.success:
        self._clarify_state[clarify_id] = session_key
    return result
+ body_text = self._truncate_body( + f"⚠️ *Command Approval Required*\n\n" + f"```\n{cmd_preview}\n```\n\n" + f"Reason: {description}" + ) + + approval_id = uuid.uuid4().hex[:12] + reply_to = (metadata or {}).get("reply_to_message_id") if metadata else None + + interactive = { + "type": "button", + "body": {"text": body_text}, + "action": { + "buttons": [ + { + "type": "reply", + "reply": {"id": f"appr:{approval_id}:approve", "title": "✅ Approve"}, + }, + { + "type": "reply", + "reply": {"id": f"appr:{approval_id}:deny", "title": "❌ Deny"}, + }, + ], + }, + } + + result = await self._post_interactive(chat_id, interactive, reply_to=reply_to) + if result.success: + self._exec_approval_state[approval_id] = session_key + return result + + async def send_slash_confirm( + self, + chat_id: str, + title: str, + message: str, + session_key: str, + confirm_id: str, + metadata: Optional[Dict[str, Any]] = None, + ) -> SendResult: + """Render a 3-button slash-command confirmation prompt. + + Mirrors Telegram's send_slash_confirm: Approve Once / Always / + Cancel. The confirm_id is supplied by the caller (slash command + handler) — we just store the session_key mapping for the inbound + resolver to look up. 
+ """ + if self._http_client is None: + return SendResult(success=False, error="Not connected") + + body_text = self._truncate_body(f"*{title}*\n\n{message}") + reply_to = (metadata or {}).get("reply_to_message_id") if metadata else None + + interactive = { + "type": "button", + "body": {"text": body_text}, + "action": { + "buttons": [ + { + "type": "reply", + "reply": {"id": f"sc:once:{confirm_id}", "title": "✅ Approve Once"}, + }, + { + "type": "reply", + "reply": {"id": f"sc:always:{confirm_id}", "title": "🔒 Always"}, + }, + { + "type": "reply", + "reply": {"id": f"sc:cancel:{confirm_id}", "title": "❌ Cancel"}, + }, + ], + }, + } + + result = await self._post_interactive(chat_id, interactive, reply_to=reply_to) + if result.success: + self._slash_confirm_state[confirm_id] = session_key + return result + + @staticmethod + def _format_graph_error(body: Dict[str, Any], status_code: int) -> str: + err = (body or {}).get("error") or {} + # Graph API error shape: + # {"error": {"message": "...", "type": "...", "code": ..., "fbtrace_id": "..."}} + message = err.get("message") or body.get("raw") or "unknown error" + code = err.get("code") + if code is not None: + return f"graph error {code} (HTTP {status_code}): {message}" + return f"HTTP {status_code}: {message}" + + async def get_chat_info(self, chat_id: str) -> Dict[str, Any]: + # Cloud API doesn't expose a direct "chat info" endpoint the way + # Slack/Discord do — we just echo the wa_id. Profile name (when + # known) flows in via webhook ``contacts[].profile.name`` and is + # cached on the MessageEvent, not here. + return {"name": chat_id, "type": "dm"} + + # ------------------------------------------------------------------ outbound media + async def _upload_media( + self, + file_path: str, + media_kind: str, + mime_type: Optional[str] = None, + ) -> tuple[Optional[str], Optional[str]]: + """Upload a local file to the Graph /media endpoint. 
async def _send_media(
    self,
    chat_id: str,
    media_kind: str,
    *,
    media_id: Optional[str] = None,
    media_link: Optional[str] = None,
    caption: Optional[str] = None,
    filename: Optional[str] = None,
    reply_to: Optional[str] = None,
) -> SendResult:
    """POST one media message to the ``messages`` endpoint.

    The media is referenced either by a previously uploaded ``media_id``
    or by a ``media_link`` URL — exactly one must be given, otherwise a
    failed ``SendResult`` is returned without any request.

    ``caption`` is attached only for image / video / document kinds and
    ``filename`` only for documents; they are ignored for other kinds.
    ``reply_to`` quotes the given message id.

    Returns a ``SendResult`` carrying the new message id on HTTP 200, or
    the formatted Graph error / exception text on failure.
    """
    client = self._http_client
    if client is None:
        return SendResult(success=False, error="Not connected")

    has_id, has_link = bool(media_id), bool(media_link)
    if has_id is has_link:
        # Both set or both missing.
        return SendResult(
            success=False,
            error="Exactly one of media_id or media_link must be set",
        )

    endpoint = self._graph_url("messages")
    request_headers = {
        "Authorization": f"Bearer {self._access_token}",
        "Content-Type": "application/json",
    }

    media_block: Dict[str, Any] = (
        {"id": media_id} if has_id else {"link": media_link}
    )
    captionable = media_kind in {"image", "video", "document"}
    if caption and captionable:
        media_block["caption"] = caption
    if filename and media_kind == "document":
        media_block["filename"] = filename

    payload: Dict[str, Any] = {
        "messaging_product": "whatsapp",
        "recipient_type": "individual",
        "to": chat_id,
        "type": media_kind,
        # The media object lives under a key named after its kind.
        media_kind: media_block,
    }
    if reply_to:
        payload["context"] = {"message_id": reply_to}

    try:
        response = await client.post(endpoint, headers=request_headers, json=payload)
    except Exception as exc:
        logger.exception("[whatsapp_cloud] media send failed")
        return SendResult(success=False, error=str(exc))

    if response.status_code != 200:
        try:
            error_body = response.json()
        except Exception:
            error_body = {"raw": response.text[:500]}
        error_msg = self._format_graph_error(error_body, response.status_code)
        logger.warning(
            "[whatsapp_cloud] media send rejected (status=%d, kind=%s): %s",
            response.status_code, media_kind, error_msg,
        )
        return SendResult(success=False, error=error_msg)

    # Success: the id is optional garnish — a malformed body still counts
    # as a delivered message.
    sent_id = None
    try:
        messages = response.json().get("messages") or []
        if messages:
            sent_id = messages[0].get("id")
    except Exception:
        sent_id = None
    return SendResult(success=True, message_id=sent_id)
async def send_image(
    self,
    chat_id: str,
    image_url: str,
    caption: Optional[str] = None,
    reply_to: Optional[str] = None,
    **kwargs,
) -> SendResult:
    """Send an image, normally given as a public URL.

    Delegates to ``_send_media_from_path_or_link`` with kind ``"image"``,
    forwarding ``caption`` and ``reply_to``.

    ``**kwargs`` swallows platform-agnostic arguments the base class may
    pass (e.g. ``metadata``) that this adapter has no use for; the other
    ``send_*`` media methods do the same.
    """
    delivery = self._send_media_from_path_or_link(
        chat_id,
        image_url,
        "image",
        caption=caption,
        reply_to=reply_to,
    )
    return await delivery
async def send_document(
    self,
    chat_id: str,
    file_path: str,
    caption: Optional[str] = None,
    file_name: Optional[str] = None,
    reply_to: Optional[str] = None,
    **kwargs,
) -> SendResult:
    """Send a file as a document attachment.

    The name shown to the recipient is ``file_name`` when given, otherwise
    the basename of ``file_path``. ``caption`` and ``reply_to`` are
    forwarded unchanged; extra keyword arguments are accepted and ignored.
    """
    if file_name:
        display_name = file_name
    else:
        display_name = os.path.basename(file_path)
    return await self._send_media_from_path_or_link(
        chat_id,
        file_path,
        "document",
        caption=caption,
        filename=display_name,
        reply_to=reply_to,
    )
+ """ + if not _FFMPEG_PATH: + self._warn_once_no_ffmpeg() + return None + + out_path = mp3_path.rsplit(".", 1)[0] + ".ogg" + try: + proc = await asyncio.create_subprocess_exec( + _FFMPEG_PATH, "-y", "-i", mp3_path, + "-c:a", "libopus", "-b:a", "32k", "-vbr", "on", + "-application", "voip", out_path, + stdout=asyncio.subprocess.DEVNULL, + stderr=asyncio.subprocess.PIPE, + ) + _, stderr = await proc.communicate() + if proc.returncode != 0 or not Path(out_path).exists(): + logger.error( + "[whatsapp_cloud] ffmpeg opus conversion failed " + "(returncode=%s): %s", + proc.returncode, + (stderr or b"").decode("utf-8", errors="replace")[:500], + ) + return None + return out_path + except Exception: + logger.exception("[whatsapp_cloud] ffmpeg subprocess raised") + return None + + def _warn_once_no_ffmpeg(self) -> None: + if self._warned_no_ffmpeg: + return + self._warned_no_ffmpeg = True + logger.warning( + "[whatsapp_cloud] ffmpeg not found on PATH — voice messages will " + "be delivered as MP3 audio attachments instead of native voice " + "notes (green waveform bubble). Install ffmpeg to enable: " + "Windows `winget install Gyan.FFmpeg`, macOS `brew install ffmpeg`, " + "Linux package manager." + ) + + # ------------------------------------------------------------------ inbound media + async def _download_media_to_cache( + self, + media_id: str, + *, + ext_hint: Optional[str] = None, + ) -> tuple[Optional[str], Optional[str]]: + """Two-step Graph media download: ``GET /`` → temp URL → bytes. + + Returns ``(local_path, mime_type)`` on success. ``mime_type`` + falls back to what Graph reports in the metadata response. + Returns ``(None, None)`` on any failure (logged). + + The temporary URL from step 1 is signed and expires in ~5 + minutes; we download immediately and never persist the URL. 
+ """ + if self._http_client is None: + return None, None + headers = {"Authorization": f"Bearer {self._access_token}"} + + # Step 1 — metadata (gives us a temporary signed URL + mime) + try: + meta_resp = await self._http_client.get( + f"{GRAPH_API_BASE}/{self._api_version}/{media_id}", + headers=headers, + ) + except Exception: + logger.exception( + "[whatsapp_cloud] media metadata fetch raised (id=%s)", media_id + ) + return None, None + if meta_resp.status_code != 200: + logger.warning( + "[whatsapp_cloud] media metadata fetch failed (id=%s, status=%d)", + media_id, meta_resp.status_code, + ) + return None, None + + try: + meta = meta_resp.json() + except Exception: + return None, None + temp_url = meta.get("url") + mime = meta.get("mime_type") or "" + if not temp_url: + return None, None + + # Step 2 — bytes (auth required even though URL is signed; Meta + # documents this explicitly — the URL alone is not enough). + try: + blob_resp = await self._http_client.get(temp_url, headers=headers) + except Exception: + logger.exception( + "[whatsapp_cloud] media bytes fetch raised (id=%s)", media_id + ) + return None, None + if blob_resp.status_code != 200: + logger.warning( + "[whatsapp_cloud] media bytes fetch failed (id=%s, status=%d)", + media_id, blob_resp.status_code, + ) + return None, None + + # Decide the extension. Prefer the override map so audio/ogg + # produces .ogg (not the technically-correct-but-broken .oga + # mimetypes returns by default). Fall back to ext_hint then + # ``.bin`` for unknown types. 
+ ext = ext_hint + if not ext and mime: + ext = _ext_for_mime(mime) + if not ext: + ext = ".bin" + + _INBOUND_MEDIA_CACHE.mkdir(parents=True, exist_ok=True) + out_path = _INBOUND_MEDIA_CACHE / f"{media_id}{ext}" + try: + out_path.write_bytes(blob_resp.content) + except OSError: + logger.exception( + "[whatsapp_cloud] failed to write cached media (id=%s)", media_id + ) + return None, None + + return str(out_path), mime or None + + + # ------------------------------------------------------------------ inbound + async def _handle_health(self, request: "web.Request") -> "web.Response": + return web.json_response( + { + "status": "ok", + "platform": self.platform.value, + "phone_number_id": self._phone_number_id, + "webhook_path": self._webhook_path, + "verify_token_configured": bool(self._verify_token), + "app_secret_configured": bool(self._app_secret), + "ffmpeg_present": _FFMPEG_PATH is not None, + "accepted": self._accepted_count, + "duplicates": self._duplicate_count, + "rejected_signature": self._rejected_signature_count, + } + ) + + async def _handle_verify(self, request: "web.Request") -> "web.Response": + """Meta subscription verification handshake. + + Meta calls GET ``?hub.mode=subscribe&hub.verify_token=... + &hub.challenge=...``. We must echo the challenge as plain text iff + ``hub.mode == "subscribe"`` AND ``hub.verify_token`` matches the + shared secret. Constant-time comparison. + """ + if not self._verify_token: + # Misconfigured server — refuse rather than silently accepting + # any verify_token, which would let an attacker subscribe. + return web.Response(status=503, text="verify_token not configured") + + mode = request.query.get("hub.mode", "") + token = request.query.get("hub.verify_token", "") + challenge = request.query.get("hub.challenge", "") + + if mode != "subscribe": + return web.Response(status=400, text="bad mode") + + # Constant-time compare to avoid token-length / token-content leaks + # via timing. ``hmac.compare_digest`` works on str. 
+ import hmac as _hmac + + if not _hmac.compare_digest(token, self._verify_token): + return web.Response(status=403, text="verify_token mismatch") + if not challenge: + return web.Response(status=400, text="missing challenge") + return web.Response(text=challenge, content_type="text/plain") + + async def _handle_webhook(self, request: "web.Request") -> "web.Response": + """Inbound webhook POST handler. + + Lifecycle: + 1. Read raw bytes (signature is over the raw body — JSON parsing + must NOT happen first, or the bytes change). + 2. Verify ``X-Hub-Signature-256`` HMAC against ``app_secret``. + 3. Parse JSON. + 4. Walk ``entry[].changes[].value.{messages, statuses, contacts}``. + 5. Per-message: dedup by wamid, build MessageEvent, dispatch via + ``handle_message`` (which runs the mixin's gating). + 6. Always respond 200 once we've ack'd a valid request — Meta + retries on non-200 for up to 7 days, and we don't want to + multiply downstream agent work because of a transient bug + during dispatch. + """ + try: + raw = await request.read() + except Exception: + return web.Response(status=400) + + # Meta's documented max payload is 3MB. Reject earlier than aiohttp + # would so we don't even compute HMAC over giant junk. + if len(raw) > 3 * 1024 * 1024: + return web.Response(status=413) + + # Refuse to accept anything if app_secret isn't configured. Without + # it we can't authenticate the sender, and the handler would be a + # data-injection point. Same defensive posture as the GET verify + # handshake refusing when verify_token is empty. + if not self._app_secret: + logger.error( + "[whatsapp_cloud] webhook POST refused: app_secret unset. " + "Set WHATSAPP_CLOUD_APP_SECRET to enable inbound delivery." 
+ ) + return web.Response(status=503, text="app_secret not configured") + + signature_header = request.headers.get("X-Hub-Signature-256", "") + if not self._verify_signature(raw, signature_header): + self._rejected_signature_count += 1 + logger.warning( + "[whatsapp_cloud] rejected webhook: invalid X-Hub-Signature-256 " + "(header=%r, body_len=%d)", + signature_header, + len(raw), + ) + return web.Response(status=401) + + # Parse only AFTER signature passes — bad JSON from an attacker is + # already filtered out, this just guards against Meta sending + # something malformed. + import json as _json + + try: + payload = _json.loads(raw) + except Exception: + logger.warning("[whatsapp_cloud] webhook body is not valid JSON") + return web.Response(status=400) + + if not isinstance(payload, dict): + return web.Response(status=400) + + await self._dispatch_payload(payload) + return web.Response(status=200) + + # ------------------------------------------------------------------ signature + def _verify_signature(self, raw_body: bytes, header: str) -> bool: + """Verify the X-Hub-Signature-256 HMAC. + + Meta sends ``sha256=``; we compute the same HMAC with + ``app_secret`` as the key and ``raw_body`` (UTF-8 bytes, not + re-serialized JSON) as the message. Constant-time compare. + """ + if not self._app_secret or not header: + return False + if not header.startswith("sha256="): + return False + expected_hex = header[len("sha256="):].strip() + if not expected_hex: + return False + computed = hmac.new( + self._app_secret.encode("utf-8"), + raw_body, + hashlib.sha256, + ).hexdigest() + return hmac.compare_digest(computed.lower(), expected_hex.lower()) + + # ------------------------------------------------------------------ dispatch + def _dedup_wamid(self, wamid: str) -> bool: + """Return True if this wamid is being seen for the first time. + + Returns False (and increments duplicate counter) if the wamid is + already in the in-memory cache. 
Cache is FIFO-evicted at + ``WAMID_DEDUP_CACHE_SIZE``. + """ + if not wamid: + # No wamid means we can't dedup — let it through. Meta should + # always populate ``id``, but be defensive. + return True + if wamid in self._seen_wamids: + self._duplicate_count += 1 + return False + self._seen_wamids[wamid] = True + # Trim oldest entries to stay under the cap. + while len(self._seen_wamids) > WAMID_DEDUP_CACHE_SIZE: + self._seen_wamids.popitem(last=False) + return True + + async def _dispatch_payload(self, payload: Dict[str, Any]) -> None: + """Walk a verified Meta webhook payload and dispatch each message. + + Payload shape (truncated): + {object, entry: [{id, changes: [{value: {messages, contacts, + statuses, metadata}, field: "messages"}]}]} + + We surface ``messages`` events as MessageEvents; ``statuses`` + events (sent/delivered/read/failed) are logged but not dispatched + — the agent doesn't currently consume delivery receipts and + forwarding them would create noisy synthetic events. + """ + if payload.get("object") != "whatsapp_business_account": + logger.debug( + "[whatsapp_cloud] ignoring non-WABA payload (object=%r)", + payload.get("object"), + ) + return + for entry in payload.get("entry") or []: + if not isinstance(entry, dict): + continue + for change in entry.get("changes") or []: + if not isinstance(change, dict): + continue + if change.get("field") != "messages": + # Other fields (account_alerts, template_status_update, + # etc.) are subscription-dependent and not message + # ingress. Silent skip. + continue + value = change.get("value") or {} + contacts = value.get("contacts") or [] + metadata = value.get("metadata") or {} + # Build a wa_id → profile-name index for the messages we're + # about to surface. 
+ contacts_by_waid: Dict[str, str] = {} + for contact in contacts: + if not isinstance(contact, dict): + continue + wa_id = str(contact.get("wa_id") or "").strip() + profile = contact.get("profile") or {} + name = str(profile.get("name") or "").strip() + if wa_id: + contacts_by_waid[wa_id] = name + + for raw_message in value.get("messages") or []: + if not isinstance(raw_message, dict): + continue + wamid = str(raw_message.get("id") or "").strip() + if not self._dedup_wamid(wamid): + logger.debug( + "[whatsapp_cloud] duplicate wamid %s, skipping", + wamid, + ) + continue + event = await self._build_message_event_from_cloud( + raw_message, contacts_by_waid, metadata + ) + if event is None: + continue + self._accepted_count += 1 + try: + await self.handle_message(event) + except Exception: + # Dispatch errors must not bubble out — Meta would + # retry the whole batch, multiplying the bug. + logger.exception( + "[whatsapp_cloud] handle_message raised for wamid %s", + wamid, + ) + + # Log status updates at debug level — useful for diagnosing + # "did Meta accept my outbound" without flooding INFO logs. + for status in value.get("statuses") or []: + if isinstance(status, dict): + logger.debug( + "[whatsapp_cloud] status %s for %s", + status.get("status"), + status.get("id"), + ) + + async def _dispatch_interactive_reply( + self, + raw_message: Dict[str, Any], + contacts_by_waid: Dict[str, str], + ) -> bool: + """Route an inbound interactive reply to the matching resolver. + + Returns True if the tap was claimed (caller should drop the + webhook entry without dispatching a fresh conversation turn). + Returns False when the id has no recognized prefix, no live + state entry, or the resolver itself reports no waiter — in + those cases the caller falls back to standard text-event + dispatch, which treats the button title as a normal user + message. That graceful fallback covers stale-tap and + cross-process-restart scenarios. 
+ + Dispatch table: + ``cl::`` → resolve_gateway_clarify + ``appr::approve|deny`` → resolve_gateway_approval + ``sc::`` → slash_confirm.resolve + """ + inter = raw_message.get("interactive") or {} + # button_reply (interactive.type=button) and list_reply + # (interactive.type=list) carry id+title in different sub-objects. + inner = inter.get("button_reply") or inter.get("list_reply") or {} + button_id = str(inner.get("id") or "").strip() + if not button_id: + return False + + # Clarify: cl:: + if button_id.startswith("cl:"): + parts = button_id.split(":", 2) + if len(parts) != 3: + return False + _, clarify_id, choice = parts + session_key = self._clarify_state.pop(clarify_id, None) + if not session_key: + logger.info( + "[whatsapp_cloud] clarify tap with no matching state " + "(clarify_id=%s) — likely stale; falling back to text", + clarify_id, + ) + return False + try: + from tools.clarify_gateway import resolve_gateway_clarify + except ImportError: + logger.warning( + "[whatsapp_cloud] clarify resolver unavailable; " + "falling back to text dispatch" + ) + return False + if choice == "other": + # User wants to type a free-form answer. Flip the entry + # into text-capture mode so the gateway's text-intercept + # (in _handle_message) picks up their next message and + # resolves the clarify. Without this flip, + # ``get_pending_for_session`` won't return the entry — + # the next text would fall through to the regular agent + # path, which collides with the agent thread still + # blocked in clarify and produces an "Interrupting + # current task" loop. + try: + from tools.clarify_gateway import mark_awaiting_text + flipped = mark_awaiting_text(clarify_id) + except Exception: + logger.exception( + "[whatsapp_cloud] mark_awaiting_text failed for %s", + clarify_id, + ) + flipped = False + if not flipped: + # Entry vanished between the user tap and our handler + # (timeout, /new, gateway restart). 
Drop the stale + # state and fall through to text dispatch so the + # user's tap isn't completely ignored. + logger.info( + "[whatsapp_cloud] clarify 'Other' tap but entry " + "missing (clarify_id=%s); falling back to text", + clarify_id, + ) + return False + # Put state back since we popped it earlier — keep the + # clarify_id → session_key mapping live in case future + # taps land on the same prompt. + self._clarify_state[clarify_id] = session_key + try: + await self.send( + str(raw_message.get("from") or ""), + "✏️ Type your answer:", + ) + except Exception: + logger.exception("[whatsapp_cloud] clarify other-prompt failed") + return True # claim so we don't also dispatch the tap as text + try: + idx = int(choice) + except ValueError: + logger.warning( + "[whatsapp_cloud] clarify tap had non-int choice: %r", + choice, + ) + # Put state back so a follow-up text can still resolve. + self._clarify_state[clarify_id] = session_key + return False + # Use the title text as the resolved response so the agent + # sees the human-readable answer, not the index. Title is + # the numeric label ("1", "2", ...) so we look up the + # full choice from the original prompt — but we didn't + # persist that. Fall back to passing the index; the agent + # has the prompt in context and can interpret it. + response_text = str(inner.get("title") or str(idx + 1)) + resolved = resolve_gateway_clarify(clarify_id, response_text) + if not resolved: + # Resolver couldn't find a waiter (e.g. agent already + # timed out). Fall through to text dispatch. 
+ logger.info( + "[whatsapp_cloud] clarify resolver reported no waiter " + "(clarify_id=%s) — falling back to text", clarify_id, + ) + return False + return True + + # Exec approval: appr::approve|deny + if button_id.startswith("appr:"): + parts = button_id.split(":", 2) + if len(parts) != 3: + return False + _, approval_id, choice = parts + session_key = self._exec_approval_state.pop(approval_id, None) + if not session_key: + logger.info( + "[whatsapp_cloud] approval tap with no matching state " + "(approval_id=%s) — likely stale; falling back to text", + approval_id, + ) + return False + if choice not in ("approve", "deny"): + self._exec_approval_state[approval_id] = session_key + return False + try: + from tools.approval import resolve_gateway_approval + except ImportError: + logger.warning( + "[whatsapp_cloud] approval resolver unavailable" + ) + return False + count = resolve_gateway_approval(session_key, choice) + if not count: + logger.info( + "[whatsapp_cloud] approval resolver reported no waiter " + "(session_key=%s) — likely already resolved", + session_key, + ) + # Send confirmation message — paralleling Telegram's UX. + try: + confirm_text = ( + "✅ Approved." if choice == "approve" else "❌ Denied." 
+ ) + await self.send(str(raw_message.get("from") or ""), confirm_text) + except Exception: + logger.exception("[whatsapp_cloud] approval confirm failed") + return True + + # Slash confirm: sc:: + if button_id.startswith("sc:"): + parts = button_id.split(":", 2) + if len(parts) != 3: + return False + _, choice, confirm_id = parts + session_key = self._slash_confirm_state.pop(confirm_id, None) + if not session_key: + logger.info( + "[whatsapp_cloud] slash_confirm tap with no matching state " + "(confirm_id=%s) — likely stale", confirm_id, + ) + return False + if choice not in ("once", "always", "cancel"): + self._slash_confirm_state[confirm_id] = session_key + return False + try: + from tools import slash_confirm as _slash_confirm_mod + except ImportError: + logger.warning( + "[whatsapp_cloud] slash_confirm resolver unavailable" + ) + return False + try: + result_text = await _slash_confirm_mod.resolve( + session_key, confirm_id, choice + ) + except Exception: + logger.exception("[whatsapp_cloud] slash_confirm.resolve failed") + return True # still claim the tap; surfacing it as text wouldn't help + if result_text: + try: + await self.send(str(raw_message.get("from") or ""), result_text) + except Exception: + logger.exception("[whatsapp_cloud] slash_confirm reply failed") + return True + + # Unknown prefix — let text dispatch handle the title as a + # regular message. Could be a tap from a plugin-defined adapter + # we don't know about; treating it as text is the safe default. + return False + + async def _build_message_event_from_cloud( + self, + raw_message: Dict[str, Any], + contacts_by_waid: Dict[str, str], + metadata: Dict[str, Any], + ) -> Optional[MessageEvent]: + """Convert a Cloud-API message object into a Hermes MessageEvent. + + Phase 4 expands beyond text to download inbound media (image, + video, audio/voice, document, sticker) by ``media_id`` via the + two-step Graph endpoint. 
Cached files are populated into + ``media_urls`` / ``media_types`` so the agent's vision and STT + layers see them. Text-readable documents (.txt, .md, .json, + source code, etc.) are read and prepended to the message body + up to 100KB — same heuristic the Baileys adapter uses. + + Returns None if the message is filtered out by the mixin's + gating (broadcast filter, allow-list, mention requirements). + """ + msg_type_str = str(raw_message.get("type") or "text").lower() + + # Interactive replies (button taps, list selections) carry an ``id`` + # we set when sending the prompt. Route those to the appropriate + # gateway resolver BEFORE falling through to text dispatch — the + # resolver unblocks the waiting agent thread, so we don't want to + # also kick a fresh conversation turn off the same tap. + if msg_type_str == "interactive": + handled = await self._dispatch_interactive_reply( + raw_message, contacts_by_waid + ) + if handled: + return None + + body = "" + if msg_type_str == "text": + text = raw_message.get("text") or {} + body = str(text.get("body") or "") + elif msg_type_str in {"button", "interactive"}: + # Quick-reply buttons. Treat the button payload as text so the + # agent can reason about the user's choice. + if msg_type_str == "button": + body = str((raw_message.get("button") or {}).get("text") or "") + else: + inter = raw_message.get("interactive") or {} + # button_reply / list_reply both expose ``title`` + inner = inter.get("button_reply") or inter.get("list_reply") or {} + body = str(inner.get("title") or "") + elif msg_type_str in {"image", "video", "audio", "voice", "document", "sticker"}: + # Captions live on image / video / document. Other media types + # don't carry a caption in Meta's spec, but be defensive. 
+ inner = raw_message.get(msg_type_str) or {} + body = str(inner.get("caption") or "") + + message_type = { + "text": MessageType.TEXT, + "image": MessageType.PHOTO, + "video": MessageType.VIDEO, + "audio": MessageType.VOICE, + "voice": MessageType.VOICE, + "document": MessageType.DOCUMENT, + "sticker": MessageType.PHOTO, + "button": MessageType.TEXT, + "interactive": MessageType.TEXT, + "location": MessageType.TEXT, + "contacts": MessageType.TEXT, + }.get(msg_type_str, MessageType.TEXT) + + sender_id = str(raw_message.get("from") or "").strip() + sender_name = contacts_by_waid.get(sender_id, "") + + # Cloud API doesn't have a separate "chat" entity for DMs — chat_id + # equals the sender's wa_id. Group support is deferred to v2. + # + # Defensive guard: if Meta ever delivers a group-shaped payload + # (group support is capability-tier gated by Meta; some WABAs + # have it enabled), refuse rather than silently treating it as + # a DM. Group messages carry a ``chat`` field on the message + # object identifying the group JID — its absence signals DM. + chat_field = raw_message.get("chat") + if chat_field: + logger.warning( + "[whatsapp_cloud] received group-shaped message (chat=%s, " + "wamid=%s) — group support is not yet implemented; dropping. " + "Use the Baileys whatsapp adapter for group chats.", + chat_field, raw_message.get("id"), + ) + return None + + chat_id = sender_id + + # Build the data dict the mixin's _should_process_message expects. + # Cloud API uses different field names from Baileys, so we adapt. + gating_data = { + "chatId": chat_id, + "senderId": sender_id, + "isGroup": False, # Phase 3 = DM only + "body": body, + } + if not self._should_process_message(gating_data): + return None + + # Download media if this is a non-text message type. Inbound media + # arrives as ``{type: "image", image: {id, mime_type, sha256, ...}}``. 
+ media_urls: list[str] = [] + media_types: list[str] = [] + if msg_type_str in {"image", "video", "audio", "voice", "document", "sticker"}: + inner = raw_message.get(msg_type_str) or {} + media_id = str(inner.get("id") or "").strip() + inbound_mime = str(inner.get("mime_type") or "").strip() + if media_id: + ext_hint = None + if inbound_mime: + ext_hint = _ext_for_mime(inbound_mime) + local_path, dl_mime = await self._download_media_to_cache( + media_id, ext_hint=ext_hint + ) + if local_path: + media_urls.append(local_path) + media_types.append(dl_mime or inbound_mime or "application/octet-stream") + logger.info( + "[whatsapp_cloud] cached inbound %s media: %s", + msg_type_str, local_path, + ) + else: + logger.warning( + "[whatsapp_cloud] failed to download inbound %s (id=%s) — " + "agent will see message metadata but not the binary", + msg_type_str, media_id, + ) + # Document: original filename for the agent's UX. + if msg_type_str == "document": + fname = str(inner.get("filename") or "").strip() + if fname and not body: + body = f"[Document: {fname}]" + + # For text-readable documents, inject the file content directly into + # the message body so the agent can reason about it without a + # separate read_file call. Same heuristic the Baileys adapter uses. + # 100KB cap matches Telegram/Discord/Slack. 
+ MAX_TEXT_INJECT_BYTES = 100 * 1024 + if msg_type_str == "document" and media_urls: + for doc_path in media_urls: + ext = Path(doc_path).suffix.lower() + if ext in { + ".txt", ".md", ".csv", ".json", ".xml", ".yaml", ".yml", + ".log", ".py", ".js", ".ts", ".html", ".css", + }: + try: + file_size = Path(doc_path).stat().st_size + if file_size > MAX_TEXT_INJECT_BYTES: + logger.info( + "[whatsapp_cloud] skipping text injection for %s " + "(%d bytes > %d)", + doc_path, file_size, MAX_TEXT_INJECT_BYTES, + ) + continue + content = Path(doc_path).read_text( + encoding="utf-8", errors="replace" + ) + display_name = Path(doc_path).name + injection = f"[Content of {display_name}]:\n{content}" + body = f"{injection}\n\n{body}" if body else injection + except OSError: + logger.exception( + "[whatsapp_cloud] failed to read document text: %s", + doc_path, + ) + + # context.id is set when the user replied to one of our messages. + context = raw_message.get("context") or {} + reply_to_id = str(context.get("id") or "").strip() or None + + source = self.build_source( + chat_id=chat_id, + chat_name=sender_name or chat_id, + chat_type="dm", + user_id=sender_id, + user_name=sender_name or None, + ) + + # Cloud API timestamps are unix seconds (string). MessageEvent + # doesn't enforce a type but downstream code formats with it. + wamid = str(raw_message.get("id") or "") or None + if wamid and chat_id: + # Refresh the per-chat latest-wamid cache so a subsequent + # send_typing call can attach the indicator + read receipt + # to this message. Done HERE (after _should_process_message + # gating) so filtered messages don't leak typing on + # unwanted inbound traffic. 
class WhatsAppBehaviorMixin:
    """Shared behavior for all WhatsApp adapters (Baileys + Cloud API).

    See module docstring for the attribute contract the host adapter must
    satisfy. This mixin owns no state of its own — every value it touches
    is either a class attribute or set by the adapter's ``__init__``.
    """

    # WhatsApp message limits — practical UX limit, not protocol max.
    # WhatsApp allows ~65K but long messages are unreadable on mobile.
    MAX_MESSAGE_LENGTH: int = 4096

    DEFAULT_REPLY_PREFIX: str = "⚕ *Hermes Agent*\n────────────\n"

    # ------------------------------------------------------------------ config
    def _effective_reply_prefix(self) -> str:
        """Return the prefix to add to outgoing replies in self-chat mode.

        Resolution order: empty string unless ``WHATSAPP_MODE`` is
        ``self-chat`` (the default); then the adapter's ``_reply_prefix``,
        then ``WHATSAPP_REPLY_PREFIX``, then ``DEFAULT_REPLY_PREFIX``.
        Literal ``\\n`` sequences in configured values become newlines.

        Subclasses that don't have a self-chat concept (the Cloud API
        adapter) can override this to always return ``""`` or apply a
        different policy.
        """
        whatsapp_mode = os.getenv("WHATSAPP_MODE", "self-chat")
        if whatsapp_mode != "self-chat":
            return ""
        if self._reply_prefix is not None:
            return self._reply_prefix.replace("\\n", "\n")
        env_prefix = os.getenv("WHATSAPP_REPLY_PREFIX")
        if env_prefix is not None:
            return env_prefix.replace("\\n", "\n")
        return self.DEFAULT_REPLY_PREFIX

    def _outgoing_chunk_limit(self) -> int:
        """Reserve room for the reply prefix so the final message fits."""
        prefix_len = len(self._effective_reply_prefix())
        # Keep enough space for truncate_message's pagination indicator and
        # code-fence repair even if a user configures a very long prefix.
        return max(1024, self.MAX_MESSAGE_LENGTH - prefix_len)

    def _whatsapp_require_mention(self) -> bool:
        """Whether group messages must address the bot to be processed.

        ``config.extra["require_mention"]`` wins when present (strings are
        parsed as booleans); otherwise ``WHATSAPP_REQUIRE_MENTION`` is
        consulted, defaulting to false.
        """
        truthy = {"true", "1", "yes", "on"}
        configured = self.config.extra.get("require_mention")
        if configured is not None:
            if isinstance(configured, str):
                return configured.lower() in truthy
            return bool(configured)
        return os.getenv("WHATSAPP_REQUIRE_MENTION", "false").lower() in truthy

    def _whatsapp_free_response_chats(self) -> set[str]:
        """Chat ids where the mention requirement is waived.

        Read from ``config.extra["free_response_chats"]`` (list or
        comma-separated string), falling back to the
        ``WHATSAPP_FREE_RESPONSE_CHATS`` environment variable.
        """
        raw = self.config.extra.get("free_response_chats")
        if raw is None:
            raw = os.getenv("WHATSAPP_FREE_RESPONSE_CHATS", "")
        # Same list-or-comma-string grammar as the allow-lists.
        return self._coerce_allow_list(raw)

    @staticmethod
    def _coerce_allow_list(raw) -> set[str]:
        """Parse allow_from / group_allow_from from config or env var.

        Accepts ``None`` (empty set), a list (each item stringified and
        stripped), or anything else (stringified and split on commas).
        Blank entries are dropped.
        """
        if raw is None:
            return set()
        if isinstance(raw, list):
            return {str(part).strip() for part in raw if str(part).strip()}
        return {part.strip() for part in str(raw).split(",") if part.strip()}

    # ------------------------------------------------------------------ JID helpers
    @staticmethod
    def _normalize_whatsapp_id(value: Optional[str]) -> str:
        """Strip ``value`` and, when it contains both ``:`` and ``@``,
        replace the first ``:`` with ``@`` so that ``split("@", 1)[0]``
        yields the bare id. Returns ``""`` for falsy input.
        """
        if not value:
            return ""
        normalized = str(value).strip()
        if ":" in normalized and "@" in normalized:
            normalized = normalized.replace(":", "@", 1)
        return normalized

    @staticmethod
    def _is_broadcast_chat(chat_id: str) -> bool:
        """True for WhatsApp pseudo-chats that aren't real conversations.

        Covers Status updates (Stories) and Channel/Newsletter broadcasts.
        These show up as inbound messages on Baileys but the agent should
        never reply — answering a Story update spams the contact's status
        feed, and Channel posts aren't addressable in the first place.
        """
        if not chat_id:
            return False
        cid = chat_id.strip().lower()
        # @broadcast suffix covers status@broadcast plus any future
        # broadcast-list variants. @newsletter is the Channel JID suffix.
        return cid.endswith(("@broadcast", "@newsletter"))

    # ------------------------------------------------------------------ gating
    def _is_dm_allowed(self, sender_id: str) -> bool:
        """Check whether a DM from the given sender should be processed."""
        if self._dm_policy == "disabled":
            return False
        if self._dm_policy == "allowlist":
            return sender_id in self._allow_from
        # "open" — all DMs allowed
        return True

    def _is_group_allowed(self, chat_id: str) -> bool:
        """Check whether a group chat should be processed."""
        if self._group_policy == "disabled":
            return False
        if self._group_policy == "allowlist":
            return chat_id in self._group_allow_from
        # "open" — all groups allowed
        return True

    def _compile_mention_patterns(self):
        """Compile the configured mention regexes (case-insensitive).

        Source: ``config.extra["mention_patterns"]`` (string or list), else
        the ``WHATSAPP_MENTION_PATTERNS`` env var, which may be a JSON
        value or, failing that, one regex per line. Invalid entries are
        logged and skipped. Returns a list of compiled patterns.
        """
        patterns = self.config.extra.get("mention_patterns")
        if patterns is None:
            raw = os.getenv("WHATSAPP_MENTION_PATTERNS", "").strip()
            if raw:
                try:
                    patterns = json.loads(raw)
                except Exception:
                    # Not JSON: treat as newline-separated regexes. ``raw``
                    # is non-empty after strip(), so this always yields at
                    # least one pattern; commas are deliberately not a
                    # separator because they are valid inside a regex
                    # (e.g. ``a{1,3}``).
                    patterns = [
                        part.strip() for part in raw.splitlines() if part.strip()
                    ]
        if patterns is None:
            return []
        if isinstance(patterns, str):
            patterns = [patterns]
        if not isinstance(patterns, list):
            logger.warning(
                "[%s] whatsapp mention_patterns must be a list or string; got %s",
                self.name,
                type(patterns).__name__,
            )
            return []

        compiled = []
        for pattern in patterns:
            if not isinstance(pattern, str) or not pattern.strip():
                continue
            try:
                compiled.append(re.compile(pattern, re.IGNORECASE))
            except re.error as exc:
                logger.warning(
                    "[%s] Invalid WhatsApp mention pattern %r: %s",
                    self.name,
                    pattern,
                    exc,
                )
        if compiled:
            logger.info(
                "[%s] Loaded %d WhatsApp mention pattern(s)", self.name, len(compiled)
            )
        return compiled

    def _bot_ids_from_message(self, data: Dict[str, Any]) -> set[str]:
        """Normalized, non-empty ids listed under ``data["botIds"]``."""
        bot_ids = set()
        for candidate in data.get("botIds") or []:
            normalized = self._normalize_whatsapp_id(candidate)
            if normalized:
                bot_ids.add(normalized)
        return bot_ids

    def _message_is_reply_to_bot(self, data: Dict[str, Any]) -> bool:
        """True when ``quotedParticipant`` is one of the message's bot ids."""
        quoted_participant = self._normalize_whatsapp_id(data.get("quotedParticipant"))
        if not quoted_participant:
            return False
        return quoted_participant in self._bot_ids_from_message(data)

    def _message_mentions_bot(self, data: Dict[str, Any]) -> bool:
        """True when the message @-mentions the bot or its body contains
        a bot id's bare (pre-``@``) part.
        """
        bot_ids = self._bot_ids_from_message(data)
        if not bot_ids:
            return False
        mentioned_ids = {
            nid
            for candidate in (data.get("mentionedIds") or [])
            if (nid := self._normalize_whatsapp_id(candidate))
        }
        if mentioned_ids & bot_ids:
            return True

        body = str(data.get("body") or "")
        lower_body = body.lower()
        for bot_id in bot_ids:
            bare_id = bot_id.split("@", 1)[0].lower()
            if bare_id and (f"@{bare_id}" in lower_body or bare_id in lower_body):
                return True
        return False

    def _message_matches_mention_patterns(self, data: Dict[str, Any]) -> bool:
        """True when any configured mention regex matches the body."""
        if not self._mention_patterns:
            return False
        body = str(data.get("body") or "")
        return any(pattern.search(body) for pattern in self._mention_patterns)

    def _clean_bot_mention_text(self, text: str, data: Dict[str, Any]) -> str:
        """Remove ``@<bot id>`` mentions (plus trailing ``,:-`` and
        whitespace) from ``text``. Falls back to the original text when
        stripping would leave nothing.
        """
        if not text:
            return text
        bot_ids = self._bot_ids_from_message(data)
        cleaned = text
        for bot_id in bot_ids:
            bare_id = bot_id.split("@", 1)[0]
            if bare_id:
                cleaned = re.sub(
                    rf"@{re.escape(bare_id)}\b[,:\-]*\s*", "", cleaned
                )
        return cleaned.strip() or text

    def _should_process_message(self, data: Dict[str, Any]) -> bool:
        """Decide whether an inbound message should reach the agent.

        ``data`` uses the bridge field names: ``chatId``, ``senderId`` (or
        ``from``), ``isGroup``, ``body``, plus the optional mention fields.
        """
        chat_id = str(data.get("chatId") or "")
        # WhatsApp uses pseudo-chats for Status updates (Stories) and
        # Channel/Newsletter broadcasts. These are not real conversations
        # and the agent should never reply to them — even in self-chat mode
        # where the bridge may surface them as "fromMe" events.
        if self._is_broadcast_chat(chat_id):
            return False

        if not data.get("isGroup", False):
            sender_id = str(data.get("senderId") or data.get("from") or "")
            # DMs that pass the policy gate are always processed.
            return self._is_dm_allowed(sender_id)

        if not self._is_group_allowed(chat_id):
            return False
        # Group messages: check mention / free-response settings
        if chat_id in self._whatsapp_free_response_chats():
            return True
        if not self._whatsapp_require_mention():
            return True
        body = str(data.get("body") or "").strip()
        if body.startswith("/"):
            return True
        if self._message_is_reply_to_bot(data):
            return True
        if self._message_mentions_bot(data):
            return True
        return self._message_matches_mention_patterns(data)

    # ------------------------------------------------------------------ formatting
    def format_message(self, content: str) -> str:
        """Convert standard markdown to WhatsApp-compatible formatting.

        WhatsApp supports: *bold*, _italic_, ~strikethrough~, ```code```,
        and monospaced `inline`. Standard markdown uses different syntax
        for bold/strikethrough, so we convert here.

        Code blocks (``` fenced) and inline code (`) are protected from
        conversion via placeholder substitution.
        """
        if not content:
            return content

        # --- 1. Protect fenced code blocks from formatting changes ---
        _FENCE_PH = "\x00FENCE"
        fences: list[str] = []

        def _save_fence(m: re.Match) -> str:
            fences.append(m.group(0))
            return f"{_FENCE_PH}{len(fences) - 1}\x00"

        result = re.sub(r"```[\s\S]*?```", _save_fence, content)

        # --- 2. Protect inline code ---
        _CODE_PH = "\x00CODE"
        codes: list[str] = []

        def _save_code(m: re.Match) -> str:
            codes.append(m.group(0))
            return f"{_CODE_PH}{len(codes) - 1}\x00"

        result = re.sub(r"`[^`\n]+`", _save_code, result)

        # --- 3. Convert markdown formatting to WhatsApp syntax ---
        # Bold: **text** or __text__ → *text*
        result = re.sub(r"\*\*(.+?)\*\*", r"*\1*", result)
        result = re.sub(r"__(.+?)__", r"*\1*", result)
        # Strikethrough: ~~text~~ → ~text~
        result = re.sub(r"~~(.+?)~~", r"~\1~", result)
        # Single-asterisk *text* is left untouched. Note that WhatsApp
        # renders it as bold (not italic as in markdown); _text_ is
        # italic in both syntaxes and needs no conversion.

        # --- 4. Convert markdown headers to bold text ---
        # # Header → *Header*
        def _header_to_bold(m: re.Match) -> str:
            # The whole header becomes bold, so unwrap *spans* that step 3
            # produced inside it first — otherwise "# **Title**" would end
            # up as "**Title**", which WhatsApp shows with stray asterisks.
            title = re.sub(r"\*([^*\n]+)\*", r"\1", m.group(1))
            return f"*{title}*"

        result = re.sub(
            r"^#{1,6}\s+(.+)$", _header_to_bold, result, flags=re.MULTILINE
        )

        # --- 5. Convert markdown links: [text](url) → text (url) ---
        result = re.sub(r"\[([^\]]+)\]\(([^)]+)\)", r"\1 (\2)", result)

        # --- 6. Restore protected sections ---
        for i, fence in enumerate(fences):
            result = result.replace(f"{_FENCE_PH}{i}\x00", fence)
        for i, code in enumerate(codes):
            result = result.replace(f"{_CODE_PH}{i}\x00", code)

        return result
def cmd_whatsapp_cloud(args):
    """Run the WhatsApp Business Cloud API setup wizard (official Meta integration).

    The wizard collects the Meta-side credentials (Phone Number ID, Access
    Token, App Secret, optional App/WABA IDs) and the webhook configuration,
    validating the shape of each field to catch common mistakes such as
    pasting a phone number into the Phone Number ID field.

    This is a separate command from ``hermes whatsapp`` (the Baileys bridge
    wizard); the two adapters complement each other. The implementation
    lives in ``hermes_cli/setup_whatsapp_cloud.py``.

    Returns whatever ``run_whatsapp_cloud_setup()`` returns.
    """
    # The wizard is interactive, so refuse to start without a TTY.
    _require_tty("whatsapp-cloud")

    # Imported lazily so the wizard module is only loaded for this command.
    from hermes_cli.setup_whatsapp_cloud import run_whatsapp_cloud_setup

    exit_code = run_whatsapp_cloud_setup()
    return exit_code
Distinct from `hermes whatsapp` which sets up " + "the Baileys bridge for personal accounts." + ), + ) + whatsapp_cloud_parser.set_defaults(func=cmd_whatsapp_cloud) + # ========================================================================= # slack command # ========================================================================= diff --git a/hermes_cli/nous_subscription.py b/hermes_cli/nous_subscription.py index be027e85cd1..9809827dcfa 100644 --- a/hermes_cli/nous_subscription.py +++ b/hermes_cli/nous_subscription.py @@ -66,6 +66,10 @@ class NousSubscriptionFeatures: def tts(self) -> NousFeatureState: return self.features["tts"] + @property + def stt(self) -> NousFeatureState: + return self.features["stt"] + @property def browser(self) -> NousFeatureState: return self.features["browser"] @@ -75,7 +79,7 @@ class NousSubscriptionFeatures: return self.features["modal"] def items(self) -> Iterable[NousFeatureState]: - ordered = ("web", "image_gen", "tts", "browser", "modal") + ordered = ("web", "image_gen", "tts", "stt", "browser", "modal") for key in ordered: yield self.features[key] @@ -159,6 +163,16 @@ def _tts_label(current_provider: str) -> str: return mapping.get(current_provider or "edge", current_provider or "Edge TTS") +def _stt_label(current_provider: str) -> str: + mapping = { + "openai": "OpenAI Whisper", + "groq": "Groq Whisper", + "mistral": "Mistral Voxtral Transcribe", + "local": "Local faster-whisper", + } + return mapping.get(current_provider or "local", current_provider or "Local faster-whisper") + + def _resolve_browser_feature_state( *, browser_tool_enabled: bool, @@ -251,6 +265,7 @@ def get_nous_subscription_features( web_cfg = config.get("web") if isinstance(config.get("web"), dict) else {} tts_cfg = config.get("tts") if isinstance(config.get("tts"), dict) else {} + stt_cfg = config.get("stt") if isinstance(config.get("stt"), dict) else {} browser_cfg = config.get("browser") if isinstance(config.get("browser"), dict) else {} terminal_cfg = 
config.get("terminal") if isinstance(config.get("terminal"), dict) else {} @@ -260,6 +275,11 @@ def get_nous_subscription_features( web_search_backend = str(web_cfg.get("search_backend") or "").strip().lower() web_extract_backend = str(web_cfg.get("extract_backend") or "").strip().lower() tts_provider = str(tts_cfg.get("provider") or "edge").strip().lower() + # STT default is "local" (faster-whisper) per DEFAULT_CONFIG, which + # requires `pip install faster-whisper`. For Nous subscribers we'd + # rather route through the managed OpenAI audio gateway — see + # apply_nous_managed_defaults below. + stt_provider = str(stt_cfg.get("provider") or "local").strip().lower() browser_provider_explicit = "cloud_provider" in browser_cfg browser_provider = normalize_browser_cloud_provider( browser_cfg.get("cloud_provider") if browser_provider_explicit else None @@ -276,6 +296,7 @@ def get_nous_subscription_features( # prevent gateway routing. web_use_gateway = _uses_gateway(web_cfg) tts_use_gateway = _uses_gateway(tts_cfg) + stt_use_gateway = _uses_gateway(stt_cfg) browser_use_gateway = _uses_gateway(browser_cfg) image_gen_cfg = config.get("image_gen") if isinstance(config.get("image_gen"), dict) else {} image_use_gateway = _uses_gateway(image_gen_cfg) @@ -293,6 +314,22 @@ def get_nous_subscription_features( direct_browser_use = bool(get_env_value("BROWSER_USE_API_KEY")) direct_modal = has_direct_modal_credentials() + # STT direct providers. OpenAI Whisper reuses the same audio key as + # OpenAI TTS — resolve_openai_audio_api_key() reads VOICE_TOOLS_OPENAI_KEY + # and falls back to OPENAI_API_KEY. The local provider's "direct" + # signal is whether faster-whisper is importable; we lazy-import so + # this module stays cheap on the happy path. 
+ direct_openai_stt = bool(resolve_openai_audio_api_key()) + direct_groq_stt = bool(get_env_value("GROQ_API_KEY")) + direct_mistral_stt = bool(get_env_value("MISTRAL_API_KEY")) + try: + from tools.transcription_tools import _HAS_FASTER_WHISPER + local_stt_available = bool(_HAS_FASTER_WHISPER) or bool( + get_env_value("HERMES_LOCAL_STT_COMMAND") + ) + except Exception: + local_stt_available = bool(get_env_value("HERMES_LOCAL_STT_COMMAND")) + # When use_gateway is set, suppress direct credentials for managed detection if web_use_gateway: direct_firecrawl = False @@ -304,6 +341,11 @@ def get_nous_subscription_features( if tts_use_gateway: direct_openai_tts = False direct_elevenlabs = False + if stt_use_gateway: + direct_openai_stt = False + direct_groq_stt = False + direct_mistral_stt = False + local_stt_available = False if browser_use_gateway: direct_browser_use = False direct_browserbase = False @@ -311,6 +353,10 @@ def get_nous_subscription_features( managed_web_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("firecrawl") managed_image_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("fal-queue") managed_tts_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("openai-audio") + # STT and TTS share the same managed gateway endpoint ("openai-audio") + # because the OpenAI audio API covers both /audio/speech (TTS) and + # /audio/transcriptions (STT). One probe, used by both. + managed_stt_available = managed_tts_available managed_browser_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("browser-use") managed_modal_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("modal") modal_state = resolve_modal_backend_state( @@ -361,6 +407,24 @@ def get_nous_subscription_features( ) tts_active = bool(tts_tool_enabled and tts_available) + # STT availability per provider. 
Unlike TTS, STT isn't a model-callable + # tool — the gateway voice middleware calls it on every inbound voice + # message — so toolset_enabled is N/A and we treat stt as always + # "enabled" if a usable provider is configured. + stt_current_provider = stt_provider or "local" + stt_managed = ( + stt_current_provider == "openai" + and managed_stt_available + and not direct_openai_stt + ) + stt_available = bool( + (stt_current_provider == "local" and local_stt_available) + or (stt_current_provider == "openai" and (managed_stt_available or direct_openai_stt)) + or (stt_current_provider == "groq" and direct_groq_stt) + or (stt_current_provider == "mistral" and direct_mistral_stt) + ) + stt_active = stt_available + browser_local_available = _has_agent_browser() ( browser_current_provider, @@ -415,6 +479,13 @@ def get_nous_subscription_features( if isinstance(raw_tts_cfg, dict) and "provider" in raw_tts_cfg: tts_explicit_configured = tts_provider not in {"", "edge"} + # STT considers any non-default provider explicit. "local" is the + # DEFAULT_CONFIG seed, so seeing it doesn't mean the user picked it. + stt_explicit_configured = False + raw_stt_cfg = config.get("stt") + if isinstance(raw_stt_cfg, dict) and "provider" in raw_stt_cfg: + stt_explicit_configured = stt_provider not in {"", "local"} + features = { "web": NousFeatureState( key="web", @@ -452,6 +523,21 @@ def get_nous_subscription_features( current_provider=_tts_label(tts_current_provider), explicit_configured=tts_explicit_configured, ), + "stt": NousFeatureState( + key="stt", + label="Speech-to-text", + included_by_default=True, + available=stt_available, + active=stt_active, + managed_by_nous=stt_managed, + direct_override=stt_active and not stt_managed, + # STT isn't toolset-gated (gateway middleware calls it + # unconditionally on inbound voice), so report True so the + # status display doesn't flag it as "tool disabled". 
+ toolset_enabled=True, + current_provider=_stt_label(stt_current_provider), + explicit_configured=stt_explicit_configured, + ), "browser": NousFeatureState( key="browser", label="Browser automation", @@ -514,6 +600,11 @@ def apply_nous_managed_defaults( tts_cfg = {} config["tts"] = tts_cfg + stt_cfg = config.get("stt") + if not isinstance(stt_cfg, dict): + stt_cfg = {} + config["stt"] = stt_cfg + browser_cfg = config.get("browser") if not isinstance(browser_cfg, dict): browser_cfg = {} @@ -535,6 +626,18 @@ def apply_nous_managed_defaults( tts_cfg["provider"] = "openai" changed.add("tts") + # STT: same pattern as TTS. The DEFAULT_CONFIG seed is "local" + # (requires `pip install faster-whisper`); for Nous subscribers we + # flip it to "openai" so the managed audio gateway handles transcription + # via the same auth as TTS. Skipped when the user has explicitly + # configured STT or has direct credentials for a non-managed provider. + if not features.stt.explicit_configured and not ( + get_env_value("GROQ_API_KEY") + or get_env_value("MISTRAL_API_KEY") + ): + stt_cfg["provider"] = "openai" + changed.add("stt") + if "browser" in selected_toolsets and not features.browser.explicit_configured and not ( get_env_value("BROWSER_USE_API_KEY") or get_env_value("BROWSERBASE_API_KEY") @@ -556,6 +659,7 @@ _GATEWAY_TOOL_LABELS = { "web": "Web search & extract (Firecrawl)", "image_gen": "Image generation (FAL)", "tts": "Text-to-speech (OpenAI TTS)", + "stt": "Speech-to-text (OpenAI Whisper)", "browser": "Browser automation (Browser Use)", } @@ -575,6 +679,15 @@ def _get_gateway_direct_credentials() -> Dict[str, bool]: resolve_openai_audio_api_key() or get_env_value("ELEVENLABS_API_KEY") ), + # STT direct credentials. OpenAI Whisper shares the audio key + # with TTS via resolve_openai_audio_api_key() — counting it here + # too is intentional: if the user has an OpenAI audio key they + # don't need the gateway for either. 
+ "stt": bool( + resolve_openai_audio_api_key() + or get_env_value("GROQ_API_KEY") + or get_env_value("MISTRAL_API_KEY") + ), "browser": bool( get_env_value("BROWSER_USE_API_KEY") or (get_env_value("BROWSERBASE_API_KEY") and get_env_value("BROWSERBASE_PROJECT_ID")) @@ -586,10 +699,11 @@ _GATEWAY_DIRECT_LABELS = { "web": "Firecrawl/Exa/Parallel/Tavily key", "image_gen": "FAL key", "tts": "OpenAI/ElevenLabs key", + "stt": "OpenAI/Groq/Mistral key", "browser": "Browser Use/Browserbase key", } -_ALL_GATEWAY_KEYS = ("web", "image_gen", "tts", "browser") +_ALL_GATEWAY_KEYS = ("web", "image_gen", "tts", "stt", "browser") def get_gateway_eligible_tools( @@ -625,6 +739,7 @@ def get_gateway_eligible_tools( "web": _uses_gateway(config.get("web")), "image_gen": _uses_gateway(config.get("image_gen")), "tts": _uses_gateway(config.get("tts")), + "stt": _uses_gateway(config.get("stt")), "browser": _uses_gateway(config.get("browser")), } @@ -664,6 +779,11 @@ def apply_gateway_defaults( tts_cfg = {} config["tts"] = tts_cfg + stt_cfg = config.get("stt") + if not isinstance(stt_cfg, dict): + stt_cfg = {} + config["stt"] = stt_cfg + browser_cfg = config.get("browser") if not isinstance(browser_cfg, dict): browser_cfg = {} @@ -679,6 +799,11 @@ def apply_gateway_defaults( tts_cfg["use_gateway"] = True changed.add("tts") + if "stt" in tool_keys: + stt_cfg["provider"] = "openai" + stt_cfg["use_gateway"] = True + changed.add("stt") + if "browser" in tool_keys: browser_cfg["cloud_provider"] = "browser-use" browser_cfg["use_gateway"] = True @@ -717,8 +842,9 @@ def prompt_enable_tool_gateway(config: Dict[str, object]) -> set[str]: desc_parts: list[str] = [ "", " The Tool Gateway gives you access to web search, image generation,", - " text-to-speech, and browser automation through your Nous subscription.", - " No need to sign up for separate API keys — just pick the tools you want.", + " text-to-speech, speech-to-text, and browser automation through your", + " Nous subscription. 
No need to sign up for separate API keys — just", + " pick the tools you want.", "", ] if already_managed: diff --git a/hermes_cli/platforms.py b/hermes_cli/platforms.py index e341b734ee1..730dbed8a16 100644 --- a/hermes_cli/platforms.py +++ b/hermes_cli/platforms.py @@ -24,6 +24,7 @@ PLATFORMS: OrderedDict[str, PlatformInfo] = OrderedDict([ ("discord", PlatformInfo(label="💬 Discord", default_toolset="hermes-discord")), ("slack", PlatformInfo(label="💼 Slack", default_toolset="hermes-slack")), ("whatsapp", PlatformInfo(label="📱 WhatsApp", default_toolset="hermes-whatsapp")), + ("whatsapp_cloud", PlatformInfo(label="📱 WhatsApp Business (Cloud)", default_toolset="hermes-whatsapp")), ("signal", PlatformInfo(label="📡 Signal", default_toolset="hermes-signal")), ("bluebubbles", PlatformInfo(label="💙 BlueBubbles", default_toolset="hermes-bluebubbles")), ("email", PlatformInfo(label="📧 Email", default_toolset="hermes-email")), diff --git a/hermes_cli/setup_whatsapp_cloud.py b/hermes_cli/setup_whatsapp_cloud.py new file mode 100644 index 00000000000..f885e40fc49 --- /dev/null +++ b/hermes_cli/setup_whatsapp_cloud.py @@ -0,0 +1,530 @@ +""" +Interactive setup wizard for the WhatsApp Cloud API adapter. + +Entry point: ``hermes whatsapp-cloud`` (dispatched from +``cmd_whatsapp_cloud`` in ``hermes_cli/main.py``). + +Walks the user through the 6 credentials Meta requires + recipient +allowlist, auto-generates the verify token, and prints exact follow-up +instructions for the parts that can't happen inside the wizard process +(starting cloudflared, starting the gateway, configuring Meta's +webhook dashboard, adding their phone to the recipient list). + +Heavy emphasis on field-shape validation to catch the most common +configuration mistakes: + +- Putting the actual phone number in ``WHATSAPP_CLOUD_PHONE_NUMBER_ID`` + (the field expects Meta's 15-17 digit internal ID, not a phone number). + This is the #1 trap — caught us during Phase 3 live testing. 
+- Pasting tokens with trailing whitespace. +- Pasting an OpenAI / Slack / GitHub key by mistake. +- Confusing App ID with WABA ID with Phone Number ID. + +Each prompt has contextual help showing exactly where to find the value +in Meta's App Dashboard, with a one-line description and the field's +expected shape ("starts with EAA", "15-17 digits", "32 hex chars", etc.). + +The wizard intentionally does NOT smoke-test the webhook itself — the +Hermes gateway and the cloudflared tunnel both run in separate +processes the user starts AFTER this wizard exits, so any in-wizard +probe would fail by design. Instead the final SETUP COMPLETE block +prints the exact curl command the user can run from a third terminal +to verify the loop end-to-end once everything's running. +""" + +from __future__ import annotations + +import re +import secrets +import sys +from typing import Optional + + +# --------------------------------------------------------------------------- +# Field-shape validators +# --------------------------------------------------------------------------- +# +# Each validator returns (ok, reason_if_not_ok). The wizard uses them to +# reject obviously-malformed input before saving — saves users a round +# trip with Meta's 401 / 400 errors. + + +def _validate_phone_number_id(value: str) -> tuple[bool, Optional[str]]: + """Phone Number ID is a 15-17 digit numeric ID assigned by Meta. + + It's NOT a phone number. The #1 setup mistake is pasting the actual + phone number (e.g. ``15556422442``) into this field — that's only + 10-11 digits and gets rejected by Graph as "Object with ID does + not exist." + """ + if not value: + return False, "Phone Number ID is required" + s = value.strip() + if not s.isdigit(): + return False, "Phone Number ID must be numeric (no '+', spaces, or dashes)" + # Real phone numbers are 10-11 digits (US/CA country code + area code + # + 7 digits). Meta's internal IDs are 15-17 digits. 
If we see a + # phone-number-sized value, the user almost certainly pasted the + # phone number by mistake. + if 10 <= len(s) <= 12: + return False, ( + "That looks like a phone number — but this field needs the " + "Phone Number ID (Meta's internal ID, 15-17 digits, e.g. " + "'7794189252778687'). Look just BELOW the 'From' dropdown in " + "API Setup → it's labelled 'Phone number ID'." + ) + if len(s) < 13: + return False, "Phone Number ID looks too short (expected 13-18 digits)" + if len(s) > 20: + return False, "Phone Number ID looks too long (expected 13-18 digits)" + return True, None + + +def _validate_waba_id(value: str) -> tuple[bool, Optional[str]]: + """WABA ID is numeric, similar length range as Phone Number ID.""" + if not value: + return False, "WABA ID is required" + s = value.strip() + if not s.isdigit(): + return False, "WABA ID must be numeric" + if len(s) < 10 or len(s) > 25: + return False, "WABA ID looks wrong (expected 10-25 digits)" + return True, None + + +def _validate_app_id(value: str) -> tuple[bool, Optional[str]]: + """Meta App ID is numeric, typically 15-16 digits.""" + if not value: + return False, "App ID is required" + s = value.strip() + if not s.isdigit(): + return False, "App ID must be numeric" + if len(s) < 13 or len(s) > 20: + return False, "App ID looks wrong (expected 15-16 digits)" + return True, None + + +def _validate_app_secret(value: str) -> tuple[bool, Optional[str]]: + """App Secret is a 32-character lowercase hex string.""" + if not value: + return False, "App Secret is required" + s = value.strip() + if not re.fullmatch(r"[0-9a-f]+", s.lower()): + return False, ( + "App Secret should be a hex string (only digits 0-9 and " + "letters a-f). Make sure you copied the 'App secret' from " + "Settings → Basic, not some other token." 
+ ) + if len(s) != 32: + return False, f"App Secret should be exactly 32 hex characters (got {len(s)})" + return True, None + + +def _validate_access_token(value: str) -> tuple[bool, Optional[str]]: + """Meta access tokens start with ``EAA`` and are 100-300+ characters. + + Both temp tokens (24h) and System User permanent tokens share this + prefix. We don't try to distinguish them. + """ + if not value: + return False, "Access token is required" + s = value.strip() + if not s.startswith("EAA"): + # Diagnose common paste mistakes + if s.startswith("sk-"): + return False, ( + "That's an OpenAI key (starts with 'sk-'), not a Meta " + "WhatsApp access token. Meta tokens start with 'EAA'." + ) + if s.startswith("xoxb-") or s.startswith("xoxp-"): + return False, ( + "That's a Slack token, not a Meta WhatsApp access token. " + "Meta tokens start with 'EAA'." + ) + if s.startswith("ghp_") or s.startswith("gho_"): + return False, ( + "That's a GitHub token, not a Meta WhatsApp access " + "token. Meta tokens start with 'EAA'." + ) + return False, ( + "Meta WhatsApp access tokens start with 'EAA'. Check that " + "you're copying from the right place (API Setup → 'Generate " + "access token', or Business Settings → System Users → " + "'Generate token' for a permanent one)." + ) + if len(s) < 100: + return False, f"Access token looks too short ({len(s)} chars, expected 100+)" + return True, None + + +# --------------------------------------------------------------------------- +# Prompt helpers +# --------------------------------------------------------------------------- + + +def _prompt(message: str, default: Optional[str] = None) -> str: + """Read one line of input. Returns "" on EOF / Ctrl+C / empty input. + + The ``default`` parameter is shown to the user but NOT auto-applied + on empty input — callers handle the "user kept existing" case + explicitly so they can distinguish between a real value and a + display preview (e.g. ``"abc12345..."`` for masked secrets). 
+ """ + try: + suffix = f" [{default}]" if default else "" + raw = input(f"{message}{suffix}: ").strip() + except (EOFError, KeyboardInterrupt): + print() + return "" + return raw + + +def _prompt_validated( + message: str, + validator, + *, + current: Optional[str] = None, + help_text: Optional[str] = None, +) -> Optional[str]: + """Repeat the prompt until the user enters a valid value or aborts. + + Returns the validated value, or None if the user gave up (empty + response after an error, or Ctrl+C). ``current`` is shown as a + default for re-runs of the wizard with existing config. + """ + if help_text: + for line in help_text.strip().splitlines(): + print(f" {line}") + attempts = 0 + while True: + attempts += 1 + value = _prompt(f" → {message}", default=current) + if not value: + return None + ok, reason = validator(value) + if ok: + return value.strip() + print(f" ✗ {reason}") + if attempts >= 3: + try: + cont = input(" Try again, or press Enter to skip: ").strip() + except (EOFError, KeyboardInterrupt): + return None + if not cont: + return None + attempts = 0 + + +# --------------------------------------------------------------------------- +# Wizard +# --------------------------------------------------------------------------- + + +def run_whatsapp_cloud_setup() -> int: + """Interactive wizard for the WhatsApp Cloud API adapter. + + Returns 0 on full success, 1 on user abort, 2 on partial completion + (some fields written but the user bailed before finishing). + """ + from hermes_cli.config import get_env_value, save_env_value + + print() + print("⚕ WhatsApp Business Cloud API Setup") + print("=" * 50) + print() + print("This wizard configures Hermes to talk to WhatsApp via Meta's") + print("official Cloud API. 
It's the production-grade path:") + print() + print(" • No QR codes, no Node.js bridge subprocess") + print(" • Stable connection — no account-ban risk") + print(" • Business account required (not personal WhatsApp)") + print(" • Public webhook URL required (Cloudflare Tunnel, ngrok,") + print(" or your own reverse proxy with TLS)") + print() + print("If you don't have a Meta app set up yet, follow these steps") + print("FIRST, then come back and re-run this wizard:") + print() + print(" 1. https://developers.facebook.com/apps → Create App") + print(" → 'Connect with customers through WhatsApp'") + print(" 2. App Dashboard → WhatsApp → API Setup") + print(" 3. Click 'Generate access token' (temp 24h token is fine to") + print(" start; switch to a System User permanent token later)") + print() + try: + proceed = input("Press Enter to continue, or Ctrl+C to abort... ").strip() + except (EOFError, KeyboardInterrupt): + print("\nSetup cancelled.") + return 1 + + print() + print("─" * 50) + print("STEP 1 — Phone Number ID") + print("─" * 50) + current_phone_id = get_env_value("WHATSAPP_CLOUD_PHONE_NUMBER_ID") or None + phone_id = _prompt_validated( + "Phone Number ID", + _validate_phone_number_id, + current=current_phone_id, + help_text=( + "Found in: App Dashboard → WhatsApp → API Setup, in the\n" + "'Send and receive messages' section.\n" + "Look BELOW the 'From' dropdown — there's a 'Phone number ID'\n" + "line with the value (15-17 digits, e.g. '7794189252778687').\n" + "It is NOT the phone number itself (+1 555-...). That's the\n" + "single most common setup mistake." + ), + ) + if not phone_id: + if current_phone_id: + phone_id = current_phone_id + print(f" ✓ Keeping existing: {phone_id}") + else: + print("\n✗ Phone Number ID is required. 
Aborting.") + return 1 + else: + save_env_value("WHATSAPP_CLOUD_PHONE_NUMBER_ID", phone_id) + print(f" ✓ Saved: {phone_id}") + print() + + print("─" * 50) + print("STEP 2 — Access Token") + print("─" * 50) + current_token = get_env_value("WHATSAPP_CLOUD_ACCESS_TOKEN") or None + current_display = (current_token[:15] + "...") if current_token else None + token = _prompt_validated( + "Access Token", + _validate_access_token, + current=current_display, + help_text=( + "Two options for getting one:\n\n" + " (a) TEMP — App Dashboard → WhatsApp → API Setup →\n" + " 'Generate access token' button. Lasts 24 hours.\n" + " Fine for testing today; you'll have to regenerate\n" + " tomorrow.\n\n" + " (b) PERMANENT (production) — System User token. One-time\n" + " setup, never expires:\n" + " • business.facebook.com → Settings → System users →\n" + " Add → Admin role\n" + " • Assign Assets → your app (Manage app), your\n" + " WhatsApp account (Manage WABAs)\n" + " • Generate token → expiration: Never → permissions:\n" + " business_management, whatsapp_business_messaging,\n" + " whatsapp_business_management\n\n" + "Tokens start with 'EAA'." + ), + ) + # If they had a current token and just hit Enter, keep it. + if not token: + if current_token: + token = current_token + print(" ✓ Keeping existing token") + else: + print("\n✗ Access Token is required. 
Aborting.") + return 1 + else: + save_env_value("WHATSAPP_CLOUD_ACCESS_TOKEN", token) + print(" ✓ Saved (token hidden)") + print() + + print("─" * 50) + print("STEP 3 — App Secret (required for webhook signature verification)") + print("─" * 50) + current_secret = get_env_value("WHATSAPP_CLOUD_APP_SECRET") or None + current_secret_display = (current_secret[:8] + "...") if current_secret else None + app_secret = _prompt_validated( + "App Secret", + _validate_app_secret, + current=current_secret_display, + help_text=( + "Found in: App Dashboard → Settings → Basic →\n" + "'App secret' field (click 'Show', enter your Facebook password).\n\n" + "If 'Show' doesn't appear, you may need Admin role on the app.\n" + "It's a 32-character lowercase hex string.\n\n" + "Without the App Secret, inbound webhook POSTs are refused\n" + "with HTTP 503 (we can't verify they actually came from Meta)." + ), + ) + if not app_secret: + if current_secret: + app_secret = current_secret + print(" ✓ Keeping existing App Secret") + else: + print("\n⚠ Skipping App Secret — inbound webhooks will be refused") + print(" until you set WHATSAPP_CLOUD_APP_SECRET manually.") + else: + save_env_value("WHATSAPP_CLOUD_APP_SECRET", app_secret) + print(" ✓ Saved (secret hidden)") + print() + + print("─" * 50) + print("STEP 4 — App ID & WABA ID (optional, for analytics)") + print("─" * 50) + current_app_id = get_env_value("WHATSAPP_CLOUD_APP_ID") or None + app_id = _prompt_validated( + "App ID (optional, press Enter to skip)", + lambda v: (True, None) if not v else _validate_app_id(v), + current=current_app_id, + help_text=( + "Found in: App Dashboard → Settings → Basic → 'App ID' at the\n" + "top of the page. Numeric, ~15-16 digits.\n" + "Not required for messaging — useful only for analytics later." 
+ ), + ) + if app_id: + save_env_value("WHATSAPP_CLOUD_APP_ID", app_id) + print(f" ✓ Saved: {app_id}") + elif current_app_id: + print(f" ✓ Keeping existing: {current_app_id}") + + current_waba_id = get_env_value("WHATSAPP_CLOUD_WABA_ID") or None + waba_id = _prompt_validated( + "WABA ID (optional, press Enter to skip)", + lambda v: (True, None) if not v else _validate_waba_id(v), + current=current_waba_id, + help_text=( + "WhatsApp Business Account ID. Found in: App Dashboard →\n" + "WhatsApp → API Setup, near the top — 'WhatsApp Business\n" + "Account ID'. Numeric, ~15+ digits.\n" + "Not required for messaging — useful for analytics." + ), + ) + if waba_id: + save_env_value("WHATSAPP_CLOUD_WABA_ID", waba_id) + print(f" ✓ Saved: {waba_id}") + elif current_waba_id: + print(f" ✓ Keeping existing: {current_waba_id}") + print() + + print("─" * 50) + print("STEP 5 — Verify Token (auto-generated)") + print("─" * 50) + current_verify = get_env_value("WHATSAPP_CLOUD_VERIFY_TOKEN") or None + if current_verify: + print(f" An existing verify token is already set ({current_verify[:8]}...).") + try: + regen = input(" Generate a new one? [y/N]: ").strip().lower() + except (EOFError, KeyboardInterrupt): + regen = "n" + if regen in {"y", "yes"}: + verify_token = secrets.token_urlsafe(32) + save_env_value("WHATSAPP_CLOUD_VERIFY_TOKEN", verify_token) + print(f" ✓ New verify token: {verify_token}") + else: + verify_token = current_verify + print(" ✓ Keeping existing verify token") + else: + verify_token = secrets.token_urlsafe(32) + save_env_value("WHATSAPP_CLOUD_VERIFY_TOKEN", verify_token) + print(f" ✓ Generated: {verify_token}") + print() + print(" → COPY THIS TOKEN NOW. You'll paste it into Meta's webhook") + print(" configuration dialog (next step).") + print() + + print("─" * 50) + print("STEP 6 — Recipient Allowlist") + print("─" * 50) + print() + print(" Who is allowed to message the bot? (Comma-separated phone") + print(" numbers with country code, no '+' / spaces / dashes. 
Use '*'") + print(" to allow anyone — only safe if you've also configured Meta's") + print(" recipient whitelist for app-development mode.)") + print() + current_allow = get_env_value("WHATSAPP_CLOUD_ALLOWED_USERS") or None + allow_default = current_allow if current_allow else None + try: + allowed = input( + f" → Allowed users{' [' + allow_default + ']' if allow_default else ''}: " + ).strip() or (allow_default or "") + except (EOFError, KeyboardInterrupt): + allowed = "" + if allowed: + # Light normalization — strip spaces and dashes from each entry. + allowed = ",".join( + re.sub(r"[\s\-+]", "", part) for part in allowed.split(",") if part.strip() + ) + save_env_value("WHATSAPP_CLOUD_ALLOWED_USERS", allowed) + print(f" ✓ Saved: {allowed}") + else: + print(" ⚠ No allowlist — every inbound message will be denied.") + print(" Re-run this wizard or set WHATSAPP_CLOUD_ALLOWED_USERS manually.") + print() + + print("─" * 50) + print("SETUP COMPLETE — Next steps") + print("─" * 50) + print() + print(" Hermes needs a public HTTPS URL to receive WhatsApp messages.") + print(" The recommended path is Cloudflare Tunnel (free, no port") + print(" forwarding, no DNS setup).") + print() + print(" 1. Install cloudflared (one-time, if you don't have it):") + print(" Windows: winget install Cloudflare.cloudflared") + print(" macOS: brew install cloudflared") + print(" Linux: https://github.com/cloudflare/cloudflared/releases") + print() + print(" Alternatives: ngrok, or your own domain + reverse proxy") + print(" with TLS.") + print() + print(" 2. Start the tunnel in a separate terminal:") + print(" cloudflared tunnel --url http://localhost:8090") + print(" Note the printed https://.trycloudflare.com URL.") + print() + print(" 3. Start the Hermes gateway in another terminal:") + print(" hermes gateway") + print() + print(" 4. Verify your local config is reachable. 
From a third") + print(" terminal, with the tunnel URL substituted:") + print() + print(" curl 'https://YOUR-TUNNEL.trycloudflare.com/whatsapp/webhook?\\") + print(f" hub.mode=subscribe&hub.verify_token={verify_token}&\\") + print(" hub.challenge=hello'") + print() + print(" Expected: HTTP 200 with body 'hello'.") + print(" Also try: curl https://YOUR-TUNNEL.trycloudflare.com/health") + print(" (should return JSON with verify_token_configured: true).") + print() + print(" 5. Configure Meta to point at your tunnel:") + print(" App Dashboard → WhatsApp → Configuration → Edit webhook") + print(" Callback URL: /whatsapp/webhook") + print(f" Verify Token: {verify_token}") + print(" → Click 'Verify and save'") + print(" → Then 'Manage' webhook fields → subscribe to 'messages'") + print() + print(" 6. Add your phone to Meta's recipient list:") + print(" App Dashboard → WhatsApp → API Setup → 'To' →") + print(" 'Manage phone number list'") + print() + print(" 7. DM the bot's test number from your phone.") + print() + print("─" * 50) + print("Optional: polish your bot's WhatsApp profile") + print("─" * 50) + print() + print(" WhatsApp shows a display name and profile picture for your bot") + print(" in every chat header and contact list. 
These are set in Meta's") + print(" Business Manager, not via this wizard — but here's where to do") + print(" it once you're up and running:") + print() + effective_waba = waba_id or current_waba_id + if effective_waba: + print(" • Display name + profile picture:") + print(" https://business.facebook.com/wa/manage/phone-numbers/" + f"?waba_id={effective_waba}") + else: + print(" • Display name + profile picture:") + print(" https://business.facebook.com/wa/manage/phone-numbers/") + print(" (select your WhatsApp Business Account on that page)") + print(" Display-name changes go through a ~24-48h Meta review.") + print() + print(" • About, description, website, hours, business category:") + print(" Same page → click your phone number → 'Edit profile'.") + print() + print(" • Verified badge (the green check):") + print(" Requires Meta's business verification process —") + print(" Business Manager → Security Center → Start Verification.") + print() + print(" Docs: https://hermes-agent.nousresearch.com/docs/user-guide/") + print(" messaging/whatsapp-cloud") + print() + return 0 diff --git a/hermes_cli/status.py b/hermes_cli/status.py index 5629da03fe3..8561aaa718f 100644 --- a/hermes_cli/status.py +++ b/hermes_cli/status.py @@ -309,7 +309,7 @@ def show_status(args): print() print(color("◆ Nous Tool Gateway", Colors.CYAN, Colors.BOLD)) print(" Your free-tier Nous account does not include Tool Gateway access.") - print(" Upgrade your subscription to unlock managed web, image, TTS, and browser tools.") + print(" Upgrade your subscription to unlock managed web, image, TTS, STT, and browser tools.") try: portal_url = nous_status.get("portal_base_url", "").rstrip("/") if portal_url: diff --git a/tests/agent/test_prompt_builder.py b/tests/agent/test_prompt_builder.py index 76d13f5d22c..8e3b8cfb81a 100644 --- a/tests/agent/test_prompt_builder.py +++ b/tests/agent/test_prompt_builder.py @@ -442,6 +442,7 @@ class TestBuildNousSubscriptionPrompt: "web": NousFeatureState("web", 
"Web tools", True, True, True, True, False, True, "firecrawl"), "image_gen": NousFeatureState("image_gen", "Image generation", True, True, True, True, False, True, "Nous Subscription"), "tts": NousFeatureState("tts", "OpenAI TTS", True, True, True, True, False, True, "OpenAI TTS"), + "stt": NousFeatureState("stt", "Speech-to-text", True, True, True, True, False, True, "OpenAI Whisper"), "browser": NousFeatureState("browser", "Browser automation", True, True, True, True, False, True, "Browser Use"), "modal": NousFeatureState("modal", "Modal execution", False, True, False, False, False, True, "local"), }, @@ -452,7 +453,7 @@ class TestBuildNousSubscriptionPrompt: assert "Browser Use" in prompt assert "Modal execution is optional" in prompt - assert "do not ask the user for Firecrawl, FAL, OpenAI TTS, or Browser-Use API keys" in prompt + assert "do not ask the user for Firecrawl, FAL, OpenAI TTS, OpenAI Whisper, or Browser-Use API keys" in prompt def test_non_subscriber_prompt_includes_relevant_upgrade_guidance(self, monkeypatch): monkeypatch.setattr("tools.tool_backend_helpers.managed_nous_tools_enabled", lambda: True) @@ -466,6 +467,7 @@ class TestBuildNousSubscriptionPrompt: "web": NousFeatureState("web", "Web tools", True, False, False, False, False, True, ""), "image_gen": NousFeatureState("image_gen", "Image generation", True, False, False, False, False, True, ""), "tts": NousFeatureState("tts", "OpenAI TTS", True, False, False, False, False, True, ""), + "stt": NousFeatureState("stt", "Speech-to-text", True, False, False, False, False, True, ""), "browser": NousFeatureState("browser", "Browser automation", True, False, False, False, False, True, ""), "modal": NousFeatureState("modal", "Modal execution", False, False, False, False, False, True, ""), }, @@ -784,6 +786,7 @@ class TestPromptBuilderConstants: def test_platform_hints_known_platforms(self): assert "whatsapp" in PLATFORM_HINTS + assert "whatsapp_cloud" in PLATFORM_HINTS assert "telegram" in 
PLATFORM_HINTS
         assert "discord" in PLATFORM_HINTS
         assert "cron" in PLATFORM_HINTS
@@ -791,6 +794,22 @@ class TestPromptBuilderConstants:
         assert "api_server" in PLATFORM_HINTS
         assert "webui" in PLATFORM_HINTS
 
+    def test_whatsapp_cloud_hint_mentions_24h_window(self):
+        """The Cloud API's 24-hour conversation window is a hard rule the
+        agent should know about. Phase 5 (template fallback) was deferred,
+        so the model needs to know free-form replies outside the window
+        will fail with Graph error 131047 — otherwise it'll cheerfully
+        try to schedule delayed messages that silently break."""
+        hint = PLATFORM_HINTS["whatsapp_cloud"]
+        assert "24-hour" in hint or "24h" in hint or "24 hour" in hint
+        assert "131047" in hint
+
+    def test_whatsapp_cloud_hint_advertises_media(self):
+        """Cloud adapter supports the same MEDIA:/path/ convention as
+        Baileys for outbound attachments."""
+        hint = PLATFORM_HINTS["whatsapp_cloud"]
+        assert "MEDIA:" in hint
+
     def test_cli_hint_does_not_suggest_media_tags(self):
         # Regression: MEDIA:/path tags are intercepted only by messaging
         # gateway platforms. On the CLI they render as literal text and
diff --git a/tests/cron/test_scheduler.py b/tests/cron/test_scheduler.py
index 32485a917e0..95333dbf69b 100644
--- a/tests/cron/test_scheduler.py
+++ b/tests/cron/test_scheduler.py
@@ -2510,3 +2510,26 @@ class TestSendMediaTimeoutCancelsFuture:
         # 2. Second file still got dispatched — one timeout doesn't abort the batch
         adapter.send_video.assert_called_once()
         assert adapter.send_video.call_args[1]["video_path"] == "/tmp/fast.mp4"
+
+
+class TestHomeTargetEnvVarRegistry:
+    """Regression: ``_HOME_TARGET_ENV_VARS`` must include every gateway
+    platform that supports cron-driven outbound delivery. Missing an
+    entry means ``hermes cron create --deliver=PLATFORM`` silently
+    fails to route through the platform's home channel."""
+
+    def test_whatsapp_cloud_registered(self):
+        """``deliver=whatsapp_cloud`` routes through
+        WHATSAPP_CLOUD_HOME_CHANNEL — added alongside the existing
+        ``whatsapp`` Baileys entry."""
+        from cron.scheduler import _HOME_TARGET_ENV_VARS
+
+        assert "whatsapp_cloud" in _HOME_TARGET_ENV_VARS
+        assert _HOME_TARGET_ENV_VARS["whatsapp_cloud"] == "WHATSAPP_CLOUD_HOME_CHANNEL"
+
+    def test_baileys_whatsapp_still_registered(self):
+        """Sanity guard: the Cloud addition didn't disturb Baileys
+        whatsapp routing."""
+        from cron.scheduler import _HOME_TARGET_ENV_VARS
+
+        assert _HOME_TARGET_ENV_VARS.get("whatsapp") == "WHATSAPP_HOME_CHANNEL"
diff --git a/tests/gateway/test_display_config.py b/tests/gateway/test_display_config.py
index 5b50ec9c9ca..57cabe1f731 100644
--- a/tests/gateway/test_display_config.py
+++ b/tests/gateway/test_display_config.py
@@ -206,9 +206,23 @@ class TestPlatformDefaults:
         """Signal, BlueBubbles, etc. default to 'off' tool progress."""
         from gateway.display_config import resolve_display_setting
 
-        for plat in ("signal", "bluebubbles", "weixin", "wecom", "dingtalk"):
+        for plat in ("signal", "bluebubbles", "weixin", "wecom", "dingtalk", "whatsapp_cloud"):
             assert resolve_display_setting({}, plat, "tool_progress") == "off", plat
 
+    def test_whatsapp_cloud_locked_to_low_tier_until_edit_message_lands(self):
+        """Regression guard: ``whatsapp_cloud`` must stay TIER_LOW until the
+        adapter implements edit_message. Without an edit endpoint, raising
+        the tier to MEDIUM would spam separate WhatsApp messages for every
+        tool-progress update, which is the exact failure mode this entry
+        exists to avoid.
+
+        When/if Cloud's edit_message lands, update _PLATFORM_DEFAULTS to
+        TIER_MEDIUM and update this test to assert ``"new"`` accordingly.
+        """
+        from gateway.display_config import resolve_display_setting
+        assert resolve_display_setting({}, "whatsapp_cloud", "tool_progress") == "off"
+        assert resolve_display_setting({}, "whatsapp_cloud", "streaming") is False
+
     def test_minimal_tier_platforms(self):
         """Email, SMS, webhook default to 'off' tool progress."""
         from gateway.display_config import resolve_display_setting
diff --git a/tests/gateway/test_whatsapp_cloud.py b/tests/gateway/test_whatsapp_cloud.py
new file mode 100644
index 00000000000..735bf7d24d9
--- /dev/null
+++ b/tests/gateway/test_whatsapp_cloud.py
@@ -0,0 +1,2250 @@
+"""Tests for the WhatsApp Cloud API adapter.
+
+Covers the outbound Graph API send path, the inbound verify-token
+handshake, and the webhook POST path: X-Hub-Signature-256 verification,
+wamid de-duplication, and dispatch of verified payloads to
+``handle_message``.
+
+All tests are fixture-driven — no live network. httpx is patched so the
+adapter never reaches graph.facebook.com, and the aiohttp server is
+exercised with synthetic ``Request`` objects.
+"""
+
+from __future__ import annotations
+
+import json
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+from gateway.config import Platform
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _make_adapter(**overrides):
+    """Build a WhatsAppCloudAdapter with test attributes (bypass __init__).
+
+    Mirrors the pattern in tests/gateway/test_whatsapp_*.py.
+    """
+    from gateway.platforms.whatsapp_cloud import WhatsAppCloudAdapter
+
+    adapter = WhatsAppCloudAdapter.__new__(WhatsAppCloudAdapter)
+    adapter.platform = Platform.WHATSAPP_CLOUD
+    adapter.config = MagicMock()
+    adapter.config.extra = {}
+
+    # Cloud-API-specific attributes; recognised overrides are popped here
+    adapter._phone_number_id = overrides.pop("phone_number_id", "1234567890")
+    adapter._access_token = overrides.pop("access_token", "test-token")
+    adapter._app_id = overrides.pop("app_id", "")
+    adapter._app_secret = overrides.pop("app_secret", "")
+    adapter._waba_id = overrides.pop("waba_id", "")
+    adapter._verify_token = overrides.pop("verify_token", "")
+    adapter._webhook_host = "127.0.0.1"
+    adapter._webhook_port = 8090
+    adapter._webhook_path = "/whatsapp/webhook"
+    adapter._health_path = "/health"
+    adapter._api_version = overrides.pop("api_version", "v20.0")
+    adapter._runner = None
+    adapter._http_client = None
+
+    # Behavior-mixin contract
+    adapter._reply_prefix = None
+    adapter._dm_policy = "open"
+    adapter._allow_from = set()
+    adapter._group_policy = "open"
+    adapter._group_allow_from = set()
+    adapter._mention_patterns = []
+
+    # Webhook dispatch state (Phase 3)
+    from collections import OrderedDict
+    adapter._seen_wamids = OrderedDict()
+    adapter._duplicate_count = 0
+    adapter._accepted_count = 0
+    adapter._rejected_signature_count = 0
+
+    # Phase 4 state — one-shot warnings.
+    adapter._warned_no_ffmpeg = False
+
+    # Phase 10 state — per-chat latest inbound wamid (for typing/read).
+    adapter._last_inbound_wamid_by_chat = {}
+
+    # Phase 9 state — interactive-button correlation dicts.
+    adapter._clarify_state = {}
+    adapter._exec_approval_state = {}
+    adapter._slash_confirm_state = {}
+
+    # BasePlatformAdapter contract — minimum to keep send/lifecycle happy
+    adapter._running = True
+    adapter._message_handler = None
+    adapter._fatal_error_code = None
+    adapter._fatal_error_message = None
+    adapter._fatal_error_retryable = True
+    adapter._fatal_error_handler = None
+    adapter._active_sessions = {}
+    adapter._pending_messages = {}
+    adapter._background_tasks = set()
+    adapter._auto_tts_disabled_chats = set()
+
+    # Apply any leftover overrides directly (whatever was not popped above)
+    for key, value in overrides.items():
+        setattr(adapter, key, value)
+    return adapter
+
+
+def _mock_httpx_response(status_code: int, json_body: dict):
+    """Build an httpx-Response-like mock the adapter's ``send`` will accept."""
+    resp = MagicMock()
+    resp.status_code = status_code
+    resp.json = MagicMock(return_value=json_body)
+    resp.text = json.dumps(json_body)
+    return resp
+
+
+# ---------------------------------------------------------------------------
+# Outbound send via Graph API
+# ---------------------------------------------------------------------------
+
+class TestSendText:
+    """Outbound text-message path."""
+
+    @pytest.mark.asyncio
+    async def test_send_builds_correct_url(self):
+        adapter = _make_adapter(phone_number_id="9999", api_version="v20.0")
+        adapter._http_client = MagicMock()
+        adapter._http_client.post = AsyncMock(
+            return_value=_mock_httpx_response(
+                200, {"messages": [{"id": "wamid.abc"}]}
+            )
+        )
+
+        await adapter.send("15551234567", "hello")
+
+        called_url = adapter._http_client.post.call_args.args[0]
+        assert called_url == "https://graph.facebook.com/v20.0/9999/messages"
+
+    @pytest.mark.asyncio
+    async def test_send_includes_bearer_auth(self):
+        adapter = _make_adapter(access_token="my-secret-token")
+        adapter._http_client = MagicMock()
+        adapter._http_client.post = AsyncMock(
+            return_value=_mock_httpx_response(
+                200, {"messages": [{"id": "wamid.abc"}]}
+            )
+        )
+
+        await adapter.send("15551234567", "hi")
+
+        headers = adapter._http_client.post.call_args.kwargs["headers"]
+        assert headers["Authorization"] == "Bearer my-secret-token"
+        assert headers["Content-Type"] == "application/json"
+
+    @pytest.mark.asyncio
+    async def test_send_payload_shape(self):
+        adapter = _make_adapter()
+        adapter._http_client = MagicMock()
+        adapter._http_client.post = AsyncMock(
+            return_value=_mock_httpx_response(
+                200, {"messages": [{"id": "wamid.abc"}]}
+            )
+        )
+
+        await adapter.send("15551234567", "hello world")
+
+        payload = adapter._http_client.post.call_args.kwargs["json"]
+        assert payload["messaging_product"] == "whatsapp"
+        assert payload["recipient_type"] == "individual"
+        assert payload["to"] == "15551234567"
+        assert payload["type"] == "text"
+        assert payload["text"]["body"] == "hello world"
+        assert payload["text"]["preview_url"] is True
+
+    @pytest.mark.asyncio
+    async def test_send_returns_wamid(self):
+        adapter = _make_adapter()
+        adapter._http_client = MagicMock()
+        adapter._http_client.post = AsyncMock(
+            return_value=_mock_httpx_response(
+                200, {"messages": [{"id": "wamid.HBgL...="}]}
+            )
+        )
+
+        result = await adapter.send("15551234567", "hi")
+
+        assert result.success is True
+        assert result.message_id == "wamid.HBgL...="
+
+    @pytest.mark.asyncio
+    async def test_send_applies_markdown_conversion(self):
+        """Mixin's format_message should run before send."""
+        adapter = _make_adapter()
+        adapter._http_client = MagicMock()
+        adapter._http_client.post = AsyncMock(
+            return_value=_mock_httpx_response(
+                200, {"messages": [{"id": "wamid.x"}]}
+            )
+        )
+
+        await adapter.send("15551234567", "**bold** text")
+
+        payload = adapter._http_client.post.call_args.kwargs["json"]
+        assert payload["text"]["body"] == "*bold* text"
+
+    @pytest.mark.asyncio
+    async def test_send_reply_to_attaches_context_first_chunk_only(self):
+        adapter = _make_adapter()
+        adapter._http_client = MagicMock()
+        adapter._http_client.post = AsyncMock(
+            return_value=_mock_httpx_response(
+                200, {"messages": [{"id": "wamid.x"}]}
+            )
+        )
+
+        await adapter.send("15551234567", "short reply", reply_to="wamid.original")
+
+        payload = adapter._http_client.post.call_args.kwargs["json"]
+        assert payload["context"] == {"message_id": "wamid.original"}
+
+    @pytest.mark.asyncio
+    async def test_send_long_message_chunked(self):
+        """Messages over the chunk limit are split into multiple POSTs."""
+        adapter = _make_adapter()
+        adapter._http_client = MagicMock()
+        adapter._http_client.post = AsyncMock(
+            return_value=_mock_httpx_response(
+                200, {"messages": [{"id": "wamid.x"}]}
+            )
+        )
+
+        # MAX_MESSAGE_LENGTH = 4096 from the mixin. 8500 chars forces 2+ chunks.
+        long_text = "a" * 8500
+        await adapter.send("15551234567", long_text)
+
+        # At least 2 POST calls
+        assert adapter._http_client.post.call_count >= 2
+        # No reply_to was passed, so no chunk may carry a reply context.
+        # Check every POST (not just the second) so the first chunk is
+        # covered too.
+        for post_call in adapter._http_client.post.call_args_list:
+            assert "context" not in post_call.kwargs["json"]
+
+    @pytest.mark.asyncio
+    async def test_send_graph_error_returns_failure(self):
+        adapter = _make_adapter()
+        adapter._http_client = MagicMock()
+        adapter._http_client.post = AsyncMock(
+            return_value=_mock_httpx_response(
+                400,
+                {
+                    "error": {
+                        "message": "Invalid parameter",
+                        "type": "OAuthException",
+                        "code": 100,
+                        "fbtrace_id": "abc",
+                    }
+                },
+            )
+        )
+
+        result = await adapter.send("15551234567", "hi")
+
+        assert result.success is False
+        assert "graph error 100" in result.error
+        assert "Invalid parameter" in result.error
+
+    @pytest.mark.asyncio
+    async def test_send_empty_content_no_request(self):
+        adapter = _make_adapter()
+        adapter._http_client = MagicMock()
+        adapter._http_client.post = AsyncMock()
+
+        result = await adapter.send("15551234567", "")
+        assert result.success is True
+        assert result.message_id is None
+        adapter._http_client.post.assert_not_called()
+
+        result = await adapter.send("15551234567", " \n ")
+        assert result.success is True
+        adapter._http_client.post.assert_not_called()
+
+    @pytest.mark.asyncio
+    async def test_send_not_connected_returns_failure(self):
+        adapter = _make_adapter()
+        adapter._http_client = None
+
+        result = await adapter.send("15551234567", "hi")
+        assert result.success is False
+        assert "Not connected" in result.error
+
+    @pytest.mark.asyncio
+    async def test_send_network_exception_returns_failure(self):
+        adapter = _make_adapter()
+        adapter._http_client = MagicMock()
+        adapter._http_client.post = AsyncMock(side_effect=RuntimeError("boom"))
+
+        result = await adapter.send("15551234567", "hi")
+        assert result.success is False
+        assert "boom" in result.error
+
+
+# ---------------------------------------------------------------------------
+# Inbound webhook verify (GET) handshake
+# ---------------------------------------------------------------------------
+
+def _verify_request(query: dict):
+    """Build a minimal aiohttp.web.Request stub for verify tests."""
+    request = MagicMock()
+    request.query = query
+    return request
+
+
+class TestWebhookVerify:
+    """GET ?hub.mode=...&hub.verify_token=...&hub.challenge=..."""
+
+    @pytest.mark.asyncio
+    async def test_verify_echoes_challenge_on_match(self):
+        adapter = _make_adapter(verify_token="shared-secret-123")
+        request = _verify_request({
+            "hub.mode": "subscribe",
+            "hub.verify_token": "shared-secret-123",
+            "hub.challenge": "abc-12345",
+        })
+
+        response = await adapter._handle_verify(request)
+
+        assert response.status == 200
+        assert response.text == "abc-12345"
+        assert response.content_type == "text/plain"
+
+    @pytest.mark.asyncio
+    async def test_verify_rejects_token_mismatch(self):
+        adapter = _make_adapter(verify_token="shared-secret-123")
+        request = _verify_request({
+            "hub.mode": "subscribe",
+            "hub.verify_token": "wrong-token",
+            "hub.challenge": "abc-12345",
+        })
+
+        response = await adapter._handle_verify(request)
+
+        assert response.status == 403
+
+    @pytest.mark.asyncio
+    async def test_verify_rejects_wrong_mode(self):
+        adapter = _make_adapter(verify_token="shared-secret-123")
+        request = _verify_request({
+            "hub.mode": "unsubscribe",
+            "hub.verify_token": "shared-secret-123",
+            "hub.challenge": "abc-12345",
+        })
+
+        response = await adapter._handle_verify(request)
+
+        assert response.status == 400
+
+    @pytest.mark.asyncio
+    async def test_verify_rejects_missing_challenge(self):
+        adapter = _make_adapter(verify_token="shared-secret-123")
+        request = _verify_request({
+            "hub.mode": "subscribe",
+            "hub.verify_token": "shared-secret-123",
+        })
+
+        response = await adapter._handle_verify(request)
+
+        assert response.status == 400
+
+    @pytest.mark.asyncio
+    async def test_verify_refuses_when_token_unconfigured(self):
+        """An empty verify_token must NOT match an empty incoming token —
+        otherwise an attacker who guesses the misconfiguration could
+        subscribe their own webhook URL.
+        """
+        adapter = _make_adapter(verify_token="")
+        request = _verify_request({
+            "hub.mode": "subscribe",
+            "hub.verify_token": "",
+            "hub.challenge": "abc",
+        })
+
+        response = await adapter._handle_verify(request)
+
+        assert response.status == 503  # service refuses to perform handshake
+
+
+# ---------------------------------------------------------------------------
+# Inbound webhook POST — signature verification + dispatch (Phase 3)
+# ---------------------------------------------------------------------------
+
+import hashlib
+import hmac as _hmac_lib
+
+
+def _sign(secret: str, body: bytes) -> str:
+    """Compute the X-Hub-Signature-256 header value Meta would send."""
+    digest = _hmac_lib.new(
+        secret.encode("utf-8"), body, hashlib.sha256
+    ).hexdigest()
+    return f"sha256={digest}"
+
+
+def _post_request(body: bytes, headers: dict | None = None):
+    """Build a minimal aiohttp.web.Request stub for POST tests."""
+    request = MagicMock()
+    request.read = AsyncMock(return_value=body)
+    request.headers = headers or {}
+    return request
+
+
+# A realistic Meta inbound text-message payload, modelled on the
+# get-started docs sample.
+_SAMPLE_INBOUND_TEXT_PAYLOAD = {
+    "object": "whatsapp_business_account",
+    "entry": [
+        {
+            "id": "215589313241560883",
+            "changes": [
+                {
+                    "field": "messages",
+                    "value": {
+                        "messaging_product": "whatsapp",
+                        "metadata": {
+                            "display_phone_number": "15551797781",
+                            "phone_number_id": "7794189252778687",
+                        },
+                        "contacts": [
+                            {
+                                "profile": {"name": "Jessica Laverdetman"},
+                                "wa_id": "13557825698",
+                            }
+                        ],
+                        "messages": [
+                            {
+                                "from": "13557825698",
+                                "id": "wamid.HBgLMTM1NTc4MjU2OTgVAGHAYWYET688aASGNTI1QzZFQjhEMDk2QQA=",
+                                "timestamp": "1758254144",
+                                "text": {"body": "Hi!"},
+                                "type": "text",
+                            }
+                        ],
+                    },
+                }
+            ],
+        }
+    ],
+}
+
+
+class TestWebhookSignature:
+    """X-Hub-Signature-256 HMAC verification."""
+
+    @pytest.mark.asyncio
+    async def test_valid_signature_accepted(self):
+        adapter = _make_adapter(app_secret="signing-key-123")
+        # Patch the dispatcher to a no-op so we don't depend on
+        # MessageEvent construction here (covered separately).
+        adapter._dispatch_payload = AsyncMock()
+        body = b'{"object":"whatsapp_business_account","entry":[]}'
+        request = _post_request(body, {"X-Hub-Signature-256": _sign("signing-key-123", body)})
+
+        response = await adapter._handle_webhook(request)
+
+        assert response.status == 200
+        adapter._dispatch_payload.assert_called_once()
+
+    @pytest.mark.asyncio
+    async def test_tampered_body_rejected(self):
+        adapter = _make_adapter(app_secret="signing-key-123")
+        adapter._dispatch_payload = AsyncMock()
+        original = b'{"object":"whatsapp_business_account"}'
+        tampered = b'{"object":"evil_payload"}'
+        sig_for_original = _sign("signing-key-123", original)
+        request = _post_request(tampered, {"X-Hub-Signature-256": sig_for_original})
+
+        response = await adapter._handle_webhook(request)
+
+        assert response.status == 401
+        adapter._dispatch_payload.assert_not_called()
+        assert adapter._rejected_signature_count == 1
+
+    @pytest.mark.asyncio
+    async def test_missing_signature_header_rejected(self):
+        adapter = _make_adapter(app_secret="signing-key-123")
+        adapter._dispatch_payload = AsyncMock()
+        body = b'{"object":"whatsapp_business_account"}'
+        request = _post_request(body, {})
+
+        response = await adapter._handle_webhook(request)
+
+        assert response.status == 401
+        adapter._dispatch_payload.assert_not_called()
+
+    @pytest.mark.asyncio
+    async def test_wrong_signature_format_rejected(self):
+        adapter = _make_adapter(app_secret="signing-key-123")
+        adapter._dispatch_payload = AsyncMock()
+        body = b"{}"
+        # Missing the required ``sha256=`` prefix
+        request = _post_request(body, {"X-Hub-Signature-256": "deadbeef"})
+
+        response = await adapter._handle_webhook(request)
+        assert response.status == 401
+
+    @pytest.mark.asyncio
+    async def test_unconfigured_app_secret_refuses_503(self):
+        """Don't quietly accept webhooks when we can't authenticate them."""
+        adapter = _make_adapter(app_secret="")
+        adapter._dispatch_payload = AsyncMock()
+        body = b'{"object":"whatsapp_business_account"}'
+        request = _post_request(body, {"X-Hub-Signature-256": "sha256=deadbeef"})
+
+        response = await adapter._handle_webhook(request)
+
+        assert response.status == 503
+        adapter._dispatch_payload.assert_not_called()
+
+    @pytest.mark.asyncio
+    async def test_signature_uses_constant_time_compare(self):
+        """Despite the name, this checks case-insensitivity: an upper-cased hex digest still passes."""
+        adapter = _make_adapter(app_secret="key")
+        adapter._dispatch_payload = AsyncMock()
+        body = b'{"object":"whatsapp_business_account","entry":[]}'
+        proper = _sign("key", body)
+        # Capitalize hex — hmac.compare_digest is case-sensitive but our
+        # implementation lowercases both sides so case differences in the
+        # incoming header don't accidentally fail valid signatures.
+        upper = proper.upper().replace("SHA256=", "sha256=")
+        request = _post_request(body, {"X-Hub-Signature-256": upper})
+
+        response = await adapter._handle_webhook(request)
+        assert response.status == 200
+
+    @pytest.mark.asyncio
+    async def test_oversize_body_rejected_before_signature(self):
+        """3MB cap per Meta — refuse without computing HMAC over giant junk."""
+        adapter = _make_adapter(app_secret="key")
+        adapter._dispatch_payload = AsyncMock()
+        body = b"x" * (4 * 1024 * 1024)
+        request = _post_request(body, {"X-Hub-Signature-256": "sha256=ignored"})
+
+        response = await adapter._handle_webhook(request)
+        assert response.status == 413
+        adapter._dispatch_payload.assert_not_called()
+
+    @pytest.mark.asyncio
+    async def test_unreadable_body_rejected(self):
+        adapter = _make_adapter(app_secret="key")
+        request = MagicMock()
+        request.read = AsyncMock(side_effect=RuntimeError("read failed"))
+        request.headers = {}
+
+        response = await adapter._handle_webhook(request)
+        assert response.status == 400
+
+
+class TestWebhookReplay:
+    """wamid dedup — Meta retries failed deliveries up to 7 days."""
+
+    @pytest.mark.asyncio
+    async def test_duplicate_wamid_not_redispatched(self):
+        adapter = _make_adapter(app_secret="key")
+        adapter.handle_message = AsyncMock()
+        body = json.dumps(_SAMPLE_INBOUND_TEXT_PAYLOAD).encode("utf-8")
+        sig = _sign("key", body)
+
+        # First delivery
+        await adapter._handle_webhook(_post_request(body, {"X-Hub-Signature-256": sig}))
+        # Second delivery (same payload, valid signature, same wamid)
+        await adapter._handle_webhook(_post_request(body, {"X-Hub-Signature-256": sig}))
+
+        # handle_message fires once, even though the webhook fired twice
+        assert adapter.handle_message.call_count == 1
+        assert adapter._duplicate_count == 1
+        assert adapter._accepted_count == 1
+
+    def test_dedup_cache_evicts_oldest(self):
+        from gateway.platforms.whatsapp_cloud import WAMID_DEDUP_CACHE_SIZE
+        adapter = _make_adapter()
+        # Fill the cache plus 5 extra
+        for i in range(WAMID_DEDUP_CACHE_SIZE + 5):
+            assert adapter._dedup_wamid(f"wamid_{i}") is True
+        assert len(adapter._seen_wamids) == WAMID_DEDUP_CACHE_SIZE
+        # The first 5 should have been evicted
+        assert "wamid_0" not in adapter._seen_wamids
+        assert "wamid_4" not in adapter._seen_wamids
+        assert "wamid_5" in adapter._seen_wamids
+        assert f"wamid_{WAMID_DEDUP_CACHE_SIZE + 4}" in adapter._seen_wamids
+
+    def test_dedup_no_wamid_lets_through(self):
+        """Defensive — Meta should always populate ``id``, but we don't
+        want to silently drop messages if it's missing."""
+        adapter = _make_adapter()
+        assert adapter._dedup_wamid("") is True
+        assert adapter._dedup_wamid("") is True  # both pass
+
+
+class TestWebhookDispatch:
+    """End-to-end dispatch from a verified payload to handle_message."""
+
+    @pytest.mark.asyncio
+    async def test_text_message_dispatched_with_event_shape(self):
+        adapter = _make_adapter(app_secret="key")
+        captured = []
+
+        async def _capture(event):
+            captured.append(event)
+
+        adapter.handle_message = _capture
+        body = json.dumps(_SAMPLE_INBOUND_TEXT_PAYLOAD).encode("utf-8")
+        sig = _sign("key", body)
+        request = _post_request(body, {"X-Hub-Signature-256": sig})
+
+        response = await adapter._handle_webhook(request)
+
+        assert response.status == 200
+        assert len(captured) == 1
+        event = captured[0]
+        assert event.text == "Hi!"
+ assert event.message_id == ( + "wamid.HBgLMTM1NTc4MjU2OTgVAGHAYWYET688aASGNTI1QzZFQjhEMDk2QQA=" + ) + assert event.source.platform == Platform.WHATSAPP_CLOUD + assert event.source.chat_id == "13557825698" + assert event.source.user_name == "Jessica Laverdetman" + assert event.source.chat_type == "dm" + + @pytest.mark.asyncio + async def test_dispatch_filters_via_mixin_gating(self): + adapter = _make_adapter(app_secret="key") + adapter._dm_policy = "disabled" # block all DMs + adapter.handle_message = AsyncMock() + body = json.dumps(_SAMPLE_INBOUND_TEXT_PAYLOAD).encode("utf-8") + sig = _sign("key", body) + + response = await adapter._handle_webhook( + _post_request(body, {"X-Hub-Signature-256": sig}) + ) + + assert response.status == 200 + adapter.handle_message.assert_not_called() + # Gated messages don't increment the accepted counter + assert adapter._accepted_count == 0 + + @pytest.mark.asyncio + async def test_dispatch_handler_exception_does_not_crash(self): + """If the agent dispatch raises, we still return 200 to Meta so + retries don't multiply the bug into a 7-day storm.""" + adapter = _make_adapter(app_secret="key") + adapter.handle_message = AsyncMock(side_effect=RuntimeError("boom")) + body = json.dumps(_SAMPLE_INBOUND_TEXT_PAYLOAD).encode("utf-8") + sig = _sign("key", body) + + response = await adapter._handle_webhook( + _post_request(body, {"X-Hub-Signature-256": sig}) + ) + assert response.status == 200 + + @pytest.mark.asyncio + async def test_dispatch_ignores_non_message_field(self): + """``field: 'statuses'`` etc. 
should not produce MessageEvents.""" + adapter = _make_adapter(app_secret="key") + adapter.handle_message = AsyncMock() + payload = { + "object": "whatsapp_business_account", + "entry": [ + { + "id": "x", + "changes": [ + { + "field": "account_alerts", + "value": {"some": "alert"}, + } + ], + } + ], + } + body = json.dumps(payload).encode("utf-8") + sig = _sign("key", body) + + response = await adapter._handle_webhook( + _post_request(body, {"X-Hub-Signature-256": sig}) + ) + assert response.status == 200 + adapter.handle_message.assert_not_called() + + @pytest.mark.asyncio + async def test_dispatch_ignores_non_waba_object(self): + adapter = _make_adapter(app_secret="key") + adapter.handle_message = AsyncMock() + payload = {"object": "page", "entry": []} + body = json.dumps(payload).encode("utf-8") + sig = _sign("key", body) + + response = await adapter._handle_webhook( + _post_request(body, {"X-Hub-Signature-256": sig}) + ) + assert response.status == 200 + adapter.handle_message.assert_not_called() + + @pytest.mark.asyncio + async def test_dispatch_handles_button_reply(self): + adapter = _make_adapter(app_secret="key") + captured = [] + + async def _capture(event): + captured.append(event) + + adapter.handle_message = _capture + payload = { + "object": "whatsapp_business_account", + "entry": [ + { + "id": "x", + "changes": [ + { + "field": "messages", + "value": { + "messaging_product": "whatsapp", + "metadata": {"phone_number_id": "1"}, + "contacts": [ + {"profile": {"name": "U"}, "wa_id": "1555"} + ], + "messages": [ + { + "from": "1555", + "id": "wamid.button1", + "timestamp": "0", + "type": "interactive", + "interactive": { + "type": "button_reply", + "button_reply": { + "id": "yes", + "title": "Yes please", + }, + }, + } + ], + }, + } + ], + } + ], + } + body = json.dumps(payload).encode("utf-8") + sig = _sign("key", body) + + response = await adapter._handle_webhook( + _post_request(body, {"X-Hub-Signature-256": sig}) + ) + assert response.status == 200 + 
assert len(captured) == 1 + assert captured[0].text == "Yes please" + + @pytest.mark.asyncio + async def test_dispatch_propagates_reply_to(self): + """``context.id`` on inbound = user replied to one of our messages.""" + adapter = _make_adapter(app_secret="key") + captured = [] + + async def _capture(event): + captured.append(event) + + adapter.handle_message = _capture + + payload_with_ctx = json.loads( + json.dumps(_SAMPLE_INBOUND_TEXT_PAYLOAD) + ) # deep copy + msg = payload_with_ctx["entry"][0]["changes"][0]["value"]["messages"][0] + msg["context"] = {"id": "wamid.our_outbound", "from": "15551797781"} + body = json.dumps(payload_with_ctx).encode("utf-8") + sig = _sign("key", body) + + await adapter._handle_webhook( + _post_request(body, {"X-Hub-Signature-256": sig}) + ) + assert len(captured) == 1 + assert captured[0].reply_to_message_id == "wamid.our_outbound" + + @pytest.mark.asyncio + async def test_invalid_json_after_signature_returns_400(self): + """Pathological case: signature passes but body isn't JSON.""" + adapter = _make_adapter(app_secret="key") + body = b"not-json" + sig = _sign("key", body) + response = await adapter._handle_webhook( + _post_request(body, {"X-Hub-Signature-256": sig}) + ) + assert response.status == 400 + + +# --------------------------------------------------------------------------- +# Health endpoint +# --------------------------------------------------------------------------- + +class TestHealth: + @pytest.mark.asyncio + async def test_health_reports_config_visibility(self): + adapter = _make_adapter( + phone_number_id="555", + verify_token="secret", + app_secret="signing-key", + ) + request = MagicMock() + + response = await adapter._handle_health(request) + + # web.json_response stores the dict on .text as JSON + body = json.loads(response.text) + assert body["status"] == "ok" + assert body["platform"] == "whatsapp_cloud" + assert body["phone_number_id"] == "555" + assert body["verify_token_configured"] is True + assert 
body["app_secret_configured"] is True + assert body["accepted"] == 0 + assert body["duplicates"] == 0 + assert body["rejected_signature"] == 0 + # ffmpeg_present is True/False depending on the test host; + # just verify the key is exposed. + assert "ffmpeg_present" in body + assert isinstance(body["ffmpeg_present"], bool) + + @pytest.mark.asyncio + async def test_health_flags_missing_secrets(self): + adapter = _make_adapter(verify_token="", app_secret="") + request = MagicMock() + + response = await adapter._handle_health(request) + body = json.loads(response.text) + assert body["verify_token_configured"] is False + assert body["app_secret_configured"] is False + + +# --------------------------------------------------------------------------- +# Mixin contract — gating still works on the cloud adapter +# --------------------------------------------------------------------------- + +class TestMixinInherited: + """Sanity-check: the Cloud adapter inherits the same gating behavior + as the Baileys adapter via WhatsAppBehaviorMixin. 
+ """ + + def test_format_message_converts_markdown(self): + adapter = _make_adapter() + assert adapter.format_message("**bold**") == "*bold*" + assert adapter.format_message("# Title") == "*Title*" + + def test_should_process_message_dm_open(self): + adapter = _make_adapter() + adapter._dm_policy = "open" + assert adapter._should_process_message({ + "chatId": "15551234567@c.us", + "senderId": "15551234567@c.us", + "isGroup": False, + "body": "hi", + }) is True + + def test_should_process_message_dm_disabled(self): + adapter = _make_adapter() + adapter._dm_policy = "disabled" + assert adapter._should_process_message({ + "chatId": "15551234567@c.us", + "senderId": "15551234567@c.us", + "isGroup": False, + "body": "hi", + }) is False + + def test_broadcast_chats_filtered(self): + adapter = _make_adapter() + assert adapter._should_process_message({ + "chatId": "status@broadcast", + "isGroup": False, + "body": "x", + }) is False + + +# --------------------------------------------------------------------------- +# Outbound media — link mode + upload mode (Phase 4) +# --------------------------------------------------------------------------- + +import os as _os +import tempfile as _tempfile +from unittest.mock import patch as _patch + + +def _mock_upload_response(media_id: str = "media_abc123"): + """Graph /media POST response shape.""" + resp = MagicMock() + resp.status_code = 200 + resp.json = MagicMock(return_value={"id": media_id}) + resp.text = json.dumps({"id": media_id}) + return resp + + +def _mock_message_response(wamid: str = "wamid.outbound1"): + """Graph /messages POST response shape.""" + resp = MagicMock() + resp.status_code = 200 + resp.json = MagicMock(return_value={"messages": [{"id": wamid}]}) + resp.text = json.dumps({"messages": [{"id": wamid}]}) + return resp + + +def _tmpfile(suffix: str = ".jpg", content: bytes = b"\xff\xd8\xff\xe0") -> str: + """Write a small temp file and return its path. 
Caller cleans up.""" + fd, path = _tempfile.mkstemp(suffix=suffix) + with _os.fdopen(fd, "wb") as fh: + fh.write(content) + return path + + +class TestSendImage: + """send_image — public URL takes the link path; local file uploads first.""" + + @pytest.mark.asyncio + async def test_send_image_link_mode_skips_upload(self): + adapter = _make_adapter() + adapter._http_client = MagicMock() + adapter._http_client.post = AsyncMock(return_value=_mock_message_response()) + + result = await adapter.send_image("15551234567", "https://cdn.example.com/cat.jpg") + + assert result.success is True + # Exactly one POST — straight to /messages, no /media upload + assert adapter._http_client.post.call_count == 1 + url = adapter._http_client.post.call_args.args[0] + assert url.endswith("/messages") + payload = adapter._http_client.post.call_args.kwargs["json"] + assert payload["type"] == "image" + assert payload["image"] == {"link": "https://cdn.example.com/cat.jpg"} + + @pytest.mark.asyncio + async def test_send_image_local_path_uploads_then_sends(self): + adapter = _make_adapter() + adapter._http_client = MagicMock() + adapter._http_client.post = AsyncMock(side_effect=[ + _mock_upload_response("media_uploaded_id"), + _mock_message_response(), + ]) + path = _tmpfile(".jpg") + try: + result = await adapter.send_image_file("15551234567", path) + assert result.success is True + assert adapter._http_client.post.call_count == 2 + + upload_url = adapter._http_client.post.call_args_list[0].args[0] + send_url = adapter._http_client.post.call_args_list[1].args[0] + assert upload_url.endswith("/media") + assert send_url.endswith("/messages") + + send_payload = adapter._http_client.post.call_args_list[1].kwargs["json"] + assert send_payload["image"] == {"id": "media_uploaded_id"} + finally: + _os.unlink(path) + + @pytest.mark.asyncio + async def test_send_image_caption_attached(self): + adapter = _make_adapter() + adapter._http_client = MagicMock() + adapter._http_client.post = 
AsyncMock(return_value=_mock_message_response()) + + await adapter.send_image( + "15551234567", "https://cdn.example.com/cat.jpg", caption="cute cat" + ) + payload = adapter._http_client.post.call_args.kwargs["json"] + assert payload["image"]["caption"] == "cute cat" + + @pytest.mark.asyncio + async def test_send_image_oversize_rejected_locally(self): + """Don't round-trip to Graph just to be told the file's too big.""" + adapter = _make_adapter() + adapter._http_client = MagicMock() + adapter._http_client.post = AsyncMock() + # 6MB > 5MB image cap + path = _tmpfile(".jpg", content=b"x" * (6 * 1024 * 1024)) + try: + result = await adapter.send_image_file("15551234567", path) + assert result.success is False + assert "5242880" in result.error or "cap is" in result.error + # Never even POSTed + adapter._http_client.post.assert_not_called() + finally: + _os.unlink(path) + + @pytest.mark.asyncio + async def test_send_image_missing_local_file_returns_failure(self): + adapter = _make_adapter() + adapter._http_client = MagicMock() + adapter._http_client.post = AsyncMock() + + result = await adapter.send_image_file( + "15551234567", "/nonexistent/path/foo.jpg" + ) + assert result.success is False + assert "File not found" in result.error + adapter._http_client.post.assert_not_called() + + @pytest.mark.asyncio + async def test_send_image_upload_failure_returns_failure(self): + adapter = _make_adapter() + # First call (upload) fails with a Graph error + upload_fail = MagicMock() + upload_fail.status_code = 400 + upload_fail.json = MagicMock(return_value={ + "error": {"code": 100, "message": "Bad media"} + }) + upload_fail.text = '{"error":{"code":100,"message":"Bad media"}}' + adapter._http_client = MagicMock() + adapter._http_client.post = AsyncMock(return_value=upload_fail) + + path = _tmpfile(".jpg") + try: + result = await adapter.send_image_file("15551234567", path) + assert result.success is False + assert "graph error 100" in result.error + # Only the upload call — 
never reached /messages + assert adapter._http_client.post.call_count == 1 + finally: + _os.unlink(path) + + +class TestSendVideo: + @pytest.mark.asyncio + async def test_send_video_link_mode(self): + adapter = _make_adapter() + adapter._http_client = MagicMock() + adapter._http_client.post = AsyncMock(return_value=_mock_message_response()) + + await adapter.send_video("15551234567", "https://cdn.example.com/v.mp4", caption="clip") + payload = adapter._http_client.post.call_args.kwargs["json"] + assert payload["type"] == "video" + assert payload["video"]["link"] == "https://cdn.example.com/v.mp4" + assert payload["video"]["caption"] == "clip" + + +class TestSendMethodsAcceptBaseClassKwargs: + """Regression: every send_* method must absorb ``metadata=`` (and any + other future kwargs) without raising TypeError. + + base.BasePlatformAdapter.send_multiple_images and friends pass + ``metadata=...`` to send_image; if a subclass forgets ``**kwargs``, + the agent crashes mid-send_multiple_images instead of just sending + the image. This test guards against that for every Cloud send_* + surface. + """ + + @pytest.mark.asyncio + async def test_send_image_accepts_metadata(self): + adapter = _make_adapter() + adapter._http_client = MagicMock() + adapter._http_client.post = AsyncMock(return_value=_mock_message_response()) + # Should not raise TypeError. 
+ result = await adapter.send_image( + "15551234567", "https://cdn.example.com/x.jpg", + metadata={"trace_id": "abc"}, + ) + assert result.success is True + + @pytest.mark.asyncio + async def test_send_image_file_accepts_metadata(self): + adapter = _make_adapter() + adapter._http_client = MagicMock() + adapter._http_client.post = AsyncMock(side_effect=[ + _mock_upload_response(), + _mock_message_response(), + ]) + path = _tmpfile(".jpg") + try: + result = await adapter.send_image_file( + "15551234567", path, metadata={"x": 1}, + ) + assert result.success is True + finally: + _os.unlink(path) + + @pytest.mark.asyncio + async def test_send_video_accepts_metadata(self): + adapter = _make_adapter() + adapter._http_client = MagicMock() + adapter._http_client.post = AsyncMock(return_value=_mock_message_response()) + result = await adapter.send_video( + "15551234567", "https://cdn.example.com/v.mp4", + metadata={"x": 1}, + ) + assert result.success is True + + @pytest.mark.asyncio + async def test_send_voice_accepts_metadata(self): + adapter = _make_adapter() + adapter._http_client = MagicMock() + adapter._http_client.post = AsyncMock(return_value=_mock_message_response()) + result = await adapter.send_voice( + "15551234567", "https://cdn.example.com/a.ogg", + metadata={"x": 1}, + ) + assert result.success is True + + @pytest.mark.asyncio + async def test_send_document_accepts_metadata(self): + adapter = _make_adapter() + adapter._http_client = MagicMock() + adapter._http_client.post = AsyncMock(side_effect=[ + _mock_upload_response(), + _mock_message_response(), + ]) + path = _tmpfile(".pdf", content=b"%PDF") + try: + result = await adapter.send_document( + "15551234567", path, metadata={"x": 1}, + ) + assert result.success is True + finally: + _os.unlink(path) + + +class TestSendDocument: + @pytest.mark.asyncio + async def test_send_document_filename_attached(self): + adapter = _make_adapter() + adapter._http_client = MagicMock() + adapter._http_client.post = 
AsyncMock(side_effect=[ + _mock_upload_response("doc_id"), + _mock_message_response(), + ]) + path = _tmpfile(".pdf", content=b"%PDF-1.4 ...") + try: + await adapter.send_document( + "15551234567", path, caption="Q3 report", + file_name="report.pdf", + ) + send_payload = adapter._http_client.post.call_args_list[1].kwargs["json"] + assert send_payload["type"] == "document" + assert send_payload["document"]["id"] == "doc_id" + assert send_payload["document"]["caption"] == "Q3 report" + assert send_payload["document"]["filename"] == "report.pdf" + finally: + _os.unlink(path) + + +class TestSendVoice: + """MP3 voice with ffmpeg present -> opus; without ffmpeg -> MP3 fallback.""" + + @pytest.mark.asyncio + async def test_send_voice_no_ffmpeg_falls_back_to_mp3(self): + adapter = _make_adapter() + adapter._http_client = MagicMock() + adapter._http_client.post = AsyncMock(side_effect=[ + _mock_upload_response("audio_id"), + _mock_message_response(), + ]) + # Simulate ffmpeg absent — adapter._convert_to_opus returns None + adapter._convert_to_opus = AsyncMock(return_value=None) + + path = _tmpfile(".mp3", content=b"ID3\x04\x00\x00\x00\x00") + try: + result = await adapter.send_voice("15551234567", path) + assert result.success is True + # Adapter still uploaded + sent the MP3 as audio + assert adapter._http_client.post.call_count == 2 + send_payload = adapter._http_client.post.call_args_list[1].kwargs["json"] + assert send_payload["type"] == "audio" + assert send_payload["audio"]["id"] == "audio_id" + finally: + _os.unlink(path) + + @pytest.mark.asyncio + async def test_send_voice_ffmpeg_present_uses_opus(self): + adapter = _make_adapter() + adapter._http_client = MagicMock() + adapter._http_client.post = AsyncMock(side_effect=[ + _mock_upload_response("voice_id"), + _mock_message_response(), + ]) + # Pretend ffmpeg conversion succeeded by returning a fake opus path. 
+ opus_path = _tmpfile(".ogg", content=b"OggS") + adapter._convert_to_opus = AsyncMock(return_value=opus_path) + + mp3_path = _tmpfile(".mp3", content=b"ID3") + try: + result = await adapter.send_voice("15551234567", mp3_path) + assert result.success is True + # Conversion was invoked with the original MP3 + uploaded_path = adapter._convert_to_opus.call_args.args[0] + assert uploaded_path == mp3_path + send_payload = adapter._http_client.post.call_args_list[1].kwargs["json"] + assert send_payload["type"] == "audio" + finally: + _os.unlink(mp3_path) + if _os.path.exists(opus_path): + _os.unlink(opus_path) + + @pytest.mark.asyncio + async def test_warn_once_no_ffmpeg_actually_only_warns_once(self): + adapter = _make_adapter() + adapter._warned_no_ffmpeg = False + adapter._warn_once_no_ffmpeg() + assert adapter._warned_no_ffmpeg is True + # Second call: no-op (we just verify no exception + flag stays True) + adapter._warn_once_no_ffmpeg() + assert adapter._warned_no_ffmpeg is True + + +# --------------------------------------------------------------------------- +# Inbound media — Graph two-step download (Phase 4) +# --------------------------------------------------------------------------- + +class TestDownloadMedia: + """Two-step Graph media download: meta -> temp URL -> bytes.""" + + @pytest.mark.asyncio + async def test_two_step_download_writes_cache_file(self, tmp_path): + from gateway.platforms import whatsapp_cloud as wac + + adapter = _make_adapter() + adapter._http_client = MagicMock() + + # Step 1 — metadata returns temp URL + mime + meta_resp = MagicMock(status_code=200) + meta_resp.json = MagicMock(return_value={ + "url": "https://lookaside.fbsbx.com/whatsapp/m/...", + "mime_type": "image/jpeg", + "sha256": "abc", + "file_size": 12345, + "id": "media_xyz", + "messaging_product": "whatsapp", + }) + # Step 2 — bytes + blob_resp = MagicMock(status_code=200, content=b"\xff\xd8\xff\xe0jpegdata") + + adapter._http_client.get = AsyncMock(side_effect=[meta_resp, 
blob_resp]) + + with _patch.object(wac, "_INBOUND_MEDIA_CACHE", tmp_path): + local_path, mime = await adapter._download_media_to_cache("media_xyz") + + assert mime == "image/jpeg" + assert local_path is not None + assert _os.path.exists(local_path) + assert _os.path.basename(local_path).startswith("media_xyz") + assert _os.path.basename(local_path).endswith(".jpg") + with open(local_path, "rb") as fh: + assert fh.read() == b"\xff\xd8\xff\xe0jpegdata" + + @pytest.mark.asyncio + async def test_metadata_failure_returns_none(self): + adapter = _make_adapter() + adapter._http_client = MagicMock() + meta_fail = MagicMock(status_code=404) + meta_fail.json = MagicMock(return_value={"error": {"code": 100}}) + adapter._http_client.get = AsyncMock(return_value=meta_fail) + + local_path, mime = await adapter._download_media_to_cache("missing") + assert local_path is None and mime is None + + @pytest.mark.asyncio + async def test_bytes_failure_returns_none(self, tmp_path): + from gateway.platforms import whatsapp_cloud as wac + + adapter = _make_adapter() + adapter._http_client = MagicMock() + meta_resp = MagicMock(status_code=200) + meta_resp.json = MagicMock(return_value={ + "url": "https://lookaside.fbsbx.com/...", + "mime_type": "image/jpeg", + }) + blob_fail = MagicMock(status_code=403, content=b"") + adapter._http_client.get = AsyncMock(side_effect=[meta_resp, blob_fail]) + + with _patch.object(wac, "_INBOUND_MEDIA_CACHE", tmp_path): + local_path, mime = await adapter._download_media_to_cache("x") + assert local_path is None + + @pytest.mark.asyncio + async def test_metadata_includes_auth_header(self): + adapter = _make_adapter(access_token="bearer-tok") + adapter._http_client = MagicMock() + adapter._http_client.get = AsyncMock(return_value=MagicMock(status_code=500)) + await adapter._download_media_to_cache("x") + headers = adapter._http_client.get.call_args.kwargs["headers"] + assert headers["Authorization"] == "Bearer bearer-tok" + + @pytest.mark.asyncio + 
@pytest.mark.parametrize("mime,expected_ext", [ + # Regression for the ".oga vs .ogg" voice-note bug — Python's + # mimetypes module returns the RFC-correct .oga which downstream + # STT pipelines reject. + ("audio/ogg", ".ogg"), + ("audio/ogg; codecs=opus", ".ogg"), + ("audio/x-opus+ogg", ".ogg"), + ("audio/opus", ".ogg"), + # iOS voice memos arrive as audio/mp4 — must become .m4a, not .mp4. + ("audio/mp4", ".m4a"), + ("audio/x-m4a", ".m4a"), + # JPEG should never land as .jpe (legacy IANA). + ("image/jpeg", ".jpg"), + ]) + async def test_extension_overrides_for_real_world_mimes(self, tmp_path, mime, expected_ext): + from gateway.platforms import whatsapp_cloud as wac + + adapter = _make_adapter() + adapter._http_client = MagicMock() + meta_resp = MagicMock(status_code=200) + meta_resp.json = MagicMock(return_value={ + "url": "https://lookaside.fbsbx.com/test", + "mime_type": mime, + }) + blob_resp = MagicMock(status_code=200, content=b"x") + adapter._http_client.get = AsyncMock(side_effect=[meta_resp, blob_resp]) + + with _patch.object(wac, "_INBOUND_MEDIA_CACHE", tmp_path): + local_path, _ = await adapter._download_media_to_cache("media_x") + + assert local_path is not None + assert local_path.endswith(expected_ext), ( + f"mime {mime!r} should map to {expected_ext} but got {local_path}" + ) + + +class TestInboundMediaDispatch: + """End-to-end: webhook with image_id -> adapter downloads -> MessageEvent.media_urls populated.""" + + @pytest.mark.asyncio + async def test_inbound_image_populates_media_urls(self, tmp_path): + from gateway.platforms import whatsapp_cloud as wac + + adapter = _make_adapter(app_secret="key") + captured: list = [] + + async def _capture(event): + captured.append(event) + + adapter.handle_message = _capture + + # Mock the two-step Graph download + meta_resp = MagicMock(status_code=200) + meta_resp.json = MagicMock(return_value={ + "url": "https://lookaside.fbsbx.com/whatsapp/m/abc", + "mime_type": "image/jpeg", + }) + blob_resp = 
MagicMock(status_code=200, content=b"\xff\xd8\xff\xe0fake_jpeg") + adapter._http_client = MagicMock() + adapter._http_client.get = AsyncMock(side_effect=[meta_resp, blob_resp]) + + # Build an inbound image webhook payload + payload = { + "object": "whatsapp_business_account", + "entry": [{ + "id": "x", + "changes": [{ + "field": "messages", + "value": { + "messaging_product": "whatsapp", + "metadata": {"phone_number_id": "1"}, + "contacts": [{"profile": {"name": "U"}, "wa_id": "1555"}], + "messages": [{ + "from": "1555", + "id": "wamid.img1", + "timestamp": "0", + "type": "image", + "image": { + "id": "media_image_abc", + "mime_type": "image/jpeg", + "sha256": "...", + "caption": "look at this", + }, + }], + }, + }], + }], + } + body = json.dumps(payload).encode("utf-8") + sig = _sign("key", body) + + with _patch.object(wac, "_INBOUND_MEDIA_CACHE", tmp_path): + response = await adapter._handle_webhook( + _post_request(body, {"X-Hub-Signature-256": sig}) + ) + + assert response.status == 200 + assert len(captured) == 1 + event = captured[0] + # Caption became the body + assert event.text == "look at this" + # Cached file path populated + assert len(event.media_urls) == 1 + assert _os.path.exists(event.media_urls[0]) + assert event.media_types[0] == "image/jpeg" + from gateway.platforms.base import MessageType + assert event.message_type == MessageType.PHOTO + + @pytest.mark.asyncio + async def test_inbound_text_document_injected_into_body(self, tmp_path): + """A .txt document should have its content prepended to the body.""" + from gateway.platforms import whatsapp_cloud as wac + + adapter = _make_adapter(app_secret="key") + captured: list = [] + + async def _capture(event): + captured.append(event) + + adapter.handle_message = _capture + + text_content = b"hello\nthis is the file\n" + meta_resp = MagicMock(status_code=200) + meta_resp.json = MagicMock(return_value={ + "url": "https://lookaside.fbsbx.com/whatsapp/m/doc", + "mime_type": "text/plain", + }) + blob_resp 
= MagicMock(status_code=200, content=text_content) + adapter._http_client = MagicMock() + adapter._http_client.get = AsyncMock(side_effect=[meta_resp, blob_resp]) + + payload = { + "object": "whatsapp_business_account", + "entry": [{ + "id": "x", + "changes": [{ + "field": "messages", + "value": { + "messaging_product": "whatsapp", + "metadata": {"phone_number_id": "1"}, + "contacts": [{"profile": {"name": "U"}, "wa_id": "1555"}], + "messages": [{ + "from": "1555", + "id": "wamid.doc1", + "timestamp": "0", + "type": "document", + "document": { + "id": "media_doc_abc", + "mime_type": "text/plain", + "filename": "notes.txt", + }, + }], + }, + }], + }], + } + body = json.dumps(payload).encode("utf-8") + sig = _sign("key", body) + + with _patch.object(wac, "_INBOUND_MEDIA_CACHE", tmp_path): + await adapter._handle_webhook( + _post_request(body, {"X-Hub-Signature-256": sig}) + ) + + assert len(captured) == 1 + event = captured[0] + assert "hello\nthis is the file" in event.text + assert "[Content of" in event.text + # File still available in media_urls for the agent's other tools + assert len(event.media_urls) == 1 + + @pytest.mark.asyncio + async def test_inbound_image_download_failure_still_dispatches(self, tmp_path): + """If the binary fetch fails we still want the agent to see the + message metadata + caption — better than silently dropping.""" + from gateway.platforms import whatsapp_cloud as wac + + adapter = _make_adapter(app_secret="key") + captured: list = [] + + async def _capture(event): + captured.append(event) + + adapter.handle_message = _capture + adapter._http_client = MagicMock() + # Metadata fetch fails + adapter._http_client.get = AsyncMock(return_value=MagicMock(status_code=500)) + + payload = { + "object": "whatsapp_business_account", + "entry": [{ + "id": "x", + "changes": [{ + "field": "messages", + "value": { + "messaging_product": "whatsapp", + "metadata": {"phone_number_id": "1"}, + "contacts": [{"profile": {"name": "U"}, "wa_id": "1555"}], + 
"messages": [{ + "from": "1555", + "id": "wamid.bad_img", + "timestamp": "0", + "type": "image", + "image": {"id": "borked", "mime_type": "image/jpeg"}, + }], + }, + }], + }], + } + body = json.dumps(payload).encode("utf-8") + sig = _sign("key", body) + + with _patch.object(wac, "_INBOUND_MEDIA_CACHE", tmp_path): + response = await adapter._handle_webhook( + _post_request(body, {"X-Hub-Signature-256": sig}) + ) + + assert response.status == 200 + assert len(captured) == 1 + # Agent gets the event, just with empty media_urls + assert captured[0].media_urls == [] + + +# --------------------------------------------------------------------------- +# Group-shaped message guard +# --------------------------------------------------------------------------- + +class TestGroupMessageGuard: + """Cloud API group support is deferred to v2 (Meta capability-tier + gated, different payload shape than DMs). If Meta delivers a + group-shaped message — identifiable by a populated ``chat`` field + on the message object — the adapter should refuse cleanly rather + than silently treating the sender's wa_id as the chat_id (which + would route the bot's reply back to the sender as a DM, not the + group).""" + + @pytest.mark.asyncio + async def test_group_shaped_message_dropped_with_warning(self, caplog): + adapter = _make_adapter() + adapter.handle_message = AsyncMock() + raw = { + "from": "15551234567", + "id": "wamid.group1", + "timestamp": "0", + "type": "text", + "text": {"body": "hi from a group"}, + "chat": "120363012345678901@g.us", # presence of `chat` = group + } + with caplog.at_level("WARNING"): + event = await adapter._build_message_event_from_cloud( + raw, {"15551234567": "Alice"}, {} + ) + assert event is None + # Warning surfaced so the operator knows group messages are being dropped + assert any( + "group-shaped" in rec.message + for rec in caplog.records + ) + # Defensive: handler not invoked + adapter.handle_message.assert_not_called() + + @pytest.mark.asyncio + async 
def test_normal_dm_still_dispatches(self): + """Sanity: the guard is keyed on `chat`, not just `from`. Normal + DMs (which only have `from`, no `chat`) must still dispatch.""" + adapter = _make_adapter() + raw = { + "from": "15551234567", + "id": "wamid.dm1", + "timestamp": "0", + "type": "text", + "text": {"body": "hi from a DM"}, + # NO `chat` field — this is a DM + } + event = await adapter._build_message_event_from_cloud( + raw, {"15551234567": "Alice"}, {} + ) + assert event is not None + assert event.text == "hi from a DM" + assert event.source.chat_id == "15551234567" + + +# ========================================================================= +# Phase 9 — Interactive button messages (clarify / approval / slash-confirm) +# ========================================================================= +# +# These tests cover the four hooks the gateway uses for richer UX on +# platforms that support interactive buttons: +# - send_clarify (mid-conversation multi-choice question) +# - send_exec_approval (dangerous-command Y/N gate) +# - send_slash_confirm (3-button slash-command preview) +# - _dispatch_interactive_reply (inbound side: route button taps to +# the right resolver) +# Telegram and Discord have the same hooks; we mirror their callback-id +# format (cl:, appr:, sc:) so the gateway's existing degrade-to-text +# fallback works transparently. 
+ + +class TestSendClarifyButtons: + """``send_clarify`` outbound — picks button vs list mode by choice count.""" + + @pytest.mark.asyncio + async def test_three_choices_uses_button_mode(self): + """1–3 choices → interactive.type=button (inline pills).""" + adapter = _make_adapter() + adapter._http_client = MagicMock() + adapter._http_client.post = AsyncMock( + return_value=_mock_httpx_response(200, {"messages": [{"id": "wamid.q1"}]}) + ) + + result = await adapter.send_clarify( + chat_id="15551234567", + question="Pick one", + choices=["Alpha", "Bravo", "Charlie"], + clarify_id="abc123", + session_key="sess-1", + ) + + assert result.success + payload = adapter._http_client.post.call_args.kwargs["json"] + assert payload["type"] == "interactive" + assert payload["interactive"]["type"] == "button" + buttons = payload["interactive"]["action"]["buttons"] + assert len(buttons) == 3 + assert [b["reply"]["title"] for b in buttons] == ["1", "2", "3"] + assert buttons[0]["reply"]["id"] == "cl:abc123:0" + assert buttons[2]["reply"]["id"] == "cl:abc123:2" + body_text = payload["interactive"]["body"]["text"] + assert "Alpha" in body_text and "Bravo" in body_text and "Charlie" in body_text + assert adapter._clarify_state["abc123"] == "sess-1" + + @pytest.mark.asyncio + async def test_four_choices_promoted_to_list_mode(self): + """4+ choices → interactive.type=list (sheet with rows).""" + adapter = _make_adapter() + adapter._http_client = MagicMock() + adapter._http_client.post = AsyncMock( + return_value=_mock_httpx_response(200, {"messages": [{"id": "wamid.q2"}]}) + ) + + result = await adapter.send_clarify( + chat_id="15551234567", + question="Pick one", + choices=["A", "B", "C", "D"], + clarify_id="q2", + session_key="sess-2", + ) + + assert result.success + payload = adapter._http_client.post.call_args.kwargs["json"] + assert payload["interactive"]["type"] == "list" + rows = payload["interactive"]["action"]["sections"][0]["rows"] + assert len(rows) == 5 # 4 choices + 1 
"Other" + assert rows[0]["id"] == "cl:q2:0" + assert rows[3]["id"] == "cl:q2:3" + assert rows[4]["id"] == "cl:q2:other" + assert "Other" in rows[4]["title"] + + @pytest.mark.asyncio + async def test_open_ended_falls_back_to_plain_text(self): + """No choices → plain text send, no interactive payload.""" + adapter = _make_adapter() + adapter._http_client = MagicMock() + adapter._http_client.post = AsyncMock( + return_value=_mock_httpx_response(200, {"messages": [{"id": "wamid.q3"}]}) + ) + + result = await adapter.send_clarify( + chat_id="15551234567", + question="What's your name?", + choices=None, + clarify_id="q3", + session_key="sess-3", + ) + + assert result.success + payload = adapter._http_client.post.call_args.kwargs["json"] + assert payload["type"] == "text" + assert "What's your name?" in payload["text"]["body"] + # Open-ended state is NOT stored on the adapter — the gateway's + # text-intercept handles open-ended resolution (mirrors Telegram). + assert "q3" not in adapter._clarify_state + + @pytest.mark.asyncio + async def test_send_failure_does_not_register_state(self): + """If Meta rejects the send, don't leave dangling state behind.""" + adapter = _make_adapter() + adapter._http_client = MagicMock() + adapter._http_client.post = AsyncMock( + return_value=_mock_httpx_response( + 400, {"error": {"code": 100, "message": "bad payload"}} + ) + ) + + result = await adapter.send_clarify( + chat_id="15551234567", + question="hi", + choices=["yes", "no"], + clarify_id="dead", + session_key="sess-x", + ) + + assert not result.success + assert "dead" not in adapter._clarify_state + + +class TestSendExecApprovalButtons: + """``send_exec_approval`` outbound — 2-button Approve/Deny gate.""" + + @pytest.mark.asyncio + async def test_approval_renders_two_buttons(self): + adapter = _make_adapter() + adapter._http_client = MagicMock() + adapter._http_client.post = AsyncMock( + return_value=_mock_httpx_response(200, {"messages": [{"id": "wamid.a1"}]}) + ) + + result = 
await adapter.send_exec_approval( + chat_id="15551234567", + command="rm -rf /tmp/foo", + session_key="sess-app-1", + description="cleanup script", + ) + + assert result.success + payload = adapter._http_client.post.call_args.kwargs["json"] + assert payload["interactive"]["type"] == "button" + buttons = payload["interactive"]["action"]["buttons"] + assert len(buttons) == 2 + assert "Approve" in buttons[0]["reply"]["title"] + assert "Deny" in buttons[1]["reply"]["title"] + approve_id = buttons[0]["reply"]["id"] + deny_id = buttons[1]["reply"]["id"] + assert approve_id.startswith("appr:") and approve_id.endswith(":approve") + assert deny_id.startswith("appr:") and deny_id.endswith(":deny") + approval_id = approve_id.split(":")[1] + assert deny_id.split(":")[1] == approval_id + body = payload["interactive"]["body"]["text"] + assert "rm -rf /tmp/foo" in body + assert "cleanup script" in body + assert adapter._exec_approval_state[approval_id] == "sess-app-1" + + @pytest.mark.asyncio + async def test_long_command_is_truncated(self): + """Body must stay under WhatsApp's 1024-char interactive cap.""" + adapter = _make_adapter() + adapter._http_client = MagicMock() + adapter._http_client.post = AsyncMock( + return_value=_mock_httpx_response(200, {"messages": [{"id": "x"}]}) + ) + + huge = "echo " + ("x" * 5000) + result = await adapter.send_exec_approval( + chat_id="15551234567", + command=huge, + session_key="sess-x", + ) + assert result.success + payload = adapter._http_client.post.call_args.kwargs["json"] + assert len(payload["interactive"]["body"]["text"]) <= 1024 + + +class TestSendSlashConfirmButtons: + """``send_slash_confirm`` outbound — 3-button Once/Always/Cancel.""" + + @pytest.mark.asyncio + async def test_three_buttons_with_ids(self): + adapter = _make_adapter() + adapter._http_client = MagicMock() + adapter._http_client.post = AsyncMock( + return_value=_mock_httpx_response(200, {"messages": [{"id": "wamid.s1"}]}) + ) + + result = await 
adapter.send_slash_confirm( + chat_id="15551234567", + title="Reload MCP", + message="This will restart all MCP servers.", + session_key="sess-sc-1", + confirm_id="cf-9", + ) + + assert result.success + payload = adapter._http_client.post.call_args.kwargs["json"] + assert payload["interactive"]["type"] == "button" + buttons = payload["interactive"]["action"]["buttons"] + ids = [b["reply"]["id"] for b in buttons] + assert ids == ["sc:once:cf-9", "sc:always:cf-9", "sc:cancel:cf-9"] + assert adapter._slash_confirm_state["cf-9"] == "sess-sc-1" + + +class TestDispatchInteractiveReplyClarify: + """Inbound side: button-tap → clarify resolver.""" + + @pytest.mark.asyncio + async def test_clarify_tap_resolves_and_pops_state(self, monkeypatch): + adapter = _make_adapter() + adapter._clarify_state["q1"] = "sess-1" + + captured = {} + + def fake_resolve(clarify_id, response): + captured["clarify_id"] = clarify_id + captured["response"] = response + return True + + monkeypatch.setattr( + "tools.clarify_gateway.resolve_gateway_clarify", fake_resolve + ) + + raw = { + "from": "15551234567", + "type": "interactive", + "interactive": { + "type": "button_reply", + "button_reply": {"id": "cl:q1:2", "title": "3"}, + }, + } + handled = await adapter._dispatch_interactive_reply(raw, {}) + + assert handled is True + assert captured == {"clarify_id": "q1", "response": "3"} + assert "q1" not in adapter._clarify_state + + @pytest.mark.asyncio + async def test_clarify_other_button_keeps_state_and_prompts(self, monkeypatch): + """Picking 'Other' should NOT resolve — it should flip the + clarify entry into text-capture mode (via mark_awaiting_text) + AND keep the state mapping so the gateway's text-intercept can + resolve the next typed message. 
Without the flip, + ``get_pending_for_session`` wouldn't return the entry and the + user's next message would collide with the still-blocked agent + thread, producing an "Interrupting current task" loop.""" + adapter = _make_adapter() + adapter._clarify_state["q1"] = "sess-1" + adapter._http_client = MagicMock() + adapter._http_client.post = AsyncMock( + return_value=_mock_httpx_response(200, {"messages": [{"id": "x"}]}) + ) + + flipped_ids = [] + monkeypatch.setattr( + "tools.clarify_gateway.mark_awaiting_text", + lambda cid: flipped_ids.append(cid) or True, + ) + + raw = { + "from": "15551234567", + "type": "interactive", + "interactive": { + "type": "list_reply", + "list_reply": {"id": "cl:q1:other", "title": "Other"}, + }, + } + handled = await adapter._dispatch_interactive_reply(raw, {}) + + assert handled is True + # State stays so text-intercept can resolve the next message + assert adapter._clarify_state.get("q1") == "sess-1" + # mark_awaiting_text was called with the right clarify_id + assert flipped_ids == ["q1"] + # Follow-up "type your answer" prompt was sent + adapter._http_client.post.assert_called_once() + + @pytest.mark.asyncio + async def test_clarify_other_with_no_entry_falls_back(self, monkeypatch): + """If the underlying clarify entry vanished (timed out, /new, + gateway restart) between the prompt and the tap, + ``mark_awaiting_text`` returns False — drop the stale adapter + state and fall through to text dispatch.""" + adapter = _make_adapter() + adapter._clarify_state["q1"] = "sess-1" + monkeypatch.setattr( + "tools.clarify_gateway.mark_awaiting_text", + lambda cid: False, # entry missing on the gateway side + ) + + raw = { + "from": "15551234567", + "type": "interactive", + "interactive": { + "type": "list_reply", + "list_reply": {"id": "cl:q1:other", "title": "Other"}, + }, + } + handled = await adapter._dispatch_interactive_reply(raw, {}) + assert handled is False + # Adapter state was already popped before the gateway check; we + # leave 
it popped on the missing-entry path so a real follow-up + # text doesn't try to resolve a ghost. + assert "q1" not in adapter._clarify_state + + @pytest.mark.asyncio + async def test_stale_clarify_tap_falls_back_to_text(self): + """No state entry → return False so caller treats it as text.""" + adapter = _make_adapter() # _clarify_state is empty + + raw = { + "from": "15551234567", + "type": "interactive", + "interactive": { + "type": "button_reply", + "button_reply": {"id": "cl:ghost:0", "title": "1"}, + }, + } + handled = await adapter._dispatch_interactive_reply(raw, {}) + assert handled is False + + @pytest.mark.asyncio + async def test_clarify_resolver_no_waiter_falls_back(self, monkeypatch): + """Resolver returns False (e.g. agent timed out) → caller falls + back to text dispatch.""" + adapter = _make_adapter() + adapter._clarify_state["q1"] = "sess-1" + monkeypatch.setattr( + "tools.clarify_gateway.resolve_gateway_clarify", + lambda cid, r: False, + ) + + raw = { + "from": "15551234567", + "type": "interactive", + "interactive": { + "type": "button_reply", + "button_reply": {"id": "cl:q1:0", "title": "1"}, + }, + } + handled = await adapter._dispatch_interactive_reply(raw, {}) + assert handled is False + + +class TestDispatchInteractiveReplyApproval: + """Inbound side: approval-tap → resolve_gateway_approval.""" + + @pytest.mark.asyncio + async def test_approve_tap_calls_resolver_and_confirms(self, monkeypatch): + adapter = _make_adapter() + adapter._exec_approval_state["app1"] = "sess-app-1" + adapter._http_client = MagicMock() + adapter._http_client.post = AsyncMock( + return_value=_mock_httpx_response(200, {"messages": [{"id": "x"}]}) + ) + + calls = [] + monkeypatch.setattr( + "tools.approval.resolve_gateway_approval", + lambda session_key, choice: calls.append((session_key, choice)) or 1, + ) + + raw = { + "from": "15551234567", + "type": "interactive", + "interactive": { + "type": "button_reply", + "button_reply": {"id": "appr:app1:approve", "title": 
"Approve"}, + }, + } + handled = await adapter._dispatch_interactive_reply(raw, {}) + + assert handled is True + assert calls == [("sess-app-1", "approve")] + assert "app1" not in adapter._exec_approval_state + confirm_payload = adapter._http_client.post.call_args.kwargs["json"] + assert confirm_payload["type"] == "text" + assert "Approved" in confirm_payload["text"]["body"] + + @pytest.mark.asyncio + async def test_deny_tap_passes_deny_choice(self, monkeypatch): + adapter = _make_adapter() + adapter._exec_approval_state["app2"] = "sess-app-2" + adapter._http_client = MagicMock() + adapter._http_client.post = AsyncMock( + return_value=_mock_httpx_response(200, {"messages": [{"id": "x"}]}) + ) + + choices_seen = [] + monkeypatch.setattr( + "tools.approval.resolve_gateway_approval", + lambda session_key, choice: choices_seen.append(choice) or 1, + ) + + raw = { + "from": "15551234567", + "type": "interactive", + "interactive": { + "type": "button_reply", + "button_reply": {"id": "appr:app2:deny", "title": "Deny"}, + }, + } + await adapter._dispatch_interactive_reply(raw, {}) + + assert choices_seen == ["deny"] + confirm_payload = adapter._http_client.post.call_args.kwargs["json"] + assert "Denied" in confirm_payload["text"]["body"] + + +class TestDispatchInteractiveReplySlashConfirm: + """Inbound side: slash-confirm-tap → tools.slash_confirm.resolve.""" + + @pytest.mark.asyncio + async def test_once_tap_calls_resolver(self, monkeypatch): + adapter = _make_adapter() + adapter._slash_confirm_state["cf-9"] = "sess-sc-1" + adapter._http_client = MagicMock() + adapter._http_client.post = AsyncMock( + return_value=_mock_httpx_response(200, {"messages": [{"id": "x"}]}) + ) + + captured = {} + + async def fake_resolve(session_key, confirm_id, choice): + captured.update( + session_key=session_key, confirm_id=confirm_id, choice=choice + ) + return "MCP reloaded." 
+ + import tools.slash_confirm as _sc + monkeypatch.setattr(_sc, "resolve", fake_resolve) + + raw = { + "from": "15551234567", + "type": "interactive", + "interactive": { + "type": "button_reply", + "button_reply": {"id": "sc:once:cf-9", "title": "Approve Once"}, + }, + } + handled = await adapter._dispatch_interactive_reply(raw, {}) + + assert handled is True + assert captured == { + "session_key": "sess-sc-1", + "confirm_id": "cf-9", + "choice": "once", + } + reply_payload = adapter._http_client.post.call_args.kwargs["json"] + assert "MCP reloaded" in reply_payload["text"]["body"] + + +class TestInteractiveReplyEndToEnd: + """Integration: `_build_message_event_from_cloud` must SHORT-CIRCUIT + on a recognized interactive reply and NOT also produce a fresh + conversation turn (which would double-fire the agent).""" + + @pytest.mark.asyncio + async def test_recognized_tap_returns_none_no_text_dispatch(self, monkeypatch): + adapter = _make_adapter() + adapter._clarify_state["q1"] = "sess-1" + monkeypatch.setattr( + "tools.clarify_gateway.resolve_gateway_clarify", + lambda cid, r: True, + ) + + raw = { + "from": "15551234567", + "id": "wamid.tap1", + "type": "interactive", + "interactive": { + "type": "button_reply", + "button_reply": {"id": "cl:q1:0", "title": "1"}, + }, + } + event = await adapter._build_message_event_from_cloud( + raw, {"15551234567": "Alice"}, {} + ) + # The tap resolved the clarify; no MessageEvent dispatched so the + # agent thread that was waiting on clarify is unblocked exactly + # once, not once + a new turn for the tap. 
+ assert event is None + + @pytest.mark.asyncio + async def test_unrecognized_tap_falls_through_to_text(self): + """Button taps from unrelated plugin adapters (or stale taps) + should be treated as plain text input — this preserves the + graceful-degrade path the gateway already relies on.""" + adapter = _make_adapter() + raw = { + "from": "15551234567", + "id": "wamid.tap2", + "type": "interactive", + "interactive": { + "type": "button_reply", + "button_reply": {"id": "unknown:foo", "title": "Hello"}, + }, + } + event = await adapter._build_message_event_from_cloud( + raw, {"15551234567": "Alice"}, {} + ) + # Falls through to text dispatch — the button title becomes the + # user message body so the agent at least sees what they tapped. + assert event is not None + assert event.text == "Hello" + + +# ========================================================================= +# Phase 10 — Typing indicator + mark-as-read +# ========================================================================= +# +# Meta couples the read receipt and typing indicator into a single POST +# to the messages endpoint. We refresh _last_inbound_wamid_by_chat on +# every accepted inbound message so the gateway can call send_typing() +# without threading event.message_id through the base contract. 
+ + +class TestInboundWamidCache: + """Cache hygiene: refreshes on accepted inbound, skipped on filtered.""" + + @pytest.mark.asyncio + async def test_accepted_message_populates_cache(self): + adapter = _make_adapter() + raw = { + "from": "15551234567", + "id": "wamid.AAA", + "type": "text", + "text": {"body": "hi"}, + } + event = await adapter._build_message_event_from_cloud( + raw, {"15551234567": "Alice"}, {} + ) + assert event is not None + assert adapter._last_inbound_wamid_by_chat["15551234567"] == "wamid.AAA" + + @pytest.mark.asyncio + async def test_subsequent_messages_overwrite_cache(self): + """Cache holds the LATEST inbound, not the first — typing indicator + must attach to the most recent message in the conversation.""" + adapter = _make_adapter() + for wamid in ("wamid.first", "wamid.second", "wamid.third"): + await adapter._build_message_event_from_cloud( + { + "from": "15551234567", + "id": wamid, + "type": "text", + "text": {"body": "msg"}, + }, + {"15551234567": "Alice"}, + {}, + ) + assert adapter._last_inbound_wamid_by_chat["15551234567"] == "wamid.third" + + @pytest.mark.asyncio + async def test_filtered_message_does_not_pollute_cache(self): + """Group-shaped messages get dropped before the cache write — + we don't want typing indicators triggered by inbound traffic the + agent never sees.""" + adapter = _make_adapter() + raw = { + "from": "15551234567", + "id": "wamid.BBB", + "type": "text", + "text": {"body": "hi from group"}, + "chat": "120363012345678901@g.us", # group marker + } + event = await adapter._build_message_event_from_cloud( + raw, {"15551234567": "Alice"}, {} + ) + assert event is None # group guard rejected it + # Cache stays empty + assert "15551234567" not in adapter._last_inbound_wamid_by_chat + + +class TestSendTyping: + """``send_typing`` outbound — combined read receipt + indicator.""" + + @pytest.mark.asyncio + async def test_send_typing_posts_correct_payload(self): + adapter = _make_adapter() + 
adapter._last_inbound_wamid_by_chat["15551234567"] = "wamid.LATEST" + adapter._http_client = MagicMock() + adapter._http_client.post = AsyncMock( + return_value=_mock_httpx_response(200, {"success": True}) + ) + + await adapter.send_typing("15551234567") + + adapter._http_client.post.assert_called_once() + payload = adapter._http_client.post.call_args.kwargs["json"] + # Meta's combined endpoint shape + assert payload["messaging_product"] == "whatsapp" + assert payload["status"] == "read" + assert payload["message_id"] == "wamid.LATEST" + assert payload["typing_indicator"] == {"type": "text"} + + @pytest.mark.asyncio + async def test_send_typing_uses_latest_cached_wamid(self): + """If multiple messages have arrived, the indicator must attach + to the LATEST one (mirrors Meta's documented behavior — the + typing indicator only renders against the most recent message + in the conversation).""" + adapter = _make_adapter() + adapter._last_inbound_wamid_by_chat["15551234567"] = "wamid.OLD" + adapter._last_inbound_wamid_by_chat["15551234567"] = "wamid.NEW" + adapter._http_client = MagicMock() + adapter._http_client.post = AsyncMock( + return_value=_mock_httpx_response(200, {"success": True}) + ) + + await adapter.send_typing("15551234567") + payload = adapter._http_client.post.call_args.kwargs["json"] + assert payload["message_id"] == "wamid.NEW" + + @pytest.mark.asyncio + async def test_send_typing_no_cached_wamid_is_noop(self): + """No inbound message yet for this chat (or cache cleared on + gateway restart) → skip silently. Don't fail, don't log noisily. 
+ The next inbound message will repopulate the cache.""" + adapter = _make_adapter() + # _last_inbound_wamid_by_chat is empty + adapter._http_client = MagicMock() + adapter._http_client.post = AsyncMock( + return_value=_mock_httpx_response(200, {"success": True}) + ) + + await adapter.send_typing("15551234567") + # No HTTP call at all + adapter._http_client.post.assert_not_called() + + @pytest.mark.asyncio + async def test_send_typing_swallows_network_errors(self): + """Any HTTP exception must NOT propagate — typing is best-effort + UX polish and must never block the agent's main reply path. + Verified by the absence of a raise.""" + adapter = _make_adapter() + adapter._last_inbound_wamid_by_chat["15551234567"] = "wamid.X" + adapter._http_client = MagicMock() + adapter._http_client.post = AsyncMock( + side_effect=RuntimeError("connection refused") + ) + + # Should NOT raise + await adapter.send_typing("15551234567") + + @pytest.mark.asyncio + async def test_send_typing_stale_message_logged_at_info(self, caplog): + """Graph error 131009 = wamid > 30 days old. 
Common after a + long-quiet conversation — log at INFO so it doesn't pollute + WARNING-level monitoring dashboards.""" + adapter = _make_adapter() + adapter._last_inbound_wamid_by_chat["15551234567"] = "wamid.OLD" + adapter._http_client = MagicMock() + adapter._http_client.post = AsyncMock( + return_value=_mock_httpx_response( + 400, {"error": {"code": 131009, "message": "Parameter value is not valid"}} + ) + ) + + with caplog.at_level("INFO"): + await adapter.send_typing("15551234567") + + assert any( + "older than 30 days" in rec.message + for rec in caplog.records + ) + + @pytest.mark.asyncio + async def test_send_typing_no_http_client_is_noop(self): + """If the adapter isn't connected yet, send_typing must be a + silent no-op — matches the rest of the adapter's "best-effort + when not running" pattern.""" + adapter = _make_adapter() + adapter._http_client = None + adapter._last_inbound_wamid_by_chat["15551234567"] = "wamid.X" + # Should NOT raise + await adapter.send_typing("15551234567") + + @pytest.mark.asyncio + async def test_send_typing_includes_bearer_auth(self): + """Same auth shape as the rest of the Graph API surface — bearer + token in the Authorization header.""" + adapter = _make_adapter(access_token="my-test-token") + adapter._last_inbound_wamid_by_chat["15551234567"] = "wamid.X" + adapter._http_client = MagicMock() + adapter._http_client.post = AsyncMock( + return_value=_mock_httpx_response(200, {"success": True}) + ) + + await adapter.send_typing("15551234567") + headers = adapter._http_client.post.call_args.kwargs["headers"] + assert headers["Authorization"] == "Bearer my-test-token" diff --git a/tests/hermes_cli/test_nous_subscription.py b/tests/hermes_cli/test_nous_subscription.py index c1deaf77070..1ba38237ea9 100644 --- a/tests/hermes_cli/test_nous_subscription.py +++ b/tests/hermes_cli/test_nous_subscription.py @@ -179,7 +179,13 @@ def test_get_gateway_eligible_tools_ignores_quoted_false_opt_in(monkeypatch): monkeypatch.setattr( ns, 
"_get_gateway_direct_credentials", - lambda: {"web": True, "image_gen": False, "tts": False, "browser": False}, + lambda: { + "web": True, + "image_gen": False, + "tts": False, + "stt": False, + "browser": False, + }, ) unconfigured, has_direct, already_managed = ns.get_gateway_eligible_tools( @@ -191,4 +197,150 @@ def test_get_gateway_eligible_tools_ignores_quoted_false_opt_in(monkeypatch): assert "web" in has_direct assert "web" not in already_managed - assert set(unconfigured) == {"image_gen", "tts", "browser"} + assert set(unconfigured) == {"image_gen", "tts", "stt", "browser"} + + +# --------------------------------------------------------------------------- +# STT — managed-by-Nous detection (Phase 4 follow-up) +# --------------------------------------------------------------------------- + +def test_stt_managed_by_nous_when_provider_openai_and_no_direct_key(monkeypatch): + """Default `stt.provider: openai` with a Nous sub + no direct OpenAI key + should route through the managed audio gateway.""" + monkeypatch.setattr(ns, "get_env_value", lambda name: "") + monkeypatch.setattr(ns, "get_nous_auth_status", lambda: {"logged_in": True}) + monkeypatch.setattr(ns, "managed_nous_tools_enabled", lambda: True) + monkeypatch.setattr(ns, "_toolset_enabled", lambda config, key: False) + monkeypatch.setattr(ns, "_has_agent_browser", lambda: False) + monkeypatch.setattr(ns, "resolve_openai_audio_api_key", lambda: "") + monkeypatch.setattr(ns, "has_direct_modal_credentials", lambda: False) + monkeypatch.setattr( + ns, + "is_managed_tool_gateway_ready", + lambda vendor: vendor == "openai-audio", + ) + + features = ns.get_nous_subscription_features({"stt": {"provider": "openai"}}) + + assert features.stt.available is True + assert features.stt.active is True + assert features.stt.managed_by_nous is True + assert features.stt.direct_override is False + assert features.stt.current_provider == "OpenAI Whisper" + + +def test_stt_direct_key_overrides_managed(monkeypatch): + 
"""When the user has VOICE_TOOLS_OPENAI_KEY set, STT should use the + direct key, not the managed gateway — same precedence as TTS.""" + monkeypatch.setattr(ns, "get_env_value", lambda name: "") + monkeypatch.setattr(ns, "get_nous_auth_status", lambda: {"logged_in": True}) + monkeypatch.setattr(ns, "managed_nous_tools_enabled", lambda: True) + monkeypatch.setattr(ns, "_toolset_enabled", lambda config, key: False) + monkeypatch.setattr(ns, "_has_agent_browser", lambda: False) + monkeypatch.setattr(ns, "resolve_openai_audio_api_key", lambda: "sk-direct-key") + monkeypatch.setattr(ns, "has_direct_modal_credentials", lambda: False) + monkeypatch.setattr( + ns, + "is_managed_tool_gateway_ready", + lambda vendor: vendor == "openai-audio", + ) + + features = ns.get_nous_subscription_features({"stt": {"provider": "openai"}}) + + assert features.stt.available is True + assert features.stt.managed_by_nous is False + assert features.stt.direct_override is True + + +def test_stt_groq_provider_requires_groq_key(monkeypatch): + env = {"GROQ_API_KEY": "groq-key"} + monkeypatch.setattr(ns, "get_env_value", lambda name: env.get(name, "")) + monkeypatch.setattr(ns, "get_nous_auth_status", lambda: {}) + monkeypatch.setattr(ns, "managed_nous_tools_enabled", lambda: False) + monkeypatch.setattr(ns, "_toolset_enabled", lambda config, key: False) + monkeypatch.setattr(ns, "_has_agent_browser", lambda: False) + monkeypatch.setattr(ns, "resolve_openai_audio_api_key", lambda: "") + monkeypatch.setattr(ns, "has_direct_modal_credentials", lambda: False) + monkeypatch.setattr(ns, "is_managed_tool_gateway_ready", lambda vendor: False) + + features = ns.get_nous_subscription_features({"stt": {"provider": "groq"}}) + + assert features.stt.available is True + assert features.stt.managed_by_nous is False + assert features.stt.current_provider == "Groq Whisper" + assert features.stt.explicit_configured is True + + +def 
test_apply_nous_managed_defaults_flips_stt_provider_to_openai_for_nous_users(monkeypatch): + """Fresh Nous-subscribed user with the DEFAULT_CONFIG `stt.provider: local` + seed should have it auto-flipped to "openai" so the managed audio + gateway transcribes their voice notes without needing faster-whisper + installed.""" + monkeypatch.setattr(ns, "get_env_value", lambda name: "") + monkeypatch.setattr(ns, "managed_nous_tools_enabled", lambda: True) + # Avoid the heavy real probing in get_nous_subscription_features. + monkeypatch.setattr( + ns, + "get_nous_subscription_features", + lambda config: ns.NousSubscriptionFeatures( + subscribed=True, + nous_auth_present=True, + provider_is_nous=True, + features={ + key: ns.NousFeatureState( + key=key, label=key, included_by_default=True, + available=False, active=False, managed_by_nous=False, + direct_override=False, toolset_enabled=False, + explicit_configured=False, + ) + for key in ("web", "image_gen", "tts", "stt", "browser", "modal") + }, + ), + ) + + config = {"stt": {"provider": "local"}} + changed = ns.apply_nous_managed_defaults(config, enabled_toolsets=[]) + + assert "stt" in changed + assert config["stt"]["provider"] == "openai" + + +def test_apply_nous_managed_defaults_skips_stt_when_groq_key_present(monkeypatch): + """Don't override a user who explicitly set up Groq for STT.""" + env = {"GROQ_API_KEY": "groq-key"} + monkeypatch.setattr(ns, "get_env_value", lambda name: env.get(name, "")) + monkeypatch.setattr(ns, "managed_nous_tools_enabled", lambda: True) + monkeypatch.setattr( + ns, + "get_nous_subscription_features", + lambda config: ns.NousSubscriptionFeatures( + subscribed=True, + nous_auth_present=True, + provider_is_nous=True, + features={ + key: ns.NousFeatureState( + key=key, label=key, included_by_default=True, + available=False, active=False, managed_by_nous=False, + direct_override=False, toolset_enabled=False, + explicit_configured=False, + ) + for key in ("web", "image_gen", "tts", "stt", 
"browser", "modal") + }, + ), + ) + + config = {"stt": {"provider": "local"}} + changed = ns.apply_nous_managed_defaults(config, enabled_toolsets=[]) + + # STT was not flipped because the user has a Groq key configured. + assert "stt" not in changed + assert config["stt"]["provider"] == "local" + + +def test_apply_gateway_defaults_sets_stt_use_gateway(monkeypatch): + config = {} + changed = ns.apply_gateway_defaults(config, ["stt"]) + + assert "stt" in changed + assert config["stt"]["provider"] == "openai" + assert config["stt"]["use_gateway"] is True diff --git a/tests/hermes_cli/test_status_model_provider.py b/tests/hermes_cli/test_status_model_provider.py index af6b90204ca..dc775ecd092 100644 --- a/tests/hermes_cli/test_status_model_provider.py +++ b/tests/hermes_cli/test_status_model_provider.py @@ -88,6 +88,7 @@ def test_show_status_reports_managed_nous_features(monkeypatch, capsys, tmp_path "web": NousFeatureState("web", "Web tools", True, True, True, True, False, True, "firecrawl"), "image_gen": NousFeatureState("image_gen", "Image generation", True, True, True, True, False, True, "Nous Subscription"), "tts": NousFeatureState("tts", "OpenAI TTS", True, True, True, True, False, True, "OpenAI TTS"), + "stt": NousFeatureState("stt", "Speech-to-text", True, True, True, True, False, True, "OpenAI Whisper"), "browser": NousFeatureState("browser", "Browser automation", True, True, True, True, False, True, "Browser Use"), "modal": NousFeatureState("modal", "Modal execution", False, True, False, False, False, True, "local"), }, diff --git a/tests/hermes_cli/test_whatsapp_cloud_setup.py b/tests/hermes_cli/test_whatsapp_cloud_setup.py new file mode 100644 index 00000000000..cf886887693 --- /dev/null +++ b/tests/hermes_cli/test_whatsapp_cloud_setup.py @@ -0,0 +1,406 @@ +"""Tests for the WhatsApp Cloud API setup wizard. 
+ +Covers: +- Field-shape validators (catch the #1 setup mistake — phone number in + the Phone Number ID field — plus the OpenAI / Slack / GitHub token + paste-by-mistake cases) +- Wizard end-to-end flow with mocked stdin/stdout — verifies each step + writes the expected env var, validation errors block invalid input, + optional fields can be skipped, and the SETUP COMPLETE block prints + the post-setup tunnel + Meta-dashboard instructions the user needs + (the wizard can't smoke-test reachability itself because the gateway + isn't running yet during setup). +""" + +from __future__ import annotations + +import io +import os +from contextlib import redirect_stdout +from pathlib import Path + +import pytest + +from hermes_cli.setup_whatsapp_cloud import ( + _validate_phone_number_id, + _validate_waba_id, + _validate_app_id, + _validate_app_secret, + _validate_access_token, + run_whatsapp_cloud_setup, +) + + +# --------------------------------------------------------------------------- +# Validator tests — the cheap, exhaustive coverage layer +# --------------------------------------------------------------------------- + + +class TestPhoneNumberIdValidator: + def test_accepts_real_meta_phone_number_id(self): + ok, _ = _validate_phone_number_id("7794189252778687") + assert ok + + def test_rejects_actual_phone_number_with_helpful_message(self): + """The #1 setup trap — pasting the phone number instead of the ID.""" + ok, reason = _validate_phone_number_id("15556422442") + assert not ok + assert "phone number" in reason.lower() + assert "Phone number ID" in reason # tells them where to look + + def test_rejects_phone_number_with_plus(self): + ok, reason = _validate_phone_number_id("+15556422442") + assert not ok + assert "numeric" in reason.lower() or "phone number" in reason.lower() + + def test_rejects_empty(self): + ok, reason = _validate_phone_number_id("") + assert not ok + assert "required" in reason.lower() + + def test_rejects_too_short(self): + ok, _ = 
_validate_phone_number_id("12345") + assert not ok + + def test_rejects_too_long(self): + ok, _ = _validate_phone_number_id("1" * 25) + assert not ok + + def test_strips_surrounding_whitespace(self): + ok, _ = _validate_phone_number_id(" 7794189252778687 ") + assert ok + + +class TestAccessTokenValidator: + def test_accepts_eaa_token(self): + ok, _ = _validate_access_token("EAA" + "a" * 100) + assert ok + + def test_rejects_empty(self): + ok, reason = _validate_access_token("") + assert not ok + assert "required" in reason.lower() + + def test_rejects_openai_key_with_helpful_message(self): + ok, reason = _validate_access_token("sk-proj-" + "a" * 100) + assert not ok + assert "OpenAI" in reason + + def test_rejects_slack_token_with_helpful_message(self): + ok, reason = _validate_access_token("xoxb-1234-5678-abcdef") + assert not ok + assert "Slack" in reason + + def test_rejects_github_token_with_helpful_message(self): + ok, reason = _validate_access_token("ghp_abcdefghijklmnop") + assert not ok + assert "GitHub" in reason + + def test_rejects_garbage_with_helpful_message(self): + ok, reason = _validate_access_token("random-string-here") + assert not ok + assert "EAA" in reason # tells them what to look for + + def test_rejects_short_token(self): + ok, reason = _validate_access_token("EAAabc") + assert not ok + assert "short" in reason.lower() + + +class TestAppSecretValidator: + def test_accepts_32_hex_chars(self): + ok, _ = _validate_app_secret("0123456789abcdef0123456789abcdef") + assert ok + + def test_accepts_uppercase_hex(self): + ok, _ = _validate_app_secret("0123456789ABCDEF0123456789ABCDEF") + assert ok + + def test_rejects_wrong_length(self): + ok, reason = _validate_app_secret("0123456789abcdef") # 16 chars + assert not ok + assert "32" in reason + + def test_rejects_non_hex(self): + ok, reason = _validate_app_secret("zzzz56789abcdef0123456789abcdezz") + assert not ok + assert "hex" in reason.lower() + + def test_rejects_empty(self): + ok, _ = 
_validate_app_secret("") + assert not ok + + +class TestAppIdValidator: + def test_accepts_valid(self): + ok, _ = _validate_app_id("1234567890123456") + assert ok + + def test_rejects_non_numeric(self): + ok, _ = _validate_app_id("abcdef") + assert not ok + + def test_rejects_too_short(self): + ok, _ = _validate_app_id("123") + assert not ok + + +class TestWabaIdValidator: + def test_accepts_valid(self): + ok, _ = _validate_waba_id("215589313241560883") + assert ok + + def test_rejects_non_numeric(self): + ok, _ = _validate_waba_id("abc-def") + assert not ok + + +# --------------------------------------------------------------------------- +# End-to-end wizard flow +# --------------------------------------------------------------------------- + + +@pytest.fixture +def isolated_home(tmp_path, monkeypatch): + """Redirect HERMES_HOME so save_env_value writes into a temp .env.""" + home = tmp_path / "home" + hermes = home / ".hermes" + hermes.mkdir(parents=True) + monkeypatch.setattr(Path, "home", lambda: home) + monkeypatch.setenv("HERMES_HOME", str(hermes)) + for key in list(os.environ): + if key.startswith("WHATSAPP_CLOUD_"): + monkeypatch.delenv(key, raising=False) + return hermes + + +def _env_value(hermes_home: Path, key: str) -> str | None: + env_file = hermes_home / ".env" + if not env_file.exists(): + return None + for line in env_file.read_text().splitlines(): + if "=" not in line: + continue + k, _, v = line.partition("=") + if k.strip() == key: + return v.strip().strip('"').strip("'") + return None + + +class TestWizardFlow: + def test_happy_path_minimal(self, isolated_home, monkeypatch): + """Provide only the required fields; skip optional steps.""" + inputs = iter([ + "", # press Enter to continue + "7794189252778687", # Phone Number ID + "EAA" + "x" * 200, # Access Token + "0123456789abcdef0123456789abcdef", # App Secret + "", # App ID — skip + "", # WABA ID — skip + "15551234567", # Allowed users + ]) + monkeypatch.setattr("builtins.input", lambda *a, 
**kw: next(inputs)) + buf = io.StringIO() + with redirect_stdout(buf): + rc = run_whatsapp_cloud_setup() + assert rc == 0 + out = buf.getvalue() + assert "SETUP COMPLETE" in out + # Required fields written + assert _env_value(isolated_home, "WHATSAPP_CLOUD_PHONE_NUMBER_ID") == "7794189252778687" + assert _env_value(isolated_home, "WHATSAPP_CLOUD_ACCESS_TOKEN").startswith("EAA") + assert len(_env_value(isolated_home, "WHATSAPP_CLOUD_APP_SECRET")) == 32 + assert _env_value(isolated_home, "WHATSAPP_CLOUD_ALLOWED_USERS") == "15551234567" + # Verify token auto-generated + assert _env_value(isolated_home, "WHATSAPP_CLOUD_VERIFY_TOKEN") + # Optional fields stayed unset + assert _env_value(isolated_home, "WHATSAPP_CLOUD_APP_ID") is None + assert _env_value(isolated_home, "WHATSAPP_CLOUD_WABA_ID") is None + + def test_phone_number_id_validator_catches_phone_number(self, isolated_home, monkeypatch): + """The trap test — user pastes their phone number into the + Phone Number ID field. Wizard MUST reject with a helpful + explanation, not pass through.""" + inputs = iter([ + "", # press Enter to continue + "15556422442", # phone number — rejected + "", # empty — gives up + ]) + monkeypatch.setattr("builtins.input", lambda *a, **kw: next(inputs)) + buf = io.StringIO() + with redirect_stdout(buf): + rc = run_whatsapp_cloud_setup() + assert rc == 1 + out = buf.getvalue() + # Must surface the specific guidance about Phone Number ID + assert "Phone number ID" in out + assert "15-17 digits" in out + # Should NOT have saved the bad value + assert _env_value(isolated_home, "WHATSAPP_CLOUD_PHONE_NUMBER_ID") is None + + def test_access_token_validator_catches_openai_key(self, isolated_home, monkeypatch): + """User pastes 'sk-proj-...' by mistake. 
Wizard rejects.""" + inputs = iter([ + "", # continue + "7794189252778687", # good Phone ID + "sk-proj-" + "x" * 100, # OpenAI key — rejected + "", # give up + ]) + monkeypatch.setattr("builtins.input", lambda *a, **kw: next(inputs)) + buf = io.StringIO() + with redirect_stdout(buf): + rc = run_whatsapp_cloud_setup() + assert rc == 1 + out = buf.getvalue() + assert "OpenAI" in out # diagnostic in error message + # Phone Number ID was saved (it was valid), but access token was not + assert _env_value(isolated_home, "WHATSAPP_CLOUD_PHONE_NUMBER_ID") == "7794189252778687" + assert _env_value(isolated_home, "WHATSAPP_CLOUD_ACCESS_TOKEN") is None + + def test_verify_token_is_auto_generated(self, isolated_home, monkeypatch): + """The verify token is one of the few things the user shouldn't + have to invent. Wizard generates a strong random one.""" + inputs = iter([ + "", # continue + "7794189252778687", # Phone ID + "EAA" + "x" * 200, # Token + "0123456789abcdef0123456789abcdef", # App Secret + "", # App ID — skip + "", # WABA ID — skip + "15551234567", # Allowed users + ]) + monkeypatch.setattr("builtins.input", lambda *a, **kw: next(inputs)) + buf = io.StringIO() + with redirect_stdout(buf): + run_whatsapp_cloud_setup() + verify_token = _env_value(isolated_home, "WHATSAPP_CLOUD_VERIFY_TOKEN") + assert verify_token is not None + # secrets.token_urlsafe(32) produces ~43 chars (base64-of-32-bytes) + assert len(verify_token) >= 32 + # Should also be echoed to user output so they can paste into Meta + assert verify_token in buf.getvalue() + + def test_setup_complete_block_includes_post_setup_instructions(self, isolated_home, monkeypatch): + """The wizard can't smoke-test the webhook itself (the gateway + isn't running yet), so it MUST print the exact curl/cloudflared + steps the user needs after the wizard exits.""" + inputs = iter([ + "", # continue + "7794189252778687", # Phone ID + "EAA" + "x" * 200, # Token + "0123456789abcdef0123456789abcdef", # App Secret + "", # App 
ID — skip + "", # WABA ID — skip + "15551234567", # Allowed users + ]) + monkeypatch.setattr("builtins.input", lambda *a, **kw: next(inputs)) + buf = io.StringIO() + with redirect_stdout(buf): + run_whatsapp_cloud_setup() + out = buf.getvalue() + # Required post-setup guidance + assert "cloudflared tunnel --url http://localhost:8090" in out + assert "hermes gateway" in out + assert "Verify and save" in out + assert "messages" in out + # The verify token should be quotable on the curl line + verify_token = _env_value(isolated_home, "WHATSAPP_CLOUD_VERIFY_TOKEN") + assert verify_token in out + + def test_existing_token_preserved_on_rerun(self, isolated_home, monkeypatch): + """Re-running the wizard with existing config should let the + user keep current values by hitting Enter.""" + # Pre-populate .env as if a previous run succeeded + env_file = isolated_home / ".env" + env_file.write_text( + "WHATSAPP_CLOUD_PHONE_NUMBER_ID=7794189252778687\n" + "WHATSAPP_CLOUD_ACCESS_TOKEN=EAAprevious_token_here_" + "x" * 100 + "\n" + "WHATSAPP_CLOUD_APP_SECRET=0123456789abcdef0123456789abcdef\n" + "WHATSAPP_CLOUD_VERIFY_TOKEN=existing_verify_token_already_set\n" + ) + inputs = iter([ + "", # continue + "", # Phone ID — keep existing + "", # Token — keep existing + "", # App Secret — keep existing + "", # App ID — skip + "", # WABA ID — skip + "", # verify token: regenerate? 
[y/N] — no + "", # Allowed users — keep + ]) + monkeypatch.setattr("builtins.input", lambda *a, **kw: next(inputs)) + buf = io.StringIO() + with redirect_stdout(buf): + rc = run_whatsapp_cloud_setup() + assert rc == 0 + # Values preserved + token = _env_value(isolated_home, "WHATSAPP_CLOUD_ACCESS_TOKEN") + assert token is not None + assert token.startswith("EAAprevious_token_here_") + # Verify token preserved (user said no to regenerate) + assert _env_value(isolated_home, "WHATSAPP_CLOUD_VERIFY_TOKEN") == "existing_verify_token_already_set" + + +# ========================================================================= +# Profile polish block (SETUP COMPLETE → optional WhatsApp profile setup) +# ========================================================================= + + +class TestProfilePolishGuidance: + """The wizard can't set the bot's WhatsApp display name or profile + picture via the API — those go through Meta's Business Manager UI. + Verify that the SETUP COMPLETE block points the user at the right + place rather than leaving them to figure it out on their own.""" + + def test_polish_block_present_and_points_at_business_manager( + self, isolated_home, monkeypatch + ): + inputs = iter([ + "", + "7794189252778687", + "EAA" + "x" * 200, + "0123456789abcdef0123456789abcdef", + "", # App ID — skip + "", # WABA ID — skip + "15551234567", + ]) + monkeypatch.setattr("builtins.input", lambda *a, **kw: next(inputs)) + buf = io.StringIO() + with redirect_stdout(buf): + run_whatsapp_cloud_setup() + out = buf.getvalue() + # Polish block header + assert "polish your bot's WhatsApp profile" in out + # Direct user at Meta's Business Manager (not the developer dash) + assert "business.facebook.com/wa/manage/phone-numbers" in out + # Mention each of the three things the user can do there + assert "Display name" in out + assert "profile picture" in out + assert "Edit profile" in out + # Set expectations about display-name reviews + assert "24-48h" in out or "24–48h" in out 
+ + def test_polish_block_deeplinks_when_waba_id_known( + self, isolated_home, monkeypatch + ): + """If the user gave us the WABA ID earlier in the wizard, the + Business Manager URL should pre-select their account.""" + waba = "987654321098765" + inputs = iter([ + "", + "7794189252778687", + "EAA" + "x" * 200, + "0123456789abcdef0123456789abcdef", + "", # App ID — skip + waba, # WABA ID — provided + "15551234567", + ]) + monkeypatch.setattr("builtins.input", lambda *a, **kw: next(inputs)) + buf = io.StringIO() + with redirect_stdout(buf): + run_whatsapp_cloud_setup() + out = buf.getvalue() + # Deep-linked URL with the user's WABA pre-selected + assert f"waba_id={waba}" in out + # Without WABA, we tell the user they'll need to pick their account + assert "select your WhatsApp Business Account" not in out diff --git a/website/docs/reference/environment-variables.md b/website/docs/reference/environment-variables.md index e9403337063..d3943032865 100644 --- a/website/docs/reference/environment-variables.md +++ b/website/docs/reference/environment-variables.md @@ -301,6 +301,19 @@ For cloud sandbox backends, persistence is filesystem-oriented. 
`TERMINAL_LIFETI | `WHATSAPP_ALLOWED_USERS` | Comma-separated phone numbers (with country code, no `+`), or `*` to allow all senders | | `WHATSAPP_ALLOW_ALL_USERS` | Allow all WhatsApp senders without an allowlist (`true`/`false`) | | `WHATSAPP_DEBUG` | Log raw message events in the bridge for troubleshooting (`true`/`false`) | +| `WHATSAPP_CLOUD_PHONE_NUMBER_ID` | Meta Phone Number ID from the WhatsApp Business Cloud API (15–17 digits; **not** the phone number itself) | +| `WHATSAPP_CLOUD_ACCESS_TOKEN` | Meta access token (starts with `EAA`); temporary tokens expire after 24h, System User tokens are permanent | +| `WHATSAPP_CLOUD_APP_SECRET` | 32-char hex app secret used to verify inbound webhook signatures | +| `WHATSAPP_CLOUD_VERIFY_TOKEN` | Shared secret for Meta's webhook verification handshake (auto-generated by the setup wizard) | +| `WHATSAPP_CLOUD_ALLOWED_USERS` | Comma-separated `wa_id`s (phone numbers with country code, no `+`) allowed to message the bot | +| `WHATSAPP_CLOUD_ALLOW_ALL_USERS` | Allow all WhatsApp Cloud senders without an allowlist (`true`/`false`) | +| `WHATSAPP_CLOUD_APP_ID` | Optional Meta App ID (for future analytics integration) | +| `WHATSAPP_CLOUD_WABA_ID` | Optional WhatsApp Business Account ID (for future analytics integration) | +| `WHATSAPP_CLOUD_WEBHOOK_HOST` | Interface the inbound webhook server binds to (default `0.0.0.0`) | +| `WHATSAPP_CLOUD_WEBHOOK_PORT` | Port the inbound webhook server binds to (default `8090`) | +| `WHATSAPP_CLOUD_WEBHOOK_PATH` | URL path Meta posts inbound messages to (default `/whatsapp/webhook`) | +| `WHATSAPP_CLOUD_API_VERSION` | Meta Graph API version to call (default `v20.0`) | +| `WHATSAPP_CLOUD_HOME_CHANNEL` | `wa_id` to use as the bot's home channel (for cron jobs etc.) 
| | `SIGNAL_HTTP_URL` | signal-cli daemon HTTP endpoint (for example `http://127.0.0.1:8080`) | | `SIGNAL_ACCOUNT` | Bot phone number in E.164 format | | `SIGNAL_ALLOWED_USERS` | Comma-separated E.164 phone numbers or UUIDs | diff --git a/website/docs/user-guide/messaging/index.md b/website/docs/user-guide/messaging/index.md index 2dc130d8889..a1c866cf653 100644 --- a/website/docs/user-guide/messaging/index.md +++ b/website/docs/user-guide/messaging/index.md @@ -423,6 +423,7 @@ Each platform has its own toolset: | Telegram | `hermes-telegram` | Full tools including terminal | | Discord | `hermes-discord` | Full tools including terminal | | WhatsApp | `hermes-whatsapp` | Full tools including terminal | +| WhatsApp Cloud API | `hermes-whatsapp` | Full tools including terminal (shares toolset with the Baileys bridge) | | Slack | `hermes-slack` | Full tools including terminal | | Google Chat | `hermes-google_chat` | Full tools including terminal | | Signal | `hermes-signal` | Full tools including terminal | @@ -528,6 +529,7 @@ Defaults to `false`. Only platforms whose adapter implements `delete_message` ho - [Slack Setup](slack.md) - [Google Chat Setup](google_chat.md) - [WhatsApp Setup](whatsapp.md) +- [WhatsApp Business Cloud API Setup](whatsapp-cloud.md) - [Signal Setup](signal.md) - [SMS Setup (Twilio)](sms.md) - [Email Setup](email.md) diff --git a/website/docs/user-guide/messaging/whatsapp-cloud.md b/website/docs/user-guide/messaging/whatsapp-cloud.md new file mode 100644 index 00000000000..34cc457fca8 --- /dev/null +++ b/website/docs/user-guide/messaging/whatsapp-cloud.md @@ -0,0 +1,418 @@ +--- +sidebar_position: 6 +title: "WhatsApp Business (Cloud API)" +description: "Set up Hermes Agent as a WhatsApp bot via Meta's official Business Cloud API" +--- + +# WhatsApp Business Cloud API Setup + +Hermes can connect to WhatsApp through Meta's **official** WhatsApp Business Cloud API. 
This is the production-grade path: no Node.js bridge subprocess, no QR codes, no account-ban risk. + +In exchange: + +- You need a **Meta Business account** (not personal WhatsApp). +- The bot operates on a dedicated business phone number, not your personal number. +- The Hermes gateway needs a **public HTTPS URL** so Meta can deliver inbound messages via webhook. +- Replies more than 24 hours after the user's last message require a pre-approved **template** (this is Meta's "customer service window" rule, not a Hermes limit). + +If those constraints don't work for your use case, the [Baileys bridge integration](./whatsapp.md) is the alternative — personal account, no public URL needed, but unofficial and ban-prone. + +:::tip Which one should I use? +- **Cloud API (this guide)** — running a real business bot, want stability, fine with the Meta verification + template paperwork +- **[Baileys bridge](./whatsapp.md)** — personal projects, quick demos, single-user setups, willing to risk the bot phone number's account +::: + +--- + +## Quick start + +```bash +hermes whatsapp-cloud +``` + +The wizard walks you through every credential, validates each one as you paste it (catches the #1 setup trap — pasting a phone number into the Phone Number ID field), and prints exact follow-up instructions for the parts that need to happen outside the wizard (starting cloudflared, configuring Meta's webhook dashboard). + +The rest of this page is the manual reference. + +--- + +## Prerequisites + +1. **A Meta Business account**. Create one at [business.facebook.com](https://business.facebook.com/). +2. **A Meta app with WhatsApp enabled**. See "Creating the Meta app" below. +3. **A way to expose a local port to the public internet** with HTTPS. Cloudflare Tunnel (`cloudflared`) is recommended — free, no port forwarding, no domain required. ngrok, your own domain with a reverse proxy + TLS, or a VPS with the gateway directly bound to a public IP all work too. +4. 
**Optional but recommended**: ffmpeg on `PATH` so outbound voice messages render as native WhatsApp voice-note bubbles (green waveform) instead of MP3 audio attachments. Hermes degrades gracefully if absent. + +--- + +## Creating the Meta app + +1. Go to [developers.facebook.com/apps](https://developers.facebook.com/apps) → **Create App**. +2. Choose use case: **"Connect with customers through WhatsApp"** → **Next**. +3. Pick or create a business portfolio. Review the publishing requirements. Confirm → **Create app**. +4. After creation you'll land on **Customize use case → Connect on WhatsApp → Quickstart**. Click **Start using the API** → you're now on the **API Setup** page. +5. Make sure a WhatsApp Business Account (WABA) is linked. If you created a new portfolio in step 3, one was auto-created. Verify in the API Setup page. + +You'll need these values from the dashboard — the wizard prompts for them in this order: + +| Value | Where in dashboard | Field shape | Notes | +|---|---|---|---| +| **Phone Number ID** | App Dashboard → WhatsApp → API Setup → below the "From" dropdown | Numeric, 15-17 digits | **NOT** the phone number itself. The #1 setup mistake is pasting the actual phone number here. | +| **Access Token** | App Dashboard → WhatsApp → API Setup → "Generate access token" | Starts with `EAA`, 100+ chars | Temp tokens last 24h — see "Permanent token" below for production. | +| **App Secret** | App Dashboard → Settings → Basic → click "Show" next to App secret | 32-character lowercase hex | Used to verify incoming webhook signatures. Without it, inbound delivery is refused with 503. | +| **App ID** (optional) | App Dashboard → Settings → Basic | Numeric, 15-16 digits | Not required for messaging, useful for analytics. | +| **WABA ID** (optional) | App Dashboard → WhatsApp → API Setup → near the top | Numeric, 15+ digits | Not required for messaging, useful for analytics. 
| + +--- + +## Permanent token (production) + +Temporary access tokens expire after **24 hours**, which means a token generated today stops working tomorrow. For production deployments use a **System User permanent token**: + +1. Go to [business.facebook.com/latest/settings](https://business.facebook.com/latest/settings) → **System users** (left sidebar). +2. **Add** → name (e.g. `hermes-bot`) → role: **Admin**. +3. Select the new user → **Assign Assets**: + - Select your app → toggle **Manage app** under Full control. + - Select your WhatsApp account → toggle **Manage WhatsApp Business Accounts** under Full control. + - Click **Assign assets**. +4. **Generate token** with these permissions: + - `business_management` + - `whatsapp_business_messaging` + - `whatsapp_business_management` +5. Set **token expiration: Never**. +6. Copy the token → update `WHATSAPP_CLOUD_ACCESS_TOKEN` in `~/.hermes/.env` → restart the gateway. + +System User tokens don't expire unless you explicitly revoke them. + +--- + +## Exposing Hermes to the internet + +The Cloud API delivers inbound messages by HTTPS POST to your webhook URL — that means the Hermes gateway has to be reachable from Meta's servers. Three common ways: + +### Cloudflare Tunnel (recommended) + +Free, no port forwarding, works on Windows / macOS / Linux. Runs as a separate process alongside the gateway. + +**Install:** + +```bash +# Windows +winget install Cloudflare.cloudflared + +# macOS +brew install cloudflared + +# Linux +# Download the binary from https://github.com/cloudflare/cloudflared/releases +``` + +**Run a quick tunnel** (no Cloudflare account needed — gives you a `https://<random-subdomain>.trycloudflare.com` URL): + +```bash +cloudflared tunnel --url http://localhost:8090 +``` + +Note the printed URL — that's what you'll give Meta. + +:::warning Quick tunnels rotate +The free quick-tunnel URL changes every time you restart `cloudflared`. For a stable URL, log in with `cloudflared tunnel login` and create a named tunnel. 
Free Cloudflare accounts get unlimited named tunnels — see [Cloudflare's docs](https://developers.cloudflare.com/cloudflare-one/connections/connect-networks/) for the named-tunnel workflow. +::: + +### ngrok + +```bash +ngrok http 8090 +``` + +Free tier shows a different URL on each restart. Paid tier gives you a stable subdomain. + +### Your own domain + reverse proxy + +If you already have a server with a TLS cert (Caddy, nginx, etc.), point a route at `localhost:8090`. This is the most stable option for production but requires existing infrastructure. + +--- + +## Configuring the webhook on Meta's side + +Once your tunnel is running: + +1. Note the public URL printed by your tunnel — say `https://abc123.trycloudflare.com`. +2. Generate a **Verify Token** — the wizard does this for you with `secrets.token_urlsafe(32)`; if you're configuring manually, run: + ```bash + python -c "import secrets; print(secrets.token_urlsafe(32))" + ``` + Save it as `WHATSAPP_CLOUD_VERIFY_TOKEN` in `~/.hermes/.env`. +3. Start the Hermes gateway: `hermes gateway`. +4. In the Meta App Dashboard → **WhatsApp → Configuration** (or **Use cases → Customize → Configuration** depending on UI version) → click **Edit** on the Webhook section. +5. Fill in: + - **Callback URL**: `https://abc123.trycloudflare.com/whatsapp/webhook` + - **Verify Token**: the string from step 2 (must match exactly) +6. Click **Verify and save**. Meta hits your URL with a GET request, the gateway echoes back the challenge, and Meta marks the webhook as verified. +7. Under **Webhook fields**, click **Manage** → subscribe to the **messages** field. This is what tells Meta to actually deliver inbound messages to your webhook. 
+ +**To verify the loop manually** (from a third terminal): + +```bash +TUNNEL="https://abc123.trycloudflare.com" +VERIFY="<your WHATSAPP_CLOUD_VERIFY_TOKEN value>" + +# Should print HTTP 200 with body "hello" +curl -i "$TUNNEL/whatsapp/webhook?hub.mode=subscribe&hub.verify_token=$VERIFY&hub.challenge=hello" + +# Health endpoint — should show verify_token_configured: true and app_secret_configured: true +curl "$TUNNEL/health" +``` + +--- + +## Recipient whitelist (Meta-side) + +In development mode (before your app goes through App Review), Meta restricts which numbers your bot can message: + +1. App Dashboard → WhatsApp → API Setup → **To** dropdown. +2. Click **Manage phone number list**. +3. Add the phone numbers you want to message (yours, your team's, friendly testers). Meta sends each one a 6-digit verification code via SMS or WhatsApp. + +Up to 5 numbers in dev mode. Going to App Review removes this limit. + +--- + +## Allowlist (Hermes-side) + +In addition to Meta's recipient whitelist, Hermes has its own per-platform allowlist that controls **which incoming messages the agent processes**. Add to `~/.hermes/.env`: + +```bash +# Comma-separated phone numbers, country code, no '+' / spaces / dashes +WHATSAPP_CLOUD_ALLOWED_USERS=15551234567,15557654321 + +# Or allow everyone (only safe in combination with Meta's recipient whitelist) +# WHATSAPP_CLOUD_ALLOW_ALL_USERS=true +``` + +The wizard sets this in step 6. Without an allowlist, **every inbound message is denied** — this is intentional, so the bot can't be invoked by random numbers if the recipient whitelist is ever loosened. + +--- + +## Polishing your bot's WhatsApp profile + +WhatsApp displays a **name and profile picture** for your bot in the chat header and contact list. These can't be set via the Cloud API — they live in Meta's Business Manager. 
+ +Once your bot is working, head to **[business.facebook.com/wa/manage/phone-numbers](https://business.facebook.com/wa/manage/phone-numbers/)**, click your phone number, and you'll find: + +| What | Where | Notes | +|---|---|---| +| **Display name** | Top of the phone-number page | Changes go through Meta's name-review process (~24–48 hours). | +| **Profile picture** | Top of the phone-number page | Square image, ≥640×640px recommended. Updates immediately. | +| **About / description / website / email / hours / category** | "Edit profile" button | These appear in the info pane when a user taps the bot's name. Cosmetic. | +| **Verified badge** (green checkmark) | Business Manager → Security Center → Start Verification | Requires Meta's separate business verification process. | + +The `hermes whatsapp-cloud` wizard prints these links at the end of setup. None of this is required for the bot to work — it's pure polish for how your bot appears to users. + +--- + +## Configuration reference + +All settings live in `~/.hermes/.env`. Required values are in **bold**. + +| Variable | Default | Description | +|---|---|---| +| **`WHATSAPP_CLOUD_PHONE_NUMBER_ID`** | — | The 15-17 digit ID from API Setup. **Not** the phone number. | +| **`WHATSAPP_CLOUD_ACCESS_TOKEN`** | — | Meta access token (starts with `EAA`). Temp 24h or System User permanent. | +| **`WHATSAPP_CLOUD_APP_SECRET`** | — | 32-char hex from Settings → Basic. Without it, inbound is refused with 503. | +| **`WHATSAPP_CLOUD_VERIFY_TOKEN`** | — | Shared secret for the GET handshake. Auto-generated by the wizard. | +| **`WHATSAPP_CLOUD_ALLOWED_USERS`** | — | Comma-separated wa_ids allowed to message the bot. | +| `WHATSAPP_CLOUD_ALLOW_ALL_USERS` | `false` | Set to `true` to bypass the allowlist. | +| `WHATSAPP_CLOUD_APP_ID` | — | Optional, for future analytics integration. | +| `WHATSAPP_CLOUD_WABA_ID` | — | Optional, for future analytics integration. 
| +| `WHATSAPP_CLOUD_WEBHOOK_HOST` | `0.0.0.0` | Interface the webhook server binds to. | +| `WHATSAPP_CLOUD_WEBHOOK_PORT` | `8090` | Port the webhook server binds to. Must match the port your tunnel forwards. | +| `WHATSAPP_CLOUD_WEBHOOK_PATH` | `/whatsapp/webhook` | URL path Meta posts to. | +| `WHATSAPP_CLOUD_API_VERSION` | `v20.0` | Meta Graph API version. Only override if a newer version is recommended in Meta's docs. | +| `WHATSAPP_CLOUD_HOME_CHANNEL` | — | wa_id to use as the bot's home channel (for cron jobs etc). | + +You can have **both** the Baileys (`whatsapp`) and Cloud (`whatsapp_cloud`) adapters enabled simultaneously, targeting different phone numbers. + +--- + +## Features + +### Inbound + +- **Text messages** — passed straight to the agent. +- **Images** — auto-downloaded and attached to the agent's input. Models with native vision (Claude, GPT-4o, Gemini, etc.) read the image directly; non-vision models receive an auto-generated text description. +- **Voice notes** — auto-downloaded as `.ogg`, transcribed via your configured STT provider (local faster-whisper, OpenAI/Nous, Groq, etc.), then handed to the agent as text. +- **Documents** — auto-downloaded. Small text-readable files (`.txt`, `.md`, `.json`, `.py`, `.csv`, etc.) up to 100KB get inlined into the agent's input so it can read them without a tool call. Larger files are cached locally for the agent's other tools to access. +- **Button taps** — when the user taps a button the bot sent earlier (clarify choice, command approval, slash-command confirm), the tap is routed directly to the right handler. Stale taps fall back to being treated as regular text input. +- **Reply context** — when the user replies to a previous bot message, the agent sees the original message as context. + +### Outbound + +- **Text** — markdown is auto-converted to WhatsApp's flavored syntax (`**bold**` → `*bold*`, `~~strike~~` → `~strike~`, headers → bold, `[link](url)` → `link (url)`). 
Long messages split at 4096 chars per chunk. +- **Images** — agent-generated images and local image files both supported, delivered as native photo attachments. +- **Voice messages** — text-to-speech output is converted via ffmpeg into the native WhatsApp voice-note bubble (green waveform). Without ffmpeg installed, falls back to an MP3 audio attachment. See "Voice messages" below. +- **Video / documents** — both supported, sent as native attachments. + +### Interactive UX + +When the agent invokes any of these flows, Hermes uses WhatsApp's native interactive messages — tap-to-answer buttons instead of "reply with the number" prompts: + +- **`clarify` tool** — multi-choice questions render as quick-reply buttons (1–3 choices) or a tap-to-open list sheet (4+ choices). Picking "✏️ Other" lets the user type a free-form answer that the agent receives as the resolution. +- **Dangerous-command approvals** — when the agent's terminal/code execution hits a gated command, the user sees `✅ Approve` / `❌ Deny` buttons instead of needing to type `/approve` or `/deny`. +- **Slash-command confirmations** — privileged commands like `/reload-mcp` show `✅ Approve Once` / `🔒 Always` / `❌ Cancel` buttons. + +All interactive prompts gracefully degrade to plain text if the buttons fail to render (e.g. on legacy WhatsApp clients). + +### Read receipts and typing indicator + +Hermes acknowledges inbound messages immediately: + +- Your message shows **blue double-checkmarks** as soon as the gateway receives it. +- The bot's name in your WhatsApp chat shows **"typing…"** while the agent is preparing a reply. +- The typing indicator auto-dismisses when the bot's first response message arrives. + +This makes it obvious when the bot has seen your message versus when it's still working on a response. + +### Voice messages + +WhatsApp distinguishes between a "voice note" (the green waveform bubble) and a generic audio file attachment. 
The difference is purely codec: voice notes need to be `audio/ogg` with `opus` encoding. + +Hermes TTS produces MP3. Two paths: + +- **With ffmpeg on PATH** (recommended) — outbound TTS is converted and arrives as a proper voice note. Install: + - Windows: `winget install Gyan.FFmpeg` + - macOS: `brew install ffmpeg` + - Linux: package manager +- **Without ffmpeg** — outbound TTS arrives as an MP3 audio attachment. Plays fine, just doesn't look like a voice note. A one-time warning fires in the gateway log so you know. + +You can check whether the gateway found ffmpeg via the health endpoint: + +```bash +curl http://localhost:8090/health +# look for "ffmpeg_present": true +``` + +--- + +## Known limitations + +### 24-hour conversation window + +Meta only allows **free-form messages** within a 24-hour window after the user's last inbound message. Outside that window, the only thing Meta's API accepts is a pre-approved **message template**. + +**What this means in practice:** + +- Reactive chat (user DMs → bot replies within 24h → user replies → ...) works forever. This covers >95% of normal bot use. +- **Cron jobs that deliver to WhatsApp** after a gap > 24h will fail with Graph error code `131047` ("Re-engagement message"). +- **Long-running `delegate_task` async results** that take longer than 24h fail the same way. +- **Webhook subscribers** that route external events to WhatsApp fail when the user hasn't DM'd the bot recently. + +Hermes warns the agent about this window in its system prompt, so the model knows to mention it when scheduling delayed messages. + +Message-template support (the workaround for outside-window sends) is not yet implemented in Hermes. If you need it, please [open an issue](https://github.com/NousResearch/hermes-agent/issues) — it's planned but waiting on a clear demand signal. + +### Group chats + +The Cloud API has limited group support (capability-tier gated by Meta). 
Hermes's `whatsapp_cloud` adapter currently handles **direct messages only** in v1. If you need group chats, use the Baileys bridge. + +### Outbound rate limit + +Meta's default throughput is **80 messages/second per business phone number**, with upgrades available. Hermes doesn't currently enforce this client-side — extremely high-volume sends could hit Meta's limit. + +--- + +## Troubleshooting + +### Setup verification fails ("URL couldn't be validated") in Meta dashboard + +Almost always one of: + +- **Tunnel URL is wrong or stale** — cloudflared quick tunnels rotate. Get a fresh URL and update both `.env` and Meta's dashboard. +- **Verify token mismatch** — the token in `~/.hermes/.env`'s `WHATSAPP_CLOUD_VERIFY_TOKEN` must match exactly what you typed into Meta's dashboard. Run the curl probe above to confirm the gateway's verify handshake works locally first. +- **Gateway not running** — check `hermes gateway` is up. +- **App Secret not set** — without it, Hermes refuses inbound POSTs with 503. Meta interprets that as "can't validate." + +### `graph error 100`: Object with ID '...' does not exist + +You pasted your phone number (10-11 digits) into `WHATSAPP_CLOUD_PHONE_NUMBER_ID` instead of the Phone Number ID (Meta's 15-17 digit internal ID). Re-check the API Setup page — the Phone Number ID is shown *below* the "From" dropdown. + +The wizard catches this with a validator now, but it's worth knowing if you're configuring manually. + +### `graph error 190`: Authentication Error + +Your access token is invalid. Subcodes: + +- `subcode 463` — token expired. Temp tokens last 24h. Regenerate, or switch to a System User permanent token (see above). +- `subcode 467` — token invalidated (revoked or password changed). +- Other 190 — token didn't have the required permissions when generated. Make sure all three (`business_management`, `whatsapp_business_messaging`, `whatsapp_business_management`) were selected. 
+ +### `graph error 131047`: Re-engagement message + +The 24-hour conversation window expired (see "Known limitations"). Either: + +- Ask the user to DM the bot first to reopen the window. +- Wait for template support to land in Hermes. + +### Inbound message: `media metadata fetch failed (status=401)` + +Same 401 root causes as outbound (`graph error 190`) — the access token is invalid or expired. Fix the token. + +### Bot replies appear as raw JSON / tool-call leakage + +Common cause: the toolset configured for `whatsapp_cloud` is missing the tools the agent wants to call. Check `hermes tools list` and verify the platform is using `hermes-whatsapp` (the default Cloud adapter toolset, same as Baileys). + +If the model emits tool-call-shaped text instead of a structured call, it usually means the toolset was effectively empty. See `hermes_cli/platforms.py` for the platform → default toolset mapping. + +### STT (voice note transcription) returns empty / "could not transcribe" + +The default `stt.provider: local` requires `pip install faster-whisper`. If you're a Nous subscriber, you can route STT through Nous's managed audio gateway instead: + +```bash +hermes config set stt.provider openai +hermes config set stt.use_gateway true +hermes gateway restart +``` + +This uses your Nous Portal access token instead of needing a separate OpenAI key. + +--- + +## Security notes + +- **Treat the App Secret like a password** — anyone with it can forge webhook payloads that Hermes will accept as authentic. +- **The verify token is a shared secret** — leaks are lower-stakes (worst case someone could re-subscribe Meta's webhook to a different URL of theirs), but still avoid committing it. +- **The access token is your bot's identity** — System User tokens are equivalent to long-lived API keys. Rotate immediately if a deployment is compromised. +- **The webhook endpoint accepts only signed requests when `WHATSAPP_CLOUD_APP_SECRET` is set** — leave it set even in development.
Without it, the gateway refuses inbound delivery with HTTP 503. +- **The `/health` endpoint is unauthenticated** — it's safe to expose because it only reports config-presence booleans, not the values themselves. But if you'd rather not surface it, restrict access at the reverse proxy / tunnel layer. + +--- + +## Comparison to the Baileys bridge + +| | Baileys (`hermes whatsapp`) | Cloud API (`hermes whatsapp-cloud`) | +|---|---|---| +| Account type | Personal | Business | +| Setup | QR code scan | Meta app + WABA + token | +| Dependencies | Node.js + npm | Pure Python (httpx + aiohttp) | +| Process | Managed Node subprocess | aiohttp webhook server | +| Public URL needed? | No | Yes | +| Account ban risk | Yes (unofficial API) | No (officially supported) | +| Inbound | Polling Node bridge | Webhook POST from Meta | +| Outbound | Local bridge → Baileys | HTTPS to graph.facebook.com | +| Groups | Full support | DMs only (v1) | +| 24h window | No restriction | Hard rule — templates required after | +| Voice notes (out) | Native | Native with ffmpeg, MP3 fallback otherwise | +| Read receipts | No | Yes (blue double-checkmarks) | +| Typing indicator | No | Yes (auto-dismisses on response) | +| Interactive buttons | Text fallback only | Native (clarify, approval, slash-confirm) | +| Production use | Risky (Meta can ban) | Designed for it | + +Most users running Hermes for personal projects prefer Baileys. Most users running customer-facing bots prefer Cloud API. + +--- + +## See also + +- [Meta's official WhatsApp Business Cloud API docs](https://developers.facebook.com/documentation/business-messaging/whatsapp/) — authoritative reference for the underlying platform, pricing, App Review, and Meta-side rate limits. +- [WhatsApp (Baileys bridge) Setup](whatsapp.md) — the alternative integration for personal projects. +- [Messaging Platforms overview](index.md) — all messaging integrations at a glance. 
diff --git a/website/docs/user-guide/messaging/whatsapp.md b/website/docs/user-guide/messaging/whatsapp.md index e4a8def0773..8a7311176d7 100644 --- a/website/docs/user-guide/messaging/whatsapp.md +++ b/website/docs/user-guide/messaging/whatsapp.md @@ -8,6 +8,14 @@ description: "Set up Hermes Agent as a WhatsApp bot via the built-in Baileys bri Hermes connects to WhatsApp through a built-in bridge based on **Baileys**. This works by emulating a WhatsApp Web session — **not** through the official WhatsApp Business API. No Meta developer account or Business verification is required. +:::tip Two WhatsApp integrations +This page is for the **Baileys bridge** — quick to set up, personal accounts, no public URL needed, ban risk. + +If you're running a real business bot and want stability, see the **[WhatsApp Business Cloud API guide](./whatsapp-cloud.md)** instead. It's the official Meta-supported path: no account ban risk, but requires a Meta Business account and a public webhook URL. + +The two adapters can also run in parallel against different phone numbers if you have a reason to. +::: + :::warning Unofficial API — Ban Risk WhatsApp does **not** officially support third-party bots outside the Business API. Using a third-party bridge carries a small risk of account restrictions. To minimize risk: - **Use a dedicated phone number** for the bot (not your personal number) diff --git a/website/sidebars.ts b/website/sidebars.ts index 640c0a1614c..04fa8718db6 100644 --- a/website/sidebars.ts +++ b/website/sidebars.ts @@ -617,6 +617,7 @@ const sidebars: SidebarsConfig = { 'user-guide/messaging/discord', 'user-guide/messaging/slack', 'user-guide/messaging/whatsapp', + 'user-guide/messaging/whatsapp-cloud', 'user-guide/messaging/signal', 'user-guide/messaging/email', 'user-guide/messaging/sms',