feat(send_message): native WhatsApp media delivery via Baileys bridge (#53598)

send_message with MEDIA:/path to a WhatsApp target previously dropped the
attachment: the WhatsApp branch never passed media_files, the plugin's
_standalone_send accepted the param but only POSTed text, and WhatsApp was
absent from the media-supported platform list.

- send_message_tool: add a Platform.WHATSAPP media block (mirrors Feishu) that
  routes media_files through the whatsapp plugin's standalone_sender_fn, and
  add whatsapp to the supported-media list strings.
- whatsapp adapter: _standalone_send now sends text first (skipped when the
  chunk is media-only), then uploads each file via the bridge /send-media
  endpoint with a mediaType derived from extension/is_voice/force_document, so
  images/videos/voice arrive as native bubbles instead of documents.
- _bridge_media_type classifier maps ext -> image|video|audio|document.

Closes #19105 (remaining send_message gap). Other items in the report
(inbound video paths, image_generate auto-deliver, history dedup, native
gateway bubbles) already landed on main.
This commit is contained in:
Teknium 2026-06-27 04:40:05 -07:00 committed by GitHub
parent 88c02469cc
commit cd592c105c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 322 additions and 16 deletions

View file

@ -1293,6 +1293,31 @@ class WhatsAppAdapter(WhatsAppBehaviorMixin, BasePlatformAdapter):
# ──────────────────────────────────────────────────────────────────────────
_WA_IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".webp", ".gif"}
_WA_VIDEO_EXTS = {".mp4", ".mov", ".avi", ".mkv", ".webm", ".3gp"}
_WA_AUDIO_EXTS = {".ogg", ".opus", ".mp3", ".wav", ".m4a", ".flac"}


def _bridge_media_type(file_path: str, is_voice: bool, force_document: bool) -> str:
    """Pick the bridge ``/send-media`` ``mediaType`` for a local file.

    The result is always one of ``image``, ``video``, ``audio`` or
    ``document``. Precedence, highest first:

    1. ``force_document`` -> ``document``, whatever the extension.
    2. ``is_voice`` -> ``audio``, whatever the extension.
    3. The lower-cased file extension, looked up in the audio, image and
       video extension sets (in that order).
    4. Anything else -> ``document``.
    """
    if force_document:
        return "document"

    suffix = os.path.splitext(file_path)[1].lower()
    if is_voice:
        return "audio"

    # Ordered lookup: the first extension set containing the suffix wins.
    by_kind = (
        ("audio", _WA_AUDIO_EXTS),
        ("image", _WA_IMAGE_EXTS),
        ("video", _WA_VIDEO_EXTS),
    )
    for kind, extensions in by_kind:
        if suffix in extensions:
            return kind
    return "document"
async def _standalone_send(
pconfig,
chat_id,
@ -1316,22 +1341,55 @@ async def _standalone_send(
try:
bridge_port = extra.get("bridge_port", 3000)
normalized_chat_id = to_whatsapp_jid(chat_id)
media = media_files or []
text = message or ""
last_message_id = None
async with aiohttp.ClientSession() as session:
async with session.post(
f"http://localhost:{bridge_port}/send",
json={"chatId": normalized_chat_id, "message": message},
timeout=aiohttp.ClientTimeout(total=30),
) as resp:
if resp.status == 200:
# 1) Text first (skip the /send call when this chunk is media-only).
if text.strip():
async with session.post(
f"http://localhost:{bridge_port}/send",
json={"chatId": normalized_chat_id, "message": text},
timeout=aiohttp.ClientTimeout(total=30),
) as resp:
if resp.status != 200:
body = await resp.text()
return {"error": f"WhatsApp bridge error ({resp.status}): {body}"}
data = await resp.json()
return {
"success": True,
"platform": "whatsapp",
"chat_id": normalized_chat_id,
"message_id": data.get("messageId"),
}
body = await resp.text()
return {"error": f"WhatsApp bridge error ({resp.status}): {body}"}
last_message_id = data.get("messageId")
# 2) Each media file as a native attachment via /send-media. The
# bridge maps mediaType -> image/video/audio/document message kinds
# so PNG/JPEG/WebP/GIF arrive as inline images, MP4 as a video
# bubble, and ogg/opus as a voice note — not a file/document.
for media_path, is_voice in media:
if not os.path.exists(media_path):
return {"error": f"WhatsApp media file not found: {media_path}"}
media_type = _bridge_media_type(media_path, is_voice, force_document)
payload: Dict[str, Any] = {
"chatId": normalized_chat_id,
"filePath": media_path,
"mediaType": media_type,
}
if media_type == "document":
payload["fileName"] = os.path.basename(media_path)
async with session.post(
f"http://localhost:{bridge_port}/send-media",
json=payload,
timeout=aiohttp.ClientTimeout(total=120),
) as resp:
if resp.status != 200:
body = await resp.text()
return {"error": f"WhatsApp media error ({resp.status}): {body}"}
data = await resp.json()
last_message_id = data.get("messageId") or last_message_id
return {
"success": True,
"platform": "whatsapp",
"chat_id": normalized_chat_id,
"message_id": last_message_id,
}
except Exception as e:
return {"error": f"WhatsApp send failed: {e}"}

View file

@ -0,0 +1,221 @@
"""WhatsApp media delivery for send_message (#19105).
Covers two layers:
* ``_bridge_media_type`` extension/voice/force_document -> bridge mediaType.
* ``_standalone_send`` text-first then per-file ``/send-media`` uploads,
media-only (skip ``/send``), and missing-file errors. The bridge HTTP calls
are mocked at the ``aiohttp.ClientSession`` boundary.
"""
import asyncio
import os
import tempfile
from types import SimpleNamespace
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from plugins.platforms.whatsapp.adapter import _bridge_media_type, _standalone_send
# ---------------------------------------------------------------------------
# _bridge_media_type
# ---------------------------------------------------------------------------
# (path, is_voice, force_document, expected mediaType)
_BRIDGE_MEDIA_TYPE_CASES = [
    ("a.png", False, False, "image"),
    ("a.JPG", False, False, "image"),
    ("a.jpeg", False, False, "image"),
    ("a.webp", False, False, "image"),
    ("a.gif", False, False, "image"),
    ("a.mp4", False, False, "video"),
    ("a.mov", False, False, "video"),
    ("a.webm", False, False, "video"),
    ("a.ogg", True, False, "audio"),
    ("a.opus", False, False, "audio"),
    ("a.mp3", False, False, "audio"),
    ("a.wav", False, False, "audio"),
    ("a.pdf", False, False, "document"),
    ("a.zip", False, False, "document"),
    # force_document overrides everything
    ("a.png", False, True, "document"),
    ("a.mp4", False, True, "document"),
    # is_voice wins over a video extension
    ("a.mp4", True, False, "audio"),
]


@pytest.mark.parametrize(
    "path,is_voice,force_document,expected",
    _BRIDGE_MEDIA_TYPE_CASES,
)
def test_bridge_media_type(path, is_voice, force_document, expected):
    """Each (path, flags) row must classify to the expected mediaType."""
    actual = _bridge_media_type(path, is_voice, force_document)
    assert actual == expected
# ---------------------------------------------------------------------------
# _standalone_send — bridge HTTP mocked
# ---------------------------------------------------------------------------
def _resp(status, json_data=None, text_data=None):
    """Build a fake bridge HTTP response.

    The returned ``AsyncMock`` exposes ``status`` plus awaitable ``json()``
    and ``text()``; falsy ``json_data`` / ``text_data`` fall back to ``{}``
    and ``""`` respectively.
    """
    payload = json_data if json_data else {}
    body = text_data if text_data else ""

    fake = AsyncMock()
    fake.text = AsyncMock(return_value=body)
    fake.json = AsyncMock(return_value=payload)
    fake.status = status
    return fake
def _session_with(responses):
    """Return ``(session_ctx, calls)`` for a fake ``aiohttp.ClientSession``.

    ``session_ctx`` is an async context manager that yields a session whose
    ``post`` hands out *responses* one per call, in order; once they are
    used up, the final response is repeated. Every POST is appended to
    ``calls`` as a ``(url, json_payload)`` tuple.
    """
    recorded = []
    served = 0

    def _async_ctx(value):
        # MagicMock usable as an ``async with`` target that yields *value*.
        manager = MagicMock()
        manager.__aenter__ = AsyncMock(return_value=value)
        manager.__aexit__ = AsyncMock(return_value=False)
        return manager

    def _fake_post(url, **kwargs):
        nonlocal served
        recorded.append((url, kwargs.get("json")))
        # Clamp to the last scripted response once the list is exhausted.
        reply = responses[min(served, len(responses) - 1)]
        served += 1
        return _async_ctx(reply)

    fake_session = MagicMock()
    fake_session.post = MagicMock(side_effect=_fake_post)
    return _async_ctx(fake_session), recorded
def _pconfig():
    """Return a minimal platform-config stand-in: empty token, bridge on port 3000."""
    bridge_settings = {"bridge_port": 3000}
    return SimpleNamespace(token="", extra=bridge_settings)
def _tmpfile(suffix):
    """Create a one-byte temp file ending in *suffix* and return its path.

    The file is created with ``delete=False`` so it outlives the handle;
    callers are responsible for removing it (the tests ``os.unlink`` it).
    """
    # The context manager closes the handle even if the write raises,
    # so a failed write cannot leak an open file descriptor.
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as handle:
        handle.write(b"x")
    return handle.name
def test_text_plus_mixed_media_routes_native_types():
    """Text is POSTed first, then each attachment with its native mediaType."""
    temp_paths = [_tmpfile(ext) for ext in (".png", ".mp4", ".ogg")]
    image_path, video_path, voice_path = temp_paths
    try:
        replies = [
            _resp(200, {"messageId": message_id})
            for message_id in ("t1", "m1", "m2", "m3")
        ]
        session_ctx, posts = _session_with(replies)
        attachments = [
            (image_path, False),
            (video_path, False),
            (voice_path, True),
        ]
        with patch("aiohttp.ClientSession", return_value=session_ctx):
            outcome = asyncio.run(
                _standalone_send(
                    _pconfig(),
                    "12345",
                    "hello",
                    media_files=attachments,
                )
            )

        assert outcome["success"] is True

        # The very first POST is the text message.
        first_url, first_payload = posts[0]
        assert first_url.endswith("/send")
        assert first_payload["message"] == "hello"

        # The uploads follow in the order the attachments were given.
        sent_kinds = [
            payload["mediaType"]
            for url, payload in posts
            if url.endswith("/send-media")
        ]
        assert sent_kinds == ["image", "video", "audio"]

        # The chat id was normalized to a WhatsApp JID.
        assert "@" in first_payload["chatId"]
    finally:
        for temp_path in temp_paths:
            os.unlink(temp_path)
def test_media_only_skips_text_send():
    """An empty message with one attachment POSTs only to ``/send-media``."""
    img = _tmpfile(".jpg")
    try:
        session_ctx, calls = _session_with([_resp(200, {"messageId": "m1"})])
        with patch("aiohttp.ClientSession", return_value=session_ctx):
            res = asyncio.run(
                _standalone_send(_pconfig(), "12345", "", media_files=[(img, False)])
            )
        assert res["success"] is True
        # all() is vacuously true for an empty list, so pin the call count:
        # exactly one POST (the upload) must have happened, and no /send.
        assert len(calls) == 1
        assert all(c[0].endswith("/send-media") for c in calls)
    finally:
        os.unlink(img)
def test_force_document_sends_image_as_document():
    """With ``force_document=True`` a PNG is uploaded as a named document."""
    picture = _tmpfile(".png")
    try:
        replies = [
            _resp(200, {"messageId": "t1"}),
            _resp(200, {"messageId": "m1"}),
        ]
        session_ctx, posts = _session_with(replies)
        with patch("aiohttp.ClientSession", return_value=session_ctx):
            outcome = asyncio.run(
                _standalone_send(
                    _pconfig(),
                    "12345",
                    "doc",
                    media_files=[(picture, False)],
                    force_document=True,
                )
            )

        assert outcome["success"] is True

        upload_payloads = [
            payload for url, payload in posts if url.endswith("/send-media")
        ]
        upload = upload_payloads[0]
        assert upload["mediaType"] == "document"
        assert upload["fileName"] == os.path.basename(picture)
    finally:
        os.unlink(picture)
def test_missing_media_file_errors():
    """A media path that does not exist yields a "not found" error result."""
    session_ctx, _posts = _session_with([_resp(200, {"messageId": "t1"})])
    missing_attachment = [("/no/such/file.png", False)]
    with patch("aiohttp.ClientSession", return_value=session_ctx):
        outcome = asyncio.run(
            _standalone_send(
                _pconfig(),
                "12345",
                "hi",
                media_files=missing_attachment,
            )
        )

    assert "error" in outcome
    assert "not found" in outcome["error"]
def test_media_upload_error_propagates():
    """A non-200 reply from the media upload surfaces as an error with the status."""
    picture = _tmpfile(".png")
    try:
        text_ok = _resp(200, {"messageId": "t1"})
        upload_failed = _resp(500, text_data="boom")
        session_ctx, _posts = _session_with([text_ok, upload_failed])
        with patch("aiohttp.ClientSession", return_value=session_ctx):
            outcome = asyncio.run(
                _standalone_send(
                    _pconfig(), "12345", "hi", media_files=[(picture, False)]
                )
            )

        assert "error" in outcome
        assert "500" in outcome["error"]
    finally:
        os.unlink(picture)
def test_text_only_unchanged_behavior():
    """Without media, exactly one ``/send`` POST is made and the result shape is unchanged."""
    session_ctx, posts = _session_with([_resp(200, {"messageId": "t1"})])
    with patch("aiohttp.ClientSession", return_value=session_ctx):
        outcome = asyncio.run(_standalone_send(_pconfig(), "12345", "just text"))

    expected = {
        "success": True,
        "platform": "whatsapp",
        "chat_id": posts[0][1]["chatId"],
        "message_id": "t1",
    }
    assert outcome == expected
    assert len(posts) == 1
    assert posts[0][0].endswith("/send")

View file

@ -903,11 +903,38 @@ async def _send_to_platform(platform, pconfig, chat_id, message, thread_id=None,
last_result = result
return last_result
# --- WhatsApp: native media attachment support via the registry's
# standalone_sender_fn (plugins/platforms/whatsapp/adapter.py::_standalone_send).
# The plugin uploads each file through the local Baileys bridge /send-media
# endpoint so images/videos/audio arrive as native bubbles, not documents. #41112
if platform == Platform.WHATSAPP and media_files:
from gateway.platform_registry import platform_registry as _pr_wa
from hermes_cli.plugins import discover_plugins as _dp_wa
_dp_wa()
_wa_entry = _pr_wa.get("whatsapp")
if _wa_entry is None or _wa_entry.standalone_sender_fn is None:
return {"error": "WhatsApp plugin not registered or missing standalone_sender_fn"}
last_result = None
for i, chunk in enumerate(chunks):
is_last = (i == len(chunks) - 1)
result = await _wa_entry.standalone_sender_fn(
pconfig,
chat_id,
chunk,
media_files=media_files if is_last else None,
thread_id=thread_id,
force_document=force_document,
)
if isinstance(result, dict) and result.get("error"):
return result
last_result = result
return last_result
# --- Non-media platforms ---
if media_files and not message.strip():
return {
"error": (
f"send_message MEDIA delivery is currently only supported for telegram, discord, matrix, weixin, signal, yuanbao and feishu; "
f"send_message MEDIA delivery is currently only supported for telegram, discord, matrix, weixin, signal, yuanbao, feishu and whatsapp; "
f"target {platform.value} had only media attachments"
)
}
@ -915,7 +942,7 @@ async def _send_to_platform(platform, pconfig, chat_id, message, thread_id=None,
if media_files:
warning = (
f"MEDIA attachments were omitted for {platform.value}; "
"native send_message media delivery is currently only supported for telegram, discord, matrix, weixin, signal, yuanbao and feishu"
"native send_message media delivery is currently only supported for telegram, discord, matrix, weixin, signal, yuanbao, feishu and whatsapp"
)
last_result = None