hermes-agent/tests/tools/test_whatsapp_send_message_media.py
Teknium cd592c105c
feat(send_message): native WhatsApp media delivery via Baileys bridge (#53598)
send_message with MEDIA:/path to a WhatsApp target previously dropped the
attachment: the WhatsApp branch never passed media_files, the plugin's
_standalone_send accepted the param but only POSTed text, and WhatsApp was
absent from the media-supported platform list.

- send_message_tool: add a Platform.WHATSAPP media block (mirrors Feishu) that
  routes media_files through the whatsapp plugin's standalone_sender_fn, and
  add whatsapp to the supported-media list strings.
- whatsapp adapter: _standalone_send now sends text first (skipped when the
  chunk is media-only), then uploads each file via the bridge /send-media
  endpoint with a mediaType derived from extension/is_voice/force_document, so
  images/videos/voice arrive as native bubbles instead of documents.
- _bridge_media_type classifier maps ext -> image|video|audio|document.

Closes #19105 (remaining send_message gap). Other items in the report
(inbound video paths, image_generate auto-deliver, history dedup, native
gateway bubbles) already landed on main.
2026-06-27 04:40:05 -07:00

221 lines
7.2 KiB
Python

"""WhatsApp media delivery for send_message (#19105).
Covers two layers:
* ``_bridge_media_type`` — extension/voice/force_document -> bridge mediaType.
* ``_standalone_send`` — text-first then per-file ``/send-media`` uploads,
media-only (skip ``/send``), and missing-file errors. The bridge HTTP calls
are mocked at the ``aiohttp.ClientSession`` boundary.
"""
import asyncio
import os
import tempfile
from types import SimpleNamespace
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from plugins.platforms.whatsapp.adapter import _bridge_media_type, _standalone_send
# ---------------------------------------------------------------------------
# _bridge_media_type
# ---------------------------------------------------------------------------
@pytest.mark.parametrize(
"path,is_voice,force_document,expected",
[
("a.png", False, False, "image"),
("a.JPG", False, False, "image"),
("a.jpeg", False, False, "image"),
("a.webp", False, False, "image"),
("a.gif", False, False, "image"),
("a.mp4", False, False, "video"),
("a.mov", False, False, "video"),
("a.webm", False, False, "video"),
("a.ogg", True, False, "audio"),
("a.opus", False, False, "audio"),
("a.mp3", False, False, "audio"),
("a.wav", False, False, "audio"),
("a.pdf", False, False, "document"),
("a.zip", False, False, "document"),
# force_document overrides everything
("a.png", False, True, "document"),
("a.mp4", False, True, "document"),
# is_voice wins over a video extension
("a.mp4", True, False, "audio"),
],
)
def test_bridge_media_type(path, is_voice, force_document, expected):
assert _bridge_media_type(path, is_voice, force_document) == expected
# ---------------------------------------------------------------------------
# _standalone_send — bridge HTTP mocked
# ---------------------------------------------------------------------------
def _resp(status, json_data=None, text_data=None):
r = AsyncMock()
r.status = status
r.json = AsyncMock(return_value=json_data or {})
r.text = AsyncMock(return_value=text_data or "")
return r
def _session_with(responses):
"""Build a mocked aiohttp.ClientSession that returns *responses* in order
and records every POST (url, json_payload)."""
calls = []
idx = [0]
def _post(url, **kwargs):
calls.append((url, kwargs.get("json")))
r = responses[idx[0]] if idx[0] < len(responses) else responses[-1]
idx[0] += 1
ctx = MagicMock()
ctx.__aenter__ = AsyncMock(return_value=r)
ctx.__aexit__ = AsyncMock(return_value=False)
return ctx
session = MagicMock()
session.post = MagicMock(side_effect=_post)
session_ctx = MagicMock()
session_ctx.__aenter__ = AsyncMock(return_value=session)
session_ctx.__aexit__ = AsyncMock(return_value=False)
return session_ctx, calls
def _pconfig():
return SimpleNamespace(token="", extra={"bridge_port": 3000})
def _tmpfile(suffix):
f = tempfile.NamedTemporaryFile(suffix=suffix, delete=False)
f.write(b"x")
f.close()
return f.name
def test_text_plus_mixed_media_routes_native_types():
img = _tmpfile(".png")
vid = _tmpfile(".mp4")
voice = _tmpfile(".ogg")
try:
session_ctx, calls = _session_with(
[
_resp(200, {"messageId": "t1"}),
_resp(200, {"messageId": "m1"}),
_resp(200, {"messageId": "m2"}),
_resp(200, {"messageId": "m3"}),
]
)
with patch("aiohttp.ClientSession", return_value=session_ctx):
res = asyncio.run(
_standalone_send(
_pconfig(),
"12345",
"hello",
media_files=[(img, False), (vid, False), (voice, True)],
)
)
assert res["success"] is True
# text first, then three media uploads in order
assert calls[0][0].endswith("/send")
assert calls[0][1]["message"] == "hello"
media_types = [c[1]["mediaType"] for c in calls if c[0].endswith("/send-media")]
assert media_types == ["image", "video", "audio"]
# chat id normalized to a WhatsApp JID
assert "@" in calls[0][1]["chatId"]
finally:
for p in (img, vid, voice):
os.unlink(p)
def test_media_only_skips_text_send():
img = _tmpfile(".jpg")
try:
session_ctx, calls = _session_with([_resp(200, {"messageId": "m1"})])
with patch("aiohttp.ClientSession", return_value=session_ctx):
res = asyncio.run(
_standalone_send(_pconfig(), "12345", "", media_files=[(img, False)])
)
assert res["success"] is True
assert all(c[0].endswith("/send-media") for c in calls)
finally:
os.unlink(img)
def test_force_document_sends_image_as_document():
img = _tmpfile(".png")
try:
session_ctx, calls = _session_with(
[_resp(200, {"messageId": "t1"}), _resp(200, {"messageId": "m1"})]
)
with patch("aiohttp.ClientSession", return_value=session_ctx):
res = asyncio.run(
_standalone_send(
_pconfig(),
"12345",
"doc",
media_files=[(img, False)],
force_document=True,
)
)
assert res["success"] is True
media_call = [c for c in calls if c[0].endswith("/send-media")][0]
assert media_call[1]["mediaType"] == "document"
assert media_call[1]["fileName"] == os.path.basename(img)
finally:
os.unlink(img)
def test_missing_media_file_errors():
session_ctx, _ = _session_with([_resp(200, {"messageId": "t1"})])
with patch("aiohttp.ClientSession", return_value=session_ctx):
res = asyncio.run(
_standalone_send(
_pconfig(),
"12345",
"hi",
media_files=[("/no/such/file.png", False)],
)
)
assert "error" in res
assert "not found" in res["error"]
def test_media_upload_error_propagates():
img = _tmpfile(".png")
try:
session_ctx, _ = _session_with(
[
_resp(200, {"messageId": "t1"}),
_resp(500, text_data="boom"),
]
)
with patch("aiohttp.ClientSession", return_value=session_ctx):
res = asyncio.run(
_standalone_send(
_pconfig(), "12345", "hi", media_files=[(img, False)]
)
)
assert "error" in res
assert "500" in res["error"]
finally:
os.unlink(img)
def test_text_only_unchanged_behavior():
session_ctx, calls = _session_with([_resp(200, {"messageId": "t1"})])
with patch("aiohttp.ClientSession", return_value=session_ctx):
res = asyncio.run(_standalone_send(_pconfig(), "12345", "just text"))
assert res == {
"success": True,
"platform": "whatsapp",
"chat_id": calls[0][1]["chatId"],
"message_id": "t1",
}
assert len(calls) == 1 and calls[0][0].endswith("/send")