diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py index 66353656a3..b0d70399b8 100644 --- a/gateway/platforms/base.py +++ b/gateway/platforms/base.py @@ -618,16 +618,22 @@ class BasePlatformAdapter(ABC): has_voice_tag = "[[audio_as_voice]]" in content cleaned = cleaned.replace("[[audio_as_voice]]", "") - # Extract MEDIA: tags - media_pattern = r'MEDIA:\s*(\S+)' - for match in re.finditer(media_pattern, content): - path = match.group(1).strip().rstrip('`"\',)}') + # Extract MEDIA: tags, allowing optional whitespace after the colon + # and quoted/backticked paths for LLM-formatted outputs. + media_pattern = re.compile( + r'''[`"']?MEDIA:\s*(?P`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|\S+)[`"']?''' + ) + for match in media_pattern.finditer(content): + path = match.group("path").strip() + if len(path) >= 2 and path[0] == path[-1] and path[0] in "`\"'": + path = path[1:-1].strip() + path = path.lstrip("`\"'").rstrip("`\"',.;:)}]") if path: media.append((path, has_voice_tag)) - - # Remove MEDIA tags from content (including surrounding backticks/quotes) + + # Remove MEDIA tags from content (including surrounding quote/backtick wrappers) if media: - cleaned = re.sub(r'[`"\']*MEDIA:\s*\S+[`"\']*', '', cleaned) + cleaned = media_pattern.sub('', cleaned) cleaned = re.sub(r'\n{3,}', '\n\n', cleaned).strip() return media, cleaned diff --git a/tests/gateway/test_platform_base.py b/tests/gateway/test_platform_base.py index 8334006fdb..1aa0e11445 100644 --- a/tests/gateway/test_platform_base.py +++ b/tests/gateway/test_platform_base.py @@ -258,6 +258,29 @@ class TestExtractMedia: _, cleaned = BasePlatformAdapter.extract_media(content) assert "\n\n\n" not in cleaned + def test_media_tag_allows_optional_whitespace_after_colon(self): + content = "MEDIA: /path/to/audio.ogg" + media, cleaned = BasePlatformAdapter.extract_media(content) + assert media == [("/path/to/audio.ogg", False)] + assert cleaned == "" + + def test_media_tag_strips_wrapping_quotes_and_backticks(self): + content = "MEDIA: `/path/to/file.png`\nMEDIA:\"/path/to/file2.png\"\nMEDIA:'/path/to/file3.png'" + media, cleaned = BasePlatformAdapter.extract_media(content) + assert media == [ + ("/path/to/file.png", False), + ("/path/to/file2.png", False), + ("/path/to/file3.png", False), + ] + assert cleaned == "" + + def test_media_tag_supports_quoted_paths_with_spaces(self): + content = "Here\nMEDIA: '/tmp/my image.png'\nAfter" + media, cleaned = BasePlatformAdapter.extract_media(content) + assert media == [("/tmp/my image.png", False)] + assert "Here" in cleaned + assert "After" in cleaned + # --------------------------------------------------------------------------- # truncate_message diff --git a/tests/tools/test_send_message_tool.py b/tests/tools/test_send_message_tool.py index fc037bc846..3ad44f0f70 100644 --- a/tests/tools/test_send_message_tool.py +++ b/tests/tools/test_send_message_tool.py @@ -2,11 +2,13 @@ import asyncio import json +import sys +from pathlib import Path from types import SimpleNamespace -from unittest.mock import AsyncMock, patch +from unittest.mock import AsyncMock, MagicMock, patch from gateway.config import Platform -from tools.send_message_tool import send_message_tool +from tools.send_message_tool import _send_telegram, send_message_tool def _run_async_immediately(coro): @@ -14,13 +16,18 @@ def _run_async_immediately(coro): def _make_config(): - telegram_cfg = SimpleNamespace(enabled=True, token="fake-token", extra={}) + telegram_cfg = SimpleNamespace(enabled=True, token="***", extra={}) return SimpleNamespace( platforms={Platform.TELEGRAM: telegram_cfg}, get_home_channel=lambda _platform: None, ), telegram_cfg +def _install_telegram_mock(monkeypatch, bot): + telegram_mod = SimpleNamespace(Bot=lambda token: bot) + monkeypatch.setitem(sys.modules, "telegram", telegram_mod) + + class TestSendMessageTool: def test_sends_to_explicit_telegram_topic_target(self): config, telegram_cfg = _make_config() @@ -41,7 +48,14 @@ class TestSendMessageTool: ) assert result["success"] is True - send_mock.assert_awaited_once_with(Platform.TELEGRAM, telegram_cfg, "-1001", "hello", thread_id="17585") + send_mock.assert_awaited_once_with( + Platform.TELEGRAM, + telegram_cfg, + "-1001", + "hello", + thread_id="17585", + media_files=[], + ) mirror_mock.assert_called_once_with("telegram", "-1001", "hello", source_label="cli", thread_id="17585") def test_resolved_telegram_topic_name_preserves_thread_id(self): @@ -64,4 +78,154 @@ class TestSendMessageTool: ) assert result["success"] is True - send_mock.assert_awaited_once_with(Platform.TELEGRAM, telegram_cfg, "-1001", "hello", thread_id="17585") + send_mock.assert_awaited_once_with( + Platform.TELEGRAM, + telegram_cfg, + "-1001", + "hello", + thread_id="17585", + media_files=[], + ) + + def test_media_only_message_uses_placeholder_for_mirroring(self): + config, telegram_cfg = _make_config() + + with patch("gateway.config.load_gateway_config", return_value=config), \ + patch("tools.interrupt.is_interrupted", return_value=False), \ + patch("model_tools._run_async", side_effect=_run_async_immediately), \ + patch("tools.send_message_tool._send_to_platform", new=AsyncMock(return_value={"success": True})) as send_mock, \ + patch("gateway.mirror.mirror_to_session", return_value=True) as mirror_mock: + result = json.loads( + send_message_tool( + { + "action": "send", + "target": "telegram:-1001", + "message": "MEDIA:/tmp/example.ogg", + } + ) + ) + + assert result["success"] is True + send_mock.assert_awaited_once_with( + Platform.TELEGRAM, + telegram_cfg, + "-1001", + "", + thread_id=None, + media_files=[("/tmp/example.ogg", False)], + ) + mirror_mock.assert_called_once_with( + "telegram", + "-1001", + "[Sent audio attachment]", + source_label="cli", + thread_id=None, + ) + + +class TestSendTelegramMediaDelivery: + def test_sends_text_then_photo_for_media_tag(self, tmp_path, monkeypatch): + image_path = tmp_path / "photo.png" + image_path.write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * 32) + + bot = MagicMock() + bot.send_message = AsyncMock(return_value=SimpleNamespace(message_id=1)) + bot.send_photo = AsyncMock(return_value=SimpleNamespace(message_id=2)) + bot.send_video = AsyncMock() + bot.send_voice = AsyncMock() + bot.send_audio = AsyncMock() + bot.send_document = AsyncMock() + _install_telegram_mock(monkeypatch, bot) + + result = asyncio.run( + _send_telegram( + "token", + "12345", + "Hello there", + media_files=[(str(image_path), False)], + ) + ) + + assert result["success"] is True + assert result["message_id"] == "2" + bot.send_message.assert_awaited_once() + bot.send_photo.assert_awaited_once() + sent_text = bot.send_message.await_args.kwargs["text"] + assert "MEDIA:" not in sent_text + assert sent_text == "Hello there" + + def test_sends_voice_for_ogg_with_voice_directive(self, tmp_path, monkeypatch): + voice_path = tmp_path / "voice.ogg" + voice_path.write_bytes(b"OggS" + b"\x00" * 32) + + bot = MagicMock() + bot.send_message = AsyncMock() + bot.send_photo = AsyncMock() + bot.send_video = AsyncMock() + bot.send_voice = AsyncMock(return_value=SimpleNamespace(message_id=7)) + bot.send_audio = AsyncMock() + bot.send_document = AsyncMock() + _install_telegram_mock(monkeypatch, bot) + + result = asyncio.run( + _send_telegram( + "token", + "12345", + "", + media_files=[(str(voice_path), True)], + ) + ) + + assert result["success"] is True + bot.send_voice.assert_awaited_once() + bot.send_audio.assert_not_awaited() + bot.send_message.assert_not_awaited() + + def test_sends_audio_for_mp3(self, tmp_path, monkeypatch): + audio_path = tmp_path / "clip.mp3" + audio_path.write_bytes(b"ID3" + b"\x00" * 32) + + bot = MagicMock() + bot.send_message = AsyncMock() + bot.send_photo = AsyncMock() + bot.send_video = AsyncMock() + bot.send_voice = AsyncMock() + bot.send_audio = AsyncMock(return_value=SimpleNamespace(message_id=8)) + bot.send_document = AsyncMock() + _install_telegram_mock(monkeypatch, bot) + + result = asyncio.run( + _send_telegram( + "token", + "12345", + "", + media_files=[(str(audio_path), False)], + ) + ) + + assert result["success"] is True + bot.send_audio.assert_awaited_once() + bot.send_voice.assert_not_awaited() + + def test_missing_media_returns_error_without_leaking_raw_tag(self, monkeypatch): + bot = MagicMock() + bot.send_message = AsyncMock() + bot.send_photo = AsyncMock() + bot.send_video = AsyncMock() + bot.send_voice = AsyncMock() + bot.send_audio = AsyncMock() + bot.send_document = AsyncMock() + _install_telegram_mock(monkeypatch, bot) + + result = asyncio.run( + _send_telegram( + "token", + "12345", + "", + media_files=[("/tmp/does-not-exist.png", False)], + ) + ) + + assert "error" in result + assert "No deliverable text or media remained" in result["error"] + bot.send_message.assert_not_awaited() diff --git a/tools/send_message_tool.py b/tools/send_message_tool.py index a9c8ae9bbe..56ea65f2c5 100644 --- a/tools/send_message_tool.py +++ b/tools/send_message_tool.py @@ -14,11 +14,10 @@ import time logger = logging.getLogger(__name__) _TELEGRAM_TOPIC_TARGET_RE = re.compile(r"^\s*(-?\d+)(?::(\d+))?\s*$") - -_IMAGE_EXTS = {'.jpg', '.jpeg', '.png', '.webp', '.gif'} -_VIDEO_EXTS = {'.mp4', '.mov', '.avi', '.mkv', '.3gp'} -_AUDIO_EXTS = {'.ogg', '.opus', '.mp3', '.wav', '.m4a'} -_VOICE_EXTS = {'.ogg', '.opus'} +_IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".webp", ".gif"} +_VIDEO_EXTS = {".mp4", ".mov", ".avi", ".mkv", ".3gp"} +_AUDIO_EXTS = {".ogg", ".opus", ".mp3", ".wav", ".m4a"} +_VOICE_EXTS = {".ogg", ".opus"} SEND_MESSAGE_SCHEMA = { @@ -135,6 +134,11 @@ def _handle_send(args): if not pconfig or not pconfig.enabled: return json.dumps({"error": f"Platform '{platform_name}' is not configured. Set up credentials in ~/.hermes/gateway.json or environment variables."}) + from gateway.platforms.base import BasePlatformAdapter + + media_files, cleaned_message = BasePlatformAdapter.extract_media(message) + mirror_text = cleaned_message.strip() or _describe_media_for_mirror(media_files) + used_home_channel = False if not chat_id: home = config.get_home_channel(platform) @@ -150,16 +154,25 @@ def _handle_send(args): try: from model_tools import _run_async - result = _run_async(_send_to_platform(platform, pconfig, chat_id, message, thread_id=thread_id)) + result = _run_async( + _send_to_platform( + platform, + pconfig, + chat_id, + cleaned_message, + thread_id=thread_id, + media_files=media_files, + ) + ) if used_home_channel and isinstance(result, dict) and result.get("success"): result["note"] = f"Sent to {platform_name} home channel (chat_id: {chat_id})" # Mirror the sent message into the target's gateway session - if isinstance(result, dict) and result.get("success"): + if isinstance(result, dict) and result.get("success") and mirror_text: try: from gateway.mirror import mirror_to_session source_label = os.getenv("HERMES_SESSION_PLATFORM", "cli") - if mirror_to_session(platform_name, chat_id, message, source_label=source_label, thread_id=thread_id): + if mirror_to_session(platform_name, chat_id, mirror_text, source_label=source_label, thread_id=thread_id): result["mirrored"] = True except Exception: pass @@ -180,48 +193,97 @@ def _parse_target_ref(platform_name: str, target_ref: str): return None, None, False -async def _send_to_platform(platform, pconfig, chat_id, message, thread_id=None): +def _describe_media_for_mirror(media_files): + """Return a human-readable mirror summary when a message only contains media.""" + if not media_files: + return "" + if len(media_files) == 1: + media_path, is_voice = media_files[0] + ext = os.path.splitext(media_path)[1].lower() + if is_voice and ext in _VOICE_EXTS: + return "[Sent voice message]" + if ext in _IMAGE_EXTS: + return "[Sent image attachment]" + if ext in _VIDEO_EXTS: + return "[Sent video attachment]" + if ext in _AUDIO_EXTS: + return "[Sent audio attachment]" + return "[Sent document attachment]" + return f"[Sent {len(media_files)} media attachments]" + + +async def _send_to_platform(platform, pconfig, chat_id, message, thread_id=None, media_files=None): """Route a message to the appropriate platform sender.""" from gateway.config import Platform + + media_files = media_files or [] if platform == Platform.TELEGRAM: - return await _send_telegram(pconfig.token, chat_id, message, thread_id=thread_id) - elif platform == Platform.DISCORD: - return await _send_discord(pconfig.token, chat_id, message) + return await _send_telegram( + pconfig.token, + chat_id, + message, + media_files=media_files, + thread_id=thread_id, + ) + if media_files and not message.strip(): + return { + "error": ( + f"send_message MEDIA delivery is currently only supported for telegram; " + f"target {platform.value} had only media attachments" + ) + } + warning = None + if media_files: + warning = ( + f"MEDIA attachments were omitted for {platform.value}; " + "native send_message media delivery is currently only supported for telegram" + ) + + if platform == Platform.DISCORD: + result = await _send_discord(pconfig.token, chat_id, message) elif platform == Platform.SLACK: - return await _send_slack(pconfig.token, chat_id, message) + result = await _send_slack(pconfig.token, chat_id, message) elif platform == Platform.SIGNAL: - return await _send_signal(pconfig.extra, chat_id, message) + result = await _send_signal(pconfig.extra, chat_id, message) elif platform == Platform.EMAIL: - return await _send_email(pconfig.extra, chat_id, message) - return {"error": f"Direct sending not yet implemented for {platform.value}"} + result = await _send_email(pconfig.extra, chat_id, message) + else: + result = {"error": f"Direct sending not yet implemented for {platform.value}"} + + if warning and isinstance(result, dict) and result.get("success"): + warnings = list(result.get("warnings", [])) + warnings.append(warning) + result["warnings"] = warnings + return result -async def _send_telegram(token, chat_id, message, thread_id=None): +async def _send_telegram(token, chat_id, message, media_files=None, thread_id=None): """Send via Telegram Bot API (one-shot, no polling needed).""" try: from telegram import Bot - from gateway.platforms.base import BasePlatformAdapter + bot = Bot(token=token) int_chat_id = int(chat_id) + media_files = media_files or [] thread_kwargs = {} if thread_id is not None: thread_kwargs["message_thread_id"] = int(thread_id) - # Extract MEDIA: tags and send files natively - media_files, cleaned = BasePlatformAdapter.extract_media(message) - last_msg = None - # Send text portion if any remains - if cleaned.strip(): + warnings = [] + + if message.strip(): last_msg = await bot.send_message( - chat_id=int_chat_id, text=cleaned, **thread_kwargs + chat_id=int_chat_id, text=message, **thread_kwargs ) - # Send extracted media files for media_path, is_voice in media_files: if not os.path.exists(media_path): - logger.warning("Media file not found, skipping: %s", media_path) + warning = f"Media file not found, skipping: {media_path}" + logger.warning(warning) + warnings.append(warning) continue + ext = os.path.splitext(media_path)[1].lower() try: with open(media_path, "rb") as f: @@ -246,15 +308,25 @@ async def _send_telegram(token, chat_id, message, thread_id=None): chat_id=int_chat_id, document=f, **thread_kwargs ) except Exception as e: - logger.error("Failed to send media %s: %s", media_path, e) + warning = f"Failed to send media {media_path}: {e}" + logger.error(warning) + warnings.append(warning) - # If no text and no media sent, send cleaned text as fallback if last_msg is None: - last_msg = await bot.send_message( - chat_id=int_chat_id, text=cleaned if cleaned.strip() else message, **thread_kwargs - ) + error = "No deliverable text or media remained after processing MEDIA tags" + if warnings: + return {"error": error, "warnings": warnings} + return {"error": error} - return {"success": True, "platform": "telegram", "chat_id": chat_id, "message_id": str(last_msg.message_id)} + result = { + "success": True, + "platform": "telegram", + "chat_id": chat_id, + "message_id": str(last_msg.message_id), + } + if warnings: + result["warnings"] = warnings + return result except ImportError: return {"error": "python-telegram-bot not installed. Run: pip install python-telegram-bot"} except Exception as e: