diff --git a/cron/scheduler.py b/cron/scheduler.py index 02260c8a4a..08d73c1beb 100644 --- a/cron/scheduler.py +++ b/cron/scheduler.py @@ -277,13 +277,21 @@ def _resolve_delivery_target(job: dict) -> Optional[dict]: return targets[0] if targets else None -# Media extension sets — keep in sync with gateway/platforms/base.py:_process_message_background -_AUDIO_EXTS = frozenset({'.ogg', '.opus', '.mp3', '.wav', '.m4a'}) +# Media extension sets — audio routing is centralized in gateway.platforms.base +# via should_send_media_as_audio() so Telegram-specific rules stay in one place. _VIDEO_EXTS = frozenset({'.mp4', '.mov', '.avi', '.mkv', '.webm', '.3gp'}) _IMAGE_EXTS = frozenset({'.jpg', '.jpeg', '.png', '.webp', '.gif'}) -def _send_media_via_adapter(adapter, chat_id: str, media_files: list, metadata: dict | None, loop, job: dict) -> None: +def _send_media_via_adapter( + adapter, + chat_id: str, + media_files: list, + metadata: dict | None, + loop, + job: dict, + platform=None, +) -> None: """Send extracted MEDIA files as native platform attachments via a live adapter. 
Routes each file to the appropriate adapter method (send_voice, send_image_file, @@ -292,10 +300,13 @@ def _send_media_via_adapter(adapter, chat_id: str, media_files: list, metadata: """ from pathlib import Path + from gateway.platforms.base import should_send_media_as_audio + for media_path, _is_voice in media_files: try: ext = Path(media_path).suffix.lower() - if ext in _AUDIO_EXTS: + route_platform = platform if platform is not None else getattr(adapter, "platform", None) + if should_send_media_as_audio(route_platform, ext, is_voice=_is_voice): coro = adapter.send_voice(chat_id=chat_id, audio_path=media_path, metadata=metadata) elif ext in _VIDEO_EXTS: coro = adapter.send_video(chat_id=chat_id, video_path=media_path, metadata=metadata) @@ -444,7 +455,15 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option # Send extracted media files as native attachments via the live adapter if adapter_ok and media_files: - _send_media_via_adapter(runtime_adapter, chat_id, media_files, send_metadata, loop, job) + _send_media_via_adapter( + runtime_adapter, + chat_id, + media_files, + send_metadata, + loop, + job, + platform=platform, + ) if adapter_ok: logger.info("Job '%s': delivered to %s:%s via live adapter", job["id"], platform_name, chat_id) diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py index 1a5ce4a6e0..da992792e3 100644 --- a/gateway/platforms/base.py +++ b/gateway/platforms/base.py @@ -23,6 +23,45 @@ from utils import normalize_proxy_url logger = logging.getLogger(__name__) +# Audio file extensions Hermes recognizes for native audio delivery. +# cron/scheduler.py and gateway/run.py route via should_send_media_as_audio() +# below; tools/send_message_tool.py and weixin.py keep separate copies. +_AUDIO_EXTS = frozenset({'.ogg', '.opus', '.mp3', '.wav', '.m4a', '.flac'}) +# Telegram's Bot API sendAudio only accepts MP3 / M4A. Other audio +# formats either need to go through sendVoice (Opus/OGG) or must be +# delivered as a regular document. 
+_TELEGRAM_AUDIO_ATTACHMENT_EXTS = frozenset({'.mp3', '.m4a'}) +_TELEGRAM_VOICE_EXTS = frozenset({'.ogg', '.opus'}) + + +def _platform_name(platform) -> str: + """Normalize a Platform enum / raw string into a lowercase name.""" + value = getattr(platform, "value", platform) + return str(value or "").lower() + + +def should_send_media_as_audio(platform, ext: str, is_voice: bool = False) -> bool: + """Return True when a media file should use the platform's audio sender. + + Other platforms: every recognized audio extension routes through the + audio sender. + + Telegram: the Bot API only accepts MP3/M4A for sendAudio and + Opus/OGG for sendVoice. Opus/OGG is only routed as audio when the + caller flagged ``is_voice=True`` (so we don't turn a regular audio + attachment into a voice bubble just because the file happens to be + Opus). Everything else falls through to document delivery by + returning ``False``. + """ + normalized_ext = (ext or "").lower() + if normalized_ext not in _AUDIO_EXTS: + return False + if _platform_name(platform) == "telegram": + if normalized_ext in _TELEGRAM_VOICE_EXTS: + return is_voice + return normalized_ext in _TELEGRAM_AUDIO_ATTACHMENT_EXTS + return True + def utf16_len(s: str) -> int: """Count UTF-16 code units in *s*. @@ -1675,7 +1714,7 @@ class BasePlatformAdapter(ABC): # Extract MEDIA: tags, allowing optional whitespace after the colon # and quoted/backticked paths for LLM-formatted outputs. 
media_pattern = re.compile( - r'''[`"']?MEDIA:\s*(?P<path>`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:png|jpe?g|gif|webp|mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|epub|pdf|zip|rar|7z|docx?|xlsx?|pptx?|txt|csv|apk|ipa)(?=[\s`"',;:)\]}]|$)|\S+)[`"']?''' + r'''[`"']?MEDIA:\s*(?P<path>`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:png|jpe?g|gif|webp|mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|flac|epub|pdf|zip|rar|7z|docx?|xlsx?|pptx?|txt|csv|apk|ipa)(?=[\s`"',;:)\]}]|$)|\S+)[`"']?''' ) for match in media_pattern.finditer(content): path = match.group("path").strip() @@ -2579,7 +2618,6 @@ class BasePlatformAdapter(ABC): logger.error("[%s] Error sending image: %s", self.name, img_err, exc_info=True) # Send extracted media files — route by file type - _AUDIO_EXTS = {'.ogg', '.opus', '.mp3', '.wav', '.m4a'} _VIDEO_EXTS = {'.mp4', '.mov', '.avi', '.mkv', '.webm', '.3gp'} _IMAGE_EXTS = {'.jpg', '.jpeg', '.png', '.webp', '.gif'} @@ -2588,7 +2626,7 @@ class BasePlatformAdapter(ABC): await asyncio.sleep(human_delay) try: ext = Path(media_path).suffix.lower() - if ext in _AUDIO_EXTS: + if should_send_media_as_audio(self.platform, ext, is_voice=is_voice): media_result = await self.send_voice( chat_id=event.source.chat_id, audio_path=media_path, diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py index 1985247246..b58ca45ec9 100644 --- a/gateway/platforms/telegram.py +++ b/gateway/platforms/telegram.py @@ -1951,8 +1951,9 @@ class TelegramAdapter(BasePlatformAdapter): return SendResult(success=False, error=self._missing_media_path_error("Audio", audio_path)) with open(audio_path, "rb") as audio_file: - # .ogg files -> send as voice (round playable bubble) - if audio_path.endswith((".ogg", ".opus")): + ext = os.path.splitext(audio_path)[1].lower() + # .ogg / .opus files -> send as voice (round playable bubble) + if ext in (".ogg", ".opus"): _voice_thread = self._metadata_thread_id(metadata) msg = await self._bot.send_voice( 
chat_id=int(chat_id), @@ -1961,8 +1962,8 @@ class TelegramAdapter(BasePlatformAdapter): reply_to_message_id=int(reply_to) if reply_to else None, message_thread_id=self._message_thread_id_for_send(_voice_thread), ) - else: - # .mp3 and others -> send as audio file + elif ext in (".mp3", ".m4a"): + # Telegram's Bot API sendAudio only accepts MP3 / M4A. _audio_thread = self._metadata_thread_id(metadata) msg = await self._bot.send_audio( chat_id=int(chat_id), @@ -1971,6 +1972,16 @@ class TelegramAdapter(BasePlatformAdapter): reply_to_message_id=int(reply_to) if reply_to else None, message_thread_id=self._message_thread_id_for_send(_audio_thread), ) + else: + # Formats Telegram can't play natively (.wav, .flac, ...) + # — fall back to document delivery instead of raising. + return await self.send_document( + chat_id=chat_id, + file_path=audio_path, + caption=caption, + reply_to=reply_to, + metadata=metadata, + ) return SendResult(success=True, message_id=str(msg.message_id)) except Exception as e: logger.error( diff --git a/gateway/platforms/weixin.py b/gateway/platforms/weixin.py index 6f6b56caa4..72b7d2a4df 100644 --- a/gateway/platforms/weixin.py +++ b/gateway/platforms/weixin.py @@ -1620,7 +1620,7 @@ class WeixinAdapter(BasePlatformAdapter): _, image_cleaned = self.extract_images(cleaned_content) local_files, final_content = self.extract_local_files(image_cleaned) - _AUDIO_EXTS = {".ogg", ".opus", ".mp3", ".wav", ".m4a"} + _AUDIO_EXTS = {".ogg", ".opus", ".mp3", ".wav", ".m4a", ".flac"} _VIDEO_EXTS = {".mp4", ".mov", ".avi", ".mkv", ".webm", ".3gp"} _IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".webp", ".gif"} diff --git a/gateway/run.py b/gateway/run.py index 4885cbefd8..19dc5eae74 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -7179,14 +7179,15 @@ class GatewayRunner: _thread_meta = {"thread_id": event.source.thread_id} if event.source.thread_id else None - _AUDIO_EXTS = {'.ogg', '.opus', '.mp3', '.wav', '.m4a'} + from gateway.platforms.base import 
should_send_media_as_audio + _VIDEO_EXTS = {'.mp4', '.mov', '.avi', '.mkv', '.webm', '.3gp'} _IMAGE_EXTS = {'.jpg', '.jpeg', '.png', '.webp', '.gif'} for media_path, is_voice in media_files: try: ext = Path(media_path).suffix.lower() - if ext in _AUDIO_EXTS: + if should_send_media_as_audio(event.source.platform, ext, is_voice=is_voice): await adapter.send_voice( chat_id=event.source.chat_id, audio_path=media_path, diff --git a/tests/cron/test_scheduler.py b/tests/cron/test_scheduler.py index 6170228c2d..638146989b 100644 --- a/tests/cron/test_scheduler.py +++ b/tests/cron/test_scheduler.py @@ -551,14 +551,14 @@ class TestDeliverResultWrapping: patch("asyncio.run_coroutine_threadsafe", side_effect=fake_run_coro): _deliver_result( job, - "MEDIA:/tmp/voice.ogg", + "[[audio_as_voice]]\nMEDIA:/tmp/voice.ogg", adapters={Platform.TELEGRAM: adapter}, loop=loop, ) # Text send should NOT be called (no text after stripping MEDIA tag) adapter.send.assert_not_called() - # Audio should still be delivered + # Audio should still be delivered as a voice bubble adapter.send_voice.assert_called_once() def test_live_adapter_sends_cleaned_text_not_raw(self): diff --git a/tests/gateway/test_platform_base.py b/tests/gateway/test_platform_base.py index 59246b7990..a6e0d51d60 100644 --- a/tests/gateway/test_platform_base.py +++ b/tests/gateway/test_platform_base.py @@ -323,6 +323,55 @@ class TestExtractMedia: assert "Here" in cleaned assert "After" in cleaned + def test_media_tag_supports_unquoted_flac_paths_with_spaces(self): + content = "MEDIA:/tmp/Jane Doe/speech.flac" + media, cleaned = BasePlatformAdapter.extract_media(content) + assert media == [("/tmp/Jane Doe/speech.flac", False)] + assert cleaned == "" + + +# --------------------------------------------------------------------------- +# should_send_media_as_audio +# --------------------------------------------------------------------------- + +class TestShouldSendMediaAsAudio: + """Audio-routing policy shared by gateway + 
scheduler + send_message.""" + + def test_unknown_extension_returns_false(self): + from gateway.platforms.base import should_send_media_as_audio + assert should_send_media_as_audio(None, ".png") is False + assert should_send_media_as_audio("telegram", ".pdf") is False + + def test_non_telegram_platforms_route_all_audio(self): + from gateway.platforms.base import should_send_media_as_audio + for ext in (".mp3", ".m4a", ".wav", ".flac", ".ogg", ".opus"): + assert should_send_media_as_audio("discord", ext) is True + assert should_send_media_as_audio("slack", ext) is True + + def test_telegram_mp3_and_m4a_route_to_audio(self): + from gateway.platforms.base import should_send_media_as_audio + assert should_send_media_as_audio("telegram", ".mp3") is True + assert should_send_media_as_audio("telegram", ".m4a") is True + + def test_telegram_wav_and_flac_fall_through_to_document(self): + from gateway.platforms.base import should_send_media_as_audio + assert should_send_media_as_audio("telegram", ".wav") is False + assert should_send_media_as_audio("telegram", ".flac") is False + + def test_telegram_ogg_opus_only_when_voice_flagged(self): + from gateway.platforms.base import should_send_media_as_audio + assert should_send_media_as_audio("telegram", ".ogg", is_voice=True) is True + assert should_send_media_as_audio("telegram", ".opus", is_voice=True) is True + assert should_send_media_as_audio("telegram", ".ogg") is False + assert should_send_media_as_audio("telegram", ".opus") is False + + def test_accepts_platform_enum(self): + from gateway.config import Platform + from gateway.platforms.base import should_send_media_as_audio + assert should_send_media_as_audio(Platform.TELEGRAM, ".mp3") is True + assert should_send_media_as_audio(Platform.TELEGRAM, ".flac") is False + assert should_send_media_as_audio(Platform.DISCORD, ".flac") is True + # --------------------------------------------------------------------------- # truncate_message diff --git 
a/tests/gateway/test_telegram_documents.py b/tests/gateway/test_telegram_documents.py index d5564cbf46..4b3e58f459 100644 --- a/tests/gateway/test_telegram_documents.py +++ b/tests/gateway/test_telegram_documents.py @@ -453,6 +453,87 @@ class TestMediaGroups: adapter.handle_message.assert_not_awaited() +# --------------------------------------------------------------------------- +# TestSendVoice — outbound audio delivery +# --------------------------------------------------------------------------- + +class TestSendVoice: + """Tests for TelegramAdapter.send_voice() routing across audio formats.""" + + @pytest.fixture() + def connected_adapter(self, adapter): + """Adapter with a mock bot attached.""" + bot = AsyncMock() + adapter._bot = bot + return adapter + + @pytest.mark.asyncio + async def test_flac_falls_back_to_document(self, connected_adapter, tmp_path): + """Telegram sendAudio does not accept FLAC — must fall back to sendDocument.""" + audio_file = tmp_path / "clip.flac" + audio_file.write_bytes(b"fLaC" + b"\x00" * 32) + + mock_msg = MagicMock() + mock_msg.message_id = 101 + connected_adapter._bot.send_voice = AsyncMock() + connected_adapter._bot.send_audio = AsyncMock() + connected_adapter._bot.send_document = AsyncMock(return_value=mock_msg) + + result = await connected_adapter.send_voice( + chat_id="12345", + audio_path=str(audio_file), + caption="Audio", + ) + + assert result.success is True + assert result.message_id == "101" + connected_adapter._bot.send_document.assert_awaited_once() + connected_adapter._bot.send_audio.assert_not_awaited() + connected_adapter._bot.send_voice.assert_not_awaited() + + @pytest.mark.asyncio + async def test_wav_falls_back_to_document(self, connected_adapter, tmp_path): + """Telegram sendAudio does not accept WAV — must fall back to sendDocument.""" + audio_file = tmp_path / "clip.wav" + audio_file.write_bytes(b"RIFF" + b"\x00" * 32) + + mock_msg = MagicMock() + mock_msg.message_id = 102 + 
connected_adapter._bot.send_voice = AsyncMock() + connected_adapter._bot.send_audio = AsyncMock() + connected_adapter._bot.send_document = AsyncMock(return_value=mock_msg) + + result = await connected_adapter.send_voice( + chat_id="12345", + audio_path=str(audio_file), + ) + + assert result.success is True + connected_adapter._bot.send_document.assert_awaited_once() + connected_adapter._bot.send_audio.assert_not_awaited() + + @pytest.mark.asyncio + async def test_mp3_routes_to_send_audio(self, connected_adapter, tmp_path): + """MP3 is Telegram-sendAudio-compatible.""" + audio_file = tmp_path / "clip.mp3" + audio_file.write_bytes(b"ID3" + b"\x00" * 32) + + mock_msg = MagicMock() + mock_msg.message_id = 103 + connected_adapter._bot.send_voice = AsyncMock() + connected_adapter._bot.send_audio = AsyncMock(return_value=mock_msg) + connected_adapter._bot.send_document = AsyncMock() + + result = await connected_adapter.send_voice( + chat_id="12345", + audio_path=str(audio_file), + ) + + assert result.success is True + connected_adapter._bot.send_audio.assert_awaited_once() + connected_adapter._bot.send_document.assert_not_awaited() + + # --------------------------------------------------------------------------- # TestSendDocument — outbound file attachment delivery # --------------------------------------------------------------------------- diff --git a/tests/gateway/test_tts_media_routing.py b/tests/gateway/test_tts_media_routing.py new file mode 100644 index 0000000000..0ef37deb3e --- /dev/null +++ b/tests/gateway/test_tts_media_routing.py @@ -0,0 +1,195 @@ +""" +Tests for cross-platform audio/voice media routing. + +These tests pin the expected delivery path for audio media files across +Telegram (where Bot-API sendAudio only accepts MP3/M4A and .ogg/.opus +only renders as a voice bubble when explicitly flagged) and via +``GatewayRunner._deliver_media_from_response``. 
+""" + +from types import SimpleNamespace +from unittest.mock import AsyncMock + +import pytest + +from gateway.config import Platform, PlatformConfig +from gateway.platforms.base import BasePlatformAdapter, MessageEvent, MessageType, SendResult +from gateway.run import GatewayRunner +from gateway.session import SessionSource, build_session_key + + +class _MediaRoutingAdapter(BasePlatformAdapter): + def __init__(self): + super().__init__(PlatformConfig(enabled=True, token="test"), Platform.TELEGRAM) + + async def connect(self): + return True + + async def disconnect(self): + pass + + async def send(self, chat_id, content=None, **kwargs): + return SendResult(success=True, message_id="text") + + async def get_chat_info(self, chat_id): + return {"id": chat_id, "type": "dm"} + + +def _event(thread_id=None): + source = SessionSource( + platform=Platform.TELEGRAM, + chat_id="chat-1", + chat_type="dm", + thread_id=thread_id, + ) + return MessageEvent( + text="make speech", + message_type=MessageType.TEXT, + source=source, + message_id="msg-1", + ) + + +@pytest.mark.asyncio +async def test_base_adapter_routes_telegram_flac_media_tag_to_document_sender(): + adapter = _MediaRoutingAdapter() + event = _event() + adapter._message_handler = AsyncMock(return_value="MEDIA:/tmp/speech.flac") + adapter.send_voice = AsyncMock(return_value=SendResult(success=True, message_id="voice")) + adapter.send_document = AsyncMock(return_value=SendResult(success=True, message_id="doc")) + + await adapter._process_message_background(event, build_session_key(event.source)) + + adapter.send_document.assert_awaited_once_with( + chat_id="chat-1", + file_path="/tmp/speech.flac", + metadata=None, + ) + adapter.send_voice.assert_not_awaited() + + +@pytest.mark.asyncio +async def test_base_adapter_routes_non_voice_telegram_ogg_media_tag_to_document_sender(): + adapter = _MediaRoutingAdapter() + event = _event() + adapter._message_handler = AsyncMock(return_value="MEDIA:/tmp/speech.ogg") + 
adapter.send_voice = AsyncMock(return_value=SendResult(success=True, message_id="voice")) + adapter.send_document = AsyncMock(return_value=SendResult(success=True, message_id="doc")) + + await adapter._process_message_background(event, build_session_key(event.source)) + + adapter.send_document.assert_awaited_once_with( + chat_id="chat-1", + file_path="/tmp/speech.ogg", + metadata=None, + ) + adapter.send_voice.assert_not_awaited() + + +@pytest.mark.asyncio +async def test_base_adapter_routes_voice_tagged_telegram_ogg_media_tag_to_voice_sender(): + adapter = _MediaRoutingAdapter() + event = _event() + adapter._message_handler = AsyncMock( + return_value="[[audio_as_voice]]\nMEDIA:/tmp/speech.ogg" + ) + adapter.send_voice = AsyncMock(return_value=SendResult(success=True, message_id="voice")) + adapter.send_document = AsyncMock(return_value=SendResult(success=True, message_id="doc")) + + await adapter._process_message_background(event, build_session_key(event.source)) + + adapter.send_voice.assert_awaited_once_with( + chat_id="chat-1", + audio_path="/tmp/speech.ogg", + metadata=None, + ) + adapter.send_document.assert_not_awaited() + + +@pytest.mark.asyncio +async def test_streaming_delivery_routes_telegram_flac_media_tag_to_document_sender(): + event = _event(thread_id="topic-1") + adapter = SimpleNamespace( + name="test", + extract_media=BasePlatformAdapter.extract_media, + extract_images=BasePlatformAdapter.extract_images, + extract_local_files=BasePlatformAdapter.extract_local_files, + send_voice=AsyncMock(return_value=SendResult(success=True, message_id="voice")), + send_document=AsyncMock(return_value=SendResult(success=True, message_id="doc")), + send_image_file=AsyncMock(return_value=SendResult(success=True, message_id="image")), + send_video=AsyncMock(return_value=SendResult(success=True, message_id="video")), + ) + + await GatewayRunner._deliver_media_from_response( + object(), + "MEDIA:/tmp/speech.flac", + event, + adapter, + ) + + 
adapter.send_document.assert_awaited_once_with( + chat_id="chat-1", + file_path="/tmp/speech.flac", + metadata={"thread_id": "topic-1"}, + ) + adapter.send_voice.assert_not_awaited() + + +@pytest.mark.asyncio +async def test_streaming_delivery_routes_non_voice_telegram_ogg_media_tag_to_document_sender(): + event = _event(thread_id="topic-1") + adapter = SimpleNamespace( + name="test", + extract_media=BasePlatformAdapter.extract_media, + extract_images=BasePlatformAdapter.extract_images, + extract_local_files=BasePlatformAdapter.extract_local_files, + send_voice=AsyncMock(return_value=SendResult(success=True, message_id="voice")), + send_document=AsyncMock(return_value=SendResult(success=True, message_id="doc")), + send_image_file=AsyncMock(return_value=SendResult(success=True, message_id="image")), + send_video=AsyncMock(return_value=SendResult(success=True, message_id="video")), + ) + + await GatewayRunner._deliver_media_from_response( + object(), + "MEDIA:/tmp/speech.ogg", + event, + adapter, + ) + + adapter.send_document.assert_awaited_once_with( + chat_id="chat-1", + file_path="/tmp/speech.ogg", + metadata={"thread_id": "topic-1"}, + ) + adapter.send_voice.assert_not_awaited() + + +@pytest.mark.asyncio +async def test_streaming_delivery_routes_telegram_mp3_media_tag_to_voice_sender(): + """MP3 audio on Telegram must go through send_voice (which routes to + sendAudio internally); Telegram accepts MP3 for the audio player.""" + event = _event(thread_id="topic-1") + adapter = SimpleNamespace( + name="test", + extract_media=BasePlatformAdapter.extract_media, + extract_images=BasePlatformAdapter.extract_images, + extract_local_files=BasePlatformAdapter.extract_local_files, + send_voice=AsyncMock(return_value=SendResult(success=True, message_id="voice")), + send_document=AsyncMock(return_value=SendResult(success=True, message_id="doc")), + send_image_file=AsyncMock(return_value=SendResult(success=True, message_id="image")), + 
send_video=AsyncMock(return_value=SendResult(success=True, message_id="video")), + ) + + await GatewayRunner._deliver_media_from_response( + object(), + "MEDIA:/tmp/speech.mp3", + event, + adapter, + ) + + adapter.send_voice.assert_awaited_once_with( + chat_id="chat-1", + audio_path="/tmp/speech.mp3", + metadata={"thread_id": "topic-1"}, + ) + adapter.send_document.assert_not_awaited() diff --git a/tools/send_message_tool.py b/tools/send_message_tool.py index 2c15af4c35..1a3ede29d6 100644 --- a/tools/send_message_tool.py +++ b/tools/send_message_tool.py @@ -40,8 +40,12 @@ _PHONE_PLATFORMS = frozenset({"signal", "sms", "whatsapp"}) _E164_TARGET_RE = re.compile(r"^\s*\+(\d{7,15})\s*$") _IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".webp", ".gif"} _VIDEO_EXTS = {".mp4", ".mov", ".avi", ".mkv", ".3gp"} -_AUDIO_EXTS = {".ogg", ".opus", ".mp3", ".wav", ".m4a"} +_AUDIO_EXTS = {".ogg", ".opus", ".mp3", ".wav", ".m4a", ".flac"} _VOICE_EXTS = {".ogg", ".opus"} +# Telegram's Bot API sendAudio only accepts MP3 / M4A. Other audio +# formats either route through sendVoice (Opus/OGG) or fall back to +# document delivery. +_TELEGRAM_SEND_AUDIO_EXTS = {".mp3", ".m4a"} _URL_SECRET_QUERY_RE = re.compile( r"([?&](?:access_token|api[_-]?key|auth[_-]?token|token|signature|sig)=)([^&#\s]+)", re.IGNORECASE, @@ -740,7 +744,7 @@ async def _send_telegram(token, chat_id, message, media_files=None, thread_id=No last_msg = await bot.send_voice( chat_id=int_chat_id, voice=f, **thread_kwargs ) - elif ext in _AUDIO_EXTS: + elif ext in _TELEGRAM_SEND_AUDIO_EXTS: last_msg = await bot.send_audio( chat_id=int_chat_id, audio=f, **thread_kwargs )