mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-03 02:11:48 +00:00
Extracted from PR #17211 (@versun) so it can land independently of the local_command TTS provider redesign. - Add should_send_media_as_audio(platform, ext, is_voice) in gateway/platforms/base.py; single source of truth for audio routing. - Add .flac to recognized audio extensions (MEDIA regex, weixin audio set, send_message audio set). - Telegram send_voice() now falls back to send_document for formats Telegram's Bot API can't play natively (.wav, .flac, ...) instead of raising; MP3/M4A still go to sendAudio, Opus/OGG still go to sendVoice. - Route _send_telegram() in send_message_tool through a narrower _TELEGRAM_SEND_AUDIO_EXTS = {.mp3, .m4a} set. - cron.scheduler._send_media_via_adapter now delegates the audio decision to should_send_media_as_audio so it matches the gateway. - Update the cron live-adapter ogg test to flag [[audio_as_voice]] so it still routes to sendVoice under the new Telegram-specific policy. - Tests: unit coverage for should_send_media_as_audio across platforms, end-to-end MEDIA routing via _process_message_background and GatewayRunner._deliver_media_from_response, TelegramAdapter.send_voice fallback for FLAC/WAV. Co-authored-by: Versun <me+github7604@versun.org>
195 lines
6.9 KiB
Python
195 lines
6.9 KiB
Python
"""
|
|
Tests for cross-platform audio/voice media routing.
|
|
|
|
These tests pin the expected delivery path for audio media files on
Telegram (where Bot-API sendAudio only accepts MP3/M4A, and .ogg/.opus
only renders as a voice bubble when explicitly flagged), both via the
base adapter's ``_process_message_background`` and via
``GatewayRunner._deliver_media_from_response``.
|
|
"""
|
|
|
|
from types import SimpleNamespace
|
|
from unittest.mock import AsyncMock
|
|
|
|
import pytest
|
|
|
|
from gateway.config import Platform, PlatformConfig
|
|
from gateway.platforms.base import BasePlatformAdapter, MessageEvent, MessageType, SendResult
|
|
from gateway.run import GatewayRunner
|
|
from gateway.session import SessionSource, build_session_key
|
|
|
|
|
|
class _MediaRoutingAdapter(BasePlatformAdapter):
    """Stub adapter registered as ``Platform.TELEGRAM`` for routing tests.

    ``connect``, ``disconnect``, ``send`` and ``get_chat_info`` are replaced
    with trivial canned implementations; all other behaviour is inherited
    from ``BasePlatformAdapter``.
    """

    def __init__(self):
        # Enabled config with a placeholder token; no real credentials needed.
        config = PlatformConfig(enabled=True, token="test")
        super().__init__(config, Platform.TELEGRAM)

    async def connect(self):
        """Always report a successful connection."""
        return True

    async def disconnect(self):
        """Do nothing; there is no connection to tear down."""
        return None

    async def send(self, chat_id, content=None, **kwargs):
        """Return a successful ``SendResult`` tagged with message id ``"text"``."""
        outcome = SendResult(success=True, message_id="text")
        return outcome

    async def get_chat_info(self, chat_id):
        """Describe every chat as a DM carrying the requested id."""
        info = {"id": chat_id, "type": "dm"}
        return info
|
|
|
|
|
|
def _event(thread_id=None):
    """Build a text ``MessageEvent`` coming from Telegram DM ``chat-1``.

    ``thread_id`` is passed straight through to the ``SessionSource``; it
    defaults to ``None``.
    """
    return MessageEvent(
        text="make speech",
        message_type=MessageType.TEXT,
        source=SessionSource(
            platform=Platform.TELEGRAM,
            chat_id="chat-1",
            chat_type="dm",
            thread_id=thread_id,
        ),
        message_id="msg-1",
    )
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_base_adapter_routes_telegram_flac_media_tag_to_document_sender():
    """A ``MEDIA:`` tag naming a .flac file must reach ``send_document`` only."""
    adapter = _MediaRoutingAdapter()
    incoming = _event()

    # Handler reply carrying the MEDIA tag under test.
    adapter._message_handler = AsyncMock(return_value="MEDIA:/tmp/speech.flac")
    voice_sender = AsyncMock(
        return_value=SendResult(success=True, message_id="voice")
    )
    document_sender = AsyncMock(
        return_value=SendResult(success=True, message_id="doc")
    )
    adapter.send_voice = voice_sender
    adapter.send_document = document_sender

    session_key = build_session_key(incoming.source)
    await adapter._process_message_background(incoming, session_key)

    expected_call = {
        "chat_id": "chat-1",
        "file_path": "/tmp/speech.flac",
        "metadata": None,
    }
    document_sender.assert_awaited_once_with(**expected_call)
    voice_sender.assert_not_awaited()
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_base_adapter_routes_non_voice_telegram_ogg_media_tag_to_document_sender():
    """An .ogg ``MEDIA:`` tag without the voice flag must reach ``send_document`` only."""
    adapter = _MediaRoutingAdapter()
    incoming = _event()

    # Reply has no [[audio_as_voice]] marker, only the bare MEDIA tag.
    adapter._message_handler = AsyncMock(return_value="MEDIA:/tmp/speech.ogg")
    voice_sender = AsyncMock(
        return_value=SendResult(success=True, message_id="voice")
    )
    document_sender = AsyncMock(
        return_value=SendResult(success=True, message_id="doc")
    )
    adapter.send_voice = voice_sender
    adapter.send_document = document_sender

    session_key = build_session_key(incoming.source)
    await adapter._process_message_background(incoming, session_key)

    expected_call = {
        "chat_id": "chat-1",
        "file_path": "/tmp/speech.ogg",
        "metadata": None,
    }
    document_sender.assert_awaited_once_with(**expected_call)
    voice_sender.assert_not_awaited()
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_base_adapter_routes_voice_tagged_telegram_ogg_media_tag_to_voice_sender():
    """An .ogg ``MEDIA:`` tag preceded by ``[[audio_as_voice]]`` must reach ``send_voice`` only."""
    adapter = _MediaRoutingAdapter()
    incoming = _event()

    # Voice flag on its own line, followed by the MEDIA tag.
    handler_reply = "[[audio_as_voice]]\nMEDIA:/tmp/speech.ogg"
    adapter._message_handler = AsyncMock(return_value=handler_reply)
    voice_sender = AsyncMock(
        return_value=SendResult(success=True, message_id="voice")
    )
    document_sender = AsyncMock(
        return_value=SendResult(success=True, message_id="doc")
    )
    adapter.send_voice = voice_sender
    adapter.send_document = document_sender

    session_key = build_session_key(incoming.source)
    await adapter._process_message_background(incoming, session_key)

    expected_call = {
        "chat_id": "chat-1",
        "audio_path": "/tmp/speech.ogg",
        "metadata": None,
    }
    voice_sender.assert_awaited_once_with(**expected_call)
    document_sender.assert_not_awaited()
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_streaming_delivery_routes_telegram_flac_media_tag_to_document_sender():
    """``_deliver_media_from_response`` must hand a .flac ``MEDIA:`` tag to ``send_document``.

    The event carries ``thread_id="topic-1"``, which is expected back in the
    sender's ``metadata`` argument.
    """
    incoming = _event(thread_id="topic-1")

    def _ok_sender(message_id):
        # Async mock that resolves to a successful SendResult with the given id.
        return AsyncMock(
            return_value=SendResult(success=True, message_id=message_id)
        )

    adapter = SimpleNamespace(
        name="test",
        extract_media=BasePlatformAdapter.extract_media,
        extract_images=BasePlatformAdapter.extract_images,
        extract_local_files=BasePlatformAdapter.extract_local_files,
        send_voice=_ok_sender("voice"),
        send_document=_ok_sender("doc"),
        send_image_file=_ok_sender("image"),
        send_video=_ok_sender("video"),
    )

    # Called unbound: a bare object() stands in for the runner instance.
    runner_stub = object()
    response_text = "MEDIA:/tmp/speech.flac"
    await GatewayRunner._deliver_media_from_response(
        runner_stub, response_text, incoming, adapter
    )

    expected_call = {
        "chat_id": "chat-1",
        "file_path": "/tmp/speech.flac",
        "metadata": {"thread_id": "topic-1"},
    }
    adapter.send_document.assert_awaited_once_with(**expected_call)
    adapter.send_voice.assert_not_awaited()
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_streaming_delivery_routes_non_voice_telegram_ogg_media_tag_to_document_sender():
    """``_deliver_media_from_response`` must hand an unflagged .ogg ``MEDIA:`` tag to ``send_document``.

    The event carries ``thread_id="topic-1"``, which is expected back in the
    sender's ``metadata`` argument.
    """
    incoming = _event(thread_id="topic-1")

    def _ok_sender(message_id):
        # Async mock that resolves to a successful SendResult with the given id.
        return AsyncMock(
            return_value=SendResult(success=True, message_id=message_id)
        )

    adapter = SimpleNamespace(
        name="test",
        extract_media=BasePlatformAdapter.extract_media,
        extract_images=BasePlatformAdapter.extract_images,
        extract_local_files=BasePlatformAdapter.extract_local_files,
        send_voice=_ok_sender("voice"),
        send_document=_ok_sender("doc"),
        send_image_file=_ok_sender("image"),
        send_video=_ok_sender("video"),
    )

    # Called unbound: a bare object() stands in for the runner instance.
    runner_stub = object()
    response_text = "MEDIA:/tmp/speech.ogg"
    await GatewayRunner._deliver_media_from_response(
        runner_stub, response_text, incoming, adapter
    )

    expected_call = {
        "chat_id": "chat-1",
        "file_path": "/tmp/speech.ogg",
        "metadata": {"thread_id": "topic-1"},
    }
    adapter.send_document.assert_awaited_once_with(**expected_call)
    adapter.send_voice.assert_not_awaited()
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_streaming_delivery_routes_telegram_mp3_media_tag_to_voice_sender():
    """An .mp3 ``MEDIA:`` tag on Telegram must be delivered via ``send_voice``.

    ``send_voice`` is the entry point that routes to sendAudio internally, and
    Telegram accepts MP3 for its audio player, so ``send_document`` must stay
    untouched.
    """
    incoming = _event(thread_id="topic-1")

    def _ok_sender(message_id):
        # Async mock that resolves to a successful SendResult with the given id.
        return AsyncMock(
            return_value=SendResult(success=True, message_id=message_id)
        )

    adapter = SimpleNamespace(
        name="test",
        extract_media=BasePlatformAdapter.extract_media,
        extract_images=BasePlatformAdapter.extract_images,
        extract_local_files=BasePlatformAdapter.extract_local_files,
        send_voice=_ok_sender("voice"),
        send_document=_ok_sender("doc"),
        send_image_file=_ok_sender("image"),
        send_video=_ok_sender("video"),
    )

    # Called unbound: a bare object() stands in for the runner instance.
    runner_stub = object()
    response_text = "MEDIA:/tmp/speech.mp3"
    await GatewayRunner._deliver_media_from_response(
        runner_stub, response_text, incoming, adapter
    )

    expected_call = {
        "chat_id": "chat-1",
        "audio_path": "/tmp/speech.mp3",
        "metadata": {"thread_id": "topic-1"},
    }
    adapter.send_voice.assert_awaited_once_with(**expected_call)
    adapter.send_document.assert_not_awaited()
|