mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-11 08:42:11 +00:00
fix(gateway): use OGG for Telegram auto TTS
This commit is contained in:
parent
cb83149dc6
commit
ae82eed2b1
2 changed files with 104 additions and 3 deletions
|
|
@ -12422,11 +12422,12 @@ class GatewayRunner:
|
||||||
if not tts_text:
|
if not tts_text:
|
||||||
return
|
return
|
||||||
|
|
||||||
# Use .mp3 extension so edge-tts conversion to opus works correctly.
|
# Telegram's adapter only sends native voice bubbles for OGG/Opus.
|
||||||
# The TTS tool may convert to .ogg — use file_path from result.
|
# Other platforms keep the existing MP3 default.
|
||||||
|
audio_ext = "ogg" if event.source.platform == Platform.TELEGRAM else "mp3"
|
||||||
audio_path = os.path.join(
|
audio_path = os.path.join(
|
||||||
tempfile.gettempdir(), "hermes_voice",
|
tempfile.gettempdir(), "hermes_voice",
|
||||||
f"tts_reply_{_uuid.uuid4().hex[:12]}.mp3",
|
f"tts_reply_{_uuid.uuid4().hex[:12]}.{audio_ext}",
|
||||||
)
|
)
|
||||||
os.makedirs(os.path.dirname(audio_path), exist_ok=True)
|
os.makedirs(os.path.dirname(audio_path), exist_ok=True)
|
||||||
|
|
||||||
|
|
|
||||||
100
tests/gateway/test_auto_voice_reply_format.py
Normal file
100
tests/gateway/test_auto_voice_reply_format.py
Normal file
|
|
@ -0,0 +1,100 @@
|
||||||
|
"""Tests for gateway auto-TTS voice reply audio format selection."""
|
||||||
|
|
||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
from unittest.mock import AsyncMock, MagicMock, patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from gateway.config import Platform
|
||||||
|
from gateway.platforms.base import MessageEvent
|
||||||
|
from gateway.run import GatewayRunner
|
||||||
|
from gateway.session import SessionSource
|
||||||
|
|
||||||
|
|
||||||
|
class TestAutoVoiceReplyFormat:
    """Check which audio file extension gateway auto-TTS requests per platform.

    Each test drives ``GatewayRunner._send_voice_reply`` with
    ``tools.tts_tool.text_to_speech_tool`` patched out, then inspects the
    ``output_path`` handed to the TTS tool and the ``audio_path`` keyword
    forwarded to the adapter's ``send_voice``.
    """

    @staticmethod
    async def _assert_reply_uses_extension(
        platform,
        extension: str,
        *,
        payload: bytes,
        voice_compatible: bool,
    ) -> None:
        """Run one auto-TTS reply and assert every path ends with *extension*.

        Args:
            platform: ``Platform`` member used for the adapter and the event.
            extension: Expected suffix including the dot, e.g. ``".ogg"``.
            payload: Bytes the fake TTS tool writes to the requested path.
            voice_compatible: Value reported under ``"voice_compatible"`` in
                the fake TTS tool's JSON result.
        """
        runner = _make_runner()
        adapter = _make_adapter(platform)
        runner.adapters[platform] = adapter
        event = _make_event(platform)
        requested_paths = []

        def fake_tts(*, text, output_path):
            # Record first so the assertions after the call still see the
            # path even if the code under test swallows the failure below.
            requested_paths.append(output_path)
            assert output_path.endswith(extension)
            target = Path(output_path)
            target.parent.mkdir(parents=True, exist_ok=True)
            target.write_bytes(payload)
            return json.dumps({
                "success": True,
                "file_path": output_path,
                "provider": "gemini",
                "voice_compatible": voice_compatible,
            })

        try:
            with patch("tools.tts_tool.text_to_speech_tool", side_effect=fake_tts):
                await runner._send_voice_reply(event, "hello from auto tts")

            assert requested_paths
            assert requested_paths[0].endswith(extension)
            adapter.send_voice.assert_awaited_once()
            assert adapter.send_voice.await_args.kwargs["audio_path"].endswith(extension)
        finally:
            # The fake TTS tool writes a real file; remove it in case the
            # code under test left it behind (no-op if it was already deleted).
            for leftover in requested_paths:
                Path(leftover).unlink(missing_ok=True)

    @pytest.mark.asyncio
    async def test_telegram_auto_voice_reply_requests_ogg_for_native_voice_bubble(self):
        """Telegram auto-TTS should request OGG/Opus so send_voice sends a voice bubble."""
        await self._assert_reply_uses_extension(
            Platform.TELEGRAM,
            ".ogg",
            payload=b"fake ogg opus",
            voice_compatible=True,
        )

    @pytest.mark.asyncio
    async def test_non_telegram_auto_voice_reply_keeps_mp3_default(self):
        """Non-Telegram platforms should keep the current MP3 default."""
        await self._assert_reply_uses_extension(
            Platform.SLACK,
            ".mp3",
            payload=b"fake mp3",
            voice_compatible=False,
        )
|
||||||
|
|
||||||
|
|
||||||
|
def _make_runner() -> GatewayRunner:
    """Return a ``GatewayRunner`` created via ``__new__`` (``__init__`` is not run).

    Only ``_voice_mode`` and ``adapters`` are set, both to empty dicts.
    ``GatewayRunner._load_voice_modes`` is patched to return ``{}`` while the
    instance is being built.
    """
    voice_modes_patch = patch(
        "gateway.run.GatewayRunner._load_voice_modes",
        return_value={},
    )
    with voice_modes_patch:
        bare_runner = GatewayRunner.__new__(GatewayRunner)
        bare_runner._voice_mode = {}
        bare_runner.adapters = {}
    return bare_runner
|
||||||
|
|
||||||
|
|
||||||
|
def _make_adapter(platform: Platform) -> MagicMock:
    """Return a mock adapter for *platform* whose ``send_voice`` is awaitable.

    ``send_voice`` is an ``AsyncMock`` so tests can use the ``assert_awaited_*``
    helpers and read ``await_args`` on it.
    """
    return MagicMock(platform=platform, send_voice=AsyncMock())
|
||||||
|
|
||||||
|
|
||||||
|
def _make_event(platform: Platform) -> MessageEvent:
    """Return a fixed ``MessageEvent`` whose source is on *platform*.

    All other fields (text, chat/user identifiers, message id) are constant
    placeholder values.
    """
    origin = SessionSource(
        platform=platform,
        chat_id="123",
        user_id="u1",
        user_name="User",
    )
    return MessageEvent(text="trigger", source=origin, message_id="456")
|
||||||
Loading…
Add table
Add a link
Reference in a new issue