fix(gateway): use OGG for Telegram auto TTS

This commit is contained in:
Gilad Bauman 2026-05-05 11:28:27 +00:00 committed by Teknium
parent cb83149dc6
commit ae82eed2b1
2 changed files with 104 additions and 3 deletions

View file

@ -12422,11 +12422,12 @@ class GatewayRunner:
if not tts_text:
return
# Use .mp3 extension so edge-tts conversion to opus works correctly.
# The TTS tool may convert to .ogg — use file_path from result.
# Telegram's adapter only sends native voice bubbles for OGG/Opus.
# Other platforms keep the existing MP3 default.
audio_ext = "ogg" if event.source.platform == Platform.TELEGRAM else "mp3"
audio_path = os.path.join(
tempfile.gettempdir(), "hermes_voice",
f"tts_reply_{_uuid.uuid4().hex[:12]}.mp3",
f"tts_reply_{_uuid.uuid4().hex[:12]}.{audio_ext}",
)
os.makedirs(os.path.dirname(audio_path), exist_ok=True)

View file

@ -0,0 +1,100 @@
"""Tests for gateway auto-TTS voice reply audio format selection."""
import json
from pathlib import Path
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from gateway.config import Platform
from gateway.platforms.base import MessageEvent
from gateway.run import GatewayRunner
from gateway.session import SessionSource
class TestAutoVoiceReplyFormat:
@pytest.mark.asyncio
async def test_telegram_auto_voice_reply_requests_ogg_for_native_voice_bubble(self):
"""Telegram auto-TTS should request OGG/Opus so send_voice sends a voice bubble."""
runner = _make_runner()
adapter = _make_adapter(Platform.TELEGRAM)
runner.adapters[Platform.TELEGRAM] = adapter
event = _make_event(Platform.TELEGRAM)
requested_paths = []
def fake_tts(*, text, output_path):
requested_paths.append(output_path)
assert output_path.endswith(".ogg")
Path(output_path).parent.mkdir(parents=True, exist_ok=True)
Path(output_path).write_bytes(b"fake ogg opus")
return json.dumps({
"success": True,
"file_path": output_path,
"provider": "gemini",
"voice_compatible": True,
})
with patch("tools.tts_tool.text_to_speech_tool", side_effect=fake_tts):
await runner._send_voice_reply(event, "hello from auto tts")
assert requested_paths
assert requested_paths[0].endswith(".ogg")
adapter.send_voice.assert_awaited_once()
assert adapter.send_voice.await_args.kwargs["audio_path"].endswith(".ogg")
@pytest.mark.asyncio
async def test_non_telegram_auto_voice_reply_keeps_mp3_default(self):
"""Non-Telegram platforms should keep the current MP3 default."""
runner = _make_runner()
adapter = _make_adapter(Platform.SLACK)
runner.adapters[Platform.SLACK] = adapter
event = _make_event(Platform.SLACK)
requested_paths = []
def fake_tts(*, text, output_path):
requested_paths.append(output_path)
assert output_path.endswith(".mp3")
Path(output_path).parent.mkdir(parents=True, exist_ok=True)
Path(output_path).write_bytes(b"fake mp3")
return json.dumps({
"success": True,
"file_path": output_path,
"provider": "gemini",
"voice_compatible": False,
})
with patch("tools.tts_tool.text_to_speech_tool", side_effect=fake_tts):
await runner._send_voice_reply(event, "hello from auto tts")
assert requested_paths
assert requested_paths[0].endswith(".mp3")
adapter.send_voice.assert_awaited_once()
assert adapter.send_voice.await_args.kwargs["audio_path"].endswith(".mp3")
def _make_runner() -> GatewayRunner:
with patch("gateway.run.GatewayRunner._load_voice_modes", return_value={}):
runner = GatewayRunner.__new__(GatewayRunner)
runner._voice_mode = {}
runner.adapters = {}
return runner
def _make_adapter(platform: Platform) -> MagicMock:
adapter = MagicMock()
adapter.platform = platform
adapter.send_voice = AsyncMock()
return adapter
def _make_event(platform: Platform) -> MessageEvent:
return MessageEvent(
text="trigger",
source=SessionSource(
platform=platform,
chat_id="123",
user_id="u1",
user_name="User",
),
message_id="456",
)