mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-02 07:11:49 +00:00
fix(tts): keep native audio outside Telegram voice delivery
This commit is contained in:
parent
e19f4c1730
commit
ed9087fce7
2 changed files with 80 additions and 4 deletions
70
tests/tools/test_tts_opus_routing.py
Normal file
70
tests/tools/test_tts_opus_routing.py
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
import json
|
||||
from pathlib import Path
|
||||
from unittest.mock import Mock
|
||||
|
||||
import pytest
|
||||
|
||||
from gateway.session_context import _UNSET, _VAR_MAP
|
||||
from tools import tts_tool
|
||||
|
||||
|
||||
def _reset_session_context() -> None:
    """Set every variable registered in ``_VAR_MAP`` back to ``_UNSET``."""
    for context_var in _VAR_MAP.values():
        context_var.set(_UNSET)
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _clean_session_platform(monkeypatch):
    """Run every test in this module without a session platform set.

    Setup resets the session context and removes ``HERMES_SESSION_PLATFORM``
    from the environment (no error if it is absent). Teardown resets the
    session context again so nothing a test sets carries over to the next one.
    """
    # Setup: start each test from a blank session context and environment.
    _reset_session_context()
    monkeypatch.delenv("HERMES_SESSION_PLATFORM", raising=False)

    yield

    # Teardown: discard whatever the test stored in the session context.
    _reset_session_context()
|
||||
|
||||
|
||||
async def _write_edge_output(_text: str, output_path: str, _tts_config: dict) -> str:
    """Stub used in place of ``tts_tool._generate_edge_tts`` by the tests.

    Ignores the text and TTS config, writes the placeholder bytes ``b"mp3"``
    to ``output_path``, and returns ``output_path`` unchanged.
    """
    destination = Path(output_path)
    destination.write_bytes(b"mp3")
    return output_path
|
||||
|
||||
|
||||
def test_edge_cli_preserves_native_mp3(tmp_path, monkeypatch):
    """With no session platform set, Edge output is returned as the MP3 itself.

    The result must point at the requested ``.mp3`` path, report
    ``voice_compatible`` as False, carry a plain ``MEDIA:`` tag, and the
    Opus converter must never be invoked.
    """
    mp3_path = tmp_path / "speech.mp3"
    opus_converter = Mock()

    # Applied in insertion order, matching one setattr call per entry.
    replacements = {
        "_load_tts_config": lambda: {"provider": "edge"},
        "_import_edge_tts": lambda: object(),
        "_generate_edge_tts": _write_edge_output,
        "_convert_to_opus": opus_converter,
    }
    for attr_name, replacement in replacements.items():
        monkeypatch.setattr(tts_tool, attr_name, replacement)

    raw_response = tts_tool.text_to_speech_tool("hello", output_path=str(mp3_path))
    payload = json.loads(raw_response)

    assert payload["success"] is True
    assert payload["file_path"] == str(mp3_path)
    assert payload["voice_compatible"] is False
    assert payload["media_tag"] == f"MEDIA:{mp3_path}"
    opus_converter.assert_not_called()
|
||||
|
||||
|
||||
def test_edge_telegram_converts_to_opus_voice(tmp_path, monkeypatch):
    """With the session platform set to Telegram, Edge output is converted to Opus.

    The result must point at the converted ``.ogg`` file, report
    ``voice_compatible`` as True, prefix the media tag with
    ``[[audio_as_voice]]``, and the converter must be called exactly once
    with the original MP3 path.
    """
    mp3_path = tmp_path / "speech.mp3"
    ogg_path = tmp_path / "speech.ogg"

    def _fake_opus_conversion(path: str) -> str:
        # Stand-in converter: checks its input, then writes a placeholder .ogg.
        assert path == str(mp3_path)
        ogg_path.write_bytes(b"ogg")
        return str(ogg_path)

    opus_converter = Mock(side_effect=_fake_opus_conversion)

    monkeypatch.setenv("HERMES_SESSION_PLATFORM", "telegram")

    # Applied in insertion order, matching one setattr call per entry.
    replacements = {
        "_load_tts_config": lambda: {"provider": "edge"},
        "_import_edge_tts": lambda: object(),
        "_generate_edge_tts": _write_edge_output,
        "_convert_to_opus": opus_converter,
    }
    for attr_name, replacement in replacements.items():
        monkeypatch.setattr(tts_tool, attr_name, replacement)

    raw_response = tts_tool.text_to_speech_tool("hello", output_path=str(mp3_path))
    payload = json.loads(raw_response)

    assert payload["success"] is True
    assert payload["file_path"] == str(ogg_path)
    assert payload["voice_compatible"] is True
    assert payload["media_tag"] == f"[[audio_as_voice]]\nMEDIA:{ogg_path}"
    opus_converter.assert_called_once_with(str(mp3_path))
|
||||
|
|
@ -1830,8 +1830,10 @@ def text_to_speech_tool(
|
|||
"error": f"TTS generation produced no output (provider: {provider})"
|
||||
}, ensure_ascii=False)
|
||||
|
||||
# Try Opus conversion for Telegram compatibility
|
||||
# Edge TTS outputs MP3, NeuTTS/KittenTTS output WAV — all need ffmpeg conversion
|
||||
# Try Opus conversion for Telegram compatibility.
|
||||
# Edge TTS outputs MP3, NeuTTS/KittenTTS output WAV. Keep those native
|
||||
# formats for local/CLI playback and only convert when the current
|
||||
# platform actually needs Opus voice delivery.
|
||||
voice_compatible = False
|
||||
if command_provider_config is not None:
|
||||
# Command providers are documents by default. Voice-bubble
|
||||
|
|
@ -1843,13 +1845,17 @@ def text_to_speech_tool(
|
|||
if opus_path:
|
||||
file_str = opus_path
|
||||
voice_compatible = file_str.endswith(".ogg")
|
||||
elif provider in {"edge", "neutts", "minimax", "xai", "kittentts", "piper"} and not file_str.endswith(".ogg"):
|
||||
elif (
|
||||
want_opus
|
||||
and provider in {"edge", "neutts", "minimax", "xai", "kittentts", "piper"}
|
||||
and not file_str.endswith(".ogg")
|
||||
):
|
||||
opus_path = _convert_to_opus(file_str)
|
||||
if opus_path:
|
||||
file_str = opus_path
|
||||
voice_compatible = True
|
||||
elif provider in {"elevenlabs", "openai", "mistral", "gemini"}:
|
||||
voice_compatible = file_str.endswith(".ogg")
|
||||
voice_compatible = want_opus and file_str.endswith(".ogg")
|
||||
|
||||
file_size = os.path.getsize(file_str)
|
||||
logger.info("TTS audio saved: %s (%s bytes, provider: %s)", file_str, f"{file_size:,}", provider)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue