fix(gateway): avoid duplicate Telegram text after auto-TTS voice replies

2026-06-05 07:41:39 +00:00 · 2026-05-16 07:36:08 +03:00 · 2026-05-16 07:36:08 +03:00 · f8eeb570cb
commit f8eeb570cb
parent b46ef2ef7a
2 changed files with 121 additions and 3 deletions
--- a/tests/gateway/test_base_topic_sessions.py
+++ b/tests/gateway/test_base_topic_sessions.py
@ -1,12 +1,14 @@
 """Tests for BasePlatformAdapter topic-aware session handling."""

 import asyncio
+import json
 from types import SimpleNamespace
+from unittest.mock import AsyncMock, patch

 import pytest

 from gateway.config import Platform, PlatformConfig
-from gateway.platforms.base import BasePlatformAdapter, MessageEvent, ProcessingOutcome, SendResult
+from gateway.platforms.base import BasePlatformAdapter, MessageEvent, MessageType, ProcessingOutcome, SendResult
 from gateway.session import SessionSource, build_session_key


@ -246,3 +248,107 @@ class TestBasePlatformTopicSessions:
            ("start", "1"),
            ("complete", "1", ProcessingOutcome.CANCELLED),
        ]
+
+
+class TestTelegramAutoTtsCaptionDelivery:
+    """Check how reply text is delivered alongside an auto-TTS voice reply.
+
+    Each test pushes a voice message through
+    ``DummyTelegramAdapter._process_message_background`` with ``play_tts``
+    mocked, then inspects the ``caption`` keyword passed to ``play_tts`` and
+    the contents of ``adapter.sent`` (presumably the dummy adapter's record of
+    text messages it was asked to send -- confirm against its definition).
+    """
+
+    @staticmethod
+    def _make_voice_event(chat_id: str = "-1001", thread_id: str = "17585") -> MessageEvent:
+        """Build a ``MessageType.VOICE`` event from a Telegram group chat.
+
+        ``chat_id`` and ``thread_id`` are placed on the event's
+        ``SessionSource``; the defaults are the values the assertions in this
+        class expect to see echoed back in follow-up messages.
+        """
+        return MessageEvent(
+            text="hello",
+            message_type=MessageType.VOICE,
+            source=SessionSource(
+                platform=Platform.TELEGRAM,
+                chat_id=chat_id,
+                chat_type="group",
+                thread_id=thread_id,
+            ),
+            message_id="voice-1",
+        )
+
+    @staticmethod
+    def _hold_typing():
+        """Return a replacement for ``adapter._keep_typing`` that never finishes by itself.
+
+        The returned coroutine function awaits a fresh ``asyncio.Event`` that
+        nothing ever sets, so it only ends when the awaiting task is cancelled.
+        """
+        async def hold(_chat_id, interval=2.0, metadata=None):
+            await asyncio.Event().wait()
+
+        return hold
+
+    @pytest.mark.asyncio
+    async def test_short_telegram_auto_tts_uses_caption_without_followup_text(self, tmp_path):
+        """A short reply is passed as the voice caption and no follow-up text is sent."""
+        adapter = DummyTelegramAdapter()
+        adapter._keep_typing = self._hold_typing()
+        # Force auto-TTS on for every chat and make the voice send report success.
+        adapter._should_auto_tts_for_chat = lambda _chat_id: True
+        adapter.play_tts = AsyncMock(return_value=SendResult(success=True, message_id="tts-1"))
+        # asyncio.sleep(0, result=...) gives an awaitable that resolves to the reply text.
+        adapter.set_message_handler(lambda _event: asyncio.sleep(0, result="Short reply"))
+
+        # Placeholder file standing in for the synthesized audio.
+        tts_path = tmp_path / "reply.ogg"
+        tts_path.write_text("audio", encoding="utf-8")
+        event = self._make_voice_event()
+
+        # Patch the TTS tool: requirements report as met and the tool returns a
+        # JSON payload whose "file_path" points at the placeholder file.
+        with patch("tools.tts_tool.check_tts_requirements", return_value=True), patch(
+            "tools.tts_tool.text_to_speech_tool",
+            return_value=json.dumps({"file_path": str(tts_path)}),
+        ):
+            await adapter._process_message_background(event, build_session_key(event.source))
+
+        adapter.play_tts.assert_awaited_once()
+        assert adapter.play_tts.await_args.kwargs["caption"] == "Short reply"
+        # Nothing recorded in adapter.sent: the text was not sent a second time.
+        assert adapter.sent == []
+
+    @pytest.mark.asyncio
+    async def test_long_telegram_auto_tts_keeps_followup_text_when_caption_would_truncate(self, tmp_path):
+        """A 1025-character reply gets no caption and is sent in full as follow-up text."""
+        adapter = DummyTelegramAdapter()
+        adapter._keep_typing = self._hold_typing()
+        adapter._should_auto_tts_for_chat = lambda _chat_id: True
+        adapter.play_tts = AsyncMock(return_value=SendResult(success=True, message_id="tts-1"))
+        # 1025 chars: presumably one past Telegram's 1024-character caption
+        # limit, per the test name -- confirm against the adapter's constant.
+        long_reply = "x" * 1025
+        adapter.set_message_handler(lambda _event: asyncio.sleep(0, result=long_reply))
+
+        # Placeholder file standing in for the synthesized audio.
+        tts_path = tmp_path / "reply.ogg"
+        tts_path.write_text("audio", encoding="utf-8")
+        event = self._make_voice_event()
+
+        # Patch the TTS tool so it returns the placeholder file path as JSON.
+        with patch("tools.tts_tool.check_tts_requirements", return_value=True), patch(
+            "tools.tts_tool.text_to_speech_tool",
+            return_value=json.dumps({"file_path": str(tts_path)}),
+        ):
+            await adapter._process_message_background(event, build_session_key(event.source))
+
+        adapter.play_tts.assert_awaited_once()
+        # The voice message is still sent, but without a caption.
+        assert adapter.play_tts.await_args.kwargs["caption"] is None
+        # The full text follows as one message carrying the event's chat and thread ids.
+        assert adapter.sent == [
+            {
+                "chat_id": "-1001",
+                "content": long_reply,
+                "reply_to": None,
+                "metadata": {"thread_id": "17585", "notify": True},
+            }
+        ]
+
+    @pytest.mark.asyncio
+    async def test_telegram_auto_tts_send_failure_keeps_followup_text(self, tmp_path):
+        """When ``play_tts`` reports failure, the reply text is still sent as a follow-up."""
+        adapter = DummyTelegramAdapter()
+        adapter._keep_typing = self._hold_typing()
+        adapter._should_auto_tts_for_chat = lambda _chat_id: True
+        # The voice send is mocked to report failure (success=False).
+        adapter.play_tts = AsyncMock(return_value=SendResult(success=False, error="boom"))
+        adapter.set_message_handler(lambda _event: asyncio.sleep(0, result="Short reply"))
+
+        # Placeholder file standing in for the synthesized audio.
+        tts_path = tmp_path / "reply.ogg"
+        tts_path.write_text("audio", encoding="utf-8")
+        event = self._make_voice_event()
+
+        # Patch the TTS tool so it returns the placeholder file path as JSON.
+        with patch("tools.tts_tool.check_tts_requirements", return_value=True), patch(
+            "tools.tts_tool.text_to_speech_tool",
+            return_value=json.dumps({"file_path": str(tts_path)}),
+        ):
+            await adapter._process_message_background(event, build_session_key(event.source))
+
+        adapter.play_tts.assert_awaited_once()
+        # The caption was attempted with the short reply...
+        assert adapter.play_tts.await_args.kwargs["caption"] == "Short reply"
+        # ...but because the voice send failed, the text is delivered separately.
+        assert adapter.sent == [
+            {
+                "chat_id": "-1001",
+                "content": "Short reply",
+                "reply_to": None,
+                "metadata": {"thread_id": "17585", "notify": True},
+            }
+        ]