From 1bd975c0ba87c644d560ca7bd62cc47274a8a919 Mon Sep 17 00:00:00 2001
From: clawbot
Date: Sun, 3 May 2026 09:24:08 -0600
Subject: [PATCH] fix(gateway): suppress duplicate voice transcripts

Deduplicate exact and near-exact Discord voice STT transcripts per
guild/user over a short window to avoid duplicate delayed agent replies.

Adds regression tests for exact and near-duplicate voice transcript
suppression.
---
 gateway/run.py                      | 76 +++++++++++++++++++++++++++++
 tests/gateway/test_voice_command.py | 40 +++++++++++++++
 2 files changed, 116 insertions(+)

diff --git a/gateway/run.py b/gateway/run.py
index d604947e99..1ba1984bac 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -1161,6 +1161,10 @@ class GatewayRunner:
         # Per-chat voice reply mode: "off" | "voice_only" | "all"
         self._voice_mode: Dict[str, str] = self._load_voice_modes()
 
+        # Recent voice transcripts per (guild,user) for duplicate suppression.
+        # Protects against the same utterance being emitted twice by the voice
+        # capture / STT pipeline, which otherwise produces a second delayed reply.
+        self._recent_voice_transcripts: Dict[tuple[int, int], List[tuple[float, str]]] = {}
         # Track background tasks to prevent garbage collection mid-execution
         self._background_tasks: set = set()
 
@@ -8261,6 +8265,69 @@ class GatewayRunner:
         adapter = self.adapters.get(Platform.DISCORD)
         self._set_adapter_auto_tts_disabled(adapter, chat_id, disabled=True)
 
+    def _is_duplicate_voice_transcript(self, guild_id: int, user_id: int, transcript: str) -> bool:
+        """Return True when ``transcript`` repeats a recent utterance by this speaker.
+
+        Voice capture can occasionally emit the same utterance twice a few
+        seconds apart, which creates a second queued agent run and overlapping
+        spoken replies. Dedup exact and near-exact repeats per guild/user over a
+        short window while allowing genuinely new turns through.
+
+        Args:
+            guild_id: Guild the transcript was captured in.
+            user_id: User the transcript is attributed to.
+            transcript: Raw STT text; it is normalized before comparison.
+
+        Returns:
+            True if the transcript matches one recorded for the same
+            guild/user within the window; False otherwise, including when the
+            transcript normalizes to an empty string.
+        """
+        from difflib import SequenceMatcher
+
+        # Strip punctuation before collapsing whitespace. Doing it the other
+        # way round leaves double or leading spaces behind a detached mark
+        # ("hello , world"), so identical utterances fail the exact match.
+        normalized = re.sub(r"[^\w\s]", "", transcript)
+        normalized = re.sub(r"\s+", " ", normalized).strip().lower()
+        if not normalized:
+            return False
+
+        now = time.monotonic()
+        window_seconds = 12.0
+        key = (guild_id, user_id)
+        # Tolerate a missing or non-dict store by creating it lazily.
+        recent_store = getattr(self, "_recent_voice_transcripts", None)
+        if not isinstance(recent_store, dict):
+            recent_store = {}
+            self._recent_voice_transcripts = recent_store
+        # Keep only this speaker's entries that are still inside the window.
+        recent = [
+            (ts, txt)
+            for ts, txt in recent_store.get(key, [])
+            if now - ts <= window_seconds
+        ]
+
+        for _, prior in recent:
+            if prior == normalized:
+                recent_store[key] = recent
+                return True
+            # Near-match check only when both strings have 16+ characters;
+            # shorter transcripts must match exactly.
+            if len(prior) >= 16 and len(normalized) >= 16:
+                # autojunk=False: the default heuristic treats frequent
+                # characters as junk once a sequence reaches 200 items, which
+                # deflates the ratio for long transcripts and lets repeats through.
+                matcher = SequenceMatcher(None, prior, normalized, autojunk=False)
+                if matcher.ratio() >= 0.95:
+                    recent_store[key] = recent
+                    return True
+
+        # New utterance: record it and cap the history at five entries.
+        recent.append((now, normalized))
+        recent_store[key] = recent[-5:]
+        return False
+
     async def _handle_voice_channel_input(
         self, guild_id: int, user_id: int, transcript: str
     ):
@@ -8298,6 +8365,15 @@ class GatewayRunner:
             logger.debug("Unauthorized voice input from user %d, ignoring", user_id)
             return
 
+        if self._is_duplicate_voice_transcript(guild_id, user_id, transcript):
+            logger.info(
+                "Suppressing duplicate voice transcript for guild=%s user=%s: %s",
+                guild_id,
+                user_id,
+                transcript[:100],
+            )
+            return
+
         # Show transcript in text channel (after auth, with mention sanitization)
         try:
             channel = adapter._client.get_channel(text_ch_id)
diff --git a/tests/gateway/test_voice_command.py b/tests/gateway/test_voice_command.py
index 2e9c54608a..947d4904aa 100644
--- a/tests/gateway/test_voice_command.py
+++ b/tests/gateway/test_voice_command.py
@@ -954,6 +954,46 @@ class TestVoiceChannelCommands:
         assert "Test transcript" in msg
         assert "42" in msg  # user_id in mention
 
+    @pytest.mark.asyncio
+    async def test_input_suppresses_duplicate_transcript(self, runner):
+        """Near-immediate duplicate STT output should not dispatch twice."""
+        from gateway.config import Platform
+
+        mock_adapter = AsyncMock()
+        mock_adapter._voice_text_channels = {111: 123}
+        mock_adapter._voice_sources = {}
+        mock_channel = AsyncMock()
+        mock_adapter._client = MagicMock()
+        mock_adapter._client.get_channel = MagicMock(return_value=mock_channel)
+        mock_adapter.handle_message = AsyncMock()
+        runner.adapters[Platform.DISCORD] = mock_adapter
+
+        await runner._handle_voice_channel_input(111, 42, "Hello from VC")
+        await runner._handle_voice_channel_input(111, 42, "Hello from VC")
+
+        mock_adapter.handle_message.assert_called_once()
+        mock_channel.send.assert_called_once()
+
+    @pytest.mark.asyncio
+    async def test_input_suppresses_near_duplicate_transcript(self, runner):
+        """Small STT wording drift should still be treated as the same utterance."""
+        from gateway.config import Platform
+
+        mock_adapter = AsyncMock()
+        mock_adapter._voice_text_channels = {111: 123}
+        mock_adapter._voice_sources = {}
+        mock_channel = AsyncMock()
+        mock_adapter._client = MagicMock()
+        mock_adapter._client.get_channel = MagicMock(return_value=mock_channel)
+        mock_adapter.handle_message = AsyncMock()
+        runner.adapters[Platform.DISCORD] = mock_adapter
+
+        await runner._handle_voice_channel_input(111, 42, "This is a test of the voice system")
+        await runner._handle_voice_channel_input(111, 42, "This is a test for the voice system")
+
+        mock_adapter.handle_message.assert_called_once()
+        mock_channel.send.assert_called_once()
+
     # -- _get_guild_id --
 
     def test_get_guild_id_from_guild(self, runner):