mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-07 02:51:50 +00:00
fix(gateway): suppress duplicate voice transcripts
Deduplicate exact and near-exact Discord voice STT transcripts per guild/user over a short window to avoid duplicate delayed agent replies. Adds regression tests for exact and near-duplicate voice transcript suppression.
This commit is contained in:
parent
b58db237e4
commit
1bd975c0ba
2 changed files with 94 additions and 0 deletions
|
|
@ -1161,6 +1161,10 @@ class GatewayRunner:
|
||||||
|
|
||||||
# Per-chat voice reply mode: "off" | "voice_only" | "all"
|
# Per-chat voice reply mode: "off" | "voice_only" | "all"
|
||||||
self._voice_mode: Dict[str, str] = self._load_voice_modes()
|
self._voice_mode: Dict[str, str] = self._load_voice_modes()
|
||||||
|
# Recent voice transcripts per (guild,user) for duplicate suppression.
|
||||||
|
# Protects against the same utterance being emitted twice by the voice
|
||||||
|
# capture / STT pipeline, which otherwise produces a second delayed reply.
|
||||||
|
self._recent_voice_transcripts: Dict[tuple[int, int], List[tuple[float, str]]] = {}
|
||||||
|
|
||||||
# Track background tasks to prevent garbage collection mid-execution
|
# Track background tasks to prevent garbage collection mid-execution
|
||||||
self._background_tasks: set = set()
|
self._background_tasks: set = set()
|
||||||
|
|
@ -8261,6 +8265,47 @@ class GatewayRunner:
|
||||||
adapter = self.adapters.get(Platform.DISCORD)
|
adapter = self.adapters.get(Platform.DISCORD)
|
||||||
self._set_adapter_auto_tts_disabled(adapter, chat_id, disabled=True)
|
self._set_adapter_auto_tts_disabled(adapter, chat_id, disabled=True)
|
||||||
|
|
||||||
|
def _is_duplicate_voice_transcript(self, guild_id: int, user_id: int, transcript: str) -> bool:
    """Return True when *transcript* repeats a recent utterance of the same speaker.

    Voice capture can occasionally emit the same utterance twice a few
    seconds apart, which creates a second queued agent run and overlapping
    spoken replies. Exact and near-exact repeats are suppressed per
    ``(guild_id, user_id)`` over a short window while genuinely new turns
    are let through.

    Args:
        guild_id: Guild the utterance was captured in; part of the dedup key.
        user_id: Speaker; part of the dedup key.
        transcript: Raw STT text for the utterance.

    Returns:
        True if a matching transcript from the same guild/user was recorded
        within the last 12 seconds, otherwise False. A transcript that is
        empty after normalization is never reported as a duplicate and is
        not recorded.

    Side effects:
        Updates ``self._recent_voice_transcripts[(guild_id, user_id)]``:
        expired entries are dropped, and a non-duplicate transcript is
        appended (at most the 5 newest entries are kept). A detected
        duplicate is not re-recorded, so its original timestamp keeps
        governing when the window closes.
    """
    from difflib import SequenceMatcher

    # Drop punctuation BEFORE collapsing whitespace. Doing it the other way
    # round leaves double/trailing spaces behind ("hello , world" ->
    # "hello  world"), which defeats the exact-match check for short
    # utterances that are not eligible for fuzzy matching.
    normalized = re.sub(r"[^\w\s]", "", transcript.lower())
    normalized = re.sub(r"\s+", " ", normalized).strip()
    if not normalized:
        return False

    now = time.monotonic()
    window_seconds = 12.0
    key = (guild_id, user_id)

    # Tolerate instances whose store was never initialised (or was replaced
    # by something unusable) by creating it lazily.
    recent_store = getattr(self, "_recent_voice_transcripts", None)
    if not isinstance(recent_store, dict):
        recent_store = {}
        self._recent_voice_transcripts = recent_store

    # Keep only the entries that are still inside the dedup window.
    recent = [
        (ts, txt)
        for ts, txt in recent_store.get(key, [])
        if now - ts <= window_seconds
    ]

    for _, prior in recent:
        if prior == normalized:
            recent_store[key] = recent
            return True
        # Fuzzy matching is limited to longer utterances; short ones must
        # match exactly to count as a repeat.
        if len(prior) >= 16 and len(normalized) >= 16:
            if SequenceMatcher(None, prior, normalized).ratio() >= 0.95:
                recent_store[key] = recent
                return True

    recent.append((now, normalized))
    recent_store[key] = recent[-5:]
    return False
|
||||||
|
|
||||||
async def _handle_voice_channel_input(
|
async def _handle_voice_channel_input(
|
||||||
self, guild_id: int, user_id: int, transcript: str
|
self, guild_id: int, user_id: int, transcript: str
|
||||||
):
|
):
|
||||||
|
|
@ -8298,6 +8343,15 @@ class GatewayRunner:
|
||||||
logger.debug("Unauthorized voice input from user %d, ignoring", user_id)
|
logger.debug("Unauthorized voice input from user %d, ignoring", user_id)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
if self._is_duplicate_voice_transcript(guild_id, user_id, transcript):
|
||||||
|
logger.info(
|
||||||
|
"Suppressing duplicate voice transcript for guild=%s user=%s: %s",
|
||||||
|
guild_id,
|
||||||
|
user_id,
|
||||||
|
transcript[:100],
|
||||||
|
)
|
||||||
|
return
|
||||||
|
|
||||||
# Show transcript in text channel (after auth, with mention sanitization)
|
# Show transcript in text channel (after auth, with mention sanitization)
|
||||||
try:
|
try:
|
||||||
channel = adapter._client.get_channel(text_ch_id)
|
channel = adapter._client.get_channel(text_ch_id)
|
||||||
|
|
|
||||||
|
|
@ -954,6 +954,46 @@ class TestVoiceChannelCommands:
|
||||||
assert "Test transcript" in msg
|
assert "Test transcript" in msg
|
||||||
assert "42" in msg # user_id in mention
|
assert "42" in msg # user_id in mention
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_input_suppresses_duplicate_transcript(self, runner):
    """An identical transcript arriving right after the first is dropped."""
    from gateway.config import Platform

    # Text channel handed out by the fake Discord client.
    text_channel = AsyncMock()
    discord_client = MagicMock()
    discord_client.get_channel = MagicMock(return_value=text_channel)

    discord_adapter = AsyncMock()
    discord_adapter._voice_text_channels = {111: 123}
    discord_adapter._voice_sources = {}
    discord_adapter._client = discord_client
    discord_adapter.handle_message = AsyncMock()
    runner.adapters[Platform.DISCORD] = discord_adapter

    # Same guild, same user, same words, twice in a row.
    for _ in range(2):
        await runner._handle_voice_channel_input(111, 42, "Hello from VC")

    # Only the first utterance reaches the adapter and the text channel.
    discord_adapter.handle_message.assert_called_once()
    text_channel.send.assert_called_once()
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_input_suppresses_near_duplicate_transcript(self, runner):
    """A transcript differing only by minor STT drift is dropped as a repeat."""
    from gateway.config import Platform

    # Text channel handed out by the fake Discord client.
    text_channel = AsyncMock()
    discord_client = MagicMock()
    discord_client.get_channel = MagicMock(return_value=text_channel)

    discord_adapter = AsyncMock()
    discord_adapter._voice_text_channels = {111: 123}
    discord_adapter._voice_sources = {}
    discord_adapter._client = discord_client
    discord_adapter.handle_message = AsyncMock()
    runner.adapters[Platform.DISCORD] = discord_adapter

    # Two renderings of one utterance that differ by a single word.
    utterances = (
        "This is a test of the voice system",
        "This is a test for the voice system",
    )
    for spoken in utterances:
        await runner._handle_voice_channel_input(111, 42, spoken)

    # Only the first rendering reaches the adapter and the text channel.
    discord_adapter.handle_message.assert_called_once()
    text_channel.send.assert_called_once()
|
||||||
|
|
||||||
# -- _get_guild_id --
|
# -- _get_guild_id --
|
||||||
|
|
||||||
def test_get_guild_id_from_guild(self, runner):
|
def test_get_guild_id_from_guild(self, runner):
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue