@staticmethod
def _is_discord_voice_message_attachment(att: Any) -> bool:
    """Tell whether an audio attachment is a native Discord voice note.

    The attachment's ``is_voice_message`` attribute decides the answer
    whenever it is present and not ``None``: a callable is invoked and its
    result coerced to ``bool``; any other value is coerced directly.  If the
    call (or the coercion) raises, the error is logged at debug level and
    the attachment is reported as *not* a voice note.

    Only when the attribute is missing or ``None`` does the check fall back
    to metadata: both ``duration`` and ``waveform`` must be set.

    Args:
        att: Attachment-like object; only ``is_voice_message``, ``duration``
            and ``waveform`` are read, all via ``getattr`` with a ``None``
            default.

    Returns:
        ``True`` if the attachment is classified as a voice note.
    """
    flag = getattr(att, "is_voice_message", None)

    if flag is None:
        # No explicit marker available: require both metadata fields.
        has_duration = getattr(att, "duration", None) is not None
        return has_duration and getattr(att, "waveform", None) is not None

    if not callable(flag):
        return bool(flag)

    try:
        return bool(flag())
    except Exception as exc:
        # Best effort: a failing marker never escalates, it just means "no".
        logger.debug("[Discord] is_voice_message() failed for attachment: %s", exc)
        return False
@pytest.mark.asyncio
async def test_native_voice_note_is_classified_as_voice(self, monkeypatch):
    """A native Discord voice note must reach the auto-STT voice path.

    The attachment reports ``is_voice_message() -> True``, so the event
    handed to ``handle_message`` must be typed ``MessageType.VOICE`` while
    still carrying the cached audio path and the attachment's content type.
    """
    from datetime import datetime, timezone

    adapter = _make_adapter()
    adapter._client = SimpleNamespace(user=SimpleNamespace(id=999))
    adapter.handle_message = AsyncMock()

    class _FakeDMChannel:
        id = 100
        name = "dm"

    voice_note = SimpleNamespace(
        url="https://cdn.discordapp.com/attachments/fake/voice.ogg",
        filename="voice.ogg",
        content_type="audio/ogg",
        size=len(_OGG_BYTES),
        read=AsyncMock(return_value=_OGG_BYTES),
        is_voice_message=lambda: True,
    )

    # Swap discord.DMChannel for the stand-in so the channel below is an
    # instance of it (presumably so the adapter treats the message as a DM).
    monkeypatch.setattr(
        "gateway.platforms.discord.discord.DMChannel",
        _FakeDMChannel,
    )
    incoming = SimpleNamespace(
        id=1,
        content="",
        attachments=[voice_note],
        mentions=[],
        reference=None,
        created_at=datetime.now(timezone.utc),
        channel=_FakeDMChannel(),
        author=SimpleNamespace(id=42, display_name="U", name="U"),
    )

    # Only the dispatch itself needs the audio cache helper stubbed out.
    with patch(
        "gateway.platforms.discord.cache_audio_from_bytes",
        return_value="/tmp/voice_from_read.ogg",
    ):
        await adapter._handle_message(incoming)

    event = adapter.handle_message.call_args[0][0]
    assert event.message_type == MessageType.VOICE
    assert event.media_urls == ["/tmp/voice_from_read.ogg"]
    assert event.media_types == ["audio/ogg"]

@pytest.mark.asyncio
async def test_plain_audio_attachment_stays_audio(self, monkeypatch):
    """A plain audio upload must stay out of automatic voice-note STT.

    The attachment reports ``is_voice_message() -> False``, so the event
    handed to ``handle_message`` must keep ``MessageType.AUDIO`` along with
    the cached audio path and the attachment's content type.
    """
    from datetime import datetime, timezone

    adapter = _make_adapter()
    adapter._client = SimpleNamespace(user=SimpleNamespace(id=999))
    adapter.handle_message = AsyncMock()

    class _FakeDMChannel:
        id = 100
        name = "dm"

    audio_upload = SimpleNamespace(
        url="https://cdn.discordapp.com/attachments/fake/audio.ogg",
        filename="audio.ogg",
        content_type="audio/ogg",
        size=len(_OGG_BYTES),
        read=AsyncMock(return_value=_OGG_BYTES),
        is_voice_message=lambda: False,
    )

    # Swap discord.DMChannel for the stand-in so the channel below is an
    # instance of it (presumably so the adapter treats the message as a DM).
    monkeypatch.setattr(
        "gateway.platforms.discord.discord.DMChannel",
        _FakeDMChannel,
    )
    incoming = SimpleNamespace(
        id=1,
        content="",
        attachments=[audio_upload],
        mentions=[],
        reference=None,
        created_at=datetime.now(timezone.utc),
        channel=_FakeDMChannel(),
        author=SimpleNamespace(id=42, display_name="U", name="U"),
    )

    # Only the dispatch itself needs the audio cache helper stubbed out.
    with patch(
        "gateway.platforms.discord.cache_audio_from_bytes",
        return_value="/tmp/audio_from_read.ogg",
    ):
        await adapter._handle_message(incoming)

    event = adapter.handle_message.call_args[0][0]
    assert event.message_type == MessageType.AUDIO
    assert event.media_urls == ["/tmp/audio_from_read.ogg"]
    assert event.media_types == ["audio/ogg"]