mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-29 06:31:32 +00:00
fix(discord): transcribe native voice notes
This commit is contained in:
parent
d35f8932e8
commit
448a3f9ea2
2 changed files with 111 additions and 1 deletions
|
|
@ -3602,6 +3602,24 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
return 32 * 1024 * 1024
|
||||
return max(0, value)
|
||||
|
||||
@staticmethod
def _is_discord_voice_message_attachment(att: Any) -> bool:
    """Tell whether a Discord audio attachment is a native voice note.

    The attachment's ``is_voice_message`` attribute is authoritative when
    it exists: a callable is invoked and its result coerced to ``bool``,
    any other non-``None`` value is coerced directly. Only when that
    attribute is missing (or ``None``) does the check fall back to
    requiring both ``duration`` and ``waveform`` to be present.

    Args:
        att: Attachment-like object; every attribute is read defensively
            with ``getattr`` so partial stubs are accepted.

    Returns:
        ``True`` for a voice note, ``False`` otherwise -- including the
        case where calling ``is_voice_message()`` raises.
    """
    voice_flag = getattr(att, "is_voice_message", None)

    if voice_flag is None:
        # No explicit marker available: infer from the metadata fields.
        # ``waveform`` is only consulted once ``duration`` is known to be set.
        if getattr(att, "duration", None) is None:
            return False
        return getattr(att, "waveform", None) is not None

    if not callable(voice_flag):
        # Marker exposed as a plain attribute rather than a method.
        return bool(voice_flag)

    try:
        verdict = bool(voice_flag())
    except Exception as exc:
        # Best effort: a failing marker means "not a voice note" instead
        # of aborting handling of the whole message.
        logger.debug("[Discord] is_voice_message() failed for attachment: %s", exc)
        return False
    else:
        return verdict
|
||||
|
||||
def _discord_free_response_channels(self) -> set:
|
||||
"""Return Discord channel IDs where no bot mention is required.
|
||||
|
||||
|
|
@ -4542,7 +4560,10 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
elif att.content_type.startswith("video/"):
|
||||
msg_type = MessageType.VIDEO
|
||||
elif att.content_type.startswith("audio/"):
|
||||
msg_type = MessageType.AUDIO
|
||||
if self._is_discord_voice_message_attachment(att):
|
||||
msg_type = MessageType.VOICE
|
||||
else:
|
||||
msg_type = MessageType.AUDIO
|
||||
else:
|
||||
doc_ext = ""
|
||||
if att.filename:
|
||||
|
|
|
|||
|
|
@ -59,6 +59,7 @@ def _ensure_discord_mock():
|
|||
_ensure_discord_mock()
|
||||
|
||||
from gateway.platforms.discord import DiscordAdapter # noqa: E402
|
||||
from gateway.platforms.base import MessageType # noqa: E402
|
||||
|
||||
|
||||
# Minimal valid image / audio / PDF bytes so the cache_*_from_bytes
|
||||
|
|
@ -358,3 +359,91 @@ class TestHandleMessageUsesAuthenticatedRead:
|
|||
event = adapter.handle_message.call_args[0][0]
|
||||
assert event.media_urls == ["/tmp/img_from_read.png"]
|
||||
assert event.media_types == ["image/png"]
|
||||
|
||||
@pytest.mark.asyncio
async def test_native_voice_note_is_classified_as_voice(self, monkeypatch):
    """An attachment reporting ``is_voice_message() == True`` yields a VOICE event.

    The audio cache helper is patched so no file is written; the test then
    checks the event handed to ``handle_message`` for its message type,
    cached media path and media MIME type.
    """
    from datetime import datetime, timezone

    class _FakeDMChannel:
        id = 100
        name = "dm"

    adapter = _make_adapter()
    adapter._client = SimpleNamespace(user=SimpleNamespace(id=999))
    adapter.handle_message = AsyncMock()

    # Substitute the fake class for discord.DMChannel in the adapter module.
    monkeypatch.setattr(
        "gateway.platforms.discord.discord.DMChannel",
        _FakeDMChannel,
    )

    voice_attachment = SimpleNamespace(
        url="https://cdn.discordapp.com/attachments/fake/voice.ogg",
        filename="voice.ogg",
        content_type="audio/ogg",
        size=len(_OGG_BYTES),
        read=AsyncMock(return_value=_OGG_BYTES),
        is_voice_message=lambda: True,
    )
    author = SimpleNamespace(id=42, display_name="U", name="U")
    incoming = SimpleNamespace(
        id=1,
        content="",
        attachments=[voice_attachment],
        mentions=[],
        reference=None,
        created_at=datetime.now(timezone.utc),
        channel=_FakeDMChannel(),
        author=author,
    )

    # Only the adapter call itself needs the cache helper patched.
    with patch(
        "gateway.platforms.discord.cache_audio_from_bytes",
        return_value="/tmp/voice_from_read.ogg",
    ):
        await adapter._handle_message(incoming)

    event = adapter.handle_message.call_args.args[0]
    assert event.message_type == MessageType.VOICE
    assert event.media_urls == ["/tmp/voice_from_read.ogg"]
    assert event.media_types == ["audio/ogg"]
|
||||
|
||||
@pytest.mark.asyncio
async def test_plain_audio_attachment_stays_audio(self, monkeypatch):
    """An attachment reporting ``is_voice_message() == False`` stays AUDIO.

    Same setup as the voice-note case, but the marker returns ``False``;
    the event handed to ``handle_message`` must keep the AUDIO message
    type while still carrying the cached path and MIME type.
    """
    from datetime import datetime, timezone

    class _FakeDMChannel:
        id = 100
        name = "dm"

    adapter = _make_adapter()
    adapter._client = SimpleNamespace(user=SimpleNamespace(id=999))
    adapter.handle_message = AsyncMock()

    # Substitute the fake class for discord.DMChannel in the adapter module.
    monkeypatch.setattr(
        "gateway.platforms.discord.discord.DMChannel",
        _FakeDMChannel,
    )

    audio_attachment = SimpleNamespace(
        url="https://cdn.discordapp.com/attachments/fake/audio.ogg",
        filename="audio.ogg",
        content_type="audio/ogg",
        size=len(_OGG_BYTES),
        read=AsyncMock(return_value=_OGG_BYTES),
        is_voice_message=lambda: False,
    )
    author = SimpleNamespace(id=42, display_name="U", name="U")
    incoming = SimpleNamespace(
        id=1,
        content="",
        attachments=[audio_attachment],
        mentions=[],
        reference=None,
        created_at=datetime.now(timezone.utc),
        channel=_FakeDMChannel(),
        author=author,
    )

    # Only the adapter call itself needs the cache helper patched.
    with patch(
        "gateway.platforms.discord.cache_audio_from_bytes",
        return_value="/tmp/audio_from_read.ogg",
    ):
        await adapter._handle_message(incoming)

    event = adapter.handle_message.call_args.args[0]
    assert event.message_type == MessageType.AUDIO
    assert event.media_urls == ["/tmp/audio_from_read.ogg"]
    assert event.media_types == ["audio/ogg"]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue