mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-02 07:11:49 +00:00
Remove unused imports (F401) and duplicate/shadowed import redefinitions (F811) across the codebase using ruff's safe autofixes. No behavioral changes -- imports only. - ~1400 safe autofixes applied across 644 files (net -1072 lines) - __init__.py re-exports preserved (excluded from F401 removal so public re-export surfaces stay intact) - Re-exports that are imported or monkeypatched by tests but look unused in their defining module are kept with explicit # noqa: F401 (gateway/run.py load_dotenv; run_agent re-exports from agent.message_sanitization, agent.context_compressor, agent.retry_utils, agent.prompt_builder, agent.process_bootstrap, agent.codex_responses_adapter) - Unsafe F841 (unused-variable) fixes deliberately skipped -- those can change behavior when the RHS has side effects - ruff lints remain disabled in pyproject.toml (only PLW1514 is selected); this is a one-time cleanup, not a config change Verification: - python -m compileall: clean - pytest --collect-only: all 27161 tests collect (zero import errors) - core entry points import clean (run_agent, model_tools, cli, toolsets, hermes_state, batch_runner, gateway) - static scan: every name any test imports directly from an edited module still resolves
184 lines
6.8 KiB
Python
184 lines
6.8 KiB
Python
"""
|
|
Tests for #24870 — Telegram: audio file attachments must NOT be routed to STT.
|
|
|
|
Telegram distinguishes three kinds of audio payloads:
|
|
- message.voice → Opus/OGG voice message → STT pipeline
|
|
- message.audio → audio file attachment → file path note, NOT STT
|
|
- message.document (audio mime) → generic file route
|
|
|
|
These tests confirm that:
|
|
1. MessageType.VOICE events still flow through the STT pipeline.
|
|
2. MessageType.AUDIO events bypass STT and get a file-path context note instead.
|
|
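3. MessageType.AUDIO events get the same file-path note when STT is disabled,
without the "transcription is disabled" notice.
4. MessageType.AUDIO and MessageType.VOICE are distinct enum members.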
|
|
"""
|
|
|
|
from unittest.mock import patch
|
|
|
|
import pytest
|
|
|
|
from gateway.config import GatewayConfig, Platform
|
|
from gateway.platforms.base import MessageEvent, MessageType
|
|
from gateway.session import SessionSource
|
|
|
|
|
|
def _make_runner(stt_enabled: bool = True) -> "GatewayRunner":  # type: ignore[name-defined]
    """Return a bare ``GatewayRunner`` carrying only the attributes set here.

    The instance is allocated through ``__new__`` so ``GatewayRunner.__init__``
    is never executed. ``stt_enabled`` is forwarded to ``GatewayConfig``.
    """
    # Imported inside the helper rather than at module import time.
    from gateway.run import GatewayRunner

    stub = GatewayRunner.__new__(GatewayRunner)

    # Insertion order of this mapping is the order the attributes are assigned.
    stub_attrs = {
        "config": GatewayConfig(stt_enabled=stt_enabled),
        "adapters": {},
        "_model": "test-model",
        "_base_url": "",
        "_has_setup_skill": lambda: False,
    }
    for attr_name, attr_value in stub_attrs.items():
        setattr(stub, attr_name, attr_value)

    return stub
|
|
|
|
|
|
def _voice_event(path: str = "/tmp/voice.ogg") -> MessageEvent:
    """Build a Telegram DM ``MessageEvent`` of type VOICE whose only media item is ``path``.

    The event has empty text and a single ``audio/ogg`` media type.
    """
    telegram_dm = SessionSource(
        platform=Platform.TELEGRAM,
        chat_id="1",
        chat_type="dm",
    )
    event_fields = {
        "text": "",
        "message_type": MessageType.VOICE,
        "source": telegram_dm,
        "media_urls": [path],
        "media_types": ["audio/ogg"],
    }
    return MessageEvent(**event_fields)
|
|
|
|
|
|
def _audio_event(path: str = "/tmp/song.mp3") -> MessageEvent:
    """Build a Telegram DM ``MessageEvent`` of type AUDIO whose only media item is ``path``.

    The event has empty text and a single ``audio/mpeg`` media type.
    """
    telegram_dm = SessionSource(
        platform=Platform.TELEGRAM,
        chat_id="1",
        chat_type="dm",
    )
    event_fields = {
        "text": "",
        "message_type": MessageType.AUDIO,
        "source": telegram_dm,
        "media_urls": [path],
        "media_types": ["audio/mpeg"],
    }
    return MessageEvent(**event_fields)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# 1. VOICE still goes through STT
|
|
# ---------------------------------------------------------------------------
|
|
|
|
@pytest.mark.asyncio
async def test_voice_message_still_transcribed():
    """A VOICE event must reach ``transcribe_audio`` and surface its transcript.

    ``transcribe_audio`` is patched to return a successful result; the test
    checks it was called exactly once with the voice file path and that the
    prepared text contains both the transcript and the words "voice message".
    """
    event = _voice_event("/tmp/voice.ogg")
    source = SessionSource(platform=Platform.TELEGRAM, chat_id="1", chat_type="dm")
    runner = _make_runner(stt_enabled=True)

    canned_stt_result = {
        "success": True,
        "transcript": "hello world",
        "provider": "whisper",
    }
    stt_patcher = patch(
        "tools.transcription_tools.transcribe_audio",
        return_value=canned_stt_result,
    )

    with stt_patcher as mock_transcribe:
        result = await runner._prepare_inbound_message_text(
            event=event,
            source=source,
            history=[],
        )

    mock_transcribe.assert_called_once_with("/tmp/voice.ogg")
    assert "hello world" in result
    assert "voice message" in result.lower()
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# 2. AUDIO file attachment bypasses STT
|
|
# ---------------------------------------------------------------------------
|
|
|
|
@pytest.mark.asyncio
async def test_audio_attachment_skips_stt():
    """An AUDIO event must never call ``transcribe_audio``.

    The STT entry point is patched to raise if invoked, and the cache-path
    helper is patched to return its argument unchanged. The prepared text must
    exist, contain the original file path, and mention "audio file attachment".
    """
    event = _audio_event("/tmp/song.mp3")
    source = SessionSource(platform=Platform.TELEGRAM, chat_id="1", chat_type="dm")
    runner = _make_runner(stt_enabled=True)

    # Any call into STT fails the test immediately.
    stt_tripwire = patch(
        "tools.transcription_tools.transcribe_audio",
        side_effect=AssertionError("transcribe_audio must NOT be called for audio file attachments"),
    )
    # Identity mapping, so the asserted path equals the path in the event.
    passthrough_cache_path = patch(
        "tools.credential_files.to_agent_visible_cache_path",
        side_effect=lambda p: p,
    )

    with stt_tripwire, passthrough_cache_path:
        result = await runner._prepare_inbound_message_text(
            event=event,
            source=source,
            history=[],
        )

    assert result is not None
    assert "/tmp/song.mp3" in result
    assert "audio file attachment" in result.lower()
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_audio_attachment_context_note_format():
    """The note produced for an AUDIO event names the file and is not a voice note.

    With STT patched to raise and the cache-path helper patched to an identity
    function, the prepared text must contain ``my_song.mp3`` and the phrase
    "audio file attachment", and must not contain "voice message".
    """
    event = _audio_event("/tmp/cache_12345_my_song.mp3")
    source = SessionSource(platform=Platform.TELEGRAM, chat_id="1", chat_type="dm")
    runner = _make_runner(stt_enabled=True)

    stt_tripwire = patch(
        "tools.transcription_tools.transcribe_audio",
        side_effect=AssertionError("must not be called"),
    )
    passthrough_cache_path = patch(
        "tools.credential_files.to_agent_visible_cache_path",
        side_effect=lambda p: p,
    )

    with stt_tripwire, passthrough_cache_path:
        result = await runner._prepare_inbound_message_text(
            event=event,
            source=source,
            history=[],
        )

    lowered = result.lower()
    assert "my_song.mp3" in result
    assert "audio file attachment" in lowered
    # The voice-message transcription wrapper text must be absent.
    assert "voice message" not in lowered
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# 3. STT disabled still results in no transcription for audio file attachments
|
|
# ---------------------------------------------------------------------------
|
|
|
|
@pytest.mark.asyncio
async def test_audio_attachment_skips_stt_when_stt_disabled():
    """With ``stt_enabled=False`` an AUDIO event still yields only a file note.

    STT is patched to raise if called and the cache-path helper is patched to
    an identity function. The prepared text must not contain "transcription is
    disabled", and must contain "audio file attachment" and the file path.
    """
    event = _audio_event("/tmp/podcast.m4a")
    source = SessionSource(platform=Platform.TELEGRAM, chat_id="1", chat_type="dm")
    runner = _make_runner(stt_enabled=False)

    stt_tripwire = patch(
        "tools.transcription_tools.transcribe_audio",
        side_effect=AssertionError("must not be called"),
    )
    passthrough_cache_path = patch(
        "tools.credential_files.to_agent_visible_cache_path",
        side_effect=lambda p: p,
    )

    with stt_tripwire, passthrough_cache_path:
        result = await runner._prepare_inbound_message_text(
            event=event,
            source=source,
            history=[],
        )

    lowered = result.lower()
    # The "transcription is disabled" note is expected only for VOICE events.
    assert "transcription is disabled" not in lowered
    assert "audio file attachment" in lowered
    assert "/tmp/podcast.m4a" in result
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# 4. Telegram gateway: msg.audio → MessageType.AUDIO (not VOICE)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def test_telegram_media_type_detection_audio_vs_voice():
    """Pin the ``MessageType.AUDIO`` / ``MessageType.VOICE`` constants.

    This checks only the enum values ("audio" and "voice") and that the two
    members differ; it does not call into the Telegram adapter.
    """
    from gateway.platforms.base import MessageType

    # NOTE(review): the adapter is presumed to map msg.audio -> AUDIO and
    # msg.voice -> VOICE; that mapping is not exercised here -- confirm it is
    # covered by an adapter-level test.
    audio_kind = MessageType.AUDIO
    voice_kind = MessageType.VOICE

    assert audio_kind.value == "audio"
    assert voice_kind.value == "voice"
    # Sanity check: the two members are distinct.
    assert audio_kind != voice_kind
|