hermes-agent/tests/gateway/test_tts_media_routing.py
Teknium a989a79c0c
fix(gateway): allow native delivery of freshly-produced agent files (#32060)
The gateway's media delivery allowlist required files live inside
`~/.hermes/cache/{documents,images,...}`, which is the wrong shape for
real agent usage. Agents naturally produce artifacts via terminal tools
(`pandoc -o /tmp/report.pdf`, `matplotlib savefig`, etc.) or
write_file into project directories — these never land under the cache.
Result: users got a raw file path in chat instead of an attachment.

This is doubly bad in deployment shapes where the cache directories
aren't writable by the agent at all: Hermes running in Docker with a
read-only mount, or with a Docker/Modal/SSH terminal backend whose
filesystem isn't the gateway host's filesystem.

Layered trust model:

1. Cache-dir allowlist (unchanged) — Hermes-managed roots always trusted.
2. Operator allowlist — `HERMES_MEDIA_ALLOW_DIRS` env var, now also
   surfaced as `gateway.media_delivery_allow_dirs` in config.yaml.
3. Recency-based trust (new, default on) — files whose mtime is within
   `gateway.trust_recent_files_seconds` (default 600s) of "now" are
   trusted even outside the cache/operator allowlist. Old host files
   (`/etc/passwd`, `~/.bashrc`, `~/.ssh/id_rsa`) have mtimes measured
   in days/months, well outside the window — prompt-injection paths
   pointing at pre-existing files are still rejected.
4. Hard denylist — `/etc`, `/proc`, `/sys`, `/dev`, `/root`, `/boot`,
   `/var/{log,lib,run}`, plus `$HOME/.{ssh,aws,gnupg,kube,docker,config,
   azure,gcloud}` and `Library/Keychains`. Denylist blocks delivery
   even when recency would trust the file, in case an attacker
   somehow refreshes a sensitive file's mtime.

Operators who want strict-allowlist behavior can set
`gateway.trust_recent_files: false`, which restores the previous
allowlist-only behavior.

Tests: 6 new cases in test_platform_base.py cover the recency window,
disabled mode, system-path denylist, and the motivating PDF-in-project
scenario. 3 existing tests (test_platform_base, test_tts_media_routing,
test_send_message_tool) that exercised the strict-allowlist path are
updated to disable recency trust explicitly.

E2E validation: real `validate_media_delivery_path()` accepts fresh
PDFs in /tmp and project dirs, rejects /etc/passwd, ~/.ssh/id_rsa, and
files older than the window; config.yaml `gateway.*` keys bridge
correctly to the env vars the validator reads.
2026-05-25 05:34:31 -07:00

260 lines
9.8 KiB
Python

"""
Tests for cross-platform audio/voice media routing.
These tests pin the expected delivery path for audio media files across
Telegram (where Bot-API sendAudio only accepts MP3/M4A and .ogg/.opus
only renders as a voice bubble when explicitly flagged) and via
``GatewayRunner._deliver_media_from_response``.
"""
from types import SimpleNamespace
from unittest.mock import AsyncMock, MagicMock
import pytest
from gateway.config import Platform, PlatformConfig
from gateway.platforms.base import BasePlatformAdapter, MessageEvent, MessageType, SendResult
from gateway.run import GatewayRunner
from gateway.session import SessionSource, build_session_key
class _MediaRoutingAdapter(BasePlatformAdapter):
    """Stub adapter registered as ``Platform.TELEGRAM`` for routing tests.

    The transport hooks below return canned values so the tests can replace
    individual senders with mocks and observe which one gets awaited.
    """

    def __init__(self):
        config = PlatformConfig(enabled=True, token="test")
        super().__init__(config, Platform.TELEGRAM)

    async def connect(self):
        """Report a successful connection without doing any work."""
        return True

    async def disconnect(self):
        """No-op disconnect."""
        return None

    async def send(self, chat_id, content=None, **kwargs):
        """Return a successful ``SendResult`` whose message id is ``"text"``."""
        return SendResult(success=True, message_id="text")

    async def get_chat_info(self, chat_id):
        """Describe *chat_id* as a DM chat."""
        return {"id": chat_id, "type": "dm"}
def _event(thread_id=None):
    """Build a text ``MessageEvent`` from a Telegram DM source.

    Args:
        thread_id: Optional thread id stored on the event's ``SessionSource``.

    Returns:
        A ``MessageEvent`` with text ``"make speech"`` and id ``"msg-1"``.
    """
    return MessageEvent(
        text="make speech",
        message_type=MessageType.TEXT,
        source=SessionSource(
            platform=Platform.TELEGRAM,
            chat_id="chat-1",
            chat_type="dm",
            thread_id=thread_id,
        ),
        message_id="msg-1",
    )
def _allowed_media_path(tmp_path, monkeypatch, name):
root = tmp_path / "media-cache"
media_file = root / name
media_file.parent.mkdir(parents=True, exist_ok=True)
media_file.write_bytes(b"media")
monkeypatch.setattr(
"gateway.platforms.base.MEDIA_DELIVERY_SAFE_ROOTS",
(root,),
)
return media_file.resolve()
@pytest.mark.asyncio
async def test_base_adapter_routes_telegram_flac_media_tag_to_document_sender(tmp_path, monkeypatch):
    """A ``MEDIA:`` tag for a ``.flac`` file must be awaited on ``send_document`` only."""
    adapter = _MediaRoutingAdapter()
    incoming = _event()
    flac_path = _allowed_media_path(tmp_path, monkeypatch, "speech.flac")

    # The handler's reply is the agent response carrying the media tag.
    adapter._message_handler = AsyncMock(return_value=f"MEDIA:{flac_path}")
    adapter.send_voice = AsyncMock(
        return_value=SendResult(success=True, message_id="voice"),
    )
    adapter.send_document = AsyncMock(
        return_value=SendResult(success=True, message_id="doc"),
    )

    session_key = build_session_key(incoming.source)
    await adapter._process_message_background(incoming, session_key)

    adapter.send_document.assert_awaited_once_with(
        chat_id="chat-1", file_path=str(flac_path), metadata=None
    )
    adapter.send_voice.assert_not_awaited()
@pytest.mark.asyncio
async def test_base_adapter_routes_non_voice_telegram_ogg_media_tag_to_document_sender(tmp_path, monkeypatch):
    """An ``.ogg`` ``MEDIA:`` tag without the voice marker must go to ``send_document``."""
    adapter = _MediaRoutingAdapter()
    incoming = _event()
    ogg_path = _allowed_media_path(tmp_path, monkeypatch, "speech.ogg")

    voice_result = SendResult(success=True, message_id="voice")
    doc_result = SendResult(success=True, message_id="doc")
    adapter._message_handler = AsyncMock(return_value=f"MEDIA:{ogg_path}")
    adapter.send_voice = AsyncMock(return_value=voice_result)
    adapter.send_document = AsyncMock(return_value=doc_result)

    await adapter._process_message_background(
        incoming,
        build_session_key(incoming.source),
    )

    expected_call = {"chat_id": "chat-1", "file_path": str(ogg_path), "metadata": None}
    adapter.send_document.assert_awaited_once_with(**expected_call)
    adapter.send_voice.assert_not_awaited()
@pytest.mark.asyncio
async def test_base_adapter_routes_voice_tagged_telegram_ogg_media_tag_to_voice_sender(tmp_path, monkeypatch):
    """An ``.ogg`` ``MEDIA:`` tag preceded by ``[[audio_as_voice]]`` must go to ``send_voice``."""
    adapter = _MediaRoutingAdapter()
    incoming = _event()
    ogg_path = _allowed_media_path(tmp_path, monkeypatch, "speech.ogg")

    # The voice marker sits on its own line ahead of the media tag.
    agent_reply = f"[[audio_as_voice]]\nMEDIA:{ogg_path}"
    adapter._message_handler = AsyncMock(return_value=agent_reply)
    adapter.send_voice = AsyncMock(
        return_value=SendResult(success=True, message_id="voice"),
    )
    adapter.send_document = AsyncMock(
        return_value=SendResult(success=True, message_id="doc"),
    )

    session_key = build_session_key(incoming.source)
    await adapter._process_message_background(incoming, session_key)

    adapter.send_voice.assert_awaited_once_with(
        chat_id="chat-1", audio_path=str(ogg_path), metadata=None
    )
    adapter.send_document.assert_not_awaited()
def _fake_runner(thread_meta):
"""Build a fake GatewayRunner-like object with the helper methods needed by
_deliver_media_from_response."""
runner = SimpleNamespace(
_thread_metadata_for_source=lambda source, anchor=None: thread_meta,
_reply_anchor_for_event=lambda event: None,
)
return runner
@pytest.mark.asyncio
async def test_streaming_delivery_routes_telegram_flac_media_tag_to_document_sender(tmp_path, monkeypatch):
    """``_deliver_media_from_response`` must hand a ``.flac`` file to ``send_document``.

    The thread metadata supplied by the fake runner is expected to be
    forwarded as the ``metadata`` argument.
    """
    incoming = _event(thread_id="topic-1")
    flac_path = _allowed_media_path(tmp_path, monkeypatch, "speech.flac")

    def _ok(message_id):
        # Each sender is a mock that resolves to a successful result.
        return AsyncMock(return_value=SendResult(success=True, message_id=message_id))

    adapter = SimpleNamespace(
        name="test",
        extract_media=BasePlatformAdapter.extract_media,
        extract_images=BasePlatformAdapter.extract_images,
        extract_local_files=BasePlatformAdapter.extract_local_files,
        send_voice=_ok("voice"),
        send_document=_ok("doc"),
        send_image_file=_ok("image"),
        send_video=_ok("video"),
    )
    runner = _fake_runner({"thread_id": "topic-1"})

    await GatewayRunner._deliver_media_from_response(
        runner, f"MEDIA:{flac_path}", incoming, adapter
    )

    adapter.send_document.assert_awaited_once_with(
        chat_id="chat-1",
        file_path=str(flac_path),
        metadata={"thread_id": "topic-1"},
    )
    adapter.send_voice.assert_not_awaited()
@pytest.mark.asyncio
async def test_streaming_delivery_routes_non_voice_telegram_ogg_media_tag_to_document_sender(tmp_path, monkeypatch):
    """``_deliver_media_from_response`` must hand an untagged ``.ogg`` to ``send_document``."""
    incoming = _event(thread_id="topic-1")
    ogg_path = _allowed_media_path(tmp_path, monkeypatch, "speech.ogg")

    # Sender attribute -> message id returned by its mock.
    sender_ids = {
        "send_voice": "voice",
        "send_document": "doc",
        "send_image_file": "image",
        "send_video": "video",
    }
    senders = {
        attr: AsyncMock(return_value=SendResult(success=True, message_id=message_id))
        for attr, message_id in sender_ids.items()
    }
    adapter = SimpleNamespace(
        name="test",
        extract_media=BasePlatformAdapter.extract_media,
        extract_images=BasePlatformAdapter.extract_images,
        extract_local_files=BasePlatformAdapter.extract_local_files,
        **senders,
    )

    await GatewayRunner._deliver_media_from_response(
        _fake_runner({"thread_id": "topic-1"}),
        f"MEDIA:{ogg_path}",
        incoming,
        adapter,
    )

    expected_call = {
        "chat_id": "chat-1",
        "file_path": str(ogg_path),
        "metadata": {"thread_id": "topic-1"},
    }
    adapter.send_document.assert_awaited_once_with(**expected_call)
    adapter.send_voice.assert_not_awaited()
@pytest.mark.asyncio
async def test_streaming_delivery_routes_telegram_mp3_media_tag_to_voice_sender(tmp_path, monkeypatch):
    """An ``.mp3`` media tag on Telegram must be delivered via ``send_voice``.

    ``send_voice`` is the entry point that routes to sendAudio internally;
    Telegram accepts MP3 for its audio player, so ``send_document`` must stay
    untouched.
    """
    incoming = _event(thread_id="topic-1")
    mp3_path = _allowed_media_path(tmp_path, monkeypatch, "speech.mp3")

    adapter = SimpleNamespace(
        name="test",
        extract_media=BasePlatformAdapter.extract_media,
        extract_images=BasePlatformAdapter.extract_images,
        extract_local_files=BasePlatformAdapter.extract_local_files,
    )
    # Attach the four sender mocks after construction, one per delivery kind.
    adapter.send_voice = AsyncMock(
        return_value=SendResult(success=True, message_id="voice"),
    )
    adapter.send_document = AsyncMock(
        return_value=SendResult(success=True, message_id="doc"),
    )
    adapter.send_image_file = AsyncMock(
        return_value=SendResult(success=True, message_id="image"),
    )
    adapter.send_video = AsyncMock(
        return_value=SendResult(success=True, message_id="video"),
    )

    runner = _fake_runner({"thread_id": "topic-1"})
    await GatewayRunner._deliver_media_from_response(
        runner, f"MEDIA:{mp3_path}", incoming, adapter
    )

    adapter.send_voice.assert_awaited_once_with(
        chat_id="chat-1",
        audio_path=str(mp3_path),
        metadata={"thread_id": "topic-1"},
    )
    adapter.send_document.assert_not_awaited()
@pytest.mark.asyncio
async def test_streaming_delivery_blocks_media_path_outside_allowed_roots(tmp_path, monkeypatch):
    """A ``MEDIA:`` path outside the allowlisted root must not be delivered.

    The only allowlisted root is ``tmp_path / "media-cache"`` while the
    referenced file lives directly under ``tmp_path``. With recency trust
    disabled, none of the adapter's senders may be awaited.
    """
    event = _event(thread_id="topic-1")
    allowed_root = tmp_path / "media-cache"
    allowed_root.mkdir()
    secret = tmp_path / "outside.pdf"
    secret.write_bytes(b"%PDF secret")
    monkeypatch.setattr(
        "gateway.platforms.base.MEDIA_DELIVERY_SAFE_ROOTS",
        (allowed_root,),
    )
    # This test exercises the strict-allowlist path; disable recency trust so
    # the freshly-written tmp_path file is not auto-accepted by the trust
    # window. (Recency trust is covered separately in test_platform_base.py.)
    monkeypatch.setenv("HERMES_MEDIA_TRUST_RECENT_FILES", "0")
    adapter = SimpleNamespace(
        name="test",
        extract_media=BasePlatformAdapter.extract_media,
        extract_images=BasePlatformAdapter.extract_images,
        extract_local_files=BasePlatformAdapter.extract_local_files,
        send_voice=AsyncMock(return_value=SendResult(success=True, message_id="voice")),
        send_document=AsyncMock(return_value=SendResult(success=True, message_id="doc")),
        send_image_file=AsyncMock(return_value=SendResult(success=True, message_id="image")),
        send_video=AsyncMock(return_value=SendResult(success=True, message_id="video")),
    )

    await GatewayRunner._deliver_media_from_response(
        _fake_runner({"thread_id": "topic-1"}),
        f"MEDIA:{secret}",
        event,
        adapter,
    )

    # A blocked path must not leak through ANY sender, not just the two the
    # file extension would normally route to.
    adapter.send_document.assert_not_awaited()
    adapter.send_voice.assert_not_awaited()
    adapter.send_image_file.assert_not_awaited()
    adapter.send_video.assert_not_awaited()