diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py
index 307ecf46f4d..d3960154688 100644
--- a/gateway/platforms/base.py
+++ b/gateway/platforms/base.py
@@ -827,6 +827,8 @@ DOCUMENT_CACHE_DIR = get_hermes_dir("cache/documents", "document_cache")
 SCREENSHOT_CACHE_DIR = get_hermes_dir("cache/screenshots", "browser_screenshots")
 _HERMES_HOME = get_hermes_home()
 MEDIA_DELIVERY_ALLOW_DIRS_ENV = "HERMES_MEDIA_ALLOW_DIRS"
+MEDIA_DELIVERY_TRUST_RECENT_ENV = "HERMES_MEDIA_TRUST_RECENT_FILES"
+MEDIA_DELIVERY_TRUST_RECENT_SECONDS_ENV = "HERMES_MEDIA_TRUST_RECENT_SECONDS"
 MEDIA_DELIVERY_SAFE_ROOTS = (
     IMAGE_CACHE_DIR,
     AUDIO_CACHE_DIR,
@@ -840,6 +842,48 @@ MEDIA_DELIVERY_SAFE_ROOTS = (
     _HERMES_HOME / "browser_screenshots",
 )

+# Default recency window for trusting freshly-produced files (seconds).
+# The agent's actual work generally completes well inside 10 minutes; legitimate
+# build artifacts (PDFs from pandoc, plots from matplotlib, etc.) almost always
+# land seconds before delivery. Old system files (/etc/passwd, ~/.ssh/id_rsa,
+# stray credentials) have mtimes measured in days or months — well outside this
+# window — so prompt-injection paths pointing at pre-existing host files are
+# still rejected.
+_MEDIA_DELIVERY_TRUST_RECENT_DEFAULT_SECONDS = 600
+
+# Hard denylist applied even when a path would otherwise pass recency trust.
+# These prefixes hold credentials, system state, or process introspection that
+# should never be uploaded as a gateway attachment, regardless of how new the
+# file looks. The cache-dir allowlist still beats this — an operator-configured
+# allowed root can intentionally live under one of these prefixes (rare, but
+# their choice).
+_MEDIA_DELIVERY_DENIED_PREFIXES = (
+    "/etc",
+    "/proc",
+    "/sys",
+    "/dev",
+    "/root",
+    "/boot",
+    "/var/log",
+    "/var/lib",
+    "/var/run",
+)
+
+# Within $HOME we additionally deny common credential / config directories.
+# Resolved at check time against the live $HOME so containers and alt-home
+# setups work correctly.
+_MEDIA_DELIVERY_DENIED_HOME_SUBPATHS = (
+    ".ssh",
+    ".aws",
+    ".gnupg",
+    ".kube",
+    ".docker",
+    ".config",
+    ".azure",
+    ".gcloud",
+    "Library/Keychains",  # macOS
+)
+

 def _media_delivery_allowed_roots() -> List[Path]:
     """Return roots from which model-emitted local media may be delivered."""
@@ -856,6 +900,67 @@ def _media_delivery_allowed_roots() -> List[Path]:
     return roots


+def _media_delivery_recency_seconds() -> float:
+    """Return the recency window for trusting freshly-produced files.
+
+    0 disables recency-based trust entirely (pure-allowlist mode).
+    """
+    raw = os.environ.get(MEDIA_DELIVERY_TRUST_RECENT_ENV, "1").strip().lower()
+    if raw in ("0", "false", "no", "off", ""):
+        return 0.0
+    try:
+        custom = os.environ.get(MEDIA_DELIVERY_TRUST_RECENT_SECONDS_ENV, "").strip()
+        if custom:
+            seconds = float(custom)
+            return max(0.0, seconds)
+    except (TypeError, ValueError):
+        pass
+    return float(_MEDIA_DELIVERY_TRUST_RECENT_DEFAULT_SECONDS)
+
+
+def _media_delivery_denied_paths() -> List[Path]:
+    """Return absolute denylist paths under which delivery is never allowed."""
+    denied = [Path(p) for p in _MEDIA_DELIVERY_DENIED_PREFIXES]
+    home = Path(os.path.expanduser("~"))
+    for sub in _MEDIA_DELIVERY_DENIED_HOME_SUBPATHS:
+        denied.append(home / sub)
+    # The Hermes home holds credentials (auth.json, .env) plus files that are
+    # rewritten constantly (e.g. state.db) and therefore always look "recent".
+    # Deny the whole tree: the allowlisted cache subdirectories are matched
+    # before the recency fallback runs, so they stay deliverable.
+    denied.append(_HERMES_HOME)
+    return denied
+
+
+def _path_under_denied_prefix(resolved: Path) -> bool:
+    """Return True if ``resolved`` lives under a deny-listed system path."""
+    for denied in _media_delivery_denied_paths():
+        try:
+            resolved_denied = denied.expanduser().resolve(strict=False)
+        except (OSError, RuntimeError, ValueError):
+            continue
+        if _path_is_within(resolved, resolved_denied) or resolved == resolved_denied:
+            return True
+    return False
+
+
+def _file_is_recently_produced(resolved: Path, window_seconds: float) -> bool:
+    """Return True if the file's mtime is within ``window_seconds`` of now.
+
+    Used as a session-scoped trust signal: agents almost always produce
+    delivery artifacts within seconds of asking to send them, while
+    prompt-injection paths pointing at pre-existing host files (/etc/passwd,
+    ~/.ssh/id_rsa) have mtimes measured in days or months.
+    """
+    if window_seconds <= 0:
+        return False
+    try:
+        mtime = resolved.stat().st_mtime
+    except OSError:
+        return False
+    return (time.time() - mtime) <= window_seconds
+
+
 def _path_is_within(path: Path, root: Path) -> bool:
     try:
         path.relative_to(root)
@@ -902,6 +1007,16 @@ def validate_media_delivery_path(path: str) -> Optional[str]:
         if _path_is_within(resolved, resolved_root):
             return str(resolved)

+    # Outside the cache/operator allowlist: fall back to recency-based trust
+    # for files the agent has just produced (e.g. ``pandoc -o /tmp/report.pdf``
+    # or ``write_file("/home/user/report.pdf", ...)``). System paths and
+    # credential locations remain blocked even when "recent" — see
+    # ``_MEDIA_DELIVERY_DENIED_PREFIXES`` for the denylist.
+    window = _media_delivery_recency_seconds()
+    if window > 0 and not _path_under_denied_prefix(resolved):
+        if _file_is_recently_produced(resolved, window):
+            return str(resolved)
+
     return None


diff --git a/gateway/run.py b/gateway/run.py
index 675774022e3..696f9b29b81 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -932,6 +932,30 @@ if _config_path.exists():
             _redact = _security_cfg.get("redact_secrets")
             if _redact is not None:
                 os.environ["HERMES_REDACT_SECRETS"] = str(_redact).lower()
+        # Gateway settings (media delivery allowlist + recency trust)
+        _gateway_cfg = _cfg.get("gateway", {})
+        if isinstance(_gateway_cfg, dict):
+            _allow_dirs = _gateway_cfg.get("media_delivery_allow_dirs")
+            if _allow_dirs:
+                if isinstance(_allow_dirs, str):
+                    _allow_dirs_str = _allow_dirs
+                elif isinstance(_allow_dirs, (list, tuple)):
+                    _allow_dirs_str = os.pathsep.join(str(p) for p in _allow_dirs if p)
+                else:
+                    _allow_dirs_str = ""
+                if _allow_dirs_str:
+                    os.environ["HERMES_MEDIA_ALLOW_DIRS"] = _allow_dirs_str
+            _trust_recent = _gateway_cfg.get("trust_recent_files")
+            if _trust_recent is not None:
+                # Pass the value through so quoted YAML strings such as
+                # "false"/"off" are parsed by the reader as disabled instead
+                # of being treated as truthy here (fail-open).
+                os.environ["HERMES_MEDIA_TRUST_RECENT_FILES"] = (
+                    str(_trust_recent).strip().lower() if _trust_recent else "0"
+                )
+            _trust_recent_seconds = _gateway_cfg.get("trust_recent_files_seconds")
+            if _trust_recent_seconds is not None:
+                os.environ["HERMES_MEDIA_TRUST_RECENT_SECONDS"] = str(_trust_recent_seconds)
     except Exception as _bridge_err:
         # Previously this was silent (`except Exception: pass`), which
         # hid partial bridge failures and let .env defaults shadow
diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index e927996c3c8..07dfe23ba8d 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -1638,6 +1638,31 @@ DEFAULT_CONFIG = {
         "force_ipv4": False,
     },

+    # Gateway settings — control how messaging platforms (Telegram, Discord,
+    # Slack, etc.) deliver agent-produced files as native attachments.
+    "gateway": {
+        # Extra directories from which model-emitted bare file paths may be
+        # uploaded as native gateway attachments. Files inside the Hermes
+        # cache (~/.hermes/cache/{documents,images,audio,video,screenshots})
+        # are always trusted; this list adds operator-controlled roots
+        # (project dirs, scratch dirs, mounted shares). Accepts a list of
+        # absolute paths or a single os.pathsep-separated string. Bridged
+        # to HERMES_MEDIA_ALLOW_DIRS at gateway startup. Tilde paths are
+        # expanded.
+        "media_delivery_allow_dirs": [],
+        # When true, files whose mtime is within ``trust_recent_files_seconds``
+        # of "now" are trusted for native delivery even outside the cache /
+        # operator allowlist — useful for ``pandoc -o /tmp/report.pdf`` or
+        # PDFs the agent writes into a working directory. System paths
+        # (/etc, /proc, ~/.ssh, ~/.aws, etc.) remain blocked regardless.
+        # Disable to fall back to pure-allowlist mode. Bridged to
+        # HERMES_MEDIA_TRUST_RECENT_FILES.
+        "trust_recent_files": True,
+        # Recency window in seconds. 600 (10 min) comfortably covers a
+        # multi-tool agent turn. Bridged to HERMES_MEDIA_TRUST_RECENT_SECONDS.
+        "trust_recent_files_seconds": 600,
+    },
+
     # Session storage — controls automatic cleanup of ~/.hermes/state.db.
     # state.db accumulates every session, message, tool call, and FTS5 index
     # entry forever. Without auto-pruning, a heavy user (gateway + cron)
diff --git a/tests/gateway/test_platform_base.py b/tests/gateway/test_platform_base.py
index 3f303d0377c..b7d96d4dc3e 100644
--- a/tests/gateway/test_platform_base.py
+++ b/tests/gateway/test_platform_base.py
@@ -1,6 +1,7 @@
 """Tests for gateway/platforms/base.py — MessageEvent, media extraction, message truncation."""

 import os
+import time
 from unittest.mock import patch

 import pytest
@@ -367,6 +368,10 @@ class TestMediaDeliveryPathValidation:
             "gateway.platforms.base.MEDIA_DELIVERY_SAFE_ROOTS",
             tuple(roots),
         )
+        # Disable recency-based trust by default so the original allowlist
+        # tests continue to exercise the strict-allowlist path. Tests that
+        # specifically cover recency trust re-enable it themselves.
+        monkeypatch.setenv("HERMES_MEDIA_TRUST_RECENT_FILES", "0")

     def test_allows_existing_file_inside_safe_root(self, tmp_path, monkeypatch):
         root = tmp_path / "media-cache"
@@ -426,6 +431,129 @@ class TestMediaDeliveryPathValidation:

         assert BasePlatformAdapter.validate_media_delivery_path(str(media_file)) == str(media_file.resolve())

+    def test_recency_trust_allows_freshly_produced_file(self, tmp_path, monkeypatch):
+        """A PDF the agent just wrote to /tmp should be deliverable.
+
+        Covers the natural case: agent runs ``pandoc -o /tmp/report.pdf`` or
+        ``write_file('/home/user/report.pdf', ...)`` and asks the gateway to
+        send the result. With recency trust on, fresh files outside the cache
+        allowlist are accepted because the file's mtime is within the window.
+        """
+        self._patch_roots(monkeypatch)  # zero cache allowlist
+        monkeypatch.delenv("HERMES_MEDIA_ALLOW_DIRS", raising=False)
+        monkeypatch.setenv("HERMES_MEDIA_TRUST_RECENT_FILES", "1")
+        monkeypatch.setenv("HERMES_MEDIA_TRUST_RECENT_SECONDS", "600")
+
+        fresh = tmp_path / "scratch" / "report.pdf"
+        fresh.parent.mkdir(parents=True)
+        fresh.write_bytes(b"%PDF-1.4")
+
+        assert BasePlatformAdapter.validate_media_delivery_path(str(fresh)) == str(fresh.resolve())
+
+    def test_recency_trust_rejects_old_file(self, tmp_path, monkeypatch):
+        """A pre-existing host file (~/.bashrc, /etc/passwd shape) is rejected.
+
+        Recency trust is the load-bearing anti-injection signal: prompt-injected
+        paths point at files that have existed for days or months, well outside
+        the trust window.
+        """
+        self._patch_roots(monkeypatch)
+        monkeypatch.delenv("HERMES_MEDIA_ALLOW_DIRS", raising=False)
+        monkeypatch.setenv("HERMES_MEDIA_TRUST_RECENT_FILES", "1")
+        monkeypatch.setenv("HERMES_MEDIA_TRUST_RECENT_SECONDS", "60")
+
+        stale = tmp_path / "stale.pdf"
+        stale.write_bytes(b"%PDF-1.4")
+        old_mtime = time.time() - 7200  # 2 hours ago
+        os.utime(stale, (old_mtime, old_mtime))
+
+        assert BasePlatformAdapter.validate_media_delivery_path(str(stale)) is None
+
+    def test_recency_trust_disabled_falls_back_to_pure_allowlist(self, tmp_path, monkeypatch):
+        """Setting trust_recent_files=false reverts to pre-existing strict behavior."""
+        self._patch_roots(monkeypatch)
+        monkeypatch.delenv("HERMES_MEDIA_ALLOW_DIRS", raising=False)
+        monkeypatch.setenv("HERMES_MEDIA_TRUST_RECENT_FILES", "0")
+
+        fresh = tmp_path / "report.pdf"
+        fresh.write_bytes(b"%PDF-1.4")  # mtime = now
+
+        assert BasePlatformAdapter.validate_media_delivery_path(str(fresh)) is None
+
+    def test_recency_trust_denies_system_paths_even_when_fresh(self, tmp_path, monkeypatch):
+        """A freshly-written file under a denied path (~/.ssh) must NOT be uploaded.
+
+        Belt-and-braces: even if an attacker rewrites the file's mtime
+        (e.g. via a separately compromised tool result that touches a system
+        file), the denylist refuses to deliver paths under /etc, /proc, /sys,
+        ~/.ssh, ~/.aws, etc.
+        """
+        self._patch_roots(monkeypatch)
+        monkeypatch.delenv("HERMES_MEDIA_ALLOW_DIRS", raising=False)
+        monkeypatch.setenv("HERMES_MEDIA_TRUST_RECENT_FILES", "1")
+        monkeypatch.setenv("HERMES_MEDIA_TRUST_RECENT_SECONDS", "600")
+
+        # Simulate $HOME so ~/.ssh resolves into our tmp dir.
+        fake_home = tmp_path / "home"
+        ssh_dir = fake_home / ".ssh"
+        ssh_dir.mkdir(parents=True)
+        secret = ssh_dir / "id_rsa.txt"
+        secret.write_bytes(b"-----BEGIN ...")  # mtime = now
+        monkeypatch.setenv("HOME", str(fake_home))
+
+        assert BasePlatformAdapter.validate_media_delivery_path(str(secret)) is None
+
+    def test_recency_trust_denies_hermes_home_even_when_fresh(self, tmp_path, monkeypatch):
+        """Fresh files under the Hermes home (e.g. state.db) stay blocked.
+
+        Such files are rewritten constantly, so their mtime is always inside
+        the trust window; only the allowlisted cache roots may be delivered.
+        """
+        self._patch_roots(monkeypatch)
+        monkeypatch.delenv("HERMES_MEDIA_ALLOW_DIRS", raising=False)
+        monkeypatch.setenv("HERMES_MEDIA_TRUST_RECENT_FILES", "1")
+        monkeypatch.setenv("HERMES_MEDIA_TRUST_RECENT_SECONDS", "600")
+
+        hermes_home = tmp_path / "hermes-home"
+        hermes_home.mkdir()
+        monkeypatch.setattr("gateway.platforms.base._HERMES_HOME", hermes_home)
+        leaked = hermes_home / "state.db"
+        leaked.write_bytes(b"SQLite format 3")  # mtime = now
+
+        assert BasePlatformAdapter.validate_media_delivery_path(str(leaked)) is None
+
+    def test_recency_trust_allows_pdf_in_project_dir(self, tmp_path, monkeypatch):
+        """The motivating case: agent produces a PDF in a project directory.
+
+        Reproduces the Discord-PDF-not-delivered bug. Before recency trust,
+        files outside ~/.hermes/cache/* were silently dropped, leaving the
+        user with a raw filepath in chat instead of an attachment.
+        """
+        self._patch_roots(monkeypatch)
+        monkeypatch.delenv("HERMES_MEDIA_ALLOW_DIRS", raising=False)
+        monkeypatch.setenv("HERMES_MEDIA_TRUST_RECENT_FILES", "1")
+        monkeypatch.setenv("HERMES_MEDIA_TRUST_RECENT_SECONDS", "600")
+
+        project = tmp_path / "my-project"
+        report = project / "build" / "weekly-report.pdf"
+        report.parent.mkdir(parents=True)
+        report.write_bytes(b"%PDF-1.4")
+
+        assert BasePlatformAdapter.validate_media_delivery_path(str(report)) == str(report.resolve())
+
+    def test_filter_keeps_recently_produced_files(self, tmp_path, monkeypatch):
+        """End-to-end: filter_local_delivery_paths routes a fresh PDF through."""
+        self._patch_roots(monkeypatch)
+        monkeypatch.delenv("HERMES_MEDIA_ALLOW_DIRS", raising=False)
+        monkeypatch.setenv("HERMES_MEDIA_TRUST_RECENT_FILES", "1")
+        monkeypatch.setenv("HERMES_MEDIA_TRUST_RECENT_SECONDS", "600")
+
+        fresh = tmp_path / "report.pdf"
+        fresh.write_bytes(b"%PDF-1.4")
+
+        out = BasePlatformAdapter.filter_local_delivery_paths([str(fresh)])
+        assert out == [str(fresh.resolve())]
+

 # ---------------------------------------------------------------------------
 # should_send_media_as_audio
diff --git a/tests/gateway/test_tts_media_routing.py b/tests/gateway/test_tts_media_routing.py
index b4f410c280e..eeb740f8f62 100644
--- a/tests/gateway/test_tts_media_routing.py
+++ b/tests/gateway/test_tts_media_routing.py
@@ -234,6 +234,10 @@ async def test_streaming_delivery_blocks_media_path_outside_allowed_roots(tmp_pa
         "gateway.platforms.base.MEDIA_DELIVERY_SAFE_ROOTS",
         (allowed_root,),
     )
+    # This test exercises the strict-allowlist path; disable recency trust so
+    # the freshly-written tmp_path file is not auto-accepted by the trust
+    # window. (Recency trust is covered separately in test_platform_base.py.)
+    monkeypatch.setenv("HERMES_MEDIA_TRUST_RECENT_FILES", "0")
     adapter = SimpleNamespace(
         name="test",
         extract_media=BasePlatformAdapter.extract_media,
diff --git a/tests/tools/test_send_message_tool.py b/tests/tools/test_send_message_tool.py
index 66aab5eee74..922a7d7bdc2 100644
--- a/tests/tools/test_send_message_tool.py
+++ b/tests/tools/test_send_message_tool.py
@@ -377,7 +377,11 @@ class TestSendMessageTool:
             user_id="user-123",
         )

-    def test_media_tag_outside_allowed_roots_is_not_sent(self, tmp_path):
+    def test_media_tag_outside_allowed_roots_is_not_sent(self, tmp_path, monkeypatch):
+        # This test exercises the strict-allowlist path; disable recency trust
+        # so the freshly-written tmp_path file is not auto-accepted by the
+        # trust window. (Recency trust is covered in test_platform_base.py.)
+        monkeypatch.setenv("HERMES_MEDIA_TRUST_RECENT_FILES", "0")
         config, telegram_cfg = _make_config()
         secret = tmp_path / "secret.pdf"
         secret.write_bytes(b"%PDF secret")