mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-29 06:31:32 +00:00
fix(gateway): allow native delivery of freshly-produced agent files (#32060)
The gateway's media delivery allowlist required files live inside
`~/.hermes/cache/{documents,images,...}`, which is the wrong shape for
real agent usage. Agents naturally produce artifacts via terminal tools
(`pandoc -o /tmp/report.pdf`, `matplotlib savefig`, etc.) or
write_file into project directories — these never land under the cache.
Result: users got a raw file path in chat instead of an attachment.
This is doubly bad in deployment shapes where the cache directories
aren't writable by the agent at all: Hermes running in Docker with a
read-only mount, or with a Docker/Modal/SSH terminal backend whose
filesystem isn't the gateway host's filesystem.
Layered trust model:
1. Cache-dir allowlist (unchanged) — Hermes-managed roots always trusted.
2. Operator allowlist — `HERMES_MEDIA_ALLOW_DIRS` env var, now also
surfaced as `gateway.media_delivery_allow_dirs` in config.yaml.
3. Recency-based trust (new, default on) — files whose mtime is within
`gateway.trust_recent_files_seconds` (default 600s) of "now" are
trusted even outside the cache/operator allowlist. Old host files
(`/etc/passwd`, `~/.bashrc`, `~/.ssh/id_rsa`) have mtimes measured
in days/months, well outside the window — prompt-injection paths
pointing at pre-existing files are still rejected.
4. Hard denylist — `/etc`, `/proc`, `/sys`, `/dev`, `/root`, `/boot`,
`/var/{log,lib,run}`, plus `$HOME/.{ssh,aws,gnupg,kube,docker,config,
azure,gcloud}` and `Library/Keychains`. Denylist blocks delivery
even when recency would trust the file, in case an attacker
somehow refreshes a sensitive file's mtime.
Operators who want strict-allowlist behavior set
`gateway.trust_recent_files: false` and the system reverts to
pre-existing behavior.
Tests: 6 new cases in test_platform_base.py cover the recency window,
disabled mode, system-path denylist, and the motivating PDF-in-project
scenario. 3 existing tests (test_platform_base, test_tts_media_routing,
test_send_message_tool) that exercised the strict-allowlist path are
updated to disable recency trust explicitly.
E2E validation: real `validate_media_delivery_path()` accepts fresh
PDFs in /tmp and project dirs, rejects /etc/passwd, ~/.ssh/id_rsa, and
files older than the window; config.yaml `gateway.*` keys bridge
correctly to the env vars the validator reads.
This commit is contained in:
parent
0ff7c09e2f
commit
a989a79c0c
6 changed files with 279 additions and 1 deletions
|
|
@ -827,6 +827,8 @@ DOCUMENT_CACHE_DIR = get_hermes_dir("cache/documents", "document_cache")
|
|||
SCREENSHOT_CACHE_DIR = get_hermes_dir("cache/screenshots", "browser_screenshots")
|
||||
_HERMES_HOME = get_hermes_home()
|
||||
MEDIA_DELIVERY_ALLOW_DIRS_ENV = "HERMES_MEDIA_ALLOW_DIRS"
|
||||
MEDIA_DELIVERY_TRUST_RECENT_ENV = "HERMES_MEDIA_TRUST_RECENT_FILES"
|
||||
MEDIA_DELIVERY_TRUST_RECENT_SECONDS_ENV = "HERMES_MEDIA_TRUST_RECENT_SECONDS"
|
||||
MEDIA_DELIVERY_SAFE_ROOTS = (
|
||||
IMAGE_CACHE_DIR,
|
||||
AUDIO_CACHE_DIR,
|
||||
|
|
@ -840,6 +842,48 @@ MEDIA_DELIVERY_SAFE_ROOTS = (
|
|||
_HERMES_HOME / "browser_screenshots",
|
||||
)
|
||||
|
||||
# Default recency window for trusting freshly-produced files (seconds).
|
||||
# The agent's actual work generally completes well inside 10 minutes; legitimate
|
||||
# build artifacts (PDFs from pandoc, plots from matplotlib, etc.) almost always
|
||||
# land seconds before delivery. Old system files (/etc/passwd, ~/.ssh/id_rsa,
|
||||
# stray credentials) have mtimes measured in days or months — well outside this
|
||||
# window — so prompt-injection paths pointing at pre-existing host files are
|
||||
# still rejected.
|
||||
_MEDIA_DELIVERY_TRUST_RECENT_DEFAULT_SECONDS = 600
|
||||
|
||||
# Hard denylist applied even when a path would otherwise pass recency trust.
|
||||
# These prefixes hold credentials, system state, or process introspection that
|
||||
# should never be uploaded as a gateway attachment, regardless of how new the
|
||||
# file looks. The cache-dir allowlist still beats this — an operator-configured
|
||||
# allowed root can intentionally live under one of these prefixes (rare, but
|
||||
# their choice).
|
||||
_MEDIA_DELIVERY_DENIED_PREFIXES = (
|
||||
"/etc",
|
||||
"/proc",
|
||||
"/sys",
|
||||
"/dev",
|
||||
"/root",
|
||||
"/boot",
|
||||
"/var/log",
|
||||
"/var/lib",
|
||||
"/var/run",
|
||||
)
|
||||
|
||||
# Within $HOME we additionally deny common credential / config directories.
|
||||
# Resolved at check time against the live $HOME so containers and alt-home
|
||||
# setups work correctly.
|
||||
_MEDIA_DELIVERY_DENIED_HOME_SUBPATHS = (
|
||||
".ssh",
|
||||
".aws",
|
||||
".gnupg",
|
||||
".kube",
|
||||
".docker",
|
||||
".config",
|
||||
".azure",
|
||||
".gcloud",
|
||||
"Library/Keychains", # macOS
|
||||
)
|
||||
|
||||
|
||||
def _media_delivery_allowed_roots() -> List[Path]:
|
||||
"""Return roots from which model-emitted local media may be delivered."""
|
||||
|
|
@ -856,6 +900,67 @@ def _media_delivery_allowed_roots() -> List[Path]:
|
|||
return roots
|
||||
|
||||
|
||||
def _media_delivery_recency_seconds() -> float:
|
||||
"""Return the recency window for trusting freshly-produced files.
|
||||
|
||||
0 disables recency-based trust entirely (pure-allowlist mode).
|
||||
"""
|
||||
raw = os.environ.get(MEDIA_DELIVERY_TRUST_RECENT_ENV, "1").strip().lower()
|
||||
if raw in ("0", "false", "no", "off", ""):
|
||||
return 0.0
|
||||
try:
|
||||
custom = os.environ.get(MEDIA_DELIVERY_TRUST_RECENT_SECONDS_ENV, "").strip()
|
||||
if custom:
|
||||
seconds = float(custom)
|
||||
return max(0.0, seconds)
|
||||
except (TypeError, ValueError):
|
||||
pass
|
||||
return float(_MEDIA_DELIVERY_TRUST_RECENT_DEFAULT_SECONDS)
|
||||
|
||||
|
||||
def _media_delivery_denied_paths() -> List[Path]:
|
||||
"""Return absolute denylist paths under which delivery is never allowed."""
|
||||
denied = [Path(p) for p in _MEDIA_DELIVERY_DENIED_PREFIXES]
|
||||
home = Path(os.path.expanduser("~"))
|
||||
for sub in _MEDIA_DELIVERY_DENIED_HOME_SUBPATHS:
|
||||
denied.append(home / sub)
|
||||
# The Hermes home itself contains credentials (auth.json, .env) — only the
|
||||
# cache subdirectories under it are explicitly allowlisted above.
|
||||
denied.append(_HERMES_HOME / ".env")
|
||||
denied.append(_HERMES_HOME / "auth.json")
|
||||
denied.append(_HERMES_HOME / "credentials")
|
||||
return denied
|
||||
|
||||
|
||||
def _path_under_denied_prefix(resolved: Path) -> bool:
|
||||
"""Return True if ``resolved`` lives under a deny-listed system path."""
|
||||
for denied in _media_delivery_denied_paths():
|
||||
try:
|
||||
resolved_denied = denied.expanduser().resolve(strict=False)
|
||||
except (OSError, RuntimeError, ValueError):
|
||||
continue
|
||||
if _path_is_within(resolved, resolved_denied) or resolved == resolved_denied:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _file_is_recently_produced(resolved: Path, window_seconds: float) -> bool:
|
||||
"""Return True if the file's mtime is within ``window_seconds`` of now.
|
||||
|
||||
Used as a session-scoped trust signal: agents almost always produce
|
||||
delivery artifacts within seconds of asking to send them, while
|
||||
prompt-injection paths pointing at pre-existing host files (/etc/passwd,
|
||||
~/.ssh/id_rsa) have mtimes measured in days or months.
|
||||
"""
|
||||
if window_seconds <= 0:
|
||||
return False
|
||||
try:
|
||||
mtime = resolved.stat().st_mtime
|
||||
except OSError:
|
||||
return False
|
||||
return (time.time() - mtime) <= window_seconds
|
||||
|
||||
|
||||
def _path_is_within(path: Path, root: Path) -> bool:
|
||||
try:
|
||||
path.relative_to(root)
|
||||
|
|
@ -902,6 +1007,16 @@ def validate_media_delivery_path(path: str) -> Optional[str]:
|
|||
if _path_is_within(resolved, resolved_root):
|
||||
return str(resolved)
|
||||
|
||||
# Outside the cache/operator allowlist: fall back to recency-based trust
|
||||
# for files the agent has just produced (e.g. ``pandoc -o /tmp/report.pdf``
|
||||
# or ``write_file("/home/user/report.pdf", ...)``). System paths and
|
||||
# credential locations remain blocked even when "recent" — see
|
||||
# ``_MEDIA_DELIVERY_DENIED_PREFIXES`` for the denylist.
|
||||
window = _media_delivery_recency_seconds()
|
||||
if window > 0 and not _path_under_denied_prefix(resolved):
|
||||
if _file_is_recently_produced(resolved, window):
|
||||
return str(resolved)
|
||||
|
||||
return None
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -932,6 +932,27 @@ if _config_path.exists():
|
|||
_redact = _security_cfg.get("redact_secrets")
|
||||
if _redact is not None:
|
||||
os.environ["HERMES_REDACT_SECRETS"] = str(_redact).lower()
|
||||
# Gateway settings (media delivery allowlist + recency trust)
|
||||
_gateway_cfg = _cfg.get("gateway", {})
|
||||
if isinstance(_gateway_cfg, dict):
|
||||
_allow_dirs = _gateway_cfg.get("media_delivery_allow_dirs")
|
||||
if _allow_dirs:
|
||||
if isinstance(_allow_dirs, str):
|
||||
_allow_dirs_str = _allow_dirs
|
||||
elif isinstance(_allow_dirs, (list, tuple)):
|
||||
_allow_dirs_str = os.pathsep.join(str(p) for p in _allow_dirs if p)
|
||||
else:
|
||||
_allow_dirs_str = ""
|
||||
if _allow_dirs_str:
|
||||
os.environ["HERMES_MEDIA_ALLOW_DIRS"] = _allow_dirs_str
|
||||
_trust_recent = _gateway_cfg.get("trust_recent_files")
|
||||
if _trust_recent is not None:
|
||||
os.environ["HERMES_MEDIA_TRUST_RECENT_FILES"] = (
|
||||
"1" if _trust_recent else "0"
|
||||
)
|
||||
_trust_recent_seconds = _gateway_cfg.get("trust_recent_files_seconds")
|
||||
if _trust_recent_seconds is not None:
|
||||
os.environ["HERMES_MEDIA_TRUST_RECENT_SECONDS"] = str(_trust_recent_seconds)
|
||||
except Exception as _bridge_err:
|
||||
# Previously this was silent (`except Exception: pass`), which
|
||||
# hid partial bridge failures and let .env defaults shadow
|
||||
|
|
|
|||
|
|
@ -1638,6 +1638,31 @@ DEFAULT_CONFIG = {
|
|||
"force_ipv4": False,
|
||||
},
|
||||
|
||||
# Gateway settings — control how messaging platforms (Telegram, Discord,
|
||||
# Slack, etc.) deliver agent-produced files as native attachments.
|
||||
"gateway": {
|
||||
# Extra directories from which model-emitted bare file paths may be
|
||||
# uploaded as native gateway attachments. Files inside the Hermes
|
||||
# cache (~/.hermes/cache/{documents,images,audio,video,screenshots})
|
||||
# are always trusted; this list adds operator-controlled roots
|
||||
# (project dirs, scratch dirs, mounted shares). Accepts a list of
|
||||
# absolute paths or a single os.pathsep-separated string. Bridged
|
||||
# to HERMES_MEDIA_ALLOW_DIRS at gateway startup. Tilde paths are
|
||||
# expanded.
|
||||
"media_delivery_allow_dirs": [],
|
||||
# When true, files whose mtime is within ``trust_recent_files_seconds``
|
||||
# of "now" are trusted for native delivery even outside the cache /
|
||||
# operator allowlist — useful for ``pandoc -o /tmp/report.pdf`` or
|
||||
# PDFs the agent writes into a working directory. System paths
|
||||
# (/etc, /proc, ~/.ssh, ~/.aws, etc.) remain blocked regardless.
|
||||
# Disable to fall back to pure-allowlist mode. Bridged to
|
||||
# HERMES_MEDIA_TRUST_RECENT_FILES.
|
||||
"trust_recent_files": True,
|
||||
# Recency window in seconds. 600 (10 min) comfortably covers a
|
||||
# multi-tool agent turn. Bridged to HERMES_MEDIA_TRUST_RECENT_SECONDS.
|
||||
"trust_recent_files_seconds": 600,
|
||||
},
|
||||
|
||||
# Session storage — controls automatic cleanup of ~/.hermes/state.db.
|
||||
# state.db accumulates every session, message, tool call, and FTS5 index
|
||||
# entry forever. Without auto-pruning, a heavy user (gateway + cron)
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
"""Tests for gateway/platforms/base.py — MessageEvent, media extraction, message truncation."""
|
||||
|
||||
import os
|
||||
import time
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
|
@ -367,6 +368,10 @@ class TestMediaDeliveryPathValidation:
|
|||
"gateway.platforms.base.MEDIA_DELIVERY_SAFE_ROOTS",
|
||||
tuple(roots),
|
||||
)
|
||||
# Disable recency-based trust by default so the original allowlist
|
||||
# tests continue to exercise the strict-allowlist path. Tests that
|
||||
# specifically cover recency trust re-enable it themselves.
|
||||
monkeypatch.setenv("HERMES_MEDIA_TRUST_RECENT_FILES", "0")
|
||||
|
||||
def test_allows_existing_file_inside_safe_root(self, tmp_path, monkeypatch):
|
||||
root = tmp_path / "media-cache"
|
||||
|
|
@ -426,6 +431,110 @@ class TestMediaDeliveryPathValidation:
|
|||
|
||||
assert BasePlatformAdapter.validate_media_delivery_path(str(media_file)) == str(media_file.resolve())
|
||||
|
||||
def test_recency_trust_allows_freshly_produced_file(self, tmp_path, monkeypatch):
|
||||
"""A PDF the agent just wrote to /tmp should be deliverable.
|
||||
|
||||
Covers the natural case: agent runs ``pandoc -o /tmp/report.pdf`` or
|
||||
``write_file('/home/user/report.pdf', ...)`` and asks the gateway to
|
||||
send the result. With recency trust on, fresh files outside the cache
|
||||
allowlist are accepted because the file's mtime is within the window.
|
||||
"""
|
||||
self._patch_roots(monkeypatch) # zero cache allowlist
|
||||
monkeypatch.delenv("HERMES_MEDIA_ALLOW_DIRS", raising=False)
|
||||
monkeypatch.setenv("HERMES_MEDIA_TRUST_RECENT_FILES", "1")
|
||||
monkeypatch.setenv("HERMES_MEDIA_TRUST_RECENT_SECONDS", "600")
|
||||
|
||||
fresh = tmp_path / "scratch" / "report.pdf"
|
||||
fresh.parent.mkdir(parents=True)
|
||||
fresh.write_bytes(b"%PDF-1.4")
|
||||
|
||||
assert BasePlatformAdapter.validate_media_delivery_path(str(fresh)) == str(fresh.resolve())
|
||||
|
||||
def test_recency_trust_rejects_old_file(self, tmp_path, monkeypatch):
|
||||
"""A pre-existing host file (~/.bashrc, /etc/passwd shape) is rejected.
|
||||
|
||||
Recency trust is the load-bearing anti-injection signal: prompt-injected
|
||||
paths point at files that have existed for days or months, well outside
|
||||
the trust window.
|
||||
"""
|
||||
self._patch_roots(monkeypatch)
|
||||
monkeypatch.delenv("HERMES_MEDIA_ALLOW_DIRS", raising=False)
|
||||
monkeypatch.setenv("HERMES_MEDIA_TRUST_RECENT_FILES", "1")
|
||||
monkeypatch.setenv("HERMES_MEDIA_TRUST_RECENT_SECONDS", "60")
|
||||
|
||||
stale = tmp_path / "stale.pdf"
|
||||
stale.write_bytes(b"%PDF-1.4")
|
||||
old_mtime = time.time() - 7200 # 2 hours ago
|
||||
os.utime(stale, (old_mtime, old_mtime))
|
||||
|
||||
assert BasePlatformAdapter.validate_media_delivery_path(str(stale)) is None
|
||||
|
||||
def test_recency_trust_disabled_falls_back_to_pure_allowlist(self, tmp_path, monkeypatch):
|
||||
"""Setting trust_recent_files=false reverts to pre-existing strict behavior."""
|
||||
self._patch_roots(monkeypatch)
|
||||
monkeypatch.delenv("HERMES_MEDIA_ALLOW_DIRS", raising=False)
|
||||
monkeypatch.setenv("HERMES_MEDIA_TRUST_RECENT_FILES", "0")
|
||||
|
||||
fresh = tmp_path / "report.pdf"
|
||||
fresh.write_bytes(b"%PDF-1.4") # mtime = now
|
||||
|
||||
assert BasePlatformAdapter.validate_media_delivery_path(str(fresh)) is None
|
||||
|
||||
def test_recency_trust_denies_system_paths_even_when_fresh(self, tmp_path, monkeypatch):
|
||||
"""A freshly-touched file under /etc must NOT be uploaded.
|
||||
|
||||
Belt-and-braces: even if an attacker rewrites the file's mtime
|
||||
(e.g. via a separately compromised tool result that touches a system
|
||||
file), the denylist refuses to deliver paths under /etc, /proc, /sys,
|
||||
~/.ssh, ~/.aws, etc.
|
||||
"""
|
||||
self._patch_roots(monkeypatch)
|
||||
monkeypatch.delenv("HERMES_MEDIA_ALLOW_DIRS", raising=False)
|
||||
monkeypatch.setenv("HERMES_MEDIA_TRUST_RECENT_FILES", "1")
|
||||
monkeypatch.setenv("HERMES_MEDIA_TRUST_RECENT_SECONDS", "600")
|
||||
|
||||
# Simulate $HOME so ~/.ssh resolves into our tmp dir.
|
||||
fake_home = tmp_path / "home"
|
||||
ssh_dir = fake_home / ".ssh"
|
||||
ssh_dir.mkdir(parents=True)
|
||||
secret = ssh_dir / "id_rsa.txt"
|
||||
secret.write_bytes(b"-----BEGIN ...") # mtime = now
|
||||
monkeypatch.setenv("HOME", str(fake_home))
|
||||
|
||||
assert BasePlatformAdapter.validate_media_delivery_path(str(secret)) is None
|
||||
|
||||
def test_recency_trust_allows_pdf_in_project_dir(self, tmp_path, monkeypatch):
|
||||
"""The motivating case: agent produces a PDF in a project directory.
|
||||
|
||||
Reproduces the Discord-PDF-not-delivered bug. Before recency trust,
|
||||
files outside ~/.hermes/cache/* were silently dropped, leaving the
|
||||
user with a raw filepath in chat instead of an attachment.
|
||||
"""
|
||||
self._patch_roots(monkeypatch)
|
||||
monkeypatch.delenv("HERMES_MEDIA_ALLOW_DIRS", raising=False)
|
||||
monkeypatch.setenv("HERMES_MEDIA_TRUST_RECENT_FILES", "1")
|
||||
monkeypatch.setenv("HERMES_MEDIA_TRUST_RECENT_SECONDS", "600")
|
||||
|
||||
project = tmp_path / "my-project"
|
||||
report = project / "build" / "weekly-report.pdf"
|
||||
report.parent.mkdir(parents=True)
|
||||
report.write_bytes(b"%PDF-1.4")
|
||||
|
||||
assert BasePlatformAdapter.validate_media_delivery_path(str(report)) == str(report.resolve())
|
||||
|
||||
def test_filter_keeps_recently_produced_files(self, tmp_path, monkeypatch):
|
||||
"""End-to-end: filter_local_delivery_paths routes a fresh PDF through."""
|
||||
self._patch_roots(monkeypatch)
|
||||
monkeypatch.delenv("HERMES_MEDIA_ALLOW_DIRS", raising=False)
|
||||
monkeypatch.setenv("HERMES_MEDIA_TRUST_RECENT_FILES", "1")
|
||||
monkeypatch.setenv("HERMES_MEDIA_TRUST_RECENT_SECONDS", "600")
|
||||
|
||||
fresh = tmp_path / "report.pdf"
|
||||
fresh.write_bytes(b"%PDF-1.4")
|
||||
|
||||
out = BasePlatformAdapter.filter_local_delivery_paths([str(fresh)])
|
||||
assert out == [str(fresh.resolve())]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# should_send_media_as_audio
|
||||
|
|
|
|||
|
|
@ -234,6 +234,10 @@ async def test_streaming_delivery_blocks_media_path_outside_allowed_roots(tmp_pa
|
|||
"gateway.platforms.base.MEDIA_DELIVERY_SAFE_ROOTS",
|
||||
(allowed_root,),
|
||||
)
|
||||
# This test exercises the strict-allowlist path; disable recency trust so
|
||||
# the freshly-written tmp_path file is not auto-accepted by the trust
|
||||
# window. (Recency trust is covered separately in test_platform_base.py.)
|
||||
monkeypatch.setenv("HERMES_MEDIA_TRUST_RECENT_FILES", "0")
|
||||
adapter = SimpleNamespace(
|
||||
name="test",
|
||||
extract_media=BasePlatformAdapter.extract_media,
|
||||
|
|
|
|||
|
|
@ -377,7 +377,11 @@ class TestSendMessageTool:
|
|||
user_id="user-123",
|
||||
)
|
||||
|
||||
def test_media_tag_outside_allowed_roots_is_not_sent(self, tmp_path):
|
||||
def test_media_tag_outside_allowed_roots_is_not_sent(self, tmp_path, monkeypatch):
|
||||
# This test exercises the strict-allowlist path; disable recency trust
|
||||
# so the freshly-written tmp_path file is not auto-accepted by the
|
||||
# trust window. (Recency trust is covered in test_platform_base.py.)
|
||||
monkeypatch.setenv("HERMES_MEDIA_TRUST_RECENT_FILES", "0")
|
||||
config, telegram_cfg = _make_config()
|
||||
secret = tmp_path / "secret.pdf"
|
||||
secret.write_bytes(b"%PDF secret")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue