feat: add .zip document support and auto-mount cache dirs into remote backends (#4846)

- Add .zip to SUPPORTED_DOCUMENT_TYPES so gateway platforms (Telegram,
  Slack, Discord) cache uploaded zip files instead of rejecting them.
- Add get_cache_directory_mounts() and iter_cache_files() to
  credential_files.py for host-side cache directory passthrough
  (documents, images, audio, screenshots).
- Docker: bind-mount cache dirs read-only alongside credentials/skills.
  Changes are live (bind mount semantics).
- Modal: mount cache files at sandbox creation + resync before each
  command via _sync_files() with mtime+size change detection.
- Handles backward-compat with legacy dir names (document_cache,
  image_cache, audio_cache, browser_screenshots) via get_hermes_dir().
- Container paths always use the new cache/<subdir> layout regardless
  of host layout.

This replaces the need for a dedicated extract_archive tool (PR #4819)
— the agent can now use standard terminal commands (unzip, tar) on
uploaded files inside remote containers.

Supersedes: PR #4819 by kshitijk4poor
This commit is contained in:
Teknium 2026-04-03 13:16:26 -07:00 committed by GitHub
parent 8a384628a5
commit b1756084a3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 274 additions and 47 deletions

View file

@ -235,6 +235,7 @@ SUPPORTED_DOCUMENT_TYPES = {
".pdf": "application/pdf", ".pdf": "application/pdf",
".md": "text/markdown", ".md": "text/markdown",
".txt": "text/plain", ".txt": "text/plain",
".zip": "application/zip",
".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", ".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", ".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
".pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation", ".pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",

View file

@ -227,16 +227,19 @@ class TestIncomingDocumentHandling:
adapter.handle_message.assert_called_once() adapter.handle_message.assert_called_once()
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_unsupported_type_skipped(self, adapter): async def test_zip_document_cached(self, adapter):
"""An unsupported file type (.zip) should be skipped silently.""" """A .zip file should be cached as a supported document."""
msg = make_message([ msg = make_message([
make_attachment(filename="archive.zip", content_type="application/zip") make_attachment(filename="archive.zip", content_type="application/zip")
]) ])
await adapter._handle_message(msg)
with _mock_aiohttp_download(b"PK\x03\x04test"):
await adapter._handle_message(msg)
event = adapter.handle_message.call_args[0][0] event = adapter.handle_message.call_args[0][0]
assert event.media_urls == [] assert len(event.media_urls) == 1
assert event.message_type == MessageType.TEXT assert event.media_types == ["application/zip"]
assert event.message_type == MessageType.DOCUMENT
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_download_error_handled(self, adapter): async def test_download_error_handled(self, adapter):

View file

@ -151,7 +151,7 @@ class TestSupportedDocumentTypes:
@pytest.mark.parametrize( @pytest.mark.parametrize(
"ext", "ext",
[".pdf", ".md", ".txt", ".docx", ".xlsx", ".pptx"], [".pdf", ".md", ".txt", ".zip", ".docx", ".xlsx", ".pptx"],
) )
def test_expected_extensions_present(self, ext): def test_expected_extensions_present(self, ext):
assert ext in SUPPORTED_DOCUMENT_TYPES assert ext in SUPPORTED_DOCUMENT_TYPES

View file

@ -408,19 +408,22 @@ class TestIncomingDocumentHandling:
assert "[Content of" not in (msg_event.text or "") assert "[Content of" not in (msg_event.text or "")
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_unsupported_file_type_skipped(self, adapter): async def test_zip_file_cached(self, adapter):
"""A .zip file should be silently skipped.""" """A .zip file should be cached as a supported document."""
event = self._make_event(files=[{ with patch.object(adapter, "_download_slack_file_bytes", new_callable=AsyncMock) as dl:
"mimetype": "application/zip", dl.return_value = b"PK\x03\x04zip"
"name": "archive.zip", event = self._make_event(files=[{
"url_private_download": "https://files.slack.com/archive.zip", "mimetype": "application/zip",
"size": 1024, "name": "archive.zip",
}]) "url_private_download": "https://files.slack.com/archive.zip",
await adapter._handle_slack_message(event) "size": 1024,
}])
await adapter._handle_slack_message(event)
msg_event = adapter.handle_message.call_args[0][0] msg_event = adapter.handle_message.call_args[0][0]
assert msg_event.message_type == MessageType.TEXT assert msg_event.message_type == MessageType.DOCUMENT
assert len(msg_event.media_urls) == 0 assert len(msg_event.media_urls) == 1
assert msg_event.media_types == ["application/zip"]
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_oversized_document_skipped(self, adapter): async def test_oversized_document_skipped(self, adapter):

View file

@ -236,15 +236,16 @@ class TestDocumentDownloadBlock:
assert "Please summarize" in event.text assert "Please summarize" in event.text
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_unsupported_type_rejected(self, adapter): async def test_zip_document_cached(self, adapter):
"""A .zip upload should be cached as a supported document."""
doc = _make_document(file_name="archive.zip", mime_type="application/zip", file_size=100) doc = _make_document(file_name="archive.zip", mime_type="application/zip", file_size=100)
msg = _make_message(document=doc) msg = _make_message(document=doc)
update = _make_update(msg) update = _make_update(msg)
await adapter._handle_media_message(update, MagicMock()) await adapter._handle_media_message(update, MagicMock())
event = adapter.handle_message.call_args[0][0] event = adapter.handle_message.call_args[0][0]
assert "Unsupported document type" in event.text assert event.media_urls and event.media_urls[0].endswith("archive.zip")
assert ".zip" in event.text assert event.media_types == ["application/zip"]
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_oversized_file_rejected(self, adapter): async def test_oversized_file_rejected(self, adapter):

View file

@ -10,7 +10,9 @@ import pytest
from tools.credential_files import ( from tools.credential_files import (
clear_credential_files, clear_credential_files,
get_credential_file_mounts, get_credential_file_mounts,
get_cache_directory_mounts,
get_skills_directory_mount, get_skills_directory_mount,
iter_cache_files,
iter_skills_files, iter_skills_files,
register_credential_file, register_credential_file,
register_credential_files, register_credential_files,
@ -358,3 +360,116 @@ class TestConfigPathTraversal:
mounts = get_credential_file_mounts() mounts = get_credential_file_mounts()
assert len(mounts) == 1 assert len(mounts) == 1
assert "oauth.json" in mounts[0]["container_path"] assert "oauth.json" in mounts[0]["container_path"]
# ---------------------------------------------------------------------------
# Cache directory mounts
# ---------------------------------------------------------------------------


class TestCacheDirectoryMounts:
    """Tests for get_cache_directory_mounts().

    All tests point HERMES_HOME at a temp dir so the on-disk layout is
    fully controlled; iter_cache_files() is covered separately below.
    """

    def test_returns_existing_cache_dirs(self, tmp_path, monkeypatch):
        """Existing cache dirs are returned with correct container paths."""
        hermes_home = tmp_path / ".hermes"
        hermes_home.mkdir()
        (hermes_home / "cache" / "documents").mkdir(parents=True)
        (hermes_home / "cache" / "audio").mkdir(parents=True)
        monkeypatch.setenv("HERMES_HOME", str(hermes_home))

        mounts = get_cache_directory_mounts()

        paths = {m["container_path"] for m in mounts}
        assert "/root/.hermes/cache/documents" in paths
        assert "/root/.hermes/cache/audio" in paths

    def test_skips_nonexistent_dirs(self, tmp_path, monkeypatch):
        """Dirs that don't exist on disk are not returned."""
        hermes_home = tmp_path / ".hermes"
        hermes_home.mkdir()
        # Create only one cache dir
        (hermes_home / "cache" / "documents").mkdir(parents=True)
        monkeypatch.setenv("HERMES_HOME", str(hermes_home))

        mounts = get_cache_directory_mounts()

        assert len(mounts) == 1
        assert mounts[0]["container_path"] == "/root/.hermes/cache/documents"

    def test_legacy_dir_names_resolved(self, tmp_path, monkeypatch):
        """Old-style dir names (e.g. document_cache) are resolved correctly."""
        hermes_home = tmp_path / ".hermes"
        hermes_home.mkdir()
        # Use legacy dir name — get_hermes_dir prefers old if it exists
        (hermes_home / "document_cache").mkdir()
        (hermes_home / "image_cache").mkdir()
        monkeypatch.setenv("HERMES_HOME", str(hermes_home))

        mounts = get_cache_directory_mounts()

        host_paths = {m["host_path"] for m in mounts}
        assert str(hermes_home / "document_cache") in host_paths
        assert str(hermes_home / "image_cache") in host_paths
        # Container paths always use the new layout
        container_paths = {m["container_path"] for m in mounts}
        assert "/root/.hermes/cache/documents" in container_paths
        assert "/root/.hermes/cache/images" in container_paths

    def test_empty_hermes_home(self, tmp_path, monkeypatch):
        """No cache dirs → empty list."""
        hermes_home = tmp_path / ".hermes"
        hermes_home.mkdir()
        monkeypatch.setenv("HERMES_HOME", str(hermes_home))

        assert get_cache_directory_mounts() == []
class TestIterCacheFiles:
    """Tests for iter_cache_files()."""

    def test_enumerates_files(self, tmp_path, monkeypatch):
        """Regular files in cache dirs are returned."""
        hermes_home = tmp_path / ".hermes"
        doc_dir = hermes_home / "cache" / "documents"
        doc_dir.mkdir(parents=True)
        (doc_dir / "upload.zip").write_bytes(b"PK\x03\x04")
        (doc_dir / "report.pdf").write_bytes(b"%PDF-1.4")
        monkeypatch.setenv("HERMES_HOME", str(hermes_home))

        entries = iter_cache_files()

        names = {Path(e["container_path"]).name for e in entries}
        assert "upload.zip" in names
        assert "report.pdf" in names

    def test_skips_symlinks(self, tmp_path, monkeypatch):
        """Symlinks inside cache dirs are skipped."""
        hermes_home = tmp_path / ".hermes"
        doc_dir = hermes_home / "cache" / "documents"
        doc_dir.mkdir(parents=True)
        real_file = doc_dir / "real.txt"
        real_file.write_text("content")
        (doc_dir / "link.txt").symlink_to(real_file)
        monkeypatch.setenv("HERMES_HOME", str(hermes_home))

        entries = iter_cache_files()

        names = [Path(e["container_path"]).name for e in entries]
        assert "real.txt" in names
        assert "link.txt" not in names

    def test_nested_files(self, tmp_path, monkeypatch):
        """Files in subdirectories are included with correct relative paths."""
        hermes_home = tmp_path / ".hermes"
        ss_dir = hermes_home / "cache" / "screenshots"
        sub = ss_dir / "session_abc"
        sub.mkdir(parents=True)
        (sub / "screen1.png").write_bytes(b"PNG")
        monkeypatch.setenv("HERMES_HOME", str(hermes_home))

        entries = iter_cache_files()

        assert len(entries) == 1
        assert entries[0]["container_path"] == "/root/.hermes/cache/screenshots/session_abc/screen1.png"

    def test_empty_cache(self, tmp_path, monkeypatch):
        """No cache dirs → empty list."""
        hermes_home = tmp_path / ".hermes"
        hermes_home.mkdir()
        monkeypatch.setenv("HERMES_HOME", str(hermes_home))

        assert iter_cache_files() == []

View file

@ -1,29 +1,21 @@
"""Credential file passthrough registry for remote terminal backends. """File passthrough registry for remote terminal backends.
Skills that declare ``required_credential_files`` in their frontmatter need Remote backends (Docker, Modal, SSH) create sandboxes with no host files.
those files available inside sandboxed execution environments (Modal, Docker). This module ensures that credential files, skill directories, and host-side
By default remote backends create bare containers with no host files. cache directories (documents, images, audio, screenshots) are mounted or
synced into those sandboxes so the agent can access them.
This module provides a session-scoped registry so skill-declared credential **Credentials and skills** session-scoped registry fed by skill declarations
files (and user-configured overrides) are mounted into remote sandboxes. (``required_credential_files``) and user config (``terminal.credential_files``).
Two sources feed the registry: **Cache directories** gateway-cached uploads, browser screenshots, TTS
audio, and processed images. Mounted read-only so the remote terminal can
reference files the host side created (e.g. ``unzip`` an uploaded archive).
1. **Skill declarations** when a skill is loaded via ``skill_view``, its Remote backends call :func:`get_credential_file_mounts`,
``required_credential_files`` entries are registered here if the files :func:`get_skills_directory_mount` / :func:`iter_skills_files`, and
exist on the host. :func:`get_cache_directory_mounts` / :func:`iter_cache_files` at sandbox
2. **User config** ``terminal.credential_files`` in config.yaml lets users creation time and before each command (for resync on Modal).
explicitly list additional files to mount.
Remote backends (``tools/environments/modal.py``, ``docker.py``) call
:func:`get_credential_file_mounts` at sandbox creation time.
Each registered entry is a dict::
{
"host_path": "/home/user/.hermes/google_token.json",
"container_path": "/root/.hermes/google_token.json",
}
""" """
from __future__ import annotations from __future__ import annotations
@ -300,6 +292,71 @@ def iter_skills_files(
return result return result
# ---------------------------------------------------------------------------
# Cache directory mounts (documents, images, audio, screenshots)
# ---------------------------------------------------------------------------

# The four cache subdirectories that should be mirrored into remote backends.
# Each tuple is (new_subpath, old_name) matching hermes_constants.get_hermes_dir().
_CACHE_DIRS: list[tuple[str, str]] = [
    ("cache/documents", "document_cache"),
    ("cache/images", "image_cache"),
    ("cache/audio", "audio_cache"),
    ("cache/screenshots", "browser_screenshots"),
]


def get_cache_directory_mounts(
    container_base: str = "/root/.hermes",
) -> List[Dict[str, str]]:
    """Return mount entries for each cache directory that exists on disk.

    Used by Docker to create bind mounts. Each entry has ``host_path`` and
    ``container_path`` keys. The host path is resolved via
    ``get_hermes_dir()`` for backward compatibility with old directory layouts.

    Args:
        container_base: Root of the hermes tree inside the container;
            trailing slashes are normalized away.
    """
    # Imported lazily to avoid a hard module-level dependency cycle.
    from hermes_constants import get_hermes_dir

    mounts: List[Dict[str, str]] = []
    for new_subpath, old_name in _CACHE_DIRS:
        host_dir = get_hermes_dir(new_subpath, old_name)
        if host_dir.is_dir():
            # Always map to the *new* container layout regardless of host layout.
            container_path = f"{container_base.rstrip('/')}/{new_subpath}"
            mounts.append({
                "host_path": str(host_dir),
                "container_path": container_path,
            })
    return mounts


def iter_cache_files(
    container_base: str = "/root/.hermes",
) -> List[Dict[str, str]]:
    """Return individual (host_path, container_path) entries for cache files.

    Used by Modal to upload files individually and resync before each command.
    Skips symlinks. The container paths use the new ``cache/<subdir>`` layout.

    Args:
        container_base: Root of the hermes tree inside the container;
            trailing slashes are normalized away.
    """
    # Imported lazily to avoid a hard module-level dependency cycle.
    from hermes_constants import get_hermes_dir

    result: List[Dict[str, str]] = []
    for new_subpath, old_name in _CACHE_DIRS:
        host_dir = get_hermes_dir(new_subpath, old_name)
        if not host_dir.is_dir():
            continue
        container_root = f"{container_base.rstrip('/')}/{new_subpath}"
        for item in host_dir.rglob("*"):
            # Skip symlinks (could point outside the cache) and non-files.
            if item.is_symlink() or not item.is_file():
                continue
            rel = item.relative_to(host_dir)
            result.append({
                "host_path": str(item),
                "container_path": f"{container_root}/{rel}",
            })
    return result
def clear_credential_files() -> None: def clear_credential_files() -> None:
"""Reset the skill-scoped registry (e.g. on session reset).""" """Reset the skill-scoped registry (e.g. on session reset)."""
_registered_files.clear() _registered_files.clear()

View file

@ -315,7 +315,11 @@ class DockerEnvironment(BaseEnvironment):
# Mount credential files (OAuth tokens, etc.) declared by skills. # Mount credential files (OAuth tokens, etc.) declared by skills.
# Read-only so the container can authenticate but not modify host creds. # Read-only so the container can authenticate but not modify host creds.
try: try:
from tools.credential_files import get_credential_file_mounts, get_skills_directory_mount from tools.credential_files import (
get_credential_file_mounts,
get_skills_directory_mount,
get_cache_directory_mounts,
)
for mount_entry in get_credential_file_mounts(): for mount_entry in get_credential_file_mounts():
volume_args.extend([ volume_args.extend([
@ -341,6 +345,21 @@ class DockerEnvironment(BaseEnvironment):
skills_mount["host_path"], skills_mount["host_path"],
skills_mount["container_path"], skills_mount["container_path"],
) )
# Mount host-side cache directories (documents, images, audio,
# screenshots) so the agent can access uploaded files and other
# cached media from inside the container. Read-only — the
# container reads these but the host gateway manages writes.
for cache_mount in get_cache_directory_mounts():
volume_args.extend([
"-v",
f"{cache_mount['host_path']}:{cache_mount['container_path']}:ro",
])
logger.info(
"Docker: mounting cache dir %s -> %s",
cache_mount["host_path"],
cache_mount["container_path"],
)
except Exception as e: except Exception as e:
logger.debug("Docker: could not load credential file mounts: %s", e) logger.debug("Docker: could not load credential file mounts: %s", e)

View file

@ -186,7 +186,11 @@ class ModalEnvironment(BaseModalExecutionEnvironment):
cred_mounts = [] cred_mounts = []
try: try:
from tools.credential_files import get_credential_file_mounts, iter_skills_files from tools.credential_files import (
get_credential_file_mounts,
iter_skills_files,
iter_cache_files,
)
for mount_entry in get_credential_file_mounts(): for mount_entry in get_credential_file_mounts():
cred_mounts.append( cred_mounts.append(
@ -212,6 +216,20 @@ class ModalEnvironment(BaseModalExecutionEnvironment):
) )
if skills_files: if skills_files:
logger.info("Modal: mounting %d skill files", len(skills_files)) logger.info("Modal: mounting %d skill files", len(skills_files))
# Mount host-side cache files (documents, images, audio,
# screenshots). New files arriving mid-session are picked up
# by _sync_files() before each command execution.
cache_files = iter_cache_files()
for entry in cache_files:
cred_mounts.append(
_modal.Mount.from_local_file(
entry["host_path"],
remote_path=entry["container_path"],
)
)
if cache_files:
logger.info("Modal: mounting %d cache files", len(cache_files))
except Exception as e: except Exception as e:
logger.debug("Modal: could not load credential file mounts: %s", e) logger.debug("Modal: could not load credential file mounts: %s", e)
@ -308,13 +326,19 @@ class ModalEnvironment(BaseModalExecutionEnvironment):
return True return True
def _sync_files(self) -> None: def _sync_files(self) -> None:
"""Push credential files and skill files into the running sandbox. """Push credential, skill, and cache files into the running sandbox.
Runs before each command. Uses mtime+size caching so only changed Runs before each command. Uses mtime+size caching so only changed
files are pushed (~13μs overhead in the no-op case). files are pushed (~13μs overhead in the no-op case). Cache files
are especially important here new uploads/screenshots may appear
mid-session after sandbox creation.
""" """
try: try:
from tools.credential_files import get_credential_file_mounts, iter_skills_files from tools.credential_files import (
get_credential_file_mounts,
iter_skills_files,
iter_cache_files,
)
for entry in get_credential_file_mounts(): for entry in get_credential_file_mounts():
if self._push_file_to_sandbox(entry["host_path"], entry["container_path"]): if self._push_file_to_sandbox(entry["host_path"], entry["container_path"]):
@ -323,6 +347,10 @@ class ModalEnvironment(BaseModalExecutionEnvironment):
for entry in iter_skills_files(): for entry in iter_skills_files():
if self._push_file_to_sandbox(entry["host_path"], entry["container_path"]): if self._push_file_to_sandbox(entry["host_path"], entry["container_path"]):
logger.debug("Modal: synced skill file %s", entry["container_path"]) logger.debug("Modal: synced skill file %s", entry["container_path"])
for entry in iter_cache_files():
if self._push_file_to_sandbox(entry["host_path"], entry["container_path"]):
logger.debug("Modal: synced cache file %s", entry["container_path"])
except Exception as e: except Exception as e:
logger.debug("Modal: file sync failed: %s", e) logger.debug("Modal: file sync failed: %s", e)