feat: add .zip document support and auto-mount cache dirs into remote backends (#4846)

- Add .zip to SUPPORTED_DOCUMENT_TYPES so gateway platforms (Telegram,
  Slack, Discord) cache uploaded zip files instead of rejecting them.
- Add get_cache_directory_mounts() and iter_cache_files() to
  credential_files.py for host-side cache directory passthrough
  (documents, images, audio, screenshots).
- Docker: bind-mount cache dirs read-only alongside credentials/skills.
  Changes are live (bind mount semantics).
- Modal: mount cache files at sandbox creation + resync before each
  command via _sync_files() with mtime+size change detection.
- Handles backward-compat with legacy dir names (document_cache,
  image_cache, audio_cache, browser_screenshots) via get_hermes_dir().
- Container paths always use the new cache/<subdir> layout regardless
  of host layout.

This replaces the need for a dedicated extract_archive tool (PR #4819)
— the agent can now use standard terminal commands (unzip, tar) on
uploaded files inside remote containers.

Supersedes: PR #4819 by kshitijk4poor
This commit is contained in:
Teknium 2026-04-03 13:16:26 -07:00 committed by GitHub
parent 8a384628a5
commit b1756084a3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 274 additions and 47 deletions

View file

@ -235,6 +235,7 @@ SUPPORTED_DOCUMENT_TYPES = {
".pdf": "application/pdf", ".pdf": "application/pdf",
".md": "text/markdown", ".md": "text/markdown",
".txt": "text/plain", ".txt": "text/plain",
".zip": "application/zip",
".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", ".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", ".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
".pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation", ".pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",

View file

@ -227,16 +227,19 @@ class TestIncomingDocumentHandling:
adapter.handle_message.assert_called_once() adapter.handle_message.assert_called_once()
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_unsupported_type_skipped(self, adapter): async def test_zip_document_cached(self, adapter):
"""An unsupported file type (.zip) should be skipped silently.""" """A .zip file should be cached as a supported document."""
msg = make_message([ msg = make_message([
make_attachment(filename="archive.zip", content_type="application/zip") make_attachment(filename="archive.zip", content_type="application/zip")
]) ])
await adapter._handle_message(msg)
with _mock_aiohttp_download(b"PK\x03\x04test"):
await adapter._handle_message(msg)
event = adapter.handle_message.call_args[0][0] event = adapter.handle_message.call_args[0][0]
assert event.media_urls == [] assert len(event.media_urls) == 1
assert event.message_type == MessageType.TEXT assert event.media_types == ["application/zip"]
assert event.message_type == MessageType.DOCUMENT
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_download_error_handled(self, adapter): async def test_download_error_handled(self, adapter):

View file

@ -151,7 +151,7 @@ class TestSupportedDocumentTypes:
@pytest.mark.parametrize( @pytest.mark.parametrize(
"ext", "ext",
[".pdf", ".md", ".txt", ".docx", ".xlsx", ".pptx"], [".pdf", ".md", ".txt", ".zip", ".docx", ".xlsx", ".pptx"],
) )
def test_expected_extensions_present(self, ext): def test_expected_extensions_present(self, ext):
assert ext in SUPPORTED_DOCUMENT_TYPES assert ext in SUPPORTED_DOCUMENT_TYPES

View file

@ -408,19 +408,22 @@ class TestIncomingDocumentHandling:
assert "[Content of" not in (msg_event.text or "") assert "[Content of" not in (msg_event.text or "")
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_unsupported_file_type_skipped(self, adapter): async def test_zip_file_cached(self, adapter):
"""A .zip file should be silently skipped.""" """A .zip file should be cached as a supported document."""
event = self._make_event(files=[{ with patch.object(adapter, "_download_slack_file_bytes", new_callable=AsyncMock) as dl:
"mimetype": "application/zip", dl.return_value = b"PK\x03\x04zip"
"name": "archive.zip", event = self._make_event(files=[{
"url_private_download": "https://files.slack.com/archive.zip", "mimetype": "application/zip",
"size": 1024, "name": "archive.zip",
}]) "url_private_download": "https://files.slack.com/archive.zip",
await adapter._handle_slack_message(event) "size": 1024,
}])
await adapter._handle_slack_message(event)
msg_event = adapter.handle_message.call_args[0][0] msg_event = adapter.handle_message.call_args[0][0]
assert msg_event.message_type == MessageType.TEXT assert msg_event.message_type == MessageType.DOCUMENT
assert len(msg_event.media_urls) == 0 assert len(msg_event.media_urls) == 1
assert msg_event.media_types == ["application/zip"]
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_oversized_document_skipped(self, adapter): async def test_oversized_document_skipped(self, adapter):

View file

@ -236,15 +236,16 @@ class TestDocumentDownloadBlock:
assert "Please summarize" in event.text assert "Please summarize" in event.text
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_unsupported_type_rejected(self, adapter): async def test_zip_document_cached(self, adapter):
"""A .zip upload should be cached as a supported document."""
doc = _make_document(file_name="archive.zip", mime_type="application/zip", file_size=100) doc = _make_document(file_name="archive.zip", mime_type="application/zip", file_size=100)
msg = _make_message(document=doc) msg = _make_message(document=doc)
update = _make_update(msg) update = _make_update(msg)
await adapter._handle_media_message(update, MagicMock()) await adapter._handle_media_message(update, MagicMock())
event = adapter.handle_message.call_args[0][0] event = adapter.handle_message.call_args[0][0]
assert "Unsupported document type" in event.text assert event.media_urls and event.media_urls[0].endswith("archive.zip")
assert ".zip" in event.text assert event.media_types == ["application/zip"]
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_oversized_file_rejected(self, adapter): async def test_oversized_file_rejected(self, adapter):

View file

@ -10,7 +10,9 @@ import pytest
from tools.credential_files import ( from tools.credential_files import (
clear_credential_files, clear_credential_files,
get_credential_file_mounts, get_credential_file_mounts,
get_cache_directory_mounts,
get_skills_directory_mount, get_skills_directory_mount,
iter_cache_files,
iter_skills_files, iter_skills_files,
register_credential_file, register_credential_file,
register_credential_files, register_credential_files,
@ -358,3 +360,116 @@ class TestConfigPathTraversal:
mounts = get_credential_file_mounts() mounts = get_credential_file_mounts()
assert len(mounts) == 1 assert len(mounts) == 1
assert "oauth.json" in mounts[0]["container_path"] assert "oauth.json" in mounts[0]["container_path"]
# ---------------------------------------------------------------------------
# Cache directory mounts
# ---------------------------------------------------------------------------


class TestCacheDirectoryMounts:
    """Tests for get_cache_directory_mounts().

    All tests point HERMES_HOME at a temp dir so the on-disk layout is
    fully controlled; iter_cache_files() is covered separately below.
    """

    def test_returns_existing_cache_dirs(self, tmp_path, monkeypatch):
        """Existing cache dirs are returned with correct container paths."""
        hermes_home = tmp_path / ".hermes"
        hermes_home.mkdir()
        (hermes_home / "cache" / "documents").mkdir(parents=True)
        (hermes_home / "cache" / "audio").mkdir(parents=True)
        monkeypatch.setenv("HERMES_HOME", str(hermes_home))

        mounts = get_cache_directory_mounts()

        paths = {m["container_path"] for m in mounts}
        assert "/root/.hermes/cache/documents" in paths
        assert "/root/.hermes/cache/audio" in paths

    def test_skips_nonexistent_dirs(self, tmp_path, monkeypatch):
        """Dirs that don't exist on disk are not returned."""
        hermes_home = tmp_path / ".hermes"
        hermes_home.mkdir()
        # Create only one cache dir
        (hermes_home / "cache" / "documents").mkdir(parents=True)
        monkeypatch.setenv("HERMES_HOME", str(hermes_home))

        mounts = get_cache_directory_mounts()

        assert len(mounts) == 1
        assert mounts[0]["container_path"] == "/root/.hermes/cache/documents"

    def test_legacy_dir_names_resolved(self, tmp_path, monkeypatch):
        """Old-style dir names (e.g. document_cache) are resolved correctly."""
        hermes_home = tmp_path / ".hermes"
        hermes_home.mkdir()
        # Use legacy dir name — get_hermes_dir prefers old if it exists
        (hermes_home / "document_cache").mkdir()
        (hermes_home / "image_cache").mkdir()
        monkeypatch.setenv("HERMES_HOME", str(hermes_home))

        mounts = get_cache_directory_mounts()

        host_paths = {m["host_path"] for m in mounts}
        assert str(hermes_home / "document_cache") in host_paths
        assert str(hermes_home / "image_cache") in host_paths
        # Container paths always use the new layout
        container_paths = {m["container_path"] for m in mounts}
        assert "/root/.hermes/cache/documents" in container_paths
        assert "/root/.hermes/cache/images" in container_paths

    def test_empty_hermes_home(self, tmp_path, monkeypatch):
        """No cache dirs → empty list."""
        hermes_home = tmp_path / ".hermes"
        hermes_home.mkdir()
        monkeypatch.setenv("HERMES_HOME", str(hermes_home))

        assert get_cache_directory_mounts() == []
class TestIterCacheFiles:
    """Tests for iter_cache_files()."""

    def test_enumerates_files(self, tmp_path, monkeypatch):
        """Regular files in cache dirs are returned."""
        hermes_home = tmp_path / ".hermes"
        doc_dir = hermes_home / "cache" / "documents"
        doc_dir.mkdir(parents=True)
        (doc_dir / "upload.zip").write_bytes(b"PK\x03\x04")
        (doc_dir / "report.pdf").write_bytes(b"%PDF-1.4")
        monkeypatch.setenv("HERMES_HOME", str(hermes_home))

        entries = iter_cache_files()

        names = {Path(e["container_path"]).name for e in entries}
        assert "upload.zip" in names
        assert "report.pdf" in names

    def test_skips_symlinks(self, tmp_path, monkeypatch):
        """Symlinks inside cache dirs are skipped."""
        hermes_home = tmp_path / ".hermes"
        doc_dir = hermes_home / "cache" / "documents"
        doc_dir.mkdir(parents=True)
        real_file = doc_dir / "real.txt"
        real_file.write_text("content")
        (doc_dir / "link.txt").symlink_to(real_file)
        monkeypatch.setenv("HERMES_HOME", str(hermes_home))

        entries = iter_cache_files()

        names = [Path(e["container_path"]).name for e in entries]
        assert "real.txt" in names
        assert "link.txt" not in names

    def test_nested_files(self, tmp_path, monkeypatch):
        """Files in subdirectories are included with correct relative paths."""
        hermes_home = tmp_path / ".hermes"
        ss_dir = hermes_home / "cache" / "screenshots"
        sub = ss_dir / "session_abc"
        sub.mkdir(parents=True)
        (sub / "screen1.png").write_bytes(b"PNG")
        monkeypatch.setenv("HERMES_HOME", str(hermes_home))

        entries = iter_cache_files()

        assert len(entries) == 1
        assert entries[0]["container_path"] == "/root/.hermes/cache/screenshots/session_abc/screen1.png"

    def test_empty_cache(self, tmp_path, monkeypatch):
        """No cache dirs → empty list."""
        hermes_home = tmp_path / ".hermes"
        hermes_home.mkdir()
        monkeypatch.setenv("HERMES_HOME", str(hermes_home))

        assert iter_cache_files() == []

View file

@ -1,29 +1,21 @@
"""Credential file passthrough registry for remote terminal backends. """File passthrough registry for remote terminal backends.
Skills that declare ``required_credential_files`` in their frontmatter need Remote backends (Docker, Modal, SSH) create sandboxes with no host files.
those files available inside sandboxed execution environments (Modal, Docker). This module ensures that credential files, skill directories, and host-side
By default remote backends create bare containers with no host files. cache directories (documents, images, audio, screenshots) are mounted or
synced into those sandboxes so the agent can access them.
This module provides a session-scoped registry so skill-declared credential **Credentials and skills** session-scoped registry fed by skill declarations
files (and user-configured overrides) are mounted into remote sandboxes. (``required_credential_files``) and user config (``terminal.credential_files``).
Two sources feed the registry: **Cache directories** gateway-cached uploads, browser screenshots, TTS
audio, and processed images. Mounted read-only so the remote terminal can
reference files the host side created (e.g. ``unzip`` an uploaded archive).
1. **Skill declarations** when a skill is loaded via ``skill_view``, its Remote backends call :func:`get_credential_file_mounts`,
``required_credential_files`` entries are registered here if the files :func:`get_skills_directory_mount` / :func:`iter_skills_files`, and
exist on the host. :func:`get_cache_directory_mounts` / :func:`iter_cache_files` at sandbox
2. **User config** ``terminal.credential_files`` in config.yaml lets users creation time and before each command (for resync on Modal).
explicitly list additional files to mount.
Remote backends (``tools/environments/modal.py``, ``docker.py``) call
:func:`get_credential_file_mounts` at sandbox creation time.
Each registered entry is a dict::
{
"host_path": "/home/user/.hermes/google_token.json",
"container_path": "/root/.hermes/google_token.json",
}
""" """
from __future__ import annotations from __future__ import annotations
@ -300,6 +292,71 @@ def iter_skills_files(
return result return result
# ---------------------------------------------------------------------------
# Cache directory mounts (documents, images, audio, screenshots)
# ---------------------------------------------------------------------------

# The four cache subdirectories that should be mirrored into remote backends.
# Each tuple is (new_subpath, old_name) matching hermes_constants.get_hermes_dir().
_CACHE_DIRS: list[tuple[str, str]] = [
    ("cache/documents", "document_cache"),
    ("cache/images", "image_cache"),
    ("cache/audio", "audio_cache"),
    ("cache/screenshots", "browser_screenshots"),
]


def get_cache_directory_mounts(
    container_base: str = "/root/.hermes",
) -> List[Dict[str, str]]:
    """Return mount entries for each cache directory that exists on disk.

    Used by Docker to create bind mounts. Each entry has ``host_path`` and
    ``container_path`` keys. The host path is resolved via
    ``get_hermes_dir()`` for backward compatibility with old directory layouts.

    Args:
        container_base: Root of the hermes tree inside the container;
            trailing slashes are normalized away.
    """
    # Imported lazily to avoid a hard module-level dependency cycle.
    from hermes_constants import get_hermes_dir

    mounts: List[Dict[str, str]] = []
    for new_subpath, old_name in _CACHE_DIRS:
        host_dir = get_hermes_dir(new_subpath, old_name)
        if host_dir.is_dir():
            # Always map to the *new* container layout regardless of host layout.
            container_path = f"{container_base.rstrip('/')}/{new_subpath}"
            mounts.append({
                "host_path": str(host_dir),
                "container_path": container_path,
            })
    return mounts


def iter_cache_files(
    container_base: str = "/root/.hermes",
) -> List[Dict[str, str]]:
    """Return individual (host_path, container_path) entries for cache files.

    Used by Modal to upload files individually and resync before each command.
    Skips symlinks. The container paths use the new ``cache/<subdir>`` layout.

    Args:
        container_base: Root of the hermes tree inside the container;
            trailing slashes are normalized away.
    """
    # Imported lazily to avoid a hard module-level dependency cycle.
    from hermes_constants import get_hermes_dir

    result: List[Dict[str, str]] = []
    for new_subpath, old_name in _CACHE_DIRS:
        host_dir = get_hermes_dir(new_subpath, old_name)
        if not host_dir.is_dir():
            continue
        container_root = f"{container_base.rstrip('/')}/{new_subpath}"
        for item in host_dir.rglob("*"):
            # Skip symlinks (could point outside the cache) and non-files.
            if item.is_symlink() or not item.is_file():
                continue
            rel = item.relative_to(host_dir)
            result.append({
                "host_path": str(item),
                "container_path": f"{container_root}/{rel}",
            })
    return result
def clear_credential_files() -> None: def clear_credential_files() -> None:
"""Reset the skill-scoped registry (e.g. on session reset).""" """Reset the skill-scoped registry (e.g. on session reset)."""
_registered_files.clear() _registered_files.clear()

View file

@ -315,7 +315,11 @@ class DockerEnvironment(BaseEnvironment):
# Mount credential files (OAuth tokens, etc.) declared by skills. # Mount credential files (OAuth tokens, etc.) declared by skills.
# Read-only so the container can authenticate but not modify host creds. # Read-only so the container can authenticate but not modify host creds.
try: try:
from tools.credential_files import get_credential_file_mounts, get_skills_directory_mount from tools.credential_files import (
get_credential_file_mounts,
get_skills_directory_mount,
get_cache_directory_mounts,
)
for mount_entry in get_credential_file_mounts(): for mount_entry in get_credential_file_mounts():
volume_args.extend([ volume_args.extend([
@ -341,6 +345,21 @@ class DockerEnvironment(BaseEnvironment):
skills_mount["host_path"], skills_mount["host_path"],
skills_mount["container_path"], skills_mount["container_path"],
) )
# Mount host-side cache directories (documents, images, audio,
# screenshots) so the agent can access uploaded files and other
# cached media from inside the container. Read-only — the
# container reads these but the host gateway manages writes.
for cache_mount in get_cache_directory_mounts():
volume_args.extend([
"-v",
f"{cache_mount['host_path']}:{cache_mount['container_path']}:ro",
])
logger.info(
"Docker: mounting cache dir %s -> %s",
cache_mount["host_path"],
cache_mount["container_path"],
)
except Exception as e: except Exception as e:
logger.debug("Docker: could not load credential file mounts: %s", e) logger.debug("Docker: could not load credential file mounts: %s", e)

View file

@ -186,7 +186,11 @@ class ModalEnvironment(BaseModalExecutionEnvironment):
cred_mounts = [] cred_mounts = []
try: try:
from tools.credential_files import get_credential_file_mounts, iter_skills_files from tools.credential_files import (
get_credential_file_mounts,
iter_skills_files,
iter_cache_files,
)
for mount_entry in get_credential_file_mounts(): for mount_entry in get_credential_file_mounts():
cred_mounts.append( cred_mounts.append(
@ -212,6 +216,20 @@ class ModalEnvironment(BaseModalExecutionEnvironment):
) )
if skills_files: if skills_files:
logger.info("Modal: mounting %d skill files", len(skills_files)) logger.info("Modal: mounting %d skill files", len(skills_files))
# Mount host-side cache files (documents, images, audio,
# screenshots). New files arriving mid-session are picked up
# by _sync_files() before each command execution.
cache_files = iter_cache_files()
for entry in cache_files:
cred_mounts.append(
_modal.Mount.from_local_file(
entry["host_path"],
remote_path=entry["container_path"],
)
)
if cache_files:
logger.info("Modal: mounting %d cache files", len(cache_files))
except Exception as e: except Exception as e:
logger.debug("Modal: could not load credential file mounts: %s", e) logger.debug("Modal: could not load credential file mounts: %s", e)
@ -308,13 +326,19 @@ class ModalEnvironment(BaseModalExecutionEnvironment):
return True return True
def _sync_files(self) -> None: def _sync_files(self) -> None:
"""Push credential files and skill files into the running sandbox. """Push credential, skill, and cache files into the running sandbox.
Runs before each command. Uses mtime+size caching so only changed Runs before each command. Uses mtime+size caching so only changed
files are pushed (~13μs overhead in the no-op case). files are pushed (~13μs overhead in the no-op case). Cache files
are especially important here new uploads/screenshots may appear
mid-session after sandbox creation.
""" """
try: try:
from tools.credential_files import get_credential_file_mounts, iter_skills_files from tools.credential_files import (
get_credential_file_mounts,
iter_skills_files,
iter_cache_files,
)
for entry in get_credential_file_mounts(): for entry in get_credential_file_mounts():
if self._push_file_to_sandbox(entry["host_path"], entry["container_path"]): if self._push_file_to_sandbox(entry["host_path"], entry["container_path"]):
@ -323,6 +347,10 @@ class ModalEnvironment(BaseModalExecutionEnvironment):
for entry in iter_skills_files(): for entry in iter_skills_files():
if self._push_file_to_sandbox(entry["host_path"], entry["container_path"]): if self._push_file_to_sandbox(entry["host_path"], entry["container_path"]):
logger.debug("Modal: synced skill file %s", entry["container_path"]) logger.debug("Modal: synced skill file %s", entry["container_path"])
for entry in iter_cache_files():
if self._push_file_to_sandbox(entry["host_path"], entry["container_path"]):
logger.debug("Modal: synced cache file %s", entry["container_path"])
except Exception as e: except Exception as e:
logger.debug("Modal: file sync failed: %s", e) logger.debug("Modal: file sync failed: %s", e)