From b1756084a3c0ceae76da9ebc721cf31bbed0a1c3 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Fri, 3 Apr 2026 13:16:26 -0700 Subject: [PATCH] feat: add .zip document support and auto-mount cache dirs into remote backends (#4846) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add .zip to SUPPORTED_DOCUMENT_TYPES so gateway platforms (Telegram, Slack, Discord) cache uploaded zip files instead of rejecting them. - Add get_cache_directory_mounts() and iter_cache_files() to credential_files.py for host-side cache directory passthrough (documents, images, audio, screenshots). - Docker: bind-mount cache dirs read-only alongside credentials/skills. Changes are live (bind mount semantics). - Modal: mount cache files at sandbox creation + resync before each command via _sync_files() with mtime+size change detection. - Handles backward-compat with legacy dir names (document_cache, image_cache, audio_cache, browser_screenshots) via get_hermes_dir(). - Container paths always use the new cache/ layout regardless of host layout. This replaces the need for a dedicated extract_archive tool (PR #4819) — the agent can now use standard terminal commands (unzip, tar) on uploaded files inside remote containers. Closes: related to PR #4819 by kshitijk4poor --- gateway/platforms/base.py | 1 + .../gateway/test_discord_document_handling.py | 13 +- tests/gateway/test_document_cache.py | 2 +- tests/gateway/test_slack.py | 25 ++-- tests/gateway/test_telegram_documents.py | 7 +- tests/tools/test_credential_files.py | 115 ++++++++++++++++++ tools/credential_files.py | 101 +++++++++++---- tools/environments/docker.py | 21 +++- tools/environments/modal.py | 36 +++++- 9 files changed, 274 insertions(+), 47 deletions(-) diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py index c33c2924a..578ed6841 100644 --- a/gateway/platforms/base.py +++ b/gateway/platforms/base.py @@ -235,6 +235,7 @@ SUPPORTED_DOCUMENT_TYPES = { ".pdf": "application/pdf", ".md": "text/markdown", ".txt": "text/plain", + ".zip": "application/zip", ".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", ".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", ".pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation", diff --git a/tests/gateway/test_discord_document_handling.py b/tests/gateway/test_discord_document_handling.py index b7be161cd..7f918d1c7 100644 --- a/tests/gateway/test_discord_document_handling.py +++ b/tests/gateway/test_discord_document_handling.py @@ -227,16 +227,19 @@ class TestIncomingDocumentHandling: adapter.handle_message.assert_called_once() @pytest.mark.asyncio - async def test_unsupported_type_skipped(self, adapter): - """An unsupported file type (.zip) should be skipped silently.""" + async def test_zip_document_cached(self, adapter): + """A .zip file should be cached as a supported document.""" msg = make_message([ make_attachment(filename="archive.zip", content_type="application/zip") ]) - await adapter._handle_message(msg) + + with _mock_aiohttp_download(b"PK\x03\x04test"): + await adapter._handle_message(msg) event = adapter.handle_message.call_args[0][0] - assert event.media_urls == [] - assert event.message_type == MessageType.TEXT + assert len(event.media_urls) == 1 + assert event.media_types == ["application/zip"] + assert event.message_type == MessageType.DOCUMENT @pytest.mark.asyncio async def test_download_error_handled(self, adapter): diff --git a/tests/gateway/test_document_cache.py b/tests/gateway/test_document_cache.py index 18440ed9c..cc756cea8 100644 --- a/tests/gateway/test_document_cache.py +++ b/tests/gateway/test_document_cache.py @@ -151,7 +151,7 @@ class TestSupportedDocumentTypes: @pytest.mark.parametrize( "ext", - [".pdf", ".md", ".txt", ".docx", ".xlsx", ".pptx"], + [".pdf", ".md", ".txt", ".zip", ".docx", ".xlsx", ".pptx"], ) def test_expected_extensions_present(self, ext): assert ext in SUPPORTED_DOCUMENT_TYPES diff --git a/tests/gateway/test_slack.py b/tests/gateway/test_slack.py index 16924b590..81f8077ad 100644 --- a/tests/gateway/test_slack.py +++ b/tests/gateway/test_slack.py @@ -408,19 +408,22 @@ class TestIncomingDocumentHandling: assert "[Content of" not in (msg_event.text or "") @pytest.mark.asyncio - async def test_unsupported_file_type_skipped(self, adapter): - """A .zip file should be silently skipped.""" - event = self._make_event(files=[{ - "mimetype": "application/zip", - "name": "archive.zip", - "url_private_download": "https://files.slack.com/archive.zip", - "size": 1024, - }]) - await adapter._handle_slack_message(event) + async def test_zip_file_cached(self, adapter): + """A .zip file should be cached as a supported document.""" + with patch.object(adapter, "_download_slack_file_bytes", new_callable=AsyncMock) as dl: + dl.return_value = b"PK\x03\x04zip" + event = self._make_event(files=[{ + "mimetype": "application/zip", + "name": "archive.zip", + "url_private_download": "https://files.slack.com/archive.zip", + "size": 1024, + }]) + await adapter._handle_slack_message(event) msg_event = adapter.handle_message.call_args[0][0] - assert msg_event.message_type == MessageType.TEXT - assert len(msg_event.media_urls) == 0 + assert msg_event.message_type == MessageType.DOCUMENT + assert len(msg_event.media_urls) == 1 + assert msg_event.media_types == ["application/zip"] @pytest.mark.asyncio async def test_oversized_document_skipped(self, adapter): diff --git a/tests/gateway/test_telegram_documents.py b/tests/gateway/test_telegram_documents.py index 11a8df5f8..86e5cb30f 100644 --- a/tests/gateway/test_telegram_documents.py +++ b/tests/gateway/test_telegram_documents.py @@ -236,15 +236,16 @@ class TestDocumentDownloadBlock: assert "Please summarize" in event.text @pytest.mark.asyncio - async def test_unsupported_type_rejected(self, adapter): + async def test_zip_document_cached(self, adapter): + """A .zip upload should be cached as a supported document.""" doc = _make_document(file_name="archive.zip", mime_type="application/zip", file_size=100) msg = _make_message(document=doc) update = _make_update(msg) await adapter._handle_media_message(update, MagicMock()) event = adapter.handle_message.call_args[0][0] - assert "Unsupported document type" in event.text - assert ".zip" in event.text + assert event.media_urls and event.media_urls[0].endswith("archive.zip") + assert event.media_types == ["application/zip"] @pytest.mark.asyncio async def test_oversized_file_rejected(self, adapter): diff --git a/tests/tools/test_credential_files.py b/tests/tools/test_credential_files.py index 7449c1db4..488badadf 100644 --- a/tests/tools/test_credential_files.py +++ b/tests/tools/test_credential_files.py @@ -10,7 +10,9 @@ import pytest from tools.credential_files import ( clear_credential_files, get_credential_file_mounts, + get_cache_directory_mounts, get_skills_directory_mount, + iter_cache_files, iter_skills_files, register_credential_file, register_credential_files, @@ -358,3 +360,116 @@ class TestConfigPathTraversal: mounts = get_credential_file_mounts() assert len(mounts) == 1 assert "oauth.json" in mounts[0]["container_path"] + + +# --------------------------------------------------------------------------- +# Cache directory mounts +# --------------------------------------------------------------------------- + +class TestCacheDirectoryMounts: + """Tests for get_cache_directory_mounts() and iter_cache_files().""" + + def test_returns_existing_cache_dirs(self, tmp_path, monkeypatch): + """Existing cache dirs are returned with correct container paths.""" + hermes_home = tmp_path / ".hermes" + hermes_home.mkdir() + (hermes_home / "cache" / "documents").mkdir(parents=True) + (hermes_home / "cache" / "audio").mkdir(parents=True) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + mounts = get_cache_directory_mounts() + paths = {m["container_path"] for m in mounts} + assert "/root/.hermes/cache/documents" in paths + assert "/root/.hermes/cache/audio" in paths + + def test_skips_nonexistent_dirs(self, tmp_path, monkeypatch): + """Dirs that don't exist on disk are not returned.""" + hermes_home = tmp_path / ".hermes" + hermes_home.mkdir() + # Create only one cache dir + (hermes_home / "cache" / "documents").mkdir(parents=True) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + mounts = get_cache_directory_mounts() + assert len(mounts) == 1 + assert mounts[0]["container_path"] == "/root/.hermes/cache/documents" + + def test_legacy_dir_names_resolved(self, tmp_path, monkeypatch): + """Old-style dir names (e.g. document_cache) are resolved correctly.""" + hermes_home = tmp_path / ".hermes" + hermes_home.mkdir() + # Use legacy dir name — get_hermes_dir prefers old if it exists + (hermes_home / "document_cache").mkdir() + (hermes_home / "image_cache").mkdir() + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + mounts = get_cache_directory_mounts() + host_paths = {m["host_path"] for m in mounts} + assert str(hermes_home / "document_cache") in host_paths + assert str(hermes_home / "image_cache") in host_paths + # Container paths always use the new layout + container_paths = {m["container_path"] for m in mounts} + assert "/root/.hermes/cache/documents" in container_paths + assert "/root/.hermes/cache/images" in container_paths + + def test_empty_hermes_home(self, tmp_path, monkeypatch): + """No cache dirs → empty list.""" + hermes_home = tmp_path / ".hermes" + hermes_home.mkdir() + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + assert get_cache_directory_mounts() == [] + + +class TestIterCacheFiles: + """Tests for iter_cache_files().""" + + def test_enumerates_files(self, tmp_path, monkeypatch): + """Regular files in cache dirs are returned.""" + hermes_home = tmp_path / ".hermes" + doc_dir = hermes_home / "cache" / "documents" + doc_dir.mkdir(parents=True) + (doc_dir / "upload.zip").write_bytes(b"PK\x03\x04") + (doc_dir / "report.pdf").write_bytes(b"%PDF-1.4") + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + entries = iter_cache_files() + names = {Path(e["container_path"]).name for e in entries} + assert "upload.zip" in names + assert "report.pdf" in names + + def test_skips_symlinks(self, tmp_path, monkeypatch): + """Symlinks inside cache dirs are skipped.""" + hermes_home = tmp_path / ".hermes" + doc_dir = hermes_home / "cache" / "documents" + doc_dir.mkdir(parents=True) + real_file = doc_dir / "real.txt" + real_file.write_text("content") + (doc_dir / "link.txt").symlink_to(real_file) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + entries = iter_cache_files() + names = [Path(e["container_path"]).name for e in entries] + assert "real.txt" in names + assert "link.txt" not in names + + def test_nested_files(self, tmp_path, monkeypatch): + """Files in subdirectories are included with correct relative paths.""" + hermes_home = tmp_path / ".hermes" + ss_dir = hermes_home / "cache" / "screenshots" + sub = ss_dir / "session_abc" + sub.mkdir(parents=True) + (sub / "screen1.png").write_bytes(b"PNG") + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + entries = iter_cache_files() + assert len(entries) == 1 + assert entries[0]["container_path"] == "/root/.hermes/cache/screenshots/session_abc/screen1.png" + + def test_empty_cache(self, tmp_path, monkeypatch): + """No cache dirs → empty list.""" + hermes_home = tmp_path / ".hermes" + hermes_home.mkdir() + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + assert iter_cache_files() == [] diff --git a/tools/credential_files.py b/tools/credential_files.py index af4d13a4e..c58e0615a 100644 --- a/tools/credential_files.py +++ b/tools/credential_files.py @@ -1,29 +1,21 @@ -"""Credential file passthrough registry for remote terminal backends. +"""File passthrough registry for remote terminal backends. -Skills that declare ``required_credential_files`` in their frontmatter need -those files available inside sandboxed execution environments (Modal, Docker). -By default remote backends create bare containers with no host files. +Remote backends (Docker, Modal, SSH) create sandboxes with no host files. +This module ensures that credential files, skill directories, and host-side +cache directories (documents, images, audio, screenshots) are mounted or +synced into those sandboxes so the agent can access them. -This module provides a session-scoped registry so skill-declared credential -files (and user-configured overrides) are mounted into remote sandboxes. +**Credentials and skills** — session-scoped registry fed by skill declarations +(``required_credential_files``) and user config (``terminal.credential_files``). -Two sources feed the registry: +**Cache directories** — gateway-cached uploads, browser screenshots, TTS +audio, and processed images. Mounted read-only so the remote terminal can +reference files the host side created (e.g. ``unzip`` an uploaded archive). -1. **Skill declarations** — when a skill is loaded via ``skill_view``, its - ``required_credential_files`` entries are registered here if the files - exist on the host. -2. **User config** — ``terminal.credential_files`` in config.yaml lets users - explicitly list additional files to mount. - -Remote backends (``tools/environments/modal.py``, ``docker.py``) call -:func:`get_credential_file_mounts` at sandbox creation time. - -Each registered entry is a dict:: - - { - "host_path": "/home/user/.hermes/google_token.json", - "container_path": "/root/.hermes/google_token.json", - } +Remote backends call :func:`get_credential_file_mounts`, +:func:`get_skills_directory_mount` / :func:`iter_skills_files`, and +:func:`get_cache_directory_mounts` / :func:`iter_cache_files` at sandbox +creation time and before each command (for resync on Modal). """ from __future__ import annotations @@ -300,6 +292,71 @@ def iter_skills_files( return result +# --------------------------------------------------------------------------- +# Cache directory mounts (documents, images, audio, screenshots) +# --------------------------------------------------------------------------- + +# The four cache subdirectories that should be mirrored into remote backends. +# Each tuple is (new_subpath, old_name) matching hermes_constants.get_hermes_dir(). +_CACHE_DIRS: list[tuple[str, str]] = [ + ("cache/documents", "document_cache"), + ("cache/images", "image_cache"), + ("cache/audio", "audio_cache"), + ("cache/screenshots", "browser_screenshots"), +] + + +def get_cache_directory_mounts( + container_base: str = "/root/.hermes", +) -> List[Dict[str, str]]: + """Return mount entries for each cache directory that exists on disk. + + Used by Docker to create bind mounts. Each entry has ``host_path`` and + ``container_path`` keys. The host path is resolved via + ``get_hermes_dir()`` for backward compatibility with old directory layouts. + """ + from hermes_constants import get_hermes_dir + + mounts: List[Dict[str, str]] = [] + for new_subpath, old_name in _CACHE_DIRS: + host_dir = get_hermes_dir(new_subpath, old_name) + if host_dir.is_dir(): + # Always map to the *new* container layout regardless of host layout. + container_path = f"{container_base.rstrip('/')}/{new_subpath}" + mounts.append({ + "host_path": str(host_dir), + "container_path": container_path, + }) + return mounts + + +def iter_cache_files( + container_base: str = "/root/.hermes", +) -> List[Dict[str, str]]: + """Return individual (host_path, container_path) entries for cache files. + + Used by Modal to upload files individually and resync before each command. + Skips symlinks. The container paths use the new ``cache/`` layout. + """ + from hermes_constants import get_hermes_dir + + result: List[Dict[str, str]] = [] + for new_subpath, old_name in _CACHE_DIRS: + host_dir = get_hermes_dir(new_subpath, old_name) + if not host_dir.is_dir(): + continue + container_root = f"{container_base.rstrip('/')}/{new_subpath}" + for item in host_dir.rglob("*"): + if item.is_symlink() or not item.is_file(): + continue + rel = item.relative_to(host_dir) + result.append({ + "host_path": str(item), + "container_path": f"{container_root}/{rel}", + }) + return result + + def clear_credential_files() -> None: """Reset the skill-scoped registry (e.g. on session reset).""" _registered_files.clear() diff --git a/tools/environments/docker.py b/tools/environments/docker.py index 2a7bb6255..19889ea35 100644 --- a/tools/environments/docker.py +++ b/tools/environments/docker.py @@ -315,7 +315,11 @@ class DockerEnvironment(BaseEnvironment): # Mount credential files (OAuth tokens, etc.) declared by skills. # Read-only so the container can authenticate but not modify host creds. try: - from tools.credential_files import get_credential_file_mounts, get_skills_directory_mount + from tools.credential_files import ( + get_credential_file_mounts, + get_skills_directory_mount, + get_cache_directory_mounts, + ) for mount_entry in get_credential_file_mounts(): volume_args.extend([ @@ -341,6 +345,21 @@ class DockerEnvironment(BaseEnvironment): skills_mount["host_path"], skills_mount["container_path"], ) + + # Mount host-side cache directories (documents, images, audio, + # screenshots) so the agent can access uploaded files and other + # cached media from inside the container. Read-only — the + # container reads these but the host gateway manages writes. + for cache_mount in get_cache_directory_mounts(): + volume_args.extend([ + "-v", + f"{cache_mount['host_path']}:{cache_mount['container_path']}:ro", + ]) + logger.info( + "Docker: mounting cache dir %s -> %s", + cache_mount["host_path"], + cache_mount["container_path"], + ) except Exception as e: logger.debug("Docker: could not load credential file mounts: %s", e) diff --git a/tools/environments/modal.py b/tools/environments/modal.py index 805f9ac28..7916a2c44 100644 --- a/tools/environments/modal.py +++ b/tools/environments/modal.py @@ -186,7 +186,11 @@ class ModalEnvironment(BaseModalExecutionEnvironment): cred_mounts = [] try: - from tools.credential_files import get_credential_file_mounts, iter_skills_files + from tools.credential_files import ( + get_credential_file_mounts, + iter_skills_files, + iter_cache_files, + ) for mount_entry in get_credential_file_mounts(): cred_mounts.append( @@ -212,6 +216,20 @@ class ModalEnvironment(BaseModalExecutionEnvironment): ) if skills_files: logger.info("Modal: mounting %d skill files", len(skills_files)) + + # Mount host-side cache files (documents, images, audio, + # screenshots). New files arriving mid-session are picked up + # by _sync_files() before each command execution. + cache_files = iter_cache_files() + for entry in cache_files: + cred_mounts.append( + _modal.Mount.from_local_file( + entry["host_path"], + remote_path=entry["container_path"], + ) + ) + if cache_files: + logger.info("Modal: mounting %d cache files", len(cache_files)) except Exception as e: logger.debug("Modal: could not load credential file mounts: %s", e) @@ -308,13 +326,19 @@ class ModalEnvironment(BaseModalExecutionEnvironment): return True def _sync_files(self) -> None: - """Push credential files and skill files into the running sandbox. + """Push credential, skill, and cache files into the running sandbox. Runs before each command. Uses mtime+size caching so only changed - files are pushed (~13μs overhead in the no-op case). + files are pushed (~13μs overhead in the no-op case). Cache files + are especially important here — new uploads/screenshots may appear + mid-session after sandbox creation. """ try: - from tools.credential_files import get_credential_file_mounts, iter_skills_files + from tools.credential_files import ( + get_credential_file_mounts, + iter_skills_files, + iter_cache_files, + ) for entry in get_credential_file_mounts(): if self._push_file_to_sandbox(entry["host_path"], entry["container_path"]): @@ -323,6 +347,10 @@ class ModalEnvironment(BaseModalExecutionEnvironment): for entry in iter_skills_files(): if self._push_file_to_sandbox(entry["host_path"], entry["container_path"]): logger.debug("Modal: synced skill file %s", entry["container_path"]) + + for entry in iter_cache_files(): + if self._push_file_to_sandbox(entry["host_path"], entry["container_path"]): + logger.debug("Modal: synced cache file %s", entry["container_path"]) except Exception as e: logger.debug("Modal: file sync failed: %s", e)