feat: add .zip document support and auto-mount cache dirs into remote backends (#4846)

- Add .zip to SUPPORTED_DOCUMENT_TYPES so gateway platforms (Telegram, Slack, Discord) cache uploaded zip files instead of rejecting them.
- Add get_cache_directory_mounts() and iter_cache_files() to credential_files.py for host-side cache directory passthrough (documents, images, audio, screenshots).
- Docker: bind-mount cache dirs read-only alongside credentials/skills. Changes are visible live inside the container (bind-mount semantics).
- Modal: mount cache files at sandbox creation and resync before each command via _sync_files() with mtime+size change detection.
- Handle backward compatibility with legacy dir names (document_cache, image_cache, audio_cache, browser_screenshots) via get_hermes_dir().
- Container paths always use the new cache/<subdir> layout regardless of host layout.

This replaces the need for a dedicated extract_archive tool (PR #4819) — the agent can now use standard terminal commands (unzip, tar) on uploaded files inside remote containers.

Related to PR #4819 by kshitijk4poor.
2026-04-25 00:51:20 +00:00 · 2026-04-03 13:16:26 -07:00 · 2026-04-03 13:16:26 -07:00 · b1756084a3
commit b1756084a3
parent 8a384628a5
9 changed files with 274 additions and 47 deletions
--- a/tools/environments/docker.py
+++ b/tools/environments/docker.py
@ -315,7 +315,11 @@ class DockerEnvironment(BaseEnvironment):
        # Mount credential files (OAuth tokens, etc.) declared by skills.
        # Read-only so the container can authenticate but not modify host creds.
        try:
-            from tools.credential_files import get_credential_file_mounts, get_skills_directory_mount
+            from tools.credential_files import (
+                get_credential_file_mounts,
+                get_skills_directory_mount,
+                get_cache_directory_mounts,
+            )

            for mount_entry in get_credential_file_mounts():
                volume_args.extend([
@ -341,6 +345,21 @@ class DockerEnvironment(BaseEnvironment):
                    skills_mount["host_path"],
                    skills_mount["container_path"],
                )
+
+            # Mount host-side cache directories (documents, images, audio,
+            # screenshots) so the agent can access uploaded files and other
+            # cached media from inside the container.  Read-only — the
+            # container reads these but the host gateway manages writes.
+            for cache_mount in get_cache_directory_mounts():
+                volume_args.extend([
+                    "-v",
+                    f"{cache_mount['host_path']}:{cache_mount['container_path']}:ro",
+                ])
+                logger.info(
+                    "Docker: mounting cache dir %s -> %s",
+                    cache_mount["host_path"],
+                    cache_mount["container_path"],
+                )
        except Exception as e:
            logger.debug("Docker: could not load credential file mounts: %s", e)

--- a/tools/environments/modal.py
+++ b/tools/environments/modal.py
@ -186,7 +186,11 @@ class ModalEnvironment(BaseModalExecutionEnvironment):

        cred_mounts = []
        try:
-            from tools.credential_files import get_credential_file_mounts, iter_skills_files
+            from tools.credential_files import (
+                get_credential_file_mounts,
+                iter_skills_files,
+                iter_cache_files,
+            )

            for mount_entry in get_credential_file_mounts():
                cred_mounts.append(
@ -212,6 +216,20 @@ class ModalEnvironment(BaseModalExecutionEnvironment):
                )
            if skills_files:
                logger.info("Modal: mounting %d skill files", len(skills_files))
+
+            # Mount host-side cache files (documents, images, audio,
+            # screenshots).  New files arriving mid-session are picked up
+            # by _sync_files() before each command execution.
+            cache_files = iter_cache_files()
+            for entry in cache_files:
+                cred_mounts.append(
+                    _modal.Mount.from_local_file(
+                        entry["host_path"],
+                        remote_path=entry["container_path"],
+                    )
+                )
+            if cache_files:
+                logger.info("Modal: mounting %d cache files", len(cache_files))
        except Exception as e:
            logger.debug("Modal: could not load credential file mounts: %s", e)

@ -308,13 +326,19 @@ class ModalEnvironment(BaseModalExecutionEnvironment):
        return True

    def _sync_files(self) -> None:
-        """Push credential files and skill files into the running sandbox.
+        """Push credential, skill, and cache files into the running sandbox.

        Runs before each command. Uses mtime+size caching so only changed
-        files are pushed (~13μs overhead in the no-op case).
+        files are pushed (~13μs overhead in the no-op case).  Cache files
+        are especially important here — new uploads/screenshots may appear
+        mid-session after sandbox creation.
        """
        try:
-            from tools.credential_files import get_credential_file_mounts, iter_skills_files
+            from tools.credential_files import (
+                get_credential_file_mounts,
+                iter_skills_files,
+                iter_cache_files,
+            )

            for entry in get_credential_file_mounts():
                if self._push_file_to_sandbox(entry["host_path"], entry["container_path"]):
@ -323,6 +347,10 @@ class ModalEnvironment(BaseModalExecutionEnvironment):
            for entry in iter_skills_files():
                if self._push_file_to_sandbox(entry["host_path"], entry["container_path"]):
                    logger.debug("Modal: synced skill file %s", entry["container_path"])
+
+            for entry in iter_cache_files():
+                if self._push_file_to_sandbox(entry["host_path"], entry["container_path"]):
+                    logger.debug("Modal: synced cache file %s", entry["container_path"])
        except Exception as e:
            logger.debug("Modal: file sync failed: %s", e)