feat: add .zip document support and auto-mount cache dirs into remote backends (#4846)

- Add .zip to SUPPORTED_DOCUMENT_TYPES so gateway platforms (Telegram, Slack, Discord) cache uploaded zip files instead of rejecting them.
- Add get_cache_directory_mounts() and iter_cache_files() to credential_files.py for host-side cache directory passthrough (documents, images, audio, screenshots).
- Docker: bind-mount cache dirs read-only alongside credentials/skills. Changes are live (bind mount semantics).
- Modal: mount cache files at sandbox creation + resync before each command via _sync_files() with mtime+size change detection.
- Handles backward-compat with legacy dir names (document_cache, image_cache, audio_cache, browser_screenshots) via get_hermes_dir().
- Container paths always use the new cache/<subdir> layout regardless of host layout.

This replaces the need for a dedicated extract_archive tool (PR #4819) — the agent can now use standard terminal commands (unzip, tar) on uploaded files inside remote containers.

Closes: related to PR #4819 by kshitijk4poor
2026-04-25 00:51:20 +00:00 · 2026-04-03 13:16:26 -07:00 · 2026-04-03 13:16:26 -07:00 · b1756084a3
commit b1756084a3
parent 8a384628a5
9 changed files with 274 additions and 47 deletions
--- a/tools/credential_files.py
+++ b/tools/credential_files.py
@@ -1,29 +1,21 @@
-"""Credential file passthrough registry for remote terminal backends.
+"""File passthrough registry for remote terminal backends.

-Skills that declare ``required_credential_files`` in their frontmatter need
-those files available inside sandboxed execution environments (Modal, Docker).
-By default remote backends create bare containers with no host files.
+Remote backends (Docker, Modal, SSH) create sandboxes with no host files.
+This module ensures that credential files, skill directories, and host-side
+cache directories (documents, images, audio, screenshots) are mounted or
+synced into those sandboxes so the agent can access them.

-This module provides a session-scoped registry so skill-declared credential
-files (and user-configured overrides) are mounted into remote sandboxes.
+**Credentials and skills** — session-scoped registry fed by skill declarations
+(``required_credential_files``) and user config (``terminal.credential_files``).

-Two sources feed the registry:
+**Cache directories** — gateway-cached uploads, browser screenshots, TTS
+audio, and processed images.  Mounted read-only so the remote terminal can
+reference files the host side created (e.g. ``unzip`` an uploaded archive).

-1. **Skill declarations** — when a skill is loaded via ``skill_view``, its
-   ``required_credential_files`` entries are registered here if the files
-   exist on the host.
-2. **User config** — ``terminal.credential_files`` in config.yaml lets users
-   explicitly list additional files to mount.
-
-Remote backends (``tools/environments/modal.py``, ``docker.py``) call
-:func:`get_credential_file_mounts` at sandbox creation time.
-
-Each registered entry is a dict::
-
-    {
-        "host_path": "/home/user/.hermes/google_token.json",
-        "container_path": "/root/.hermes/google_token.json",
-    }
+Remote backends call :func:`get_credential_file_mounts`,
+:func:`get_skills_directory_mount` / :func:`iter_skills_files`, and
+:func:`get_cache_directory_mounts` / :func:`iter_cache_files` at sandbox
+creation time and before each command (for resync on Modal).
 """

 from __future__ import annotations
@@ -300,6 +292,71 @@ def iter_skills_files(
    return result


+# ---------------------------------------------------------------------------
+# Cache directory mounts (documents, images, audio, screenshots)
+# ---------------------------------------------------------------------------
+
+# The four cache subdirectories that should be mirrored into remote backends.
+# Each tuple is (new_subpath, old_name) matching hermes_constants.get_hermes_dir().
+# ``new_subpath`` is also appended to the container base path to build the
+# container-side location; ``old_name`` is only passed to get_hermes_dir()
+# when resolving the host-side directory.
+_CACHE_DIRS: list[tuple[str, str]] = [
+    ("cache/documents", "document_cache"),
+    ("cache/images", "image_cache"),
+    ("cache/audio", "audio_cache"),
+    ("cache/screenshots", "browser_screenshots"),
+]
+
+
+def get_cache_directory_mounts(
+    container_base: str = "/root/.hermes",
+) -> List[Dict[str, str]]:
+    """List bind-mount entries for the cache directories present on the host.
+
+    Docker consumes these to create its bind mounts.  Every entry is a dict
+    with ``host_path`` and ``container_path`` keys.  Host locations are
+    resolved through ``get_hermes_dir()`` so older directory layouts keep
+    working; directories that do not exist on disk are left out.
+    """
+    from hermes_constants import get_hermes_dir
+
+    # Lazily pair each resolved host directory with its new-layout subpath.
+    resolved = (
+        (get_hermes_dir(new_subpath, old_name), new_subpath)
+        for new_subpath, old_name in _CACHE_DIRS
+    )
+    return [
+        {
+            "host_path": str(host_dir),
+            # Container side always uses the *new* layout, whatever the host has.
+            "container_path": f"{container_base.rstrip('/')}/{new_subpath}",
+        }
+        for host_dir, new_subpath in resolved
+        if host_dir.is_dir()
+    ]
+
+
+def iter_cache_files(
+    container_base: str = "/root/.hermes",
+) -> List[Dict[str, str]]:
+    """Return individual (host_path, container_path) entries for cache files.
+
+    Used by Modal to upload files individually and resync before each command.
+    Walks every directory in ``_CACHE_DIRS`` that exists on the host (resolved
+    via ``get_hermes_dir()``) and emits one entry per regular file beneath it.
+    Symlinks and anything that is not a regular file are skipped.
+
+    Args:
+        container_base: Directory inside the sandbox under which the
+            ``cache/<subdir>`` tree is placed.  Trailing slashes are ignored.
+
+    Returns:
+        A list of dicts with ``host_path`` and ``container_path`` keys.  The
+        container paths use the new ``cache/<subdir>`` layout and always use
+        forward slashes, regardless of the host platform.
+    """
+    from hermes_constants import get_hermes_dir
+
+    result: List[Dict[str, str]] = []
+    for new_subpath, old_name in _CACHE_DIRS:
+        host_dir = get_hermes_dir(new_subpath, old_name)
+        if not host_dir.is_dir():
+            continue
+        container_root = f"{container_base.rstrip('/')}/{new_subpath}"
+        for item in host_dir.rglob("*"):
+            if item.is_symlink() or not item.is_file():
+                continue
+            # as_posix() keeps the container path slash-separated even when the
+            # host's native separator is a backslash.
+            rel = item.relative_to(host_dir).as_posix()
+            result.append({
+                "host_path": str(item),
+                "container_path": f"{container_root}/{rel}",
+            })
+    return result
+
+
 def clear_credential_files() -> None:
    """Reset the skill-scoped registry (e.g. on session reset)."""
    _registered_files.clear()