feat: add .zip document support and auto-mount cache dirs into remote backends (#4846)

- Add .zip to SUPPORTED_DOCUMENT_TYPES so gateway platforms (Telegram,
  Slack, Discord) cache uploaded zip files instead of rejecting them.
- Add get_cache_directory_mounts() and iter_cache_files() to
  credential_files.py for host-side cache directory passthrough
  (documents, images, audio, screenshots).
- Docker: bind-mount cache dirs read-only alongside credentials/skills.
  Changes are live (bind mount semantics).
- Modal: mount cache files at sandbox creation + resync before each
  command via _sync_files() with mtime+size change detection.
- Handles backward-compat with legacy dir names (document_cache,
  image_cache, audio_cache, browser_screenshots) via get_hermes_dir().
- Container paths always use the new cache/<subdir> layout regardless
  of host layout.

This replaces the need for a dedicated extract_archive tool (PR #4819)
— the agent can now use standard terminal commands (unzip, tar) on
uploaded files inside remote containers.

Closes: related to PR #4819 by kshitijk4poor
This commit is contained in:
Teknium 2026-04-03 13:16:26 -07:00 committed by GitHub
parent 8a384628a5
commit b1756084a3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 274 additions and 47 deletions

View file

@ -1,29 +1,21 @@
"""Credential file passthrough registry for remote terminal backends.
"""File passthrough registry for remote terminal backends.
Skills that declare ``required_credential_files`` in their frontmatter need
those files available inside sandboxed execution environments (Modal, Docker).
By default remote backends create bare containers with no host files.
Remote backends (Docker, Modal, SSH) create sandboxes with no host files.
This module ensures that credential files, skill directories, and host-side
cache directories (documents, images, audio, screenshots) are mounted or
synced into those sandboxes so the agent can access them.
This module provides a session-scoped registry so skill-declared credential
files (and user-configured overrides) are mounted into remote sandboxes.
**Credentials and skills** session-scoped registry fed by skill declarations
(``required_credential_files``) and user config (``terminal.credential_files``).
Two sources feed the registry:
**Cache directories** gateway-cached uploads, browser screenshots, TTS
audio, and processed images. Mounted read-only so the remote terminal can
reference files the host side created (e.g. ``unzip`` an uploaded archive).
1. **Skill declarations** when a skill is loaded via ``skill_view``, its
``required_credential_files`` entries are registered here if the files
exist on the host.
2. **User config** ``terminal.credential_files`` in config.yaml lets users
explicitly list additional files to mount.
Remote backends (``tools/environments/modal.py``, ``docker.py``) call
:func:`get_credential_file_mounts` at sandbox creation time.
Each registered entry is a dict::
{
"host_path": "/home/user/.hermes/google_token.json",
"container_path": "/root/.hermes/google_token.json",
}
Remote backends call :func:`get_credential_file_mounts`,
:func:`get_skills_directory_mount` / :func:`iter_skills_files`, and
:func:`get_cache_directory_mounts` / :func:`iter_cache_files` at sandbox
creation time and before each command (for resync on Modal).
"""
from __future__ import annotations
@ -300,6 +292,71 @@ def iter_skills_files(
return result
# ---------------------------------------------------------------------------
# Cache directory mounts (documents, images, audio, screenshots)
# ---------------------------------------------------------------------------
# The four cache subdirectories that should be mirrored into remote backends.
# Each tuple is (new_subpath, old_name) matching hermes_constants.get_hermes_dir().
_CACHE_DIRS: list[tuple[str, str]] = [
("cache/documents", "document_cache"),
("cache/images", "image_cache"),
("cache/audio", "audio_cache"),
("cache/screenshots", "browser_screenshots"),
]
def get_cache_directory_mounts(
container_base: str = "/root/.hermes",
) -> List[Dict[str, str]]:
"""Return mount entries for each cache directory that exists on disk.
Used by Docker to create bind mounts. Each entry has ``host_path`` and
``container_path`` keys. The host path is resolved via
``get_hermes_dir()`` for backward compatibility with old directory layouts.
"""
from hermes_constants import get_hermes_dir
mounts: List[Dict[str, str]] = []
for new_subpath, old_name in _CACHE_DIRS:
host_dir = get_hermes_dir(new_subpath, old_name)
if host_dir.is_dir():
# Always map to the *new* container layout regardless of host layout.
container_path = f"{container_base.rstrip('/')}/{new_subpath}"
mounts.append({
"host_path": str(host_dir),
"container_path": container_path,
})
return mounts
def iter_cache_files(
container_base: str = "/root/.hermes",
) -> List[Dict[str, str]]:
"""Return individual (host_path, container_path) entries for cache files.
Used by Modal to upload files individually and resync before each command.
Skips symlinks. The container paths use the new ``cache/<subdir>`` layout.
"""
from hermes_constants import get_hermes_dir
result: List[Dict[str, str]] = []
for new_subpath, old_name in _CACHE_DIRS:
host_dir = get_hermes_dir(new_subpath, old_name)
if not host_dir.is_dir():
continue
container_root = f"{container_base.rstrip('/')}/{new_subpath}"
for item in host_dir.rglob("*"):
if item.is_symlink() or not item.is_file():
continue
rel = item.relative_to(host_dir)
result.append({
"host_path": str(item),
"container_path": f"{container_root}/{rel}",
})
return result
def clear_credential_files() -> None:
"""Reset the skill-scoped registry (e.g. on session reset)."""
_registered_files.clear()