feat: add .zip document support and auto-mount cache dirs into remote backends (#4846)

- Add .zip to SUPPORTED_DOCUMENT_TYPES so gateway platforms (Telegram,
  Slack, Discord) cache uploaded zip files instead of rejecting them.
- Add get_cache_directory_mounts() and iter_cache_files() to
  credential_files.py for host-side cache directory passthrough
  (documents, images, audio, screenshots).
- Docker: bind-mount cache dirs read-only alongside credentials/skills.
  Changes are live (bind mount semantics).
- Modal: mount cache files at sandbox creation + resync before each
  command via _sync_files() with mtime+size change detection.
- Handles backward-compat with legacy dir names (document_cache,
  image_cache, audio_cache, browser_screenshots) via get_hermes_dir().
- Container paths always use the new cache/<subdir> layout regardless
  of host layout.

This replaces the need for a dedicated extract_archive tool (PR #4819)
— the agent can now use standard terminal commands (unzip, tar) on
uploaded files inside remote containers.

Supersedes: PR #4819 by kshitijk4poor
This commit is contained in:
Teknium 2026-04-03 13:16:26 -07:00 committed by GitHub
parent 8a384628a5
commit b1756084a3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 274 additions and 47 deletions

View file

@ -235,6 +235,7 @@ SUPPORTED_DOCUMENT_TYPES = {
".pdf": "application/pdf",
".md": "text/markdown",
".txt": "text/plain",
".zip": "application/zip",
".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
".pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",

View file

@ -227,16 +227,19 @@ class TestIncomingDocumentHandling:
adapter.handle_message.assert_called_once()
@pytest.mark.asyncio
async def test_unsupported_type_skipped(self, adapter):
"""An unsupported file type (.zip) should be skipped silently."""
async def test_zip_document_cached(self, adapter):
"""A .zip file should be cached as a supported document."""
msg = make_message([
make_attachment(filename="archive.zip", content_type="application/zip")
])
await adapter._handle_message(msg)
with _mock_aiohttp_download(b"PK\x03\x04test"):
await adapter._handle_message(msg)
event = adapter.handle_message.call_args[0][0]
assert event.media_urls == []
assert event.message_type == MessageType.TEXT
assert len(event.media_urls) == 1
assert event.media_types == ["application/zip"]
assert event.message_type == MessageType.DOCUMENT
@pytest.mark.asyncio
async def test_download_error_handled(self, adapter):

View file

@ -151,7 +151,7 @@ class TestSupportedDocumentTypes:
@pytest.mark.parametrize(
"ext",
[".pdf", ".md", ".txt", ".docx", ".xlsx", ".pptx"],
[".pdf", ".md", ".txt", ".zip", ".docx", ".xlsx", ".pptx"],
)
def test_expected_extensions_present(self, ext):
assert ext in SUPPORTED_DOCUMENT_TYPES

View file

@ -408,19 +408,22 @@ class TestIncomingDocumentHandling:
assert "[Content of" not in (msg_event.text or "")
@pytest.mark.asyncio
async def test_unsupported_file_type_skipped(self, adapter):
"""A .zip file should be silently skipped."""
event = self._make_event(files=[{
"mimetype": "application/zip",
"name": "archive.zip",
"url_private_download": "https://files.slack.com/archive.zip",
"size": 1024,
}])
await adapter._handle_slack_message(event)
async def test_zip_file_cached(self, adapter):
"""A .zip file should be cached as a supported document."""
with patch.object(adapter, "_download_slack_file_bytes", new_callable=AsyncMock) as dl:
dl.return_value = b"PK\x03\x04zip"
event = self._make_event(files=[{
"mimetype": "application/zip",
"name": "archive.zip",
"url_private_download": "https://files.slack.com/archive.zip",
"size": 1024,
}])
await adapter._handle_slack_message(event)
msg_event = adapter.handle_message.call_args[0][0]
assert msg_event.message_type == MessageType.TEXT
assert len(msg_event.media_urls) == 0
assert msg_event.message_type == MessageType.DOCUMENT
assert len(msg_event.media_urls) == 1
assert msg_event.media_types == ["application/zip"]
@pytest.mark.asyncio
async def test_oversized_document_skipped(self, adapter):

View file

@ -236,15 +236,16 @@ class TestDocumentDownloadBlock:
assert "Please summarize" in event.text
@pytest.mark.asyncio
async def test_unsupported_type_rejected(self, adapter):
async def test_zip_document_cached(self, adapter):
"""A .zip upload should be cached as a supported document."""
doc = _make_document(file_name="archive.zip", mime_type="application/zip", file_size=100)
msg = _make_message(document=doc)
update = _make_update(msg)
await adapter._handle_media_message(update, MagicMock())
event = adapter.handle_message.call_args[0][0]
assert "Unsupported document type" in event.text
assert ".zip" in event.text
assert event.media_urls and event.media_urls[0].endswith("archive.zip")
assert event.media_types == ["application/zip"]
@pytest.mark.asyncio
async def test_oversized_file_rejected(self, adapter):

View file

@ -10,7 +10,9 @@ import pytest
from tools.credential_files import (
clear_credential_files,
get_credential_file_mounts,
get_cache_directory_mounts,
get_skills_directory_mount,
iter_cache_files,
iter_skills_files,
register_credential_file,
register_credential_files,
@ -358,3 +360,116 @@ class TestConfigPathTraversal:
mounts = get_credential_file_mounts()
assert len(mounts) == 1
assert "oauth.json" in mounts[0]["container_path"]
# ---------------------------------------------------------------------------
# Cache directory mounts
# ---------------------------------------------------------------------------
class TestCacheDirectoryMounts:
    """Tests for get_cache_directory_mounts() and iter_cache_files()."""

    def test_returns_existing_cache_dirs(self, tmp_path, monkeypatch):
        """Cache dirs present on disk yield mounts with new-layout container paths."""
        home = tmp_path / ".hermes"
        home.mkdir()
        for sub in ("documents", "audio"):
            (home / "cache" / sub).mkdir(parents=True)
        monkeypatch.setenv("HERMES_HOME", str(home))
        container_paths = {entry["container_path"] for entry in get_cache_directory_mounts()}
        assert "/root/.hermes/cache/documents" in container_paths
        assert "/root/.hermes/cache/audio" in container_paths

    def test_skips_nonexistent_dirs(self, tmp_path, monkeypatch):
        """Only directories that actually exist on disk are reported."""
        home = tmp_path / ".hermes"
        home.mkdir()
        # Create exactly one of the four known cache dirs.
        (home / "cache" / "documents").mkdir(parents=True)
        monkeypatch.setenv("HERMES_HOME", str(home))
        entries = get_cache_directory_mounts()
        assert [e["container_path"] for e in entries] == ["/root/.hermes/cache/documents"]

    def test_legacy_dir_names_resolved(self, tmp_path, monkeypatch):
        """Legacy host dir names still mount, but under the new container layout."""
        home = tmp_path / ".hermes"
        home.mkdir()
        # get_hermes_dir() prefers the old-style name when it exists on disk.
        for legacy in ("document_cache", "image_cache"):
            (home / legacy).mkdir()
        monkeypatch.setenv("HERMES_HOME", str(home))
        entries = get_cache_directory_mounts()
        host_side = {e["host_path"] for e in entries}
        assert str(home / "document_cache") in host_side
        assert str(home / "image_cache") in host_side
        # Container side always uses the new cache/<subdir> layout.
        container_side = {e["container_path"] for e in entries}
        assert "/root/.hermes/cache/documents" in container_side
        assert "/root/.hermes/cache/images" in container_side

    def test_empty_hermes_home(self, tmp_path, monkeypatch):
        """With no cache dirs on disk the result is an empty list."""
        home = tmp_path / ".hermes"
        home.mkdir()
        monkeypatch.setenv("HERMES_HOME", str(home))
        assert get_cache_directory_mounts() == []
class TestIterCacheFiles:
    """Tests for iter_cache_files()."""

    def test_enumerates_files(self, tmp_path, monkeypatch):
        """Regular files inside a cache dir show up in the listing."""
        home = tmp_path / ".hermes"
        docs = home / "cache" / "documents"
        docs.mkdir(parents=True)
        (docs / "upload.zip").write_bytes(b"PK\x03\x04")
        (docs / "report.pdf").write_bytes(b"%PDF-1.4")
        monkeypatch.setenv("HERMES_HOME", str(home))
        listed = {Path(e["container_path"]).name for e in iter_cache_files()}
        assert "upload.zip" in listed
        assert "report.pdf" in listed

    def test_skips_symlinks(self, tmp_path, monkeypatch):
        """Symlinked entries are never returned; the real file still is."""
        home = tmp_path / ".hermes"
        docs = home / "cache" / "documents"
        docs.mkdir(parents=True)
        target = docs / "real.txt"
        target.write_text("content")
        (docs / "link.txt").symlink_to(target)
        monkeypatch.setenv("HERMES_HOME", str(home))
        listed = [Path(e["container_path"]).name for e in iter_cache_files()]
        assert "real.txt" in listed
        assert "link.txt" not in listed

    def test_nested_files(self, tmp_path, monkeypatch):
        """Files in subdirectories keep their relative path under the container root."""
        home = tmp_path / ".hermes"
        nested = home / "cache" / "screenshots" / "session_abc"
        nested.mkdir(parents=True)
        (nested / "screen1.png").write_bytes(b"PNG")
        monkeypatch.setenv("HERMES_HOME", str(home))
        entries = iter_cache_files()
        assert [e["container_path"] for e in entries] == [
            "/root/.hermes/cache/screenshots/session_abc/screen1.png"
        ]

    def test_empty_cache(self, tmp_path, monkeypatch):
        """No cache dirs on disk -> empty list."""
        home = tmp_path / ".hermes"
        home.mkdir()
        monkeypatch.setenv("HERMES_HOME", str(home))
        assert iter_cache_files() == []

View file

@ -1,29 +1,21 @@
"""Credential file passthrough registry for remote terminal backends.
"""File passthrough registry for remote terminal backends.
Skills that declare ``required_credential_files`` in their frontmatter need
those files available inside sandboxed execution environments (Modal, Docker).
By default remote backends create bare containers with no host files.
Remote backends (Docker, Modal, SSH) create sandboxes with no host files.
This module ensures that credential files, skill directories, and host-side
cache directories (documents, images, audio, screenshots) are mounted or
synced into those sandboxes so the agent can access them.
This module provides a session-scoped registry so skill-declared credential
files (and user-configured overrides) are mounted into remote sandboxes.
**Credentials and skills** — session-scoped registry fed by skill declarations
(``required_credential_files``) and user config (``terminal.credential_files``).
Two sources feed the registry:
**Cache directories** — gateway-cached uploads, browser screenshots, TTS
audio, and processed images. Mounted read-only so the remote terminal can
reference files the host side created (e.g. ``unzip`` an uploaded archive).
1. **Skill declarations** when a skill is loaded via ``skill_view``, its
``required_credential_files`` entries are registered here if the files
exist on the host.
2. **User config** ``terminal.credential_files`` in config.yaml lets users
explicitly list additional files to mount.
Remote backends (``tools/environments/modal.py``, ``docker.py``) call
:func:`get_credential_file_mounts` at sandbox creation time.
Each registered entry is a dict::
{
"host_path": "/home/user/.hermes/google_token.json",
"container_path": "/root/.hermes/google_token.json",
}
Remote backends call :func:`get_credential_file_mounts`,
:func:`get_skills_directory_mount` / :func:`iter_skills_files`, and
:func:`get_cache_directory_mounts` / :func:`iter_cache_files` at sandbox
creation time and before each command (for resync on Modal).
"""
from __future__ import annotations
@ -300,6 +292,71 @@ def iter_skills_files(
return result
# ---------------------------------------------------------------------------
# Cache directory mounts (documents, images, audio, screenshots)
# ---------------------------------------------------------------------------

# The four cache subdirectories that should be mirrored into remote backends.
# Each tuple is (new_subpath, old_name) matching hermes_constants.get_hermes_dir(),
# which resolves the host-side path (preferring the legacy name when it exists).
# Container paths are always derived from new_subpath, never from old_name.
_CACHE_DIRS: list[tuple[str, str]] = [
    ("cache/documents", "document_cache"),
    ("cache/images", "image_cache"),
    ("cache/audio", "audio_cache"),
    ("cache/screenshots", "browser_screenshots"),
]
def get_cache_directory_mounts(
    container_base: str = "/root/.hermes",
) -> List[Dict[str, str]]:
    """Return mount entries for each cache directory that exists on disk.

    Used by Docker to create bind mounts. Each entry carries ``host_path``
    and ``container_path``. Host paths are resolved via ``get_hermes_dir()``
    for backward compatibility with old directory layouts; container paths
    always follow the new ``cache/<subdir>`` layout.
    """
    from hermes_constants import get_hermes_dir

    base = container_base.rstrip("/")
    candidates = (
        (get_hermes_dir(new_subpath, old_name), new_subpath)
        for new_subpath, old_name in _CACHE_DIRS
    )
    return [
        {
            "host_path": str(host_dir),
            # Always map to the *new* container layout regardless of host layout.
            "container_path": f"{base}/{new_subpath}",
        }
        for host_dir, new_subpath in candidates
        if host_dir.is_dir()
    ]
def iter_cache_files(
    container_base: str = "/root/.hermes",
) -> List[Dict[str, str]]:
    """Return individual (host_path, container_path) entries for cache files.

    Used by Modal to upload files one at a time and resync them before each
    command. Symlinks are skipped. Container paths always use the new
    ``cache/<subdir>`` layout, regardless of the host-side layout.
    """
    from hermes_constants import get_hermes_dir

    base = container_base.rstrip("/")
    entries: List[Dict[str, str]] = []
    for new_subpath, old_name in _CACHE_DIRS:
        host_dir = get_hermes_dir(new_subpath, old_name)
        if not host_dir.is_dir():
            continue
        root = f"{base}/{new_subpath}"
        for path in host_dir.rglob("*"):
            # Only regular, non-symlink files are mirrored into the sandbox.
            if path.is_symlink() or not path.is_file():
                continue
            entries.append({
                "host_path": str(path),
                "container_path": f"{root}/{path.relative_to(host_dir)}",
            })
    return entries
def clear_credential_files() -> None:
    """Reset the skill-scoped registry (e.g. on session reset).

    Clears ``_registered_files`` in place, so any code holding a reference
    to the registry observes the reset immediately.
    """
    _registered_files.clear()

View file

@ -315,7 +315,11 @@ class DockerEnvironment(BaseEnvironment):
# Mount credential files (OAuth tokens, etc.) declared by skills.
# Read-only so the container can authenticate but not modify host creds.
try:
from tools.credential_files import get_credential_file_mounts, get_skills_directory_mount
from tools.credential_files import (
get_credential_file_mounts,
get_skills_directory_mount,
get_cache_directory_mounts,
)
for mount_entry in get_credential_file_mounts():
volume_args.extend([
@ -341,6 +345,21 @@ class DockerEnvironment(BaseEnvironment):
skills_mount["host_path"],
skills_mount["container_path"],
)
# Mount host-side cache directories (documents, images, audio,
# screenshots) so the agent can access uploaded files and other
# cached media from inside the container. Read-only — the
# container reads these but the host gateway manages writes.
for cache_mount in get_cache_directory_mounts():
volume_args.extend([
"-v",
f"{cache_mount['host_path']}:{cache_mount['container_path']}:ro",
])
logger.info(
"Docker: mounting cache dir %s -> %s",
cache_mount["host_path"],
cache_mount["container_path"],
)
except Exception as e:
logger.debug("Docker: could not load credential file mounts: %s", e)

View file

@ -186,7 +186,11 @@ class ModalEnvironment(BaseModalExecutionEnvironment):
cred_mounts = []
try:
from tools.credential_files import get_credential_file_mounts, iter_skills_files
from tools.credential_files import (
get_credential_file_mounts,
iter_skills_files,
iter_cache_files,
)
for mount_entry in get_credential_file_mounts():
cred_mounts.append(
@ -212,6 +216,20 @@ class ModalEnvironment(BaseModalExecutionEnvironment):
)
if skills_files:
logger.info("Modal: mounting %d skill files", len(skills_files))
# Mount host-side cache files (documents, images, audio,
# screenshots). New files arriving mid-session are picked up
# by _sync_files() before each command execution.
cache_files = iter_cache_files()
for entry in cache_files:
cred_mounts.append(
_modal.Mount.from_local_file(
entry["host_path"],
remote_path=entry["container_path"],
)
)
if cache_files:
logger.info("Modal: mounting %d cache files", len(cache_files))
except Exception as e:
logger.debug("Modal: could not load credential file mounts: %s", e)
@ -308,13 +326,19 @@ class ModalEnvironment(BaseModalExecutionEnvironment):
return True
def _sync_files(self) -> None:
"""Push credential files and skill files into the running sandbox.
"""Push credential, skill, and cache files into the running sandbox.
Runs before each command. Uses mtime+size caching so only changed
files are pushed (~13μs overhead in the no-op case).
files are pushed (~13μs overhead in the no-op case). Cache files
are especially important here — new uploads/screenshots may appear
mid-session after sandbox creation.
"""
try:
from tools.credential_files import get_credential_file_mounts, iter_skills_files
from tools.credential_files import (
get_credential_file_mounts,
iter_skills_files,
iter_cache_files,
)
for entry in get_credential_file_mounts():
if self._push_file_to_sandbox(entry["host_path"], entry["container_path"]):
@ -323,6 +347,10 @@ class ModalEnvironment(BaseModalExecutionEnvironment):
for entry in iter_skills_files():
if self._push_file_to_sandbox(entry["host_path"], entry["container_path"]):
logger.debug("Modal: synced skill file %s", entry["container_path"])
for entry in iter_cache_files():
if self._push_file_to_sandbox(entry["host_path"], entry["container_path"]):
logger.debug("Modal: synced cache file %s", entry["container_path"])
except Exception as e:
logger.debug("Modal: file sync failed: %s", e)