perf(ssh,modal): bulk file sync via tar pipe and tar/base64 archive

SSH: symlink-staging + tar -ch piped over SSH in a single TCP stream.
Eliminates per-file scp round-trips. Handles timeout (kills both
processes), SSH Popen failure (kills tar), and tar create failure.

Modal: in-memory gzipped tar archive, base64-encoded, decoded+extracted
in one exec call. Checks exit code and raises on failure.

Both backends use shared helpers extracted into file_sync.py:
- quoted_mkdir_command() — mirrors existing quoted_rm_command()
- unique_parent_dirs() — deduplicates parent dirs from file pairs

Migrates _ensure_remote_dirs to use the new helpers.

28 new tests (21 SSH + 7 Modal), all passing.

Closes #7465
Closes #7467
This commit is contained in:
kshitijk4poor 2026-04-11 11:17:17 +05:30 committed by alt-glitch
parent 723b5bec85
commit 04d4f41e77
5 changed files with 897 additions and 5 deletions

View file

@ -5,8 +5,11 @@ wrapper, while preserving Hermes' persistent snapshot behavior across sessions.
"""
import asyncio
import base64
import io
import logging
import shlex
import tarfile
import threading
from pathlib import Path
from typing import Any, Optional
@ -18,7 +21,13 @@ from tools.environments.base import (
_load_json_store,
_save_json_store,
)
from tools.environments.file_sync import FileSyncManager, iter_sync_files, quoted_rm_command
from tools.environments.file_sync import (
FileSyncManager,
iter_sync_files,
quoted_mkdir_command,
quoted_rm_command,
unique_parent_dirs,
)
logger = logging.getLogger(__name__)
@ -259,13 +268,13 @@ class ModalEnvironment(BaseEnvironment):
get_files_fn=lambda: iter_sync_files("/root/.hermes"),
upload_fn=self._modal_upload,
delete_fn=self._modal_delete,
bulk_upload_fn=self._modal_bulk_upload,
)
self._sync_manager.sync(force=True)
self.init_session()
def _modal_upload(self, host_path: str, remote_path: str) -> None:
"""Upload a single file via base64-over-exec."""
import base64
content = Path(host_path).read_bytes()
b64 = base64.b64encode(content).decode("ascii")
container_dir = str(Path(remote_path).parent)
@ -280,6 +289,44 @@ class ModalEnvironment(BaseEnvironment):
self._worker.run_coroutine(_write(), timeout=15)
def _modal_bulk_upload(self, files: list[tuple[str, str]]) -> None:
    """Upload many files in one exec call using a tar archive.

    Packs every ``(host_path, remote_path)`` pair into an in-memory
    gzipped tar, base64-encodes the bytes, then decodes and extracts
    them inside the sandbox with a single ``exec`` invocation. This
    sidesteps the per-file exec + encoding overhead of
    :meth:`_modal_upload` (~580 files goes from minutes to seconds).

    Args:
        files: ``(host_path, remote_path)`` pairs; ``remote_path`` is
            the absolute destination inside the container.

    Raises:
        RuntimeError: if the remote extraction pipeline exits non-zero.
    """
    if not files:
        return

    # Members are stored with the leading '/' stripped so that
    # extracting with ``-C /`` recreates the absolute remote paths.
    archive = io.BytesIO()
    with tarfile.open(fileobj=archive, mode="w:gz") as bundle:
        for host_path, remote_path in files:
            bundle.add(host_path, arcname=remote_path.lstrip("/"))

    encoded = base64.b64encode(archive.getvalue()).decode("ascii")

    # Single shell pipeline: ensure every parent directory exists,
    # then decode the payload and untar it at the filesystem root.
    mkdir_part = quoted_mkdir_command(unique_parent_dirs(files))
    shell_cmd = (
        f"{mkdir_part} && "
        f"echo {shlex.quote(encoded)} | base64 -d | tar xzf - -C /"
    )

    sandbox = self._sandbox

    async def _bulk():
        proc = await sandbox.exec.aio("bash", "-c", shell_cmd)
        exit_code = await proc.wait.aio()
        if exit_code != 0:
            raise RuntimeError(
                f"Modal bulk upload failed (exit {exit_code})"
            )

    self._worker.run_coroutine(_bulk(), timeout=120)
def _modal_delete(self, remote_paths: list[str]) -> None:
"""Batch-delete remote files via exec."""
rm_cmd = quoted_rm_command(remote_paths)