feat(file-sync): sync remote changes back to host on teardown

Add sync_back() to FileSyncManager — on sandbox cleanup, downloads
the remote .hermes/ directory as a tar archive, diffs against SHA-256
hashes of what was originally pushed, and applies only changed files.

- SHA-256 content hashing on push for accurate change detection
- Retry with exponential backoff (3 attempts, 2s/4s/8s)
- SIGINT deferred during sync-back to prevent partial writes
- fcntl.flock serialization for concurrent gateway sandboxes
- Last-write-wins conflict resolution with logged warnings
- New files created on remote are pulled back via path inference
- Backend implementations: SSH (tar cf over pipe), Modal (exec tar
  cf, read stdout), Daytona (exec tar cf, SDK download_file)
- Wired into cleanup() for all three backends (runs before
  ControlMaster close / sandbox terminate / sandbox stop)

28 new tests (10 FSM core + 18 backend-specific), 72 total passing.
This commit is contained in:
alt-glitch 2026-04-11 17:32:27 -07:00
parent 27eeea0555
commit 37c478cf2f
6 changed files with 1023 additions and 0 deletions

View file

@ -10,6 +10,7 @@ from pathlib import Path
from tools.environments.base import BaseEnvironment, _popen_bash
from tools.environments.file_sync import (
BulkDownloadFn,
FileSyncManager,
iter_sync_files,
quoted_mkdir_command,
@ -58,6 +59,7 @@ class SSHEnvironment(BaseEnvironment):
upload_fn=self._scp_upload,
delete_fn=self._ssh_delete,
bulk_upload_fn=self._ssh_bulk_upload,
bulk_download_fn=self._ssh_bulk_download,
)
self._sync_manager.sync(force=True)
@ -216,6 +218,16 @@ class SSHEnvironment(BaseEnvironment):
logger.debug("SSH: bulk-uploaded %d file(s) via tar pipe", len(files))
def _ssh_bulk_download(self, dest: Path) -> None:
"""Download remote .hermes/ as a tar archive."""
base = f"{self._remote_home}/.hermes"
ssh_cmd = self._build_ssh_command()
ssh_cmd.append(f"tar cf - -C {shlex.quote(base)} .")
with open(dest, "wb") as f:
result = subprocess.run(ssh_cmd, stdout=f, stderr=subprocess.PIPE, timeout=120)
if result.returncode != 0:
raise RuntimeError(f"SSH bulk download failed: {result.stderr.decode(errors='replace').strip()}")
def _ssh_delete(self, remote_paths: list[str]) -> None:
"""Batch-delete remote files in one SSH call."""
cmd = self._build_ssh_command()
@ -245,6 +257,10 @@ class SSHEnvironment(BaseEnvironment):
return _popen_bash(cmd, stdin_data)
def cleanup(self):
if self._sync_manager:
logger.info("SSH: syncing files from sandbox...")
self._sync_manager.sync_back()
if self.control_socket.exists():
try:
cmd = ["ssh", "-o", f"ControlPath={self.control_socket}",