mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-27 01:11:40 +00:00
perf(ssh,modal): bulk file sync via tar pipe and tar/base64 archive (#8014)
* perf(ssh,modal): bulk file sync via tar pipe and tar/base64 archive

  SSH: symlink staging + tar -ch piped over SSH in a single TCP stream.
  Eliminates per-file scp round-trips. Handles timeout (kills both
  processes), SSH Popen failure (kills tar), and tar create failure.

  Modal: in-memory gzipped tar archive, base64-encoded, decoded and
  extracted in one exec call. Checks the exit code and raises on failure.

  Both backends use shared helpers extracted into file_sync.py:

  - quoted_mkdir_command(): mirrors the existing quoted_rm_command()
  - unique_parent_dirs(): deduplicates parent dirs from file pairs

  Migrates _ensure_remote_dirs to use the new helpers.

  28 new tests (21 SSH + 7 Modal), all passing.

  Closes #7465
  Closes #7467

* fix(modal): pipe stdin to avoid ARG_MAX, clean up review findings

  - Modal bulk upload: stream the base64 payload through proc.stdin in
    1 MB chunks instead of embedding it in the command string (the Modal
    SDK enforces a 64 KB ARG_MAX_BYTES limit; typical payloads are ~4.3 MB)
  - Modal single-file upload: same stdin fix, plus exit-code checking
  - Remove what-narrating comments in ssh.py and modal.py (keep WHY
    comments: symlink-staging rationale, SIGPIPE, deadlock avoidance)
  - Remove the unnecessary `sandbox = self._sandbox` alias in the Modal
    bulk path
  - Daytona: use the shared helpers (unique_parent_dirs,
    quoted_mkdir_command) instead of inlined duplicates

---------

Co-authored-by: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com>
This commit is contained in:
parent
fd73937ec8
commit
27eeea0555
6 changed files with 992 additions and 13 deletions
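
The SSH side of this change is not part of the diff shown below, which covers only the Modal backend. As a rough, hypothetical sketch of the technique the commit message describes (stage symlinks mirroring the remote layout, pipe a dereferencing tar over one SSH stream, kill both processes on timeout), assuming a plain subprocess pipeline; the host argument, remote tar flags, and error handling here are illustrative, not the repository's actual ssh.py code:

# Hypothetical sketch, not the actual ssh.py implementation.
import os
import subprocess
import tempfile


def bulk_upload_ssh(host: str, files: list[tuple[str, str]], timeout: int = 120) -> None:
    # Stage symlinks mirroring the remote layout so a single tar -ch
    # (create + dereference) streams every file without copying locally.
    with tempfile.TemporaryDirectory() as stage:
        for host_path, remote_path in files:
            link = os.path.join(stage, remote_path.lstrip("/"))
            os.makedirs(os.path.dirname(link), exist_ok=True)
            os.symlink(os.path.abspath(host_path), link)

        tar = subprocess.Popen(
            ["tar", "-chf", "-", "-C", stage, "."], stdout=subprocess.PIPE
        )
        ssh = subprocess.Popen(["ssh", host, "tar -xf - -C /"], stdin=tar.stdout)
        tar.stdout.close()  # let tar receive SIGPIPE if the ssh side dies
        try:
            if ssh.wait(timeout=timeout) != 0 or tar.wait(timeout=timeout) != 0:
                raise RuntimeError("bulk upload over SSH failed")
        except subprocess.TimeoutExpired:
            tar.kill()  # on timeout, kill both processes, as the commit describes
            ssh.kill()
            raise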
@@ -5,8 +5,11 @@ wrapper, while preserving Hermes' persistent snapshot behavior across sessions.
 """
 
 import asyncio
+import base64
+import io
 import logging
 import shlex
+import tarfile
 import threading
 from pathlib import Path
 from typing import Any, Optional
@@ -18,7 +21,13 @@ from tools.environments.base import (
     _load_json_store,
     _save_json_store,
 )
-from tools.environments.file_sync import FileSyncManager, iter_sync_files, quoted_rm_command
+from tools.environments.file_sync import (
+    FileSyncManager,
+    iter_sync_files,
+    quoted_mkdir_command,
+    quoted_rm_command,
+    unique_parent_dirs,
+)
 
 logger = logging.getLogger(__name__)
 
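The two new helpers imported here are defined in file_sync.py and do not appear in this file's diff. Judging only by their names and by how the bulk-upload path uses them further down, minimal implementations might look like the following sketch; the actual definitions in file_sync.py may differ:

# Illustrative sketches of the file_sync.py helpers; not the real definitions.
import shlex
from pathlib import PurePosixPath


def unique_parent_dirs(files: list[tuple[str, str]]) -> list[str]:
    # Deduplicate the remote parent directories of (host, remote) pairs.
    return sorted({str(PurePosixPath(remote).parent) for _, remote in files})


def quoted_mkdir_command(dirs: list[str]) -> str:
    # Mirrors quoted_rm_command(): one mkdir -p with every path shell-quoted.
    return "mkdir -p " + " ".join(shlex.quote(d) for d in dirs)
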
@@ -259,26 +268,84 @@ class ModalEnvironment(BaseEnvironment):
             get_files_fn=lambda: iter_sync_files("/root/.hermes"),
             upload_fn=self._modal_upload,
             delete_fn=self._modal_delete,
+            bulk_upload_fn=self._modal_bulk_upload,
         )
         self._sync_manager.sync(force=True)
         self.init_session()
 
     def _modal_upload(self, host_path: str, remote_path: str) -> None:
-        """Upload a single file via base64-over-exec."""
-        import base64
+        """Upload a single file via base64 piped through stdin."""
         content = Path(host_path).read_bytes()
         b64 = base64.b64encode(content).decode("ascii")
         container_dir = str(Path(remote_path).parent)
         cmd = (
             f"mkdir -p {shlex.quote(container_dir)} && "
-            f"echo {shlex.quote(b64)} | base64 -d > {shlex.quote(remote_path)}"
+            f"base64 -d > {shlex.quote(remote_path)}"
         )
 
         async def _write():
             proc = await self._sandbox.exec.aio("bash", "-c", cmd)
+            offset = 0
+            chunk_size = self._STDIN_CHUNK_SIZE
+            while offset < len(b64):
+                proc.stdin.write(b64[offset:offset + chunk_size])
+                await proc.stdin.drain.aio()
+                offset += chunk_size
+            proc.stdin.write_eof()
+            await proc.stdin.drain.aio()
             await proc.wait.aio()
 
-        self._worker.run_coroutine(_write(), timeout=15)
+        self._worker.run_coroutine(_write(), timeout=30)
+
+    # Modal SDK stdin buffer limit (legacy server path). The command-router
+    # path allows 16 MB, but we must stay under the smaller 2 MB cap for
+    # compatibility. Chunks are written below this threshold and flushed
+    # individually via drain().
+    _STDIN_CHUNK_SIZE = 1 * 1024 * 1024  # 1 MB — safe for both transport paths
+
+    def _modal_bulk_upload(self, files: list[tuple[str, str]]) -> None:
+        """Upload many files via tar archive piped through stdin.
+
+        Builds a gzipped tar archive in memory and streams it into a
+        ``base64 -d | tar xzf -`` pipeline via the process's stdin,
+        avoiding the Modal SDK's 64 KB ``ARG_MAX_BYTES`` exec-arg limit.
+        """
+        if not files:
+            return
+
+        buf = io.BytesIO()
+        with tarfile.open(fileobj=buf, mode="w:gz") as tar:
+            for host_path, remote_path in files:
+                tar.add(host_path, arcname=remote_path.lstrip("/"))
+        payload = base64.b64encode(buf.getvalue()).decode("ascii")
+
+        parents = unique_parent_dirs(files)
+        mkdir_part = quoted_mkdir_command(parents)
+        cmd = f"{mkdir_part} && base64 -d | tar xzf - -C /"
+
+        async def _bulk():
+            proc = await self._sandbox.exec.aio("bash", "-c", cmd)
+
+            # Stream payload through stdin in chunks to stay under the
+            # SDK's per-write buffer limit (2 MB legacy / 16 MB router).
+            offset = 0
+            chunk_size = self._STDIN_CHUNK_SIZE
+            while offset < len(payload):
+                proc.stdin.write(payload[offset:offset + chunk_size])
+                await proc.stdin.drain.aio()
+                offset += chunk_size
+
+            proc.stdin.write_eof()
+            await proc.stdin.drain.aio()
+
+            exit_code = await proc.wait.aio()
+            if exit_code != 0:
+                stderr_text = await proc.stderr.read.aio()
+                raise RuntimeError(
+                    f"Modal bulk upload failed (exit {exit_code}): {stderr_text}"
+                )
+
+        self._worker.run_coroutine(_bulk(), timeout=120)
 
     def _modal_delete(self, remote_paths: list[str]) -> None:
         """Batch-delete remote files via exec."""
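
The decode-and-extract pipeline that _modal_bulk_upload drives can be exercised locally without a Modal sandbox. A small self-contained round-trip check, assuming only bash, base64, and tar on the host, might look like:

# Local round-trip check for the base64 + gzipped-tar pipeline (illustrative).
import base64
import io
import pathlib
import subprocess
import tarfile
import tempfile


def roundtrip_check() -> None:
    with tempfile.TemporaryDirectory() as src, tempfile.TemporaryDirectory() as dst:
        # Build a one-file gzipped tar in memory, as _modal_bulk_upload does.
        source = pathlib.Path(src, "hello.txt")
        source.write_text("hi\n")
        buf = io.BytesIO()
        with tarfile.open(fileobj=buf, mode="w:gz") as tar:
            tar.add(source, arcname="hello.txt")
        payload = base64.b64encode(buf.getvalue())

        # Same shell pipeline the sandbox runs, extracting into dst instead of /.
        subprocess.run(
            ["bash", "-c", f"base64 -d | tar xzf - -C {dst}"],
            input=payload,
            check=True,
        )
        assert pathlib.Path(dst, "hello.txt").read_text() == "hi\n"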