mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-26 01:01:40 +00:00
perf(ssh,modal): bulk file sync via tar pipe and tar/base64 archive
SSH: symlink-staging + tar -ch piped over SSH in a single TCP stream. Eliminates per-file scp round-trips. Handles timeout (kills both processes), SSH Popen failure (kills tar), and tar create failure. Modal: in-memory gzipped tar archive, base64-encoded, decoded+extracted in one exec call. Checks exit code and raises on failure. Both backends use shared helpers extracted into file_sync.py: - quoted_mkdir_command() — mirrors existing quoted_rm_command() - unique_parent_dirs() — deduplicates parent dirs from file pairs Migrates _ensure_remote_dirs to use the new helpers. 28 new tests (21 SSH + 7 Modal), all passing. Closes #7465 Closes #7467
This commit is contained in:
parent
723b5bec85
commit
04d4f41e77
5 changed files with 897 additions and 5 deletions
224
tests/tools/test_modal_bulk_upload.py
Normal file
224
tests/tools/test_modal_bulk_upload.py
Normal file
|
|
@ -0,0 +1,224 @@
|
|||
"""Tests for Modal bulk upload via tar/base64 archive."""
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
import io
|
||||
import tarfile
|
||||
from pathlib import Path
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from tools.environments import modal as modal_env
|
||||
|
||||
|
||||
def _make_mock_modal_env(monkeypatch, tmp_path):
|
||||
"""Create a minimal mock ModalEnvironment for testing upload methods.
|
||||
|
||||
Returns a ModalEnvironment-like object with _sandbox and _worker mocked.
|
||||
We don't call __init__ because it requires the Modal SDK.
|
||||
"""
|
||||
env = object.__new__(modal_env.ModalEnvironment)
|
||||
env._sandbox = MagicMock()
|
||||
env._worker = MagicMock()
|
||||
env._persistent = False
|
||||
env._task_id = "test"
|
||||
env._sync_manager = None
|
||||
return env
|
||||
|
||||
|
||||
def _wire_async_exec(env, exec_calls=None):
|
||||
"""Wire mock sandbox.exec.aio and a real run_coroutine on the env.
|
||||
|
||||
Optionally captures exec call args into *exec_calls* list.
|
||||
Returns a dict that will contain ``run_coroutine`` kwargs after
|
||||
the next call (useful for timeout assertions).
|
||||
"""
|
||||
if exec_calls is None:
|
||||
exec_calls = []
|
||||
run_kwargs: dict = {}
|
||||
|
||||
async def mock_exec(*args, **kwargs):
|
||||
exec_calls.append(args)
|
||||
proc = MagicMock()
|
||||
proc.wait = MagicMock()
|
||||
proc.wait.aio = AsyncMock(return_value=0)
|
||||
return proc
|
||||
|
||||
env._sandbox.exec = MagicMock()
|
||||
env._sandbox.exec.aio = mock_exec
|
||||
|
||||
def real_run_coroutine(coro, **kwargs):
|
||||
run_kwargs.update(kwargs)
|
||||
loop = asyncio.new_event_loop()
|
||||
try:
|
||||
return loop.run_until_complete(coro)
|
||||
finally:
|
||||
loop.close()
|
||||
|
||||
env._worker.run_coroutine = real_run_coroutine
|
||||
return exec_calls, run_kwargs
|
||||
|
||||
|
||||
class TestModalBulkUpload:
|
||||
"""Test _modal_bulk_upload method."""
|
||||
|
||||
def test_empty_files_is_noop(self, monkeypatch, tmp_path):
|
||||
"""Empty file list should not call worker.run_coroutine."""
|
||||
env = _make_mock_modal_env(monkeypatch, tmp_path)
|
||||
env._modal_bulk_upload([])
|
||||
env._worker.run_coroutine.assert_not_called()
|
||||
|
||||
def test_tar_archive_contains_all_files(self, monkeypatch, tmp_path):
|
||||
"""The tar archive sent to the sandbox should contain all files."""
|
||||
env = _make_mock_modal_env(monkeypatch, tmp_path)
|
||||
|
||||
src_a = tmp_path / "a.json"
|
||||
src_b = tmp_path / "b.py"
|
||||
src_a.write_text("cred_content")
|
||||
src_b.write_text("skill_content")
|
||||
|
||||
files = [
|
||||
(str(src_a), "/root/.hermes/credentials/a.json"),
|
||||
(str(src_b), "/root/.hermes/skills/b.py"),
|
||||
]
|
||||
|
||||
exec_calls, _ = _wire_async_exec(env)
|
||||
env._modal_bulk_upload(files)
|
||||
|
||||
# Verify exec was called with bash -c and a tar command
|
||||
assert len(exec_calls) == 1
|
||||
args = exec_calls[0]
|
||||
assert args[0] == "bash"
|
||||
assert args[1] == "-c"
|
||||
cmd = args[2]
|
||||
assert "mkdir -p" in cmd
|
||||
assert "base64 -d" in cmd
|
||||
assert "tar xzf" in cmd
|
||||
assert "-C /" in cmd
|
||||
|
||||
# Extract the base64 payload and verify tar contents
|
||||
import re
|
||||
match = re.search(r"echo '?([A-Za-z0-9+/=]+)'?", cmd)
|
||||
assert match, f"Could not find base64 payload in command: {cmd}"
|
||||
payload = match.group(1)
|
||||
|
||||
tar_data = base64.b64decode(payload)
|
||||
buf = io.BytesIO(tar_data)
|
||||
with tarfile.open(fileobj=buf, mode="r:gz") as tar:
|
||||
names = sorted(tar.getnames())
|
||||
assert "root/.hermes/credentials/a.json" in names
|
||||
assert "root/.hermes/skills/b.py" in names
|
||||
|
||||
# Verify content
|
||||
a_content = tar.extractfile("root/.hermes/credentials/a.json").read()
|
||||
assert a_content == b"cred_content"
|
||||
b_content = tar.extractfile("root/.hermes/skills/b.py").read()
|
||||
assert b_content == b"skill_content"
|
||||
|
||||
def test_mkdir_includes_all_parents(self, monkeypatch, tmp_path):
|
||||
"""Remote parent directories should be pre-created in the command."""
|
||||
env = _make_mock_modal_env(monkeypatch, tmp_path)
|
||||
|
||||
src = tmp_path / "f.txt"
|
||||
src.write_text("data")
|
||||
|
||||
files = [
|
||||
(str(src), "/root/.hermes/credentials/f.txt"),
|
||||
(str(src), "/root/.hermes/skills/deep/nested/f.txt"),
|
||||
]
|
||||
|
||||
exec_calls, _ = _wire_async_exec(env)
|
||||
env._modal_bulk_upload(files)
|
||||
|
||||
cmd = exec_calls[0][2]
|
||||
assert "/root/.hermes/credentials" in cmd
|
||||
assert "/root/.hermes/skills/deep/nested" in cmd
|
||||
|
||||
def test_single_exec_call(self, monkeypatch, tmp_path):
|
||||
"""Bulk upload should use exactly one exec call regardless of file count."""
|
||||
env = _make_mock_modal_env(monkeypatch, tmp_path)
|
||||
|
||||
files = []
|
||||
for i in range(20):
|
||||
src = tmp_path / f"file_{i}.txt"
|
||||
src.write_text(f"content_{i}")
|
||||
files.append((str(src), f"/root/.hermes/cache/file_{i}.txt"))
|
||||
|
||||
exec_calls, _ = _wire_async_exec(env)
|
||||
env._modal_bulk_upload(files)
|
||||
|
||||
# Should be exactly 1 exec call, not 20
|
||||
assert len(exec_calls) == 1
|
||||
|
||||
def test_bulk_upload_wired_in_filesyncmanager(self, monkeypatch):
|
||||
"""Verify ModalEnvironment passes bulk_upload_fn to FileSyncManager."""
|
||||
captured_kwargs = {}
|
||||
|
||||
def capture_fsm(**kwargs):
|
||||
captured_kwargs.update(kwargs)
|
||||
return type("M", (), {"sync": lambda self, **k: None})()
|
||||
|
||||
monkeypatch.setattr(modal_env, "FileSyncManager", capture_fsm)
|
||||
|
||||
# Create a minimal env without full __init__
|
||||
env = object.__new__(modal_env.ModalEnvironment)
|
||||
env._sandbox = MagicMock()
|
||||
env._worker = MagicMock()
|
||||
env._persistent = False
|
||||
env._task_id = "test"
|
||||
|
||||
# Manually call the part of __init__ that wires FileSyncManager
|
||||
from tools.environments.file_sync import iter_sync_files
|
||||
env._sync_manager = modal_env.FileSyncManager(
|
||||
get_files_fn=lambda: iter_sync_files("/root/.hermes"),
|
||||
upload_fn=env._modal_upload,
|
||||
delete_fn=env._modal_delete,
|
||||
bulk_upload_fn=env._modal_bulk_upload,
|
||||
)
|
||||
|
||||
assert "bulk_upload_fn" in captured_kwargs
|
||||
assert captured_kwargs["bulk_upload_fn"] is not None
|
||||
assert callable(captured_kwargs["bulk_upload_fn"])
|
||||
|
||||
def test_timeout_set_to_120(self, monkeypatch, tmp_path):
|
||||
"""Bulk upload uses a 120s timeout (not the per-file 15s)."""
|
||||
env = _make_mock_modal_env(monkeypatch, tmp_path)
|
||||
|
||||
src = tmp_path / "f.txt"
|
||||
src.write_text("data")
|
||||
files = [(str(src), "/root/.hermes/f.txt")]
|
||||
|
||||
_, run_kwargs = _wire_async_exec(env)
|
||||
env._modal_bulk_upload(files)
|
||||
|
||||
assert run_kwargs.get("timeout") == 120
|
||||
|
||||
def test_nonzero_exit_raises(self, monkeypatch, tmp_path):
|
||||
"""Non-zero exit code from remote exec should raise RuntimeError."""
|
||||
env = _make_mock_modal_env(monkeypatch, tmp_path)
|
||||
|
||||
src = tmp_path / "f.txt"
|
||||
src.write_text("data")
|
||||
files = [(str(src), "/root/.hermes/f.txt")]
|
||||
|
||||
async def mock_exec(*args, **kwargs):
|
||||
proc = MagicMock()
|
||||
proc.wait = MagicMock()
|
||||
proc.wait.aio = AsyncMock(return_value=1) # non-zero exit
|
||||
return proc
|
||||
|
||||
env._sandbox.exec = MagicMock()
|
||||
env._sandbox.exec.aio = mock_exec
|
||||
|
||||
def real_run_coroutine(coro, **kwargs):
|
||||
loop = asyncio.new_event_loop()
|
||||
try:
|
||||
return loop.run_until_complete(coro)
|
||||
finally:
|
||||
loop.close()
|
||||
|
||||
env._worker.run_coroutine = real_run_coroutine
|
||||
|
||||
with pytest.raises(RuntimeError, match="Modal bulk upload failed"):
|
||||
env._modal_bulk_upload(files)
|
||||
Loading…
Add table
Add a link
Reference in a new issue