hermes-agent/tests/tools/test_modal_bulk_upload.py
kshitijk4poor 04d4f41e77 perf(ssh,modal): bulk file sync via tar pipe and tar/base64 archive
SSH: symlink-staging + tar -ch piped over SSH in a single TCP stream.
Eliminates per-file scp round-trips. Handles timeout (kills both
processes), SSH Popen failure (kills tar), and tar create failure.

Modal: in-memory gzipped tar archive, base64-encoded, decoded+extracted
in one exec call. Checks exit code and raises on failure.

Both backends use shared helpers extracted into file_sync.py:
- quoted_mkdir_command() — mirrors existing quoted_rm_command()
- unique_parent_dirs() — deduplicates parent dirs from file pairs

Migrates _ensure_remote_dirs to use the new helpers.

28 new tests (21 SSH + 7 Modal), all passing.

Closes #7465
Closes #7467
2026-04-11 17:21:15 -07:00

224 lines
7.5 KiB
Python

"""Tests for Modal bulk upload via tar/base64 archive."""
import asyncio
import base64
import io
import tarfile
from pathlib import Path
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from tools.environments import modal as modal_env
def _make_mock_modal_env(monkeypatch, tmp_path):
    """Create a minimal mock ModalEnvironment for testing upload methods.

    The instance is allocated with ``object.__new__`` so that
    ``ModalEnvironment.__init__`` never runs (per the original author's note,
    ``__init__`` requires the Modal SDK).  Only the attributes assigned below
    are set on the returned object.

    Args:
        monkeypatch: pytest ``monkeypatch`` fixture.  Not used by this helper;
            kept so existing call sites keep working.
        tmp_path: pytest ``tmp_path`` fixture.  Not used by this helper; kept
            for the same reason.

    Returns:
        A ``ModalEnvironment`` instance whose ``_sandbox`` and ``_worker`` are
        ``MagicMock`` objects and whose ``_sync_manager`` is ``None``.
    """
    env = object.__new__(modal_env.ModalEnvironment)
    env._sandbox = MagicMock()
    env._worker = MagicMock()
    env._persistent = False
    env._task_id = "test"
    env._sync_manager = None
    return env
def _wire_async_exec(env, exec_calls=None, *, exit_code=0):
    """Wire a fake ``sandbox.exec.aio`` and a real ``run_coroutine`` onto *env*.

    After this call:

    * ``env._sandbox.exec.aio(*args, **kwargs)`` is a coroutine function that
      appends the positional ``args`` tuple to *exec_calls* (keyword arguments
      are not recorded) and returns a mock process whose ``wait.aio()``
      resolves to *exit_code*.
    * ``env._worker.run_coroutine(coro, **kwargs)`` merges ``kwargs`` into the
      returned ``run_kwargs`` dict, then runs *coro* to completion on a fresh
      event loop that is closed afterwards.

    Args:
        env: Object exposing ``_sandbox`` and ``_worker`` attributes.
        exec_calls: Optional list that receives the positional args of every
            fake exec call.  A new list is created when omitted.
        exit_code: Value the fake process reports from ``wait.aio()``.
            Defaults to ``0`` (success).

    Returns:
        A ``(exec_calls, run_kwargs)`` tuple.  ``run_kwargs`` is filled in with
        the keyword arguments of each ``run_coroutine`` call (useful for
        timeout assertions).
    """
    if exec_calls is None:
        exec_calls = []
    run_kwargs: dict = {}

    async def mock_exec(*args, **kwargs):
        exec_calls.append(args)
        proc = MagicMock()
        # MagicMock creates ``proc.wait`` on access; only ``aio`` must be async.
        proc.wait.aio = AsyncMock(return_value=exit_code)
        return proc

    env._sandbox.exec = MagicMock()
    env._sandbox.exec.aio = mock_exec

    def real_run_coroutine(coro, **kwargs):
        run_kwargs.update(kwargs)
        # A private loop per call keeps the helper independent of any loop
        # the test runner may or may not have installed.
        loop = asyncio.new_event_loop()
        try:
            return loop.run_until_complete(coro)
        finally:
            loop.close()

    env._worker.run_coroutine = real_run_coroutine
    return exec_calls, run_kwargs
class TestModalBulkUpload:
    """Test _modal_bulk_upload method.

    Every test builds a ``ModalEnvironment`` without running ``__init__`` and
    replaces the sandbox/worker with mocks, so no Modal SDK call is made.
    """

    def test_empty_files_is_noop(self, monkeypatch, tmp_path):
        """Empty file list should not call worker.run_coroutine."""
        env = _make_mock_modal_env(monkeypatch, tmp_path)

        env._modal_bulk_upload([])

        env._worker.run_coroutine.assert_not_called()

    def test_tar_archive_contains_all_files(self, monkeypatch, tmp_path):
        """The tar archive sent to the sandbox should contain all files."""
        env = _make_mock_modal_env(monkeypatch, tmp_path)
        src_a = tmp_path / "a.json"
        src_b = tmp_path / "b.py"
        src_a.write_text("cred_content")
        src_b.write_text("skill_content")
        files = [
            (str(src_a), "/root/.hermes/credentials/a.json"),
            (str(src_b), "/root/.hermes/skills/b.py"),
        ]
        exec_calls, _ = _wire_async_exec(env)

        env._modal_bulk_upload(files)

        # Verify exec was called with bash -c and a tar command
        assert len(exec_calls) == 1
        args = exec_calls[0]
        assert args[0] == "bash"
        assert args[1] == "-c"
        cmd = args[2]
        assert "mkdir -p" in cmd
        assert "base64 -d" in cmd
        assert "tar xzf" in cmd
        assert "-C /" in cmd

        # Extract the base64 payload and verify tar contents
        import re

        match = re.search(r"echo '?([A-Za-z0-9+/=]+)'?", cmd)
        assert match, f"Could not find base64 payload in command: {cmd}"
        payload = match.group(1)
        tar_data = base64.b64decode(payload)
        buf = io.BytesIO(tar_data)
        with tarfile.open(fileobj=buf, mode="r:gz") as tar:
            names = tar.getnames()
            assert "root/.hermes/credentials/a.json" in names
            assert "root/.hermes/skills/b.py" in names

            # Verify content.  extractfile() returns None for members that
            # are not regular files, so check before reading to get a clear
            # assertion failure instead of an AttributeError.
            a_member = tar.extractfile("root/.hermes/credentials/a.json")
            assert a_member is not None
            assert a_member.read() == b"cred_content"
            b_member = tar.extractfile("root/.hermes/skills/b.py")
            assert b_member is not None
            assert b_member.read() == b"skill_content"

    def test_mkdir_includes_all_parents(self, monkeypatch, tmp_path):
        """Remote parent directories should be pre-created in the command."""
        env = _make_mock_modal_env(monkeypatch, tmp_path)
        src = tmp_path / "f.txt"
        src.write_text("data")
        files = [
            (str(src), "/root/.hermes/credentials/f.txt"),
            (str(src), "/root/.hermes/skills/deep/nested/f.txt"),
        ]
        exec_calls, _ = _wire_async_exec(env)

        env._modal_bulk_upload(files)

        cmd = exec_calls[0][2]
        assert "/root/.hermes/credentials" in cmd
        assert "/root/.hermes/skills/deep/nested" in cmd

    def test_single_exec_call(self, monkeypatch, tmp_path):
        """Bulk upload should use exactly one exec call regardless of file count."""
        env = _make_mock_modal_env(monkeypatch, tmp_path)
        files = []
        for i in range(20):
            src = tmp_path / f"file_{i}.txt"
            src.write_text(f"content_{i}")
            files.append((str(src), f"/root/.hermes/cache/file_{i}.txt"))
        exec_calls, _ = _wire_async_exec(env)

        env._modal_bulk_upload(files)

        # Should be exactly 1 exec call, not 20
        assert len(exec_calls) == 1

    def test_bulk_upload_wired_in_filesyncmanager(self, monkeypatch):
        """Verify ModalEnvironment passes bulk_upload_fn to FileSyncManager."""
        # NOTE(review): this test constructs FileSyncManager itself rather
        # than running ModalEnvironment.__init__, so it only proves that the
        # keyword reaches the patched factory -- confirm whether the real
        # __init__ wiring is covered elsewhere.
        captured_kwargs = {}

        def capture_fsm(**kwargs):
            captured_kwargs.update(kwargs)
            return type("M", (), {"sync": lambda self, **k: None})()

        monkeypatch.setattr(modal_env, "FileSyncManager", capture_fsm)

        # Create a minimal env without full __init__
        env = object.__new__(modal_env.ModalEnvironment)
        env._sandbox = MagicMock()
        env._worker = MagicMock()
        env._persistent = False
        env._task_id = "test"

        # Manually call the part of __init__ that wires FileSyncManager
        from tools.environments.file_sync import iter_sync_files

        env._sync_manager = modal_env.FileSyncManager(
            get_files_fn=lambda: iter_sync_files("/root/.hermes"),
            upload_fn=env._modal_upload,
            delete_fn=env._modal_delete,
            bulk_upload_fn=env._modal_bulk_upload,
        )

        assert "bulk_upload_fn" in captured_kwargs
        assert captured_kwargs["bulk_upload_fn"] is not None
        assert callable(captured_kwargs["bulk_upload_fn"])

    def test_timeout_set_to_120(self, monkeypatch, tmp_path):
        """Bulk upload uses a 120s timeout (not the per-file 15s)."""
        env = _make_mock_modal_env(monkeypatch, tmp_path)
        src = tmp_path / "f.txt"
        src.write_text("data")
        files = [(str(src), "/root/.hermes/f.txt")]
        _, run_kwargs = _wire_async_exec(env)

        env._modal_bulk_upload(files)

        assert run_kwargs.get("timeout") == 120

    def test_nonzero_exit_raises(self, monkeypatch, tmp_path):
        """Non-zero exit code from remote exec should raise RuntimeError."""
        env = _make_mock_modal_env(monkeypatch, tmp_path)
        src = tmp_path / "f.txt"
        src.write_text("data")
        files = [(str(src), "/root/.hermes/f.txt")]

        # Reuse the shared helper for the run_coroutine wiring, then swap in
        # an exec whose process reports failure.
        _wire_async_exec(env)

        async def failing_exec(*args, **kwargs):
            proc = MagicMock()
            proc.wait.aio = AsyncMock(return_value=1)  # non-zero exit
            return proc

        env._sandbox.exec.aio = failing_exec

        with pytest.raises(RuntimeError, match="Modal bulk upload failed"):
            env._modal_bulk_upload(files)