#!/usr/bin/env python3
"""
Comprehensive Modal Integration Test Suite

Tests both:
1. terminal_tool.py Modal backend (CLI/agent use case)
2. atropos/backends/modal_backend.py (RL training use case)

Run with:
    # All tests (requires Modal account)
    python tests/test_modal_integration.py

    # Dry run (no Modal, tests config/logic only)
    python tests/test_modal_integration.py --dry-run

    # Specific test category
    python tests/test_modal_integration.py --category terminal
    python tests/test_modal_integration.py --category atropos
    python tests/test_modal_integration.py --category profiles
"""

import asyncio
import json
import os
import sys
import tempfile
import time
from contextlib import asynccontextmanager, contextmanager
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, Any, List, Optional

# Add parent to path for imports
_REPO_ROOT = Path(__file__).parent.parent
sys.path.insert(0, str(_REPO_ROOT))

# Public entry points of this script. The individual test_* functions are left
# out on purpose so that ``from <this module> import *`` does not re-export
# them into another module where pytest would collect and run them.
__all__ = [
    "TestConfig",
    "TestResults",
    "results",
    "try_import_atropos_backend",
    "run_sync_tests",
    "run_async_tests",
    "main",
]

# Skip reason shared by every test that needs the atropos package.
_ATROPOS_SKIP_REASON = "Requires atroposlib: pip install -e '.[atropos]'"


# =============================================================================
# Atropos Import Helper
# =============================================================================

def _exec_module_from_path(module_name, file_path, register_as=None):
    """Execute the file at ``file_path`` as a module named ``module_name``.

    Args:
        module_name: Name given to the module spec (becomes ``__name__``).
        file_path: Path of the Python source file to execute.
        register_as: Optional ``sys.modules`` key under which the module is
            registered *before* it is executed.

    Returns:
        The executed module object.

    Raises:
        ImportError: If no import spec/loader can be built for ``file_path``.
    """
    import importlib.util

    spec = importlib.util.spec_from_file_location(module_name, file_path)
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot build an import spec for {file_path}")
    module = importlib.util.module_from_spec(spec)
    if register_as is not None:
        sys.modules[register_as] = module
    spec.loader.exec_module(module)
    return module


def _load_atropos_backend_from_files():
    """Load the Modal backend from source files, skipping atropos/__init__.py.

    Returns:
        ``(ModalToolBackend, ModalSandboxConfig, Slot, SlotState)``.

    Raises:
        ImportError: If any of the required source files is missing.
    """
    atropos_dir = _REPO_ROOT / "atropos"
    backend_path = atropos_dir / "backends" / "modal_backend.py"
    # (spec name, sys.modules key, source file), in the order they are loaded.
    dependencies = [
        ("atropos_slots_slot", "atropos.slots.slot", atropos_dir / "slots" / "slot.py"),
        ("atropos_slots_executor", "atropos.slots.executor", atropos_dir / "slots" / "executor.py"),
        ("atropos_backends_base", "atropos.backends.base", atropos_dir / "backends" / "base.py"),
    ]

    if not backend_path.exists():
        raise ImportError(f"modal_backend.py not found at {backend_path}")
    for _, _, path in dependencies:
        if not path.exists():
            # Report a missing helper file as ImportError too, so callers that
            # skip on ImportError treat it like a missing backend.
            raise ImportError(f"{path.name} not found at {path}")

    previous = {key: sys.modules.get(key) for _, key, _ in dependencies}
    try:
        loaded = {
            key: _exec_module_from_path(spec_name, path, register_as=key)
            for spec_name, key, path in dependencies
        }
        backend_module = _exec_module_from_path("atropos_backends_modal_backend", backend_path)
    except BaseException:
        # Do not leave partially initialised modules registered in sys.modules.
        for key, module in previous.items():
            if module is None:
                sys.modules.pop(key, None)
            else:
                sys.modules[key] = module
        raise

    slot_module = loaded["atropos.slots.slot"]
    return (
        backend_module.ModalToolBackend,
        backend_module.ModalSandboxConfig,
        slot_module.Slot,
        slot_module.SlotState,
    )


def try_import_atropos_backend():
    """
    Try to import atropos backend directly, bypassing the atroposlib check.

    Returns (ModalToolBackend, ModalSandboxConfig, Slot, SlotState) or raises ImportError.
    """
    try:
        # Try direct import first (works if atroposlib is installed)
        from atropos.backends.modal_backend import ModalToolBackend, ModalSandboxConfig
        from atropos.slots.slot import Slot, SlotState
        return ModalToolBackend, ModalSandboxConfig, Slot, SlotState
    except ImportError:  # ModuleNotFoundError is a subclass of ImportError
        # Try importing the module directly without going through atropos/__init__.py
        return _load_atropos_backend_from_files()


def _import_modal_backend():
    """Return ``(ModalToolBackend, ModalSandboxConfig)``.

    Tries the regular package import first and falls back to
    ``try_import_atropos_backend()``, which returns a 4-tuple.

    Raises:
        ImportError: If neither import strategy works.
    """
    try:
        from atropos.backends.modal_backend import ModalToolBackend, ModalSandboxConfig
    except ImportError:
        ModalToolBackend, ModalSandboxConfig, _, _ = try_import_atropos_backend()
    return ModalToolBackend, ModalSandboxConfig


# =============================================================================
# Test Configuration
# =============================================================================

@dataclass
class TestConfig:
    """Run-time options of the suite (filled from the command line in main())."""

    # The class name matches pytest's ``Test*`` pattern; this is not a test class.
    __test__ = False

    dry_run: bool = False
    verbose: bool = True
    category: Optional[str] = None  # None = all, or "terminal", "atropos", "profiles"


# =============================================================================
# Test Results Tracking
# =============================================================================

class TestResults:
    """Collects pass/fail/skip outcomes and prints them as they are recorded."""

    # The class name matches pytest's ``Test*`` pattern; this is not a test class.
    __test__ = False

    def __init__(self):
        self.passed: List[str] = []
        self.failed: List[tuple] = []  # (name, error)
        self.skipped: List[tuple] = []  # (name, reason)

    def record_pass(self, name: str):
        """Record and print a passed test."""
        self.passed.append(name)
        print(f" ✅ {name}")

    def record_fail(self, name: str, error: str):
        """Record and print a failed test together with its error text."""
        self.failed.append((name, error))
        print(f" ❌ {name}: {error}")

    def record_skip(self, name: str, reason: str):
        """Record and print a skipped test together with the reason."""
        self.skipped.append((name, reason))
        print(f" ⏭️ {name}: {reason}")

    def summary(self):
        """Print the totals and return True when no test failed."""
        total = len(self.passed) + len(self.failed) + len(self.skipped)
        print(f"\n{'='*60}")
        print(f"TEST RESULTS: {len(self.passed)}/{total} passed")
        print(f" Passed: {len(self.passed)}")
        print(f" Failed: {len(self.failed)}")
        print(f" Skipped: {len(self.skipped)}")

        if self.failed:
            print("\nFailed tests:")
            for name, error in self.failed:
                print(f" - {name}: {error}")

        return len(self.failed) == 0


results = TestResults()


# =============================================================================
# Shared Test Helpers
# =============================================================================

def _describe_error(exc: BaseException) -> str:
    """Return ``str(exc)``, or the exception type name when that is empty.

    A bare ``assert`` raises an AssertionError whose text is empty; without the
    fallback the failure line would carry no information at all.
    """
    return str(exc) or type(exc).__name__


@contextmanager
def _modal_terminal_session(cleanup_vm, *task_ids):
    """Run the body with TERMINAL_ENV=modal and clean up the given task VMs.

    On success ``cleanup_vm`` is called for every task id and its errors
    propagate. When the body raised, cleanup is best-effort so that the body's
    error is the one reported. TERMINAL_ENV is restored afterwards in all cases.

    Args:
        cleanup_vm: Callable taking a task id.
        *task_ids: Task ids whose environments must be cleaned up.
    """
    original_env = os.environ.get("TERMINAL_ENV")
    os.environ["TERMINAL_ENV"] = "modal"
    try:
        try:
            yield
        except BaseException:
            for task_id in task_ids:
                try:
                    cleanup_vm(task_id)
                except Exception as cleanup_error:
                    print(f" ⚠️ cleanup_vm({task_id!r}) failed: {cleanup_error}")
            raise
        else:
            for task_id in task_ids:
                cleanup_vm(task_id)
    finally:
        # ``is not None`` so that a pre-existing empty value is restored too.
        if original_env is not None:
            os.environ["TERMINAL_ENV"] = original_env
        else:
            os.environ.pop("TERMINAL_ENV", None)


@asynccontextmanager
async def _started_backend(backend, **start_kwargs):
    """Start ``backend`` and always stop it with ``purge=True`` on exit.

    ``stop`` is not attempted when ``start`` itself raises.
    """
    await backend.start(**start_kwargs)
    try:
        yield backend
    finally:
        await backend.stop(purge=True)


# =============================================================================
# CATEGORY 1: Profile Configuration Tests
# =============================================================================

def test_profile_loading_from_env():
    """Test ModalProfile.from_env() loads environment variables correctly."""
    name = "test_profile_loading_from_env"
    # Note: The prefix is TERMINAL_MODAL_PROFILE_{profile_name}_ where profile_name is used as-is
    prefix = "TERMINAL_MODAL_PROFILE_testenv_"
    overrides = {
        "IMAGE": "python:3.12",
        "GPU": "A100",
        "CPU": "4.0",
        "MEMORY": "32768",
        "SECRETS": "secret1,secret2",
        "ENV_VARS": "KEY1=val1;KEY2=val2",
    }

    try:
        from tools.terminal_tool import ModalProfile

        # Set test environment variables
        for suffix, value in overrides.items():
            os.environ[prefix + suffix] = value

        profile = ModalProfile.from_env("testenv")

        assert profile.name == "testenv", f"Expected name 'testenv', got '{profile.name}'"
        assert profile.image == "python:3.12", f"Expected image 'python:3.12', got '{profile.image}'"
        assert profile.gpu == "A100", f"Expected GPU 'A100', got '{profile.gpu}'"
        assert profile.cpu == 4.0, f"Expected CPU 4.0, got {profile.cpu}"
        assert profile.memory == 32768, f"Expected memory 32768, got {profile.memory}"
        assert profile.secrets == ["secret1", "secret2"], f"Secrets mismatch: {profile.secrets}"
        assert profile.env_vars == {"KEY1": "val1", "KEY2": "val2"}, f"Env vars mismatch: {profile.env_vars}"

        results.record_pass(name)
    except Exception as e:
        results.record_fail(name, _describe_error(e))
    finally:
        # Cleanup (snapshot the keys first: os.environ is mutated in the loop)
        for key in [k for k in os.environ if k.startswith(prefix)]:
            del os.environ[key]


def test_profile_loading_from_yaml():
    """Test ModalProfile.load_profiles() from YAML file."""
    name = "test_profile_loading_from_yaml"
    yaml_content = """
profiles:
  test-yaml:
    image: pytorch/pytorch:2.0
    gpu: T4
    cpu: 2.0
    memory: 8192
    min_pool: 1
    max_pool: 3
    secrets:
      - hf-token
    env_vars:
      CUDA_VISIBLE_DEVICES: "0"
  test-yaml-2:
    image: node:20
    cpu: 1.0
"""
    yaml_path = None

    try:
        from tools.terminal_tool import ModalProfile, YAML_AVAILABLE

        if not YAML_AVAILABLE:
            results.record_skip(name, "PyYAML not installed")
            return

        with tempfile.NamedTemporaryFile(mode='w', suffix='.yaml', delete=False) as f:
            yaml_path = f.name
            f.write(yaml_content)

        profiles = ModalProfile.load_profiles(yaml_path)

        assert "test-yaml" in profiles, f"Profile 'test-yaml' not found in {list(profiles.keys())}"
        assert "test-yaml-2" in profiles, "Profile 'test-yaml-2' not found"

        p1 = profiles["test-yaml"]
        assert p1.image == "pytorch/pytorch:2.0"
        assert p1.gpu == "T4"
        assert p1.cpu == 2.0
        assert p1.memory == 8192
        assert p1.secrets == ["hf-token"]
        assert p1.env_vars == {"CUDA_VISIBLE_DEVICES": "0"}

        results.record_pass(name)
    except Exception as e:
        results.record_fail(name, _describe_error(e))
    finally:
        if yaml_path is not None:
            os.unlink(yaml_path)


def test_profile_defaults():
    """Test ModalProfile uses correct defaults."""
    name = "test_profile_defaults"

    try:
        from tools.terminal_tool import ModalProfile

        profile = ModalProfile(name="minimal")

        assert profile.image == "python:3.11"
        assert profile.gpu is None
        assert profile.cpu == 1.0
        assert profile.memory == 2048
        assert profile.min_pool == 1
        assert profile.max_pool == 5
        assert profile.idle_timeout == 120
        assert profile.secrets == []
        assert profile.env_vars == {}

        results.record_pass(name)
    except Exception as e:
        results.record_fail(name, _describe_error(e))


def test_atropos_config_with_app_name():
    """Test ModalSandboxConfig.with_app_name() method."""
    name = "test_atropos_config_with_app_name"

    try:
        _, ModalSandboxConfig = _import_modal_backend()

        config = ModalSandboxConfig(
            name="test-convert",
            image="python:3.10",
            gpu="A10G",
            cpu=2.0,
            memory=4096,
            secrets=["secret1"],
            env_vars={"FOO": "bar"},
        )

        config_with_app = config.with_app_name("my-app")

        assert config_with_app.app_name == "my-app-test-convert"
        assert config_with_app.image == "python:3.10"
        assert config_with_app.gpu == "A10G"
        assert config_with_app.cpu == 2.0
        assert config_with_app.memory == 4096
        assert config_with_app.secrets == ["secret1"]
        assert config_with_app.env_vars == {"FOO": "bar"}

        results.record_pass(name)
    except ImportError:
        results.record_skip(name, _ATROPOS_SKIP_REASON)
    except Exception as e:
        results.record_fail(name, _describe_error(e))


# =============================================================================
# CATEGORY 2: Terminal Tool Modal Tests
# =============================================================================

def test_terminal_modal_pool_manager_singleton():
    """Test _ModalPoolManager is a proper singleton."""
    name = "test_terminal_modal_pool_manager_singleton"

    try:
        from tools.terminal_tool import _ModalPoolManager

        # Reset singleton for test
        _ModalPoolManager._instance = None

        manager1 = _ModalPoolManager.get_instance()
        manager2 = _ModalPoolManager.get_instance()

        assert manager1 is manager2, "Pool manager should be singleton"

        results.record_pass(name)
    except Exception as e:
        results.record_fail(name, _describe_error(e))


def test_terminal_create_environment_modal():
    """Test _create_environment creates Modal environment correctly."""
    name = "test_terminal_create_environment_modal"

    try:
        from tools.terminal_tool import _create_environment

        env = _create_environment(
            env_type="modal",
            image="python:3.11",
            cwd="/workspace",
            timeout=60,
            task_id="test-task-123",
            profile="default",
        )

        # Check it's the right type
        assert env.__class__.__name__ == "_ModalSandboxEnvironment"
        assert env.profile == "default"
        assert env.task_id == "test-task-123"

        results.record_pass(name)
    except Exception as e:
        results.record_fail(name, _describe_error(e))


def test_terminal_tool_profile_parameter(config: TestConfig):
    """Test terminal_tool() accepts profile parameter."""
    name = "test_terminal_tool_profile_parameter"
    if config.dry_run:
        results.record_skip(name, "Dry run mode")
        return

    try:
        from tools.terminal_tool import terminal_tool, cleanup_vm

        task_id = f"test-profile-param-{int(time.time())}"
        with _modal_terminal_session(cleanup_vm, task_id):
            # This should work without error (profile passed through)
            result = terminal_tool(
                "echo 'Hello from Modal'",
                task_id=task_id,
                profile="default",
            )

            result_data = json.loads(result)
            # terminal_tool returns {"output", "exit_code", "error"} not {"success"}
            assert result_data.get("exit_code") == 0, f"Command failed: {result_data}"
            assert "Hello from Modal" in result_data.get("output", "")

        results.record_pass(name)
    except Exception as e:
        results.record_fail(name, _describe_error(e))


def test_terminal_modal_execute_simple(config: TestConfig):
    """Test basic command execution in Modal sandbox."""
    name = "test_terminal_modal_execute_simple"
    if config.dry_run:
        results.record_skip(name, "Dry run mode")
        return

    try:
        from tools.terminal_tool import terminal_tool, cleanup_vm

        task_id = f"test-simple-{int(time.time())}"
        with _modal_terminal_session(cleanup_vm, task_id):
            # Test echo
            result = json.loads(terminal_tool("echo 'test123'", task_id=task_id))
            assert result["exit_code"] == 0, f"Echo failed: {result}"
            assert "test123" in result["output"]

            # Test pwd
            result = json.loads(terminal_tool("pwd", task_id=task_id))
            assert result["exit_code"] == 0, f"pwd failed: {result}"

            # Test file creation and reading
            result = json.loads(terminal_tool("echo 'content' > test.txt && cat test.txt", task_id=task_id))
            assert result["exit_code"] == 0, f"File ops failed: {result}"
            assert "content" in result["output"]

        results.record_pass(name)
    except Exception as e:
        results.record_fail(name, _describe_error(e))


def test_terminal_modal_persistence(config: TestConfig):
    """Test state persists within same task_id."""
    name = "test_terminal_modal_persistence"
    if config.dry_run:
        results.record_skip(name, "Dry run mode")
        return

    try:
        from tools.terminal_tool import terminal_tool, cleanup_vm

        task_id = f"test-persist-{int(time.time())}"
        with _modal_terminal_session(cleanup_vm, task_id):
            # Create a file
            result1 = json.loads(terminal_tool("echo 'persistent data' > /workspace/persist.txt", task_id=task_id))
            assert result1["exit_code"] == 0, f"Create file failed: {result1}"

            # Read it in separate call (same task_id)
            result2 = json.loads(terminal_tool("cat /workspace/persist.txt", task_id=task_id))
            assert result2["exit_code"] == 0, f"Read file failed: {result2}"
            assert "persistent data" in result2["output"]

        results.record_pass(name)
    except Exception as e:
        results.record_fail(name, _describe_error(e))


def test_terminal_modal_isolation(config: TestConfig):
    """Test different task_ids are isolated."""
    name = "test_terminal_modal_isolation"
    if config.dry_run:
        results.record_skip(name, "Dry run mode")
        return

    try:
        from tools.terminal_tool import terminal_tool, cleanup_vm

        task_id_1 = f"test-iso-1-{int(time.time())}"
        task_id_2 = f"test-iso-2-{int(time.time())}"
        with _modal_terminal_session(cleanup_vm, task_id_1, task_id_2):
            # Create file in task 1
            result1 = json.loads(terminal_tool("echo 'task1' > /workspace/iso.txt", task_id=task_id_1))
            assert result1["exit_code"] == 0, f"Task 1 create failed: {result1}"

            # Create different file in task 2
            result2 = json.loads(terminal_tool("echo 'task2' > /workspace/iso.txt", task_id=task_id_2))
            assert result2["exit_code"] == 0, f"Task 2 create failed: {result2}"

            # Verify task 1 still has its own content
            result3 = json.loads(terminal_tool("cat /workspace/iso.txt", task_id=task_id_1))
            assert result3["exit_code"] == 0, f"Task 1 read failed: {result3}"
            assert "task1" in result3["output"], f"Task 1 content corrupted: {result3['output']}"

            # Verify task 2 has its content
            result4 = json.loads(terminal_tool("cat /workspace/iso.txt", task_id=task_id_2))
            assert result4["exit_code"] == 0, f"Task 2 read failed: {result4}"
            assert "task2" in result4["output"], f"Task 2 content corrupted: {result4['output']}"

        results.record_pass(name)
    except Exception as e:
        results.record_fail(name, _describe_error(e))


# =============================================================================
# CATEGORY 3: Atropos Modal Backend Tests
# =============================================================================

async def test_atropos_backend_lifecycle(config: TestConfig):
    """Test ModalToolBackend start/stop lifecycle."""
    name = "test_atropos_backend_lifecycle"
    if config.dry_run:
        results.record_skip(name, "Dry run mode")
        return

    try:
        ModalToolBackend, ModalSandboxConfig = _import_modal_backend()

        config_obj = ModalSandboxConfig(
            app_name="test-lifecycle",
            min_sandboxes=1,
            max_sandboxes=2,
            slots_per_sandbox=3,
        )
        backend = ModalToolBackend(config_obj)

        # Start; the backend is stopped on exit even if an assertion fails.
        async with _started_backend(backend):
            status = backend.get_status()
            assert status["sandboxes"] >= 1, f"Expected at least 1 sandbox, got {status}"
            assert status["slots_per_sandbox"] == 3

        results.record_pass(name)
    except ImportError:
        results.record_skip(name, _ATROPOS_SKIP_REASON)
    except Exception as e:
        results.record_fail(name, _describe_error(e))


async def test_atropos_slot_acquire_release(config: TestConfig):
    """Test slot acquisition and release."""
    name = "test_atropos_slot_acquire_release"
    if config.dry_run:
        results.record_skip(name, "Dry run mode")
        return

    try:
        ModalToolBackend, ModalSandboxConfig = _import_modal_backend()

        config_obj = ModalSandboxConfig(
            app_name="test-slots",
            min_sandboxes=1,
            max_sandboxes=2,
            slots_per_sandbox=5,
        )
        backend = ModalToolBackend(config_obj)

        async with _started_backend(backend):
            # Acquire slot
            slot = await backend.acquire("trajectory-1")
            assert slot is not None
            assert slot.trajectory_id == "trajectory-1"
            assert "/data/" in slot.workspace_dir

            # Check status shows slot in use
            status = backend.get_status()
            assert status["available_slots"] < status["total_slots"]

            # Release slot
            await backend.release(slot)

            # Status is queried again after release but not asserted on.
            # Note: might need small delay for status update
            backend.get_status()

        results.record_pass(name)
    except ImportError:
        results.record_skip(name, _ATROPOS_SKIP_REASON)
    except Exception as e:
        results.record_fail(name, _describe_error(e))


async def test_atropos_execute_in_slot(config: TestConfig):
    """Test command execution in acquired slot."""
    name = "test_atropos_execute_in_slot"
    if config.dry_run:
        results.record_skip(name, "Dry run mode")
        return

    try:
        ModalToolBackend, ModalSandboxConfig = _import_modal_backend()

        config_obj = ModalSandboxConfig(
            app_name="test-execute",
            min_sandboxes=1,
            max_sandboxes=1,
            slots_per_sandbox=3,
        )
        backend = ModalToolBackend(config_obj)

        async with _started_backend(backend):
            slot = await backend.acquire("test-exec")

            # Execute bash command
            results_list = await backend.execute_batch([
                (slot, "bash", {"command": "echo 'hello world'"})
            ])

            assert len(results_list) == 1
            result = results_list[0]
            assert result.success, f"Command failed: {result.error}"
            assert "hello world" in result.output

            await backend.release(slot)

        results.record_pass(name)
    except ImportError:
        results.record_skip(name, _ATROPOS_SKIP_REASON)
    except Exception as e:
        results.record_fail(name, _describe_error(e))


async def test_atropos_batched_execution(config: TestConfig):
    """Test batched parallel execution across multiple slots."""
    name = "test_atropos_batched_execution"
    if config.dry_run:
        results.record_skip(name, "Dry run mode")
        return

    try:
        ModalToolBackend, ModalSandboxConfig = _import_modal_backend()

        config_obj = ModalSandboxConfig(
            app_name="test-batch",
            min_sandboxes=1,
            max_sandboxes=2,
            slots_per_sandbox=5,
        )
        backend = ModalToolBackend(config_obj)

        async with _started_backend(backend):
            # Acquire multiple slots
            slots = [await backend.acquire(f"batch-{i}") for i in range(3)]

            # Execute batch of commands (monotonic clock: immune to wall-clock jumps)
            start_time = time.monotonic()
            results_list = await backend.execute_batch([
                (slots[0], "bash", {"command": "sleep 1 && echo 'slot0'"}),
                (slots[1], "bash", {"command": "sleep 1 && echo 'slot1'"}),
                (slots[2], "bash", {"command": "sleep 1 && echo 'slot2'"}),
            ])
            elapsed = time.monotonic() - start_time

            # All should succeed
            assert len(results_list) == 3
            for i, result in enumerate(results_list):
                assert result.success, f"Slot {i} failed: {result.error}"
                assert f"slot{i}" in result.output

            # Should be parallel - with Modal overhead, allow up to 5s for 3x 1-second sleeps
            # (If sequential, would take > 3s just for the sleeps)
            assert elapsed < 5.0, f"Batch execution took {elapsed}s, expected < 5.0s (parallel)"

            for slot in slots:
                await backend.release(slot)

        results.record_pass(name)
    except ImportError:
        results.record_skip(name, _ATROPOS_SKIP_REASON)
    except Exception as e:
        results.record_fail(name, _describe_error(e))


async def test_atropos_slot_workspace_isolation(config: TestConfig):
    """Test workspace isolation between slots."""
    name = "test_atropos_slot_workspace_isolation"
    if config.dry_run:
        results.record_skip(name, "Dry run mode")
        return

    try:
        ModalToolBackend, ModalSandboxConfig = _import_modal_backend()

        config_obj = ModalSandboxConfig(
            app_name="test-isolation",
            min_sandboxes=1,
            max_sandboxes=1,
            slots_per_sandbox=3,
        )
        backend = ModalToolBackend(config_obj)

        async with _started_backend(backend):
            slot1 = await backend.acquire("iso-1")
            slot2 = await backend.acquire("iso-2")

            # Write different content to each slot
            await backend.execute_batch([
                (slot1, "bash", {"command": "echo 'content1' > test.txt"}),
                (slot2, "bash", {"command": "echo 'content2' > test.txt"}),
            ])

            # Read back and verify isolation
            results_list = await backend.execute_batch([
                (slot1, "bash", {"command": "cat test.txt"}),
                (slot2, "bash", {"command": "cat test.txt"}),
            ])

            assert "content1" in results_list[0].output, f"Slot 1 content wrong: {results_list[0].output}"
            assert "content2" in results_list[1].output, f"Slot 2 content wrong: {results_list[1].output}"

            await backend.release(slot1)
            await backend.release(slot2)

        results.record_pass(name)
    except ImportError:
        results.record_skip(name, _ATROPOS_SKIP_REASON)
    except Exception as e:
        results.record_fail(name, _describe_error(e))


async def test_atropos_workspace_reset(config: TestConfig):
    """Test workspace reset on slot release."""
    name = "test_atropos_workspace_reset"
    if config.dry_run:
        results.record_skip(name, "Dry run mode")
        return

    try:
        ModalToolBackend, ModalSandboxConfig = _import_modal_backend()

        config_obj = ModalSandboxConfig(
            app_name="test-reset",
            min_sandboxes=1,
            max_sandboxes=1,
            slots_per_sandbox=2,
        )
        backend = ModalToolBackend(config_obj)

        async with _started_backend(backend):
            # Acquire, create file, release with reset
            slot = await backend.acquire("reset-test")
            slot_id = slot.slot_id

            await backend.execute_batch([
                (slot, "bash", {"command": "echo 'should be deleted' > test.txt"}),
            ])

            await backend.release(slot, reset_workspace=True)

            # Re-acquire (might get same slot)
            slot2 = await backend.acquire("reset-test-2")

            # Check file doesn't exist (or we got different slot)
            result = await backend.execute_batch([
                (slot2, "bash", {"command": "cat test.txt 2>/dev/null || echo 'file not found'"}),
            ])

            # Either file not found OR different slot
            output = result[0].output
            if slot2.slot_id == slot_id:
                assert "file not found" in output or not result[0].success, f"File should be deleted: {output}"

            await backend.release(slot2)

        results.record_pass(name)
    except ImportError:
        results.record_skip(name, _ATROPOS_SKIP_REASON)
    except Exception as e:
        results.record_fail(name, _describe_error(e))


async def test_atropos_multi_profile(config: TestConfig):
    """Test multi-profile support with different resources."""
    name = "test_atropos_multi_profile"
    if config.dry_run:
        results.record_skip(name, "Dry run mode")
        return

    try:
        ModalToolBackend, ModalSandboxConfig = _import_modal_backend()

        # Create backend with multiple profiles
        backend = ModalToolBackend.with_profiles(
            app_name="test-multiprofile",
            profiles={
                "default": ModalSandboxConfig(
                    name="default",
                    image="python:3.11",
                    cpu=1.0,
                    memory=2048,
                    min_sandboxes=1,
                    max_sandboxes=2,
                    slots_per_sandbox=3,
                ),
                "compute": ModalSandboxConfig(
                    name="compute",
                    image="python:3.11",
                    cpu=2.0,
                    memory=4096,
                    min_sandboxes=0,  # Start on demand
                    max_sandboxes=1,
                    slots_per_sandbox=2,
                ),
            },
            default_profile="default",
        )

        async with _started_backend(backend, profiles_to_start=["default"]):
            # List profiles
            profiles = backend.list_profiles()
            assert "default" in profiles
            assert "compute" in profiles
            assert profiles["default"]["active"]
            assert not profiles["compute"]["active"]  # Not started yet

            # Acquire from default profile
            slot1 = await backend.acquire("traj-1", profile="default")
            assert slot1 is not None

            # Acquire from compute profile (should start it on demand)
            slot2 = await backend.acquire("traj-2", profile="compute")
            assert slot2 is not None

            # Execute on both
            results_list = await backend.execute_batch([
                (slot1, "bash", {"command": "python --version"}),
                (slot2, "bash", {"command": "python --version"}),
            ])

            assert results_list[0].success
            assert results_list[1].success

            await backend.release(slot1)
            await backend.release(slot2)

            # Check status shows both profiles
            status = backend.get_status()
            assert "default" in status["pools"]
            assert "compute" in status["pools"]

        results.record_pass(name)
    except ImportError:
        results.record_skip(name, _ATROPOS_SKIP_REASON)
    except Exception as e:
        results.record_fail(name, _describe_error(e))


async def test_atropos_cross_profile_batch(config: TestConfig):
    """Test batched execution across different profiles."""
    name = "test_atropos_cross_profile_batch"
    if config.dry_run:
        results.record_skip(name, "Dry run mode")
        return

    try:
        ModalToolBackend, ModalSandboxConfig = _import_modal_backend()

        backend = ModalToolBackend.with_profiles(
            app_name="test-crossprofile",
            profiles={
                "profile-a": ModalSandboxConfig(
                    name="profile-a",
                    min_sandboxes=1,
                    max_sandboxes=1,
                    slots_per_sandbox=2,
                ),
                "profile-b": ModalSandboxConfig(
                    name="profile-b",
                    min_sandboxes=1,
                    max_sandboxes=1,
                    slots_per_sandbox=2,
                ),
            },
            default_profile="profile-a",
        )

        async with _started_backend(backend, profiles_to_start=["profile-a", "profile-b"]):
            slot_a = await backend.acquire("traj-a", profile="profile-a")
            slot_b = await backend.acquire("traj-b", profile="profile-b")

            # Batch execute across profiles
            results_list = await backend.execute_batch([
                (slot_a, "bash", {"command": "echo 'from-a'"}),
                (slot_b, "bash", {"command": "echo 'from-b'"}),
            ])

            assert len(results_list) == 2
            assert "from-a" in results_list[0].output
            assert "from-b" in results_list[1].output

            await backend.release(slot_a)
            await backend.release(slot_b)

        results.record_pass(name)
    except ImportError:
        results.record_skip(name, _ATROPOS_SKIP_REASON)
    except Exception as e:
        results.record_fail(name, _describe_error(e))


async def test_atropos_artifact_helpers(config: TestConfig):
    """Test read_artifact, list_artifacts, archive_artifacts."""
    name = "test_atropos_artifact_helpers"
    if config.dry_run:
        results.record_skip(name, "Dry run mode")
        return

    try:
        ModalToolBackend, ModalSandboxConfig = _import_modal_backend()

        config_obj = ModalSandboxConfig(
            app_name="test-artifacts",
            min_sandboxes=1,
            max_sandboxes=1,
            slots_per_sandbox=2,
        )
        backend = ModalToolBackend(config_obj)

        async with _started_backend(backend):
            slot = await backend.acquire("artifact-test")

            # Create test files
            await backend.execute_batch([
                (slot, "bash", {"command": "echo 'hello' > file1.txt && echo 'world' > file2.txt && mkdir subdir && echo 'nested' > subdir/file3.txt"}),
            ])

            # Test read_artifact
            content = await backend.read_artifact(slot, "file1.txt")
            assert content["success"]
            assert "hello" in content["content"]

            # Test list_artifacts
            listing = await backend.list_artifacts(slot, ".", recursive=False)
            assert listing["success"]
            assert "file1.txt" in listing["entries"] or any("file1" in e for e in listing["entries"])

            # Test archive_artifacts
            archive = await backend.archive_artifacts(slot, ".", archive_format="tar.gz")
            assert archive["success"]
            assert len(archive["archive_base64"]) > 0

            await backend.release(slot)

        results.record_pass(name)
    except ImportError:
        results.record_skip(name, _ATROPOS_SKIP_REASON)
    except Exception as e:
        results.record_fail(name, _describe_error(e))


# =============================================================================
# Test Runner
# =============================================================================

def run_sync_tests(config: TestConfig):
    """Run synchronous tests."""
    print("\n" + "="*60)
    print("SYNCHRONOUS TESTS")
    print("="*60)

    if config.category in (None, "profiles"):
        print("\n--- Profile Configuration Tests ---")
        test_profile_loading_from_env()
        test_profile_loading_from_yaml()
        test_profile_defaults()
        test_atropos_config_with_app_name()

    if config.category in (None, "terminal"):
        print("\n--- Terminal Tool Modal Tests ---")
        test_terminal_modal_pool_manager_singleton()
        test_terminal_create_environment_modal()
        test_terminal_tool_profile_parameter(config)
        test_terminal_modal_execute_simple(config)
        test_terminal_modal_persistence(config)
        test_terminal_modal_isolation(config)


async def run_async_tests(config: TestConfig):
    """Run asynchronous tests."""
    print("\n" + "="*60)
    print("ASYNCHRONOUS TESTS (Atropos Backend)")
    print("="*60)

    if config.category in (None, "atropos"):
        print("\n--- Backend Lifecycle Tests ---")
        await test_atropos_backend_lifecycle(config)

        print("\n--- Slot Management Tests ---")
        await test_atropos_slot_acquire_release(config)
        await test_atropos_execute_in_slot(config)
        await test_atropos_batched_execution(config)
        await test_atropos_slot_workspace_isolation(config)
        await test_atropos_workspace_reset(config)

        print("\n--- Multi-Profile Tests ---")
        await test_atropos_multi_profile(config)
        await test_atropos_cross_profile_batch(config)

        print("\n--- Artifact Helper Tests ---")
        await test_atropos_artifact_helpers(config)


def main():
    """Parse the command line, run the selected tests and exit 0 or 1."""
    import argparse

    parser = argparse.ArgumentParser(description="Modal Integration Test Suite")
    parser.add_argument("--dry-run", action="store_true", help="Skip tests requiring Modal")
    parser.add_argument("--category", choices=["terminal", "atropos", "profiles"], help="Run specific category")
    # NOTE: store_true with default=True means this flag cannot be switched off.
    parser.add_argument("--verbose", action="store_true", default=True)
    args = parser.parse_args()

    config = TestConfig(
        dry_run=args.dry_run,
        verbose=args.verbose,
        category=args.category,
    )

    print("="*60)
    print("MODAL INTEGRATION TEST SUITE")
    print("="*60)
    print(f"Mode: {'DRY RUN' if config.dry_run else 'LIVE'}")
    print(f"Category: {config.category or 'ALL'}")

    # Run sync tests
    run_sync_tests(config)

    # Run async tests
    asyncio.run(run_async_tests(config))

    # Summary
    success = results.summary()
    sys.exit(0 if success else 1)


if __name__ == "__main__":
    main()