diff --git a/.gitignore b/.gitignore index 770e017bb9..766b1ebd14 100644 --- a/.gitignore +++ b/.gitignore @@ -50,4 +50,15 @@ cli-config.yaml *.html *.json *.log -*.csv \ No newline at end of file +*.csv + +# Singularity/Apptainer images (large binary files) +*.sif + +# Test files +test_singularity_*.py +test_*.py +!tests/test_*.py + +# Nomad data +/tmp/NomadClient*/ diff --git a/atropos/atropos-sandbox.sif b/atropos/atropos-sandbox.sif deleted file mode 100755 index adf433d994..0000000000 Binary files a/atropos/atropos-sandbox.sif and /dev/null differ diff --git a/test_singularity_job.py b/test_singularity_job.py deleted file mode 100644 index e7e36423c2..0000000000 --- a/test_singularity_job.py +++ /dev/null @@ -1,126 +0,0 @@ -#!/usr/bin/env python3 -""" -Test script for Singularity sandbox job creation. - -This tests the create_sandbox_job function with driver="singularity". -""" - -import asyncio -import sys -import json -import importlib.util - -# Load atropos.nomad.client directly to bypass __init__.py -spec = importlib.util.spec_from_file_location( - "nomad_client", - "/root/Hermes-Agent/atropos/nomad/client.py" -) -nomad_client = importlib.util.module_from_spec(spec) -sys.modules["nomad_client"] = nomad_client -spec.loader.exec_module(nomad_client) - -NomadClient = nomad_client.NomadClient -create_sandbox_job = nomad_client.create_sandbox_job - - -async def test_singularity_job(): - """Test Singularity job creation and submission to Nomad.""" - - job_id = "test-singularity-sandbox" - sif_path = "/root/Hermes-Agent/atropos/atropos-sandbox.sif" - - print("=== Singularity Sandbox Job Test ===\n") - - # Create job spec for Singularity - print("Creating Singularity job spec...") - job_spec = create_sandbox_job( - job_id=job_id, - driver="singularity", - singularity_image=sif_path, - slots_per_container=5, - count=1, - cpu=500, - memory=512, - ) - - # Print task driver and config - task = job_spec["TaskGroups"][0]["Tasks"][0] - print(f" Driver: {task['Driver']}") - print(f" Config: {json.dumps(task['Config'], indent=4)}") - print() - - # Test submission to Nomad - print("Connecting to Nomad...") - client = NomadClient(address="http://localhost:4646") - - try: - # Check health - healthy = await client.is_healthy() - print(f" Nomad healthy: {healthy}") - - if not healthy: - print("❌ Nomad is not reachable!") - return False - - # Purge any existing job - print(f"\nPurging existing job '{job_id}'...") - await client.stop_job(job_id, purge=True) - - # Submit job - print(f"Submitting Singularity job '{job_id}'...") - result = await client.submit_job(job_spec) - print(f" Result: {result}") - - if "error" in result: - print(f"❌ Job submission failed: {result}") - return False - - # Wait for allocation - print("\nWaiting for allocation (10 seconds)...") - await asyncio.sleep(10) - - # Check allocations - allocs = await client.get_job_allocations(job_id) - print(f"Allocations: {len(allocs)}") - for alloc in allocs: - print(f" - {alloc.id[:8]} status={alloc.status.value} http={alloc.http_address}") - - # Get detailed info - detail = await client.get_allocation(alloc.id) - if detail: - task_states = detail.get("TaskStates", {}) - for task_name, state in task_states.items(): - events = state.get("Events", [])[-3:] - print(f" Task '{task_name}': {[e.get('Type') for e in events]}") - - # Check if any are running - running = [a for a in allocs if a.status.value == "running"] - if running: - print(f"\n✅ Job running! {len(running)} allocation(s)") - - # Try to reach the sandbox server - if running[0].http_address: - import aiohttp - try: - async with aiohttp.ClientSession() as session: - async with session.get(f"{running[0].http_address}/health", timeout=aiohttp.ClientTimeout(total=5)) as resp: - print(f" Health check: {resp.status} - {await resp.text()}") - except Exception as e: - print(f" Health check failed: {e}") - else: - print("\n⚠️ No running allocations yet (may still be starting)") - - return True - - finally: - # Don't cleanup - leave running for debugging - print(f"\n[Leaving job '{job_id}' running for debugging]") - print(f" View logs: nomad alloc logs -job {job_id}") - print(f" Cleanup: nomad job stop -purge {job_id}") - await client.close() - print("Done!") - - -if __name__ == "__main__": - success = asyncio.run(test_singularity_job()) - sys.exit(0 if success else 1) diff --git a/test_singularity_sandbox.py b/test_singularity_sandbox.py deleted file mode 100644 index f378b3459b..0000000000 --- a/test_singularity_sandbox.py +++ /dev/null @@ -1,108 +0,0 @@ -#!/usr/bin/env python3 -""" -Test script for Singularity/Apptainer sandbox integration. - -This tests the SlotPool with driver="singularity" using the raw_exec Nomad driver. -""" - -import asyncio -import sys -import os - -# Add parent to path for imports -sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) - -from atropos.slots.pool import SlotPool, SlotPoolConfig - - -async def test_singularity_sandbox(): - """Test the Singularity sandbox deployment and basic execution.""" - - # Configure for Singularity - config = SlotPoolConfig( - nomad_address="http://localhost:4646", - job_id="atropos-sandbox-singularity", - driver="singularity", - singularity_image="/root/Hermes-Agent/atropos/atropos-sandbox.sif", - slots_per_container=5, - min_containers=1, - max_containers=2, - cpu=500, - memory=512, - purge_job_on_start=True, # Clean start for testing - ) - - print(f"Testing Singularity sandbox with config:") - print(f" driver: {config.driver}") - print(f" singularity_image: {config.singularity_image}") - print(f" job_id: {config.job_id}") - print() - - pool = SlotPool(config) - - try: - print("Starting SlotPool...") - await pool.start() - - stats = pool.get_stats() - print(f"Pool started! Stats: {stats}") - print() - - # Acquire a slot - print("Acquiring slot...") - slot = await pool.acquire("test-trajectory-001") - print(f"Acquired slot: {slot.slot_id} (alloc={slot.alloc_id[:8]})") - print() - - # Execute a simple command - print("Executing 'echo hello from singularity'...") - result = await pool.execute( - slot, - "bash", - {"command": "echo 'Hello from Singularity sandbox!' && uname -a"} - ) - print(f"Result: {result}") - print() - - # Test file write - print("Testing file write...") - write_result = await pool.execute( - slot, - "write_file", - {"path": "test.txt", "content": "Test file from Singularity!"} - ) - print(f"Write result: {write_result}") - - # Test file read - print("Testing file read...") - read_result = await pool.execute( - slot, - "read_file", - {"path": "test.txt"} - ) - print(f"Read result: {read_result}") - print() - - # Release slot - print("Releasing slot...") - await pool.release(slot) - - print("✅ All tests passed!") - - except Exception as e: - print(f"❌ Error: {e}") - import traceback - traceback.print_exc() - return False - - finally: - print("\nStopping pool...") - await pool.stop(purge_job=True) - print("Pool stopped.") - - return True - - -if __name__ == "__main__": - success = asyncio.run(test_singularity_sandbox()) - sys.exit(0 if success else 1)