#!/usr/bin/env python3
"""
Modal Integration Stress Tests & Full Integration Tests

This test suite includes:
1. Stress tests for Modal sandbox pools (concurrent load, scaling)
2. Atropos backend tests (requires atroposlib)
3. mini-swe-agent integration tests

Prerequisites:
    # Install dev dependencies
    pip install -e '.[dev,modal]'
    
    # Install atroposlib for Atropos tests
    pip install -e '.[atropos]'
    
    # Clone mini-swe-agent (if not present)
    git clone https://github.com/anthropics/mini-swe-agent.git mini-swe-agent
    # Or as submodule:
    git submodule add https://github.com/anthropics/mini-swe-agent.git mini-swe-agent
    
Run with:
    # All tests
    python tests/test_modal_stress.py
    
    # Stress tests only
    python tests/test_modal_stress.py --category stress
    
    # Atropos tests only
    python tests/test_modal_stress.py --category atropos
    
    # Mini-swe-agent tests only
    python tests/test_modal_stress.py --category miniswe
    
    # Dry run (no Modal calls)
    python tests/test_modal_stress.py --dry-run
"""

import asyncio
import json
import os
import sys
import time
import random
import traceback
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path
from typing import Dict, Any, List, Optional, Tuple
from dataclasses import dataclass

# Add parent to path for imports
sys.path.insert(0, str(Path(__file__).parent.parent))


# =============================================================================
# Test Configuration
# =============================================================================

@dataclass
class StressTestConfig:
    dry_run: bool = False
    verbose: bool = True
    category: Optional[str] = None
    # Stress test parameters (reduced defaults for faster first-run)
    concurrent_tasks: int = 3  # Start small - Modal cold starts are slow
    total_operations: int = 10
    max_sandboxes: int = 3
    slots_per_sandbox: int = 3


# =============================================================================
# Test Results Tracking
# =============================================================================

class TestResults:
    def __init__(self):
        self.passed: List[str] = []
        self.failed: List[Tuple[str, str]] = []
        self.skipped: List[Tuple[str, str]] = []
        self.metrics: Dict[str, Any] = {}
    
    def record_pass(self, name: str, metrics: Optional[Dict] = None):
        self.passed.append(name)
        if metrics:
            self.metrics[name] = metrics
        print(f"  ✅ {name}")
        if metrics:
            for k, v in metrics.items():
                print(f"     📊 {k}: {v}")
    
    def record_fail(self, name: str, error: str):
        self.failed.append((name, error))
        print(f"  ❌ {name}: {error}")
    
    def record_skip(self, name: str, reason: str):
        self.skipped.append((name, reason))
        print(f"  ⏭️  {name}: {reason}")
    
    def summary(self):
        total = len(self.passed) + len(self.failed) + len(self.skipped)
        print(f"\n{'='*70}")
        print(f"STRESS TEST RESULTS: {len(self.passed)}/{total} passed")
        print(f"  Passed:  {len(self.passed)}")
        print(f"  Failed:  {len(self.failed)}")
        print(f"  Skipped: {len(self.skipped)}")
        
        if self.failed:
            print(f"\nFailed tests:")
            for name, error in self.failed:
                print(f"  - {name}: {error}")
        
        if self.metrics:
            print(f"\nPerformance Metrics:")
            for test, metrics in self.metrics.items():
                print(f"  {test}:")
                for k, v in metrics.items():
                    print(f"    - {k}: {v}")
        
        return len(self.failed) == 0


results = TestResults()


# =============================================================================
# Helper: Atropos Import
# =============================================================================

def try_import_atropos():
    """Try importing Atropos backend components."""
    try:
        from atropos.backends.modal_backend import (
            ModalToolBackend, ModalSandboxConfig,
            _ModalMultiProfileManager
        )
        from atropos.slots.slot import Slot, SlotState
        return ModalToolBackend, ModalSandboxConfig, Slot, SlotState
    except (ImportError, ModuleNotFoundError) as e:
        return None


def try_import_miniswe():
    """Try importing mini-swe-agent components."""
    try:
        # Check if mini-swe-agent path exists and has content
        mini_swe_path = Path(__file__).parent.parent / "mini-swe-agent" / "src"
        if mini_swe_path.exists() and list(mini_swe_path.iterdir()):
            sys.path.insert(0, str(mini_swe_path))
            import minisweagent
            return minisweagent
        return None
    except (ImportError, ModuleNotFoundError) as e:
        return None


# =============================================================================
# CATEGORY 1: Stress Tests (Terminal Tool)
# =============================================================================

def test_stress_concurrent_tasks(config: StressTestConfig):
    """Stress test: Multiple concurrent task_ids hitting the pool."""
    if config.dry_run:
        results.record_skip("test_stress_concurrent_tasks", "Dry run mode")
        return
    
    from tools.terminal_tool import terminal_tool, cleanup_vm
    
    original_env = os.environ.get("TERMINAL_ENV")
    os.environ["TERMINAL_ENV"] = "modal"
    
    try:
        num_tasks = config.concurrent_tasks
        task_ids = [f"stress-concurrent-{i}-{int(time.time())}" for i in range(num_tasks)]
        
        start_time = time.time()
        errors = []
        successes = 0
        
        def run_task(task_id: str) -> Tuple[bool, str]:
            try:
                result = json.loads(terminal_tool(
                    f"echo 'Hello from {task_id}' && sleep 0.5",
                    task_id=task_id,
                ))
                success = result["exit_code"] == 0
                
                # IMPORTANT: Clean up immediately after task completes
                # This releases the sandbox back to the pool for other tasks
                try:
                    cleanup_vm(task_id)
                except:
                    pass
                
                if success:
                    return True, ""
                # Include more details for debugging
                error_detail = result.get("error", "no error message")
                output = result.get("output", "")[:100]  # First 100 chars
                return False, f"Exit code: {result['exit_code']}, error: {error_detail}, output: {output}"
            except Exception as e:
                # Clean up even on failure
                try:
                    cleanup_vm(task_id)
                except:
                    pass
                import traceback
                return False, f"Exception: {str(e)}\n{traceback.format_exc()}"
        
        # Run all tasks concurrently using threads
        with ThreadPoolExecutor(max_workers=num_tasks) as executor:
            futures = {executor.submit(run_task, tid): tid for tid in task_ids}
            
            for future in as_completed(futures):
                task_id = futures[future]
                try:
                    success, error = future.result(timeout=60)
                    if success:
                        successes += 1
                    else:
                        errors.append(f"{task_id}: {error}")
                except Exception as e:
                    errors.append(f"{task_id}: {str(e)}")
        
        elapsed = time.time() - start_time
        
        # No need for cleanup here - each task cleans up immediately
        
        # Report
        success_rate = successes / num_tasks * 100
        
        if success_rate >= 90:  # Allow 10% failure rate for stress test
            results.record_pass("test_stress_concurrent_tasks", {
                "concurrent_tasks": num_tasks,
                "successes": successes,
                "failures": len(errors),
                "success_rate": f"{success_rate:.1f}%",
                "total_time": f"{elapsed:.2f}s",
                "avg_time_per_task": f"{elapsed/num_tasks:.2f}s",
            })
        else:
            results.record_fail(
                "test_stress_concurrent_tasks",
                f"Success rate {success_rate:.1f}% < 90%. Errors: {errors[:3]}"
            )
    
    except Exception as e:
        results.record_fail("test_stress_concurrent_tasks", str(e))
    finally:
        if original_env:
            os.environ["TERMINAL_ENV"] = original_env
        elif "TERMINAL_ENV" in os.environ:
            del os.environ["TERMINAL_ENV"]


def test_stress_rapid_fire(config: StressTestConfig):
    """Stress test: Rapid sequential commands to same task_id."""
    if config.dry_run:
        results.record_skip("test_stress_rapid_fire", "Dry run mode")
        return
    
    from tools.terminal_tool import terminal_tool, cleanup_vm
    
    original_env = os.environ.get("TERMINAL_ENV")
    os.environ["TERMINAL_ENV"] = "modal"
    
    try:
        task_id = f"stress-rapid-{int(time.time())}"
        num_commands = config.total_operations
        
        start_time = time.time()
        successes = 0
        errors = []
        
        for i in range(num_commands):
            try:
                result = json.loads(terminal_tool(f"echo {i}", task_id=task_id))
                if result["exit_code"] == 0 and str(i) in result["output"]:
                    successes += 1
                else:
                    errors.append(f"Command {i}: unexpected result")
            except Exception as e:
                errors.append(f"Command {i}: {str(e)}")
        
        elapsed = time.time() - start_time
        cleanup_vm(task_id)
        
        success_rate = successes / num_commands * 100
        commands_per_second = num_commands / elapsed
        
        if success_rate >= 95:
            results.record_pass("test_stress_rapid_fire", {
                "total_commands": num_commands,
                "successes": successes,
                "success_rate": f"{success_rate:.1f}%",
                "total_time": f"{elapsed:.2f}s",
                "commands_per_second": f"{commands_per_second:.1f}",
            })
        else:
            results.record_fail(
                "test_stress_rapid_fire",
                f"Success rate {success_rate:.1f}% < 95%"
            )
    
    except Exception as e:
        results.record_fail("test_stress_rapid_fire", str(e))
    finally:
        if original_env:
            os.environ["TERMINAL_ENV"] = original_env
        elif "TERMINAL_ENV" in os.environ:
            del os.environ["TERMINAL_ENV"]


def test_stress_pool_scaling(config: StressTestConfig):
    """Stress test: Force pool to scale up and down by running tasks in batches."""
    if config.dry_run:
        results.record_skip("test_stress_pool_scaling", "Dry run mode")
        return
    
    from tools.terminal_tool import terminal_tool, cleanup_vm, _ModalPoolManager
    
    original_env = os.environ.get("TERMINAL_ENV")
    os.environ["TERMINAL_ENV"] = "modal"
    
    try:
        # Run tasks in batches matching max_sandboxes to test pool reuse
        # This verifies sandboxes can be acquired, used, released, and reused
        batch_size = config.max_sandboxes
        num_batches = 3
        total_tasks = batch_size * num_batches
        
        start_time = time.time()
        successes = 0
        
        for batch in range(num_batches):
            task_ids = [f"stress-scale-{batch}-{i}-{int(time.time())}" for i in range(batch_size)]
            
            def run_task(task_id: str):
                try:
                    result = json.loads(terminal_tool(
                        "echo done",  # Fast command to test scaling
                        task_id=task_id,
                    ))
                    success = result["exit_code"] == 0
                    try:
                        cleanup_vm(task_id)
                    except:
                        pass
                    return success
                except:
                    try:
                        cleanup_vm(task_id)
                    except:
                        pass
                    return False
            
            # Run batch concurrently
            with ThreadPoolExecutor(max_workers=batch_size) as executor:
                batch_results = list(executor.map(run_task, task_ids))
            successes += sum(batch_results)
        
        elapsed = time.time() - start_time
        
        # Check pool status
        try:
            manager = _ModalPoolManager.get_instance()
            pool_status = manager.get_status() if hasattr(manager, 'get_status') else {}
        except:
            pool_status = {}
        
        success_rate = successes / total_tasks * 100
        
        if success_rate >= 80:  # Allow some tolerance
            results.record_pass("test_stress_pool_scaling", {
                "total_tasks": total_tasks,
                "num_batches": num_batches,
                "batch_size": batch_size,
                "successes": successes,
                "success_rate": f"{success_rate:.1f}%",
                "total_time": f"{elapsed:.2f}s",
                "pool_status": pool_status,
            })
        else:
            results.record_fail(
                "test_stress_pool_scaling",
                f"Success rate {success_rate:.1f}% < 80%"
            )
    
    except Exception as e:
        results.record_fail("test_stress_pool_scaling", str(e))
    finally:
        if original_env:
            os.environ["TERMINAL_ENV"] = original_env
        elif "TERMINAL_ENV" in os.environ:
            del os.environ["TERMINAL_ENV"]


def test_stress_large_output(config: StressTestConfig):
    """Stress test: Commands producing large output."""
    if config.dry_run:
        results.record_skip("test_stress_large_output", "Dry run mode")
        return
    
    from tools.terminal_tool import terminal_tool, cleanup_vm
    
    original_env = os.environ.get("TERMINAL_ENV")
    os.environ["TERMINAL_ENV"] = "modal"
    
    try:
        task_id = f"stress-large-{int(time.time())}"
        
        # First verify basic connectivity with simple command
        warmup = json.loads(terminal_tool("echo warmup", task_id=task_id))
        if warmup["exit_code"] != 0:
            results.record_fail(
                "test_stress_large_output",
                f"Warmup failed: {warmup.get('error', 'unknown')}"
            )
            return
        
        # Generate output - use seq which is more portable
        start_time = time.time()
        result = json.loads(terminal_tool(
            'seq 1 500 | while read i; do echo "Line $i: This is test content for large output"; done',
            task_id=task_id,
            timeout=60,
        ))
        elapsed = time.time() - start_time
        
        cleanup_vm(task_id)
        
        output_size = len(result.get("output", ""))
        error_msg = result.get("error", "")
        
        if result["exit_code"] == 0 and output_size > 5000:
            results.record_pass("test_stress_large_output", {
                "output_size": f"{output_size:,} bytes",
                "time": f"{elapsed:.2f}s",
                "throughput": f"{output_size/elapsed/1024:.1f} KB/s" if elapsed > 0 else "N/A",
            })
        else:
            results.record_fail(
                "test_stress_large_output",
                f"Exit code: {result['exit_code']}, output size: {output_size}, error: {error_msg}"
            )
    
    except Exception as e:
        import traceback
        results.record_fail("test_stress_large_output", f"{str(e)}\n{traceback.format_exc()}")
    finally:
        try:
            cleanup_vm(task_id)
        except:
            pass
        if original_env:
            os.environ["TERMINAL_ENV"] = original_env
        elif "TERMINAL_ENV" in os.environ:
            del os.environ["TERMINAL_ENV"]


def test_stress_error_recovery(config: StressTestConfig):
    """Stress test: Commands that fail and verify sandbox continues working."""
    if config.dry_run:
        results.record_skip("test_stress_error_recovery", "Dry run mode")
        return
    
    from tools.terminal_tool import terminal_tool, cleanup_vm
    
    original_env = os.environ.get("TERMINAL_ENV")
    os.environ["TERMINAL_ENV"] = "modal"
    
    try:
        task_id = f"stress-error-{int(time.time())}"
        
        # Run some failing commands
        failing_commands = [
            "exit 1",
            "false",
            "cat /nonexistent/file",
            "command_that_does_not_exist",
        ]
        
        for cmd in failing_commands:
            result = json.loads(terminal_tool(cmd, task_id=task_id))
            # These should fail but not crash
            assert result["exit_code"] != 0 or result.get("error"), f"Expected failure for: {cmd}"
        
        # Now run a command that should succeed
        result = json.loads(terminal_tool("echo 'recovery success'", task_id=task_id))
        
        cleanup_vm(task_id)
        
        if result["exit_code"] == 0 and "recovery success" in result["output"]:
            results.record_pass("test_stress_error_recovery", {
                "failed_commands": len(failing_commands),
                "recovery": "success",
            })
        else:
            results.record_fail(
                "test_stress_error_recovery",
                f"Recovery failed: {result}"
            )
    
    except Exception as e:
        results.record_fail("test_stress_error_recovery", str(e))
    finally:
        if original_env:
            os.environ["TERMINAL_ENV"] = original_env
        elif "TERMINAL_ENV" in os.environ:
            del os.environ["TERMINAL_ENV"]


# =============================================================================
# CATEGORY 2: Atropos Backend Stress Tests
# =============================================================================

async def test_atropos_stress_slot_churn(config: StressTestConfig):
    """Atropos stress test: Rapid slot acquire/release cycles."""
    if config.dry_run:
        results.record_skip("test_atropos_stress_slot_churn", "Dry run mode")
        return
    
    imports = try_import_atropos()
    if imports is None:
        results.record_skip("test_atropos_stress_slot_churn", "Requires atroposlib")
        return
    
    ModalToolBackend, ModalSandboxConfig, _, _ = imports
    
    try:
        backend_config = ModalSandboxConfig(
            app_name=f"stress-churn-{int(time.time())}",
            min_sandboxes=1,
            max_sandboxes=3,
            slots_per_sandbox=5,
        )
        
        backend = ModalToolBackend(backend_config)
        await backend.start()
        
        try:
            num_cycles = config.total_operations
            start_time = time.time()
            successes = 0
            
            for i in range(num_cycles):
                try:
                    slot = await backend.acquire(f"churn-{i}")
                    
                    # Quick command
                    results_list = await backend.execute_batch([
                        (slot, "bash", {"command": f"echo {i}"})
                    ])
                    
                    if results_list[0].success:
                        successes += 1
                    
                    await backend.release(slot, reset_workspace=(i % 5 == 0))
                except Exception as e:
                    pass  # Count as failure
            
            elapsed = time.time() - start_time
            success_rate = successes / num_cycles * 100
            
            if success_rate >= 90:
                results.record_pass("test_atropos_stress_slot_churn", {
                    "cycles": num_cycles,
                    "successes": successes,
                    "success_rate": f"{success_rate:.1f}%",
                    "total_time": f"{elapsed:.2f}s",
                    "cycles_per_second": f"{num_cycles/elapsed:.1f}",
                })
            else:
                results.record_fail(
                    "test_atropos_stress_slot_churn",
                    f"Success rate {success_rate:.1f}% < 90%"
                )
        
        finally:
            await backend.stop(purge=True)
    
    except Exception as e:
        results.record_fail("test_atropos_stress_slot_churn", str(e))


async def test_atropos_stress_parallel_batches(config: StressTestConfig):
    """Atropos stress test: Multiple parallel batch executions."""
    if config.dry_run:
        results.record_skip("test_atropos_stress_parallel_batches", "Dry run mode")
        return
    
    imports = try_import_atropos()
    if imports is None:
        results.record_skip("test_atropos_stress_parallel_batches", "Requires atroposlib")
        return
    
    ModalToolBackend, ModalSandboxConfig, _, _ = imports
    
    try:
        backend_config = ModalSandboxConfig(
            app_name=f"stress-batch-{int(time.time())}",
            min_sandboxes=2,
            max_sandboxes=4,
            slots_per_sandbox=5,
        )
        
        backend = ModalToolBackend(backend_config)
        await backend.start()
        
        try:
            num_slots = 10
            slots = []
            
            # Acquire multiple slots
            for i in range(num_slots):
                slot = await backend.acquire(f"batch-{i}")
                slots.append(slot)
            
            # Run multiple batches in parallel
            start_time = time.time()
            num_batches = 5
            
            async def run_batch(batch_id: int):
                requests = [
                    (slot, "bash", {"command": f"echo 'batch{batch_id}-slot{i}'"})
                    for i, slot in enumerate(slots)
                ]
                return await backend.execute_batch(requests)
            
            batch_tasks = [run_batch(i) for i in range(num_batches)]
            all_results = await asyncio.gather(*batch_tasks)
            
            elapsed = time.time() - start_time
            
            # Count successes
            total_commands = num_batches * num_slots
            successes = sum(
                1 for batch_result in all_results
                for r in batch_result
                if r.success
            )
            
            # Release slots
            for slot in slots:
                await backend.release(slot)
            
            success_rate = successes / total_commands * 100
            
            if success_rate >= 90:
                results.record_pass("test_atropos_stress_parallel_batches", {
                    "batches": num_batches,
                    "slots": num_slots,
                    "total_commands": total_commands,
                    "successes": successes,
                    "success_rate": f"{success_rate:.1f}%",
                    "total_time": f"{elapsed:.2f}s",
                    "commands_per_second": f"{total_commands/elapsed:.1f}",
                })
            else:
                results.record_fail(
                    "test_atropos_stress_parallel_batches",
                    f"Success rate {success_rate:.1f}% < 90%"
                )
        
        finally:
            await backend.stop(purge=True)
    
    except Exception as e:
        results.record_fail("test_atropos_stress_parallel_batches", str(e))


async def test_atropos_stress_multi_profile_load(config: StressTestConfig):
    """Atropos stress test: Load across multiple profiles."""
    if config.dry_run:
        results.record_skip("test_atropos_stress_multi_profile_load", "Dry run mode")
        return
    
    imports = try_import_atropos()
    if imports is None:
        results.record_skip("test_atropos_stress_multi_profile_load", "Requires atroposlib")
        return
    
    ModalToolBackend, ModalSandboxConfig, _, _ = imports
    
    try:
        backend = ModalToolBackend.with_profiles(
            app_name=f"stress-multiprofile-{int(time.time())}",
            profiles={
                "cpu-light": ModalSandboxConfig(
                    name="cpu-light",
                    cpu=0.5,
                    memory=1024,
                    min_sandboxes=1,
                    max_sandboxes=2,
                    slots_per_sandbox=5,
                ),
                "cpu-heavy": ModalSandboxConfig(
                    name="cpu-heavy",
                    cpu=2.0,
                    memory=4096,
                    min_sandboxes=0,
                    max_sandboxes=2,
                    slots_per_sandbox=3,
                ),
            }
        )
        
        await backend.start(profiles_to_start=["cpu-light", "cpu-heavy"])
        
        try:
            num_tasks_per_profile = 5
            slots = []
            
            # Acquire from both profiles
            for i in range(num_tasks_per_profile):
                light_slot = await backend.acquire(f"light-{i}", profile="cpu-light")
                heavy_slot = await backend.acquire(f"heavy-{i}", profile="cpu-heavy")
                slots.append((light_slot, "cpu-light"))
                slots.append((heavy_slot, "cpu-heavy"))
            
            # Execute batch across all profiles
            start_time = time.time()
            
            requests = [
                (slot, "bash", {"command": f"echo 'profile={profile}'"})
                for slot, profile in slots
            ]
            
            batch_results = await backend.execute_batch(requests)
            elapsed = time.time() - start_time
            
            successes = sum(1 for r in batch_results if r.success)
            
            # Release all
            for slot, _ in slots:
                await backend.release(slot)
            
            status = backend.get_status()
            
            success_rate = successes / len(slots) * 100
            
            if success_rate >= 90:
                results.record_pass("test_atropos_stress_multi_profile_load", {
                    "profiles": 2,
                    "tasks_per_profile": num_tasks_per_profile,
                    "total_tasks": len(slots),
                    "successes": successes,
                    "success_rate": f"{success_rate:.1f}%",
                    "time": f"{elapsed:.2f}s",
                    "status": status,
                })
            else:
                results.record_fail(
                    "test_atropos_stress_multi_profile_load",
                    f"Success rate {success_rate:.1f}% < 90%"
                )
        
        finally:
            await backend.stop(purge=True)
    
    except Exception as e:
        results.record_fail("test_atropos_stress_multi_profile_load", str(e))


# =============================================================================
# CATEGORY 3: Mini-SWE-Agent Integration Tests
# =============================================================================

def test_miniswe_environment_available():
    """Check if mini-swe-agent is properly set up."""
    mini_swe_path = Path(__file__).parent.parent / "mini-swe-agent" / "src"
    
    if not mini_swe_path.exists():
        results.record_skip(
            "test_miniswe_environment_available",
            "mini-swe-agent not found. Run: git clone https://github.com/anthropics/mini-swe-agent.git mini-swe-agent"
        )
        return
    
    if not list(mini_swe_path.iterdir()):
        results.record_skip(
            "test_miniswe_environment_available",
            "mini-swe-agent directory is empty. Run: git submodule update --init"
        )
        return
    
    miniswe = try_import_miniswe()
    if miniswe is None:
        results.record_fail(
            "test_miniswe_environment_available",
            "Failed to import minisweagent module"
        )
        return
    
    results.record_pass("test_miniswe_environment_available", {
        "path": str(mini_swe_path),
        "module": miniswe.__name__,
    })


def test_miniswe_modal_backend(config: StressTestConfig):
    """Test mini-swe-agent with Modal backend."""
    if config.dry_run:
        results.record_skip("test_miniswe_modal_backend", "Dry run mode")
        return
    
    miniswe = try_import_miniswe()
    if miniswe is None:
        results.record_skip(
            "test_miniswe_modal_backend",
            "mini-swe-agent not available"
        )
        return
    
    try:
        # Check if ModalEnvironment exists in minisweagent
        if not hasattr(miniswe, 'ModalEnvironment'):
            results.record_skip(
                "test_miniswe_modal_backend",
                "minisweagent.ModalEnvironment not found"
            )
            return
        
        # Create Modal environment
        env = miniswe.ModalEnvironment(
            image="python:3.11",
            timeout=60,
        )
        
        # Execute a command
        result = env.execute("echo 'Hello from mini-swe-agent Modal'")
        
        env.cleanup()
        
        if "Hello from mini-swe-agent Modal" in str(result):
            results.record_pass("test_miniswe_modal_backend")
        else:
            results.record_fail(
                "test_miniswe_modal_backend",
                f"Unexpected result: {result}"
            )
    
    except Exception as e:
        results.record_fail("test_miniswe_modal_backend", str(e))


# =============================================================================
# Test Runner
# =============================================================================

def run_sync_tests(config: StressTestConfig):
    """Run synchronous tests."""
    if config.category in (None, "stress"):
        print("\n" + "="*70)
        print("STRESS TESTS (Terminal Tool)")
        print("="*70)
        
        test_stress_concurrent_tasks(config)
        test_stress_rapid_fire(config)
        test_stress_pool_scaling(config)
        test_stress_large_output(config)
        test_stress_error_recovery(config)
    
    if config.category in (None, "miniswe"):
        print("\n" + "="*70)
        print("MINI-SWE-AGENT INTEGRATION TESTS")
        print("="*70)
        
        test_miniswe_environment_available()
        test_miniswe_modal_backend(config)


async def run_async_tests(config: StressTestConfig):
    """Run asynchronous tests."""
    if config.category in (None, "atropos"):
        print("\n" + "="*70)
        print("ATROPOS BACKEND STRESS TESTS")
        print("="*70)
        
        await test_atropos_stress_slot_churn(config)
        await test_atropos_stress_parallel_batches(config)
        await test_atropos_stress_multi_profile_load(config)


def main():
    import argparse
    
    parser = argparse.ArgumentParser(description="Modal Stress Test Suite")
    parser.add_argument("--dry-run", action="store_true", help="Skip tests requiring Modal")
    parser.add_argument("--category", choices=["stress", "atropos", "miniswe"], help="Run specific category")
    parser.add_argument("--concurrent", type=int, default=10, help="Number of concurrent tasks")
    parser.add_argument("--operations", type=int, default=50, help="Total operations for stress tests")
    parser.add_argument("--verbose", action="store_true", default=True)
    args = parser.parse_args()
    
    config = StressTestConfig(
        dry_run=args.dry_run,
        verbose=args.verbose,
        category=args.category,
        concurrent_tasks=args.concurrent,
        total_operations=args.operations,
    )
    
    print("="*70)
    print("MODAL STRESS & INTEGRATION TEST SUITE")
    print("="*70)
    print(f"Mode: {'DRY RUN' if config.dry_run else 'LIVE'}")
    print(f"Category: {config.category or 'ALL'}")
    print(f"Concurrent tasks: {config.concurrent_tasks}")
    print(f"Total operations: {config.total_operations}")
    
    # Run sync tests
    run_sync_tests(config)
    
    # Run async tests
    asyncio.run(run_async_tests(config))
    
    # Summary
    success = results.summary()
    sys.exit(0 if success else 1)


if __name__ == "__main__":
    main()