Update environment configuration and enhance terminal tool integration

- Updated `.env.example` to include new API keys and configuration options for the mini-swe-agent backend, including support for local, Docker, and Modal environments. - Added `.gitmodules` to include mini-swe-agent as a submodule for easier integration. - Refactored `mini_swe_runner.py` to use the updated model format and default to OpenRouter for API calls. - Enhanced `model_tools.py` to support the new terminal tool definitions and ensure compatibility with the mini-swe-agent backend. - Updated `README.md` to reflect changes in setup instructions and environment variable configurations. - Improved `terminal_tool.py` to manage execution environments and lifecycle, ensuring proper cleanup and error handling. - Introduced `terminal_hecate.py` for executing commands on MorphCloud VMs, providing an alternative backend for terminal operations.
2026-04-26 01:01:40 +00:00 · 2026-01-23 12:26:53 +00:00 · 2026-01-23 12:26:53 +00:00 · ba19d530ad
commit ba19d530ad
parent 47555602d7
11 changed files with 548 additions and 473 deletions
--- a/tools/simple_terminal_tool.py
+++ b/tools/simple_terminal_tool.py
@ -1,437 +0,0 @@
-#!/usr/bin/env python3
-"""
-Simple Terminal Tool Module
-
-A simplified terminal tool that executes commands on MorphCloud VMs without tmux.
-No session persistence, no interactive app support - just simple command execution.
-
-Features:
- Direct SSH command execution
- Background task support
- VM lifecycle management with TTL
- Automatic cleanup after inactivity
-
-Usage:
-    from simple_terminal_tool import simple_terminal_tool
-
-    # Execute a simple command
-    result = simple_terminal_tool("ls -la")
-
-    # Execute in background
-    result = simple_terminal_tool("python server.py", background=True)
-"""
-
-import json
-import os
-import time
-import threading
-import atexit
-from typing import Optional, Dict, Any
-
-# Tool description for LLM
-SIMPLE_TERMINAL_TOOL_DESCRIPTION = """Execute commands on a secure Linux VM environment.
-
-**Environment:**
- Minimal Debian-based OS with internet access
- Automatic VM lifecycle management (creates on-demand, reuses, cleans up)
- Filesystem is persisted between tool calls but environment variables, venvs, etc are reset.
-
-**Command Execution:**
- Simple commands: Just provide the 'command' parameter
- Background processes: Set 'background': True for servers/long-running tasks
- Command timeout: Optional 'timeout' parameter in seconds
-
-**Examples:**
- Run command: `{"command": "ls -la"}`
- Background task: `{"command": "source path/to/my/venv/bin/activate && python server.py", "background": True}`
- With timeout: `{"command": "long_task.sh", "timeout": 300}`
-
-**Best Practices:**
- Run servers/long processes in background
- Monitor disk usage for large tasks
- Install whatever tools you need with sudo apt-get
- Do not be afraid to run pip with --break-system-packages
-
-**Things to avoid**
- Do NOT use interactive tools such as tmux, vim, nano, python repl - you will get stuck. Even git sometimes becomes interactive if the output is large. If you're not sure pipe to cat.
-"""
-
-# Global state for VM lifecycle management
-_active_instances: Dict[str, Any] = {}
-_last_activity: Dict[str, float] = {}
-_instance_lock = threading.Lock()
-_cleanup_thread = None
-_cleanup_running = False
-
-
-def _cleanup_inactive_vms(vm_lifetime_seconds: int = 300):
-    """Clean up VMs that have been inactive for longer than vm_lifetime_seconds."""
-    global _active_instances, _last_activity
-
-    current_time = time.time()
-    tasks_to_cleanup = []
-
-    with _instance_lock:
-        for task_id, last_time in list(_last_activity.items()):
-            if current_time - last_time > vm_lifetime_seconds:
-                tasks_to_cleanup.append(task_id)
-
-        for task_id in tasks_to_cleanup:
-            try:
-                if task_id in _active_instances:
-                    instance = _active_instances[task_id]
-                    if hasattr(instance, 'terminate'):
-                        instance.terminate()
-                    elif hasattr(instance, 'stop'):
-                        instance.stop()
-                    elif hasattr(instance, 'delete'):
-                        instance.delete()
-
-                    del _active_instances[task_id]
-                    print(f"[VM Cleanup] Terminated inactive VM for task: {task_id}")
-
-                if task_id in _last_activity:
-                    del _last_activity[task_id]
-
-            except Exception as e:
-                # 404 errors are benign - VM already cleaned up by TTL
-                error_str = str(e)
-                if "404" in error_str or "InstanceNotFoundError" in error_str or "not found" in error_str.lower():
-                    print(f"[VM Cleanup] VM for task {task_id} already cleaned up (likely TTL expiration)")
-                else:
-                    print(f"[VM Cleanup] Error cleaning up VM for task {task_id}: {e}")
-                
-                # Always remove from tracking dicts to prevent infinite retry loops
-                if task_id in _active_instances:
-                    del _active_instances[task_id]
-                if task_id in _last_activity:
-                    del _last_activity[task_id]
-
-
-def _cleanup_thread_worker():
-    """Background thread worker that periodically cleans up inactive VMs."""
-    global _cleanup_running
-
-    while _cleanup_running:
-        try:
-            vm_lifetime = int(os.getenv("HECATE_VM_LIFETIME_SECONDS", "300"))
-            _cleanup_inactive_vms(vm_lifetime)
-        except Exception as e:
-            print(f"[VM Cleanup] Error in cleanup thread: {e}")
-
-        for _ in range(60):
-            if not _cleanup_running:
-                break
-            time.sleep(1)
-
-
-def _start_cleanup_thread():
-    """Start the background cleanup thread if not already running."""
-    global _cleanup_thread, _cleanup_running
-
-    with _instance_lock:
-        if _cleanup_thread is None or not _cleanup_thread.is_alive():
-            _cleanup_running = True
-            _cleanup_thread = threading.Thread(target=_cleanup_thread_worker, daemon=True)
-            _cleanup_thread.start()
-
-
-def _stop_cleanup_thread():
-    """Stop the background cleanup thread."""
-    global _cleanup_running
-    _cleanup_running = False
-    if _cleanup_thread is not None:
-        _cleanup_thread.join(timeout=5)
-
-
-def cleanup_vm(task_id: str):
-    """Manually clean up a specific VM by task_id."""
-    global _active_instances, _last_activity
-
-    with _instance_lock:
-        try:
-            if task_id in _active_instances:
-                instance = _active_instances[task_id]
-                if hasattr(instance, 'terminate'):
-                    instance.terminate()
-                elif hasattr(instance, 'stop'):
-                    instance.stop()
-                elif hasattr(instance, 'delete'):
-                    instance.delete()
-
-                del _active_instances[task_id]
-                print(f"[VM Cleanup] Manually terminated VM for task: {task_id}")
-
-            if task_id in _last_activity:
-                del _last_activity[task_id]
-
-        except Exception as e:
-            # 404 errors are benign - VM already cleaned up by TTL
-            error_str = str(e)
-            if "404" in error_str or "InstanceNotFoundError" in error_str or "not found" in error_str.lower():
-                print(f"[VM Cleanup] VM for task {task_id} already cleaned up (likely TTL expiration)")
-            else:
-                print(f"[VM Cleanup] Error manually cleaning up VM for task {task_id}: {e}")
-
-
-atexit.register(_stop_cleanup_thread)
-
-
-def _execute_command(instance, command: str, timeout: Optional[int] = None) -> Dict[str, Any]:
-    """
-    Execute a command on the VM instance using instance.exec() for proper stderr capture.
-
-    Args:
-        instance: MorphVM instance
-        command: Command to execute
-        timeout: Optional timeout in seconds (Note: exec() may not support timeout directly)
-
-    Returns:
-        dict with stdout, stderr, returncode
-    """
-    try:
-        # Use instance.exec() which properly captures both stdout and stderr
-        # (unlike ssh.run() which doesn't capture stderr correctly)
-        result = instance.exec(command)
-        
-        # Debug logging only for verbose mode or unusual cases
-        # Note: Non-zero exit codes are normal (model's command failed) - not a tool error
-        if result.exit_code != 0 and not result.stdout and not result.stderr:
-            # Only log if we got absolutely no output - might indicate an issue
-            print(f"⚠️  Command returned exit={result.exit_code} with no output")
-
-        return {
-            "stdout": result.stdout or "",
-            "stderr": result.stderr or "",
-            "returncode": result.exit_code
-        }
-
-    except Exception as e:
-        # Check if it's a timeout
-        error_str = str(e).lower()
-        if "timeout" in error_str:
-            return {
-                "stdout": "",
-                "stderr": f"Command timed out after {timeout or 120} seconds",
-                "returncode": 124
-            }
-
-        return {
-            "stdout": "",
-            "stderr": f"Command execution failed: {str(e)}",
-            "returncode": -1
-        }
-
-
-def simple_terminal_tool(
-    command: str,
-    background: bool = False,
-    timeout: Optional[int] = None,
-    task_id: Optional[str] = None
-) -> str:
-    """
-    Execute a command on a MorphCloud VM without session persistence.
-
-    Args:
-        command: The command to execute
-        background: Whether to run in background (default: False)
-        timeout: Command timeout in seconds (default: 120)
-        task_id: Unique identifier for VM isolation (optional)
-
-    Returns:
-        str: JSON string with output, exit_code, and error fields
-
-    Examples:
-        # Execute a simple command
-        >>> result = simple_terminal_tool(command="ls -la /tmp")
-
-        # Run a background task
-        >>> result = simple_terminal_tool(command="python server.py", background=True)
-
-        # With custom timeout
-        >>> result = simple_terminal_tool(command="long_task.sh", timeout=300)
-    """
-    global _active_instances, _last_activity
-
-    try:
-        # Import required modules
-        try:
-            from morphcloud.api import MorphCloudClient
-        except ImportError as import_error:
-            return json.dumps({
-                "output": "",
-                "exit_code": -1,
-                "error": f"Terminal tool disabled: {import_error}",
-                "status": "disabled"
-            }, ensure_ascii=False)
-
-        # Get configuration
-        vm_ttl_seconds = int(os.getenv("HECATE_VM_TTL_SECONDS", "1200"))
-        snapshot_id = os.getenv("HECATE_DEFAULT_SNAPSHOT_ID", "snapshot_defv9tjg")
-
-        # Check API key
-        morph_api_key = os.getenv("MORPH_API_KEY")
-        if not morph_api_key:
-            return json.dumps({
-                "output": "",
-                "exit_code": -1,
-                "error": "MORPH_API_KEY environment variable not set",
-                "status": "disabled"
-            }, ensure_ascii=False)
-
-        # Use task_id for VM isolation
-        effective_task_id = task_id or "default"
-
-        # Start cleanup thread
-        _start_cleanup_thread()
-
-        # Get or create VM instance
-        with _instance_lock:
-            if effective_task_id not in _active_instances:
-                morph_client = MorphCloudClient(api_key=morph_api_key)
-                _active_instances[effective_task_id] = morph_client.instances.start(
-                    snapshot_id=snapshot_id,
-                    ttl_seconds=vm_ttl_seconds,
-                    ttl_action="stop"
-                )
-
-            # Update last activity time
-            _last_activity[effective_task_id] = time.time()
-            instance = _active_instances[effective_task_id]
-
-        # Wait for instance to be ready
-        instance.wait_until_ready()
-
-        # Prepare command for execution
-        if background:
-            # Run in background with nohup and redirect output
-            exec_command = f"nohup {command} > /tmp/bg_output.log 2>&1 &"
-            result = _execute_command(instance, exec_command, timeout=10)
-
-            # For background tasks, return immediately with info
-            if result["returncode"] == 0:
-                return json.dumps({
-                    "output": "Background task started successfully",
-                    "exit_code": 0,
-                    "error": None
-                }, ensure_ascii=False)
-            else:
-                # Include stderr in output but don't set error (command failure, not tool failure)
-                bg_output = result["stdout"]
-                if result["stderr"]:
-                    bg_output = f"{bg_output}\n{result['stderr']}" if bg_output else result["stderr"]
-                return json.dumps({
-                    "output": bg_output,
-                    "exit_code": result["returncode"],
-                    "error": None  # Only set for actual tool failures
-                }, ensure_ascii=False)
-        else:
-            # Run foreground command with retry logic for transient failures
-            max_retries = 3
-            retry_count = 0
-            result = None
-            
-            while retry_count <= max_retries:
-                result = _execute_command(instance, command, timeout=timeout)
-                
-                # Check if we should retry (only for transient errors, not normal results)
-                stdout = result.get("stdout", "")
-                stderr = result.get("stderr", "")
-                returncode = result.get("returncode", 0)
-                
-                should_retry = False
-                retry_reason = ""
-                
-                # NOTE: Empty output with exit_code=0 is NORMAL for many commands:
-                # - File writes: cat > file, echo > file
-                # - Directory ops: mkdir, cd
-                # - Silent installs: pip install --quiet
-                # So we do NOT retry on exit_code=0, even with empty output.
-                
-                # Only retry on special error codes that suggest transient/infra issues
-                if not stdout and not stderr and returncode in [-1, 124]:
-                    should_retry = True
-                    retry_reason = f"transient error (code {returncode})"
-                
-                if should_retry and retry_count < max_retries:
-                    retry_count += 1
-                    wait_time = 2 ** retry_count  # Exponential backoff: 2s, 4s, 8s
-                    print(f"⚠️  Terminal: {retry_reason}, retrying in {wait_time}s (attempt {retry_count}/{max_retries})")
-                    time.sleep(wait_time)
-                    continue
-                
-                # Got a result (success or normal command failure) - exit retry loop
-                break
-
-            # Combine stdout and stderr for output
-            output = result["stdout"]
-            if result["stderr"] and result["returncode"] != 0:
-                output = f"{output}\n{result['stderr']}" if output else result["stderr"]
-            
-            # Truncate output if too long (max 50,000 chars to avoid context explosion)
-            MAX_OUTPUT_CHARS = 50000
-            if len(output) > MAX_OUTPUT_CHARS:
-                truncated_notice = f"\n\n... [OUTPUT TRUNCATED - showing last {MAX_OUTPUT_CHARS} chars of {len(output)} total] ..."
-                output = truncated_notice + output[-MAX_OUTPUT_CHARS:]
-
-            # NOTE: error is only set for FUNCTIONAL tool failures (VM issues, timeouts, etc.)
-            # Non-zero exit codes from the model's commands are NOT tool failures - 
-            # the model can self-correct. The exit_code field tells the model if the command succeeded.
-            # Retries that eventually succeed also don't count as failures.
-            return json.dumps({
-                "output": output.strip(),
-                "exit_code": result["returncode"],
-                "error": None  # Only set for actual tool failures, not command failures
-            }, ensure_ascii=False)
-
-    except Exception as e:
-        return json.dumps({
-            "output": "",
-            "exit_code": -1,
-            "error": f"Failed to execute command: {str(e)}",
-            "status": "error"
-        }, ensure_ascii=False)
-
-
-def check_requirements() -> bool:
-    """Check if all requirements for the simple terminal tool are met."""
-    required_vars = ["MORPH_API_KEY"]
-    missing_required = [var for var in required_vars if not os.getenv(var)]
-
-    if missing_required:
-        print(f"Missing required environment variables: {', '.join(missing_required)}")
-        return False
-
-    try:
-        from morphcloud.api import MorphCloudClient
-        return True
-    except Exception as e:
-        print(f"MorphCloud not available: {e}")
-        return False
-
-
-if __name__ == "__main__":
-    """Simple test when run directly."""
-    print("Simple Terminal Tool Module")
-    print("=" * 40)
-
-    if not check_requirements():
-        print("Requirements not met. Please check the messages above.")
-        exit(1)
-
-    print("All requirements met!")
-    print("\nAvailable Tool:")
-    print("  - simple_terminal_tool: Execute commands without session persistence")
-
-    print("\nUsage Examples:")
-    print("  # Execute a command")
-    print("  result = simple_terminal_tool(command='ls -la')")
-    print("  ")
-    print("  # Run a background task")
-    print("  result = simple_terminal_tool(command='python server.py', background=True)")
-
-    print("\nEnvironment Variables:")
-    print(f"  MORPH_API_KEY: {'Set' if os.getenv('MORPH_API_KEY') else 'Not set'}")
-    print(f"  HECATE_VM_TTL_SECONDS: {os.getenv('HECATE_VM_TTL_SECONDS', '1200')} (default: 1200 / 20 minutes)")
-    print(f"  HECATE_VM_LIFETIME_SECONDS: {os.getenv('HECATE_VM_LIFETIME_SECONDS', '300')} (default: 300 / 5 minutes)")
-    print(f"  HECATE_DEFAULT_SNAPSHOT_ID: {os.getenv('HECATE_DEFAULT_SNAPSHOT_ID', 'snapshot_defv9tjg')}")