feat: devex help, add Makefile, ruff, pre-commit, and modernize CI

2026-04-28 01:21:43 +00:00 · 2026-03-09 20:36:51 -05:00 · 2026-03-09 20:36:51 -05:00 · f4d7e6a29e
commit f4d7e6a29e
parent 172a38c344
111 changed files with 11655 additions and 10200 deletions
--- a/tools/terminal_tool.py
+++ b/tools/terminal_tool.py
@@ -26,20 +26,22 @@ Usage:
    result = terminal_tool("python server.py", background=True)
 """

+import atexit
 import json
 import logging
 import os
-import signal
-import sys
-import time
-import threading
-import atexit
 import shutil
-import subprocess
-import tempfile
-import uuid
+import sys
+import threading
+import time
 from pathlib import Path
-from typing import Optional, Dict, Any
+from typing import Any
+
+from tools.interrupt import (
+    _interrupt_event,  # noqa: F401 — re-exported to environments/local.py
+    is_interrupted,  # noqa: F401 — re-exported
+)
+from tools.interrupt import set_interrupt as set_interrupt_event  # noqa: F401 — re-exported

 logger = logging.getLogger(__name__)

@@ -49,7 +51,6 @@ logger = logging.getLogger(__name__)
 # The terminal tool polls this during command execution so it can kill
 # long-running subprocesses immediately instead of blocking until timeout.
 # ---------------------------------------------------------------------------
-from tools.interrupt import set_interrupt as set_interrupt_event, is_interrupted, _interrupt_event


 # Add mini-swe-agent to path if not installed
@@ -65,7 +66,6 @@ if mini_swe_path.exists():
 # Singularity helpers (scratch dir, SIF cache) now live in tools/environments/singularity.py
 from tools.environments.singularity import _get_scratch_dir

-
 # Disk usage warning threshold (in GB)
 DISK_USAGE_WARNING_THRESHOLD_GB = float(os.getenv("TERMINAL_DISK_WARNING_GB", "500"))

@@ -73,28 +73,32 @@ DISK_USAGE_WARNING_THRESHOLD_GB = float(os.getenv("TERMINAL_DISK_WARNING_GB", "5
 def _check_disk_usage_warning():
    """Check if total disk usage exceeds warning threshold."""
    scratch_dir = _get_scratch_dir()
-    
+
    try:
        # Get total size of hermes directories
        total_bytes = 0
        import glob
+
        for path in glob.glob(str(scratch_dir / "hermes-*")):
-            for f in Path(path).rglob('*'):
+            for f in Path(path).rglob("*"):
                if f.is_file():
                    try:
                        total_bytes += f.stat().st_size
                    except OSError:
                        pass
-        
-        total_gb = total_bytes / (1024 ** 3)
-        
+
+        total_gb = total_bytes / (1024**3)
+
        if total_gb > DISK_USAGE_WARNING_THRESHOLD_GB:
-            logger.warning("Disk usage (%.1fGB) exceeds threshold (%.0fGB). Consider running cleanup_all_environments().",
-                           total_gb, DISK_USAGE_WARNING_THRESHOLD_GB)
+            logger.warning(
+                "Disk usage (%.1fGB) exceeds threshold (%.0fGB). Consider running cleanup_all_environments().",
+                total_gb,
+                DISK_USAGE_WARNING_THRESHOLD_GB,
+            )
            return True
-        
+
        return False
-    except Exception as e:
+    except Exception:
        return False


@@ -121,59 +125,59 @@ def set_approval_callback(cb):
    global _approval_callback
    _approval_callback = cb

+
 # =============================================================================
 # Dangerous Command Approval System
 # =============================================================================

 # Dangerous command detection + approval now consolidated in tools/approval.py
 from tools.approval import (
-    detect_dangerous_command as _detect_dangerous_command,
    check_dangerous_command as _check_dangerous_command_impl,
-    load_permanent_allowlist as _load_permanent_allowlist,
-    DANGEROUS_PATTERNS,
 )


 def _check_dangerous_command(command: str, env_type: str) -> dict:
    """Delegate to the consolidated approval module, passing the CLI callback."""
-    return _check_dangerous_command_impl(command, env_type,
-                                         approval_callback=_approval_callback)
+    return _check_dangerous_command_impl(command, env_type, approval_callback=_approval_callback)


 def _handle_sudo_failure(output: str, env_type: str) -> str:
    """
    Check for sudo failure and add helpful message for messaging contexts.
-    
+
    Returns enhanced output if sudo failed in messaging context, else original.
    """
    is_gateway = os.getenv("HERMES_GATEWAY_SESSION")
-    
+
    if not is_gateway:
        return output
-    
+
    # Check for sudo failure indicators
    sudo_failures = [
        "sudo: a password is required",
        "sudo: no tty present",
        "sudo: a terminal is required",
    ]
-    
+
    for failure in sudo_failures:
        if failure in output:
-            return output + "\n\n💡 Tip: To enable sudo over messaging, add SUDO_PASSWORD to ~/.hermes/.env on the agent machine."
-    
+            return (
+                output
+                + "\n\n💡 Tip: To enable sudo over messaging, add SUDO_PASSWORD to ~/.hermes/.env on the agent machine."
+            )
+
    return output


 def _prompt_for_sudo_password(timeout_seconds: int = 45) -> str:
    """
    Prompt user for sudo password with timeout.
-    
+
    Returns the password if entered, or empty string if:
    - User presses Enter without input (skip)
    - Timeout expires (45s default)
    - Any error occurs
-    
+
    Only works in interactive mode (HERMES_INTERACTIVE=1).
    If a _sudo_password_callback is registered (by the CLI), delegates to it
    so the prompt integrates with prompt_toolkit's UI.  Otherwise reads
@@ -181,7 +185,7 @@ def _prompt_for_sudo_password(timeout_seconds: int = 45) -> str:
    """
    import sys
    import time as time_module
-    
+
    # Use the registered callback when available (prompt_toolkit-compatible)
    if _sudo_password_callback is not None:
        try:
@@ -190,13 +194,14 @@ def _prompt_for_sudo_password(timeout_seconds: int = 45) -> str:
            return ""

    result = {"password": None, "done": False}
-    
+
    def read_password_thread():
        """Read password from /dev/tty with echo disabled."""
        tty_fd = None
        old_attrs = None
        try:
            import termios
+
            tty_fd = os.open("/dev/tty", os.O_RDONLY)
            old_attrs = termios.tcgetattr(tty_fd)
            new_attrs = termios.tcgetattr(tty_fd)
@@ -217,6 +222,7 @@ def _prompt_for_sudo_password(timeout_seconds: int = 45) -> str:
            if tty_fd is not None and old_attrs is not None:
                try:
                    import termios as _termios
+
                    _termios.tcsetattr(tty_fd, _termios.TCSAFLUSH, old_attrs)
                except Exception:
                    pass
@@ -226,11 +232,11 @@ def _prompt_for_sudo_password(timeout_seconds: int = 45) -> str:
                except Exception:
                    pass
            result["done"] = True
-    
+
    try:
        os.environ["HERMES_SPINNER_PAUSE"] = "1"
        time_module.sleep(0.2)
-        
+
        print()
        print("┌" + "─" * 58 + "┐")
        print("│  🔐 SUDO PASSWORD REQUIRED" + " " * 30 + "│")
@@ -241,11 +247,11 @@ def _prompt_for_sudo_password(timeout_seconds: int = 45) -> str:
        print("└" + "─" * 58 + "┘")
        print()
        print("  Password (hidden): ", end="", flush=True)
-        
+
        password_thread = threading.Thread(target=read_password_thread, daemon=True)
        password_thread.start()
        password_thread.join(timeout=timeout_seconds)
-        
+
        if result["done"]:
            password = result["password"] or ""
            print()  # newline after hidden input
@@ -262,7 +268,7 @@ def _prompt_for_sudo_password(timeout_seconds: int = 45) -> str:
            print()
            sys.stdout.flush()
            return ""
-            
+
    except (EOFError, KeyboardInterrupt):
        print()
        print("  ⏭ Cancelled - continuing without sudo")
@@ -281,29 +287,29 @@ def _prompt_for_sudo_password(timeout_seconds: int = 45) -> str:
 def _transform_sudo_command(command: str) -> str:
    """
    Transform sudo commands to use -S flag if SUDO_PASSWORD is available.
-    
+
    This is a shared helper used by all execution environments to provide
    consistent sudo handling across local, SSH, and container environments.
-    
+
    If SUDO_PASSWORD is set (via env, config, or interactive prompt):
      'sudo apt install curl' -> password piped via sudo -S
-      
+
    If SUDO_PASSWORD is not set and in interactive mode (HERMES_INTERACTIVE=1):
      Prompts user for password with 45s timeout, caches for session.
-      
+
    If SUDO_PASSWORD is not set and NOT interactive:
      Command runs as-is (fails gracefully with "sudo: a password is required").
    """
    global _cached_sudo_password
    import re
-    
+
    # Check if command even contains sudo
-    if not re.search(r'\bsudo\b', command):
+    if not re.search(r"\bsudo\b", command):
        return command  # No sudo in command, return as-is
-    
+
    # Try to get password from: env var -> session cache -> interactive prompt
    sudo_password = os.getenv("SUDO_PASSWORD", "") or _cached_sudo_password
-    
+
    if not sudo_password:
        # No password configured - check if we're in interactive mode
        if os.getenv("HERMES_INTERACTIVE"):
@@ -311,30 +317,30 @@ def _transform_sudo_command(command: str) -> str:
            sudo_password = _prompt_for_sudo_password(timeout_seconds=45)
            if sudo_password:
                _cached_sudo_password = sudo_password  # Cache for session
-    
+
    if not sudo_password:
        return command  # No password, let it fail gracefully
-    
+
    def replace_sudo(match):
        # Replace 'sudo' with password-piped version
        # The -S flag makes sudo read password from stdin
        # The -p '' suppresses the password prompt
        # Use shlex.quote() to prevent shell injection via password content
        import shlex
+
        return f"echo {shlex.quote(sudo_password)} | sudo -S -p ''"
-    
+
    # Match 'sudo' at word boundaries (not 'visudo' or 'sudoers')
    # This handles: sudo, sudo -flag, etc.
-    return re.sub(r'\bsudo\b', replace_sudo, command)
+    return re.sub(r"\bsudo\b", replace_sudo, command)


 # Environment classes now live in tools/environments/
+from tools.environments.docker import DockerEnvironment as _DockerEnvironment
 from tools.environments.local import LocalEnvironment as _LocalEnvironment
+from tools.environments.modal import ModalEnvironment as _ModalEnvironment
 from tools.environments.singularity import SingularityEnvironment as _SingularityEnvironment
 from tools.environments.ssh import SSHEnvironment as _SSHEnvironment
-from tools.environments.docker import DockerEnvironment as _DockerEnvironment
-from tools.environments.modal import ModalEnvironment as _ModalEnvironment
-

 # Tool description for LLM
 TERMINAL_TOOL_DESCRIPTION = """Execute shell commands on a Linux environment. Filesystem persists between calls.
@@ -356,10 +362,10 @@ Do NOT use vim/nano/interactive tools without pty=true — they hang without a p
 """

 # Global state for environment lifecycle management
-_active_environments: Dict[str, Any] = {}
-_last_activity: Dict[str, float] = {}
+_active_environments: dict[str, Any] = {}
+_last_activity: dict[str, float] = {}
 _env_lock = threading.Lock()
-_creation_locks: Dict[str, threading.Lock] = {}  # Per-task locks for sandbox creation
+_creation_locks: dict[str, threading.Lock] = {}  # Per-task locks for sandbox creation
 _creation_locks_lock = threading.Lock()  # Protects _creation_locks dict itself
 _cleanup_thread = None
 _cleanup_running = False
@@ -372,10 +378,10 @@ _cleanup_running = False
 #
 # This is never exposed to the model -- only infrastructure code calls it.
 # Thread-safe because each task_id is unique per rollout.
-_task_env_overrides: Dict[str, Dict[str, Any]] = {}
+_task_env_overrides: dict[str, dict[str, Any]] = {}


-def register_task_env_overrides(task_id: str, overrides: Dict[str, Any]):
+def register_task_env_overrides(task_id: str, overrides: dict[str, Any]):
    """
    Register environment overrides for a specific task/rollout.

@@ -402,13 +408,14 @@ def clear_task_env_overrides(task_id: str):
    """
    _task_env_overrides.pop(task_id, None)

+
 # Configuration from environment variables
-def _get_env_config() -> Dict[str, Any]:
+def _get_env_config() -> dict[str, Any]:
    """Get terminal environment configuration from environment variables."""
    # Default image with Python and Node.js for maximum compatibility
    default_image = "nikolaik/python-nodejs:python3.11-nodejs20"
    env_type = os.getenv("TERMINAL_ENV", "local")
-    
+
    # Default cwd: local uses the host's current directory, everything
    # else starts in the user's home (~ resolves to whatever account
    # is running inside the container/remote).
@@ -416,7 +423,7 @@ def _get_env_config() -> Dict[str, Any]:
        default_cwd = os.getcwd()
    else:
        default_cwd = "~"
-    
+
    # Read TERMINAL_CWD but sanity-check it for container backends.
    # If the CWD looks like a host-local path that can't exist inside a
    # container/sandbox, fall back to the backend's own default. This
@@ -426,9 +433,12 @@ def _get_env_config() -> Dict[str, Any]:
    if env_type in ("modal", "docker", "singularity", "daytona") and cwd:
        host_prefixes = ("/Users/", "C:\\", "C:/")
        if any(cwd.startswith(p) for p in host_prefixes) and cwd != default_cwd:
-            logger.info("Ignoring TERMINAL_CWD=%r for %s backend "
-                        "(host path won't exist in sandbox). Using %r instead.",
-                        cwd, env_type, default_cwd)
+            logger.info(
+                "Ignoring TERMINAL_CWD=%r for %s backend (host path won't exist in sandbox). Using %r instead.",
+                cwd,
+                env_type,
+                default_cwd,
+            )
            cwd = default_cwd

    return {
@@ -447,19 +457,25 @@ def _get_env_config() -> Dict[str, Any]:
        "ssh_key": os.getenv("TERMINAL_SSH_KEY", ""),
        # Container resource config (applies to docker, singularity, modal, daytona -- ignored for local/ssh)
        "container_cpu": float(os.getenv("TERMINAL_CONTAINER_CPU", "1")),
-        "container_memory": int(os.getenv("TERMINAL_CONTAINER_MEMORY", "5120")),     # MB (default 5GB)
-        "container_disk": int(os.getenv("TERMINAL_CONTAINER_DISK", "51200")),        # MB (default 50GB)
+        "container_memory": int(os.getenv("TERMINAL_CONTAINER_MEMORY", "5120")),  # MB (default 5GB)
+        "container_disk": int(os.getenv("TERMINAL_CONTAINER_DISK", "51200")),  # MB (default 50GB)
        "container_persistent": os.getenv("TERMINAL_CONTAINER_PERSISTENT", "true").lower() in ("true", "1", "yes"),
        "docker_volumes": json.loads(os.getenv("TERMINAL_DOCKER_VOLUMES", "[]")),
    }


-def _create_environment(env_type: str, image: str, cwd: str, timeout: int,
-                        ssh_config: dict = None, container_config: dict = None,
-                        task_id: str = "default"):
+def _create_environment(
+    env_type: str,
+    image: str,
+    cwd: str,
+    timeout: int,
+    ssh_config: dict = None,
+    container_config: dict = None,
+    task_id: str = "default",
+):
    """
    Create an execution environment from mini-swe-agent.
-    
+
    Args:
        env_type: One of "local", "docker", "singularity", "modal", "daytona", "ssh"
        image: Docker/Singularity/Modal image name (ignored for local/ssh)
@@ -468,7 +484,7 @@ def _create_environment(env_type: str, image: str, cwd: str, timeout: int,
        ssh_config: SSH connection config (for env_type="ssh")
        container_config: Resource config for container backends (cpu, memory, disk, persistent)
        task_id: Task identifier for environment reuse and snapshot keying
-        
+
    Returns:
        Environment instance with execute() method
    """
@@ -481,22 +497,32 @@ def _create_environment(env_type: str, image: str, cwd: str, timeout: int,

    if env_type == "local":
        return _LocalEnvironment(cwd=cwd, timeout=timeout)
-    
+
    elif env_type == "docker":
        return _DockerEnvironment(
-            image=image, cwd=cwd, timeout=timeout,
-            cpu=cpu, memory=memory, disk=disk,
-            persistent_filesystem=persistent, task_id=task_id,
+            image=image,
+            cwd=cwd,
+            timeout=timeout,
+            cpu=cpu,
+            memory=memory,
+            disk=disk,
+            persistent_filesystem=persistent,
+            task_id=task_id,
            volumes=volumes,
        )
-    
+
    elif env_type == "singularity":
        return _SingularityEnvironment(
-            image=image, cwd=cwd, timeout=timeout,
-            cpu=cpu, memory=memory, disk=disk,
-            persistent_filesystem=persistent, task_id=task_id,
+            image=image,
+            cwd=cwd,
+            timeout=timeout,
+            cpu=cpu,
+            memory=memory,
+            disk=disk,
+            persistent_filesystem=persistent,
+            task_id=task_id,
        )
-    
+
    elif env_type == "modal":
        sandbox_kwargs = {}
        if cpu > 0:
@@ -505,20 +531,29 @@ def _create_environment(env_type: str, image: str, cwd: str, timeout: int,
            sandbox_kwargs["memory"] = memory
        if disk > 0:
            sandbox_kwargs["ephemeral_disk"] = disk
-        
+
        return _ModalEnvironment(
-            image=image, cwd=cwd, timeout=timeout,
+            image=image,
+            cwd=cwd,
+            timeout=timeout,
            modal_sandbox_kwargs=sandbox_kwargs,
-            persistent_filesystem=persistent, task_id=task_id,
+            persistent_filesystem=persistent,
+            task_id=task_id,
        )
-    
+
    elif env_type == "daytona":
        # Lazy import so daytona SDK is only required when backend is selected.
        from tools.environments.daytona import DaytonaEnvironment as _DaytonaEnvironment
+
        return _DaytonaEnvironment(
-            image=image, cwd=cwd, timeout=timeout,
-            cpu=int(cpu), memory=memory, disk=disk,
-            persistent_filesystem=persistent, task_id=task_id,
+            image=image,
+            cwd=cwd,
+            timeout=timeout,
+            cpu=int(cpu),
+            memory=memory,
+            disk=disk,
+            persistent_filesystem=persistent,
+            task_id=task_id,
        )

    elif env_type == "ssh":
@@ -534,7 +569,9 @@ def _create_environment(env_type: str, image: str, cwd: str, timeout: int,
        )

    else:
-        raise ValueError(f"Unknown environment type: {env_type}. Use 'local', 'docker', 'singularity', 'modal', 'daytona', or 'ssh'")
+        raise ValueError(
+            f"Unknown environment type: {env_type}. Use 'local', 'docker', 'singularity', 'modal', 'daytona', or 'ssh'"
+        )


 def _cleanup_inactive_envs(lifetime_seconds: int = 300):
@@ -547,6 +584,7 @@ def _cleanup_inactive_envs(lifetime_seconds: int = 300):
    # background processes (their _last_activity gets refreshed to keep them alive).
    try:
        from tools.process_registry import process_registry
+
        for task_id in list(_last_activity.keys()):
            if process_registry.has_active_processes(task_id):
                _last_activity[task_id] = current_time  # Keep sandbox alive
@@ -579,16 +617,17 @@ def _cleanup_inactive_envs(lifetime_seconds: int = 300):
        # ShellFileOperations from referencing a dead sandbox)
        try:
            from tools.file_tools import clear_file_ops_cache
+
            clear_file_ops_cache(task_id)
        except ImportError:
            pass

        try:
-            if hasattr(env, 'cleanup'):
+            if hasattr(env, "cleanup"):
                env.cleanup()
-            elif hasattr(env, 'stop'):
+            elif hasattr(env, "stop"):
                env.stop()
-            elif hasattr(env, 'terminate'):
+            elif hasattr(env, "terminate"):
                env.terminate()

            logger.info("Cleaned up inactive environment for task: %s", task_id)
@@ -640,27 +679,28 @@ def _stop_cleanup_thread():
            pass


-def get_active_environments_info() -> Dict[str, Any]:
+def get_active_environments_info() -> dict[str, Any]:
    """Get information about currently active environments."""
    info = {
        "count": len(_active_environments),
        "task_ids": list(_active_environments.keys()),
        "workdirs": {},
    }
-    
+
    # Calculate total disk usage (per-task to avoid double-counting)
    total_size = 0
-    for task_id in _active_environments.keys():
+    for task_id in _active_environments:
        scratch_dir = _get_scratch_dir()
        pattern = f"hermes-*{task_id[:8]}*"
        import glob
+
        for path in glob.glob(str(scratch_dir / pattern)):
            try:
-                size = sum(f.stat().st_size for f in Path(path).rglob('*') if f.is_file())
+                size = sum(f.stat().st_size for f in Path(path).rglob("*") if f.is_file())
                total_size += size
            except OSError:
                pass
-    
+
    info["total_disk_usage_mb"] = round(total_size / (1024 * 1024), 2)
    return info

@@ -668,27 +708,28 @@ def get_active_environments_info() -> Dict[str, Any]:
 def cleanup_all_environments():
    """Clean up ALL active environments. Use with caution."""
    global _active_environments, _last_activity
-    
+
    task_ids = list(_active_environments.keys())
    cleaned = 0
-    
+
    for task_id in task_ids:
        try:
            cleanup_vm(task_id)
            cleaned += 1
        except Exception as e:
            logger.error("Error cleaning %s: %s", task_id, e, exc_info=True)
-    
+
    # Also clean any orphaned directories
    scratch_dir = _get_scratch_dir()
    import glob
+
    for path in glob.glob(str(scratch_dir / "hermes-*")):
        try:
            shutil.rmtree(path, ignore_errors=True)
            logger.info("Removed orphaned: %s", path)
        except OSError:
            pass
-    
+
    if cleaned > 0:
        logger.info("Cleaned %d environments", cleaned)
    return cleaned
@@ -713,6 +754,7 @@ def cleanup_vm(task_id: str):
    # Invalidate stale file_ops cache entry
    try:
        from tools.file_tools import clear_file_ops_cache
+
        clear_file_ops_cache(task_id)
    except ImportError:
        pass
@@ -721,11 +763,11 @@ def cleanup_vm(task_id: str):
        return

    try:
-        if hasattr(env, 'cleanup'):
+        if hasattr(env, "cleanup"):
            env.cleanup()
-        elif hasattr(env, 'stop'):
+        elif hasattr(env, "stop"):
            env.stop()
-        elif hasattr(env, 'terminate'):
+        elif hasattr(env, "terminate"):
            env.terminate()

        logger.info("Manually cleaned up environment for task: %s", task_id)
@@ -746,17 +788,18 @@ def _atexit_cleanup():
        logger.info("Shutting down %d remaining sandbox(es)...", count)
        cleanup_all_environments()

+
 atexit.register(_atexit_cleanup)


 def terminal_tool(
    command: str,
    background: bool = False,
-    timeout: Optional[int] = None,
-    task_id: Optional[str] = None,
+    timeout: int | None = None,
+    task_id: str | None = None,
    force: bool = False,
-    workdir: Optional[str] = None,
-    check_interval: Optional[int] = None,
+    workdir: str | None = None,
+    check_interval: int | None = None,
    pty: bool = False,
 ) -> str:
    """
@@ -784,7 +827,7 @@ def terminal_tool(

        # With custom timeout
        >>> result = terminal_tool(command="long_task.sh", timeout=300)
-        
+
        # Force run after user confirmation
        # Note: force parameter is internal only, not exposed to model API
    """
@@ -801,7 +844,7 @@ def terminal_tool(
        # Check per-task overrides (set by environments like TerminalBench2Env)
        # before falling back to global env var config
        overrides = _task_env_overrides.get(effective_task_id, {})
-        
+
        # Select image based on env type, with per-task override support
        if env_type == "docker":
            image = overrides.get("docker_image") or config["docker_image"]
@@ -882,12 +925,15 @@ def terminal_tool(
                            task_id=effective_task_id,
                        )
                    except ImportError as e:
-                        return json.dumps({
-                            "output": "",
-                            "exit_code": -1,
-                            "error": f"Terminal tool disabled: mini-swe-agent not available ({e})",
-                            "status": "disabled"
-                        }, ensure_ascii=False)
+                        return json.dumps(
+                            {
+                                "output": "",
+                                "exit_code": -1,
+                                "error": f"Terminal tool disabled: mini-swe-agent not available ({e})",
+                                "status": "disabled",
+                            },
+                            ensure_ascii=False,
+                        )

                    with _env_lock:
                        _active_environments[effective_task_id] = new_env
@@ -902,27 +948,33 @@ def terminal_tool(
            if not approval["approved"]:
                # Check if this is an approval_required (gateway ask mode)
                if approval.get("status") == "approval_required":
-                    return json.dumps({
-                        "output": "",
-                        "exit_code": -1,
-                        "error": approval.get("message", "Waiting for user approval"),
-                        "status": "approval_required",
-                        "command": approval.get("command", command),
-                        "description": approval.get("description", "dangerous command"),
-                        "pattern_key": approval.get("pattern_key", ""),
-                    }, ensure_ascii=False)
+                    return json.dumps(
+                        {
+                            "output": "",
+                            "exit_code": -1,
+                            "error": approval.get("message", "Waiting for user approval"),
+                            "status": "approval_required",
+                            "command": approval.get("command", command),
+                            "description": approval.get("description", "dangerous command"),
+                            "pattern_key": approval.get("pattern_key", ""),
+                        },
+                        ensure_ascii=False,
+                    )
                # Command was blocked - include the pattern category so the caller knows why
                desc = approval.get("description", "potentially dangerous operation")
                fallback_msg = (
                    f"Command denied: matches '{desc}' pattern. "
                    "Use the approval prompt to allow it, or rephrase the command."
                )
-                return json.dumps({
-                    "output": "",
-                    "exit_code": -1,
-                    "error": approval.get("message", fallback_msg),
-                    "status": "blocked"
-                }, ensure_ascii=False)
+                return json.dumps(
+                    {
+                        "output": "",
+                        "exit_code": -1,
+                        "error": approval.get("message", fallback_msg),
+                        "status": "blocked",
+                    },
+                    ensure_ascii=False,
+                )

        # Prepare command for execution
        if background:
@@ -940,7 +992,7 @@ def terminal_tool(
                        cwd=effective_cwd,
                        task_id=effective_task_id,
                        session_key=session_key,
-                        env_vars=env.env if hasattr(env, 'env') else None,
+                        env_vars=env.env if hasattr(env, "env") else None,
                        use_pty=pty,
                    )
                else:
@@ -964,38 +1016,36 @@ def terminal_tool(
                max_timeout = effective_timeout
                if timeout and timeout > max_timeout:
                    result_data["timeout_note"] = (
-                        f"Requested timeout {timeout}s was clamped to "
-                        f"configured limit of {max_timeout}s"
+                        f"Requested timeout {timeout}s was clamped to configured limit of {max_timeout}s"
                    )

                # Register check_interval watcher (gateway picks this up after agent run)
                if check_interval and background:
                    effective_interval = max(30, check_interval)
                    if check_interval < 30:
-                        result_data["check_interval_note"] = (
-                            f"Requested {check_interval}s raised to minimum 30s"
-                        )
-                    process_registry.pending_watchers.append({
-                        "session_id": proc_session.id,
-                        "check_interval": effective_interval,
-                        "session_key": session_key,
-                        "platform": os.getenv("HERMES_SESSION_PLATFORM", ""),
-                        "chat_id": os.getenv("HERMES_SESSION_CHAT_ID", ""),
-                    })
+                        result_data["check_interval_note"] = f"Requested {check_interval}s raised to minimum 30s"
+                    process_registry.pending_watchers.append(
+                        {
+                            "session_id": proc_session.id,
+                            "check_interval": effective_interval,
+                            "session_key": session_key,
+                            "platform": os.getenv("HERMES_SESSION_PLATFORM", ""),
+                            "chat_id": os.getenv("HERMES_SESSION_CHAT_ID", ""),
+                        }
+                    )

                return json.dumps(result_data, ensure_ascii=False)
            except Exception as e:
-                return json.dumps({
-                    "output": "",
-                    "exit_code": -1,
-                    "error": f"Failed to start background process: {str(e)}"
-                }, ensure_ascii=False)
+                return json.dumps(
+                    {"output": "", "exit_code": -1, "error": f"Failed to start background process: {str(e)}"},
+                    ensure_ascii=False,
+                )
        else:
            # Run foreground command with retry logic
            max_retries = 3
            retry_count = 0
            result = None
-            
+
            while retry_count <= max_retries:
                try:
                    execute_kwargs = {"timeout": effective_timeout}
@ -1005,39 +1055,61 @@ def terminal_tool(
                except Exception as e:
                    error_str = str(e).lower()
                    if "timeout" in error_str:
-                        return json.dumps({
-                            "output": "",
-                            "exit_code": 124,
-                            "error": f"Command timed out after {effective_timeout} seconds"
-                        }, ensure_ascii=False)
-                    
+                        return json.dumps(
+                            {
+                                "output": "",
+                                "exit_code": 124,
+                                "error": f"Command timed out after {effective_timeout} seconds",
+                            },
+                            ensure_ascii=False,
+                        )
+
                    # Retry on transient errors
                    if retry_count < max_retries:
                        retry_count += 1
-                        wait_time = 2 ** retry_count
-                        logger.warning("Execution error, retrying in %ds (attempt %d/%d) - Command: %s - Error: %s: %s - Task: %s, Backend: %s",
-                                       wait_time, retry_count, max_retries, command[:200], type(e).__name__, e, effective_task_id, env_type)
+                        wait_time = 2**retry_count
+                        logger.warning(
+                            "Execution error, retrying in %ds (attempt %d/%d) - Command: %s - Error: %s: %s - Task: %s, Backend: %s",
+                            wait_time,
+                            retry_count,
+                            max_retries,
+                            command[:200],
+                            type(e).__name__,
+                            e,
+                            effective_task_id,
+                            env_type,
+                        )
                        time.sleep(wait_time)
                        continue
-                    
-                    logger.error("Execution failed after %d retries - Command: %s - Error: %s: %s - Task: %s, Backend: %s",
-                                 max_retries, command[:200], type(e).__name__, e, effective_task_id, env_type)
-                    return json.dumps({
-                        "output": "",
-                        "exit_code": -1,
-                        "error": f"Command execution failed: {type(e).__name__}: {str(e)}"
-                    }, ensure_ascii=False)
-                
+
+                    logger.error(
+                        "Execution failed after %d retries - Command: %s - Error: %s: %s - Task: %s, Backend: %s",
+                        max_retries,
+                        command[:200],
+                        type(e).__name__,
+                        e,
+                        effective_task_id,
+                        env_type,
+                    )
+                    return json.dumps(
+                        {
+                            "output": "",
+                            "exit_code": -1,
+                            "error": f"Command execution failed: {type(e).__name__}: {str(e)}",
+                        },
+                        ensure_ascii=False,
+                    )
+
                # Got a result
                break
-            
+
            # Extract output
            output = result.get("output", "")
            returncode = result.get("returncode", 0)
-            
+
            # Add helpful message for sudo failures in messaging context
            output = _handle_sudo_failure(output, env_type)
-            
+
            # Truncate output if too long, keeping both head and tail
            MAX_OUTPUT_CHARS = 50000
            if len(output) > MAX_OUTPUT_CHARS:
@ -1045,65 +1117,56 @@ def terminal_tool(
                tail_chars = MAX_OUTPUT_CHARS - head_chars  # 60% tail (most recent/relevant output)
                omitted = len(output) - head_chars - tail_chars
                truncated_notice = (
-                    f"\n\n... [OUTPUT TRUNCATED - {omitted} chars omitted "
-                    f"out of {len(output)} total] ...\n\n"
+                    f"\n\n... [OUTPUT TRUNCATED - {omitted} chars omitted out of {len(output)} total] ...\n\n"
                )
                output = output[:head_chars] + truncated_notice + output[-tail_chars:]

            # Redact secrets from command output (catches env/printenv leaking keys)
            from agent.redact import redact_sensitive_text
+
            output = redact_sensitive_text(output.strip()) if output else ""

-            return json.dumps({
-                "output": output,
-                "exit_code": returncode,
-                "error": None
-            }, ensure_ascii=False)
+            return json.dumps({"output": output, "exit_code": returncode, "error": None}, ensure_ascii=False)

    except Exception as e:
-        return json.dumps({
-            "output": "",
-            "exit_code": -1,
-            "error": f"Failed to execute command: {str(e)}",
-            "status": "error"
-        }, ensure_ascii=False)
+        return json.dumps(
+            {"output": "", "exit_code": -1, "error": f"Failed to execute command: {str(e)}", "status": "error"},
+            ensure_ascii=False,
+        )


 def check_terminal_requirements() -> bool:
    """Check if all requirements for the terminal tool are met."""
    config = _get_env_config()
    env_type = config["env_type"]
-    
+
    try:
        if env_type == "local":
-            from minisweagent.environments.local import LocalEnvironment
            return True
        elif env_type == "docker":
-            from minisweagent.environments.docker import DockerEnvironment
            # Check if docker is available
            import subprocess
+
            result = subprocess.run(["docker", "version"], capture_output=True, timeout=5)
            return result.returncode == 0
        elif env_type == "singularity":
-            from minisweagent.environments.singularity import SingularityEnvironment
+            import shutil
+
            # Check if singularity/apptainer is available
            import subprocess
-            import shutil
+
            executable = shutil.which("apptainer") or shutil.which("singularity")
            if executable:
                result = subprocess.run([executable, "--version"], capture_output=True, timeout=5)
                return result.returncode == 0
            return False
        elif env_type == "ssh":
-            from tools.environments.ssh import SSHEnvironment
            # Check that host and user are configured
            return bool(config.get("ssh_host")) and bool(config.get("ssh_user"))
        elif env_type == "modal":
-            from minisweagent.environments.extra.swerex_modal import SwerexModalEnvironment
            # Check for modal token
            return os.getenv("MODAL_TOKEN_ID") is not None or Path.home().joinpath(".modal.toml").exists()
        elif env_type == "daytona":
-            from daytona import Daytona
            return os.getenv("DAYTONA_API_KEY") is not None
        else:
            return False
@ -1116,9 +1179,9 @@ if __name__ == "__main__":
    # Simple test when run directly
    print("Terminal Tool Module (mini-swe-agent backend)")
    print("=" * 50)
-    
+
    config = _get_env_config()
-    print(f"\nCurrent Configuration:")
+    print("\nCurrent Configuration:")
    print(f"  Environment type: {config['env_type']}")
    print(f"  Docker image: {config['docker_image']}")
    print(f"  Modal image: {config['modal_image']}")
@ -1165,37 +1228,34 @@ TERMINAL_SCHEMA = {
    "parameters": {
        "type": "object",
        "properties": {
-            "command": {
-                "type": "string",
-                "description": "The command to execute on the VM"
-            },
+            "command": {"type": "string", "description": "The command to execute on the VM"},
            "background": {
                "type": "boolean",
                "description": "ONLY for servers/watchers that never exit. For scripts, builds, installs — use foreground with timeout instead (it returns instantly when done).",
-                "default": False
+                "default": False,
            },
            "timeout": {
                "type": "integer",
                "description": "Max seconds to wait (default: 180). Returns INSTANTLY when command finishes — set high for long tasks, you won't wait unnecessarily.",
-                "minimum": 1
+                "minimum": 1,
            },
            "workdir": {
                "type": "string",
-                "description": "Working directory for this command (absolute path). Defaults to the session working directory."
+                "description": "Working directory for this command (absolute path). Defaults to the session working directory.",
            },
            "check_interval": {
                "type": "integer",
                "description": "Seconds between automatic status checks for background processes (gateway/messaging only, minimum 30). When set, I'll proactively report progress.",
-                "minimum": 30
+                "minimum": 30,
            },
            "pty": {
                "type": "boolean",
                "description": "Run in pseudo-terminal (PTY) mode for interactive CLI tools like Codex, Claude Code, or Python REPL. Only works with local and SSH backends. Default: false.",
-                "default": False
-            }
+                "default": False,
+            },
        },
-        "required": ["command"]
-    }
+        "required": ["command"],
+    },
 }