feat: devex help, add Makefile, ruff, pre-commit, and modernize CI

This commit is contained in:
Brooklyn Nicholson 2026-03-09 20:36:51 -05:00
parent 172a38c344
commit f4d7e6a29e
111 changed files with 11655 additions and 10200 deletions

View file

@ -26,20 +26,22 @@ Usage:
result = terminal_tool("python server.py", background=True)
"""
import atexit
import json
import logging
import os
import signal
import sys
import time
import threading
import atexit
import shutil
import subprocess
import tempfile
import uuid
import sys
import threading
import time
from pathlib import Path
from typing import Optional, Dict, Any
from typing import Any
from tools.interrupt import (
_interrupt_event, # noqa: F401 — re-exported to environments/local.py
is_interrupted, # noqa: F401 — re-exported
)
from tools.interrupt import set_interrupt as set_interrupt_event # noqa: F401 — re-exported
logger = logging.getLogger(__name__)
@ -49,7 +51,6 @@ logger = logging.getLogger(__name__)
# The terminal tool polls this during command execution so it can kill
# long-running subprocesses immediately instead of blocking until timeout.
# ---------------------------------------------------------------------------
from tools.interrupt import set_interrupt as set_interrupt_event, is_interrupted, _interrupt_event
# Add mini-swe-agent to path if not installed
@ -65,7 +66,6 @@ if mini_swe_path.exists():
# Singularity helpers (scratch dir, SIF cache) now live in tools/environments/singularity.py
from tools.environments.singularity import _get_scratch_dir
# Disk usage warning threshold (in GB)
DISK_USAGE_WARNING_THRESHOLD_GB = float(os.getenv("TERMINAL_DISK_WARNING_GB", "500"))
@ -73,28 +73,32 @@ DISK_USAGE_WARNING_THRESHOLD_GB = float(os.getenv("TERMINAL_DISK_WARNING_GB", "5
def _check_disk_usage_warning():
    """Warn when the hermes scratch directories exceed the disk threshold.

    Sums the size of every regular file found under each ``hermes-*`` entry
    of the scratch directory and compares the total (in GB) against
    ``DISK_USAGE_WARNING_THRESHOLD_GB``.

    Returns:
        True if the threshold was exceeded (a warning is logged), False
        otherwise -- including when the scan itself fails unexpectedly.
    """
    scratch_dir = _get_scratch_dir()

    try:
        import glob

        # Get total size of hermes directories
        total_bytes = 0
        for path in glob.glob(str(scratch_dir / "hermes-*")):
            for f in Path(path).rglob("*"):
                if f.is_file():
                    try:
                        total_bytes += f.stat().st_size
                    except OSError:
                        # File vanished or is unreadable; skip it and keep counting.
                        pass

        total_gb = total_bytes / (1024**3)
        if total_gb > DISK_USAGE_WARNING_THRESHOLD_GB:
            logger.warning(
                "Disk usage (%.1fGB) exceeds threshold (%.0fGB). Consider running cleanup_all_environments().",
                total_gb,
                DISK_USAGE_WARNING_THRESHOLD_GB,
            )
            return True

        return False
    except Exception:
        # Best-effort check: never let a failed scan break the caller, but
        # leave a trace so the failure is diagnosable.
        logger.debug("Disk usage check failed", exc_info=True)
        return False
@ -121,59 +125,59 @@ def set_approval_callback(cb):
global _approval_callback
_approval_callback = cb
# =============================================================================
# Dangerous Command Approval System
# =============================================================================
# Dangerous command detection + approval now consolidated in tools/approval.py
from tools.approval import (
detect_dangerous_command as _detect_dangerous_command,
check_dangerous_command as _check_dangerous_command_impl,
load_permanent_allowlist as _load_permanent_allowlist,
DANGEROUS_PATTERNS,
)
def _check_dangerous_command(command: str, env_type: str) -> dict:
    """Delegate to the consolidated approval module, passing the CLI callback.

    Args:
        command: The shell command to check.
        env_type: The execution backend name, forwarded unchanged.

    Returns:
        Whatever ``_check_dangerous_command_impl`` returns for this command.
    """
    # The module-level _approval_callback is read at call time, so a callback
    # registered after import is still honoured.
    return _check_dangerous_command_impl(command, env_type, approval_callback=_approval_callback)
def _handle_sudo_failure(output: str, env_type: str) -> str:
    """
    Check for sudo failure and add helpful message for messaging contexts.

    Args:
        output: Command output to inspect.
        env_type: Execution backend name (accepted for interface
            compatibility; not consulted by this check).

    Returns:
        ``output`` with a SUDO_PASSWORD tip appended when the
        HERMES_GATEWAY_SESSION environment variable is set and the output
        contains a known sudo failure message; otherwise ``output`` unchanged.
    """
    # The tip is only shown for gateway (messaging) sessions.
    if not os.getenv("HERMES_GATEWAY_SESSION"):
        return output

    # Check for sudo failure indicators
    sudo_failures = (
        "sudo: a password is required",
        "sudo: no tty present",
        "sudo: a terminal is required",
    )

    if any(failure in output for failure in sudo_failures):
        return (
            output
            + "\n\n💡 Tip: To enable sudo over messaging, add SUDO_PASSWORD to ~/.hermes/.env on the agent machine."
        )

    return output
def _prompt_for_sudo_password(timeout_seconds: int = 45) -> str:
"""
Prompt user for sudo password with timeout.
Returns the password if entered, or empty string if:
- User presses Enter without input (skip)
- Timeout expires (45s default)
- Any error occurs
Only works in interactive mode (HERMES_INTERACTIVE=1).
If a _sudo_password_callback is registered (by the CLI), delegates to it
so the prompt integrates with prompt_toolkit's UI. Otherwise reads
@ -181,7 +185,7 @@ def _prompt_for_sudo_password(timeout_seconds: int = 45) -> str:
"""
import sys
import time as time_module
# Use the registered callback when available (prompt_toolkit-compatible)
if _sudo_password_callback is not None:
try:
@ -190,13 +194,14 @@ def _prompt_for_sudo_password(timeout_seconds: int = 45) -> str:
return ""
result = {"password": None, "done": False}
def read_password_thread():
"""Read password from /dev/tty with echo disabled."""
tty_fd = None
old_attrs = None
try:
import termios
tty_fd = os.open("/dev/tty", os.O_RDONLY)
old_attrs = termios.tcgetattr(tty_fd)
new_attrs = termios.tcgetattr(tty_fd)
@ -217,6 +222,7 @@ def _prompt_for_sudo_password(timeout_seconds: int = 45) -> str:
if tty_fd is not None and old_attrs is not None:
try:
import termios as _termios
_termios.tcsetattr(tty_fd, _termios.TCSAFLUSH, old_attrs)
except Exception:
pass
@ -226,11 +232,11 @@ def _prompt_for_sudo_password(timeout_seconds: int = 45) -> str:
except Exception:
pass
result["done"] = True
try:
os.environ["HERMES_SPINNER_PAUSE"] = "1"
time_module.sleep(0.2)
print()
print("" + "" * 58 + "")
print("│ 🔐 SUDO PASSWORD REQUIRED" + " " * 30 + "")
@ -241,11 +247,11 @@ def _prompt_for_sudo_password(timeout_seconds: int = 45) -> str:
print("" + "" * 58 + "")
print()
print(" Password (hidden): ", end="", flush=True)
password_thread = threading.Thread(target=read_password_thread, daemon=True)
password_thread.start()
password_thread.join(timeout=timeout_seconds)
if result["done"]:
password = result["password"] or ""
print() # newline after hidden input
@ -262,7 +268,7 @@ def _prompt_for_sudo_password(timeout_seconds: int = 45) -> str:
print()
sys.stdout.flush()
return ""
except (EOFError, KeyboardInterrupt):
print()
print(" ⏭ Cancelled - continuing without sudo")
@ -281,29 +287,29 @@ def _prompt_for_sudo_password(timeout_seconds: int = 45) -> str:
def _transform_sudo_command(command: str) -> str:
"""
Transform sudo commands to use -S flag if SUDO_PASSWORD is available.
This is a shared helper used by all execution environments to provide
consistent sudo handling across local, SSH, and container environments.
If SUDO_PASSWORD is set (via env, config, or interactive prompt):
'sudo apt install curl' -> password piped via sudo -S
If SUDO_PASSWORD is not set and in interactive mode (HERMES_INTERACTIVE=1):
Prompts user for password with 45s timeout, caches for session.
If SUDO_PASSWORD is not set and NOT interactive:
Command runs as-is (fails gracefully with "sudo: a password is required").
"""
global _cached_sudo_password
import re
# Check if command even contains sudo
if not re.search(r'\bsudo\b', command):
if not re.search(r"\bsudo\b", command):
return command # No sudo in command, return as-is
# Try to get password from: env var -> session cache -> interactive prompt
sudo_password = os.getenv("SUDO_PASSWORD", "") or _cached_sudo_password
if not sudo_password:
# No password configured - check if we're in interactive mode
if os.getenv("HERMES_INTERACTIVE"):
@ -311,30 +317,30 @@ def _transform_sudo_command(command: str) -> str:
sudo_password = _prompt_for_sudo_password(timeout_seconds=45)
if sudo_password:
_cached_sudo_password = sudo_password # Cache for session
if not sudo_password:
return command # No password, let it fail gracefully
def replace_sudo(match):
# Replace 'sudo' with password-piped version
# The -S flag makes sudo read password from stdin
# The -p '' suppresses the password prompt
# Use shlex.quote() to prevent shell injection via password content
import shlex
return f"echo {shlex.quote(sudo_password)} | sudo -S -p ''"
# Match 'sudo' at word boundaries (not 'visudo' or 'sudoers')
# This handles: sudo, sudo -flag, etc.
return re.sub(r'\bsudo\b', replace_sudo, command)
return re.sub(r"\bsudo\b", replace_sudo, command)
# Environment classes now live in tools/environments/
from tools.environments.docker import DockerEnvironment as _DockerEnvironment
from tools.environments.local import LocalEnvironment as _LocalEnvironment
from tools.environments.modal import ModalEnvironment as _ModalEnvironment
from tools.environments.singularity import SingularityEnvironment as _SingularityEnvironment
from tools.environments.ssh import SSHEnvironment as _SSHEnvironment
from tools.environments.docker import DockerEnvironment as _DockerEnvironment
from tools.environments.modal import ModalEnvironment as _ModalEnvironment
# Tool description for LLM
TERMINAL_TOOL_DESCRIPTION = """Execute shell commands on a Linux environment. Filesystem persists between calls.
@ -356,10 +362,10 @@ Do NOT use vim/nano/interactive tools without pty=true — they hang without a p
"""
# Global state for environment lifecycle management
_active_environments: dict[str, Any] = {}  # keyed by task id
_last_activity: dict[str, float] = {}  # keyed by task id
_env_lock = threading.Lock()
_creation_locks: dict[str, threading.Lock] = {}  # Per-task locks for sandbox creation
_creation_locks_lock = threading.Lock()  # Protects _creation_locks dict itself
_cleanup_thread = None
_cleanup_running = False
@ -372,10 +378,10 @@ _cleanup_running = False
#
# This is never exposed to the model -- only infrastructure code calls it.
# Thread-safe because each task_id is unique per rollout.
_task_env_overrides: Dict[str, Dict[str, Any]] = {}
_task_env_overrides: dict[str, dict[str, Any]] = {}
def register_task_env_overrides(task_id: str, overrides: Dict[str, Any]):
def register_task_env_overrides(task_id: str, overrides: dict[str, Any]):
"""
Register environment overrides for a specific task/rollout.
@ -402,13 +408,14 @@ def clear_task_env_overrides(task_id: str):
"""
_task_env_overrides.pop(task_id, None)
# Configuration from environment variables
def _get_env_config() -> Dict[str, Any]:
def _get_env_config() -> dict[str, Any]:
"""Get terminal environment configuration from environment variables."""
# Default image with Python and Node.js for maximum compatibility
default_image = "nikolaik/python-nodejs:python3.11-nodejs20"
env_type = os.getenv("TERMINAL_ENV", "local")
# Default cwd: local uses the host's current directory, everything
# else starts in the user's home (~ resolves to whatever account
# is running inside the container/remote).
@ -416,7 +423,7 @@ def _get_env_config() -> Dict[str, Any]:
default_cwd = os.getcwd()
else:
default_cwd = "~"
# Read TERMINAL_CWD but sanity-check it for container backends.
# If the CWD looks like a host-local path that can't exist inside a
# container/sandbox, fall back to the backend's own default. This
@ -426,9 +433,12 @@ def _get_env_config() -> Dict[str, Any]:
if env_type in ("modal", "docker", "singularity", "daytona") and cwd:
host_prefixes = ("/Users/", "C:\\", "C:/")
if any(cwd.startswith(p) for p in host_prefixes) and cwd != default_cwd:
logger.info("Ignoring TERMINAL_CWD=%r for %s backend "
"(host path won't exist in sandbox). Using %r instead.",
cwd, env_type, default_cwd)
logger.info(
"Ignoring TERMINAL_CWD=%r for %s backend (host path won't exist in sandbox). Using %r instead.",
cwd,
env_type,
default_cwd,
)
cwd = default_cwd
return {
@ -447,19 +457,25 @@ def _get_env_config() -> Dict[str, Any]:
"ssh_key": os.getenv("TERMINAL_SSH_KEY", ""),
# Container resource config (applies to docker, singularity, modal, daytona -- ignored for local/ssh)
"container_cpu": float(os.getenv("TERMINAL_CONTAINER_CPU", "1")),
"container_memory": int(os.getenv("TERMINAL_CONTAINER_MEMORY", "5120")), # MB (default 5GB)
"container_disk": int(os.getenv("TERMINAL_CONTAINER_DISK", "51200")), # MB (default 50GB)
"container_memory": int(os.getenv("TERMINAL_CONTAINER_MEMORY", "5120")), # MB (default 5GB)
"container_disk": int(os.getenv("TERMINAL_CONTAINER_DISK", "51200")), # MB (default 50GB)
"container_persistent": os.getenv("TERMINAL_CONTAINER_PERSISTENT", "true").lower() in ("true", "1", "yes"),
"docker_volumes": json.loads(os.getenv("TERMINAL_DOCKER_VOLUMES", "[]")),
}
def _create_environment(env_type: str, image: str, cwd: str, timeout: int,
ssh_config: dict = None, container_config: dict = None,
task_id: str = "default"):
def _create_environment(
env_type: str,
image: str,
cwd: str,
timeout: int,
ssh_config: dict = None,
container_config: dict = None,
task_id: str = "default",
):
"""
Create an execution environment from mini-swe-agent.
Args:
env_type: One of "local", "docker", "singularity", "modal", "daytona", "ssh"
image: Docker/Singularity/Modal image name (ignored for local/ssh)
@ -468,7 +484,7 @@ def _create_environment(env_type: str, image: str, cwd: str, timeout: int,
ssh_config: SSH connection config (for env_type="ssh")
container_config: Resource config for container backends (cpu, memory, disk, persistent)
task_id: Task identifier for environment reuse and snapshot keying
Returns:
Environment instance with execute() method
"""
@ -481,22 +497,32 @@ def _create_environment(env_type: str, image: str, cwd: str, timeout: int,
if env_type == "local":
return _LocalEnvironment(cwd=cwd, timeout=timeout)
elif env_type == "docker":
return _DockerEnvironment(
image=image, cwd=cwd, timeout=timeout,
cpu=cpu, memory=memory, disk=disk,
persistent_filesystem=persistent, task_id=task_id,
image=image,
cwd=cwd,
timeout=timeout,
cpu=cpu,
memory=memory,
disk=disk,
persistent_filesystem=persistent,
task_id=task_id,
volumes=volumes,
)
elif env_type == "singularity":
return _SingularityEnvironment(
image=image, cwd=cwd, timeout=timeout,
cpu=cpu, memory=memory, disk=disk,
persistent_filesystem=persistent, task_id=task_id,
image=image,
cwd=cwd,
timeout=timeout,
cpu=cpu,
memory=memory,
disk=disk,
persistent_filesystem=persistent,
task_id=task_id,
)
elif env_type == "modal":
sandbox_kwargs = {}
if cpu > 0:
@ -505,20 +531,29 @@ def _create_environment(env_type: str, image: str, cwd: str, timeout: int,
sandbox_kwargs["memory"] = memory
if disk > 0:
sandbox_kwargs["ephemeral_disk"] = disk
return _ModalEnvironment(
image=image, cwd=cwd, timeout=timeout,
image=image,
cwd=cwd,
timeout=timeout,
modal_sandbox_kwargs=sandbox_kwargs,
persistent_filesystem=persistent, task_id=task_id,
persistent_filesystem=persistent,
task_id=task_id,
)
elif env_type == "daytona":
# Lazy import so daytona SDK is only required when backend is selected.
from tools.environments.daytona import DaytonaEnvironment as _DaytonaEnvironment
return _DaytonaEnvironment(
image=image, cwd=cwd, timeout=timeout,
cpu=int(cpu), memory=memory, disk=disk,
persistent_filesystem=persistent, task_id=task_id,
image=image,
cwd=cwd,
timeout=timeout,
cpu=int(cpu),
memory=memory,
disk=disk,
persistent_filesystem=persistent,
task_id=task_id,
)
elif env_type == "ssh":
@ -534,7 +569,9 @@ def _create_environment(env_type: str, image: str, cwd: str, timeout: int,
)
else:
raise ValueError(f"Unknown environment type: {env_type}. Use 'local', 'docker', 'singularity', 'modal', 'daytona', or 'ssh'")
raise ValueError(
f"Unknown environment type: {env_type}. Use 'local', 'docker', 'singularity', 'modal', 'daytona', or 'ssh'"
)
def _cleanup_inactive_envs(lifetime_seconds: int = 300):
@ -547,6 +584,7 @@ def _cleanup_inactive_envs(lifetime_seconds: int = 300):
# background processes (their _last_activity gets refreshed to keep them alive).
try:
from tools.process_registry import process_registry
for task_id in list(_last_activity.keys()):
if process_registry.has_active_processes(task_id):
_last_activity[task_id] = current_time # Keep sandbox alive
@ -579,16 +617,17 @@ def _cleanup_inactive_envs(lifetime_seconds: int = 300):
# ShellFileOperations from referencing a dead sandbox)
try:
from tools.file_tools import clear_file_ops_cache
clear_file_ops_cache(task_id)
except ImportError:
pass
try:
if hasattr(env, 'cleanup'):
if hasattr(env, "cleanup"):
env.cleanup()
elif hasattr(env, 'stop'):
elif hasattr(env, "stop"):
env.stop()
elif hasattr(env, 'terminate'):
elif hasattr(env, "terminate"):
env.terminate()
logger.info("Cleaned up inactive environment for task: %s", task_id)
@ -640,27 +679,28 @@ def _stop_cleanup_thread():
pass
def get_active_environments_info() -> dict[str, Any]:
    """Get information about currently active environments.

    Returns:
        A dict with:
          - ``count``: number of active environments,
          - ``task_ids``: their task ids,
          - ``workdirs``: an empty dict (not populated here),
          - ``total_disk_usage_mb``: size in MB (rounded to 2 decimals) of
            the scratch entries matching each active task.
    """
    import glob

    # Snapshot the ids once so "count", "task_ids" and the loop below agree
    # with each other even if the registry changes while we scan the disk.
    task_ids = list(_active_environments.keys())
    info = {
        "count": len(task_ids),
        "task_ids": task_ids,
        "workdirs": {},
    }

    # Calculate total disk usage (per-task to avoid double-counting)
    total_size = 0
    for task_id in task_ids:
        scratch_dir = _get_scratch_dir()
        # Scratch entries are matched on the first 8 characters of the task id.
        pattern = f"hermes-*{task_id[:8]}*"
        for path in glob.glob(str(scratch_dir / pattern)):
            try:
                total_size += sum(f.stat().st_size for f in Path(path).rglob("*") if f.is_file())
            except OSError:
                # A failed stat drops this whole entry from the total.
                pass

    info["total_disk_usage_mb"] = round(total_size / (1024 * 1024), 2)
    return info
@ -668,27 +708,28 @@ def get_active_environments_info() -> Dict[str, Any]:
def cleanup_all_environments():
    """Clean up ALL active environments. Use with caution.

    Calls ``cleanup_vm`` for every task id currently registered, then removes
    every ``hermes-*`` entry left in the scratch directory.

    Returns:
        The number of environments for which ``cleanup_vm`` completed without
        raising.
    """
    import glob

    # Iterate over a snapshot of the task ids rather than the live registry.
    task_ids = list(_active_environments.keys())

    cleaned = 0
    for task_id in task_ids:
        try:
            cleanup_vm(task_id)
            cleaned += 1
        except Exception as e:
            # Keep going: one failing environment must not block the rest.
            logger.error("Error cleaning %s: %s", task_id, e, exc_info=True)

    # Also clean any orphaned directories
    scratch_dir = _get_scratch_dir()
    for path in glob.glob(str(scratch_dir / "hermes-*")):
        try:
            shutil.rmtree(path, ignore_errors=True)
            logger.info("Removed orphaned: %s", path)
        except OSError:
            pass

    if cleaned > 0:
        logger.info("Cleaned %d environments", cleaned)

    return cleaned
@ -713,6 +754,7 @@ def cleanup_vm(task_id: str):
# Invalidate stale file_ops cache entry
try:
from tools.file_tools import clear_file_ops_cache
clear_file_ops_cache(task_id)
except ImportError:
pass
@ -721,11 +763,11 @@ def cleanup_vm(task_id: str):
return
try:
if hasattr(env, 'cleanup'):
if hasattr(env, "cleanup"):
env.cleanup()
elif hasattr(env, 'stop'):
elif hasattr(env, "stop"):
env.stop()
elif hasattr(env, 'terminate'):
elif hasattr(env, "terminate"):
env.terminate()
logger.info("Manually cleaned up environment for task: %s", task_id)
@ -746,17 +788,18 @@ def _atexit_cleanup():
logger.info("Shutting down %d remaining sandbox(es)...", count)
cleanup_all_environments()
atexit.register(_atexit_cleanup)
def terminal_tool(
command: str,
background: bool = False,
timeout: Optional[int] = None,
task_id: Optional[str] = None,
timeout: int | None = None,
task_id: str | None = None,
force: bool = False,
workdir: Optional[str] = None,
check_interval: Optional[int] = None,
workdir: str | None = None,
check_interval: int | None = None,
pty: bool = False,
) -> str:
"""
@ -784,7 +827,7 @@ def terminal_tool(
# With custom timeout
>>> result = terminal_tool(command="long_task.sh", timeout=300)
# Force run after user confirmation
# Note: force parameter is internal only, not exposed to model API
"""
@ -801,7 +844,7 @@ def terminal_tool(
# Check per-task overrides (set by environments like TerminalBench2Env)
# before falling back to global env var config
overrides = _task_env_overrides.get(effective_task_id, {})
# Select image based on env type, with per-task override support
if env_type == "docker":
image = overrides.get("docker_image") or config["docker_image"]
@ -882,12 +925,15 @@ def terminal_tool(
task_id=effective_task_id,
)
except ImportError as e:
return json.dumps({
"output": "",
"exit_code": -1,
"error": f"Terminal tool disabled: mini-swe-agent not available ({e})",
"status": "disabled"
}, ensure_ascii=False)
return json.dumps(
{
"output": "",
"exit_code": -1,
"error": f"Terminal tool disabled: mini-swe-agent not available ({e})",
"status": "disabled",
},
ensure_ascii=False,
)
with _env_lock:
_active_environments[effective_task_id] = new_env
@ -902,27 +948,33 @@ def terminal_tool(
if not approval["approved"]:
# Check if this is an approval_required (gateway ask mode)
if approval.get("status") == "approval_required":
return json.dumps({
"output": "",
"exit_code": -1,
"error": approval.get("message", "Waiting for user approval"),
"status": "approval_required",
"command": approval.get("command", command),
"description": approval.get("description", "dangerous command"),
"pattern_key": approval.get("pattern_key", ""),
}, ensure_ascii=False)
return json.dumps(
{
"output": "",
"exit_code": -1,
"error": approval.get("message", "Waiting for user approval"),
"status": "approval_required",
"command": approval.get("command", command),
"description": approval.get("description", "dangerous command"),
"pattern_key": approval.get("pattern_key", ""),
},
ensure_ascii=False,
)
# Command was blocked - include the pattern category so the caller knows why
desc = approval.get("description", "potentially dangerous operation")
fallback_msg = (
f"Command denied: matches '{desc}' pattern. "
"Use the approval prompt to allow it, or rephrase the command."
)
return json.dumps({
"output": "",
"exit_code": -1,
"error": approval.get("message", fallback_msg),
"status": "blocked"
}, ensure_ascii=False)
return json.dumps(
{
"output": "",
"exit_code": -1,
"error": approval.get("message", fallback_msg),
"status": "blocked",
},
ensure_ascii=False,
)
# Prepare command for execution
if background:
@ -940,7 +992,7 @@ def terminal_tool(
cwd=effective_cwd,
task_id=effective_task_id,
session_key=session_key,
env_vars=env.env if hasattr(env, 'env') else None,
env_vars=env.env if hasattr(env, "env") else None,
use_pty=pty,
)
else:
@ -964,38 +1016,36 @@ def terminal_tool(
max_timeout = effective_timeout
if timeout and timeout > max_timeout:
result_data["timeout_note"] = (
f"Requested timeout {timeout}s was clamped to "
f"configured limit of {max_timeout}s"
f"Requested timeout {timeout}s was clamped to configured limit of {max_timeout}s"
)
# Register check_interval watcher (gateway picks this up after agent run)
if check_interval and background:
effective_interval = max(30, check_interval)
if check_interval < 30:
result_data["check_interval_note"] = (
f"Requested {check_interval}s raised to minimum 30s"
)
process_registry.pending_watchers.append({
"session_id": proc_session.id,
"check_interval": effective_interval,
"session_key": session_key,
"platform": os.getenv("HERMES_SESSION_PLATFORM", ""),
"chat_id": os.getenv("HERMES_SESSION_CHAT_ID", ""),
})
result_data["check_interval_note"] = f"Requested {check_interval}s raised to minimum 30s"
process_registry.pending_watchers.append(
{
"session_id": proc_session.id,
"check_interval": effective_interval,
"session_key": session_key,
"platform": os.getenv("HERMES_SESSION_PLATFORM", ""),
"chat_id": os.getenv("HERMES_SESSION_CHAT_ID", ""),
}
)
return json.dumps(result_data, ensure_ascii=False)
except Exception as e:
return json.dumps({
"output": "",
"exit_code": -1,
"error": f"Failed to start background process: {str(e)}"
}, ensure_ascii=False)
return json.dumps(
{"output": "", "exit_code": -1, "error": f"Failed to start background process: {str(e)}"},
ensure_ascii=False,
)
else:
# Run foreground command with retry logic
max_retries = 3
retry_count = 0
result = None
while retry_count <= max_retries:
try:
execute_kwargs = {"timeout": effective_timeout}
@ -1005,39 +1055,61 @@ def terminal_tool(
except Exception as e:
error_str = str(e).lower()
if "timeout" in error_str:
return json.dumps({
"output": "",
"exit_code": 124,
"error": f"Command timed out after {effective_timeout} seconds"
}, ensure_ascii=False)
return json.dumps(
{
"output": "",
"exit_code": 124,
"error": f"Command timed out after {effective_timeout} seconds",
},
ensure_ascii=False,
)
# Retry on transient errors
if retry_count < max_retries:
retry_count += 1
wait_time = 2 ** retry_count
logger.warning("Execution error, retrying in %ds (attempt %d/%d) - Command: %s - Error: %s: %s - Task: %s, Backend: %s",
wait_time, retry_count, max_retries, command[:200], type(e).__name__, e, effective_task_id, env_type)
wait_time = 2**retry_count
logger.warning(
"Execution error, retrying in %ds (attempt %d/%d) - Command: %s - Error: %s: %s - Task: %s, Backend: %s",
wait_time,
retry_count,
max_retries,
command[:200],
type(e).__name__,
e,
effective_task_id,
env_type,
)
time.sleep(wait_time)
continue
logger.error("Execution failed after %d retries - Command: %s - Error: %s: %s - Task: %s, Backend: %s",
max_retries, command[:200], type(e).__name__, e, effective_task_id, env_type)
return json.dumps({
"output": "",
"exit_code": -1,
"error": f"Command execution failed: {type(e).__name__}: {str(e)}"
}, ensure_ascii=False)
logger.error(
"Execution failed after %d retries - Command: %s - Error: %s: %s - Task: %s, Backend: %s",
max_retries,
command[:200],
type(e).__name__,
e,
effective_task_id,
env_type,
)
return json.dumps(
{
"output": "",
"exit_code": -1,
"error": f"Command execution failed: {type(e).__name__}: {str(e)}",
},
ensure_ascii=False,
)
# Got a result
break
# Extract output
output = result.get("output", "")
returncode = result.get("returncode", 0)
# Add helpful message for sudo failures in messaging context
output = _handle_sudo_failure(output, env_type)
# Truncate output if too long, keeping both head and tail
MAX_OUTPUT_CHARS = 50000
if len(output) > MAX_OUTPUT_CHARS:
@ -1045,65 +1117,56 @@ def terminal_tool(
tail_chars = MAX_OUTPUT_CHARS - head_chars # 60% tail (most recent/relevant output)
omitted = len(output) - head_chars - tail_chars
truncated_notice = (
f"\n\n... [OUTPUT TRUNCATED - {omitted} chars omitted "
f"out of {len(output)} total] ...\n\n"
f"\n\n... [OUTPUT TRUNCATED - {omitted} chars omitted out of {len(output)} total] ...\n\n"
)
output = output[:head_chars] + truncated_notice + output[-tail_chars:]
# Redact secrets from command output (catches env/printenv leaking keys)
from agent.redact import redact_sensitive_text
output = redact_sensitive_text(output.strip()) if output else ""
return json.dumps({
"output": output,
"exit_code": returncode,
"error": None
}, ensure_ascii=False)
return json.dumps({"output": output, "exit_code": returncode, "error": None}, ensure_ascii=False)
except Exception as e:
return json.dumps({
"output": "",
"exit_code": -1,
"error": f"Failed to execute command: {str(e)}",
"status": "error"
}, ensure_ascii=False)
return json.dumps(
{"output": "", "exit_code": -1, "error": f"Failed to execute command: {str(e)}", "status": "error"},
ensure_ascii=False,
)
def check_terminal_requirements() -> bool:
"""Check if all requirements for the terminal tool are met."""
config = _get_env_config()
env_type = config["env_type"]
try:
if env_type == "local":
from minisweagent.environments.local import LocalEnvironment
return True
elif env_type == "docker":
from minisweagent.environments.docker import DockerEnvironment
# Check if docker is available
import subprocess
result = subprocess.run(["docker", "version"], capture_output=True, timeout=5)
return result.returncode == 0
elif env_type == "singularity":
from minisweagent.environments.singularity import SingularityEnvironment
import shutil
# Check if singularity/apptainer is available
import subprocess
import shutil
executable = shutil.which("apptainer") or shutil.which("singularity")
if executable:
result = subprocess.run([executable, "--version"], capture_output=True, timeout=5)
return result.returncode == 0
return False
elif env_type == "ssh":
from tools.environments.ssh import SSHEnvironment
# Check that host and user are configured
return bool(config.get("ssh_host")) and bool(config.get("ssh_user"))
elif env_type == "modal":
from minisweagent.environments.extra.swerex_modal import SwerexModalEnvironment
# Check for modal token
return os.getenv("MODAL_TOKEN_ID") is not None or Path.home().joinpath(".modal.toml").exists()
elif env_type == "daytona":
from daytona import Daytona
return os.getenv("DAYTONA_API_KEY") is not None
else:
return False
@ -1116,9 +1179,9 @@ if __name__ == "__main__":
# Simple test when run directly
print("Terminal Tool Module (mini-swe-agent backend)")
print("=" * 50)
config = _get_env_config()
print(f"\nCurrent Configuration:")
print("\nCurrent Configuration:")
print(f" Environment type: {config['env_type']}")
print(f" Docker image: {config['docker_image']}")
print(f" Modal image: {config['modal_image']}")
@ -1165,37 +1228,34 @@ TERMINAL_SCHEMA = {
"parameters": {
"type": "object",
"properties": {
"command": {
"type": "string",
"description": "The command to execute on the VM"
},
"command": {"type": "string", "description": "The command to execute on the VM"},
"background": {
"type": "boolean",
"description": "ONLY for servers/watchers that never exit. For scripts, builds, installs — use foreground with timeout instead (it returns instantly when done).",
"default": False
"default": False,
},
"timeout": {
"type": "integer",
"description": "Max seconds to wait (default: 180). Returns INSTANTLY when command finishes — set high for long tasks, you won't wait unnecessarily.",
"minimum": 1
"minimum": 1,
},
"workdir": {
"type": "string",
"description": "Working directory for this command (absolute path). Defaults to the session working directory."
"description": "Working directory for this command (absolute path). Defaults to the session working directory.",
},
"check_interval": {
"type": "integer",
"description": "Seconds between automatic status checks for background processes (gateway/messaging only, minimum 30). When set, I'll proactively report progress.",
"minimum": 30
"minimum": 30,
},
"pty": {
"type": "boolean",
"description": "Run in pseudo-terminal (PTY) mode for interactive CLI tools like Codex, Claude Code, or Python REPL. Only works with local and SSH backends. Default: false.",
"default": False
}
"default": False,
},
},
"required": ["command"]
}
"required": ["command"],
},
}