mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-28 01:21:43 +00:00
feat: devex help, add Makefile, ruff, pre-commit, and modernize CI
This commit is contained in:
parent
172a38c344
commit
f4d7e6a29e
111 changed files with 11655 additions and 10200 deletions
|
|
@ -26,20 +26,22 @@ Usage:
|
|||
result = terminal_tool("python server.py", background=True)
|
||||
"""
|
||||
|
||||
import atexit
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import signal
|
||||
import sys
|
||||
import time
|
||||
import threading
|
||||
import atexit
|
||||
import shutil
|
||||
import subprocess
|
||||
import tempfile
|
||||
import uuid
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Optional, Dict, Any
|
||||
from typing import Any
|
||||
|
||||
from tools.interrupt import (
|
||||
_interrupt_event, # noqa: F401 — re-exported to environments/local.py
|
||||
is_interrupted, # noqa: F401 — re-exported
|
||||
)
|
||||
from tools.interrupt import set_interrupt as set_interrupt_event # noqa: F401 — re-exported
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -49,7 +51,6 @@ logger = logging.getLogger(__name__)
|
|||
# The terminal tool polls this during command execution so it can kill
|
||||
# long-running subprocesses immediately instead of blocking until timeout.
|
||||
# ---------------------------------------------------------------------------
|
||||
from tools.interrupt import set_interrupt as set_interrupt_event, is_interrupted, _interrupt_event
|
||||
|
||||
|
||||
# Add mini-swe-agent to path if not installed
|
||||
|
|
@ -65,7 +66,6 @@ if mini_swe_path.exists():
|
|||
# Singularity helpers (scratch dir, SIF cache) now live in tools/environments/singularity.py
|
||||
from tools.environments.singularity import _get_scratch_dir
|
||||
|
||||
|
||||
# Disk usage warning threshold (in GB)
|
||||
DISK_USAGE_WARNING_THRESHOLD_GB = float(os.getenv("TERMINAL_DISK_WARNING_GB", "500"))
|
||||
|
||||
|
|
@ -73,28 +73,32 @@ DISK_USAGE_WARNING_THRESHOLD_GB = float(os.getenv("TERMINAL_DISK_WARNING_GB", "5
|
|||
def _check_disk_usage_warning():
|
||||
"""Check if total disk usage exceeds warning threshold."""
|
||||
scratch_dir = _get_scratch_dir()
|
||||
|
||||
|
||||
try:
|
||||
# Get total size of hermes directories
|
||||
total_bytes = 0
|
||||
import glob
|
||||
|
||||
for path in glob.glob(str(scratch_dir / "hermes-*")):
|
||||
for f in Path(path).rglob('*'):
|
||||
for f in Path(path).rglob("*"):
|
||||
if f.is_file():
|
||||
try:
|
||||
total_bytes += f.stat().st_size
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
total_gb = total_bytes / (1024 ** 3)
|
||||
|
||||
|
||||
total_gb = total_bytes / (1024**3)
|
||||
|
||||
if total_gb > DISK_USAGE_WARNING_THRESHOLD_GB:
|
||||
logger.warning("Disk usage (%.1fGB) exceeds threshold (%.0fGB). Consider running cleanup_all_environments().",
|
||||
total_gb, DISK_USAGE_WARNING_THRESHOLD_GB)
|
||||
logger.warning(
|
||||
"Disk usage (%.1fGB) exceeds threshold (%.0fGB). Consider running cleanup_all_environments().",
|
||||
total_gb,
|
||||
DISK_USAGE_WARNING_THRESHOLD_GB,
|
||||
)
|
||||
return True
|
||||
|
||||
|
||||
return False
|
||||
except Exception as e:
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
|
|
@ -121,59 +125,59 @@ def set_approval_callback(cb):
|
|||
global _approval_callback
|
||||
_approval_callback = cb
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Dangerous Command Approval System
|
||||
# =============================================================================
|
||||
|
||||
# Dangerous command detection + approval now consolidated in tools/approval.py
|
||||
from tools.approval import (
|
||||
detect_dangerous_command as _detect_dangerous_command,
|
||||
check_dangerous_command as _check_dangerous_command_impl,
|
||||
load_permanent_allowlist as _load_permanent_allowlist,
|
||||
DANGEROUS_PATTERNS,
|
||||
)
|
||||
|
||||
|
||||
def _check_dangerous_command(command: str, env_type: str) -> dict:
|
||||
"""Delegate to the consolidated approval module, passing the CLI callback."""
|
||||
return _check_dangerous_command_impl(command, env_type,
|
||||
approval_callback=_approval_callback)
|
||||
return _check_dangerous_command_impl(command, env_type, approval_callback=_approval_callback)
|
||||
|
||||
|
||||
def _handle_sudo_failure(output: str, env_type: str) -> str:
|
||||
"""
|
||||
Check for sudo failure and add helpful message for messaging contexts.
|
||||
|
||||
|
||||
Returns enhanced output if sudo failed in messaging context, else original.
|
||||
"""
|
||||
is_gateway = os.getenv("HERMES_GATEWAY_SESSION")
|
||||
|
||||
|
||||
if not is_gateway:
|
||||
return output
|
||||
|
||||
|
||||
# Check for sudo failure indicators
|
||||
sudo_failures = [
|
||||
"sudo: a password is required",
|
||||
"sudo: no tty present",
|
||||
"sudo: a terminal is required",
|
||||
]
|
||||
|
||||
|
||||
for failure in sudo_failures:
|
||||
if failure in output:
|
||||
return output + "\n\n💡 Tip: To enable sudo over messaging, add SUDO_PASSWORD to ~/.hermes/.env on the agent machine."
|
||||
|
||||
return (
|
||||
output
|
||||
+ "\n\n💡 Tip: To enable sudo over messaging, add SUDO_PASSWORD to ~/.hermes/.env on the agent machine."
|
||||
)
|
||||
|
||||
return output
|
||||
|
||||
|
||||
def _prompt_for_sudo_password(timeout_seconds: int = 45) -> str:
|
||||
"""
|
||||
Prompt user for sudo password with timeout.
|
||||
|
||||
|
||||
Returns the password if entered, or empty string if:
|
||||
- User presses Enter without input (skip)
|
||||
- Timeout expires (45s default)
|
||||
- Any error occurs
|
||||
|
||||
|
||||
Only works in interactive mode (HERMES_INTERACTIVE=1).
|
||||
If a _sudo_password_callback is registered (by the CLI), delegates to it
|
||||
so the prompt integrates with prompt_toolkit's UI. Otherwise reads
|
||||
|
|
@ -181,7 +185,7 @@ def _prompt_for_sudo_password(timeout_seconds: int = 45) -> str:
|
|||
"""
|
||||
import sys
|
||||
import time as time_module
|
||||
|
||||
|
||||
# Use the registered callback when available (prompt_toolkit-compatible)
|
||||
if _sudo_password_callback is not None:
|
||||
try:
|
||||
|
|
@ -190,13 +194,14 @@ def _prompt_for_sudo_password(timeout_seconds: int = 45) -> str:
|
|||
return ""
|
||||
|
||||
result = {"password": None, "done": False}
|
||||
|
||||
|
||||
def read_password_thread():
|
||||
"""Read password from /dev/tty with echo disabled."""
|
||||
tty_fd = None
|
||||
old_attrs = None
|
||||
try:
|
||||
import termios
|
||||
|
||||
tty_fd = os.open("/dev/tty", os.O_RDONLY)
|
||||
old_attrs = termios.tcgetattr(tty_fd)
|
||||
new_attrs = termios.tcgetattr(tty_fd)
|
||||
|
|
@ -217,6 +222,7 @@ def _prompt_for_sudo_password(timeout_seconds: int = 45) -> str:
|
|||
if tty_fd is not None and old_attrs is not None:
|
||||
try:
|
||||
import termios as _termios
|
||||
|
||||
_termios.tcsetattr(tty_fd, _termios.TCSAFLUSH, old_attrs)
|
||||
except Exception:
|
||||
pass
|
||||
|
|
@ -226,11 +232,11 @@ def _prompt_for_sudo_password(timeout_seconds: int = 45) -> str:
|
|||
except Exception:
|
||||
pass
|
||||
result["done"] = True
|
||||
|
||||
|
||||
try:
|
||||
os.environ["HERMES_SPINNER_PAUSE"] = "1"
|
||||
time_module.sleep(0.2)
|
||||
|
||||
|
||||
print()
|
||||
print("┌" + "─" * 58 + "┐")
|
||||
print("│ 🔐 SUDO PASSWORD REQUIRED" + " " * 30 + "│")
|
||||
|
|
@ -241,11 +247,11 @@ def _prompt_for_sudo_password(timeout_seconds: int = 45) -> str:
|
|||
print("└" + "─" * 58 + "┘")
|
||||
print()
|
||||
print(" Password (hidden): ", end="", flush=True)
|
||||
|
||||
|
||||
password_thread = threading.Thread(target=read_password_thread, daemon=True)
|
||||
password_thread.start()
|
||||
password_thread.join(timeout=timeout_seconds)
|
||||
|
||||
|
||||
if result["done"]:
|
||||
password = result["password"] or ""
|
||||
print() # newline after hidden input
|
||||
|
|
@ -262,7 +268,7 @@ def _prompt_for_sudo_password(timeout_seconds: int = 45) -> str:
|
|||
print()
|
||||
sys.stdout.flush()
|
||||
return ""
|
||||
|
||||
|
||||
except (EOFError, KeyboardInterrupt):
|
||||
print()
|
||||
print(" ⏭ Cancelled - continuing without sudo")
|
||||
|
|
@ -281,29 +287,29 @@ def _prompt_for_sudo_password(timeout_seconds: int = 45) -> str:
|
|||
def _transform_sudo_command(command: str) -> str:
|
||||
"""
|
||||
Transform sudo commands to use -S flag if SUDO_PASSWORD is available.
|
||||
|
||||
|
||||
This is a shared helper used by all execution environments to provide
|
||||
consistent sudo handling across local, SSH, and container environments.
|
||||
|
||||
|
||||
If SUDO_PASSWORD is set (via env, config, or interactive prompt):
|
||||
'sudo apt install curl' -> password piped via sudo -S
|
||||
|
||||
|
||||
If SUDO_PASSWORD is not set and in interactive mode (HERMES_INTERACTIVE=1):
|
||||
Prompts user for password with 45s timeout, caches for session.
|
||||
|
||||
|
||||
If SUDO_PASSWORD is not set and NOT interactive:
|
||||
Command runs as-is (fails gracefully with "sudo: a password is required").
|
||||
"""
|
||||
global _cached_sudo_password
|
||||
import re
|
||||
|
||||
|
||||
# Check if command even contains sudo
|
||||
if not re.search(r'\bsudo\b', command):
|
||||
if not re.search(r"\bsudo\b", command):
|
||||
return command # No sudo in command, return as-is
|
||||
|
||||
|
||||
# Try to get password from: env var -> session cache -> interactive prompt
|
||||
sudo_password = os.getenv("SUDO_PASSWORD", "") or _cached_sudo_password
|
||||
|
||||
|
||||
if not sudo_password:
|
||||
# No password configured - check if we're in interactive mode
|
||||
if os.getenv("HERMES_INTERACTIVE"):
|
||||
|
|
@ -311,30 +317,30 @@ def _transform_sudo_command(command: str) -> str:
|
|||
sudo_password = _prompt_for_sudo_password(timeout_seconds=45)
|
||||
if sudo_password:
|
||||
_cached_sudo_password = sudo_password # Cache for session
|
||||
|
||||
|
||||
if not sudo_password:
|
||||
return command # No password, let it fail gracefully
|
||||
|
||||
|
||||
def replace_sudo(match):
|
||||
# Replace 'sudo' with password-piped version
|
||||
# The -S flag makes sudo read password from stdin
|
||||
# The -p '' suppresses the password prompt
|
||||
# Use shlex.quote() to prevent shell injection via password content
|
||||
import shlex
|
||||
|
||||
return f"echo {shlex.quote(sudo_password)} | sudo -S -p ''"
|
||||
|
||||
|
||||
# Match 'sudo' at word boundaries (not 'visudo' or 'sudoers')
|
||||
# This handles: sudo, sudo -flag, etc.
|
||||
return re.sub(r'\bsudo\b', replace_sudo, command)
|
||||
return re.sub(r"\bsudo\b", replace_sudo, command)
|
||||
|
||||
|
||||
# Environment classes now live in tools/environments/
|
||||
from tools.environments.docker import DockerEnvironment as _DockerEnvironment
|
||||
from tools.environments.local import LocalEnvironment as _LocalEnvironment
|
||||
from tools.environments.modal import ModalEnvironment as _ModalEnvironment
|
||||
from tools.environments.singularity import SingularityEnvironment as _SingularityEnvironment
|
||||
from tools.environments.ssh import SSHEnvironment as _SSHEnvironment
|
||||
from tools.environments.docker import DockerEnvironment as _DockerEnvironment
|
||||
from tools.environments.modal import ModalEnvironment as _ModalEnvironment
|
||||
|
||||
|
||||
# Tool description for LLM
|
||||
TERMINAL_TOOL_DESCRIPTION = """Execute shell commands on a Linux environment. Filesystem persists between calls.
|
||||
|
|
@ -356,10 +362,10 @@ Do NOT use vim/nano/interactive tools without pty=true — they hang without a p
|
|||
"""
|
||||
|
||||
# Global state for environment lifecycle management
|
||||
_active_environments: Dict[str, Any] = {}
|
||||
_last_activity: Dict[str, float] = {}
|
||||
_active_environments: dict[str, Any] = {}
|
||||
_last_activity: dict[str, float] = {}
|
||||
_env_lock = threading.Lock()
|
||||
_creation_locks: Dict[str, threading.Lock] = {} # Per-task locks for sandbox creation
|
||||
_creation_locks: dict[str, threading.Lock] = {} # Per-task locks for sandbox creation
|
||||
_creation_locks_lock = threading.Lock() # Protects _creation_locks dict itself
|
||||
_cleanup_thread = None
|
||||
_cleanup_running = False
|
||||
|
|
@ -372,10 +378,10 @@ _cleanup_running = False
|
|||
#
|
||||
# This is never exposed to the model -- only infrastructure code calls it.
|
||||
# Thread-safe because each task_id is unique per rollout.
|
||||
_task_env_overrides: Dict[str, Dict[str, Any]] = {}
|
||||
_task_env_overrides: dict[str, dict[str, Any]] = {}
|
||||
|
||||
|
||||
def register_task_env_overrides(task_id: str, overrides: Dict[str, Any]):
|
||||
def register_task_env_overrides(task_id: str, overrides: dict[str, Any]):
|
||||
"""
|
||||
Register environment overrides for a specific task/rollout.
|
||||
|
||||
|
|
@ -402,13 +408,14 @@ def clear_task_env_overrides(task_id: str):
|
|||
"""
|
||||
_task_env_overrides.pop(task_id, None)
|
||||
|
||||
|
||||
# Configuration from environment variables
|
||||
def _get_env_config() -> Dict[str, Any]:
|
||||
def _get_env_config() -> dict[str, Any]:
|
||||
"""Get terminal environment configuration from environment variables."""
|
||||
# Default image with Python and Node.js for maximum compatibility
|
||||
default_image = "nikolaik/python-nodejs:python3.11-nodejs20"
|
||||
env_type = os.getenv("TERMINAL_ENV", "local")
|
||||
|
||||
|
||||
# Default cwd: local uses the host's current directory, everything
|
||||
# else starts in the user's home (~ resolves to whatever account
|
||||
# is running inside the container/remote).
|
||||
|
|
@ -416,7 +423,7 @@ def _get_env_config() -> Dict[str, Any]:
|
|||
default_cwd = os.getcwd()
|
||||
else:
|
||||
default_cwd = "~"
|
||||
|
||||
|
||||
# Read TERMINAL_CWD but sanity-check it for container backends.
|
||||
# If the CWD looks like a host-local path that can't exist inside a
|
||||
# container/sandbox, fall back to the backend's own default. This
|
||||
|
|
@ -426,9 +433,12 @@ def _get_env_config() -> Dict[str, Any]:
|
|||
if env_type in ("modal", "docker", "singularity", "daytona") and cwd:
|
||||
host_prefixes = ("/Users/", "C:\\", "C:/")
|
||||
if any(cwd.startswith(p) for p in host_prefixes) and cwd != default_cwd:
|
||||
logger.info("Ignoring TERMINAL_CWD=%r for %s backend "
|
||||
"(host path won't exist in sandbox). Using %r instead.",
|
||||
cwd, env_type, default_cwd)
|
||||
logger.info(
|
||||
"Ignoring TERMINAL_CWD=%r for %s backend (host path won't exist in sandbox). Using %r instead.",
|
||||
cwd,
|
||||
env_type,
|
||||
default_cwd,
|
||||
)
|
||||
cwd = default_cwd
|
||||
|
||||
return {
|
||||
|
|
@ -447,19 +457,25 @@ def _get_env_config() -> Dict[str, Any]:
|
|||
"ssh_key": os.getenv("TERMINAL_SSH_KEY", ""),
|
||||
# Container resource config (applies to docker, singularity, modal, daytona -- ignored for local/ssh)
|
||||
"container_cpu": float(os.getenv("TERMINAL_CONTAINER_CPU", "1")),
|
||||
"container_memory": int(os.getenv("TERMINAL_CONTAINER_MEMORY", "5120")), # MB (default 5GB)
|
||||
"container_disk": int(os.getenv("TERMINAL_CONTAINER_DISK", "51200")), # MB (default 50GB)
|
||||
"container_memory": int(os.getenv("TERMINAL_CONTAINER_MEMORY", "5120")), # MB (default 5GB)
|
||||
"container_disk": int(os.getenv("TERMINAL_CONTAINER_DISK", "51200")), # MB (default 50GB)
|
||||
"container_persistent": os.getenv("TERMINAL_CONTAINER_PERSISTENT", "true").lower() in ("true", "1", "yes"),
|
||||
"docker_volumes": json.loads(os.getenv("TERMINAL_DOCKER_VOLUMES", "[]")),
|
||||
}
|
||||
|
||||
|
||||
def _create_environment(env_type: str, image: str, cwd: str, timeout: int,
|
||||
ssh_config: dict = None, container_config: dict = None,
|
||||
task_id: str = "default"):
|
||||
def _create_environment(
|
||||
env_type: str,
|
||||
image: str,
|
||||
cwd: str,
|
||||
timeout: int,
|
||||
ssh_config: dict = None,
|
||||
container_config: dict = None,
|
||||
task_id: str = "default",
|
||||
):
|
||||
"""
|
||||
Create an execution environment from mini-swe-agent.
|
||||
|
||||
|
||||
Args:
|
||||
env_type: One of "local", "docker", "singularity", "modal", "daytona", "ssh"
|
||||
image: Docker/Singularity/Modal image name (ignored for local/ssh)
|
||||
|
|
@ -468,7 +484,7 @@ def _create_environment(env_type: str, image: str, cwd: str, timeout: int,
|
|||
ssh_config: SSH connection config (for env_type="ssh")
|
||||
container_config: Resource config for container backends (cpu, memory, disk, persistent)
|
||||
task_id: Task identifier for environment reuse and snapshot keying
|
||||
|
||||
|
||||
Returns:
|
||||
Environment instance with execute() method
|
||||
"""
|
||||
|
|
@ -481,22 +497,32 @@ def _create_environment(env_type: str, image: str, cwd: str, timeout: int,
|
|||
|
||||
if env_type == "local":
|
||||
return _LocalEnvironment(cwd=cwd, timeout=timeout)
|
||||
|
||||
|
||||
elif env_type == "docker":
|
||||
return _DockerEnvironment(
|
||||
image=image, cwd=cwd, timeout=timeout,
|
||||
cpu=cpu, memory=memory, disk=disk,
|
||||
persistent_filesystem=persistent, task_id=task_id,
|
||||
image=image,
|
||||
cwd=cwd,
|
||||
timeout=timeout,
|
||||
cpu=cpu,
|
||||
memory=memory,
|
||||
disk=disk,
|
||||
persistent_filesystem=persistent,
|
||||
task_id=task_id,
|
||||
volumes=volumes,
|
||||
)
|
||||
|
||||
|
||||
elif env_type == "singularity":
|
||||
return _SingularityEnvironment(
|
||||
image=image, cwd=cwd, timeout=timeout,
|
||||
cpu=cpu, memory=memory, disk=disk,
|
||||
persistent_filesystem=persistent, task_id=task_id,
|
||||
image=image,
|
||||
cwd=cwd,
|
||||
timeout=timeout,
|
||||
cpu=cpu,
|
||||
memory=memory,
|
||||
disk=disk,
|
||||
persistent_filesystem=persistent,
|
||||
task_id=task_id,
|
||||
)
|
||||
|
||||
|
||||
elif env_type == "modal":
|
||||
sandbox_kwargs = {}
|
||||
if cpu > 0:
|
||||
|
|
@ -505,20 +531,29 @@ def _create_environment(env_type: str, image: str, cwd: str, timeout: int,
|
|||
sandbox_kwargs["memory"] = memory
|
||||
if disk > 0:
|
||||
sandbox_kwargs["ephemeral_disk"] = disk
|
||||
|
||||
|
||||
return _ModalEnvironment(
|
||||
image=image, cwd=cwd, timeout=timeout,
|
||||
image=image,
|
||||
cwd=cwd,
|
||||
timeout=timeout,
|
||||
modal_sandbox_kwargs=sandbox_kwargs,
|
||||
persistent_filesystem=persistent, task_id=task_id,
|
||||
persistent_filesystem=persistent,
|
||||
task_id=task_id,
|
||||
)
|
||||
|
||||
|
||||
elif env_type == "daytona":
|
||||
# Lazy import so daytona SDK is only required when backend is selected.
|
||||
from tools.environments.daytona import DaytonaEnvironment as _DaytonaEnvironment
|
||||
|
||||
return _DaytonaEnvironment(
|
||||
image=image, cwd=cwd, timeout=timeout,
|
||||
cpu=int(cpu), memory=memory, disk=disk,
|
||||
persistent_filesystem=persistent, task_id=task_id,
|
||||
image=image,
|
||||
cwd=cwd,
|
||||
timeout=timeout,
|
||||
cpu=int(cpu),
|
||||
memory=memory,
|
||||
disk=disk,
|
||||
persistent_filesystem=persistent,
|
||||
task_id=task_id,
|
||||
)
|
||||
|
||||
elif env_type == "ssh":
|
||||
|
|
@ -534,7 +569,9 @@ def _create_environment(env_type: str, image: str, cwd: str, timeout: int,
|
|||
)
|
||||
|
||||
else:
|
||||
raise ValueError(f"Unknown environment type: {env_type}. Use 'local', 'docker', 'singularity', 'modal', 'daytona', or 'ssh'")
|
||||
raise ValueError(
|
||||
f"Unknown environment type: {env_type}. Use 'local', 'docker', 'singularity', 'modal', 'daytona', or 'ssh'"
|
||||
)
|
||||
|
||||
|
||||
def _cleanup_inactive_envs(lifetime_seconds: int = 300):
|
||||
|
|
@ -547,6 +584,7 @@ def _cleanup_inactive_envs(lifetime_seconds: int = 300):
|
|||
# background processes (their _last_activity gets refreshed to keep them alive).
|
||||
try:
|
||||
from tools.process_registry import process_registry
|
||||
|
||||
for task_id in list(_last_activity.keys()):
|
||||
if process_registry.has_active_processes(task_id):
|
||||
_last_activity[task_id] = current_time # Keep sandbox alive
|
||||
|
|
@ -579,16 +617,17 @@ def _cleanup_inactive_envs(lifetime_seconds: int = 300):
|
|||
# ShellFileOperations from referencing a dead sandbox)
|
||||
try:
|
||||
from tools.file_tools import clear_file_ops_cache
|
||||
|
||||
clear_file_ops_cache(task_id)
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
try:
|
||||
if hasattr(env, 'cleanup'):
|
||||
if hasattr(env, "cleanup"):
|
||||
env.cleanup()
|
||||
elif hasattr(env, 'stop'):
|
||||
elif hasattr(env, "stop"):
|
||||
env.stop()
|
||||
elif hasattr(env, 'terminate'):
|
||||
elif hasattr(env, "terminate"):
|
||||
env.terminate()
|
||||
|
||||
logger.info("Cleaned up inactive environment for task: %s", task_id)
|
||||
|
|
@ -640,27 +679,28 @@ def _stop_cleanup_thread():
|
|||
pass
|
||||
|
||||
|
||||
def get_active_environments_info() -> Dict[str, Any]:
|
||||
def get_active_environments_info() -> dict[str, Any]:
|
||||
"""Get information about currently active environments."""
|
||||
info = {
|
||||
"count": len(_active_environments),
|
||||
"task_ids": list(_active_environments.keys()),
|
||||
"workdirs": {},
|
||||
}
|
||||
|
||||
|
||||
# Calculate total disk usage (per-task to avoid double-counting)
|
||||
total_size = 0
|
||||
for task_id in _active_environments.keys():
|
||||
for task_id in _active_environments:
|
||||
scratch_dir = _get_scratch_dir()
|
||||
pattern = f"hermes-*{task_id[:8]}*"
|
||||
import glob
|
||||
|
||||
for path in glob.glob(str(scratch_dir / pattern)):
|
||||
try:
|
||||
size = sum(f.stat().st_size for f in Path(path).rglob('*') if f.is_file())
|
||||
size = sum(f.stat().st_size for f in Path(path).rglob("*") if f.is_file())
|
||||
total_size += size
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
|
||||
info["total_disk_usage_mb"] = round(total_size / (1024 * 1024), 2)
|
||||
return info
|
||||
|
||||
|
|
@ -668,27 +708,28 @@ def get_active_environments_info() -> Dict[str, Any]:
|
|||
def cleanup_all_environments():
|
||||
"""Clean up ALL active environments. Use with caution."""
|
||||
global _active_environments, _last_activity
|
||||
|
||||
|
||||
task_ids = list(_active_environments.keys())
|
||||
cleaned = 0
|
||||
|
||||
|
||||
for task_id in task_ids:
|
||||
try:
|
||||
cleanup_vm(task_id)
|
||||
cleaned += 1
|
||||
except Exception as e:
|
||||
logger.error("Error cleaning %s: %s", task_id, e, exc_info=True)
|
||||
|
||||
|
||||
# Also clean any orphaned directories
|
||||
scratch_dir = _get_scratch_dir()
|
||||
import glob
|
||||
|
||||
for path in glob.glob(str(scratch_dir / "hermes-*")):
|
||||
try:
|
||||
shutil.rmtree(path, ignore_errors=True)
|
||||
logger.info("Removed orphaned: %s", path)
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
|
||||
if cleaned > 0:
|
||||
logger.info("Cleaned %d environments", cleaned)
|
||||
return cleaned
|
||||
|
|
@ -713,6 +754,7 @@ def cleanup_vm(task_id: str):
|
|||
# Invalidate stale file_ops cache entry
|
||||
try:
|
||||
from tools.file_tools import clear_file_ops_cache
|
||||
|
||||
clear_file_ops_cache(task_id)
|
||||
except ImportError:
|
||||
pass
|
||||
|
|
@ -721,11 +763,11 @@ def cleanup_vm(task_id: str):
|
|||
return
|
||||
|
||||
try:
|
||||
if hasattr(env, 'cleanup'):
|
||||
if hasattr(env, "cleanup"):
|
||||
env.cleanup()
|
||||
elif hasattr(env, 'stop'):
|
||||
elif hasattr(env, "stop"):
|
||||
env.stop()
|
||||
elif hasattr(env, 'terminate'):
|
||||
elif hasattr(env, "terminate"):
|
||||
env.terminate()
|
||||
|
||||
logger.info("Manually cleaned up environment for task: %s", task_id)
|
||||
|
|
@ -746,17 +788,18 @@ def _atexit_cleanup():
|
|||
logger.info("Shutting down %d remaining sandbox(es)...", count)
|
||||
cleanup_all_environments()
|
||||
|
||||
|
||||
atexit.register(_atexit_cleanup)
|
||||
|
||||
|
||||
def terminal_tool(
|
||||
command: str,
|
||||
background: bool = False,
|
||||
timeout: Optional[int] = None,
|
||||
task_id: Optional[str] = None,
|
||||
timeout: int | None = None,
|
||||
task_id: str | None = None,
|
||||
force: bool = False,
|
||||
workdir: Optional[str] = None,
|
||||
check_interval: Optional[int] = None,
|
||||
workdir: str | None = None,
|
||||
check_interval: int | None = None,
|
||||
pty: bool = False,
|
||||
) -> str:
|
||||
"""
|
||||
|
|
@ -784,7 +827,7 @@ def terminal_tool(
|
|||
|
||||
# With custom timeout
|
||||
>>> result = terminal_tool(command="long_task.sh", timeout=300)
|
||||
|
||||
|
||||
# Force run after user confirmation
|
||||
# Note: force parameter is internal only, not exposed to model API
|
||||
"""
|
||||
|
|
@ -801,7 +844,7 @@ def terminal_tool(
|
|||
# Check per-task overrides (set by environments like TerminalBench2Env)
|
||||
# before falling back to global env var config
|
||||
overrides = _task_env_overrides.get(effective_task_id, {})
|
||||
|
||||
|
||||
# Select image based on env type, with per-task override support
|
||||
if env_type == "docker":
|
||||
image = overrides.get("docker_image") or config["docker_image"]
|
||||
|
|
@ -882,12 +925,15 @@ def terminal_tool(
|
|||
task_id=effective_task_id,
|
||||
)
|
||||
except ImportError as e:
|
||||
return json.dumps({
|
||||
"output": "",
|
||||
"exit_code": -1,
|
||||
"error": f"Terminal tool disabled: mini-swe-agent not available ({e})",
|
||||
"status": "disabled"
|
||||
}, ensure_ascii=False)
|
||||
return json.dumps(
|
||||
{
|
||||
"output": "",
|
||||
"exit_code": -1,
|
||||
"error": f"Terminal tool disabled: mini-swe-agent not available ({e})",
|
||||
"status": "disabled",
|
||||
},
|
||||
ensure_ascii=False,
|
||||
)
|
||||
|
||||
with _env_lock:
|
||||
_active_environments[effective_task_id] = new_env
|
||||
|
|
@ -902,27 +948,33 @@ def terminal_tool(
|
|||
if not approval["approved"]:
|
||||
# Check if this is an approval_required (gateway ask mode)
|
||||
if approval.get("status") == "approval_required":
|
||||
return json.dumps({
|
||||
"output": "",
|
||||
"exit_code": -1,
|
||||
"error": approval.get("message", "Waiting for user approval"),
|
||||
"status": "approval_required",
|
||||
"command": approval.get("command", command),
|
||||
"description": approval.get("description", "dangerous command"),
|
||||
"pattern_key": approval.get("pattern_key", ""),
|
||||
}, ensure_ascii=False)
|
||||
return json.dumps(
|
||||
{
|
||||
"output": "",
|
||||
"exit_code": -1,
|
||||
"error": approval.get("message", "Waiting for user approval"),
|
||||
"status": "approval_required",
|
||||
"command": approval.get("command", command),
|
||||
"description": approval.get("description", "dangerous command"),
|
||||
"pattern_key": approval.get("pattern_key", ""),
|
||||
},
|
||||
ensure_ascii=False,
|
||||
)
|
||||
# Command was blocked - include the pattern category so the caller knows why
|
||||
desc = approval.get("description", "potentially dangerous operation")
|
||||
fallback_msg = (
|
||||
f"Command denied: matches '{desc}' pattern. "
|
||||
"Use the approval prompt to allow it, or rephrase the command."
|
||||
)
|
||||
return json.dumps({
|
||||
"output": "",
|
||||
"exit_code": -1,
|
||||
"error": approval.get("message", fallback_msg),
|
||||
"status": "blocked"
|
||||
}, ensure_ascii=False)
|
||||
return json.dumps(
|
||||
{
|
||||
"output": "",
|
||||
"exit_code": -1,
|
||||
"error": approval.get("message", fallback_msg),
|
||||
"status": "blocked",
|
||||
},
|
||||
ensure_ascii=False,
|
||||
)
|
||||
|
||||
# Prepare command for execution
|
||||
if background:
|
||||
|
|
@ -940,7 +992,7 @@ def terminal_tool(
|
|||
cwd=effective_cwd,
|
||||
task_id=effective_task_id,
|
||||
session_key=session_key,
|
||||
env_vars=env.env if hasattr(env, 'env') else None,
|
||||
env_vars=env.env if hasattr(env, "env") else None,
|
||||
use_pty=pty,
|
||||
)
|
||||
else:
|
||||
|
|
@ -964,38 +1016,36 @@ def terminal_tool(
|
|||
max_timeout = effective_timeout
|
||||
if timeout and timeout > max_timeout:
|
||||
result_data["timeout_note"] = (
|
||||
f"Requested timeout {timeout}s was clamped to "
|
||||
f"configured limit of {max_timeout}s"
|
||||
f"Requested timeout {timeout}s was clamped to configured limit of {max_timeout}s"
|
||||
)
|
||||
|
||||
# Register check_interval watcher (gateway picks this up after agent run)
|
||||
if check_interval and background:
|
||||
effective_interval = max(30, check_interval)
|
||||
if check_interval < 30:
|
||||
result_data["check_interval_note"] = (
|
||||
f"Requested {check_interval}s raised to minimum 30s"
|
||||
)
|
||||
process_registry.pending_watchers.append({
|
||||
"session_id": proc_session.id,
|
||||
"check_interval": effective_interval,
|
||||
"session_key": session_key,
|
||||
"platform": os.getenv("HERMES_SESSION_PLATFORM", ""),
|
||||
"chat_id": os.getenv("HERMES_SESSION_CHAT_ID", ""),
|
||||
})
|
||||
result_data["check_interval_note"] = f"Requested {check_interval}s raised to minimum 30s"
|
||||
process_registry.pending_watchers.append(
|
||||
{
|
||||
"session_id": proc_session.id,
|
||||
"check_interval": effective_interval,
|
||||
"session_key": session_key,
|
||||
"platform": os.getenv("HERMES_SESSION_PLATFORM", ""),
|
||||
"chat_id": os.getenv("HERMES_SESSION_CHAT_ID", ""),
|
||||
}
|
||||
)
|
||||
|
||||
return json.dumps(result_data, ensure_ascii=False)
|
||||
except Exception as e:
|
||||
return json.dumps({
|
||||
"output": "",
|
||||
"exit_code": -1,
|
||||
"error": f"Failed to start background process: {str(e)}"
|
||||
}, ensure_ascii=False)
|
||||
return json.dumps(
|
||||
{"output": "", "exit_code": -1, "error": f"Failed to start background process: {str(e)}"},
|
||||
ensure_ascii=False,
|
||||
)
|
||||
else:
|
||||
# Run foreground command with retry logic
|
||||
max_retries = 3
|
||||
retry_count = 0
|
||||
result = None
|
||||
|
||||
|
||||
while retry_count <= max_retries:
|
||||
try:
|
||||
execute_kwargs = {"timeout": effective_timeout}
|
||||
|
|
@ -1005,39 +1055,61 @@ def terminal_tool(
|
|||
except Exception as e:
|
||||
error_str = str(e).lower()
|
||||
if "timeout" in error_str:
|
||||
return json.dumps({
|
||||
"output": "",
|
||||
"exit_code": 124,
|
||||
"error": f"Command timed out after {effective_timeout} seconds"
|
||||
}, ensure_ascii=False)
|
||||
|
||||
return json.dumps(
|
||||
{
|
||||
"output": "",
|
||||
"exit_code": 124,
|
||||
"error": f"Command timed out after {effective_timeout} seconds",
|
||||
},
|
||||
ensure_ascii=False,
|
||||
)
|
||||
|
||||
# Retry on transient errors
|
||||
if retry_count < max_retries:
|
||||
retry_count += 1
|
||||
wait_time = 2 ** retry_count
|
||||
logger.warning("Execution error, retrying in %ds (attempt %d/%d) - Command: %s - Error: %s: %s - Task: %s, Backend: %s",
|
||||
wait_time, retry_count, max_retries, command[:200], type(e).__name__, e, effective_task_id, env_type)
|
||||
wait_time = 2**retry_count
|
||||
logger.warning(
|
||||
"Execution error, retrying in %ds (attempt %d/%d) - Command: %s - Error: %s: %s - Task: %s, Backend: %s",
|
||||
wait_time,
|
||||
retry_count,
|
||||
max_retries,
|
||||
command[:200],
|
||||
type(e).__name__,
|
||||
e,
|
||||
effective_task_id,
|
||||
env_type,
|
||||
)
|
||||
time.sleep(wait_time)
|
||||
continue
|
||||
|
||||
logger.error("Execution failed after %d retries - Command: %s - Error: %s: %s - Task: %s, Backend: %s",
|
||||
max_retries, command[:200], type(e).__name__, e, effective_task_id, env_type)
|
||||
return json.dumps({
|
||||
"output": "",
|
||||
"exit_code": -1,
|
||||
"error": f"Command execution failed: {type(e).__name__}: {str(e)}"
|
||||
}, ensure_ascii=False)
|
||||
|
||||
|
||||
logger.error(
|
||||
"Execution failed after %d retries - Command: %s - Error: %s: %s - Task: %s, Backend: %s",
|
||||
max_retries,
|
||||
command[:200],
|
||||
type(e).__name__,
|
||||
e,
|
||||
effective_task_id,
|
||||
env_type,
|
||||
)
|
||||
return json.dumps(
|
||||
{
|
||||
"output": "",
|
||||
"exit_code": -1,
|
||||
"error": f"Command execution failed: {type(e).__name__}: {str(e)}",
|
||||
},
|
||||
ensure_ascii=False,
|
||||
)
|
||||
|
||||
# Got a result
|
||||
break
|
||||
|
||||
|
||||
# Extract output
|
||||
output = result.get("output", "")
|
||||
returncode = result.get("returncode", 0)
|
||||
|
||||
|
||||
# Add helpful message for sudo failures in messaging context
|
||||
output = _handle_sudo_failure(output, env_type)
|
||||
|
||||
|
||||
# Truncate output if too long, keeping both head and tail
|
||||
MAX_OUTPUT_CHARS = 50000
|
||||
if len(output) > MAX_OUTPUT_CHARS:
|
||||
|
|
@ -1045,65 +1117,56 @@ def terminal_tool(
|
|||
tail_chars = MAX_OUTPUT_CHARS - head_chars # 60% tail (most recent/relevant output)
|
||||
omitted = len(output) - head_chars - tail_chars
|
||||
truncated_notice = (
|
||||
f"\n\n... [OUTPUT TRUNCATED - {omitted} chars omitted "
|
||||
f"out of {len(output)} total] ...\n\n"
|
||||
f"\n\n... [OUTPUT TRUNCATED - {omitted} chars omitted out of {len(output)} total] ...\n\n"
|
||||
)
|
||||
output = output[:head_chars] + truncated_notice + output[-tail_chars:]
|
||||
|
||||
# Redact secrets from command output (catches env/printenv leaking keys)
|
||||
from agent.redact import redact_sensitive_text
|
||||
|
||||
output = redact_sensitive_text(output.strip()) if output else ""
|
||||
|
||||
return json.dumps({
|
||||
"output": output,
|
||||
"exit_code": returncode,
|
||||
"error": None
|
||||
}, ensure_ascii=False)
|
||||
return json.dumps({"output": output, "exit_code": returncode, "error": None}, ensure_ascii=False)
|
||||
|
||||
except Exception as e:
|
||||
return json.dumps({
|
||||
"output": "",
|
||||
"exit_code": -1,
|
||||
"error": f"Failed to execute command: {str(e)}",
|
||||
"status": "error"
|
||||
}, ensure_ascii=False)
|
||||
return json.dumps(
|
||||
{"output": "", "exit_code": -1, "error": f"Failed to execute command: {str(e)}", "status": "error"},
|
||||
ensure_ascii=False,
|
||||
)
|
||||
|
||||
|
||||
def check_terminal_requirements() -> bool:
|
||||
"""Check if all requirements for the terminal tool are met."""
|
||||
config = _get_env_config()
|
||||
env_type = config["env_type"]
|
||||
|
||||
|
||||
try:
|
||||
if env_type == "local":
|
||||
from minisweagent.environments.local import LocalEnvironment
|
||||
return True
|
||||
elif env_type == "docker":
|
||||
from minisweagent.environments.docker import DockerEnvironment
|
||||
# Check if docker is available
|
||||
import subprocess
|
||||
|
||||
result = subprocess.run(["docker", "version"], capture_output=True, timeout=5)
|
||||
return result.returncode == 0
|
||||
elif env_type == "singularity":
|
||||
from minisweagent.environments.singularity import SingularityEnvironment
|
||||
import shutil
|
||||
|
||||
# Check if singularity/apptainer is available
|
||||
import subprocess
|
||||
import shutil
|
||||
|
||||
executable = shutil.which("apptainer") or shutil.which("singularity")
|
||||
if executable:
|
||||
result = subprocess.run([executable, "--version"], capture_output=True, timeout=5)
|
||||
return result.returncode == 0
|
||||
return False
|
||||
elif env_type == "ssh":
|
||||
from tools.environments.ssh import SSHEnvironment
|
||||
# Check that host and user are configured
|
||||
return bool(config.get("ssh_host")) and bool(config.get("ssh_user"))
|
||||
elif env_type == "modal":
|
||||
from minisweagent.environments.extra.swerex_modal import SwerexModalEnvironment
|
||||
# Check for modal token
|
||||
return os.getenv("MODAL_TOKEN_ID") is not None or Path.home().joinpath(".modal.toml").exists()
|
||||
elif env_type == "daytona":
|
||||
from daytona import Daytona
|
||||
return os.getenv("DAYTONA_API_KEY") is not None
|
||||
else:
|
||||
return False
|
||||
|
|
@ -1116,9 +1179,9 @@ if __name__ == "__main__":
|
|||
# Simple test when run directly
|
||||
print("Terminal Tool Module (mini-swe-agent backend)")
|
||||
print("=" * 50)
|
||||
|
||||
|
||||
config = _get_env_config()
|
||||
print(f"\nCurrent Configuration:")
|
||||
print("\nCurrent Configuration:")
|
||||
print(f" Environment type: {config['env_type']}")
|
||||
print(f" Docker image: {config['docker_image']}")
|
||||
print(f" Modal image: {config['modal_image']}")
|
||||
|
|
@ -1165,37 +1228,34 @@ TERMINAL_SCHEMA = {
|
|||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"command": {
|
||||
"type": "string",
|
||||
"description": "The command to execute on the VM"
|
||||
},
|
||||
"command": {"type": "string", "description": "The command to execute on the VM"},
|
||||
"background": {
|
||||
"type": "boolean",
|
||||
"description": "ONLY for servers/watchers that never exit. For scripts, builds, installs — use foreground with timeout instead (it returns instantly when done).",
|
||||
"default": False
|
||||
"default": False,
|
||||
},
|
||||
"timeout": {
|
||||
"type": "integer",
|
||||
"description": "Max seconds to wait (default: 180). Returns INSTANTLY when command finishes — set high for long tasks, you won't wait unnecessarily.",
|
||||
"minimum": 1
|
||||
"minimum": 1,
|
||||
},
|
||||
"workdir": {
|
||||
"type": "string",
|
||||
"description": "Working directory for this command (absolute path). Defaults to the session working directory."
|
||||
"description": "Working directory for this command (absolute path). Defaults to the session working directory.",
|
||||
},
|
||||
"check_interval": {
|
||||
"type": "integer",
|
||||
"description": "Seconds between automatic status checks for background processes (gateway/messaging only, minimum 30). When set, I'll proactively report progress.",
|
||||
"minimum": 30
|
||||
"minimum": 30,
|
||||
},
|
||||
"pty": {
|
||||
"type": "boolean",
|
||||
"description": "Run in pseudo-terminal (PTY) mode for interactive CLI tools like Codex, Claude Code, or Python REPL. Only works with local and SSH backends. Default: false.",
|
||||
"default": False
|
||||
}
|
||||
"default": False,
|
||||
},
|
||||
},
|
||||
"required": ["command"]
|
||||
}
|
||||
"required": ["command"],
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue