mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
Refactor Terminal and AIAgent cleanup
This commit is contained in:
parent
9018e9dd70
commit
9123cfb5dd
17 changed files with 1842 additions and 976 deletions
|
|
@ -75,125 +75,8 @@ if mini_swe_path.exists():
|
|||
# Custom Singularity Environment with more space
|
||||
# =============================================================================
|
||||
|
||||
def _get_scratch_dir() -> Path:
    """Return the best base directory for Singularity sandboxes.

    Resolution order:
      1. TERMINAL_SCRATCH_DIR env var (explicit override, highest priority)
      2. /scratch, if present and writable (common on HPC/GPU nodes)
      3. the system temp directory as a last resort
    """
    override = os.getenv("TERMINAL_SCRATCH_DIR")
    if override:
        override_path = Path(override)
        override_path.mkdir(parents=True, exist_ok=True)
        return override_path

    scratch_root = Path("/scratch")
    if scratch_root.exists() and os.access(scratch_root, os.W_OK):
        # Keep sandboxes namespaced per-user to avoid collisions on shared nodes.
        user_dir = scratch_root / os.getenv("USER", "hermes") / "hermes-agent"
        user_dir.mkdir(parents=True, exist_ok=True)
        logger.info("Using /scratch for sandboxes: %s", user_dir)
        return user_dir

    logger.debug("/scratch not available, using /tmp for sandboxes")
    return Path(tempfile.gettempdir())
|
||||
|
||||
|
||||
def _get_apptainer_cache_dir() -> Path:
    """Return the Apptainer cache directory for built SIF images.

    Honors the APPTAINER_CACHEDIR env var when set; otherwise uses a
    ``.apptainer`` subdirectory under the scratch directory. The directory
    is created if missing.
    """
    env_cache = os.getenv("APPTAINER_CACHEDIR")
    if env_cache:
        explicit = Path(env_cache)
        explicit.mkdir(parents=True, exist_ok=True)
        return explicit

    # Fall back to a per-user cache inside the scratch area.
    default_cache = _get_scratch_dir() / ".apptainer"
    default_cache.mkdir(parents=True, exist_ok=True)
    return default_cache
|
||||
|
||||
|
||||
# Lock for SIF building to prevent race conditions
|
||||
_sif_build_lock = threading.Lock()
|
||||
|
||||
|
||||
def _get_or_build_sif(image: str, executable: str = "apptainer") -> str:
    """Resolve an image reference to a local SIF file, building it if needed.

    Behavior:
      - an existing ``.sif`` path is returned unchanged;
      - a non-``docker://`` reference is returned unchanged (local sandbox etc.);
      - a ``docker://`` URL is converted once into a cached SIF; on any build
        failure or timeout the original URL is returned as a fallback.

    Args:
        image: Image path (docker://... URL or .sif path)
        executable: apptainer or singularity

    Returns:
        Path to SIF file, or original image if not a docker:// URL
    """
    # Fast paths: nothing to build.
    if image.endswith('.sif') and Path(image).exists():
        return image
    if not image.startswith('docker://'):
        return image

    # docker://nikolaik/python-nodejs:python3.11-nodejs20 -> python-nodejs-python3.11-nodejs20.sif
    flat_name = image.replace('docker://', '').replace('/', '-').replace(':', '-')
    cache_dir = _get_apptainer_cache_dir()
    sif_path = cache_dir / f"{flat_name}.sif"

    if sif_path.exists():
        return str(sif_path)

    # Build under a lock so parallel workers don't race on the same SIF.
    with _sif_build_lock:
        # Re-check: another thread may have finished the build while we waited.
        if sif_path.exists():
            return str(sif_path)

        logger.info("Building SIF image (one-time setup)...")
        logger.info(" Source: %s", image)
        logger.info(" Target: %s", sif_path)

        # The build needs a scratch tmp dir; point Apptainer at our cache area.
        tmp_dir = cache_dir / "tmp"
        tmp_dir.mkdir(parents=True, exist_ok=True)
        build_env = os.environ.copy()
        build_env["APPTAINER_TMPDIR"] = str(tmp_dir)
        build_env["APPTAINER_CACHEDIR"] = str(cache_dir)

        try:
            result = subprocess.run(
                [executable, "build", str(sif_path), image],
                capture_output=True,
                text=True,
                timeout=600,  # 10 min timeout for pulling and building
                env=build_env,
            )
            if result.returncode != 0:
                logger.warning("SIF build failed, falling back to docker:// URL")
                logger.warning(" Error: %s", result.stderr[:500])
                return image

            logger.info("SIF image built successfully")
            return str(sif_path)

        except subprocess.TimeoutExpired:
            logger.warning("SIF build timed out, falling back to docker:// URL")
            # Don't leave a truncated SIF behind — it would be reused as "cached".
            if sif_path.exists():
                sif_path.unlink()
            return image
        except Exception as e:
            logger.warning("SIF build error: %s, falling back to docker:// URL", e)
            return image
|
||||
# Singularity helpers (scratch dir, SIF cache) now live in tools/environments/singularity.py
|
||||
from tools.environments.singularity import _get_scratch_dir
|
||||
|
||||
|
||||
# Disk usage warning threshold (in GB)
|
||||
|
|
@ -255,234 +138,19 @@ def set_approval_callback(cb):
|
|||
# Dangerous Command Approval System
|
||||
# =============================================================================
|
||||
|
||||
from tools import approval as _approval
|
||||
|
||||
# Dangerous command patterns: (regex, human-readable description).
# Matched case-insensitively against the full command string.
DANGEROUS_PATTERNS = [
    # Filesystem destruction
    (r'\brm\s+(-[^\s]*\s+)*/', "delete in root path"),
    (r'\brm\s+(-[^\s]*)?r', "recursive delete"),
    (r'\brm\s+--recursive\b', "recursive delete (long flag)"),
    (r'\bchmod\s+(-[^\s]*\s+)*777\b', "world-writable permissions"),
    (r'\bchmod\s+--recursive\b.*777', "recursive world-writable (long flag)"),
    (r'\bchown\s+(-[^\s]*)?R\s+root', "recursive chown to root"),
    (r'\bchown\s+--recursive\b.*root', "recursive chown to root (long flag)"),
    (r'\bmkfs\b', "format filesystem"),
    (r'\bdd\s+.*if=', "disk copy"),
    (r'>\s*/dev/sd', "write to block device"),
    # SQL data loss
    (r'\bDROP\s+(TABLE|DATABASE)\b', "SQL DROP"),
    (r'\bDELETE\s+FROM\b(?!.*\bWHERE\b)', "SQL DELETE without WHERE"),
    (r'\bTRUNCATE\s+(TABLE)?\s*\w', "SQL TRUNCATE"),
    # System-level damage
    (r'>\s*/etc/', "overwrite system config"),
    (r'\bsystemctl\s+(stop|disable|mask)\b', "stop/disable system service"),
    (r'\bkill\s+-9\s+-1\b', "kill all processes"),
    (r'\bpkill\s+-9\b', "force kill processes"),
    (r':()\s*{\s*:\s*\|\s*:&\s*}\s*;:', "fork bomb"),
    # Indirect execution via command launchers
    (r'\b(bash|sh|zsh)\s+-c\s+', "shell command via -c flag"),
    (r'\b(python[23]?|perl|ruby|node)\s+-[ec]\s+', "script execution via -e/-c flag"),
    # Pipe-to-shell (remote code execution)
    (r'\b(curl|wget)\b.*\|\s*(ba)?sh\b', "pipe remote content to shell"),
    # Destructive find/xargs patterns
    (r'\bxargs\s+.*\brm\b', "xargs with rm"),
    (r'\bfind\b.*-exec\s+rm\b', "find -exec rm"),
    (r'\bfind\b.*-delete\b', "find -delete"),
]
|
||||
|
||||
|
||||
def _load_permanent_allowlist() -> set:
    """Load permanently allowed command patterns from the CLI config.

    Also syncs them into the approval module so is_approved() recognizes
    patterns that were added via 'always' in a previous session. Any failure
    (missing config, import error) degrades to an empty set — best effort
    by design.
    """
    try:
        from hermes_cli.config import load_config

        allowed = set(load_config().get("command_allowlist", []) or [])
        if allowed:
            _approval.load_permanent(allowed)
        return allowed
    except Exception:
        return set()
|
||||
|
||||
|
||||
def _save_permanent_allowlist(patterns: set):
    """Persist permanently allowed command patterns to the CLI config.

    Failures are logged rather than raised — losing the allowlist write
    should never break command execution.
    """
    try:
        from hermes_cli.config import load_config, save_config

        cfg = load_config()
        cfg["command_allowlist"] = list(patterns)
        save_config(cfg)
    except Exception as e:
        logger.warning("Could not save allowlist: %s", e)
|
||||
|
||||
|
||||
def _detect_dangerous_command(command: str) -> tuple:
    """
    Check if command matches any dangerous patterns.

    Returns:
        (is_dangerous, pattern_key, description) or (False, None, None)
    """
    import re

    lowered = command.lower()
    for pattern, description in DANGEROUS_PATTERNS:
        if not re.search(pattern, lowered, re.IGNORECASE):
            continue
        # Derive a short, stable cache key from the pattern itself.
        if r'\b' in pattern:
            key = pattern.split(r'\b')[1]
        else:
            key = pattern[:20]
        return (True, key, description)

    return (False, None, None)
|
||||
|
||||
|
||||
def _is_command_approved(pattern_key: str) -> bool:
    """Return True if *pattern_key* is approved for the current session
    (HERMES_SESSION_KEY env var, defaulting to "default") or permanently."""
    return _approval.is_approved(os.getenv("HERMES_SESSION_KEY", "default"),
                                 pattern_key)
|
||||
|
||||
|
||||
def _prompt_dangerous_approval(command: str, description: str, timeout_seconds: int = 60) -> str:
    """
    Prompt user to approve a dangerous command (CLI only).

    If an _approval_callback is registered (by the CLI), delegates to it so the
    prompt integrates with prompt_toolkit's UI. Otherwise falls back to the
    raw input() approach (works outside the TUI, e.g. tests). Input is read on
    a daemon thread so an unresponsive terminal can't hang us past
    *timeout_seconds*.

    Returns: 'once', 'session', 'always', or 'deny'
    """
    import sys
    import threading

    # Prefer the registered callback (prompt_toolkit-compatible); any error
    # from it is treated as a denial.
    if _approval_callback is not None:
        try:
            return _approval_callback(command, description)
        except Exception:
            return "deny"

    # Pause spinner if one is running
    os.environ["HERMES_SPINNER_PAUSE"] = "1"

    try:
        print()
        print(f" ⚠️ DANGEROUS COMMAND: {description}")
        print(f" {command[:80]}{'...' if len(command) > 80 else ''}")
        print()
        print(f" [o]nce | [s]ession | [a]lways | [d]eny")
        print()
        sys.stdout.flush()

        answer = {"choice": ""}

        def read_choice():
            try:
                answer["choice"] = input(" Choice [o/s/a/D]: ").strip().lower()
            except (EOFError, OSError):
                answer["choice"] = ""

        prompt_thread = threading.Thread(target=read_choice, daemon=True)
        prompt_thread.start()
        prompt_thread.join(timeout=timeout_seconds)

        if prompt_thread.is_alive():
            # No answer within the window — fail closed.
            print("\n ⏱ Timeout - denying command")
            return "deny"

        choice = answer["choice"]
        if choice in ('o', 'once'):
            print(" ✓ Allowed once")
            return "once"
        if choice in ('s', 'session'):
            print(" ✓ Allowed for this session")
            return "session"
        if choice in ('a', 'always'):
            print(" ✓ Added to permanent allowlist")
            return "always"
        print(" ✗ Denied")
        return "deny"

    except (EOFError, KeyboardInterrupt):
        print("\n ✗ Cancelled")
        return "deny"
    finally:
        # Always resume the spinner, whatever path we took above.
        if "HERMES_SPINNER_PAUSE" in os.environ:
            del os.environ["HERMES_SPINNER_PAUSE"]
        print()
        sys.stdout.flush()
|
||||
# Dangerous command detection + approval now consolidated in tools/approval.py
|
||||
from tools.approval import (
|
||||
detect_dangerous_command as _detect_dangerous_command,
|
||||
check_dangerous_command as _check_dangerous_command_impl,
|
||||
load_permanent_allowlist as _load_permanent_allowlist,
|
||||
DANGEROUS_PATTERNS,
|
||||
)
|
||||
|
||||
|
||||
def _check_dangerous_command(command: str, env_type: str) -> dict:
    """Check if command is dangerous and handle approval.

    Delegates to the consolidated implementation in ``tools/approval.py``
    (imported above as ``_check_dangerous_command_impl``), passing the CLI
    approval callback so interactive prompts integrate with the TUI.

    Args:
        command: The command to check.
        env_type: The terminal backend type (isolated backends such as
            docker/singularity/modal are auto-approved by the impl).

    Returns:
        {"approved": True/False, "message": str or None}, possibly with
        extra keys (e.g. "status": "approval_required") for gateway flows.
    """
    # NOTE: the pre-refactor inline implementation was removed here; keeping
    # it alongside this delegation left dead, divergent logic in the file.
    return _check_dangerous_command_impl(command, env_type,
                                         approval_callback=_approval_callback)
|
||||
|
||||
|
||||
def _handle_sudo_failure(output: str, env_type: str) -> str:
|
||||
|
|
@ -671,569 +339,12 @@ def _transform_sudo_command(command: str) -> str:
|
|||
return re.sub(r'\bsudo\b', replace_sudo, command)
|
||||
|
||||
|
||||
class _LocalEnvironment:
    """
    Local execution environment with sudo support and non-blocking stdin.

    Features:
    - Uses stdin=DEVNULL to prevent hanging on interactive prompts (sudo, etc.)
    - Optional SUDO_PASSWORD support: if set, transforms `sudo` commands to use `sudo -S`
    - Graceful failure: sudo commands fail fast with clear error if no password configured

    Environment variables:
    - SUDO_PASSWORD: If set, enables sudo commands by piping password via `sudo -S`
    """

    def __init__(self, cwd: str = "", timeout: int = 60, env: dict = None):
        self.cwd = cwd or os.getcwd()
        self.timeout = timeout
        self.env = env or {}

    def execute(self, command: str, cwd: str = "", *, timeout: int | None = None,
                stdin_data: str | None = None) -> dict:
        """
        Execute a command locally with sudo support.

        Uses Popen + a 200ms poll loop so the global interrupt event can kill
        the process early when the user sends a new message, instead of
        blocking for the full timeout. A background reader thread drains
        stdout continuously to avoid the classic 64KB pipe-buffer deadlock.

        Args:
            stdin_data: If provided, piped to the process's stdin. This
                bypasses shell ARG_MAX limits for large content.
        """
        run_dir = cwd or self.cwd or os.getcwd()
        limit = timeout or self.timeout

        # Rewrite `sudo` -> `sudo -S` when SUDO_PASSWORD is configured.
        final_command = _transform_sudo_command(command)

        try:
            proc = subprocess.Popen(
                final_command,
                shell=True,
                text=True,
                cwd=run_dir,
                env=os.environ | self.env,
                encoding="utf-8",
                errors="replace",
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                stdin=subprocess.PIPE if stdin_data is not None else subprocess.DEVNULL,
                # New process group so we can kill the whole tree at once.
                preexec_fn=os.setsid,
            )

            def _kill_tree():
                # Terminate the whole process group; fall back to killing
                # just the child if the group is already gone.
                try:
                    os.killpg(os.getpgid(proc.pid), signal.SIGTERM)
                except (ProcessLookupError, PermissionError):
                    proc.kill()

            # Feed stdin from a background thread — a large synchronous write
            # can block once the pipe buffer fills.
            if stdin_data is not None:
                def _feed_stdin():
                    try:
                        proc.stdin.write(stdin_data)
                        proc.stdin.close()
                    except (BrokenPipeError, OSError):
                        pass
                threading.Thread(target=_feed_stdin, daemon=True).start()

            # Continuously drain stdout: the OS pipe buffer is 64KB on Linux,
            # and a child writing more than that with no reader blocks forever.
            captured: list[str] = []

            def _pump_output():
                try:
                    for chunk in proc.stdout:
                        captured.append(chunk)
                except ValueError:
                    pass  # stdout closed during interrupt/timeout
                finally:
                    try:
                        proc.stdout.close()
                    except Exception:
                        pass

            pump = threading.Thread(target=_pump_output, daemon=True)
            pump.start()

            deadline = time.monotonic() + limit

            # Poll every 200ms so interrupts are noticed quickly.
            while proc.poll() is None:
                if _interrupt_event.is_set():
                    # User sent a new message — stop now, return partial output.
                    _kill_tree()
                    pump.join(timeout=2)
                    partial = "".join(captured)
                    return {
                        "output": partial + "\n[Command interrupted — user sent a new message]",
                        "returncode": 130  # Standard interrupted exit code
                    }

                if time.monotonic() > deadline:
                    _kill_tree()
                    pump.join(timeout=2)
                    return {"output": f"Command timed out after {limit}s", "returncode": 124}

                time.sleep(0.2)

            # Normal completion — let the reader finish draining.
            pump.join(timeout=5)
            return {"output": "".join(captured), "returncode": proc.returncode}

        except Exception as e:
            return {"output": f"Execution error: {str(e)}", "returncode": 1}

    def cleanup(self):
        """No cleanup needed for local environment."""
        pass

    def stop(self):
        """Alias for cleanup."""
        pass
|
||||
|
||||
|
||||
class _SingularityEnvironment:
    """
    Persistent Singularity/Apptainer container environment.

    Uses `apptainer instance` to create a long-running container that persists
    state (files, installs, env changes) across all commands within a task.
    The model experiences this as a real Linux VM.

    Features:
    - Persistent filesystem: files created in one command are visible in the next
    - Package installs persist: pip/apt installs survive across tool calls
    - Full isolation: --containall gives PID, IPC, and environment isolation
    - Writable tmpfs overlay: full root filesystem is writable (RAM-backed)
    - Automatic SIF caching: docker:// images converted to SIF once, reused forever
    """

    def __init__(self, image: str, cwd: str = "/root", timeout: int = 60):
        self.cwd = cwd
        self.timeout = timeout

        # Prefer the newer `apptainer` binary; fall back to `singularity`.
        self.executable = "apptainer" if shutil.which("apptainer") else "singularity"

        # Resolve docker:// URLs to a cached SIF (fast no-op when cached).
        self.image = _get_or_build_sif(image, self.executable)

        # Instance names must be alphanumeric + underscores.
        self.instance_id = f"hermes_{uuid.uuid4().hex[:12]}"
        self._instance_started = False

        self._start_instance()

    def _start_instance(self):
        """Start a persistent apptainer instance.

        The instance runs as a background process. All subsequent execute()
        calls run inside this same instance, so state persists across calls.
        """
        start_cmd = [
            self.executable, "instance", "start",
            "--writable-tmpfs",  # RAM-backed writable overlay on read-only SIF
            "--containall",      # Full isolation: PID, IPC, environment, filesystem
            str(self.image),
            self.instance_id,
        ]

        try:
            result = subprocess.run(
                start_cmd,
                capture_output=True,
                text=True,
                timeout=120,  # 2 min for instance startup
            )
        except subprocess.TimeoutExpired:
            raise RuntimeError("Instance start timed out")

        if result.returncode != 0:
            raise RuntimeError(f"Failed to start instance: {result.stderr}")

        self._instance_started = True
        logger.info("Singularity instance %s started (persistent container)", self.instance_id)

    def execute(self, command: str, cwd: str = "", *, timeout: int | None = None,
                stdin_data: str | None = None) -> dict:
        """Execute a command in the persistent Singularity instance.

        All commands run in the same container, so files, installs, and
        environment changes persist between calls.
        """
        if not self._instance_started:
            return {"output": "Instance not started", "returncode": -1}

        exec_cmd = [self.executable, "exec",
                    "--pwd", cwd or self.cwd,          # working directory
                    f"instance://{self.instance_id}",  # attach to running instance
                    "bash", "-c",
                    _transform_sudo_command(command)]  # sudo -S rewrite if configured

        effective = timeout or self.timeout
        run_kwargs = {
            "text": True,
            "timeout": effective,
            "encoding": "utf-8",
            "errors": "replace",
            "stdout": subprocess.PIPE,
            "stderr": subprocess.STDOUT,
        }
        # Pipe stdin when given; otherwise close it so interactive prompts
        # fail fast instead of hanging.
        if stdin_data is not None:
            run_kwargs["input"] = stdin_data
        else:
            run_kwargs["stdin"] = subprocess.DEVNULL

        try:
            result = subprocess.run(exec_cmd, **run_kwargs)
            return {"output": result.stdout, "returncode": result.returncode}
        except subprocess.TimeoutExpired:
            return {"output": f"Command timed out after {timeout or self.timeout}s", "returncode": 124}

    def cleanup(self):
        """Stop the persistent instance and clean up."""
        if not self._instance_started:
            return
        try:
            subprocess.run(
                [self.executable, "instance", "stop", self.instance_id],
                capture_output=True,
                text=True,
                timeout=30,
            )
            logger.info("Singularity instance %s stopped", self.instance_id)
        except Exception as e:
            logger.warning("Failed to stop Singularity instance %s: %s", self.instance_id, e)
        self._instance_started = False

    def stop(self):
        """Alias for cleanup."""
        self.cleanup()

    def __del__(self):
        """Best-effort cleanup on garbage collection."""
        try:
            self.cleanup()
        except Exception:
            pass
|
||||
|
||||
|
||||
class _SSHEnvironment:
    """
    SSH-based remote execution environment.

    Runs commands on a remote machine over SSH, keeping the agent code
    completely isolated from the execution environment. Uses SSH ControlMaster
    for connection persistence (faster subsequent commands).

    Security benefits:
    - Agent cannot modify its own code
    - Remote machine acts as a sandbox
    - Clear separation between agent and execution environment
    """

    def __init__(self, host: str, user: str, cwd: str = "/tmp", timeout: int = 60,
                 port: int = 22, key_path: str = ""):
        self.host = host
        self.user = user
        self.cwd = cwd
        self.timeout = timeout
        self.port = port
        self.key_path = key_path

        # ControlMaster socket lives under a dedicated temp directory; one
        # socket per (user, host, port) tuple.
        self.control_dir = Path(tempfile.gettempdir()) / "hermes-ssh"
        self.control_dir.mkdir(parents=True, exist_ok=True)
        self.control_socket = self.control_dir / f"{user}@{host}:{port}.sock"

        # Fail early if the host is unreachable, and warm the ControlMaster.
        self._establish_connection()

    def _build_ssh_command(self, extra_args: list = None) -> list:
        """Build base SSH command with connection options."""
        cmd = ["ssh",
               # Connection multiplexing for performance
               "-o", f"ControlPath={self.control_socket}",
               "-o", "ControlMaster=auto",
               "-o", "ControlPersist=300",  # Keep connection alive for 5 min
               # Standard options
               "-o", "BatchMode=yes",                    # No password prompts
               "-o", "StrictHostKeyChecking=accept-new", # Accept new hosts
               "-o", "ConnectTimeout=10"]

        if self.port != 22:
            cmd += ["-p", str(self.port)]
        if self.key_path:
            cmd += ["-i", self.key_path]
        if extra_args:  # e.g. -t for TTY
            cmd += extra_args

        cmd.append(f"{self.user}@{self.host}")
        return cmd

    def _establish_connection(self):
        """Test SSH connection and establish ControlMaster."""
        probe = self._build_ssh_command()
        probe.append("echo 'SSH connection established'")

        try:
            result = subprocess.run(
                probe,
                capture_output=True,
                text=True,
                timeout=15
            )
        except subprocess.TimeoutExpired:
            raise RuntimeError(f"SSH connection to {self.user}@{self.host} timed out")

        if result.returncode != 0:
            error_msg = result.stderr.strip() or result.stdout.strip()
            raise RuntimeError(f"SSH connection failed: {error_msg}")

    def execute(self, command: str, cwd: str = "", *, timeout: int | None = None,
                stdin_data: str | None = None) -> dict:
        """Execute a command on the remote host via SSH."""
        target_dir = cwd or self.cwd
        limit = timeout or self.timeout

        # Rewrite sudo -> sudo -S when SUDO_PASSWORD is configured, then
        # anchor the command in the requested directory.
        remote_cmd = f'cd {target_dir} && {_transform_sudo_command(command)}'

        full = self._build_ssh_command()
        full.extend(["bash", "-c", remote_cmd])

        run_kwargs = {
            "text": True,
            "timeout": limit,
            "encoding": "utf-8",
            "errors": "replace",
            "stdout": subprocess.PIPE,
            "stderr": subprocess.STDOUT,
        }
        if stdin_data is not None:
            run_kwargs["input"] = stdin_data
        else:
            run_kwargs["stdin"] = subprocess.DEVNULL

        try:
            result = subprocess.run(full, **run_kwargs)
            return {"output": result.stdout, "returncode": result.returncode}
        except subprocess.TimeoutExpired:
            return {"output": f"Command timed out after {limit}s", "returncode": 124}
        except Exception as e:
            return {"output": f"SSH execution error: {str(e)}", "returncode": 1}

    def cleanup(self):
        """Close the SSH ControlMaster connection."""
        if self.control_socket.exists():
            try:
                # Ask the ControlMaster process to exit.
                subprocess.run(
                    ["ssh", "-o", f"ControlPath={self.control_socket}", "-O", "exit",
                     f"{self.user}@{self.host}"],
                    capture_output=True, timeout=5)
            except (OSError, subprocess.SubprocessError):
                pass

        try:
            self.control_socket.unlink()
        except OSError:
            pass

    def stop(self):
        """Alias for cleanup."""
        self.cleanup()

    def __del__(self):
        """Best-effort cleanup on garbage collection."""
        try:
            self.cleanup()
        except Exception:
            pass
|
||||
|
||||
|
||||
class _DockerEnvironment:
    """
    Docker execution environment wrapper with sudo support and non-blocking stdin.

    Wraps mini-swe-agent's DockerEnvironment but adds:
    - stdin=DEVNULL to prevent hanging on interactive prompts
    - SUDO_PASSWORD support via _transform_sudo_command
    """

    def __init__(self, image: str, cwd: str = "/", timeout: int = 60):
        from minisweagent.environments.docker import DockerEnvironment
        self._inner = DockerEnvironment(image=image, cwd=cwd, timeout=timeout)
        self.cwd = cwd
        self.timeout = timeout

    def execute(self, command: str, cwd: str = "", *, timeout: int | None = None,
                stdin_data: str | None = None) -> dict:
        """Execute a command in the Docker container with sudo support."""
        # Rewrite sudo -> sudo -S when SUDO_PASSWORD is configured.
        final_command = _transform_sudo_command(command)

        target_dir = cwd or self.cwd
        limit = timeout or self.timeout

        # The inner environment owns the container lifecycle.
        assert self._inner.container_id, "Container not started"

        docker_cmd = [self._inner.config.executable, "exec"]
        if stdin_data is not None:
            docker_cmd.append("-i")  # Enable stdin piping into the container
        docker_cmd += ["-w", target_dir]
        # Forward selected host env vars, then apply configured overrides.
        for key in self._inner.config.forward_env:
            if (value := os.getenv(key)) is not None:
                docker_cmd += ["-e", f"{key}={value}"]
        for key, value in self._inner.config.env.items():
            docker_cmd += ["-e", f"{key}={value}"]
        docker_cmd += [self._inner.container_id, "bash", "-lc", final_command]

        run_kwargs = {
            "text": True,
            "timeout": limit,
            "encoding": "utf-8",
            "errors": "replace",
            "stdout": subprocess.PIPE,
            "stderr": subprocess.STDOUT,
        }
        if stdin_data is not None:
            run_kwargs["input"] = stdin_data
        else:
            run_kwargs["stdin"] = subprocess.DEVNULL

        try:
            result = subprocess.run(docker_cmd, **run_kwargs)
            return {"output": result.stdout, "returncode": result.returncode}
        except subprocess.TimeoutExpired:
            return {"output": f"Command timed out after {limit}s", "returncode": 124}

    def cleanup(self):
        """Cleanup the Docker container."""
        self._inner.cleanup()

    def stop(self):
        """Alias for cleanup."""
        self.cleanup()

    def __del__(self):
        """Best-effort cleanup on garbage collection."""
        try:
            self.cleanup()
        except Exception:
            pass
|
||||
|
||||
|
||||
class _ModalEnvironment:
    """
    Modal cloud execution environment wrapper with sudo support.

    Wraps mini-swe-agent's SwerexModalEnvironment but adds:
    - SUDO_PASSWORD support via _transform_sudo_command
    - Automatic async-safety patches (applied once, before first use)

    The patches replace SwerexModalEnvironment's asyncio.run() calls with a
    background thread approach, making it safe to use inside any event loop
    (e.g., Atropos). Applied here at the point of use rather than relying on
    import-time side effects, so ALL callers get the fix automatically.
    """

    # Class-level flag: patches only need to be applied once per process.
    _patches_applied = False

    def __init__(self, image: str, cwd: str = "/root", timeout: int = 60):
        # Apply async-safety patches before the first SwerexModalEnvironment
        # is constructed. This is the single authoritative place — no other
        # module needs to call apply_patches() for Modal.
        if not _ModalEnvironment._patches_applied:
            try:
                from environments.patches import apply_patches
                apply_patches()
            except ImportError:
                pass  # patches module not available (standalone use)
            _ModalEnvironment._patches_applied = True

        from minisweagent.environments.extra.swerex_modal import SwerexModalEnvironment
        # Generous startup timeout: sandbox creation can take 30-60s for cold
        # images, and the SWE-ReX runtime needs another 10-30s to boot.
        self._inner = SwerexModalEnvironment(
            image=image, cwd=cwd, timeout=timeout,
            startup_timeout=180.0,
            runtime_timeout=3600.0,
        )
        self.cwd = cwd
        self.timeout = timeout

    def execute(self, command: str, cwd: str = "", *, timeout: int | None = None,
                stdin_data: str | None = None) -> dict:
        """Execute a command in Modal with sudo support.

        Modal uses HTTP transport (no execve), so there's no ARG_MAX limit.
        When stdin_data is provided, we embed it as a heredoc since there's
        no process-level stdin pipe to the cloud sandbox.
        """
        if stdin_data is not None:
            # Pick a heredoc delimiter guaranteed not to appear in the data.
            marker = f"HERMES_EOF_{uuid.uuid4().hex[:8]}"
            while marker in stdin_data:
                marker = f"HERMES_EOF_{uuid.uuid4().hex[:8]}"
            command = f"{command} << '{marker}'\n{stdin_data}\n{marker}"

        # Rewrite sudo -> sudo -S when SUDO_PASSWORD is configured, then
        # hand off to the wrapped environment.
        return self._inner.execute(_transform_sudo_command(command),
                                   cwd=cwd, timeout=timeout)

    def cleanup(self):
        """Cleanup the Modal deployment."""
        if hasattr(self._inner, 'stop'):
            self._inner.stop()

    def stop(self):
        """Stop the Modal deployment."""
        self.cleanup()

    def __del__(self):
        """Best-effort cleanup on garbage collection."""
        try:
            self.cleanup()
        except Exception:
            pass
|
||||
# Environment classes now live in tools/environments/
|
||||
from tools.environments.local import LocalEnvironment as _LocalEnvironment
|
||||
from tools.environments.singularity import SingularityEnvironment as _SingularityEnvironment
|
||||
from tools.environments.ssh import SSHEnvironment as _SSHEnvironment
|
||||
from tools.environments.docker import DockerEnvironment as _DockerEnvironment
|
||||
from tools.environments.modal import ModalEnvironment as _ModalEnvironment
|
||||
|
||||
|
||||
# Tool description for LLM
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue