feat: devex help, add Makefile, ruff, pre-commit, and modernize CI

This commit is contained in:
Brooklyn Nicholson 2026-03-09 20:36:51 -05:00
parent 172a38c344
commit f4d7e6a29e
111 changed files with 11655 additions and 10200 deletions

View file

@ -1,8 +1,8 @@
"""Base class for all Hermes execution environment backends."""
from abc import ABC, abstractmethod
import os
import subprocess
from abc import ABC, abstractmethod
from pathlib import Path
@ -34,9 +34,9 @@ class BaseEnvironment(ABC):
self.env = env or {}
@abstractmethod
def execute(self, command: str, cwd: str = "", *,
timeout: int | None = None,
stdin_data: str | None = None) -> dict:
def execute(
self, command: str, cwd: str = "", *, timeout: int | None = None, stdin_data: str | None = None
) -> dict:
"""Execute a command, return {"output": str, "returncode": int}."""
...
@ -62,10 +62,10 @@ class BaseEnvironment(ABC):
def _prepare_command(self, command: str) -> str:
"""Transform sudo commands if SUDO_PASSWORD is available."""
from tools.terminal_tool import _transform_sudo_command
return _transform_sudo_command(command)
def _build_run_kwargs(self, timeout: int | None,
stdin_data: str | None = None) -> dict:
def _build_run_kwargs(self, timeout: int | None, stdin_data: str | None = None) -> dict:
"""Build common subprocess.run kwargs for non-interactive execution."""
kw = {
"text": True,

View file

@ -11,7 +11,6 @@ import shlex
import threading
import uuid
import warnings
from typing import Optional
from tools.environments.base import BaseEnvironment
from tools.interrupt import is_interrupted
@ -32,8 +31,8 @@ class DaytonaEnvironment(BaseEnvironment):
cwd: str = "/home/daytona",
timeout: int = 60,
cpu: int = 1,
memory: int = 5120, # MB (hermes convention)
disk: int = 10240, # MB (Daytona platform max is 10GB)
memory: int = 5120, # MB (hermes convention)
disk: int = 10240, # MB (Daytona platform max is 10GB)
persistent_filesystem: bool = True,
task_id: str = "default",
):
@ -41,8 +40,8 @@ class DaytonaEnvironment(BaseEnvironment):
super().__init__(cwd=cwd, timeout=timeout)
from daytona import (
Daytona,
CreateSandboxFromImageParams,
Daytona,
DaytonaError,
Resources,
SandboxState,
@ -73,13 +72,11 @@ class DaytonaEnvironment(BaseEnvironment):
try:
self._sandbox = self._daytona.find_one(labels=labels)
self._sandbox.start()
logger.info("Daytona: resumed sandbox %s for task %s",
self._sandbox.id, task_id)
logger.info("Daytona: resumed sandbox %s for task %s", self._sandbox.id, task_id)
except DaytonaError:
self._sandbox = None
except Exception as e:
logger.warning("Daytona: failed to resume sandbox for task %s: %s",
task_id, e)
logger.warning("Daytona: failed to resume sandbox for task %s: %s", task_id, e)
self._sandbox = None
# Create a fresh sandbox if we don't have one
@ -92,8 +89,7 @@ class DaytonaEnvironment(BaseEnvironment):
resources=resources,
)
)
logger.info("Daytona: created sandbox %s for task %s",
self._sandbox.id, task_id)
logger.info("Daytona: created sandbox %s for task %s", self._sandbox.id, task_id)
# Resolve cwd: detect actual home dir inside the sandbox
if self._requested_cwd in ("~", "/home/daytona"):
@ -112,7 +108,7 @@ class DaytonaEnvironment(BaseEnvironment):
self._sandbox.start()
logger.info("Daytona: restarted sandbox %s", self._sandbox.id)
def _exec_in_thread(self, exec_command: str, cwd: Optional[str], timeout: int) -> dict:
def _exec_in_thread(self, exec_command: str, cwd: str | None, timeout: int) -> dict:
"""Run exec in a background thread with interrupt polling.
The Daytona SDK's exec(timeout=...) parameter is unreliable (the
@ -130,7 +126,8 @@ class DaytonaEnvironment(BaseEnvironment):
def _run():
try:
response = self._sandbox.process.exec(
timed_command, cwd=cwd,
timed_command,
cwd=cwd,
)
result_holder["value"] = {
"output": response.result or "",
@ -169,9 +166,9 @@ class DaytonaEnvironment(BaseEnvironment):
return {"error": result_holder["error"]}
return result_holder["value"]
def execute(self, command: str, cwd: str = "", *,
timeout: Optional[int] = None,
stdin_data: Optional[str] = None) -> dict:
def execute(
self, command: str, cwd: str = "", *, timeout: int | None = None, stdin_data: str | None = None
) -> dict:
with self._lock:
self._ensure_sandbox_ready()
@ -189,6 +186,7 @@ class DaytonaEnvironment(BaseEnvironment):
if "error" in result:
from daytona import DaytonaError
err = result["error"]
if isinstance(err, DaytonaError):
with self._lock:
@ -210,8 +208,7 @@ class DaytonaEnvironment(BaseEnvironment):
try:
if self._persistent:
self._sandbox.stop()
logger.info("Daytona: stopped sandbox %s (filesystem preserved)",
self._sandbox.id)
logger.info("Daytona: stopped sandbox %s (filesystem preserved)", self._sandbox.id)
else:
self._daytona.delete(self._sandbox)
logger.info("Daytona: deleted sandbox %s", self._sandbox.id)

View file

@ -11,7 +11,6 @@ import subprocess
import sys
import threading
import time
from typing import Optional
from tools.environments.base import BaseEnvironment
from tools.interrupt import is_interrupted
@ -19,7 +18,6 @@ from tools.interrupt import is_interrupted
logger = logging.getLogger(__name__)
# Security flags applied to every container.
# The container itself is the security boundary (isolated from host).
# We drop all capabilities then add back the minimum needed:
@ -28,19 +26,28 @@ logger = logging.getLogger(__name__)
# Block privilege escalation and limit PIDs.
# /tmp is size-limited and nosuid but allows exec (needed by pip/npm builds).
_SECURITY_ARGS = [
"--cap-drop", "ALL",
"--cap-add", "DAC_OVERRIDE",
"--cap-add", "CHOWN",
"--cap-add", "FOWNER",
"--security-opt", "no-new-privileges",
"--pids-limit", "256",
"--tmpfs", "/tmp:rw,nosuid,size=512m",
"--tmpfs", "/var/tmp:rw,noexec,nosuid,size=256m",
"--tmpfs", "/run:rw,noexec,nosuid,size=64m",
"--cap-drop",
"ALL",
"--cap-add",
"DAC_OVERRIDE",
"--cap-add",
"CHOWN",
"--cap-add",
"FOWNER",
"--security-opt",
"no-new-privileges",
"--pids-limit",
"256",
"--tmpfs",
"/tmp:rw,nosuid,size=512m",
"--tmpfs",
"/var/tmp:rw,noexec,nosuid,size=256m",
"--tmpfs",
"/run:rw,noexec,nosuid,size=64m",
]
_storage_opt_ok: Optional[bool] = None # cached result across instances
_storage_opt_ok: bool | None = None # cached result across instances
class DockerEnvironment(BaseEnvironment):
@ -74,7 +81,7 @@ class DockerEnvironment(BaseEnvironment):
self._base_image = image
self._persistent = persistent_filesystem
self._task_id = task_id
self._container_id: Optional[str] = None
self._container_id: str | None = None
logger.info(f"DockerEnvironment volumes: {volumes}")
# Ensure volumes is a list (config.yaml could be malformed)
if volumes is not None and not isinstance(volumes, list):
@ -105,8 +112,8 @@ class DockerEnvironment(BaseEnvironment):
# mode uses tmpfs (ephemeral, fast, gone on cleanup).
from tools.environments.base import get_sandbox_dir
self._workspace_dir: Optional[str] = None
self._home_dir: Optional[str] = None
self._workspace_dir: str | None = None
self._home_dir: str | None = None
if self._persistent:
sandbox = get_sandbox_dir() / "docker" / task_id
self._workspace_dir = str(sandbox / "workspace")
@ -114,14 +121,19 @@ class DockerEnvironment(BaseEnvironment):
os.makedirs(self._workspace_dir, exist_ok=True)
os.makedirs(self._home_dir, exist_ok=True)
writable_args = [
"-v", f"{self._workspace_dir}:/workspace",
"-v", f"{self._home_dir}:/root",
"-v",
f"{self._workspace_dir}:/workspace",
"-v",
f"{self._home_dir}:/root",
]
else:
writable_args = [
"--tmpfs", "/workspace:rw,exec,size=10g",
"--tmpfs", "/home:rw,exec,size=1g",
"--tmpfs", "/root:rw,exec,size=1g",
"--tmpfs",
"/workspace:rw,exec,size=10g",
"--tmpfs",
"/home:rw,exec,size=1g",
"--tmpfs",
"/root:rw,exec,size=1g",
]
# All containers get security hardening (capabilities dropped, no privilege
@ -129,7 +141,7 @@ class DockerEnvironment(BaseEnvironment):
# can install packages as needed.
# User-configured volume mounts (from config.yaml docker_volumes)
volume_args = []
for vol in (volumes or []):
for vol in volumes or []:
if not isinstance(vol, str):
logger.warning(f"Docker volume entry is not a string: {vol!r}")
continue
@ -146,7 +158,9 @@ class DockerEnvironment(BaseEnvironment):
logger.info(f"Docker run_args: {all_run_args}")
self._inner = _Docker(
image=image, cwd=cwd, timeout=timeout,
image=image,
cwd=cwd,
timeout=timeout,
run_args=all_run_args,
)
self._container_id = self._inner.container_id
@ -154,7 +168,7 @@ class DockerEnvironment(BaseEnvironment):
@staticmethod
def _storage_opt_supported() -> bool:
"""Check if Docker's storage driver supports --storage-opt size=.
Only overlay2 on XFS with pquota supports per-container disk quotas.
Ubuntu (and most distros) default to ext4, where this flag errors out.
"""
@ -164,7 +178,9 @@ class DockerEnvironment(BaseEnvironment):
try:
result = subprocess.run(
["docker", "info", "--format", "{{.Driver}}"],
capture_output=True, text=True, timeout=10,
capture_output=True,
text=True,
timeout=10,
)
driver = result.stdout.strip().lower()
if driver != "overlay2":
@ -174,14 +190,15 @@ class DockerEnvironment(BaseEnvironment):
# Probe by attempting a dry-ish run — the fastest reliable check.
probe = subprocess.run(
["docker", "create", "--storage-opt", "size=1m", "hello-world"],
capture_output=True, text=True, timeout=15,
capture_output=True,
text=True,
timeout=15,
)
if probe.returncode == 0:
# Clean up the created container
container_id = probe.stdout.strip()
if container_id:
subprocess.run(["docker", "rm", container_id],
capture_output=True, timeout=5)
subprocess.run(["docker", "rm", container_id], capture_output=True, timeout=5)
_storage_opt_ok = True
else:
_storage_opt_ok = False
@ -190,9 +207,9 @@ class DockerEnvironment(BaseEnvironment):
logger.debug("Docker --storage-opt support: %s", _storage_opt_ok)
return _storage_opt_ok
def execute(self, command: str, cwd: str = "", *,
timeout: int | None = None,
stdin_data: str | None = None) -> dict:
def execute(
self, command: str, cwd: str = "", *, timeout: int | None = None, stdin_data: str | None = None
) -> dict:
exec_command = self._prepare_command(command)
work_dir = cwd or self.cwd
effective_timeout = timeout or self.timeout
@ -218,7 +235,8 @@ class DockerEnvironment(BaseEnvironment):
_output_chunks = []
proc = subprocess.Popen(
cmd,
stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
stdin=subprocess.PIPE if stdin_data else subprocess.DEVNULL,
text=True,
)
@ -269,6 +287,7 @@ class DockerEnvironment(BaseEnvironment):
if not self._persistent:
import shutil
for d in (self._workspace_dir, self._home_dir):
if d:
shutil.rmtree(d, ignore_errors=True)

View file

@ -154,9 +154,9 @@ class LocalEnvironment(BaseEnvironment):
def __init__(self, cwd: str = "", timeout: int = 60, env: dict = None):
super().__init__(cwd=cwd or os.getcwd(), timeout=timeout, env=env)
def execute(self, command: str, cwd: str = "", *,
timeout: int | None = None,
stdin_data: str | None = None) -> dict:
def execute(
self, command: str, cwd: str = "", *, timeout: int | None = None, stdin_data: str | None = None
) -> dict:
from tools.terminal_tool import _interrupt_event
work_dir = cwd or self.cwd or os.getcwd()
@ -172,11 +172,7 @@ class LocalEnvironment(BaseEnvironment):
# Wrap with output fences so we can later extract the real
# command output and discard shell init/exit noise.
fenced_cmd = (
f"printf '{_OUTPUT_FENCE}';"
f" {exec_command};"
f" __hermes_rc=$?;"
f" printf '{_OUTPUT_FENCE}';"
f" exit $__hermes_rc"
f"printf '{_OUTPUT_FENCE}'; {exec_command}; __hermes_rc=$?; printf '{_OUTPUT_FENCE}'; exit $__hermes_rc"
)
# Ensure PATH always includes standard dirs — systemd services
# and some terminal multiplexers inherit a minimal PATH.
@ -200,12 +196,14 @@ class LocalEnvironment(BaseEnvironment):
)
if stdin_data is not None:
def _write_stdin():
try:
proc.stdin.write(stdin_data)
proc.stdin.close()
except (BrokenPipeError, OSError):
pass
threading.Thread(target=_write_stdin, daemon=True).start()
_output_chunks: list[str] = []

View file

@ -8,10 +8,9 @@ project files, and config changes survive across sessions.
import json
import logging
import threading
import time
import uuid
from pathlib import Path
from typing import Any, Dict, Optional
from typing import Any
from tools.environments.base import BaseEnvironment
from tools.interrupt import is_interrupted
@ -21,7 +20,7 @@ logger = logging.getLogger(__name__)
_SNAPSHOT_STORE = Path.home() / ".hermes" / "modal_snapshots.json"
def _load_snapshots() -> Dict[str, str]:
def _load_snapshots() -> dict[str, str]:
"""Load snapshot ID mapping from disk."""
if _SNAPSHOT_STORE.exists():
try:
@ -31,7 +30,7 @@ def _load_snapshots() -> Dict[str, str]:
return {}
def _save_snapshots(data: Dict[str, str]) -> None:
def _save_snapshots(data: dict[str, str]) -> None:
    """Write the task→snapshot-ID mapping to the on-disk store."""
    store = _SNAPSHOT_STORE
    store.parent.mkdir(parents=True, exist_ok=True)
    store.write_text(json.dumps(data, indent=2))
@ -52,7 +51,7 @@ class ModalEnvironment(BaseEnvironment):
image: str,
cwd: str = "~",
timeout: int = 60,
modal_sandbox_kwargs: Optional[Dict[str, Any]] = None,
modal_sandbox_kwargs: dict[str, Any] | None = None,
persistent_filesystem: bool = True,
task_id: str = "default",
):
@ -61,6 +60,7 @@ class ModalEnvironment(BaseEnvironment):
if not ModalEnvironment._patches_applied:
try:
from environments.patches import apply_patches
apply_patches()
except ImportError:
pass
@ -79,6 +79,7 @@ class ModalEnvironment(BaseEnvironment):
if snapshot_id:
try:
import modal
restored_image = modal.Image.from_id(snapshot_id)
logger.info("Modal: restoring from snapshot %s", snapshot_id[:20])
except Exception as e:
@ -88,6 +89,7 @@ class ModalEnvironment(BaseEnvironment):
effective_image = restored_image if restored_image else image
from minisweagent.environments.extra.swerex_modal import SwerexModalEnvironment
self._inner = SwerexModalEnvironment(
image=effective_image,
cwd=cwd,
@ -97,9 +99,9 @@ class ModalEnvironment(BaseEnvironment):
modal_sandbox_kwargs=sandbox_kwargs,
)
def execute(self, command: str, cwd: str = "", *,
timeout: int | None = None,
stdin_data: str | None = None) -> dict:
def execute(
self, command: str, cwd: str = "", *, timeout: int | None = None, stdin_data: str | None = None
) -> dict:
if stdin_data is not None:
marker = f"HERMES_EOF_{uuid.uuid4().hex[:8]}"
while marker in stdin_data:
@ -139,29 +141,29 @@ class ModalEnvironment(BaseEnvironment):
"""Snapshot the filesystem (if persistent) then stop the sandbox."""
if self._persistent:
try:
sandbox = getattr(self._inner, 'deployment', None)
sandbox = getattr(sandbox, '_sandbox', None) if sandbox else None
sandbox = getattr(self._inner, "deployment", None)
sandbox = getattr(sandbox, "_sandbox", None) if sandbox else None
if sandbox:
import asyncio
async def _snapshot():
img = await sandbox.snapshot_filesystem.aio()
return img.object_id
try:
snapshot_id = asyncio.run(_snapshot())
except RuntimeError:
import concurrent.futures
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
snapshot_id = pool.submit(
asyncio.run, _snapshot()
).result(timeout=60)
snapshot_id = pool.submit(asyncio.run, _snapshot()).result(timeout=60)
snapshots = _load_snapshots()
snapshots[self._task_id] = snapshot_id
_save_snapshots(snapshots)
logger.info("Modal: saved filesystem snapshot %s for task %s",
snapshot_id[:20], self._task_id)
logger.info("Modal: saved filesystem snapshot %s for task %s", snapshot_id[:20], self._task_id)
except Exception as e:
logger.warning("Modal: filesystem snapshot failed: %s", e)
if hasattr(self._inner, 'stop'):
if hasattr(self._inner, "stop"):
self._inner.stop()

View file

@ -10,11 +10,9 @@ import logging
import os
import shutil
import subprocess
import tempfile
import threading
import uuid
from pathlib import Path
from typing import Any, Dict, Optional
from tools.environments.base import BaseEnvironment
from tools.interrupt import is_interrupted
@ -24,7 +22,7 @@ logger = logging.getLogger(__name__)
_SNAPSHOT_STORE = Path.home() / ".hermes" / "singularity_snapshots.json"
def _load_snapshots() -> Dict[str, str]:
def _load_snapshots() -> dict[str, str]:
if _SNAPSHOT_STORE.exists():
try:
return json.loads(_SNAPSHOT_STORE.read_text())
@ -33,7 +31,7 @@ def _load_snapshots() -> Dict[str, str]:
return {}
def _save_snapshots(data: Dict[str, str]) -> None:
def _save_snapshots(data: dict[str, str]) -> None:
    """Persist the snapshot ID mapping as pretty-printed JSON."""
    store = _SNAPSHOT_STORE
    store.parent.mkdir(parents=True, exist_ok=True)
    store.write_text(json.dumps(data, indent=2))
@ -42,6 +40,7 @@ def _save_snapshots(data: Dict[str, str]) -> None:
# Singularity helpers (scratch dir, SIF cache, SIF building)
# -------------------------------------------------------------------------
def _get_scratch_dir() -> Path:
"""Get the best directory for Singularity sandboxes.
@ -58,6 +57,7 @@ def _get_scratch_dir() -> Path:
return scratch_path
from tools.environments.base import get_sandbox_dir
sandbox = get_sandbox_dir() / "singularity"
scratch = Path("/scratch")
@ -93,12 +93,12 @@ def _get_or_build_sif(image: str, executable: str = "apptainer") -> str:
Returns the path unchanged if it's already a .sif file.
For docker:// URLs, checks the cache and builds if needed.
"""
if image.endswith('.sif') and Path(image).exists():
if image.endswith(".sif") and Path(image).exists():
return image
if not image.startswith('docker://'):
if not image.startswith("docker://"):
return image
image_name = image.replace('docker://', '').replace('/', '-').replace(':', '-')
image_name = image.replace("docker://", "").replace("/", "-").replace(":", "-")
cache_dir = _get_apptainer_cache_dir()
sif_path = cache_dir / f"{image_name}.sif"
@ -123,7 +123,10 @@ def _get_or_build_sif(image: str, executable: str = "apptainer") -> str:
try:
result = subprocess.run(
[executable, "build", str(sif_path), image],
capture_output=True, text=True, timeout=600, env=env,
capture_output=True,
text=True,
timeout=600,
env=env,
)
if result.returncode != 0:
logger.warning("SIF build failed, falling back to docker:// URL")
@ -145,6 +148,7 @@ def _get_or_build_sif(image: str, executable: str = "apptainer") -> str:
# SingularityEnvironment
# -------------------------------------------------------------------------
class SingularityEnvironment(BaseEnvironment):
"""Hardened Singularity/Apptainer container with resource limits and persistence.
@ -174,7 +178,7 @@ class SingularityEnvironment(BaseEnvironment):
self._instance_started = False
self._persistent = persistent_filesystem
self._task_id = task_id
self._overlay_dir: Optional[Path] = None
self._overlay_dir: Path | None = None
# Resource limits
self._cpu = cpu
@ -215,14 +219,13 @@ class SingularityEnvironment(BaseEnvironment):
if result.returncode != 0:
raise RuntimeError(f"Failed to start instance: {result.stderr}")
self._instance_started = True
logger.info("Singularity instance %s started (persistent=%s)",
self.instance_id, self._persistent)
logger.info("Singularity instance %s started (persistent=%s)", self.instance_id, self._persistent)
except subprocess.TimeoutExpired:
raise RuntimeError("Instance start timed out")
def execute(self, command: str, cwd: str = "", *,
timeout: int | None = None,
stdin_data: str | None = None) -> dict:
def execute(
self, command: str, cwd: str = "", *, timeout: int | None = None, stdin_data: str | None = None
) -> dict:
if not self._instance_started:
return {"output": "Instance not started", "returncode": -1}
@ -235,16 +238,16 @@ class SingularityEnvironment(BaseEnvironment):
exec_command = f"cd {work_dir} && {exec_command}"
work_dir = "/tmp"
cmd = [self.executable, "exec", "--pwd", work_dir,
f"instance://{self.instance_id}",
"bash", "-c", exec_command]
cmd = [self.executable, "exec", "--pwd", work_dir, f"instance://{self.instance_id}", "bash", "-c", exec_command]
try:
import time as _time
_output_chunks = []
proc = subprocess.Popen(
cmd,
stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
stdin=subprocess.PIPE if stdin_data else subprocess.DEVNULL,
text=True,
)
@ -295,7 +298,9 @@ class SingularityEnvironment(BaseEnvironment):
try:
subprocess.run(
[self.executable, "instance", "stop", self.instance_id],
capture_output=True, text=True, timeout=30,
capture_output=True,
text=True,
timeout=30,
)
logger.info("Singularity instance %s stopped", self.instance_id)
except Exception as e:

View file

@ -24,8 +24,7 @@ class SSHEnvironment(BaseEnvironment):
and a remote kill is attempted over the ControlMaster socket.
"""
def __init__(self, host: str, user: str, cwd: str = "~",
timeout: int = 60, port: int = 22, key_path: str = ""):
def __init__(self, host: str, user: str, cwd: str = "~", timeout: int = 60, port: int = 22, key_path: str = ""):
super().__init__(cwd=cwd, timeout=timeout)
self.host = host
self.user = user
@ -65,12 +64,12 @@ class SSHEnvironment(BaseEnvironment):
except subprocess.TimeoutExpired:
raise RuntimeError(f"SSH connection to {self.user}@{self.host} timed out")
def execute(self, command: str, cwd: str = "", *,
timeout: int | None = None,
stdin_data: str | None = None) -> dict:
def execute(
self, command: str, cwd: str = "", *, timeout: int | None = None, stdin_data: str | None = None
) -> dict:
work_dir = cwd or self.cwd
exec_command = self._prepare_command(command)
wrapped = f'cd {work_dir} && {exec_command}'
wrapped = f"cd {work_dir} && {exec_command}"
effective_timeout = timeout or self.timeout
cmd = self._build_ssh_command()
@ -136,8 +135,7 @@ class SSHEnvironment(BaseEnvironment):
def cleanup(self):
if self.control_socket.exists():
try:
cmd = ["ssh", "-o", f"ControlPath={self.control_socket}",
"-O", "exit", f"{self.user}@{self.host}"]
cmd = ["ssh", "-o", f"ControlPath={self.control_socket}", "-O", "exit", f"{self.user}@{self.host}"]
subprocess.run(cmd, capture_output=True, timeout=5)
except (OSError, subprocess.SubprocessError):
pass