mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
feat: enhance interrupt handling and container resource configuration
- Introduced a shared interrupt signaling mechanism to allow tools to check for user interrupts during long-running operations.
- Updated the AIAgent to handle interrupts more effectively, ensuring in-progress tool calls are canceled and multiple interrupt messages are combined into one prompt.
- Enhanced the CLI configuration to include container resource limits (CPU, memory, disk) and persistence options for Docker, Singularity, and Modal environments.
- Improved documentation to clarify interrupt behaviors and container resource settings, providing users with better guidance on configuration and usage.
This commit is contained in:
parent
c7857dc1d4
commit
90af34bc83
18 changed files with 940 additions and 90 deletions
|
|
@ -1,21 +1,61 @@
|
|||
"""Modal cloud execution environment wrapping mini-swe-agent's SwerexModalEnvironment."""
|
||||
"""Modal cloud execution environment wrapping mini-swe-agent's SwerexModalEnvironment.
|
||||
|
||||
Supports persistent filesystem snapshots: when enabled, the sandbox's filesystem
|
||||
is snapshotted on cleanup and restored on next creation, so installed packages,
|
||||
project files, and config changes survive across sessions.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import threading
|
||||
import time
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
from tools.environments.base import BaseEnvironment
|
||||
from tools.interrupt import is_interrupted
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# JSON file holding the mapping from task ID to Modal filesystem snapshot ID.
_SNAPSHOT_STORE = Path.home() / ".hermes" / "modal_snapshots.json"
|
||||
|
||||
|
||||
def _load_snapshots() -> Dict[str, str]:
    """Load the task-ID -> snapshot-ID mapping from ``_SNAPSHOT_STORE``.

    Loading is best-effort and never raises for a bad store file.

    Returns:
        The mapping read from disk, or an empty dict when the store file is
        missing, cannot be read, is not valid JSON, or does not hold a JSON
        object at the top level.
    """
    try:
        # Read directly instead of checking exists() first: the file may
        # disappear between the check and the read.
        data = json.loads(_SNAPSHOT_STORE.read_text())
    except FileNotFoundError:
        # No store yet is the normal first-run case; stay quiet.
        return {}
    except (OSError, ValueError) as e:
        # ValueError covers both invalid JSON and undecodable bytes.
        logger.warning(
            "Modal: ignoring unreadable snapshot store %s: %s", _SNAPSHOT_STORE, e
        )
        return {}

    if not isinstance(data, dict):
        # Callers use dict methods on the result, so any other JSON value
        # (list, string, number, null) is treated as "no snapshots".
        logger.warning(
            "Modal: ignoring snapshot store %s: expected a JSON object, got %s",
            _SNAPSHOT_STORE,
            type(data).__name__,
        )
        return {}
    return data
|
||||
|
||||
|
||||
def _save_snapshots(data: Dict[str, str]) -> None:
    """Persist the task-ID -> snapshot-ID mapping to ``_SNAPSHOT_STORE``.

    The JSON is written to a sibling ``.tmp`` file and then renamed over the
    store, so an interrupted write cannot leave a truncated store behind.

    Args:
        data: Mapping to serialize as indented JSON.

    Raises:
        TypeError: If ``data`` contains values that are not JSON-serializable.
        OSError: If the directory or file cannot be created or replaced.
    """
    # Serialize first so a bad value fails before anything on disk changes.
    payload = json.dumps(data, indent=2)

    _SNAPSHOT_STORE.parent.mkdir(parents=True, exist_ok=True)

    # Same directory as the target so the rename stays on one filesystem.
    tmp_path = _SNAPSHOT_STORE.with_name(_SNAPSHOT_STORE.name + ".tmp")
    tmp_path.write_text(payload)
    tmp_path.replace(_SNAPSHOT_STORE)
|
||||
|
||||
|
||||
class ModalEnvironment(BaseEnvironment):
|
||||
"""Modal cloud execution via mini-swe-agent.
|
||||
|
||||
Wraps SwerexModalEnvironment and adds sudo -S support.
|
||||
Async-safety patches are applied once before first use so Modal
|
||||
works inside any event loop (Atropos, gateway, etc.).
|
||||
Wraps SwerexModalEnvironment and adds sudo -S support, configurable
|
||||
resources (CPU, memory, disk), and optional filesystem persistence
|
||||
via Modal's snapshot_filesystem() API.
|
||||
"""
|
||||
|
||||
_patches_applied = False
|
||||
|
||||
def __init__(self, image: str, cwd: str = "/root", timeout: int = 60):
|
||||
def __init__(
|
||||
self,
|
||||
image: str,
|
||||
cwd: str = "/root",
|
||||
timeout: int = 60,
|
||||
modal_sandbox_kwargs: Optional[Dict[str, Any]] = None,
|
||||
persistent_filesystem: bool = True,
|
||||
task_id: str = "default",
|
||||
):
|
||||
super().__init__(cwd=cwd, timeout=timeout)
|
||||
|
||||
if not ModalEnvironment._patches_applied:
|
||||
|
|
@ -26,10 +66,35 @@ class ModalEnvironment(BaseEnvironment):
|
|||
pass
|
||||
ModalEnvironment._patches_applied = True
|
||||
|
||||
self._persistent = persistent_filesystem
|
||||
self._task_id = task_id
|
||||
self._base_image = image
|
||||
|
||||
sandbox_kwargs = dict(modal_sandbox_kwargs or {})
|
||||
|
||||
# If persistent, try to restore from a previous snapshot
|
||||
restored_image = None
|
||||
if self._persistent:
|
||||
snapshot_id = _load_snapshots().get(self._task_id)
|
||||
if snapshot_id:
|
||||
try:
|
||||
import modal
|
||||
restored_image = modal.Image.from_id(snapshot_id)
|
||||
logger.info("Modal: restoring from snapshot %s", snapshot_id[:20])
|
||||
except Exception as e:
|
||||
logger.warning("Modal: failed to restore snapshot, using base image: %s", e)
|
||||
restored_image = None
|
||||
|
||||
effective_image = restored_image if restored_image else image
|
||||
|
||||
from minisweagent.environments.extra.swerex_modal import SwerexModalEnvironment
|
||||
self._inner = SwerexModalEnvironment(
|
||||
image=image, cwd=cwd, timeout=timeout,
|
||||
startup_timeout=180.0, runtime_timeout=3600.0,
|
||||
image=effective_image,
|
||||
cwd=cwd,
|
||||
timeout=timeout,
|
||||
startup_timeout=180.0,
|
||||
runtime_timeout=3600.0,
|
||||
modal_sandbox_kwargs=sandbox_kwargs,
|
||||
)
|
||||
|
||||
def execute(self, command: str, cwd: str = "", *,
|
||||
|
|
@ -42,8 +107,61 @@ class ModalEnvironment(BaseEnvironment):
|
|||
command = f"{command} << '{marker}'\n{stdin_data}\n{marker}"
|
||||
|
||||
exec_command = self._prepare_command(command)
|
||||
return self._inner.execute(exec_command, cwd=cwd, timeout=timeout)
|
||||
|
||||
# Run in a background thread so we can poll for interrupts
|
||||
result_holder = {"value": None, "error": None}
|
||||
|
||||
def _run():
|
||||
try:
|
||||
result_holder["value"] = self._inner.execute(exec_command, cwd=cwd, timeout=timeout)
|
||||
except Exception as e:
|
||||
result_holder["error"] = e
|
||||
|
||||
t = threading.Thread(target=_run, daemon=True)
|
||||
t.start()
|
||||
while t.is_alive():
|
||||
t.join(timeout=0.2)
|
||||
if is_interrupted():
|
||||
try:
|
||||
self._inner.stop()
|
||||
except Exception:
|
||||
pass
|
||||
return {
|
||||
"output": "[Command interrupted - Modal sandbox terminated]",
|
||||
"returncode": 130,
|
||||
}
|
||||
|
||||
if result_holder["error"]:
|
||||
return {"output": f"Modal execution error: {result_holder['error']}", "returncode": 1}
|
||||
return result_holder["value"]
|
||||
|
||||
def cleanup(self):
    """Snapshot the filesystem (if persistent) then stop the sandbox.

    Snapshotting is best-effort: any ``Exception`` it raises is logged as a
    warning and otherwise ignored. Stopping the sandbox happens in a
    ``finally`` clause, so it is still attempted when snapshotting is aborted
    by a non-``Exception`` error such as ``KeyboardInterrupt``.
    """
    try:
        if self._persistent:
            try:
                self._save_filesystem_snapshot()
            except Exception as e:
                logger.warning("Modal: filesystem snapshot failed: %s", e)
    finally:
        if hasattr(self._inner, 'stop'):
            self._inner.stop()


def _save_filesystem_snapshot(self):
    """Snapshot the sandbox filesystem and record the snapshot ID for this task.

    Does nothing when the wrapped environment exposes no sandbox object.
    Otherwise stores the new snapshot ID under ``self._task_id`` in the
    on-disk snapshot mapping.

    Raises:
        TimeoutError: If a snapshot started from inside a running event loop
            does not finish within 60 seconds.
        Exception: Whatever the snapshot call or the store update raises.
    """
    deployment = getattr(self._inner, 'deployment', None)
    sandbox = getattr(deployment, '_sandbox', None) if deployment else None
    if not sandbox:
        return

    import asyncio

    async def _snapshot():
        img = await sandbox.snapshot_filesystem.aio()
        return img.object_id

    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No event loop is running in this thread: asyncio.run() is safe here.
        snapshot_id = asyncio.run(_snapshot())
    else:
        # asyncio.run() refuses to start inside a running loop, so run the
        # coroutine on its own loop in a helper thread. A daemon thread with
        # a bounded join keeps a hung snapshot from blocking cleanup.
        outcome = {}

        def _worker():
            try:
                outcome["value"] = asyncio.run(_snapshot())
            except Exception as exc:
                outcome["error"] = exc

        worker = threading.Thread(target=_worker, daemon=True)
        worker.start()
        worker.join(timeout=60)
        if worker.is_alive():
            raise TimeoutError("filesystem snapshot did not finish within 60 seconds")
        if "error" in outcome:
            raise outcome["error"]
        snapshot_id = outcome["value"]

    snapshots = _load_snapshots()
    snapshots[self._task_id] = snapshot_id
    _save_snapshots(snapshots)
    logger.info("Modal: saved filesystem snapshot %s for task %s",
                snapshot_id[:20], self._task_id)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue