mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
Merge PR #451: feat: Add Daytona environment backend
Authored by rovle. Adds Daytona as the sixth terminal execution backend with cloud sandboxes, persistent workspaces, and full CLI/gateway integration. Includes 24 unit tests and 8 integration tests.
This commit is contained in:
commit
39299e2de4
22 changed files with 865 additions and 30 deletions
|
|
@ -155,7 +155,7 @@ hermes-agent/
|
|||
│ ├── skill_tools.py # Skill search, load, manage
|
||||
│ └── environments/ # Terminal execution backends
|
||||
│ ├── base.py # BaseEnvironment ABC
|
||||
│ ├── local.py, docker.py, ssh.py, singularity.py, modal.py
|
||||
│ ├── local.py, docker.py, ssh.py, singularity.py, modal.py, daytona.py
|
||||
│
|
||||
├── gateway/ # Messaging gateway
|
||||
│ ├── run.py # GatewayRunner — platform lifecycle, message routing, cron
|
||||
|
|
|
|||
|
|
@ -21,7 +21,7 @@ Use any model you want — [Nous Portal](https://portal.nousresearch.com), [Open
|
|||
<tr><td><b>Grows the longer it runs</b></td><td>Persistent memory across sessions. When it solves a hard problem, it writes a skill document for next time. Skills are searchable, shareable, and compatible with the <a href="https://agentskills.io">agentskills.io</a> open standard.</td></tr>
|
||||
<tr><td><b>Scheduled automations</b></td><td>Built-in cron scheduler with delivery to any platform. Daily reports, nightly backups, weekly audits — all in natural language, running unattended.</td></tr>
|
||||
<tr><td><b>Delegates and parallelizes</b></td><td>Spawn isolated subagents for parallel workstreams. Write Python scripts that call tools via RPC, collapsing multi-step pipelines into zero-context-cost turns.</td></tr>
|
||||
<tr><td><b>Real sandboxing</b></td><td>Five terminal backends — local, Docker, SSH, Singularity, and Modal — with persistent workspaces and container security hardening.</td></tr>
|
||||
<tr><td><b>Real sandboxing</b></td><td>Six terminal backends — local, Docker, SSH, Singularity, Modal, and Daytona — with persistent workspaces and container security hardening.</td></tr>
|
||||
<tr><td><b>Research-ready</b></td><td>Batch trajectory generation, Atropos RL environments, trajectory compression for training the next generation of tool-calling models.</td></tr>
|
||||
</table>
|
||||
|
||||
|
|
|
|||
|
|
@ -116,8 +116,23 @@ terminal:
|
|||
# timeout: 180
|
||||
# lifetime_seconds: 300
|
||||
# modal_image: "nikolaik/python-nodejs:python3.11-nodejs20"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# OPTION 6: Daytona cloud execution
|
||||
# Commands run in Daytona cloud sandboxes
|
||||
# Great for: Cloud dev environments, persistent workspaces, team collaboration
|
||||
# Requires: pip install daytona, DAYTONA_API_KEY env var
|
||||
# -----------------------------------------------------------------------------
|
||||
# terminal:
|
||||
# backend: "daytona"
|
||||
# cwd: "~"
|
||||
# timeout: 180
|
||||
# lifetime_seconds: 300
|
||||
# daytona_image: "nikolaik/python-nodejs:python3.11-nodejs20"
|
||||
# container_disk: 10240 # Daytona max is 10GB per sandbox
|
||||
|
||||
#
|
||||
# --- Container resource limits (docker, singularity, modal -- ignored for local/ssh) ---
|
||||
# --- Container resource limits (docker, singularity, modal, daytona -- ignored for local/ssh) ---
|
||||
# These settings apply to all container backends. They control the resources
|
||||
# allocated to the sandbox and whether its filesystem persists across sessions.
|
||||
container_cpu: 1 # CPU cores
|
||||
|
|
|
|||
4
cli.py
4
cli.py
|
|
@ -158,6 +158,7 @@ def load_cli_config() -> Dict[str, Any]:
|
|||
"docker_image": "python:3.11",
|
||||
"singularity_image": "docker://python:3.11",
|
||||
"modal_image": "python:3.11",
|
||||
"daytona_image": "nikolaik/python-nodejs:python3.11-nodejs20",
|
||||
},
|
||||
"browser": {
|
||||
"inactivity_timeout": 120, # Auto-cleanup inactive browser sessions after 2 min
|
||||
|
|
@ -284,12 +285,13 @@ def load_cli_config() -> Dict[str, Any]:
|
|||
"docker_image": "TERMINAL_DOCKER_IMAGE",
|
||||
"singularity_image": "TERMINAL_SINGULARITY_IMAGE",
|
||||
"modal_image": "TERMINAL_MODAL_IMAGE",
|
||||
"daytona_image": "TERMINAL_DAYTONA_IMAGE",
|
||||
# SSH config
|
||||
"ssh_host": "TERMINAL_SSH_HOST",
|
||||
"ssh_user": "TERMINAL_SSH_USER",
|
||||
"ssh_port": "TERMINAL_SSH_PORT",
|
||||
"ssh_key": "TERMINAL_SSH_KEY",
|
||||
# Container resource config (docker, singularity, modal -- ignored for local/ssh)
|
||||
# Container resource config (docker, singularity, modal, daytona -- ignored for local/ssh)
|
||||
"container_cpu": "TERMINAL_CONTAINER_CPU",
|
||||
"container_memory": "TERMINAL_CONTAINER_MEMORY",
|
||||
"container_disk": "TERMINAL_CONTAINER_DISK",
|
||||
|
|
|
|||
|
|
@ -66,6 +66,7 @@ if _config_path.exists():
|
|||
"docker_image": "TERMINAL_DOCKER_IMAGE",
|
||||
"singularity_image": "TERMINAL_SINGULARITY_IMAGE",
|
||||
"modal_image": "TERMINAL_MODAL_IMAGE",
|
||||
"daytona_image": "TERMINAL_DAYTONA_IMAGE",
|
||||
"ssh_host": "TERMINAL_SSH_HOST",
|
||||
"ssh_user": "TERMINAL_SSH_USER",
|
||||
"ssh_port": "TERMINAL_SSH_PORT",
|
||||
|
|
|
|||
|
|
@ -71,7 +71,8 @@ DEFAULT_CONFIG = {
|
|||
"docker_image": "nikolaik/python-nodejs:python3.11-nodejs20",
|
||||
"singularity_image": "docker://nikolaik/python-nodejs:python3.11-nodejs20",
|
||||
"modal_image": "nikolaik/python-nodejs:python3.11-nodejs20",
|
||||
# Container resource limits (docker, singularity, modal — ignored for local/ssh)
|
||||
"daytona_image": "nikolaik/python-nodejs:python3.11-nodejs20",
|
||||
# Container resource limits (docker, singularity, modal, daytona — ignored for local/ssh)
|
||||
"container_cpu": 1,
|
||||
"container_memory": 5120, # MB (default 5GB)
|
||||
"container_disk": 51200, # MB (default 50GB)
|
||||
|
|
@ -761,6 +762,10 @@ def show_config():
|
|||
print(f" Modal image: {terminal.get('modal_image', 'python:3.11')}")
|
||||
modal_token = get_env_value('MODAL_TOKEN_ID')
|
||||
print(f" Modal token: {'configured' if modal_token else '(not set)'}")
|
||||
elif terminal.get('backend') == 'daytona':
|
||||
print(f" Daytona image: {terminal.get('daytona_image', 'nikolaik/python-nodejs:python3.11-nodejs20')}")
|
||||
daytona_key = get_env_value('DAYTONA_API_KEY')
|
||||
print(f" API key: {'configured' if daytona_key else '(not set)'}")
|
||||
elif terminal.get('backend') == 'ssh':
|
||||
ssh_host = get_env_value('TERMINAL_SSH_HOST')
|
||||
ssh_user = get_env_value('TERMINAL_SSH_USER')
|
||||
|
|
@ -886,6 +891,7 @@ def set_config_value(key: str, value: str):
|
|||
"terminal.docker_image": "TERMINAL_DOCKER_IMAGE",
|
||||
"terminal.singularity_image": "TERMINAL_SINGULARITY_IMAGE",
|
||||
"terminal.modal_image": "TERMINAL_MODAL_IMAGE",
|
||||
"terminal.daytona_image": "TERMINAL_DAYTONA_IMAGE",
|
||||
"terminal.cwd": "TERMINAL_CWD",
|
||||
"terminal.timeout": "TERMINAL_TIMEOUT",
|
||||
}
|
||||
|
|
|
|||
|
|
@ -355,6 +355,21 @@ def run_doctor(args):
|
|||
check_fail("TERMINAL_SSH_HOST not set", "(required for TERMINAL_ENV=ssh)")
|
||||
issues.append("Set TERMINAL_SSH_HOST in .env")
|
||||
|
||||
# Daytona (if using daytona backend)
|
||||
if terminal_env == "daytona":
|
||||
daytona_key = os.getenv("DAYTONA_API_KEY")
|
||||
if daytona_key:
|
||||
check_ok("Daytona API key", "(configured)")
|
||||
else:
|
||||
check_fail("DAYTONA_API_KEY not set", "(required for TERMINAL_ENV=daytona)")
|
||||
issues.append("Set DAYTONA_API_KEY environment variable")
|
||||
try:
|
||||
from daytona import Daytona
|
||||
check_ok("daytona SDK", "(installed)")
|
||||
except ImportError:
|
||||
check_fail("daytona SDK not installed", "(pip install daytona)")
|
||||
issues.append("Install daytona SDK: pip install daytona")
|
||||
|
||||
# Node.js + agent-browser (for browser automation tools)
|
||||
if shutil.which("node"):
|
||||
check_ok("Node.js")
|
||||
|
|
|
|||
|
|
@ -980,19 +980,20 @@ def run_setup_wizard(args):
|
|||
|
||||
terminal_choices.extend([
|
||||
"Modal (cloud execution, GPU access, serverless)",
|
||||
"Daytona (cloud sandboxes, persistent workspaces)",
|
||||
"SSH (run commands on a remote server)",
|
||||
f"Keep current ({current_backend})"
|
||||
])
|
||||
|
||||
# Build index map based on available choices
|
||||
if is_linux:
|
||||
backend_to_idx = {'local': 0, 'docker': 1, 'singularity': 2, 'modal': 3, 'ssh': 4}
|
||||
idx_to_backend = {0: 'local', 1: 'docker', 2: 'singularity', 3: 'modal', 4: 'ssh'}
|
||||
keep_current_idx = 5
|
||||
backend_to_idx = {'local': 0, 'docker': 1, 'singularity': 2, 'modal': 3, 'daytona': 4, 'ssh': 5}
|
||||
idx_to_backend = {0: 'local', 1: 'docker', 2: 'singularity', 3: 'modal', 4: 'daytona', 5: 'ssh'}
|
||||
keep_current_idx = 6
|
||||
else:
|
||||
backend_to_idx = {'local': 0, 'docker': 1, 'modal': 2, 'ssh': 3}
|
||||
idx_to_backend = {0: 'local', 1: 'docker', 2: 'modal', 3: 'ssh'}
|
||||
keep_current_idx = 4
|
||||
backend_to_idx = {'local': 0, 'docker': 1, 'modal': 2, 'daytona': 3, 'ssh': 4}
|
||||
idx_to_backend = {0: 'local', 1: 'docker', 2: 'modal', 3: 'daytona', 4: 'ssh'}
|
||||
keep_current_idx = 5
|
||||
if current_backend == 'singularity':
|
||||
print_warning("Singularity is only available on Linux - please select a different backend")
|
||||
|
||||
|
|
@ -1067,7 +1068,7 @@ def run_setup_wizard(args):
|
|||
|
||||
print()
|
||||
print_info("Note: Container resource settings (CPU, memory, disk, persistence)")
|
||||
print_info("are in your config but only apply to Docker/Singularity/Modal backends.")
|
||||
print_info("are in your config but only apply to Docker/Singularity/Modal/Daytona backends.")
|
||||
|
||||
if prompt_yes_no(" Enable sudo support? (allows agent to run sudo commands)", False):
|
||||
print_warning(" SECURITY WARNING: Sudo password will be stored in plaintext")
|
||||
|
|
@ -1151,7 +1152,52 @@ def run_setup_wizard(args):
|
|||
|
||||
_prompt_container_resources(config)
|
||||
print_success("Terminal set to Modal")
|
||||
|
||||
|
||||
elif selected_backend == 'daytona':
|
||||
config.setdefault('terminal', {})['backend'] = 'daytona'
|
||||
default_daytona = config.get('terminal', {}).get('daytona_image', 'nikolaik/python-nodejs:python3.11-nodejs20')
|
||||
print_info("Daytona Cloud Configuration:")
|
||||
print_info("Get your API key at: https://app.daytona.io/dashboard/keys")
|
||||
|
||||
# Check if daytona SDK is installed
|
||||
try:
|
||||
from daytona import Daytona
|
||||
print_info("daytona SDK: installed ✓")
|
||||
except ImportError:
|
||||
print_info("Installing required package: daytona...")
|
||||
import subprocess
|
||||
import shutil
|
||||
uv_bin = shutil.which("uv")
|
||||
if uv_bin:
|
||||
result = subprocess.run(
|
||||
[uv_bin, "pip", "install", "daytona"],
|
||||
capture_output=True, text=True
|
||||
)
|
||||
else:
|
||||
result = subprocess.run(
|
||||
[sys.executable, "-m", "pip", "install", "daytona"],
|
||||
capture_output=True, text=True
|
||||
)
|
||||
if result.returncode == 0:
|
||||
print_success("daytona SDK installed")
|
||||
else:
|
||||
print_warning("Failed to install daytona SDK — install manually:")
|
||||
print_info(' pip install daytona')
|
||||
|
||||
daytona_image = prompt(" Container image", default_daytona)
|
||||
config['terminal']['daytona_image'] = daytona_image
|
||||
|
||||
current_key = get_env_value('DAYTONA_API_KEY')
|
||||
if current_key:
|
||||
print_info(f" API Key: {current_key[:8]}... (configured)")
|
||||
|
||||
api_key = prompt(" Daytona API key", current_key or "", password=True)
|
||||
if api_key:
|
||||
save_env_value("DAYTONA_API_KEY", api_key)
|
||||
|
||||
_prompt_container_resources(config)
|
||||
print_success("Terminal set to Daytona")
|
||||
|
||||
elif selected_backend == 'ssh':
|
||||
config.setdefault('terminal', {})['backend'] = 'ssh'
|
||||
print_info("SSH Remote Execution Configuration:")
|
||||
|
|
@ -1181,7 +1227,7 @@ def run_setup_wizard(args):
|
|||
|
||||
print()
|
||||
print_info("Note: Container resource settings (CPU, memory, disk, persistence)")
|
||||
print_info("are in your config but only apply to Docker/Singularity/Modal backends.")
|
||||
print_info("are in your config but only apply to Docker/Singularity/Modal/Daytona backends.")
|
||||
print_success("Terminal set to SSH")
|
||||
# else: Keep current (selected_backend is None)
|
||||
|
||||
|
|
@ -1192,6 +1238,9 @@ def run_setup_wizard(args):
|
|||
docker_image = config.get('terminal', {}).get('docker_image')
|
||||
if docker_image:
|
||||
save_env_value("TERMINAL_DOCKER_IMAGE", docker_image)
|
||||
daytona_image = config.get('terminal', {}).get('daytona_image')
|
||||
if daytona_image:
|
||||
save_env_value("TERMINAL_DAYTONA_IMAGE", daytona_image)
|
||||
|
||||
# =========================================================================
|
||||
# Step 5: Agent Settings
|
||||
|
|
|
|||
|
|
@ -163,6 +163,9 @@ def show_status(args):
|
|||
elif terminal_env == "docker":
|
||||
docker_image = os.getenv("TERMINAL_DOCKER_IMAGE", "python:3.11-slim")
|
||||
print(f" Docker Image: {docker_image}")
|
||||
elif terminal_env == "daytona":
|
||||
daytona_image = os.getenv("TERMINAL_DAYTONA_IMAGE", "nikolaik/python-nodejs:python3.11-nodejs20")
|
||||
print(f" Daytona Image: {daytona_image}")
|
||||
|
||||
sudo_password = os.getenv("SUDO_PASSWORD", "")
|
||||
print(f" Sudo: {check_mark(bool(sudo_password))} {'enabled' if sudo_password else 'disabled'}")
|
||||
|
|
|
|||
|
|
@ -39,6 +39,7 @@ dependencies = [
|
|||
|
||||
[project.optional-dependencies]
|
||||
modal = ["swe-rex[modal]>=1.4.0"]
|
||||
daytona = ["daytona>=0.148.0"]
|
||||
dev = ["pytest", "pytest-asyncio"]
|
||||
messaging = ["python-telegram-bot>=20.0", "discord.py>=2.0", "aiohttp>=3.9.0", "slack-bolt>=1.18.0", "slack-sdk>=3.27.0"]
|
||||
cron = ["croniter"]
|
||||
|
|
@ -51,6 +52,7 @@ mcp = ["mcp>=1.2.0"]
|
|||
homeassistant = ["aiohttp>=3.9.0"]
|
||||
all = [
|
||||
"hermes-agent[modal]",
|
||||
"hermes-agent[daytona]",
|
||||
"hermes-agent[messaging]",
|
||||
"hermes-agent[cron]",
|
||||
"hermes-agent[cli]",
|
||||
|
|
|
|||
123
tests/integration/test_daytona_terminal.py
Normal file
123
tests/integration/test_daytona_terminal.py
Normal file
|
|
@ -0,0 +1,123 @@
|
|||
"""Integration tests for the Daytona terminal backend.
|
||||
|
||||
Requires DAYTONA_API_KEY to be set. Run with:
|
||||
TERMINAL_ENV=daytona pytest tests/integration/test_daytona_terminal.py -v
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
pytestmark = pytest.mark.integration
|
||||
|
||||
# Skip entire module if no API key
|
||||
if not os.getenv("DAYTONA_API_KEY"):
|
||||
pytest.skip("DAYTONA_API_KEY not set", allow_module_level=True)
|
||||
|
||||
# Import terminal_tool via importlib to avoid tools/__init__.py side effects
|
||||
import importlib.util
|
||||
|
||||
parent_dir = Path(__file__).parent.parent.parent
|
||||
sys.path.insert(0, str(parent_dir))
|
||||
|
||||
spec = importlib.util.spec_from_file_location(
|
||||
"terminal_tool", parent_dir / "tools" / "terminal_tool.py"
|
||||
)
|
||||
terminal_module = importlib.util.module_from_spec(spec)
|
||||
spec.loader.exec_module(terminal_module)
|
||||
|
||||
terminal_tool = terminal_module.terminal_tool
|
||||
cleanup_vm = terminal_module.cleanup_vm
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _force_daytona(monkeypatch):
|
||||
monkeypatch.setenv("TERMINAL_ENV", "daytona")
|
||||
monkeypatch.setenv("TERMINAL_CONTAINER_DISK", "10240")
|
||||
monkeypatch.setenv("TERMINAL_CONTAINER_PERSISTENT", "false")
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def task_id(request):
|
||||
"""Provide a unique task_id and clean up the sandbox after the test."""
|
||||
tid = f"daytona_test_{request.node.name}"
|
||||
yield tid
|
||||
cleanup_vm(tid)
|
||||
|
||||
|
||||
def _run(command, task_id, **kwargs):
|
||||
result = terminal_tool(command, task_id=task_id, **kwargs)
|
||||
return json.loads(result)
|
||||
|
||||
|
||||
class TestDaytonaBasic:
|
||||
def test_echo(self, task_id):
|
||||
r = _run("echo 'Hello from Daytona!'", task_id)
|
||||
assert r["exit_code"] == 0
|
||||
assert "Hello from Daytona!" in r["output"]
|
||||
|
||||
def test_python_version(self, task_id):
|
||||
r = _run("python3 --version", task_id)
|
||||
assert r["exit_code"] == 0
|
||||
assert "Python" in r["output"]
|
||||
|
||||
def test_nonzero_exit(self, task_id):
|
||||
r = _run("exit 42", task_id)
|
||||
assert r["exit_code"] == 42
|
||||
|
||||
def test_os_info(self, task_id):
|
||||
r = _run("uname -a", task_id)
|
||||
assert r["exit_code"] == 0
|
||||
assert "Linux" in r["output"]
|
||||
|
||||
|
||||
class TestDaytonaFilesystem:
|
||||
def test_write_and_read_file(self, task_id):
|
||||
_run("echo 'test content' > /tmp/daytona_test.txt", task_id)
|
||||
r = _run("cat /tmp/daytona_test.txt", task_id)
|
||||
assert r["exit_code"] == 0
|
||||
assert "test content" in r["output"]
|
||||
|
||||
def test_persistence_within_session(self, task_id):
|
||||
_run("pip install cowsay 2>/dev/null", task_id, timeout=120)
|
||||
r = _run('python3 -c "import cowsay; print(cowsay.__file__)"', task_id)
|
||||
assert r["exit_code"] == 0
|
||||
assert "cowsay" in r["output"]
|
||||
|
||||
|
||||
class TestDaytonaPersistence:
|
||||
def test_filesystem_survives_stop_and_resume(self):
|
||||
"""Write a file, stop the sandbox, resume it, assert the file persists."""
|
||||
task = "daytona_test_persist"
|
||||
try:
|
||||
# Enable persistence for this test
|
||||
os.environ["TERMINAL_CONTAINER_PERSISTENT"] = "true"
|
||||
|
||||
# Write a marker file and stop the sandbox
|
||||
_run("echo 'survive' > /tmp/persist_test.txt", task)
|
||||
cleanup_vm(task) # stops (not deletes) because persistent=true
|
||||
|
||||
# Resume with the same task_id — file should still exist
|
||||
r = _run("cat /tmp/persist_test.txt", task)
|
||||
assert r["exit_code"] == 0
|
||||
assert "survive" in r["output"]
|
||||
finally:
|
||||
# Force-delete so the sandbox doesn't leak
|
||||
os.environ["TERMINAL_CONTAINER_PERSISTENT"] = "false"
|
||||
cleanup_vm(task)
|
||||
|
||||
|
||||
class TestDaytonaIsolation:
|
||||
def test_different_tasks_isolated(self):
|
||||
task_a = "daytona_test_iso_a"
|
||||
task_b = "daytona_test_iso_b"
|
||||
try:
|
||||
_run("echo 'secret' > /tmp/isolated.txt", task_a)
|
||||
r = _run("cat /tmp/isolated.txt 2>&1 || echo NOT_FOUND", task_b)
|
||||
assert "secret" not in r["output"] or "NOT_FOUND" in r["output"]
|
||||
finally:
|
||||
cleanup_vm(task_a)
|
||||
cleanup_vm(task_b)
|
||||
381
tests/tools/test_daytona_environment.py
Normal file
381
tests/tools/test_daytona_environment.py
Normal file
|
|
@ -0,0 +1,381 @@
|
|||
"""Unit tests for the Daytona cloud sandbox environment backend."""
|
||||
|
||||
import threading
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import MagicMock, patch, PropertyMock
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers to build mock Daytona SDK objects
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _make_exec_response(result="", exit_code=0):
|
||||
return SimpleNamespace(result=result, exit_code=exit_code)
|
||||
|
||||
|
||||
def _make_sandbox(sandbox_id="sb-123", state="started"):
|
||||
sb = MagicMock()
|
||||
sb.id = sandbox_id
|
||||
sb.state = state
|
||||
sb.process.exec.return_value = _make_exec_response()
|
||||
return sb
|
||||
|
||||
|
||||
def _patch_daytona_imports(monkeypatch):
|
||||
"""Patch the daytona SDK so DaytonaEnvironment can be imported without it."""
|
||||
import types as _types
|
||||
|
||||
import enum
|
||||
|
||||
class _SandboxState(str, enum.Enum):
|
||||
STARTED = "started"
|
||||
STOPPED = "stopped"
|
||||
ARCHIVED = "archived"
|
||||
ERROR = "error"
|
||||
|
||||
daytona_mod = _types.ModuleType("daytona")
|
||||
daytona_mod.Daytona = MagicMock
|
||||
daytona_mod.CreateSandboxFromImageParams = MagicMock
|
||||
daytona_mod.DaytonaError = type("DaytonaError", (Exception,), {})
|
||||
daytona_mod.Resources = MagicMock(name="Resources")
|
||||
daytona_mod.SandboxState = _SandboxState
|
||||
|
||||
monkeypatch.setitem(__import__("sys").modules, "daytona", daytona_mod)
|
||||
return daytona_mod
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fixtures
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@pytest.fixture()
|
||||
def daytona_sdk(monkeypatch):
|
||||
"""Provide a mock daytona SDK module and return it for assertions."""
|
||||
return _patch_daytona_imports(monkeypatch)
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def make_env(daytona_sdk, monkeypatch):
|
||||
"""Factory that creates a DaytonaEnvironment with a mocked SDK."""
|
||||
# Prevent is_interrupted from interfering
|
||||
monkeypatch.setattr("tools.interrupt.is_interrupted", lambda: False)
|
||||
|
||||
def _factory(
|
||||
sandbox=None,
|
||||
find_one_side_effect=None,
|
||||
home_dir="/root",
|
||||
persistent=True,
|
||||
**kwargs,
|
||||
):
|
||||
sandbox = sandbox or _make_sandbox()
|
||||
# Mock the $HOME detection
|
||||
sandbox.process.exec.return_value = _make_exec_response(result=home_dir)
|
||||
|
||||
mock_client = MagicMock()
|
||||
mock_client.create.return_value = sandbox
|
||||
|
||||
if find_one_side_effect is not None:
|
||||
mock_client.find_one.side_effect = find_one_side_effect
|
||||
else:
|
||||
# Default: no existing sandbox found
|
||||
mock_client.find_one.side_effect = daytona_sdk.DaytonaError("not found")
|
||||
|
||||
daytona_sdk.Daytona = MagicMock(return_value=mock_client)
|
||||
|
||||
from tools.environments.daytona import DaytonaEnvironment
|
||||
|
||||
kwargs.setdefault("disk", 10240)
|
||||
env = DaytonaEnvironment(
|
||||
image="test-image:latest",
|
||||
persistent_filesystem=persistent,
|
||||
**kwargs,
|
||||
)
|
||||
env._mock_client = mock_client # expose for assertions
|
||||
return env
|
||||
|
||||
return _factory
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Constructor / cwd resolution
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestCwdResolution:
|
||||
def test_default_cwd_resolves_home(self, make_env):
|
||||
env = make_env(home_dir="/home/testuser")
|
||||
assert env.cwd == "/home/testuser"
|
||||
|
||||
def test_tilde_cwd_resolves_home(self, make_env):
|
||||
env = make_env(cwd="~", home_dir="/home/testuser")
|
||||
assert env.cwd == "/home/testuser"
|
||||
|
||||
def test_explicit_cwd_not_overridden(self, make_env):
|
||||
env = make_env(cwd="/workspace", home_dir="/root")
|
||||
assert env.cwd == "/workspace"
|
||||
|
||||
def test_home_detection_failure_keeps_default_cwd(self, make_env):
|
||||
sb = _make_sandbox()
|
||||
sb.process.exec.side_effect = RuntimeError("exec failed")
|
||||
env = make_env(sandbox=sb)
|
||||
assert env.cwd == "/home/daytona" # keeps constructor default
|
||||
|
||||
def test_empty_home_keeps_default_cwd(self, make_env):
|
||||
env = make_env(home_dir="")
|
||||
assert env.cwd == "/home/daytona" # keeps constructor default
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Sandbox persistence / resume
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestPersistence:
|
||||
def test_persistent_resumes_existing_sandbox(self, make_env):
|
||||
existing = _make_sandbox(sandbox_id="sb-existing")
|
||||
existing.process.exec.return_value = _make_exec_response(result="/root")
|
||||
env = make_env(find_one_side_effect=lambda **kw: existing, persistent=True)
|
||||
existing.start.assert_called_once()
|
||||
# Should NOT have called create since find_one succeeded
|
||||
env._mock_client.create.assert_not_called()
|
||||
|
||||
def test_persistent_creates_new_when_none_found(self, make_env, daytona_sdk):
|
||||
env = make_env(
|
||||
find_one_side_effect=daytona_sdk.DaytonaError("not found"),
|
||||
persistent=True,
|
||||
)
|
||||
env._mock_client.create.assert_called_once()
|
||||
|
||||
def test_non_persistent_skips_find_one(self, make_env):
|
||||
env = make_env(persistent=False)
|
||||
env._mock_client.find_one.assert_not_called()
|
||||
env._mock_client.create.assert_called_once()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Cleanup
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestCleanup:
|
||||
def test_persistent_cleanup_stops_sandbox(self, make_env):
|
||||
env = make_env(persistent=True)
|
||||
sb = env._sandbox
|
||||
env.cleanup()
|
||||
sb.stop.assert_called_once()
|
||||
|
||||
def test_non_persistent_cleanup_deletes_sandbox(self, make_env):
|
||||
env = make_env(persistent=False)
|
||||
sb = env._sandbox
|
||||
env.cleanup()
|
||||
env._mock_client.delete.assert_called_once_with(sb)
|
||||
|
||||
def test_cleanup_idempotent(self, make_env):
|
||||
env = make_env(persistent=True)
|
||||
env.cleanup()
|
||||
env.cleanup() # should not raise
|
||||
|
||||
def test_cleanup_swallows_errors(self, make_env):
|
||||
env = make_env(persistent=True)
|
||||
env._sandbox.stop.side_effect = RuntimeError("stop failed")
|
||||
env.cleanup() # should not raise
|
||||
assert env._sandbox is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Execute
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestExecute:
|
||||
def test_basic_command(self, make_env):
|
||||
sb = _make_sandbox()
|
||||
# First call: $HOME detection; subsequent calls: actual commands
|
||||
sb.process.exec.side_effect = [
|
||||
_make_exec_response(result="/root"), # $HOME
|
||||
_make_exec_response(result="hello", exit_code=0), # actual cmd
|
||||
]
|
||||
sb.state = "started"
|
||||
env = make_env(sandbox=sb)
|
||||
|
||||
result = env.execute("echo hello")
|
||||
assert result["output"] == "hello"
|
||||
assert result["returncode"] == 0
|
||||
|
||||
def test_command_wrapped_with_shell_timeout(self, make_env):
|
||||
sb = _make_sandbox()
|
||||
sb.process.exec.side_effect = [
|
||||
_make_exec_response(result="/root"),
|
||||
_make_exec_response(result="ok", exit_code=0),
|
||||
]
|
||||
sb.state = "started"
|
||||
env = make_env(sandbox=sb, timeout=42)
|
||||
|
||||
env.execute("echo hello")
|
||||
# The command sent to exec should be wrapped with `timeout N sh -c '...'`
|
||||
call_args = sb.process.exec.call_args_list[-1]
|
||||
cmd = call_args[0][0]
|
||||
assert cmd.startswith("timeout 42 sh -c ")
|
||||
# SDK timeout param should NOT be passed
|
||||
assert "timeout" not in call_args[1]
|
||||
|
||||
def test_timeout_returns_exit_code_124(self, make_env):
|
||||
"""Shell timeout utility returns exit code 124."""
|
||||
sb = _make_sandbox()
|
||||
sb.process.exec.side_effect = [
|
||||
_make_exec_response(result="/root"),
|
||||
_make_exec_response(result="", exit_code=124),
|
||||
]
|
||||
sb.state = "started"
|
||||
env = make_env(sandbox=sb)
|
||||
|
||||
result = env.execute("sleep 300", timeout=5)
|
||||
assert result["returncode"] == 124
|
||||
|
||||
def test_nonzero_exit_code(self, make_env):
|
||||
sb = _make_sandbox()
|
||||
sb.process.exec.side_effect = [
|
||||
_make_exec_response(result="/root"),
|
||||
_make_exec_response(result="not found", exit_code=127),
|
||||
]
|
||||
sb.state = "started"
|
||||
env = make_env(sandbox=sb)
|
||||
|
||||
result = env.execute("bad_cmd")
|
||||
assert result["returncode"] == 127
|
||||
|
||||
def test_stdin_data_wraps_heredoc(self, make_env):
|
||||
sb = _make_sandbox()
|
||||
sb.process.exec.side_effect = [
|
||||
_make_exec_response(result="/root"),
|
||||
_make_exec_response(result="ok", exit_code=0),
|
||||
]
|
||||
sb.state = "started"
|
||||
env = make_env(sandbox=sb)
|
||||
|
||||
env.execute("python3", stdin_data="print('hi')")
|
||||
# Check that the command passed to exec contains heredoc markers
|
||||
# (single quotes get shell-escaped by shlex.quote, so check components)
|
||||
call_args = sb.process.exec.call_args_list[-1]
|
||||
cmd = call_args[0][0]
|
||||
assert "HERMES_EOF_" in cmd
|
||||
assert "print" in cmd
|
||||
assert "hi" in cmd
|
||||
|
||||
def test_custom_cwd_passed_through(self, make_env):
|
||||
sb = _make_sandbox()
|
||||
sb.process.exec.side_effect = [
|
||||
_make_exec_response(result="/root"),
|
||||
_make_exec_response(result="/tmp", exit_code=0),
|
||||
]
|
||||
sb.state = "started"
|
||||
env = make_env(sandbox=sb)
|
||||
|
||||
env.execute("pwd", cwd="/tmp")
|
||||
call_kwargs = sb.process.exec.call_args_list[-1][1]
|
||||
assert call_kwargs["cwd"] == "/tmp"
|
||||
|
||||
def test_daytona_error_triggers_retry(self, make_env, daytona_sdk):
|
||||
sb = _make_sandbox()
|
||||
sb.state = "started"
|
||||
sb.process.exec.side_effect = [
|
||||
_make_exec_response(result="/root"), # $HOME
|
||||
daytona_sdk.DaytonaError("transient"), # first attempt fails
|
||||
_make_exec_response(result="ok", exit_code=0), # retry succeeds
|
||||
]
|
||||
env = make_env(sandbox=sb)
|
||||
|
||||
result = env.execute("echo retry")
|
||||
assert result["output"] == "ok"
|
||||
assert result["returncode"] == 0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Resource conversion
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestResourceConversion:
|
||||
def _get_resources_kwargs(self, daytona_sdk):
|
||||
return daytona_sdk.Resources.call_args.kwargs
|
||||
|
||||
def test_memory_converted_to_gib(self, make_env, daytona_sdk):
|
||||
env = make_env(memory=5120)
|
||||
assert self._get_resources_kwargs(daytona_sdk)["memory"] == 5
|
||||
|
||||
def test_disk_converted_to_gib(self, make_env, daytona_sdk):
|
||||
env = make_env(disk=10240)
|
||||
assert self._get_resources_kwargs(daytona_sdk)["disk"] == 10
|
||||
|
||||
def test_small_values_clamped_to_1(self, make_env, daytona_sdk):
|
||||
env = make_env(memory=100, disk=100)
|
||||
kw = self._get_resources_kwargs(daytona_sdk)
|
||||
assert kw["memory"] == 1
|
||||
assert kw["disk"] == 1
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Ensure sandbox ready
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestInterrupt:
|
||||
def test_interrupt_stops_sandbox_and_returns_130(self, make_env, monkeypatch):
|
||||
sb = _make_sandbox()
|
||||
sb.state = "started"
|
||||
event = threading.Event()
|
||||
calls = {"n": 0}
|
||||
|
||||
def exec_side_effect(*args, **kwargs):
|
||||
calls["n"] += 1
|
||||
if calls["n"] == 1:
|
||||
return _make_exec_response(result="/root") # $HOME detection
|
||||
event.wait(timeout=5) # simulate long-running command
|
||||
return _make_exec_response(result="done", exit_code=0)
|
||||
|
||||
sb.process.exec.side_effect = exec_side_effect
|
||||
env = make_env(sandbox=sb)
|
||||
|
||||
monkeypatch.setattr(
|
||||
"tools.environments.daytona.is_interrupted", lambda: True
|
||||
)
|
||||
try:
|
||||
result = env.execute("sleep 10")
|
||||
assert result["returncode"] == 130
|
||||
sb.stop.assert_called()
|
||||
finally:
|
||||
event.set()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Retry exhaustion
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestRetryExhausted:
    """When both the first attempt and the single retry fail, the error surfaces."""

    def test_both_attempts_fail(self, make_env, daytona_sdk):
        sandbox = _make_sandbox()
        sandbox.state = "started"
        scripted = [
            _make_exec_response(result="/root"),   # $HOME probe
            daytona_sdk.DaytonaError("fail1"),     # first attempt
            daytona_sdk.DaytonaError("fail2"),     # retry
        ]
        sandbox.process.exec.side_effect = scripted
        env = make_env(sandbox=sandbox)

        outcome = env.execute("echo x")
        assert outcome["returncode"] == 1
        assert "Daytona execution error" in outcome["output"]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Ensure sandbox ready
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestEnsureSandboxReady:
    """_ensure_sandbox_ready restarts a stopped sandbox and leaves a running one alone."""

    def test_restarts_stopped_sandbox(self, make_env):
        environment = make_env()
        environment._sandbox.state = "stopped"
        environment._ensure_sandbox_ready()
        environment._sandbox.start.assert_called()

    def test_no_restart_when_running(self, make_env):
        environment = make_env()
        environment._sandbox.state = "started"
        environment._ensure_sandbox_ready()
        environment._sandbox.start.assert_not_called()
|
||||
|
|
@ -247,7 +247,7 @@ def check_dangerous_command(command: str, env_type: str,
|
|||
Returns:
|
||||
{"approved": True/False, "message": str or None, ...}
|
||||
"""
|
||||
if env_type in ("docker", "singularity", "modal"):
|
||||
if env_type in ("docker", "singularity", "modal", "daytona"):
|
||||
return {"approved": True, "message": None}
|
||||
|
||||
is_dangerous, pattern_key, description = detect_dangerous_command(command)
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
Each backend provides the same interface (BaseEnvironment ABC) for running
|
||||
shell commands in a specific execution context: local, Docker, Singularity,
|
||||
SSH, or Modal.
|
||||
SSH, Modal, or Daytona.
|
||||
|
||||
The terminal_tool.py factory (_create_environment) selects the backend
|
||||
based on the TERMINAL_ENV configuration.
|
||||
|
|
|
|||
220
tools/environments/daytona.py
Normal file
220
tools/environments/daytona.py
Normal file
|
|
@ -0,0 +1,220 @@
|
|||
"""Daytona cloud execution environment.
|
||||
|
||||
Uses the Daytona Python SDK to run commands in cloud sandboxes.
|
||||
Supports persistent sandboxes: when enabled, sandboxes are stopped on cleanup
|
||||
and resumed on next creation, preserving the filesystem across sessions.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import math
|
||||
import shlex
|
||||
import threading
|
||||
import uuid
|
||||
import warnings
|
||||
from typing import Optional
|
||||
|
||||
from tools.environments.base import BaseEnvironment
|
||||
from tools.interrupt import is_interrupted
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class DaytonaEnvironment(BaseEnvironment):
    """Daytona cloud sandbox execution backend.

    Uses stopped/started sandbox lifecycle for filesystem persistence
    instead of snapshots, making it faster and stateless on the host.

    Concurrency note: ``self._lock`` serializes sandbox lifecycle operations
    (start/stop/refresh) across `execute`, the interrupt path in
    `_exec_in_thread`, and `cleanup`. The lock is deliberately NOT held
    while a command runs, so an interrupt can stop the sandbox mid-command.
    """

    def __init__(
        self,
        image: str,
        cwd: str = "/home/daytona",
        timeout: int = 60,
        cpu: int = 1,
        memory: int = 5120,  # MB (hermes convention)
        disk: int = 10240,  # MB (Daytona platform max is 10GB)
        persistent_filesystem: bool = True,
        task_id: str = "default",
    ):
        """Create (or resume) a Daytona sandbox for *task_id*.

        Args:
            image: Container image the sandbox is built from.
            cwd: Requested working directory; "~" or the default
                "/home/daytona" triggers $HOME auto-detection below.
            timeout: Default per-command timeout in seconds.
            cpu: Sandbox CPU count.
            memory: Sandbox memory in MB (converted to GiB, min 1).
            disk: Sandbox disk in MB (converted to GiB, min 1, capped at 10).
            persistent_filesystem: If True, reuse a stopped sandbox labeled
                with this task_id instead of creating a fresh one.
            task_id: Label used to find/resume this task's sandbox.
        """
        # Remember the caller's request before BaseEnvironment normalizes it,
        # so we can tell whether $HOME detection should override it.
        self._requested_cwd = cwd
        super().__init__(cwd=cwd, timeout=timeout)

        # Imported lazily so the daytona SDK is only required when this
        # backend is actually selected.
        from daytona import (
            Daytona,
            CreateSandboxFromImageParams,
            DaytonaError,
            Resources,
            SandboxState,
        )

        self._persistent = persistent_filesystem
        self._task_id = task_id
        # Kept on the instance so methods can compare states without
        # re-importing the SDK.
        self._SandboxState = SandboxState
        self._daytona = Daytona()
        self._sandbox = None
        self._lock = threading.Lock()

        # Hermes config speaks MB; the Daytona Resources API speaks GiB.
        # Round up and clamp to the 1 GiB platform minimum.
        memory_gib = max(1, math.ceil(memory / 1024))
        disk_gib = max(1, math.ceil(disk / 1024))
        if disk_gib > 10:
            warnings.warn(
                f"Daytona: requested disk ({disk_gib}GB) exceeds platform limit (10GB). "
                f"Capping to 10GB. Set container_disk: 10240 in config to silence this.",
                stacklevel=2,
            )
            disk_gib = 10
        resources = Resources(cpu=cpu, memory=memory_gib, disk=disk_gib)

        labels = {"hermes_task_id": task_id}

        # Try to resume an existing stopped sandbox for this task
        if self._persistent:
            try:
                self._sandbox = self._daytona.find_one(labels=labels)
                self._sandbox.start()
                logger.info("Daytona: resumed sandbox %s for task %s",
                            self._sandbox.id, task_id)
            except DaytonaError:
                # Presumably "no sandbox matches these labels" — fall through
                # to fresh creation. TODO confirm against SDK semantics.
                self._sandbox = None
            except Exception as e:
                # Resume is best-effort; any other failure also falls back to
                # creating a fresh sandbox, but is worth logging.
                logger.warning("Daytona: failed to resume sandbox for task %s: %s",
                               task_id, e)
                self._sandbox = None

        # Create a fresh sandbox if we don't have one
        if self._sandbox is None:
            self._sandbox = self._daytona.create(
                CreateSandboxFromImageParams(
                    image=image,
                    labels=labels,
                    # auto_stop_interval=0: we manage stop/delete ourselves
                    # in cleanup(); don't let the platform auto-stop it.
                    auto_stop_interval=0,
                    resources=resources,
                )
            )
            logger.info("Daytona: created sandbox %s for task %s",
                        self._sandbox.id, task_id)

        # Resolve cwd: detect actual home dir inside the sandbox.
        # Only when the caller kept the default (or "~") — an explicit cwd
        # is respected as-is.
        if self._requested_cwd in ("~", "/home/daytona"):
            try:
                home = self._sandbox.process.exec("echo $HOME").result.strip()
                if home:
                    self.cwd = home
            except Exception:
                pass  # leave cwd as-is; sandbox will use its own default
        logger.info("Daytona: resolved cwd to %s", self.cwd)

    def _ensure_sandbox_ready(self) -> None:
        """Restart sandbox if it was stopped (e.g., by a previous interrupt)."""
        # refresh_data() re-reads remote state; the cached .state may be stale.
        self._sandbox.refresh_data()
        if self._sandbox.state in (self._SandboxState.STOPPED, self._SandboxState.ARCHIVED):
            self._sandbox.start()
            logger.info("Daytona: restarted sandbox %s", self._sandbox.id)

    def _exec_in_thread(self, exec_command: str, cwd: Optional[str], timeout: int) -> dict:
        """Run exec in a background thread with interrupt polling.

        The Daytona SDK's exec(timeout=...) parameter is unreliable (the
        server-side timeout is not enforced and the SDK has no client-side
        fallback), so we wrap the command with the shell ``timeout`` utility
        which reliably kills the process and returns exit code 124.

        Returns one of:
            {"output": str, "returncode": int} — normal / interrupted / timed-out
            {"error": Exception}              — the exec call itself raised
        """
        # Wrap with shell `timeout` to enforce the deadline reliably.
        # The client-side deadline below (timeout + 10) is the backstop: it
        # is deliberately longer than the shell timeout, so under normal
        # operation the in-sandbox `timeout` fires first with exit code 124.
        timed_command = f"timeout {timeout} sh -c {shlex.quote(exec_command)}"

        result_holder: dict = {"value": None, "error": None}

        def _run():
            # Runs in a daemon thread; exceptions are captured, not raised,
            # so the polling loop below stays in control.
            try:
                response = self._sandbox.process.exec(
                    timed_command, cwd=cwd,
                )
                result_holder["value"] = {
                    "output": response.result or "",
                    "returncode": response.exit_code,
                }
            except Exception as e:
                result_holder["error"] = e

        t = threading.Thread(target=_run, daemon=True)
        t.start()
        # Wait for timeout + generous buffer for network/SDK overhead
        deadline = timeout + 10
        while t.is_alive():
            # Poll every 200ms so interrupts are noticed promptly.
            t.join(timeout=0.2)
            deadline -= 0.2
            if is_interrupted():
                # Stopping the sandbox is the only way to kill the remote
                # command; the daemon thread is abandoned and will finish
                # (or error) harmlessly on its own.
                with self._lock:
                    try:
                        self._sandbox.stop()
                    except Exception:
                        pass
                return {
                    "output": "[Command interrupted - Daytona sandbox stopped]",
                    "returncode": 130,
                }
            if deadline <= 0:
                # Shell timeout didn't fire and SDK is hung — force stop
                with self._lock:
                    try:
                        self._sandbox.stop()
                    except Exception:
                        pass
                return self._timeout_result(timeout)

        if result_holder["error"]:
            return {"error": result_holder["error"]}
        return result_holder["value"]

    def execute(self, command: str, cwd: str = "", *,
                timeout: Optional[int] = None,
                stdin_data: Optional[str] = None) -> dict:
        """Execute *command* in the sandbox.

        Args:
            command: Shell command to run.
            cwd: Working directory override (falls back to self.cwd).
            timeout: Per-call timeout override (falls back to self.timeout).
            stdin_data: If given, fed to the command via a heredoc.

        Returns:
            {"output": str, "returncode": int}

        On a DaytonaError the sandbox is re-readied and the command is
        retried exactly once before the error is reported.
        """
        with self._lock:
            self._ensure_sandbox_ready()

        if stdin_data is not None:
            # Emulate stdin with a heredoc; re-roll the marker until it does
            # not collide with the payload.
            marker = f"HERMES_EOF_{uuid.uuid4().hex[:8]}"
            while marker in stdin_data:
                marker = f"HERMES_EOF_{uuid.uuid4().hex[:8]}"
            command = f"{command} << '{marker}'\n{stdin_data}\n{marker}"

        exec_command = self._prepare_command(command)
        effective_cwd = cwd or self.cwd or None
        effective_timeout = timeout or self.timeout

        result = self._exec_in_thread(exec_command, effective_cwd, effective_timeout)

        if "error" in result:
            from daytona import DaytonaError
            err = result["error"]
            if isinstance(err, DaytonaError):
                # Transient SDK failure: restart the sandbox if needed and
                # retry the command exactly once.
                with self._lock:
                    try:
                        self._ensure_sandbox_ready()
                    except Exception:
                        return {"output": f"Daytona execution error: {err}", "returncode": 1}
                result = self._exec_in_thread(exec_command, effective_cwd, effective_timeout)
                if "error" not in result:
                    return result
            # Retry exhausted, or a non-Daytona exception: report the
            # ORIGINAL error with a generic failure code.
            return {"output": f"Daytona execution error: {err}", "returncode": 1}

        return result

    def cleanup(self) -> None:
        """Stop (persistent) or delete (ephemeral) the sandbox. Idempotent."""
        with self._lock:
            if self._sandbox is None:
                return
            try:
                if self._persistent:
                    # Stop rather than delete so the filesystem survives and
                    # __init__ can resume it via the task_id label next time.
                    self._sandbox.stop()
                    logger.info("Daytona: stopped sandbox %s (filesystem preserved)",
                                self._sandbox.id)
                else:
                    self._daytona.delete(self._sandbox)
                    logger.info("Daytona: deleted sandbox %s", self._sandbox.id)
            except Exception as e:
                # Best-effort: a failed stop/delete should not crash shutdown.
                logger.warning("Daytona: cleanup failed: %s", e)
            self._sandbox = None
|
||||
|
|
@ -75,6 +75,8 @@ def _get_file_ops(task_id: str = "default") -> ShellFileOperations:
|
|||
image = overrides.get("singularity_image") or config["singularity_image"]
|
||||
elif env_type == "modal":
|
||||
image = overrides.get("modal_image") or config["modal_image"]
|
||||
elif env_type == "daytona":
|
||||
image = overrides.get("daytona_image") or config["daytona_image"]
|
||||
else:
|
||||
image = ""
|
||||
|
||||
|
|
@ -82,7 +84,7 @@ def _get_file_ops(task_id: str = "default") -> ShellFileOperations:
|
|||
logger.info("Creating new %s environment for task %s...", env_type, task_id[:8])
|
||||
|
||||
container_config = None
|
||||
if env_type in ("docker", "singularity", "modal"):
|
||||
if env_type in ("docker", "singularity", "modal", "daytona"):
|
||||
container_config = {
|
||||
"container_cpu": config.get("container_cpu", 1),
|
||||
"container_memory": config.get("container_memory", 5120),
|
||||
|
|
|
|||
|
|
@ -423,7 +423,7 @@ def _get_env_config() -> Dict[str, Any]:
|
|||
# catches the case where cli.py (or .env) leaked the host's CWD.
|
||||
# SSH is excluded since /home/ paths are valid on remote machines.
|
||||
cwd = os.getenv("TERMINAL_CWD", default_cwd)
|
||||
if env_type in ("modal", "docker", "singularity") and cwd:
|
||||
if env_type in ("modal", "docker", "singularity", "daytona") and cwd:
|
||||
host_prefixes = ("/Users/", "C:\\", "C:/")
|
||||
if any(cwd.startswith(p) for p in host_prefixes) and cwd != default_cwd:
|
||||
logger.info("Ignoring TERMINAL_CWD=%r for %s backend "
|
||||
|
|
@ -436,6 +436,7 @@ def _get_env_config() -> Dict[str, Any]:
|
|||
"docker_image": os.getenv("TERMINAL_DOCKER_IMAGE", default_image),
|
||||
"singularity_image": os.getenv("TERMINAL_SINGULARITY_IMAGE", f"docker://{default_image}"),
|
||||
"modal_image": os.getenv("TERMINAL_MODAL_IMAGE", default_image),
|
||||
"daytona_image": os.getenv("TERMINAL_DAYTONA_IMAGE", default_image),
|
||||
"cwd": cwd,
|
||||
"timeout": int(os.getenv("TERMINAL_TIMEOUT", "180")),
|
||||
"lifetime_seconds": int(os.getenv("TERMINAL_LIFETIME_SECONDS", "300")),
|
||||
|
|
@ -444,7 +445,7 @@ def _get_env_config() -> Dict[str, Any]:
|
|||
"ssh_user": os.getenv("TERMINAL_SSH_USER", ""),
|
||||
"ssh_port": int(os.getenv("TERMINAL_SSH_PORT", "22")),
|
||||
"ssh_key": os.getenv("TERMINAL_SSH_KEY", ""),
|
||||
# Container resource config (applies to docker, singularity, modal -- ignored for local/ssh)
|
||||
# Container resource config (applies to docker, singularity, modal, daytona -- ignored for local/ssh)
|
||||
"container_cpu": float(os.getenv("TERMINAL_CONTAINER_CPU", "1")),
|
||||
"container_memory": int(os.getenv("TERMINAL_CONTAINER_MEMORY", "5120")), # MB (default 5GB)
|
||||
"container_disk": int(os.getenv("TERMINAL_CONTAINER_DISK", "51200")), # MB (default 50GB)
|
||||
|
|
@ -460,7 +461,7 @@ def _create_environment(env_type: str, image: str, cwd: str, timeout: int,
|
|||
Create an execution environment from mini-swe-agent.
|
||||
|
||||
Args:
|
||||
env_type: One of "local", "docker", "singularity", "modal", "ssh"
|
||||
env_type: One of "local", "docker", "singularity", "modal", "daytona", "ssh"
|
||||
image: Docker/Singularity/Modal image name (ignored for local/ssh)
|
||||
cwd: Working directory
|
||||
timeout: Default command timeout
|
||||
|
|
@ -511,6 +512,15 @@ def _create_environment(env_type: str, image: str, cwd: str, timeout: int,
|
|||
persistent_filesystem=persistent, task_id=task_id,
|
||||
)
|
||||
|
||||
elif env_type == "daytona":
|
||||
# Lazy import so daytona SDK is only required when backend is selected.
|
||||
from tools.environments.daytona import DaytonaEnvironment as _DaytonaEnvironment
|
||||
return _DaytonaEnvironment(
|
||||
image=image, cwd=cwd, timeout=timeout,
|
||||
cpu=int(cpu), memory=memory, disk=disk,
|
||||
persistent_filesystem=persistent, task_id=task_id,
|
||||
)
|
||||
|
||||
elif env_type == "ssh":
|
||||
if not ssh_config or not ssh_config.get("host") or not ssh_config.get("user"):
|
||||
raise ValueError("SSH environment requires ssh_host and ssh_user to be configured")
|
||||
|
|
@ -522,9 +532,9 @@ def _create_environment(env_type: str, image: str, cwd: str, timeout: int,
|
|||
cwd=cwd,
|
||||
timeout=timeout,
|
||||
)
|
||||
|
||||
|
||||
else:
|
||||
raise ValueError(f"Unknown environment type: {env_type}. Use 'local', 'docker', 'singularity', 'modal', or 'ssh'")
|
||||
raise ValueError(f"Unknown environment type: {env_type}. Use 'local', 'docker', 'singularity', 'modal', 'daytona', or 'ssh'")
|
||||
|
||||
|
||||
def _cleanup_inactive_envs(lifetime_seconds: int = 300):
|
||||
|
|
@ -799,9 +809,11 @@ def terminal_tool(
|
|||
image = overrides.get("singularity_image") or config["singularity_image"]
|
||||
elif env_type == "modal":
|
||||
image = overrides.get("modal_image") or config["modal_image"]
|
||||
elif env_type == "daytona":
|
||||
image = overrides.get("daytona_image") or config["daytona_image"]
|
||||
else:
|
||||
image = ""
|
||||
|
||||
|
||||
cwd = overrides.get("cwd") or config["cwd"]
|
||||
default_timeout = config["timeout"]
|
||||
effective_timeout = timeout or default_timeout
|
||||
|
|
@ -851,7 +863,7 @@ def terminal_tool(
|
|||
}
|
||||
|
||||
container_config = None
|
||||
if env_type in ("docker", "singularity", "modal"):
|
||||
if env_type in ("docker", "singularity", "modal", "daytona"):
|
||||
container_config = {
|
||||
"container_cpu": config.get("container_cpu", 1),
|
||||
"container_memory": config.get("container_memory", 5120),
|
||||
|
|
@ -1090,6 +1102,9 @@ def check_terminal_requirements() -> bool:
|
|||
from minisweagent.environments.extra.swerex_modal import SwerexModalEnvironment
|
||||
# Check for modal token
|
||||
return os.getenv("MODAL_TOKEN_ID") is not None or Path.home().joinpath(".modal.toml").exists()
|
||||
elif env_type == "daytona":
|
||||
from daytona import Daytona
|
||||
return os.getenv("DAYTONA_API_KEY") is not None
|
||||
else:
|
||||
return False
|
||||
except Exception as e:
|
||||
|
|
@ -1128,10 +1143,11 @@ if __name__ == "__main__":
|
|||
|
||||
print("\nEnvironment Variables:")
|
||||
default_img = "nikolaik/python-nodejs:python3.11-nodejs20"
|
||||
print(f" TERMINAL_ENV: {os.getenv('TERMINAL_ENV', 'local')} (local/docker/singularity/modal/ssh)")
|
||||
print(f" TERMINAL_ENV: {os.getenv('TERMINAL_ENV', 'local')} (local/docker/singularity/modal/daytona/ssh)")
|
||||
print(f" TERMINAL_DOCKER_IMAGE: {os.getenv('TERMINAL_DOCKER_IMAGE', default_img)}")
|
||||
print(f" TERMINAL_SINGULARITY_IMAGE: {os.getenv('TERMINAL_SINGULARITY_IMAGE', f'docker://{default_img}')}")
|
||||
print(f" TERMINAL_MODAL_IMAGE: {os.getenv('TERMINAL_MODAL_IMAGE', default_img)}")
|
||||
print(f" TERMINAL_DAYTONA_IMAGE: {os.getenv('TERMINAL_DAYTONA_IMAGE', default_img)}")
|
||||
print(f" TERMINAL_CWD: {os.getenv('TERMINAL_CWD', os.getcwd())}")
|
||||
print(f" TERMINAL_SANDBOX_DIR: {os.getenv('TERMINAL_SANDBOX_DIR', '~/.hermes/sandboxes')}")
|
||||
print(f" TERMINAL_TIMEOUT: {os.getenv('TERMINAL_TIMEOUT', '60')}")
|
||||
|
|
|
|||
|
|
@ -55,7 +55,7 @@ hermes-agent/
|
|||
│ ├── skill_manager_tool.py # Skill management
|
||||
│ └── environments/ # Terminal execution backends
|
||||
│ ├── base.py # BaseEnvironment ABC
|
||||
│ ├── local.py, docker.py, ssh.py, singularity.py, modal.py
|
||||
│ ├── local.py, docker.py, ssh.py, singularity.py, modal.py, daytona.py
|
||||
│
|
||||
├── gateway/ # Messaging gateway
|
||||
│ ├── run.py # GatewayRunner — platform lifecycle, message routing
|
||||
|
|
|
|||
|
|
@ -42,7 +42,7 @@ It's not a coding copilot tethered to an IDE or a chatbot wrapper around a singl
|
|||
- **Grows the longer it runs** — Persistent memory and self-created skills
|
||||
- **Scheduled automations** — Built-in cron with delivery to any platform
|
||||
- **Delegates & parallelizes** — Spawn isolated subagents for parallel workstreams
|
||||
- **Real sandboxing** — 5 terminal backends: local, Docker, SSH, Singularity, Modal
|
||||
- **Real sandboxing** — 6 terminal backends: local, Docker, SSH, Singularity, Modal, Daytona
|
||||
- **Full web control** — Search, extract, browse, vision, image generation, TTS
|
||||
- **MCP support** — Connect to any MCP server for extended tool capabilities
|
||||
- **Research-ready** — Batch processing, trajectory export, RL training integration
|
||||
|
|
|
|||
|
|
@ -49,7 +49,7 @@ All variables go in `~/.hermes/.env`. You can also set them with `hermes config
|
|||
|
||||
| Variable | Description |
|
||||
|----------|-------------|
|
||||
| `TERMINAL_ENV` | Backend: `local`, `docker`, `ssh`, `singularity`, `modal` |
|
||||
| `TERMINAL_ENV` | Backend: `local`, `docker`, `ssh`, `singularity`, `modal`, `daytona` |
|
||||
| `TERMINAL_DOCKER_IMAGE` | Docker image (default: `python:3.11`) |
|
||||
| `TERMINAL_DOCKER_VOLUMES` | Additional Docker volume mounts (comma-separated `host:container` pairs) |
|
||||
| `TERMINAL_SINGULARITY_IMAGE` | Singularity image or `.sif` path |
|
||||
|
|
|
|||
|
|
@ -133,7 +133,7 @@ Configure which environment the agent uses for terminal commands:
|
|||
|
||||
```yaml
|
||||
terminal:
|
||||
backend: local # or: docker, ssh, singularity, modal
|
||||
backend: local # or: docker, ssh, singularity, modal, daytona
|
||||
cwd: "." # Working directory ("." = current dir)
|
||||
timeout: 180 # Command timeout in seconds
|
||||
```
|
||||
|
|
|
|||
|
|
@ -62,7 +62,7 @@ The terminal tool can execute commands in different environments:
|
|||
```yaml
|
||||
# In ~/.hermes/config.yaml
|
||||
terminal:
|
||||
backend: local # or: docker, ssh, singularity, modal
|
||||
backend: local # or: docker, ssh, singularity, modal, daytona
|
||||
cwd: "." # Working directory
|
||||
timeout: 180 # Command timeout in seconds
|
||||
```
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue