diff --git a/tests/integration/test_daytona_terminal.py b/tests/integration/test_daytona_terminal.py
new file mode 100644
index 000000000..b8b72fb26
--- /dev/null
+++ b/tests/integration/test_daytona_terminal.py
@@ -0,0 +1,125 @@
+"""Integration tests for the Daytona terminal backend.
+
+Requires DAYTONA_API_KEY to be set. Run with:
+    TERMINAL_ENV=daytona pytest tests/integration/test_daytona_terminal.py -v
+"""
+
+import json
+import os
+import sys
+from pathlib import Path
+
+import pytest
+
+pytestmark = pytest.mark.integration
+
+# Skip entire module if no API key
+if not os.getenv("DAYTONA_API_KEY"):
+    pytest.skip("DAYTONA_API_KEY not set", allow_module_level=True)
+
+# Import terminal_tool via importlib to avoid tools/__init__.py side effects
+import importlib.util
+
+parent_dir = Path(__file__).parent.parent.parent
+sys.path.insert(0, str(parent_dir))
+
+spec = importlib.util.spec_from_file_location(
+    "terminal_tool", parent_dir / "tools" / "terminal_tool.py"
+)
+terminal_module = importlib.util.module_from_spec(spec)
+spec.loader.exec_module(terminal_module)
+
+terminal_tool = terminal_module.terminal_tool
+cleanup_vm = terminal_module.cleanup_vm
+
+
+@pytest.fixture(autouse=True)
+def _force_daytona(monkeypatch):
+    """Set the TERMINAL_* env vars that select the Daytona backend for each test."""
+    monkeypatch.setenv("TERMINAL_ENV", "daytona")
+    monkeypatch.setenv("TERMINAL_CONTAINER_DISK", "10240")
+    monkeypatch.setenv("TERMINAL_CONTAINER_PERSISTENT", "false")
+
+
+@pytest.fixture()
+def task_id(request):
+    """Provide a unique task_id and clean up the sandbox after the test."""
+    tid = f"daytona_test_{request.node.name}"
+    yield tid
+    cleanup_vm(tid)
+
+
+def _run(command, task_id, **kwargs):
+    """Run *command* through terminal_tool and return its JSON result as a dict."""
+    result = terminal_tool(command, task_id=task_id, **kwargs)
+    return json.loads(result)
+
+
+class TestDaytonaBasic:
+    def test_echo(self, task_id):
+        r = _run("echo 'Hello from Daytona!'", task_id)
+        assert r["exit_code"] == 0
+        assert "Hello from Daytona!" in r["output"]
+
+    def test_python_version(self, task_id):
+        r = _run("python3 --version", task_id)
+        assert r["exit_code"] == 0
+        assert "Python" in r["output"]
+
+    def test_nonzero_exit(self, task_id):
+        r = _run("exit 42", task_id)
+        assert r["exit_code"] == 42
+
+    def test_os_info(self, task_id):
+        r = _run("uname -a", task_id)
+        assert r["exit_code"] == 0
+        assert "Linux" in r["output"]
+
+
+class TestDaytonaFilesystem:
+    def test_write_and_read_file(self, task_id):
+        _run("echo 'test content' > /tmp/daytona_test.txt", task_id)
+        r = _run("cat /tmp/daytona_test.txt", task_id)
+        assert r["exit_code"] == 0
+        assert "test content" in r["output"]
+
+    def test_persistence_within_session(self, task_id):
+        _run("pip install cowsay 2>/dev/null", task_id, timeout=120)
+        r = _run('python3 -c "import cowsay; print(cowsay.__file__)"', task_id)
+        assert r["exit_code"] == 0
+        assert "cowsay" in r["output"]
+
+
+class TestDaytonaPersistence:
+    def test_filesystem_survives_stop_and_resume(self, monkeypatch):
+        """Write a file, stop the sandbox, resume it, assert the file persists."""
+        task = "daytona_test_persist"
+        try:
+            # Enable persistence for this test (monkeypatch restores it on teardown)
+            monkeypatch.setenv("TERMINAL_CONTAINER_PERSISTENT", "true")
+
+            # Write a marker file and stop the sandbox
+            _run("echo 'survive' > /tmp/persist_test.txt", task)
+            cleanup_vm(task)  # stops (not deletes) because persistent=true
+
+            # Resume with the same task_id — file should still exist
+            r = _run("cat /tmp/persist_test.txt", task)
+            assert r["exit_code"] == 0
+            assert "survive" in r["output"]
+        finally:
+            # Force-delete so the sandbox doesn't leak
+            monkeypatch.setenv("TERMINAL_CONTAINER_PERSISTENT", "false")
+            cleanup_vm(task)
+
+
+class TestDaytonaIsolation:
+    def test_different_tasks_isolated(self):
+        task_a = "daytona_test_iso_a"
+        task_b = "daytona_test_iso_b"
+        try:
+            _run("echo 'secret' > /tmp/isolated.txt", task_a)
+            r = _run("cat /tmp/isolated.txt 2>&1 || echo NOT_FOUND", task_b)
+            assert "secret" not in r["output"] or "NOT_FOUND" in r["output"]
+        finally:
+            cleanup_vm(task_a)
+            cleanup_vm(task_b)
diff --git a/tests/tools/test_daytona_environment.py b/tests/tools/test_daytona_environment.py
new file mode 100644
index 000000000..a7fe71e26
--- /dev/null
+++ b/tests/tools/test_daytona_environment.py
@@ -0,0 +1,344 @@
+"""Unit tests for the Daytona cloud sandbox environment backend."""
+
+import threading
+from types import SimpleNamespace
+from unittest.mock import MagicMock, patch, PropertyMock
+
+import pytest
+
+
+# ---------------------------------------------------------------------------
+# Helpers to build mock Daytona SDK objects
+# ---------------------------------------------------------------------------
+
+def _make_exec_response(result="", exit_code=0):
+    """Return a stand-in exec response exposing ``result`` and ``exit_code``."""
+    return SimpleNamespace(result=result, exit_code=exit_code)
+
+
+def _make_sandbox(sandbox_id="sb-123", state="started"):
+    """Return a MagicMock sandbox whose ``process.exec`` yields an empty success."""
+    sb = MagicMock()
+    sb.id = sandbox_id
+    sb.state = state
+    sb.process.exec.return_value = _make_exec_response()
+    return sb
+
+
+def _patch_daytona_imports(monkeypatch):
+    """Patch the daytona SDK so DaytonaEnvironment can be imported without it."""
+    import sys
+    import types as _types
+
+    daytona_mod = _types.ModuleType("daytona")
+    daytona_mod.Daytona = MagicMock
+    daytona_mod.CreateSandboxFromImageParams = MagicMock
+    daytona_mod.DaytonaError = type("DaytonaError", (Exception,), {})
+    daytona_mod.Resources = MagicMock(name="Resources")
+
+    # setitem is undone by monkeypatch at teardown, so the fake never leaks
+    monkeypatch.setitem(sys.modules, "daytona", daytona_mod)
+    return daytona_mod
+
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+@pytest.fixture()
+def daytona_sdk(monkeypatch):
+    """Provide a mock daytona SDK module and return it for assertions."""
+    return _patch_daytona_imports(monkeypatch)
+
+
+@pytest.fixture()
+def make_env(daytona_sdk, monkeypatch):
+    """Factory that creates a DaytonaEnvironment with a mocked SDK."""
+    # Prevent is_interrupted from interfering
+    monkeypatch.setattr("tools.interrupt.is_interrupted", lambda: False)
+
+    def _factory(
+        sandbox=None,
+        find_one_side_effect=None,
+        home_dir="/root",
+        persistent=True,
+        **kwargs,
+    ):
+        """Build the environment; extra kwargs are passed to DaytonaEnvironment."""
+        sandbox = _make_sandbox() if sandbox is None else sandbox
+        # Mock the $HOME detection (ignored when the caller set a side_effect)
+        sandbox.process.exec.return_value = _make_exec_response(result=home_dir)
+
+        mock_client = MagicMock()
+        mock_client.create.return_value = sandbox
+
+        if find_one_side_effect is not None:
+            mock_client.find_one.side_effect = find_one_side_effect
+        else:
+            # Default: no existing sandbox found
+            mock_client.find_one.side_effect = daytona_sdk.DaytonaError("not found")
+
+        daytona_sdk.Daytona = MagicMock(return_value=mock_client)
+
+        from tools.environments.daytona import DaytonaEnvironment
+
+        kwargs.setdefault("disk", 10240)
+        env = DaytonaEnvironment(
+            image="test-image:latest",
+            persistent_filesystem=persistent,
+            **kwargs,
+        )
+        env._mock_client = mock_client  # expose for assertions
+        return env
+
+    return _factory
+
+
+# ---------------------------------------------------------------------------
+# Constructor / cwd resolution
+# ---------------------------------------------------------------------------
+
+class TestCwdResolution:
+    def test_default_cwd_resolves_home(self, make_env):
+        env = make_env(home_dir="/home/testuser")
+        assert env.cwd == "/home/testuser"
+
+    def test_tilde_cwd_resolves_home(self, make_env):
+        env = make_env(cwd="~", home_dir="/home/testuser")
+        assert env.cwd == "/home/testuser"
+
+    def test_explicit_cwd_not_overridden(self, make_env):
+        env = make_env(cwd="/workspace", home_dir="/root")
+        assert env.cwd == "/workspace"
+
+    def test_home_detection_failure_falls_back_to_root(self, make_env):
+        sb = _make_sandbox()
+        sb.process.exec.side_effect = RuntimeError("exec failed")
+        env = make_env(sandbox=sb)
+        assert env.cwd == "/root"
+
+    def test_empty_home_falls_back_to_root(self, make_env):
+        env = make_env(home_dir="")
+        assert env.cwd == "/root"
+
+
+# ---------------------------------------------------------------------------
+# Sandbox persistence / resume
+# ---------------------------------------------------------------------------
+
+class TestPersistence:
+    def test_persistent_resumes_existing_sandbox(self, make_env):
+        existing = _make_sandbox(sandbox_id="sb-existing")
+        existing.process.exec.return_value = _make_exec_response(result="/root")
+        env = make_env(find_one_side_effect=lambda **kw: existing, persistent=True)
+        existing.start.assert_called_once()
+        # Should NOT have called create since find_one succeeded
+        env._mock_client.create.assert_not_called()
+
+    def test_persistent_creates_new_when_none_found(self, make_env, daytona_sdk):
+        env = make_env(
+            find_one_side_effect=daytona_sdk.DaytonaError("not found"),
+            persistent=True,
+        )
+        env._mock_client.create.assert_called_once()
+
+    def test_non_persistent_skips_find_one(self, make_env):
+        env = make_env(persistent=False)
+        env._mock_client.find_one.assert_not_called()
+        env._mock_client.create.assert_called_once()
+
+
+# ---------------------------------------------------------------------------
+# Cleanup
+# ---------------------------------------------------------------------------
+
+class TestCleanup:
+    def test_persistent_cleanup_stops_sandbox(self, make_env):
+        env = make_env(persistent=True)
+        sb = env._sandbox  # grab it first: the tests below expect cleanup to clear it
+        env.cleanup()
+        sb.stop.assert_called_once()
+
+    def test_non_persistent_cleanup_deletes_sandbox(self, make_env):
+        env = make_env(persistent=False)
+        sb = env._sandbox
+        env.cleanup()
+        env._mock_client.delete.assert_called_once_with(sb)
+
+    def test_cleanup_idempotent(self, make_env):
+        env = make_env(persistent=True)
+        env.cleanup()
+        env.cleanup()  # should not raise
+
+    def test_cleanup_swallows_errors(self, make_env):
+        env = make_env(persistent=True)
+        env._sandbox.stop.side_effect = RuntimeError("stop failed")
+        env.cleanup()  # should not raise
+        assert env._sandbox is None
+
+
+# ---------------------------------------------------------------------------
+# Execute
+# ---------------------------------------------------------------------------
+
+class TestExecute:
+    def test_basic_command(self, make_env):
+        sb = _make_sandbox()
+        # First call: $HOME detection; subsequent calls: actual commands
+        sb.process.exec.side_effect = [
+            _make_exec_response(result="/root"),  # $HOME
+            _make_exec_response(result="hello", exit_code=0),  # actual cmd
+        ]
+        sb.state = "started"
+        env = make_env(sandbox=sb)
+
+        result = env.execute("echo hello")
+        assert result["output"] == "hello"
+        assert result["returncode"] == 0
+
+    def test_nonzero_exit_code(self, make_env):
+        sb = _make_sandbox()
+        sb.process.exec.side_effect = [
+            _make_exec_response(result="/root"),  # $HOME
+            _make_exec_response(result="not found", exit_code=127),  # actual cmd
+        ]
+        sb.state = "started"
+        env = make_env(sandbox=sb)
+
+        result = env.execute("bad_cmd")
+        assert result["returncode"] == 127
+
+    def test_stdin_data_wraps_heredoc(self, make_env):
+        sb = _make_sandbox()
+        sb.process.exec.side_effect = [
+            _make_exec_response(result="/root"),  # $HOME
+            _make_exec_response(result="ok", exit_code=0),  # actual cmd
+        ]
+        sb.state = "started"
+        env = make_env(sandbox=sb)
+
+        env.execute("python3", stdin_data="print('hi')")
+        # Check that the command passed to exec contains heredoc markers
+        last_call = sb.process.exec.call_args_list[-1]
+        cmd = last_call.args[0]
+        assert "HERMES_EOF_" in cmd
+        assert "print('hi')" in cmd
+
+    def test_custom_cwd_passed_through(self, make_env):
+        sb = _make_sandbox()
+        sb.process.exec.side_effect = [
+            _make_exec_response(result="/root"),  # $HOME
+            _make_exec_response(result="/tmp", exit_code=0),  # actual cmd
+        ]
+        sb.state = "started"
+        env = make_env(sandbox=sb)
+
+        env.execute("pwd", cwd="/tmp")
+        call_kwargs = sb.process.exec.call_args_list[-1].kwargs
+        assert call_kwargs["cwd"] == "/tmp"
+
+    def test_daytona_error_triggers_retry(self, make_env, daytona_sdk):
+        sb = _make_sandbox()
+        sb.state = "started"
+        sb.process.exec.side_effect = [
+            _make_exec_response(result="/root"),  # $HOME
+            daytona_sdk.DaytonaError("transient"),  # first attempt fails
+            _make_exec_response(result="ok", exit_code=0),  # retry succeeds
+        ]
+        env = make_env(sandbox=sb)
+
+        result = env.execute("echo retry")
+        assert result["output"] == "ok"
+        assert result["returncode"] == 0
+
+
+# ---------------------------------------------------------------------------
+# Resource conversion
+# ---------------------------------------------------------------------------
+
+class TestResourceConversion:
+    def _get_resources_kwargs(self, daytona_sdk):
+        return daytona_sdk.Resources.call_args.kwargs
+
+    def test_memory_converted_to_gib(self, make_env, daytona_sdk):
+        make_env(memory=5120)  # only the recorded Resources(...) call is inspected
+        assert self._get_resources_kwargs(daytona_sdk)["memory"] == 5
+
+    def test_disk_converted_to_gib(self, make_env, daytona_sdk):
+        make_env(disk=10240)  # only the recorded Resources(...) call is inspected
+        assert self._get_resources_kwargs(daytona_sdk)["disk"] == 10
+
+    def test_small_values_clamped_to_1(self, make_env, daytona_sdk):
+        make_env(memory=100, disk=100)
+        kw = self._get_resources_kwargs(daytona_sdk)
+        assert kw["memory"] == 1
+        assert kw["disk"] == 1
+
+
+# ---------------------------------------------------------------------------
+# Interrupt handling
+# ---------------------------------------------------------------------------
+
+class TestInterrupt:
+    def test_interrupt_stops_sandbox_and_returns_130(self, make_env, monkeypatch):
+        sb = _make_sandbox()
+        sb.state = "started"
+        event = threading.Event()
+        calls = {"n": 0}
+
+        def exec_side_effect(*args, **kwargs):
+            calls["n"] += 1
+            if calls["n"] == 1:
+                return _make_exec_response(result="/root")  # $HOME detection
+            event.wait(timeout=5)  # simulate long-running command
+            return _make_exec_response(result="done", exit_code=0)
+
+        sb.process.exec.side_effect = exec_side_effect
+        env = make_env(sandbox=sb)
+
+        monkeypatch.setattr(
+            "tools.environments.daytona.is_interrupted", lambda: True
+        )
+        try:
+            result = env.execute("sleep 10")
+            assert result["returncode"] == 130
+            sb.stop.assert_called()
+        finally:
+            event.set()  # release the blocked fake exec so it does not wait out its timeout
+
+
+# ---------------------------------------------------------------------------
+# Retry exhaustion
+# ---------------------------------------------------------------------------
+
+class TestRetryExhausted:
+    def test_both_attempts_fail(self, make_env, daytona_sdk):
+        sb = _make_sandbox()
+        sb.state = "started"
+        sb.process.exec.side_effect = [
+            _make_exec_response(result="/root"),  # $HOME
+            daytona_sdk.DaytonaError("fail1"),  # first attempt
+            daytona_sdk.DaytonaError("fail2"),  # retry
+        ]
+        env = make_env(sandbox=sb)
+
+        result = env.execute("echo x")
+        assert result["returncode"] == 1
+        assert "Daytona execution error" in result["output"]
+
+
+# ---------------------------------------------------------------------------
+# Ensure sandbox ready
+# ---------------------------------------------------------------------------
+
+class TestEnsureSandboxReady:
+    def test_restarts_stopped_sandbox(self, make_env):
+        env = make_env()
+        env._sandbox.state = "stopped"
+        env._ensure_sandbox_ready()
+        env._sandbox.start.assert_called()
+
+    def test_no_restart_when_running(self, make_env):
+        env = make_env()
+        env._sandbox.state = "started"
+        env._ensure_sandbox_ready()
+        env._sandbox.start.assert_not_called()