diff --git a/tests/tools/test_daytona_environment.py b/tests/tools/test_daytona_environment.py index 41f1ace87f..6d32f74418 100644 --- a/tests/tools/test_daytona_environment.py +++ b/tests/tools/test_daytona_environment.py @@ -200,6 +200,36 @@ class TestExecute: assert result["output"] == "hello" assert result["returncode"] == 0 + def test_command_wrapped_with_shell_timeout(self, make_env): + sb = _make_sandbox() + sb.process.exec.side_effect = [ + _make_exec_response(result="/root"), + _make_exec_response(result="ok", exit_code=0), + ] + sb.state = "started" + env = make_env(sandbox=sb, timeout=42) + + env.execute("echo hello") + # The command sent to exec should be wrapped with `timeout N sh -c '...'` + call_args = sb.process.exec.call_args_list[-1] + cmd = call_args[0][0] + assert cmd.startswith("timeout 42 sh -c ") + # SDK timeout param should NOT be passed + assert "timeout" not in call_args[1] + + def test_timeout_returns_exit_code_124(self, make_env): + """Shell timeout utility returns exit code 124.""" + sb = _make_sandbox() + sb.process.exec.side_effect = [ + _make_exec_response(result="/root"), + _make_exec_response(result="", exit_code=124), + ] + sb.state = "started" + env = make_env(sandbox=sb) + + result = env.execute("sleep 300", timeout=5) + assert result["returncode"] == 124 + def test_nonzero_exit_code(self, make_env): sb = _make_sandbox() sb.process.exec.side_effect = [ @@ -223,10 +253,12 @@ class TestExecute: env.execute("python3", stdin_data="print('hi')") # Check that the command passed to exec contains heredoc markers + # (single quotes get shell-escaped by shlex.quote, so check components) call_args = sb.process.exec.call_args_list[-1] cmd = call_args[0][0] assert "HERMES_EOF_" in cmd - assert "print('hi')" in cmd + assert "print" in cmd + assert "hi" in cmd def test_custom_cwd_passed_through(self, make_env): sb = _make_sandbox() diff --git a/tools/environments/daytona.py b/tools/environments/daytona.py index bfd1732e0b..c8df198c1c 100644 
--- a/tools/environments/daytona.py +++ b/tools/environments/daytona.py @@ -7,6 +7,7 @@ and resumed on next creation, preserving the filesystem across sessions. import logging import math +import shlex import threading import uuid import warnings @@ -112,13 +113,24 @@ class DaytonaEnvironment(BaseEnvironment): logger.info("Daytona: restarted sandbox %s", self._sandbox.id) def _exec_in_thread(self, exec_command: str, cwd: Optional[str], timeout: int) -> dict: - """Run exec in a background thread with interrupt polling.""" + """Run exec in a background thread with interrupt polling. + + The Daytona SDK's exec(timeout=...) parameter is unreliable (the + server-side timeout is not enforced and the SDK has no client-side + fallback), so we wrap the command with the shell ``timeout`` utility + which reliably kills the process and returns exit code 124. + """ + # Wrap with shell `timeout` to enforce the deadline reliably. + # The SDK-level timeout is deliberately not passed to exec(); the polling + # loop below keeps its own fallback deadline in case exec never returns. 
+ timed_command = f"timeout {timeout} sh -c {shlex.quote(exec_command)}" + result_holder: dict = {"value": None, "error": None} def _run(): try: response = self._sandbox.process.exec( - exec_command, cwd=cwd, timeout=timeout, + timed_command, cwd=cwd, ) result_holder["value"] = { "output": response.result or "", @@ -129,8 +141,11 @@ class DaytonaEnvironment(BaseEnvironment): t = threading.Thread(target=_run, daemon=True) t.start() + # Wait for timeout + generous buffer for network/SDK overhead + deadline = timeout + 10 while t.is_alive(): t.join(timeout=0.2) + deadline -= 0.2 if is_interrupted(): with self._lock: try: @@ -141,6 +156,14 @@ class DaytonaEnvironment(BaseEnvironment): "output": "[Command interrupted - Daytona sandbox stopped]", "returncode": 130, } + if deadline <= 0: + # Shell timeout didn't fire and SDK is hung — force stop + with self._lock: + try: + self._sandbox.stop() + except Exception: + pass + return self._timeout_result(timeout) if result_holder["error"]: return {"error": result_holder["error"]}