fix(daytona): use shell timeout wrapper instead of broken SDK exec timeout

The Daytona SDK's `process.exec(timeout=N)` parameter is not enforced: the server-side timeout never fires and the SDK has no client-side fallback, so commands can hang indefinitely. Fix: wrap each command as `timeout N sh -c '...'` (coreutils `timeout`), which reliably kills the process and returns exit code 124. The wrapped command is escaped with `shlex.quote` for proper shell quoting, and a secondary deadline (timeout + 10s) force-stops the sandbox if the shell timeout somehow fails to fire.

Signed-off-by: rovle <lovre.pesut@gmail.com>
2026-04-25 00:51:20 +00:00 · 2026-03-05 13:12:41 -08:00 · 2026-03-05 13:12:41 -08:00 · a6499b6107
commit a6499b6107
parent 74a36b0729
2 changed files with 58 additions and 3 deletions
--- a/tests/tools/test_daytona_environment.py
+++ b/tests/tools/test_daytona_environment.py
@ -200,6 +200,36 @@ class TestExecute:
        assert result["output"] == "hello"
        assert result["returncode"] == 0

+    def test_command_wrapped_with_shell_timeout(self, make_env):
+        sb = _make_sandbox()
+        sb.process.exec.side_effect = [
+            _make_exec_response(result="/root"),
+            _make_exec_response(result="ok", exit_code=0),
+        ]
+        sb.state = "started"
+        env = make_env(sandbox=sb, timeout=42)
+
+        env.execute("echo hello")
+        # The command sent to exec should be wrapped with `timeout N sh -c '...'`
+        call_args = sb.process.exec.call_args_list[-1]
+        cmd = call_args[0][0]
+        assert cmd.startswith("timeout 42 sh -c ")
+        # SDK timeout param should NOT be passed
+        assert "timeout" not in call_args[1]
+
+    def test_timeout_returns_exit_code_124(self, make_env):
+        """Shell timeout utility returns exit code 124."""
+        sb = _make_sandbox()
+        sb.process.exec.side_effect = [
+            _make_exec_response(result="/root"),
+            _make_exec_response(result="", exit_code=124),
+        ]
+        sb.state = "started"
+        env = make_env(sandbox=sb)
+
+        result = env.execute("sleep 300", timeout=5)
+        assert result["returncode"] == 124
+
    def test_nonzero_exit_code(self, make_env):
        sb = _make_sandbox()
        sb.process.exec.side_effect = [
@ -223,10 +253,12 @@ class TestExecute:

        env.execute("python3", stdin_data="print('hi')")
        # Check that the command passed to exec contains heredoc markers
+        # (single quotes get shell-escaped by shlex.quote, so check components)
        call_args = sb.process.exec.call_args_list[-1]
        cmd = call_args[0][0]
        assert "HERMES_EOF_" in cmd
-        assert "print('hi')" in cmd
+        assert "print" in cmd
+        assert "hi" in cmd

    def test_custom_cwd_passed_through(self, make_env):
        sb = _make_sandbox()
--- a/tools/environments/daytona.py
+++ b/tools/environments/daytona.py
@ -7,6 +7,7 @@ and resumed on next creation, preserving the filesystem across sessions.

 import logging
 import math
+import shlex
 import threading
 import uuid
 import warnings
@ -112,13 +113,24 @@ class DaytonaEnvironment(BaseEnvironment):
            logger.info("Daytona: restarted sandbox %s", self._sandbox.id)

    def _exec_in_thread(self, exec_command: str, cwd: Optional[str], timeout: int) -> dict:
-        """Run exec in a background thread with interrupt polling."""
+        """Run exec in a background thread with interrupt polling.
+
+        The Daytona SDK's exec(timeout=...) parameter is unreliable (the
+        server-side timeout is not enforced and the SDK has no client-side
+        fallback), so we wrap the command with the shell ``timeout`` utility
+        which reliably kills the process and returns exit code 124.
+        """
+        # Wrap with shell `timeout` to enforce the deadline reliably.
+        # No SDK-level timeout is passed to exec(); when the limit expires
+        # the `timeout` utility kills the command and exits with code 124.
+        timed_command = f"timeout {timeout} sh -c {shlex.quote(exec_command)}"
+
        result_holder: dict = {"value": None, "error": None}

        def _run():
            try:
                response = self._sandbox.process.exec(
-                    exec_command, cwd=cwd, timeout=timeout,
+                    timed_command, cwd=cwd,
                )
                result_holder["value"] = {
                    "output": response.result or "",
@ -129,8 +141,11 @@ class DaytonaEnvironment(BaseEnvironment):

        t = threading.Thread(target=_run, daemon=True)
        t.start()
+        # Wait for timeout + generous buffer for network/SDK overhead
+        deadline = timeout + 10
        while t.is_alive():
            t.join(timeout=0.2)
+            deadline -= 0.2
            if is_interrupted():
                with self._lock:
                    try:
@ -141,6 +156,14 @@ class DaytonaEnvironment(BaseEnvironment):
                    "output": "[Command interrupted - Daytona sandbox stopped]",
                    "returncode": 130,
                }
+            if deadline <= 0:
+                # Shell timeout didn't fire and SDK is hung — force stop
+                with self._lock:
+                    try:
+                        self._sandbox.stop()
+                    except Exception:
+                        pass
+                return self._timeout_result(timeout)

        if result_holder["error"]:
            return {"error": result_holder["error"]}