fix: surface execute_code timeout to user instead of silently dropping (#10807)

When execute_code timed out, the result JSON had status="timeout" and an error field, but the output field was empty. Many models treat empty output as "nothing happened" and produce an empty or minimal response. The gateway stream consumer then considers that response "already sent" (from pre-tool streaming) and silently drops it, leaving the user staring at silence.

Three changes:

1. Include the timeout message in the output field (both local and remote paths) so the model always has visible content to relay to the user.
2. Add periodic activity callbacks to the local execution polling loop so the gateway's inactivity monitor knows execute_code is still alive during long runs.
3. Fix stream_consumer._send_fallback_final so it no longer silently drops content when the continuation appears empty but the final text differs from what was previously streamed (e.g. after a tool boundary reset).
2026-04-25 00:51:20 +00:00 · 2026-04-16 09:17:24 +02:00 · 2026-04-16 09:17:24 +02:00 · 3e3ec35a5e
commit 3e3ec35a5e
parent 73befa505d
2 changed files with 52 additions and 5 deletions
--- a/tools/code_execution_tool.py
+++ b/tools/code_execution_tool.py
@@ -871,7 +871,18 @@ def _execute_remote(
    }

    if status == "timeout":
-        result["error"] = f"Script timed out after {timeout}s and was killed."
+        timeout_msg = f"Script timed out after {timeout}s and was killed."
+        result["error"] = timeout_msg
+        # Include timeout message in output so the LLM always surfaces it
+        # to the user (see local path comment — same reasoning, #10807).
+        if stdout_text:
+            result["output"] = stdout_text + f"\n\n⏰ {timeout_msg}"
+        else:
+            result["output"] = f"⏰ {timeout_msg}"
+        logger.warning(
+            "execute_code (remote) timed out after %ss (limit %ss) with %d tool calls",
+            duration, timeout, tool_call_counter[0],
+        )
    elif status == "interrupted":
        result["output"] = (
            stdout_text + "\n[execution interrupted — user sent a new message]"
@@ -1117,6 +1128,8 @@ def execute_code(
        stderr_reader.start()

        status = "success"
+        _last_activity_touch = time.monotonic()
+        _ACTIVITY_INTERVAL = 10.0
        while proc.poll() is None:
            if _is_interrupted():
                _kill_process_group(proc)
@@ -1126,6 +1139,19 @@ def execute_code(
                _kill_process_group(proc, escalate=True)
                status = "timeout"
                break
+            # Periodic activity touch so the gateway's inactivity timeout
+            # doesn't kill the agent during long code execution (#10807).
+            _now = time.monotonic()
+            if _now - _last_activity_touch >= _ACTIVITY_INTERVAL:
+                _last_activity_touch = _now
+                try:
+                    from tools.environments.base import _get_activity_callback
+                    _cb = _get_activity_callback()
+                    if _cb:
+                        _elapsed = int(_now - exec_start)
+                        _cb(f"execute_code running ({_elapsed}s elapsed)")
+                except Exception:
+                    pass
            time.sleep(0.2)

        # Wait for readers to finish draining
@@ -1179,7 +1205,20 @@ def execute_code(
        }

        if status == "timeout":
-            result["error"] = f"Script timed out after {timeout}s and was killed."
+            timeout_msg = f"Script timed out after {timeout}s and was killed."
+            result["error"] = timeout_msg
+            # Include timeout message in output so the LLM always surfaces it
+            # to the user.  When output is empty, models often treat the result
+            # as "nothing happened" and produce an empty response, which the
+            # gateway stream consumer silently drops (#10807).
+            if stdout_text:
+                result["output"] = stdout_text + f"\n\n⏰ {timeout_msg}"
+            else:
+                result["output"] = f"⏰ {timeout_msg}"
+            logger.warning(
+                "execute_code timed out after %ss (limit %ss) with %d tool calls",
+                duration, timeout, tool_call_counter[0],
+            )
        elif status == "interrupted":
            result["output"] = stdout_text + "\n[execution interrupted — user sent a new message]"
        elif exit_code != 0: