From 3e3ec35a5e0235184bb5c11e3404dd3d51458af0 Mon Sep 17 00:00:00 2001 From: konsisumer Date: Thu, 16 Apr 2026 09:17:24 +0200 Subject: [PATCH] fix: surface execute_code timeout to user instead of silently dropping (#10807) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When execute_code times out, the result JSON had status="timeout" and an error field, but the output field was empty. Many models treat empty output as "nothing happened" and produce an empty/minimal response. The gateway stream consumer then considers the response "already sent" (from pre-tool streaming) and silently drops it — leaving the user staring at silence. Three changes: 1. Include the timeout message in the output field (both local and remote paths) so the model always has visible content to relay to the user. 2. Add periodic activity callbacks to the local execution polling loop so the gateway's inactivity monitor knows execute_code is alive during long runs. 3. Fix stream_consumer._send_fallback_final to not silently drop content when the continuation appears empty but the final text differs from what was previously streamed (e.g. after a tool boundary reset). --- gateway/stream_consumer.py | 14 +++++++++--- tools/code_execution_tool.py | 43 ++++++++++++++++++++++++++++++++++-- 2 files changed, 52 insertions(+), 5 deletions(-) diff --git a/gateway/stream_consumer.py b/gateway/stream_consumer.py index a644547e6a..853b159034 100644 --- a/gateway/stream_consumer.py +++ b/gateway/stream_consumer.py @@ -515,9 +515,17 @@ class GatewayStreamConsumer: self._fallback_final_send = False if not continuation.strip(): # Nothing new to send — the visible partial already matches final text. - self._already_sent = True - self._final_response_sent = True - return + # BUT: if final_text itself has meaningful content (e.g. a timeout + # message after a long tool call), the prefix-based continuation + # calculation may wrongly conclude "already shown" because the + # streamed prefix was from a *previous* segment (before the tool + # boundary). In that case, send the full final_text as-is (#10807). + if final_text.strip() and final_text != self._visible_prefix(): + continuation = final_text + else: + self._already_sent = True + self._final_response_sent = True + return raw_limit = getattr(self.adapter, "MAX_MESSAGE_LENGTH", 4096) safe_limit = max(500, raw_limit - 100) diff --git a/tools/code_execution_tool.py b/tools/code_execution_tool.py index 8cffeda804..d61164bcac 100644 --- a/tools/code_execution_tool.py +++ b/tools/code_execution_tool.py @@ -871,7 +871,18 @@ def _execute_remote( } if status == "timeout": - result["error"] = f"Script timed out after {timeout}s and was killed." + timeout_msg = f"Script timed out after {timeout}s and was killed." + result["error"] = timeout_msg + # Include timeout message in output so the LLM always surfaces it + # to the user (see local path comment — same reasoning, #10807). + if stdout_text: + result["output"] = stdout_text + f"\n\n⏰ {timeout_msg}" + else: + result["output"] = f"⏰ {timeout_msg}" + logger.warning( + "execute_code (remote) timed out after %ss (limit %ss) with %d tool calls", + duration, timeout, tool_call_counter[0], + ) elif status == "interrupted": result["output"] = ( stdout_text + "\n[execution interrupted — user sent a new message]" @@ -1117,6 +1128,8 @@ def execute_code( stderr_reader.start() status = "success" + _last_activity_touch = time.monotonic() + _ACTIVITY_INTERVAL = 10.0 while proc.poll() is None: if _is_interrupted(): _kill_process_group(proc) @@ -1126,6 +1139,19 @@ def execute_code( _kill_process_group(proc, escalate=True) status = "timeout" break + # Periodic activity touch so the gateway's inactivity timeout + # doesn't kill the agent during long code execution (#10807). + _now = time.monotonic() + if _now - _last_activity_touch >= _ACTIVITY_INTERVAL: + _last_activity_touch = _now + try: + from tools.environments.base import _get_activity_callback + _cb = _get_activity_callback() + if _cb: + _elapsed = int(_now - exec_start) + _cb(f"execute_code running ({_elapsed}s elapsed)") + except Exception: + pass time.sleep(0.2) # Wait for readers to finish draining @@ -1179,7 +1205,20 @@ def execute_code( } if status == "timeout": - result["error"] = f"Script timed out after {timeout}s and was killed." + timeout_msg = f"Script timed out after {timeout}s and was killed." + result["error"] = timeout_msg + # Include timeout message in output so the LLM always surfaces it + # to the user. When output is empty, models often treat the result + # as "nothing happened" and produce an empty response, which the + # gateway stream consumer silently drops (#10807). + if stdout_text: + result["output"] = stdout_text + f"\n\n⏰ {timeout_msg}" + else: + result["output"] = f"⏰ {timeout_msg}" + logger.warning( + "execute_code timed out after %ss (limit %ss) with %d tool calls", + duration, timeout, tool_call_counter[0], + ) elif status == "interrupted": result["output"] = stdout_text + "\n[execution interrupted — user sent a new message]" elif exit_code != 0: