diff --git a/gateway/stream_consumer.py b/gateway/stream_consumer.py index a644547e6..853b15903 100644 --- a/gateway/stream_consumer.py +++ b/gateway/stream_consumer.py @@ -515,9 +515,17 @@ class GatewayStreamConsumer: self._fallback_final_send = False if not continuation.strip(): # Nothing new to send — the visible partial already matches final text. - self._already_sent = True - self._final_response_sent = True - return + # BUT: if final_text itself has meaningful content (e.g. a timeout + # message after a long tool call), the prefix-based continuation + # calculation may wrongly conclude "already shown" because the + # streamed prefix was from a *previous* segment (before the tool + # boundary). In that case, send the full final_text as-is (#10807). + if final_text.strip() and final_text != self._visible_prefix(): + continuation = final_text + else: + self._already_sent = True + self._final_response_sent = True + return raw_limit = getattr(self.adapter, "MAX_MESSAGE_LENGTH", 4096) safe_limit = max(500, raw_limit - 100) diff --git a/tools/code_execution_tool.py b/tools/code_execution_tool.py index 8cffeda80..d61164bca 100644 --- a/tools/code_execution_tool.py +++ b/tools/code_execution_tool.py @@ -871,7 +871,18 @@ def _execute_remote( } if status == "timeout": - result["error"] = f"Script timed out after {timeout}s and was killed." + timeout_msg = f"Script timed out after {timeout}s and was killed." + result["error"] = timeout_msg + # Include timeout message in output so the LLM always surfaces it + # to the user (see local path comment — same reasoning, #10807). + if stdout_text: + result["output"] = stdout_text + f"\n\n⏰ {timeout_msg}" + else: + result["output"] = f"⏰ {timeout_msg}" + logger.warning( + "execute_code (remote) timed out after %ss (limit %ss) with %d tool calls", + duration, timeout, tool_call_counter[0], + ) elif status == "interrupted": result["output"] = ( stdout_text + "\n[execution interrupted — user sent a new message]" @@ -1117,6 +1128,8 @@ def execute_code( stderr_reader.start() status = "success" + _last_activity_touch = time.monotonic() + _ACTIVITY_INTERVAL = 10.0 while proc.poll() is None: if _is_interrupted(): _kill_process_group(proc) @@ -1126,6 +1139,19 @@ def execute_code( _kill_process_group(proc, escalate=True) status = "timeout" break + # Periodic activity touch so the gateway's inactivity timeout + # doesn't kill the agent during long code execution (#10807). + _now = time.monotonic() + if _now - _last_activity_touch >= _ACTIVITY_INTERVAL: + _last_activity_touch = _now + try: + from tools.environments.base import _get_activity_callback + _cb = _get_activity_callback() + if _cb: + _elapsed = int(_now - exec_start) + _cb(f"execute_code running ({_elapsed}s elapsed)") + except Exception: + pass time.sleep(0.2) # Wait for readers to finish draining @@ -1179,7 +1205,20 @@ def execute_code( } if status == "timeout": - result["error"] = f"Script timed out after {timeout}s and was killed." + timeout_msg = f"Script timed out after {timeout}s and was killed." + result["error"] = timeout_msg + # Include timeout message in output so the LLM always surfaces it + # to the user. When output is empty, models often treat the result + # as "nothing happened" and produce an empty response, which the + # gateway stream consumer silently drops (#10807). + if stdout_text: + result["output"] = stdout_text + f"\n\n⏰ {timeout_msg}" + else: + result["output"] = f"⏰ {timeout_msg}" + logger.warning( + "execute_code timed out after %ss (limit %ss) with %d tool calls", + duration, timeout, tool_call_counter[0], + ) elif status == "interrupted": result["output"] = stdout_text + "\n[execution interrupted — user sent a new message]" elif exit_code != 0: