diff --git a/run_agent.py b/run_agent.py
index f995a2886d..e56e23b7d2 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -9506,12 +9506,45 @@ class AIAgent:
                             invalid_json_args.append((tc.function.name, str(e)))
 
                     if invalid_json_args:
+                        # Check if the invalid JSON is due to truncation rather
+                        # than a model formatting mistake. Routers sometimes
+                        # rewrite finish_reason from "length" to "tool_calls",
+                        # hiding the truncation from the length handler above.
+                        # Detect truncation: args that don't end with } or ]
+                        # (after stripping whitespace) are cut off mid-stream.
+                        # This is a heuristic: output cut off right after a "}"
+                        # or "]" inside a string value is not caught and falls
+                        # through to the normal retry path below.
+                        _invalid_names = {n for n, _ in invalid_json_args}
+                        _truncated = any(
+                            not (tc.function.arguments or "").rstrip().endswith(("}", "]"))
+                            for tc in assistant_message.tool_calls
+                            if tc.function.name in _invalid_names
+                        )
+                        if _truncated:
+                            self._vprint(
+                                f"{self.log_prefix}⚠️ Truncated tool call arguments detected "
+                                f"(finish_reason={finish_reason!r}) — refusing to execute.",
+                                force=True,
+                            )
+                            self._invalid_json_retries = 0
+                            self._cleanup_task_resources(effective_task_id)
+                            self._persist_session(messages, conversation_history)
+                            return {
+                                "final_response": None,
+                                "messages": messages,
+                                "api_calls": api_call_count,
+                                "completed": False,
+                                "partial": True,
+                                "error": "Response truncated due to output length limit",
+                            }
+
                         # Track retries for invalid JSON arguments
                         self._invalid_json_retries += 1
-                        
+
                         tool_name, error_msg = invalid_json_args[0]
                         self._vprint(f"{self.log_prefix}⚠️ Invalid JSON in tool call arguments for '{tool_name}': {error_msg}")
-                        
+
                         if self._invalid_json_retries < 3:
                             self._vprint(f"{self.log_prefix}🔄 Retrying API call ({self._invalid_json_retries}/3)...")
                             # Don't add anything to messages, just retry the API call
diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py
index 0f2d1d4de9..9851939ae4 100644
--- a/tests/run_agent/test_run_agent.py
+++ b/tests/run_agent/test_run_agent.py
@@ -2169,6 +2169,39 @@ class TestRunConversation:
         mock_hfc.assert_called_once()
         assert result["final_response"] == "Done!"
 
+    def test_truncated_tool_args_detected_when_finish_reason_not_length(self, agent):
+        """When a router rewrites finish_reason from 'length' to 'tool_calls',
+        truncated JSON arguments should still be detected and refused rather
+        than wasting 3 retry attempts."""
+        self._setup_agent(agent)
+        agent.valid_tool_names.add("write_file")
+        bad_tc = _mock_tool_call(
+            name="write_file",
+            arguments='{"path":"report.md","content":"partial',
+            call_id="c1",
+        )
+        resp = _mock_response(
+            content="", finish_reason="tool_calls", tool_calls=[bad_tc],
+        )
+        agent.client.chat.completions.create.return_value = resp
+
+        with (
+            patch("run_agent.handle_function_call") as mock_handle_function_call,
+            patch.object(agent, "_persist_session"),
+            patch.object(agent, "_save_trajectory"),
+            patch.object(agent, "_cleanup_task_resources"),
+        ):
+            result = agent.run_conversation("write the report")
+
+        assert result["completed"] is False
+        assert result["partial"] is True
+        assert result["final_response"] is None
+        assert "truncated due to output length limit" in result["error"]
+        # The truncation branch zeroes the invalid-JSON retry counter before
+        # returning, so it must read 0 once run_conversation has returned.
+        assert agent._invalid_json_retries == 0
+        mock_handle_function_call.assert_not_called()
+
 
 class TestRetryExhaustion:
     """Regression: retry_count > max_retries was dead code (off-by-one).