fix: detect truncated tool_calls when finish_reason is not length

Previously, when an API router rewrote finish_reason from "length" to "tool_calls", truncated JSON arguments bypassed the length handler and wasted 3 retry attempts in the generic JSON validation loop. The agent now detects truncated tool call arguments (arguments that do not end with } or ] after stripping whitespace) regardless of finish_reason and refuses to execute them.

Fixes #7680
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-05-13 03:52:00 +00:00 · 2026-04-11 19:27:22 +08:00 · 2026-04-11 19:27:22 +08:00 · 9abd8b27a5
commit 9abd8b27a5
parent 5be2162922
2 changed files with 60 additions and 2 deletions
--- a/run_agent.py
+++ b/run_agent.py
@ -9396,6 +9396,35 @@ class AIAgent:
                            invalid_json_args.append((tc.function.name, str(e)))
                    if invalid_json_args:
                        # Check if the invalid JSON is due to truncation rather
                        # than a model formatting mistake.  Routers sometimes
                        # rewrite finish_reason from "length" to "tool_calls",
                        # hiding the truncation from the length handler above.
                        # Detect truncation: args that don't end with } or ]
                        # (after stripping whitespace) are cut off mid-stream.
                        _truncated = any(
                            not (tc.function.arguments or "").rstrip().endswith(("}", "]"))
                            for tc in assistant_message.tool_calls
                            if tc.function.name in {n for n, _ in invalid_json_args}
                        )
                        if _truncated:
                            self._vprint(
                                f"{self.log_prefix}⚠️  Truncated tool call arguments detected "
                                f"(finish_reason={finish_reason!r}) — refusing to execute.",
                                force=True,
                            )
                            self._invalid_json_retries = 0
                            self._cleanup_task_resources(effective_task_id)
                            self._persist_session(messages, conversation_history)
                            return {
                                "final_response": None,
                                "messages": messages,
                                "api_calls": api_call_count,
                                "completed": False,
                                "partial": True,
                                "error": "Response truncated due to output length limit",
                            }
                        # Track retries for invalid JSON arguments
                        self._invalid_json_retries += 1
--- a/tests/run_agent/test_run_agent.py
+++ b/tests/run_agent/test_run_agent.py
@ -2169,6 +2169,35 @@ class TestRunConversation:
        mock_hfc.assert_called_once()
        assert result["final_response"] == "Done!"
    def test_truncated_tool_args_detected_when_finish_reason_not_length(self, agent):
        """Cut-off tool-call JSON is refused even when finish_reason is 'tool_calls'.

        Simulates a router that rewrote finish_reason from 'length' to
        'tool_calls': the single tool call carries an arguments string that
        stops mid-value.  The run must come back as a partial, uncompleted
        result carrying the output-length error, and the tool must never be
        dispatched (instead of spending retry attempts on the bad JSON).
        """
        self._setup_agent(agent)
        agent.valid_tool_names.add("write_file")
        # The arguments end inside the "content" string: no closing quote,
        # no closing brace.
        cut_off_args = '{"path":"report.md","content":"partial'
        truncated_call = _mock_tool_call(
            name="write_file", arguments=cut_off_args, call_id="c1"
        )
        # Every completion request yields the same truncated tool call.
        agent.client.chat.completions.create.return_value = _mock_response(
            content="",
            finish_reason="tool_calls",
            tool_calls=[truncated_call],
        )
        with patch("run_agent.handle_function_call") as dispatch_mock, \
                patch.object(agent, "_persist_session"), \
                patch.object(agent, "_save_trajectory"), \
                patch.object(agent, "_cleanup_task_resources"):
            outcome = agent.run_conversation("write the report")
        # The tool handler must not have been reached at all.
        dispatch_mock.assert_not_called()
        assert outcome["partial"] is True
        assert outcome["completed"] is False
        assert "truncated due to output length limit" in outcome["error"]
 class TestRetryExhaustion:
    """Regression: retry_count > max_retries was dead code (off-by-one).