mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-13 03:52:00 +00:00
fix: detect truncated tool_calls when finish_reason is not length
When API routers rewrite finish_reason from "length" to "tool_calls", truncated JSON arguments bypass the length handler and waste 3 retry attempts in the generic JSON validation loop. The agent now detects truncation patterns in tool call arguments regardless of finish_reason. Fixes #7680. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
5be2162922
commit
9abd8b27a5
2 changed files with 60 additions and 2 deletions
29
run_agent.py
29
run_agent.py
|
|
@ -9396,6 +9396,35 @@ class AIAgent:
|
||||||
invalid_json_args.append((tc.function.name, str(e)))
|
invalid_json_args.append((tc.function.name, str(e)))
|
||||||
|
|
||||||
if invalid_json_args:
|
if invalid_json_args:
|
||||||
|
# Check if the invalid JSON is due to truncation rather
|
||||||
|
# than a model formatting mistake. Routers sometimes
|
||||||
|
# rewrite finish_reason from "length" to "tool_calls",
|
||||||
|
# hiding the truncation from the length handler above.
|
||||||
|
# Detect truncation: args that don't end with } or ]
|
||||||
|
# (after stripping whitespace) are cut off mid-stream.
|
||||||
|
_truncated = any(
|
||||||
|
not (tc.function.arguments or "").rstrip().endswith(("}", "]"))
|
||||||
|
for tc in assistant_message.tool_calls
|
||||||
|
if tc.function.name in {n for n, _ in invalid_json_args}
|
||||||
|
)
|
||||||
|
if _truncated:
|
||||||
|
self._vprint(
|
||||||
|
f"{self.log_prefix}⚠️ Truncated tool call arguments detected "
|
||||||
|
f"(finish_reason={finish_reason!r}) — refusing to execute.",
|
||||||
|
force=True,
|
||||||
|
)
|
||||||
|
self._invalid_json_retries = 0
|
||||||
|
self._cleanup_task_resources(effective_task_id)
|
||||||
|
self._persist_session(messages, conversation_history)
|
||||||
|
return {
|
||||||
|
"final_response": None,
|
||||||
|
"messages": messages,
|
||||||
|
"api_calls": api_call_count,
|
||||||
|
"completed": False,
|
||||||
|
"partial": True,
|
||||||
|
"error": "Response truncated due to output length limit",
|
||||||
|
}
|
||||||
|
|
||||||
# Track retries for invalid JSON arguments
|
# Track retries for invalid JSON arguments
|
||||||
self._invalid_json_retries += 1
|
self._invalid_json_retries += 1
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -2169,6 +2169,35 @@ class TestRunConversation:
|
||||||
mock_hfc.assert_called_once()
|
mock_hfc.assert_called_once()
|
||||||
assert result["final_response"] == "Done!"
|
assert result["final_response"] == "Done!"
|
||||||
|
|
||||||
|
def test_truncated_tool_args_detected_when_finish_reason_not_length(self, agent):
    """Cut-off tool-call arguments are refused even if finish_reason is 'tool_calls'.

    Simulates a router that reports finish_reason='tool_calls' while the
    tool call's JSON arguments stop mid-string. The conversation is
    expected to end as an incomplete, partial result that carries the
    output-length error, and the tool handler must never be invoked.
    """
    self._setup_agent(agent)
    agent.valid_tool_names.add("write_file")

    # The arguments break off inside the "content" string value, so they
    # are not parseable JSON and do not end with "}" or "]".
    cut_off_call = _mock_tool_call(
        name="write_file",
        arguments='{"path":"report.md","content":"partial',
        call_id="c1",
    )
    agent.client.chat.completions.create.return_value = _mock_response(
        content="", finish_reason="tool_calls", tool_calls=[cut_off_call],
    )

    with (
        patch("run_agent.handle_function_call") as mock_handle_function_call,
        patch.object(agent, "_persist_session"),
        patch.object(agent, "_save_trajectory"),
        patch.object(agent, "_cleanup_task_resources"),
    ):
        outcome = agent.run_conversation("write the report")

    assert outcome["completed"] is False
    assert outcome["partial"] is True
    assert "truncated due to output length limit" in outcome["error"]
    mock_handle_function_call.assert_not_called()
|
||||||
|
|
||||||
|
|
||||||
class TestRetryExhaustion:
|
class TestRetryExhaustion:
|
||||||
"""Regression: retry_count > max_retries was dead code (off-by-one).
|
"""Regression: retry_count > max_retries was dead code (off-by-one).
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue