mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix: repair malformed tool call args in streaming assembly before flagging as truncated
When the streaming path (chat completions) assembled tool call deltas and detected malformed JSON arguments, it set has_truncated_tool_args=True but passed the broken args through unchanged. This triggered the truncation handler, which returned a partial result and killed the session (/new required). Many malformations are repairable: trailing commas, unclosed brackets, Python None, empty strings. _repair_tool_call_arguments() already existed for the pre-API-request path but wasn't called during streaming assembly. Now, when JSON parsing fails during streaming assembly, we attempt repair via _repair_tool_call_arguments() before flagging as truncated. If repair succeeds (returns valid JSON), the tool call proceeds normally. Only truly unrepairable args fall through to the truncation handler. This prevents the most common session-killing failure mode for models like GLM-5.1 that produce trailing commas or unclosed brackets. Tests: 12 new streaming assembly repair tests; all 29 existing repair tests still pass.
This commit is contained in:
parent
b7c1d77e55
commit
17fc84c256
2 changed files with 131 additions and 1 deletions
16
run_agent.py
16
run_agent.py
|
|
@ -6073,11 +6073,25 @@ class AIAgent:
|
||||||
for idx in sorted(tool_calls_acc):
|
for idx in sorted(tool_calls_acc):
|
||||||
tc = tool_calls_acc[idx]
|
tc = tool_calls_acc[idx]
|
||||||
arguments = tc["function"]["arguments"]
|
arguments = tc["function"]["arguments"]
|
||||||
|
tool_name = tc["function"]["name"] or "?"
|
||||||
if arguments and arguments.strip():
|
if arguments and arguments.strip():
|
||||||
try:
|
try:
|
||||||
json.loads(arguments)
|
json.loads(arguments)
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
has_truncated_tool_args = True
|
# Attempt repair before flagging as truncated.
|
||||||
|
# Models like GLM-5.1 via Ollama produce trailing
|
||||||
|
# commas, unclosed brackets, Python None, etc.
|
||||||
|
# Without repair, these hit the truncation handler
|
||||||
|
# and kill the session. _repair_tool_call_arguments
|
||||||
|
# returns "{}" for unrepairable args; that result is
|
||||||
|
# treated below as unrepaired and flagged as truncated.
|
||||||
|
repaired = _repair_tool_call_arguments(arguments, tool_name)
|
||||||
|
if repaired != "{}":
|
||||||
|
# Successfully repaired — use the fixed args
|
||||||
|
arguments = repaired
|
||||||
|
else:
|
||||||
|
# Unrepairable — flag for truncation handling
|
||||||
|
has_truncated_tool_args = True
|
||||||
mock_tool_calls.append(SimpleNamespace(
|
mock_tool_calls.append(SimpleNamespace(
|
||||||
id=tc["id"],
|
id=tc["id"],
|
||||||
type=tc["type"],
|
type=tc["type"],
|
||||||
|
|
|
||||||
116
tests/run_agent/test_streaming_tool_call_repair.py
Normal file
116
tests/run_agent/test_streaming_tool_call_repair.py
Normal file
|
|
@ -0,0 +1,116 @@
|
||||||
|
"""Tests for tool call argument repair in the streaming assembly path.
|
||||||
|
|
||||||
|
The streaming path (run_agent._call_chat_completions) assembles tool call
|
||||||
|
deltas into full arguments. When a model truncates or malforms the JSON
|
||||||
|
(e.g. GLM-5.1 via Ollama), the assembly path used to pass the broken JSON
|
||||||
|
straight through — setting has_truncated_tool_args but NOT repairing it.
|
||||||
|
That triggered the truncation handler to kill the session with /new required.
|
||||||
|
|
||||||
|
The fix: repair arguments in the streaming assembly path using
|
||||||
|
_repair_tool_call_arguments() so repairable malformations (trailing commas,
|
||||||
|
unclosed brackets, Python None) don't kill the session.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from run_agent import _repair_tool_call_arguments
|
||||||
|
|
||||||
|
|
||||||
|
class TestStreamingAssemblyRepair:
    """Exercise _repair_tool_call_arguments on inputs seen during streaming.

    Every test calls the repair helper directly with a raw argument string
    and a tool name, then checks the returned string. Nothing here drives
    the streaming assembly loop itself; per the original note, integration
    tests for the full streaming path are in test_agent_loop_tool_calling.py.
    """

    # -- Truncation cases (most common streaming failure) --

    def test_truncated_object_no_close_brace(self):
        """An object cut off before its closing brace keeps both fields."""
        payload = '{"command": "ls -la", "timeout": 30'
        decoded = json.loads(_repair_tool_call_arguments(payload, "terminal"))
        assert decoded["command"] == "ls -la"
        assert decoded["timeout"] == 30

    def test_truncated_nested_object(self):
        """A two-key object missing its closing brace keeps the path field.

        NOTE(review): despite the test name, the input is a flat object,
        not a nested one.
        """
        payload = '{"path": "/tmp/foo", "content": "hello"'
        decoded = json.loads(_repair_tool_call_arguments(payload, "write_file"))
        assert decoded["path"] == "/tmp/foo"

    def test_truncated_mid_value(self):
        """A string value cut off mid-way must still yield parseable JSON."""
        payload = '{"command": "git clone ht'
        fixed = _repair_tool_call_arguments(payload, "terminal")
        # No field is asserted: the only requirement is that this parses
        # (json.loads raises otherwise), even if the command value is lost.
        json.loads(fixed)

    # -- Trailing comma cases (Ollama/GLM common) --

    def test_trailing_comma_before_close_brace(self):
        """A trailing comma before the closing brace is dropped."""
        payload = '{"path": "/tmp", "content": "x",}'
        fixed = _repair_tool_call_arguments(payload, "write_file")
        expected = {"path": "/tmp", "content": "x"}
        assert json.loads(fixed) == expected

    def test_trailing_comma_in_list(self):
        """A trailing comma inside a list is dropped."""
        payload = '{"items": [1, 2, 3,]}'
        fixed = _repair_tool_call_arguments(payload, "test")
        expected = {"items": [1, 2, 3]}
        assert json.loads(fixed) == expected

    # -- Python None from model output --

    def test_python_none_literal(self):
        """The bare Python literal None collapses to an empty object."""
        fixed = _repair_tool_call_arguments("None", "test")
        assert fixed == "{}"

    # -- Empty arguments (some models emit empty string) --

    def test_empty_string(self):
        """An empty argument string collapses to an empty object."""
        fixed = _repair_tool_call_arguments("", "test")
        assert fixed == "{}"

    def test_whitespace_only(self):
        """A whitespace-only argument string collapses to an empty object."""
        fixed = _repair_tool_call_arguments(" \n ", "test")
        assert fixed == "{}"

    # -- Already-valid JSON passes through unchanged --

    def test_valid_json_passthrough(self):
        """Well-formed JSON decodes to the same object after repair."""
        payload = '{"path": "/tmp/foo", "content": "hello"}'
        fixed = _repair_tool_call_arguments(payload, "write_file")
        expected = {"path": "/tmp/foo", "content": "hello"}
        assert json.loads(fixed) == expected

    # -- Extra closing brackets (rare but happens) --

    def test_extra_closing_brace(self):
        """A surplus closing brace is discarded."""
        payload = '{"key": "value"}}'
        fixed = _repair_tool_call_arguments(payload, "test")
        expected = {"key": "value"}
        assert json.loads(fixed) == expected

    # -- Real-world GLM-5.1 truncation pattern --

    def test_glm_truncation_pattern(self):
        """A key left hanging after its colon falls back to an empty object.

        The input ends with ``"background":`` and no value. The helper is
        expected to give up on this shape and return ``{}``, in contrast to
        the repairable variant without the hanging key (see
        test_glm_truncation_repairable).
        """
        payload = '{"command": "ls -la /tmp", "timeout": 30, "background":'
        fixed = _repair_tool_call_arguments(payload, "terminal")
        # Treated as unrepairable: the result must be the empty-object fallback.
        decoded = json.loads(fixed)
        assert decoded == {}

    def test_glm_truncation_repairable(self):
        """The same GLM-5.1 pattern without the hanging key is recovered."""
        payload = '{"command": "ls -la /tmp", "timeout": 30'
        decoded = json.loads(_repair_tool_call_arguments(payload, "terminal"))
        assert decoded["command"] == "ls -la /tmp"
        assert decoded["timeout"] == 30
|
||||||
Loading…
Add table
Add a link
Reference in a new issue