hermes-agent/tests/run_agent/test_repair_tool_call_arguments.py
Teknium 2d444fc84d
fix(run_agent): handle unescaped control chars in tool_call arguments (#15356)
Extends _repair_tool_call_arguments() to cover the most common local-model
JSON corruption pattern: llama.cpp/Ollama backends emit literal tabs and
newlines inside JSON string values (memory save summaries, file contents,
etc.). Previously, such arguments fell through to the '{}' replacement, losing the call.

Adds two repair passes:
  - Pass 0: json.loads(strict=False) + re-serialise to canonical wire form
  - Pass 4: escape 0x00-0x1F control chars inside string values, then retry

Ports the core utility from #12068 / PR #12093 without the larger plumbing
change (that PR also replaced json.loads at 8 call sites; current main's
_repair_tool_call_arguments is already the single chokepoint, so the
upgrade happens transparently for every existing caller).

Credit: @truenorth-lj for the original utility design.

4 new regression tests covering literal newlines, tabs, re-serialisation
to strict=True-valid output, and the trailing-comma + control-char
combination case.
2026-04-24 15:06:41 -07:00

143 lines
5.5 KiB
Python

"""Tests for _repair_tool_call_arguments — malformed JSON repair pipeline."""
import json
import pytest
from run_agent import _repair_tool_call_arguments
class TestRepairToolCallArguments:
    """Exercise the malformed-arguments repair pipeline stage by stage.

    Every test hands one raw ``arguments`` value to
    ``_repair_tool_call_arguments`` together with a tool name and then
    checks either the exact text that comes back or the value obtained by
    decoding that text with ``json.loads``.
    """

    # ------------------------------------------------------------------
    # Stage 1: empty / whitespace-only input
    # ------------------------------------------------------------------

    def test_empty_string_returns_empty_object(self):
        """An empty string comes back as the empty JSON object."""
        repaired = _repair_tool_call_arguments("", "t")
        assert repaired == "{}"

    def test_whitespace_only_returns_empty_object(self):
        """Spaces, newlines and tabs alone come back as ``{}``."""
        repaired = _repair_tool_call_arguments(" \n\t ", "t")
        assert repaired == "{}"

    def test_none_type_returns_empty_object(self):
        """A non-string input (e.g. None from a broken model response)."""
        repaired = _repair_tool_call_arguments(None, "t")
        assert repaired == "{}"

    # ------------------------------------------------------------------
    # Stage 2: the Python ``None`` literal spelled out as text
    # ------------------------------------------------------------------

    def test_python_none_literal(self):
        """The text ``None`` comes back as ``{}``."""
        repaired = _repair_tool_call_arguments("None", "t")
        assert repaired == "{}"

    def test_python_none_with_whitespace(self):
        """The text ``None`` padded with spaces also comes back as ``{}``."""
        repaired = _repair_tool_call_arguments(" None ", "t")
        assert repaired == "{}"

    # ------------------------------------------------------------------
    # Stage 3: trailing comma repair
    # ------------------------------------------------------------------

    def test_trailing_comma_in_object(self):
        """A comma right before the closing brace is tolerated."""
        expected = {"key": "value"}
        repaired = _repair_tool_call_arguments('{"key": "value",}', "t")
        assert json.loads(repaired) == expected

    def test_trailing_comma_in_array(self):
        """A comma right before a closing bracket is tolerated."""
        expected = {"a": [1, 2]}
        repaired = _repair_tool_call_arguments('{"a": [1, 2,]}', "t")
        decoded = json.loads(repaired)
        assert decoded == expected

    def test_multiple_trailing_commas(self):
        """Both members survive when the object ends with a stray comma."""
        repaired = _repair_tool_call_arguments('{"a": 1, "b": 2,}', "t")
        decoded = json.loads(repaired)
        # Only these two members are inspected; nothing else is pinned.
        assert (decoded["a"], decoded["b"]) == (1, 2)

    # ------------------------------------------------------------------
    # Stage 4: unclosed brackets
    # ------------------------------------------------------------------

    def test_unclosed_brace(self):
        """A missing final ``}`` is recovered with the member intact."""
        expected = {"key": "value"}
        repaired = _repair_tool_call_arguments('{"key": "value"', "t")
        decoded = json.loads(repaired)
        assert decoded == expected

    def test_unclosed_bracket_and_brace(self):
        """Input missing both ``]`` and ``}`` must still yield valid JSON."""
        repaired = _repair_tool_call_arguments('{"a": [1, 2', "t")
        # Validity is the only requirement: json.loads raises on anything
        # that is not JSON. The recovered value itself is not pinned.
        json.loads(repaired)

    # ------------------------------------------------------------------
    # Stage 5: excess closing delimiters
    # ------------------------------------------------------------------

    def test_extra_closing_brace(self):
        """One surplus ``}`` at the end does not lose the member."""
        expected = {"key": "value"}
        repaired = _repair_tool_call_arguments('{"key": "value"}}', "t")
        decoded = json.loads(repaired)
        assert decoded == expected

    def test_extra_closing_bracket(self):
        """One surplus ``]`` must still yield valid JSON."""
        repaired = _repair_tool_call_arguments('{"a": [1]]}', "t")
        # Validity only; json.loads raises if the output is not JSON.
        json.loads(repaired)

    # ------------------------------------------------------------------
    # Stage 6: last resort
    # ------------------------------------------------------------------

    def test_unrepairable_garbage_returns_empty_object(self):
        """Text that is not JSON at all comes back as ``{}``."""
        repaired = _repair_tool_call_arguments("totally not json", "t")
        assert repaired == "{}"

    def test_unrepairable_partial_returns_empty_object(self):
        """Input cut off inside a string value comes back as ``{}``."""
        # The cut falls inside an open string, so appending closing
        # delimiters alone cannot produce valid JSON.
        repaired = _repair_tool_call_arguments('{"truncated": "val', "t")
        assert repaired == "{}"

    # ------------------------------------------------------------------
    # Already-valid JSON
    # ------------------------------------------------------------------

    def test_already_valid_json_passes_through(self):
        """Well-formed input still decodes and keeps its ``path`` value."""
        payload = '{"path": "/tmp/foo", "content": "hello"}'
        repaired = _repair_tool_call_arguments(payload, "t")
        decoded = json.loads(repaired)
        assert decoded["path"] == "/tmp/foo"

    # ------------------------------------------------------------------
    # Combined repairs
    # ------------------------------------------------------------------

    def test_trailing_comma_plus_unclosed_brace(self):
        """A trailing comma together with a missing ``}`` yields valid JSON."""
        repaired = _repair_tool_call_arguments('{"a": 1, "b": 2,', "t")
        # Full recovery is not required here; the output only has to be
        # decodable JSON.
        json.loads(repaired)

    def test_real_world_glm_truncation(self):
        """Simulates GLM-5.1 truncating mid-argument."""
        payload = '{"command": "ls -la /tmp", "timeout": 30, "background":'
        repaired = _repair_tool_call_arguments(payload, "terminal")
        # Valid JSON is enough, even if the ``background`` member is lost.
        json.loads(repaired)

    # ------------------------------------------------------------------
    # Stage 0: literal control characters inside string values
    #
    # llama.cpp backends sometimes emit raw tabs/newlines inside JSON
    # strings. The repair is expected to accept them (strict=False) and
    # re-serialise to the canonical wire form (#12068).
    # ------------------------------------------------------------------

    def test_literal_newline_inside_string_value(self):
        """A raw newline inside a string value is preserved after repair."""
        payload = '{"summary": "line one\nline two"}'
        expected = {"summary": "line one\nline two"}
        repaired = _repair_tool_call_arguments(payload, "t")
        decoded = json.loads(repaired)
        assert decoded == expected

    def test_literal_tab_inside_string_value(self):
        """A raw tab inside a string value is preserved after repair."""
        payload = '{"summary": "col1\tcol2"}'
        expected = {"summary": "col1\tcol2"}
        repaired = _repair_tool_call_arguments(payload, "t")
        decoded = json.loads(repaired)
        assert decoded == expected

    def test_literal_control_char_reserialised_to_wire_form(self):
        """After repair, the output must parse under strict=True."""
        payload = '{"msg": "has\tliteral\ttabs"}'
        repaired = _repair_tool_call_arguments(payload, "t")
        # strict=True is json.loads' default; it is spelled out because
        # strict parsing is exactly what this test is about.
        decoded = json.loads(repaired, strict=True)
        assert decoded["msg"] == "has\tliteral\ttabs"

    # ------------------------------------------------------------------
    # Control-char escape fallback
    # ------------------------------------------------------------------

    def test_control_chars_with_trailing_comma(self):
        """A raw newline plus a trailing comma is still recovered.

        The trailing comma is expected to defeat the lenient
        (strict=False) parse, so recovery presumably relies on the later
        fallback passes. Only the presence of ``"line"`` in ``msg`` is
        asserted.
        """
        payload = '{"msg": "line\none",}'
        repaired = _repair_tool_call_arguments(payload, "t")
        decoded = json.loads(repaired)
        assert "line" in decoded["msg"]