mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix(run_agent): handle unescaped control chars in tool_call arguments (#15356)
Extends _repair_tool_call_arguments() to cover the most common local-model
JSON corruption pattern: llama.cpp/Ollama backends emit literal tabs and
newlines inside JSON string values (memory save summaries, file contents,
etc.). Previously, such arguments fell through to the '{}' replacement, losing the call.
Adds two repair passes:
- Pass 0: json.loads(strict=False) + re-serialise to canonical wire form
- Pass 4: escape 0x00-0x1F control chars inside string values, then retry
Ports the core utility from #12068 / PR #12093 without the larger plumbing
change (that PR also replaced json.loads at 8 call sites; current main's
_repair_tool_call_arguments is already the single chokepoint, so the
upgrade happens transparently for every existing caller).
Credit: @truenorth-lj for the original utility design.
Adds 4 new regression tests covering literal newlines, literal tabs,
re-serialisation to strict=True-valid output, and the combined
trailing-comma + control-char case.
This commit is contained in:
parent
bb53d79d26
commit
2d444fc84d
2 changed files with 110 additions and 0 deletions
|
|
@ -105,3 +105,39 @@ class TestRepairToolCallArguments:
|
|||
result = _repair_tool_call_arguments(raw, "terminal")
|
||||
# Should at least be valid JSON, even if background is lost
|
||||
json.loads(result)
|
||||
|
||||
# -- Stage 0: strict=False (literal control chars in strings) --
|
||||
# llama.cpp backends sometimes emit literal tabs/newlines inside JSON
|
||||
# string values. strict=False accepts these; we re-serialise to the
|
||||
# canonical wire form (#12068).
|
||||
|
||||
def test_literal_newline_inside_string_value(self):
    """A literal newline inside a JSON string value survives the repair."""
    # '\n' in a non-raw Python literal is a real newline character, so the
    # payload below contains an unescaped control char inside the string.
    broken = '{"summary": "line one\nline two"}'
    expected = {"summary": "line one\nline two"}
    repaired = _repair_tool_call_arguments(broken, "t")
    assert json.loads(repaired) == expected
|
||||
|
||||
def test_literal_tab_inside_string_value(self):
    """A literal tab inside a JSON string value survives the repair."""
    expected = {"summary": "col1\tcol2"}
    # '\t' in a non-raw Python literal is a real tab character inside the
    # JSON string value.
    repaired = _repair_tool_call_arguments('{"summary": "col1\tcol2"}', "t")
    decoded = json.loads(repaired)
    assert decoded == expected
|
||||
|
||||
def test_literal_control_char_reserialised_to_wire_form(self):
    """After repair, the output must parse under strict=True."""
    broken = '{"msg": "has\tliteral\ttabs"}'
    repaired = _repair_tool_call_arguments(broken, "t")
    # strict=True is json.loads' default; it is spelled out here because
    # strict parsing of the repaired output is the point of this test.
    decoded = json.loads(repaired, strict=True)
    assert decoded["msg"] == "has\tliteral\ttabs"
|
||||
|
||||
# -- Stage 4: control-char escape fallback --
|
||||
|
||||
def test_control_chars_with_trailing_comma(self):
    """Trailing comma combined with a literal newline is still repaired.

    The strict=False parse rejects the trailing comma, so a later repair
    stage has to rescue the payload and produce valid JSON.
    """
    broken = '{"msg": "line\none",}'
    repaired = _repair_tool_call_arguments(broken, "t")
    message = json.loads(repaired)["msg"]
    # Only the leading text is pinned; the exact form of the repaired
    # newline is not asserted here.
    assert "line" in message
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue