mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix(run_agent): handle unescaped control chars in tool_call arguments (#15356)
Extends _repair_tool_call_arguments() to cover the most common local-model
JSON corruption pattern: llama.cpp/Ollama backends emit literal tabs and
newlines inside JSON string values (memory save summaries, file contents,
etc.). Previously fell through to '{}' replacement, losing the call.
Adds two repair passes:
- Pass 0: json.loads(strict=False) + re-serialise to canonical wire form
- Pass 4: escape 0x00-0x1F control chars inside string values, then retry
Ports the core utility from #12068 / PR #12093 without the larger plumbing
change (that PR also replaced json.loads at 8 call sites; current main's
_repair_tool_call_arguments is already the single chokepoint, so the
upgrade happens transparently for every existing caller).
Credit: @truenorth-lj for the original utility design.
4 new regression tests covering literal newlines, tabs, re-serialisation
to strict=True-valid output, and the trailing-comma + control-char
combination case.
This commit is contained in:
parent
bb53d79d26
commit
2d444fc84d
2 changed files with 110 additions and 0 deletions
74
run_agent.py
74
run_agent.py
|
|
@ -502,6 +502,48 @@ def _sanitize_messages_surrogates(messages: list) -> bool:
|
||||||
return found
|
return found
|
||||||
|
|
||||||
|
|
||||||
|
def _escape_invalid_chars_in_json_strings(raw: str) -> str:
|
||||||
|
"""Escape unescaped control chars inside JSON string values.
|
||||||
|
|
||||||
|
Walks the raw JSON character-by-character, tracking whether we are
|
||||||
|
inside a double-quoted string. Inside strings, replaces literal
|
||||||
|
control characters (0x00-0x1F) that aren't already part of an escape
|
||||||
|
sequence with their ``\\uXXXX`` equivalents. Pass-through for everything
|
||||||
|
else.
|
||||||
|
|
||||||
|
Ported from #12093 — complements the other repair passes in
|
||||||
|
``_repair_tool_call_arguments`` when ``json.loads(strict=False)`` is
|
||||||
|
not enough (e.g. llama.cpp backends that emit literal apostrophes or
|
||||||
|
tabs alongside other malformations).
|
||||||
|
"""
|
||||||
|
out: list[str] = []
|
||||||
|
in_string = False
|
||||||
|
i = 0
|
||||||
|
n = len(raw)
|
||||||
|
while i < n:
|
||||||
|
ch = raw[i]
|
||||||
|
if in_string:
|
||||||
|
if ch == "\\" and i + 1 < n:
|
||||||
|
# Already-escaped char — pass through as-is
|
||||||
|
out.append(ch)
|
||||||
|
out.append(raw[i + 1])
|
||||||
|
i += 2
|
||||||
|
continue
|
||||||
|
if ch == '"':
|
||||||
|
in_string = False
|
||||||
|
out.append(ch)
|
||||||
|
elif ord(ch) < 0x20:
|
||||||
|
out.append(f"\\u{ord(ch):04x}")
|
||||||
|
else:
|
||||||
|
out.append(ch)
|
||||||
|
else:
|
||||||
|
if ch == '"':
|
||||||
|
in_string = True
|
||||||
|
out.append(ch)
|
||||||
|
i += 1
|
||||||
|
return "".join(out)
|
||||||
|
|
||||||
|
|
||||||
def _repair_tool_call_arguments(raw_args: str, tool_name: str = "?") -> str:
|
def _repair_tool_call_arguments(raw_args: str, tool_name: str = "?") -> str:
|
||||||
"""Attempt to repair malformed tool_call argument JSON.
|
"""Attempt to repair malformed tool_call argument JSON.
|
||||||
|
|
||||||
|
|
@ -523,6 +565,23 @@ def _repair_tool_call_arguments(raw_args: str, tool_name: str = "?") -> str:
|
||||||
logger.warning("Sanitized Python-None tool_call arguments for %s", tool_name)
|
logger.warning("Sanitized Python-None tool_call arguments for %s", tool_name)
|
||||||
return "{}"
|
return "{}"
|
||||||
|
|
||||||
|
# Repair pass 0: llama.cpp backends sometimes emit literal control
|
||||||
|
# characters (tabs, newlines) inside JSON string values. json.loads
|
||||||
|
# with strict=False accepts these and lets us re-serialise the
|
||||||
|
# result into wire-valid JSON without any string surgery. This is
|
||||||
|
# the most common local-model repair case (#12068).
|
||||||
|
try:
|
||||||
|
parsed = json.loads(raw_stripped, strict=False)
|
||||||
|
reserialised = json.dumps(parsed, separators=(",", ":"))
|
||||||
|
if reserialised != raw_stripped:
|
||||||
|
logger.warning(
|
||||||
|
"Repaired unescaped control chars in tool_call arguments for %s",
|
||||||
|
tool_name,
|
||||||
|
)
|
||||||
|
return reserialised
|
||||||
|
except (json.JSONDecodeError, TypeError, ValueError):
|
||||||
|
pass
|
||||||
|
|
||||||
# Attempt common JSON repairs
|
# Attempt common JSON repairs
|
||||||
fixed = raw_stripped
|
fixed = raw_stripped
|
||||||
# 1. Strip trailing commas before } or ]
|
# 1. Strip trailing commas before } or ]
|
||||||
|
|
@ -557,6 +616,21 @@ def _repair_tool_call_arguments(raw_args: str, tool_name: str = "?") -> str:
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
# Repair pass 4: escape unescaped control chars inside JSON strings,
|
||||||
|
# then retry. Catches cases where strict=False alone fails because
|
||||||
|
# other malformations are present too.
|
||||||
|
try:
|
||||||
|
escaped = _escape_invalid_chars_in_json_strings(fixed)
|
||||||
|
if escaped != fixed:
|
||||||
|
json.loads(escaped)
|
||||||
|
logger.warning(
|
||||||
|
"Repaired control-char-laced tool_call arguments for %s: %s → %s",
|
||||||
|
tool_name, raw_stripped[:80], escaped[:80],
|
||||||
|
)
|
||||||
|
return escaped
|
||||||
|
except (json.JSONDecodeError, TypeError, ValueError):
|
||||||
|
pass
|
||||||
|
|
||||||
# Last resort: replace with empty object so the API request doesn't
|
# Last resort: replace with empty object so the API request doesn't
|
||||||
# crash the entire session.
|
# crash the entire session.
|
||||||
logger.warning(
|
logger.warning(
|
||||||
|
|
|
||||||
|
|
@ -105,3 +105,39 @@ class TestRepairToolCallArguments:
|
||||||
result = _repair_tool_call_arguments(raw, "terminal")
|
result = _repair_tool_call_arguments(raw, "terminal")
|
||||||
# Should at least be valid JSON, even if background is lost
|
# Should at least be valid JSON, even if background is lost
|
||||||
json.loads(result)
|
json.loads(result)
|
||||||
|
|
||||||
|
# -- Stage 0: strict=False (literal control chars in strings) --
|
||||||
|
# llama.cpp backends sometimes emit literal tabs/newlines inside JSON
|
||||||
|
# string values. strict=False accepts these; we re-serialise to the
|
||||||
|
# canonical wire form (#12068).
|
||||||
|
|
||||||
|
def test_literal_newline_inside_string_value(self):
    """A raw newline inside a string value survives the repair intact."""
    malformed = '{"summary": "line one\nline two"}'
    repaired = _repair_tool_call_arguments(malformed, "t")
    assert json.loads(repaired) == {"summary": "line one\nline two"}
|
||||||
|
|
||||||
|
def test_literal_tab_inside_string_value(self):
    """A raw tab inside a string value survives the repair intact."""
    malformed = '{"summary": "col1\tcol2"}'
    repaired = _repair_tool_call_arguments(malformed, "t")
    assert json.loads(repaired) == {"summary": "col1\tcol2"}
|
||||||
|
|
||||||
|
def test_literal_control_char_reserialised_to_wire_form(self):
    """The repaired output must be parseable by a strict JSON parser."""
    malformed = '{"msg": "has\tliteral\ttabs"}'
    repaired = _repair_tool_call_arguments(malformed, "t")
    # json.loads is strict by default, so it raises here if any raw
    # control character is still present in the repaired text.
    decoded = json.loads(repaired)
    assert decoded["msg"] == "has\tliteral\ttabs"
|
||||||
|
|
||||||
|
# -- Stage 4: control-char escape fallback --
|
||||||
|
|
||||||
|
def test_control_chars_with_trailing_comma(self):
    """Trailing comma plus a raw newline is still repaired.

    The trailing comma makes the lenient ``strict=False`` parse fail, so
    the later repair passes have to both drop the comma and escape the
    control character.
    """
    malformed = '{"msg": "line\none",}'
    repaired = _repair_tool_call_arguments(malformed, "t")
    decoded = json.loads(repaired)
    assert "line" in decoded["msg"]
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue