mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix: extract _repair_tool_call_arguments helper, add tests, bound loop
Follow-up for PR #12252 salvage: - Extract 75-line inline repair block to _repair_tool_call_arguments() module-level helper for testability and readability - Remove redundant 'import re as _re' (re already imported at line 33) - Bound the while-True excess-delimiter removal loop to 50 iterations - Add 17 tests covering all 6 repair stages - Add sirEven to AUTHOR_MAP in release.py
This commit is contained in:
parent
9eeaaa4f1b
commit
9725b452a1
3 changed files with 178 additions and 75 deletions
144
run_agent.py
144
run_agent.py
|
|
@ -555,6 +555,71 @@ def _sanitize_messages_surrogates(messages: list) -> bool:
|
|||
return found
|
||||
|
||||
|
||||
def _rebalance_json_delimiters(text: str) -> str:
    """Return *text* with its ``{}`` / ``[]`` nesting made consistent.

    A single left-to-right pass that ignores everything inside double-quoted
    JSON strings (honouring backslash escapes) and, outside of strings:

    * drops a closing delimiter that does not match the innermost open one,
    * removes a trailing comma (plus following whitespace) that sits directly
      before a closing delimiter,
    * appends the closers that are still owed at the end of the text,
      innermost first, so nested structures are closed in the right order.

    If the text ends inside a string literal it is returned unchanged: the
    missing part of a string value cannot be guessed, and completing it could
    hand a tool a half-written path or command.
    """
    closer_for = {"{": "}", "[": "]"}
    out = []
    pending = []  # closers still owed; the innermost one is last
    in_string = False
    escaped = False

    def drop_trailing_comma():
        # Look past whitespace at the end of the output; if a comma is
        # found, discard it together with that whitespace.
        idx = len(out) - 1
        while idx >= 0 and out[idx].isspace():
            idx -= 1
        if idx >= 0 and out[idx] == ",":
            del out[idx:]

    for ch in text:
        if in_string:
            out.append(ch)
            if escaped:
                escaped = False
            elif ch == "\\":
                escaped = True
            elif ch == '"':
                in_string = False
            continue

        if ch == '"':
            in_string = True
        elif ch in closer_for:
            pending.append(closer_for[ch])
        elif ch in "}]":
            if not pending or pending[-1] != ch:
                # Excess or mismatched closer: leave it out of the result.
                continue
            pending.pop()
            drop_trailing_comma()
        out.append(ch)

    if in_string:
        return text
    if pending:
        drop_trailing_comma()
        out.extend(reversed(pending))
    return "".join(out)


def _repair_tool_call_arguments(raw_args: str, tool_name: str = "?") -> str:
    """Attempt to repair malformed tool_call argument JSON.

    Models like GLM-5.1 via Ollama can produce truncated JSON, trailing
    commas, Python ``None``, etc.  The API proxy rejects these with HTTP 400
    "invalid tool call arguments".  This function applies common repairs;
    if they fail it returns ``"{}"`` so the request succeeds (better than
    crashing the session).

    Args:
        raw_args: The raw ``arguments`` value of a tool call.  Anything that
            is not a ``str`` (for example ``None``) is treated as empty.
        tool_name: Name of the tool, used only in log messages.

    Returns:
        A string that ``json.loads`` accepts:

        * ``"{}"`` for empty / whitespace-only / non-string input and for the
          Python literal ``None``;
        * the stripped input unchanged when it already parses;
        * otherwise the input with trailing commas removed, unmatched closing
          delimiters dropped and missing ones appended in nesting order;
        * ``"{}"`` when the result of that repair still does not parse
          (e.g. the text was cut off in the middle of a string value).

    Every substitution or repair is logged at WARNING level; input that was
    already valid is returned without logging.
    """
    raw_stripped = raw_args.strip() if isinstance(raw_args, str) else ""

    # Fast-path: empty / whitespace-only -> empty object
    if not raw_stripped:
        logger.warning("Sanitized empty tool_call arguments for %s", tool_name)
        return "{}"

    # Python-literal None -> normalise to {}
    if raw_stripped == "None":
        logger.warning("Sanitized Python-None tool_call arguments for %s", tool_name)
        return "{}"

    # String-aware structural repair.  Well-formed JSON passes through this
    # step untouched, so text inside string values is never rewritten.
    fixed = _rebalance_json_delimiters(raw_stripped)

    try:
        json.loads(fixed)
    except (ValueError, RecursionError):
        # json.JSONDecodeError is a ValueError; RecursionError covers
        # pathologically deep nesting.  Last resort: replace with an empty
        # object so the API request doesn't crash the entire session.
        logger.warning(
            "Unrepairable tool_call arguments for %s — "
            "replaced with empty object (was: %s)",
            tool_name, raw_stripped[:80],
        )
        return "{}"

    if fixed != raw_stripped:
        logger.warning(
            "Repaired malformed tool_call arguments for %s: %s → %s",
            tool_name, raw_stripped[:80], fixed[:80],
        )
    return fixed
|
||||
|
||||
|
||||
def _strip_non_ascii(text: str) -> str:
|
||||
"""Remove non-ASCII characters, replacing with closest ASCII equivalent or removing.
|
||||
|
||||
|
|
@ -9662,81 +9727,10 @@ class AIAgent:
|
|||
),
|
||||
}}
|
||||
except Exception:
|
||||
# GLM-5.1 and similar models can generate
|
||||
# malformed tool_call arguments (truncated JSON,
|
||||
# trailing commas, Python None, etc.). The API
|
||||
# proxy rejects these with HTTP 400 "invalid tool
|
||||
# call arguments". Attempt common repairs; if
|
||||
# all fail, replace with "{}" so the request
|
||||
# succeeds (better than crashing the session).
|
||||
raw_args = tc["function"]["arguments"]
|
||||
repaired = False
|
||||
raw_stripped = raw_args.strip() if isinstance(raw_args, str) else ""
|
||||
|
||||
# Fast-path: empty / whitespace-only → empty object
|
||||
if not raw_stripped:
|
||||
tc["function"]["arguments"] = "{}"
|
||||
repaired = True
|
||||
logger.warning(
|
||||
"Sanitized empty tool_call arguments for %s",
|
||||
tc["function"].get("name", "?"),
|
||||
)
|
||||
# Python-literal None → JSON null → normalise to {}
|
||||
elif raw_stripped == "None":
|
||||
tc["function"]["arguments"] = "{}"
|
||||
repaired = True
|
||||
logger.warning(
|
||||
"Sanitized Python-None tool_call arguments for %s",
|
||||
tc["function"].get("name", "?"),
|
||||
)
|
||||
|
||||
if not repaired:
|
||||
# Attempt common JSON repairs
|
||||
import re as _re
|
||||
fixed = raw_stripped
|
||||
# 1. Strip trailing commas before } or ]
|
||||
fixed = _re.sub(r',\s*([}\]])', r'\1', fixed)
|
||||
# 2. Close unclosed structures
|
||||
open_curly = fixed.count('{') - fixed.count('}')
|
||||
open_bracket = fixed.count('[') - fixed.count(']')
|
||||
if open_curly > 0:
|
||||
fixed += '}' * open_curly
|
||||
if open_bracket > 0:
|
||||
fixed += ']' * open_bracket
|
||||
# 3. Remove extra closing braces/brackets
|
||||
while True:
|
||||
try:
|
||||
json.loads(fixed)
|
||||
break
|
||||
except json.JSONDecodeError:
|
||||
if fixed.endswith('}') and fixed.count('}') > fixed.count('{'):
|
||||
fixed = fixed[:-1]
|
||||
elif fixed.endswith(']') and fixed.count(']') > fixed.count('['):
|
||||
fixed = fixed[:-1]
|
||||
else:
|
||||
break
|
||||
try:
|
||||
json.loads(fixed)
|
||||
tc["function"]["arguments"] = fixed
|
||||
repaired = True
|
||||
logger.warning(
|
||||
"Repaired malformed tool_call arguments for %s: %s → %s",
|
||||
tc["function"].get("name", "?"),
|
||||
raw_stripped[:80], fixed[:80],
|
||||
)
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
|
||||
if not repaired:
|
||||
# Last resort: replace with empty object so the
|
||||
# API request doesn't crash the entire session.
|
||||
tc["function"]["arguments"] = "{}"
|
||||
logger.warning(
|
||||
"Unrepairable tool_call arguments for %s — "
|
||||
"replaced with empty object (was: %s)",
|
||||
tc["function"].get("name", "?"),
|
||||
raw_stripped[:80],
|
||||
)
|
||||
tc["function"]["arguments"] = _repair_tool_call_arguments(
|
||||
tc["function"]["arguments"],
|
||||
tc["function"].get("name", "?"),
|
||||
)
|
||||
new_tcs.append(tc)
|
||||
am["tool_calls"] = new_tcs
|
||||
|
||||
|
|
|
|||
|
|
@ -66,6 +66,8 @@ AUTHOR_MAP = {
|
|||
"104278804+Sertug17@users.noreply.github.com": "Sertug17",
|
||||
"112503481+caentzminger@users.noreply.github.com": "caentzminger",
|
||||
"258577966+voidborne-d@users.noreply.github.com": "voidborne-d",
|
||||
"sir_even@icloud.com": "sirEven",
|
||||
"36056348+sirEven@users.noreply.github.com": "sirEven",
|
||||
"70424851+insecurejezza@users.noreply.github.com": "insecurejezza",
|
||||
"254021826+dodo-reach@users.noreply.github.com": "dodo-reach",
|
||||
"259807879+Bartok9@users.noreply.github.com": "Bartok9",
|
||||
|
|
|
|||
107
tests/run_agent/test_repair_tool_call_arguments.py
Normal file
107
tests/run_agent/test_repair_tool_call_arguments.py
Normal file
|
|
@ -0,0 +1,107 @@
|
|||
"""Tests for _repair_tool_call_arguments — malformed JSON repair pipeline."""
|
||||
|
||||
import json
|
||||
import pytest
|
||||
|
||||
from run_agent import _repair_tool_call_arguments
|
||||
|
||||
|
||||
class TestRepairToolCallArguments:
    """Exercise the argument-repair helper one stage at a time."""

    # -- Stage 1: empty / whitespace-only --

    def test_empty_string_returns_empty_object(self):
        repaired = _repair_tool_call_arguments("", "t")
        assert repaired == "{}"

    def test_whitespace_only_returns_empty_object(self):
        repaired = _repair_tool_call_arguments(" \n\t ", "t")
        assert repaired == "{}"

    def test_none_type_returns_empty_object(self):
        """A non-string argument (here ``None``) is treated like empty input."""
        repaired = _repair_tool_call_arguments(None, "t")
        assert repaired == "{}"

    # -- Stage 2: Python None literal --

    def test_python_none_literal(self):
        repaired = _repair_tool_call_arguments("None", "t")
        assert repaired == "{}"

    def test_python_none_with_whitespace(self):
        repaired = _repair_tool_call_arguments(" None ", "t")
        assert repaired == "{}"

    # -- Stage 3: trailing comma repair --

    def test_trailing_comma_in_object(self):
        decoded = json.loads(_repair_tool_call_arguments('{"key": "value",}', "t"))
        assert decoded == {"key": "value"}

    def test_trailing_comma_in_array(self):
        decoded = json.loads(_repair_tool_call_arguments('{"a": [1, 2,]}', "t"))
        assert decoded == {"a": [1, 2]}

    def test_multiple_trailing_commas(self):
        decoded = json.loads(_repair_tool_call_arguments('{"a": 1, "b": 2,}', "t"))
        assert decoded["a"] == 1
        assert decoded["b"] == 2

    # -- Stage 4: unclosed brackets --

    def test_unclosed_brace(self):
        decoded = json.loads(_repair_tool_call_arguments('{"key": "value"', "t"))
        assert decoded == {"key": "value"}

    def test_unclosed_bracket_and_brace(self):
        # Both an array and an object are left open.  Whether the payload is
        # recovered depends on the order in which the helper appends the
        # missing closers, so only the validity of the output is checked.
        repaired = _repair_tool_call_arguments('{"a": [1, 2', "t")
        json.loads(repaired)

    # -- Stage 5: excess closing delimiters --

    def test_extra_closing_brace(self):
        decoded = json.loads(_repair_tool_call_arguments('{"key": "value"}}', "t"))
        assert decoded == {"key": "value"}

    def test_extra_closing_bracket(self):
        # The surplus ']' is not at the end of the text; the output only has
        # to be parseable.
        repaired = _repair_tool_call_arguments('{"a": [1]]}', "t")
        json.loads(repaired)

    # -- Stage 6: last resort --

    def test_unrepairable_garbage_returns_empty_object(self):
        repaired = _repair_tool_call_arguments("totally not json", "t")
        assert repaired == "{}"

    def test_unrepairable_partial_returns_empty_object(self):
        # Cut off in the middle of a string value: appending closers cannot
        # make this parse, so the empty-object fallback is expected.
        repaired = _repair_tool_call_arguments('{"truncated": "val', "t")
        assert repaired == "{}"

    # -- Valid JSON passthrough --

    def test_already_valid_json_passes_through(self):
        """Well-formed JSON handed to the helper stays parseable with its
        values intact."""
        well_formed = '{"path": "/tmp/foo", "content": "hello"}'
        decoded = json.loads(_repair_tool_call_arguments(well_formed, "t"))
        assert decoded["path"] == "/tmp/foo"

    # -- Combined repairs --

    def test_trailing_comma_plus_unclosed_brace(self):
        # The comma sits at the truncation point with no closer after it.
        # Full recovery is not guaranteed; the output must at least parse.
        repaired = _repair_tool_call_arguments('{"a": 1, "b": 2,', "t")
        json.loads(repaired)

    def test_real_world_glm_truncation(self):
        """Input shaped like a GLM-5.1 response truncated mid-argument."""
        truncated = '{"command": "ls -la /tmp", "timeout": 30, "background":'
        repaired = _repair_tool_call_arguments(truncated, "terminal")
        # The dangling key may be lost; the output must still be valid JSON.
        json.loads(repaired)
|
||||
Loading…
Add table
Add a link
Reference in a new issue