fix: extract _repair_tool_call_arguments helper, add tests, bound loop

Follow-up for PR #12252 salvage:
- Extract the 75-line inline repair block into a module-level
  _repair_tool_call_arguments() helper for testability and readability
- Remove redundant 'import re as _re' (re already imported at line 33)
- Bound the while-True excess-delimiter removal loop to 50 iterations
- Add 17 tests covering all 6 repair stages
- Add sirEven to AUTHOR_MAP in release.py
This commit is contained in:
Teknium 2026-04-20 05:04:26 -07:00 committed by Teknium
parent 9eeaaa4f1b
commit 9725b452a1
3 changed files with 178 additions and 75 deletions

View file

@ -555,6 +555,71 @@ def _sanitize_messages_surrogates(messages: list) -> bool:
return found
def _missing_json_closers(text: str) -> str:
    """Return the closing delimiters needed to balance *text*, innermost first.

    Braces and brackets that appear inside JSON string literals are ignored.
    A closing delimiter that does not match the innermost open structure is
    skipped; excess closers are handled separately by the caller.
    """
    pending = []  # closers still owed, outermost first
    in_string = False
    escaped = False
    for ch in text:
        if in_string:
            if escaped:
                escaped = False
            elif ch == "\\":
                escaped = True
            elif ch == '"':
                in_string = False
        elif ch == '"':
            in_string = True
        elif ch == "{":
            pending.append("}")
        elif ch == "[":
            pending.append("]")
        elif ch in "}]" and pending and pending[-1] == ch:
            pending.pop()
    # The innermost structure must be closed first.
    return "".join(reversed(pending))


def _repair_tool_call_arguments(raw_args: str, tool_name: str = "?") -> str:
    """Attempt to repair malformed tool_call argument JSON.

    Models like GLM-5.1 via Ollama can produce truncated JSON, trailing
    commas, Python ``None``, etc. The API proxy rejects these with HTTP 400
    "invalid tool call arguments". This function applies common repairs;
    if all fail it returns ``"{}"`` so the request succeeds (better than
    crashing the session). All repairs are logged at WARNING level.

    Args:
        raw_args: The raw ``arguments`` value from the tool call. Anything
            that is not a ``str`` (e.g. ``None``) is treated as empty.
        tool_name: Tool name, used only in log messages.

    Returns:
        A string that parses with ``json.loads``: the stripped input when it
        is already valid, a repaired version when one of the repairs works,
        or ``"{}"`` otherwise.
    """
    raw_stripped = raw_args.strip() if isinstance(raw_args, str) else ""

    # Fast-path: empty / whitespace-only / non-string -> empty object
    if not raw_stripped:
        logger.warning("Sanitized empty tool_call arguments for %s", tool_name)
        return "{}"

    # Python-literal None -> normalise to {}
    if raw_stripped == "None":
        logger.warning("Sanitized Python-None tool_call arguments for %s", tool_name)
        return "{}"

    # Already-valid JSON is returned untouched: the regex repair below is not
    # string-aware and could otherwise alter text such as "x,}" in a value.
    try:
        json.loads(raw_stripped)
    except json.JSONDecodeError:
        pass
    else:
        return raw_stripped

    # 1. Close unclosed structures in nesting order, so that a truncated
    #    '{"a": [1, 2' becomes '{"a": [1, 2]}' rather than '{"a": [1, 2}]'.
    fixed = raw_stripped + _missing_json_closers(raw_stripped)

    # 2. Strip trailing commas before } or ]. This runs after closing so a
    #    payload truncated right after a comma is repaired as well.
    fixed = re.sub(r',\s*([}\]])', r'\1', fixed)

    # 3. Remove excess closing braces/brackets (bounded to 50 iterations)
    for _ in range(50):
        try:
            json.loads(fixed)
            break
        except json.JSONDecodeError:
            if fixed.endswith('}') and fixed.count('}') > fixed.count('{'):
                fixed = fixed[:-1]
            elif fixed.endswith(']') and fixed.count(']') > fixed.count('['):
                fixed = fixed[:-1]
            else:
                break

    try:
        json.loads(fixed)
    except json.JSONDecodeError:
        pass
    else:
        logger.warning(
            "Repaired malformed tool_call arguments for %s: %s -> %s",
            tool_name, raw_stripped[:80], fixed[:80],
        )
        return fixed

    # Last resort: replace with empty object so the API request doesn't
    # crash the entire session.
    logger.warning(
        "Unrepairable tool_call arguments for %s; "
        "replaced with empty object (was: %s)",
        tool_name, raw_stripped[:80],
    )
    return "{}"
def _strip_non_ascii(text: str) -> str:
"""Remove non-ASCII characters, replacing with closest ASCII equivalent or removing.
@ -9662,81 +9727,10 @@ class AIAgent:
),
}}
except Exception:
# GLM-5.1 and similar models can generate
# malformed tool_call arguments (truncated JSON,
# trailing commas, Python None, etc.). The API
# proxy rejects these with HTTP 400 "invalid tool
# call arguments". Attempt common repairs; if
# all fail, replace with "{}" so the request
# succeeds (better than crashing the session).
raw_args = tc["function"]["arguments"]
repaired = False
raw_stripped = raw_args.strip() if isinstance(raw_args, str) else ""
# Fast-path: empty / whitespace-only → empty object
if not raw_stripped:
tc["function"]["arguments"] = "{}"
repaired = True
logger.warning(
"Sanitized empty tool_call arguments for %s",
tc["function"].get("name", "?"),
)
# Python-literal None → JSON null → normalise to {}
elif raw_stripped == "None":
tc["function"]["arguments"] = "{}"
repaired = True
logger.warning(
"Sanitized Python-None tool_call arguments for %s",
tc["function"].get("name", "?"),
)
if not repaired:
# Attempt common JSON repairs
import re as _re
fixed = raw_stripped
# 1. Strip trailing commas before } or ]
fixed = _re.sub(r',\s*([}\]])', r'\1', fixed)
# 2. Close unclosed structures
open_curly = fixed.count('{') - fixed.count('}')
open_bracket = fixed.count('[') - fixed.count(']')
if open_curly > 0:
fixed += '}' * open_curly
if open_bracket > 0:
fixed += ']' * open_bracket
# 3. Remove extra closing braces/brackets
while True:
try:
json.loads(fixed)
break
except json.JSONDecodeError:
if fixed.endswith('}') and fixed.count('}') > fixed.count('{'):
fixed = fixed[:-1]
elif fixed.endswith(']') and fixed.count(']') > fixed.count('['):
fixed = fixed[:-1]
else:
break
try:
json.loads(fixed)
tc["function"]["arguments"] = fixed
repaired = True
logger.warning(
"Repaired malformed tool_call arguments for %s: %s%s",
tc["function"].get("name", "?"),
raw_stripped[:80], fixed[:80],
)
except json.JSONDecodeError:
pass
if not repaired:
# Last resort: replace with empty object so the
# API request doesn't crash the entire session.
tc["function"]["arguments"] = "{}"
logger.warning(
"Unrepairable tool_call arguments for %s"
"replaced with empty object (was: %s)",
tc["function"].get("name", "?"),
raw_stripped[:80],
)
tc["function"]["arguments"] = _repair_tool_call_arguments(
tc["function"]["arguments"],
tc["function"].get("name", "?"),
)
new_tcs.append(tc)
am["tool_calls"] = new_tcs

View file

@ -66,6 +66,8 @@ AUTHOR_MAP = {
"104278804+Sertug17@users.noreply.github.com": "Sertug17",
"112503481+caentzminger@users.noreply.github.com": "caentzminger",
"258577966+voidborne-d@users.noreply.github.com": "voidborne-d",
"sir_even@icloud.com": "sirEven",
"36056348+sirEven@users.noreply.github.com": "sirEven",
"70424851+insecurejezza@users.noreply.github.com": "insecurejezza",
"254021826+dodo-reach@users.noreply.github.com": "dodo-reach",
"259807879+Bartok9@users.noreply.github.com": "Bartok9",

View file

@ -0,0 +1,107 @@
"""Tests for _repair_tool_call_arguments — malformed JSON repair pipeline."""
import json
import pytest
from run_agent import _repair_tool_call_arguments
class TestRepairToolCallArguments:
    """Exercise each stage of the tool-call argument repair pipeline."""

    @staticmethod
    def _decoded(raw, tool="t"):
        """Run the helper on *raw* and decode what it returns as JSON.

        ``json.loads`` raises if the helper's output is not valid JSON, so
        calling this without inspecting the result is itself a validity check.
        """
        return json.loads(_repair_tool_call_arguments(raw, tool))

    # -- Stage 1: empty / whitespace-only --

    def test_empty_string_returns_empty_object(self):
        outcome = _repair_tool_call_arguments("", "t")
        assert outcome == "{}"

    def test_whitespace_only_returns_empty_object(self):
        outcome = _repair_tool_call_arguments(" \n\t ", "t")
        assert outcome == "{}"

    def test_none_type_returns_empty_object(self):
        """A non-string input such as ``None`` yields the empty object."""
        outcome = _repair_tool_call_arguments(None, "t")
        assert outcome == "{}"

    # -- Stage 2: Python None literal --

    def test_python_none_literal(self):
        outcome = _repair_tool_call_arguments("None", "t")
        assert outcome == "{}"

    def test_python_none_with_whitespace(self):
        outcome = _repair_tool_call_arguments(" None ", "t")
        assert outcome == "{}"

    # -- Stage 3: trailing comma repair --

    def test_trailing_comma_in_object(self):
        assert self._decoded('{"key": "value",}') == {"key": "value"}

    def test_trailing_comma_in_array(self):
        assert self._decoded('{"a": [1, 2,]}') == {"a": [1, 2]}

    def test_multiple_trailing_commas(self):
        decoded = self._decoded('{"a": 1, "b": 2,}')
        assert decoded["a"] == 1
        assert decoded["b"] == 2

    # -- Stage 4: unclosed brackets --

    def test_unclosed_brace(self):
        assert self._decoded('{"key": "value"') == {"key": "value"}

    def test_unclosed_bracket_and_brace(self):
        # Only validity of the output is asserted here; the test does not
        # pin whether the nested data is recovered or the helper falls back
        # to an empty object.
        self._decoded('{"a": [1, 2')

    # -- Stage 5: excess closing delimiters --

    def test_extra_closing_brace(self):
        assert self._decoded('{"key": "value"}}') == {"key": "value"}

    def test_extra_closing_bracket(self):
        # Validity check only: the output must decode as JSON.
        self._decoded('{"a": [1]]}')

    # -- Stage 6: last resort --

    def test_unrepairable_garbage_returns_empty_object(self):
        outcome = _repair_tool_call_arguments("totally not json", "t")
        assert outcome == "{}"

    def test_unrepairable_partial_returns_empty_object(self):
        # The input stops inside a string value, so appending closing
        # delimiters cannot make it parse.
        outcome = _repair_tool_call_arguments('{"truncated": "val', "t")
        assert outcome == "{}"

    # -- Valid JSON passthrough --

    def test_already_valid_json_passes_through(self):
        """Well-formed JSON given to the helper still decodes to the same data."""
        payload = '{"path": "/tmp/foo", "content": "hello"}'
        decoded = self._decoded(payload)
        assert decoded["path"] == "/tmp/foo"

    # -- Combined repairs --

    def test_trailing_comma_plus_unclosed_brace(self):
        # Validity check only: full recovery of both keys is not required.
        self._decoded('{"a": 1, "b": 2,')

    def test_real_world_glm_truncation(self):
        """Input cut off right after a key's colon must still yield valid JSON."""
        payload = '{"command": "ls -la /tmp", "timeout": 30, "background":'
        # Validity check only; the dangling "background" key may be lost.
        self._decoded(payload, "terminal")