fix(context_compressor): keep tool-call arguments JSON valid when shrinking

Pass 3 of `_prune_old_tool_results` previously shrunk long `function.arguments`
blobs by slicing the raw JSON string at byte 200 and appending the literal
text `...[truncated]`. That routinely produced payloads like::

    {"path": "/foo.md", "content": "# Long markdown
    ...[truncated]

— an unterminated string with no closing brace. Strict providers (observed
on MiniMax) reject this as `invalid function arguments json string` with a
non-retryable 400. Because the broken call survives in the session history,
every subsequent turn re-sends the same malformed payload and gets the same
400, locking the session into a re-send loop until the call falls out of
the window.

Fix: parse the arguments first, shrink long string leaves inside the parsed
structure, and re-serialise. Non-string values (paths, ints, booleans, lists)
pass through intact. Arguments that are not valid JSON to begin with (rare,
some backends use non-JSON tool args) are returned unchanged rather than
replaced with something neither we nor the provider can parse.

Observed in the wild: a `write_file` with ~800 chars of markdown `content`
triggered this on a real session against MiniMax-M2.7; every turn after
compression got rejected until the session was manually reset.

Tests:
- 7 direct tests of `_truncate_tool_call_args_json` covering valid-JSON
  output, non-JSON pass-through, nested structures, non-string leaves,
  scalar JSON, and Unicode preservation
- 1 end-to-end test through `_prune_old_tool_results` Pass 3 that
  reproduces the exact failure payload shape from the incident

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Honghua Yang 2026-04-17 13:32:10 -07:00 committed by Teknium
parent b73ebfee30
commit 3128d9fcd2
2 changed files with 179 additions and 2 deletions

View file

@ -63,6 +63,52 @@ _CHARS_PER_TOKEN = 4
_SUMMARY_FAILURE_COOLDOWN_SECONDS = 600
def _truncate_tool_call_args_json(args: str, head_chars: int = 200) -> str:
"""Shrink long string values inside a tool-call arguments JSON blob while
preserving JSON validity.
The ``function.arguments`` field on a tool call is a JSON-encoded string
passed through to the LLM provider; downstream providers strictly
validate it and return a non-retryable 400 when it is not well-formed.
An earlier implementation sliced the raw JSON at a fixed byte offset and
appended ``...[truncated]`` which routinely produced strings like::
{"path": "/foo/bar", "content": "# long markdown
...[truncated]
i.e. an unterminated string and a missing closing brace. MiniMax, for
example, rejects this with ``invalid function arguments json string``
and the session gets stuck re-sending the same broken history on every
turn. See issue #11762 for the observed loop.
This helper parses the arguments, shrinks long string leaves inside the
parsed structure, and re-serialises. Non-string values (paths, ints,
booleans) are preserved intact. If the arguments are not valid JSON
to begin with some model backends use non-JSON tool arguments the
original string is returned unchanged rather than replaced with
something neither we nor the backend can parse.
"""
try:
parsed = json.loads(args)
except (ValueError, TypeError):
return args
def _shrink(obj: Any) -> Any:
if isinstance(obj, str):
if len(obj) > head_chars:
return obj[:head_chars] + "...[truncated]"
return obj
if isinstance(obj, dict):
return {k: _shrink(v) for k, v in obj.items()}
if isinstance(obj, list):
return [_shrink(v) for v in obj]
return obj
shrunken = _shrink(parsed)
# ensure_ascii=False preserves CJK/emoji instead of bloating with \uXXXX
return json.dumps(shrunken, ensure_ascii=False)
def _summarize_tool_result(tool_name: str, tool_args: str, tool_content: str) -> str:
"""Create an informative 1-line summary of a tool call + result.
@ -449,6 +495,11 @@ class ContextCompressor(ContextEngine):
# Pass 3: Truncate large tool_call arguments in assistant messages
# outside the protected tail. write_file with 50KB content, for
# example, survives pruning entirely without this.
#
# The shrinking is done inside the parsed JSON structure so the
# result remains valid JSON — otherwise downstream providers 400
# on every subsequent turn until the broken call falls out of
# the window. See ``_truncate_tool_call_args_json`` docstring.
for i in range(prune_boundary):
msg = result[i]
if msg.get("role") != "assistant" or not msg.get("tool_calls"):
@ -459,8 +510,10 @@ class ContextCompressor(ContextEngine):
if isinstance(tc, dict):
args = tc.get("function", {}).get("arguments", "")
if len(args) > 500:
tc = {**tc, "function": {**tc["function"], "arguments": args[:200] + "...[truncated]"}}
modified = True
new_args = _truncate_tool_call_args_json(args)
if new_args != args:
tc = {**tc, "function": {**tc["function"], "arguments": new_args}}
modified = True
new_tcs.append(tc)
if modified:
result[i] = {**msg, "tool_calls": new_tcs}

View file

@ -781,3 +781,127 @@ class TestTokenBudgetTailProtection:
# Tool at index 2 is outside the protected tail (last 3 = indices 2,3,4)
# so it might or might not be pruned depending on boundary
assert isinstance(pruned, int)
class TestTruncateToolCallArgsJson:
"""Regression tests for #11762.
The previous implementation produced invalid JSON by slicing
``function.arguments`` mid-string, which caused non-retryable 400s from
strict providers (observed on MiniMax) and stuck long sessions in a
re-send loop. The helper here must always emit parseable JSON whose
shape matches the original shrunken, not corrupted.
"""
def _helper(self):
from agent.context_compressor import _truncate_tool_call_args_json
return _truncate_tool_call_args_json
def test_shrunken_args_remain_valid_json(self):
import json as _json
shrink = self._helper()
original = _json.dumps({
"path": "~/.hermes/skills/shopping/browser-setup-notes.md",
"content": "# Shopping Browser Setup Notes\n\n" + "abc " * 400,
})
assert len(original) > 500
shrunk = shrink(original)
parsed = _json.loads(shrunk) # must not raise
assert parsed["path"] == "~/.hermes/skills/shopping/browser-setup-notes.md"
assert parsed["content"].endswith("...[truncated]")
assert len(shrunk) < len(original)
def test_non_json_arguments_pass_through(self):
shrink = self._helper()
not_json = "this is not json at all, " * 50
assert shrink(not_json) == not_json
def test_short_string_leaves_unchanged(self):
import json as _json
shrink = self._helper()
payload = _json.dumps({"command": "ls -la", "cwd": "/tmp"})
assert _json.loads(shrink(payload)) == {"command": "ls -la", "cwd": "/tmp"}
def test_nested_structures_are_walked(self):
import json as _json
shrink = self._helper()
payload = _json.dumps({
"messages": [
{"role": "user", "content": "x" * 500},
{"role": "assistant", "content": "ok"},
],
"meta": {"note": "y" * 500},
})
parsed = _json.loads(shrink(payload))
assert parsed["messages"][0]["content"].endswith("...[truncated]")
assert parsed["messages"][1]["content"] == "ok"
assert parsed["meta"]["note"].endswith("...[truncated]")
def test_non_string_leaves_preserved(self):
import json as _json
shrink = self._helper()
payload = _json.dumps({
"retries": 3,
"enabled": True,
"timeout": None,
"items": [1, 2, 3],
"note": "z" * 500,
})
parsed = _json.loads(shrink(payload))
assert parsed["retries"] == 3
assert parsed["enabled"] is True
assert parsed["timeout"] is None
assert parsed["items"] == [1, 2, 3]
assert parsed["note"].endswith("...[truncated]")
def test_scalar_json_string_gets_shrunk(self):
import json as _json
shrink = self._helper()
payload = _json.dumps("q" * 500)
parsed = _json.loads(shrink(payload))
assert isinstance(parsed, str)
assert parsed.endswith("...[truncated]")
def test_unicode_preserved(self):
import json as _json
shrink = self._helper()
payload = _json.dumps({"content": "非德满" + ("a" * 500)})
out = shrink(payload)
# ensure_ascii=False keeps CJK intact rather than emitting \uXXXX
assert "非德满" in out
def test_pass3_emits_valid_json_for_downstream_provider(self):
"""End-to-end: Pass 3 must never produce the exact failure payload
that caused the 400 loop (unterminated string, missing brace)."""
import json as _json
with patch("agent.context_compressor.get_model_context_length", return_value=100000):
c = ContextCompressor(
model="test/model",
threshold_percent=0.85,
protect_first_n=1,
protect_last_n=1,
quiet_mode=True,
)
huge_content = "# Shopping Browser Setup Notes\n\n## Overview\n" + "x " * 400
args_payload = _json.dumps({
"path": "~/.hermes/skills/shopping/browser-setup-notes.md",
"content": huge_content,
})
assert len(args_payload) > 500 # triggers the Pass-3 shrink
messages = [
{"role": "user", "content": "please write two files"},
{"role": "assistant", "content": None, "tool_calls": [
{"id": "call_1", "type": "function",
"function": {"name": "write_file", "arguments": args_payload}},
]},
{"role": "tool", "tool_call_id": "call_1",
"content": '{"bytes_written": 727}'},
{"role": "user", "content": "ok"},
{"role": "assistant", "content": "done"},
]
result, _ = c._prune_old_tool_results(messages, protect_tail_count=2)
shrunk = result[1]["tool_calls"][0]["function"]["arguments"]
# Must parse — otherwise downstream provider returns 400
parsed = _json.loads(shrunk)
assert parsed["path"] == "~/.hermes/skills/shopping/browser-setup-notes.md"
assert parsed["content"].endswith("...[truncated]")