mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix(context_compressor): keep tool-call arguments JSON valid when shrinking
Pass 3 of `_prune_old_tool_results` previously shrunk long `function.arguments`
blobs by slicing the raw JSON string at byte 200 and appending the literal
text `...[truncated]`. That routinely produced payloads like::
{"path": "/foo.md", "content": "# Long markdown
...[truncated]
— an unterminated string with no closing brace. Strict providers (observed
on MiniMax) reject this as `invalid function arguments json string` with a
non-retryable 400. Because the broken call survives in the session history,
every subsequent turn re-sends the same malformed payload and gets the same
400, locking the session into a re-send loop until the call falls out of
the window.
Fix: parse the arguments first, shrink long string leaves inside the parsed
structure, and re-serialise. Short strings (such as paths) and non-string
values (ints, booleans, null) pass through intact; lists and dicts are walked
recursively. Arguments that are not valid JSON to begin with (rare, some
backends use non-JSON tool args) are returned unchanged rather than
replaced with something neither we nor the provider can parse.
Observed in the wild: a `write_file` with ~800 chars of markdown `content`
triggered this on a real session against MiniMax-M2.7; every turn after
compression got rejected until the session was manually reset.
Tests:
- 7 direct tests of `_truncate_tool_call_args_json` covering valid-JSON
output, non-JSON pass-through, nested structures, non-string leaves,
scalar JSON, and Unicode preservation
- 1 end-to-end test through `_prune_old_tool_results` Pass 3 that
reproduces the exact failure payload shape from the incident
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
b73ebfee30
commit
3128d9fcd2
2 changed files with 179 additions and 2 deletions
|
|
@ -63,6 +63,52 @@ _CHARS_PER_TOKEN = 4
|
|||
_SUMMARY_FAILURE_COOLDOWN_SECONDS = 600
|
||||
|
||||
|
||||
def _truncate_tool_call_args_json(args: str, head_chars: int = 200) -> str:
|
||||
"""Shrink long string values inside a tool-call arguments JSON blob while
|
||||
preserving JSON validity.
|
||||
|
||||
The ``function.arguments`` field on a tool call is a JSON-encoded string
|
||||
passed through to the LLM provider; downstream providers strictly
|
||||
validate it and return a non-retryable 400 when it is not well-formed.
|
||||
An earlier implementation sliced the raw JSON at a fixed byte offset and
|
||||
appended ``...[truncated]`` — which routinely produced strings like::
|
||||
|
||||
{"path": "/foo/bar", "content": "# long markdown
|
||||
...[truncated]
|
||||
|
||||
i.e. an unterminated string and a missing closing brace. MiniMax, for
|
||||
example, rejects this with ``invalid function arguments json string``
|
||||
and the session gets stuck re-sending the same broken history on every
|
||||
turn. See issue #11762 for the observed loop.
|
||||
|
||||
This helper parses the arguments, shrinks long string leaves inside the
|
||||
parsed structure, and re-serialises. Non-string values (paths, ints,
|
||||
booleans) are preserved intact. If the arguments are not valid JSON
|
||||
to begin with — some model backends use non-JSON tool arguments — the
|
||||
original string is returned unchanged rather than replaced with
|
||||
something neither we nor the backend can parse.
|
||||
"""
|
||||
try:
|
||||
parsed = json.loads(args)
|
||||
except (ValueError, TypeError):
|
||||
return args
|
||||
|
||||
def _shrink(obj: Any) -> Any:
|
||||
if isinstance(obj, str):
|
||||
if len(obj) > head_chars:
|
||||
return obj[:head_chars] + "...[truncated]"
|
||||
return obj
|
||||
if isinstance(obj, dict):
|
||||
return {k: _shrink(v) for k, v in obj.items()}
|
||||
if isinstance(obj, list):
|
||||
return [_shrink(v) for v in obj]
|
||||
return obj
|
||||
|
||||
shrunken = _shrink(parsed)
|
||||
# ensure_ascii=False preserves CJK/emoji instead of bloating with \uXXXX
|
||||
return json.dumps(shrunken, ensure_ascii=False)
|
||||
|
||||
|
||||
def _summarize_tool_result(tool_name: str, tool_args: str, tool_content: str) -> str:
|
||||
"""Create an informative 1-line summary of a tool call + result.
|
||||
|
||||
|
|
@ -449,6 +495,11 @@ class ContextCompressor(ContextEngine):
|
|||
# Pass 3: Truncate large tool_call arguments in assistant messages
|
||||
# outside the protected tail. write_file with 50KB content, for
|
||||
# example, survives pruning entirely without this.
|
||||
#
|
||||
# The shrinking is done inside the parsed JSON structure so the
|
||||
# result remains valid JSON — otherwise downstream providers 400
|
||||
# on every subsequent turn until the broken call falls out of
|
||||
# the window. See ``_truncate_tool_call_args_json`` docstring.
|
||||
for i in range(prune_boundary):
|
||||
msg = result[i]
|
||||
if msg.get("role") != "assistant" or not msg.get("tool_calls"):
|
||||
|
|
@ -459,8 +510,10 @@ class ContextCompressor(ContextEngine):
|
|||
if isinstance(tc, dict):
|
||||
args = tc.get("function", {}).get("arguments", "")
|
||||
if len(args) > 500:
|
||||
tc = {**tc, "function": {**tc["function"], "arguments": args[:200] + "...[truncated]"}}
|
||||
modified = True
|
||||
new_args = _truncate_tool_call_args_json(args)
|
||||
if new_args != args:
|
||||
tc = {**tc, "function": {**tc["function"], "arguments": new_args}}
|
||||
modified = True
|
||||
new_tcs.append(tc)
|
||||
if modified:
|
||||
result[i] = {**msg, "tool_calls": new_tcs}
|
||||
|
|
|
|||
|
|
@ -781,3 +781,127 @@ class TestTokenBudgetTailProtection:
|
|||
# Tool at index 2 is outside the protected tail (last 3 = indices 2,3,4)
|
||||
# so it might or might not be pruned depending on boundary
|
||||
assert isinstance(pruned, int)
|
||||
|
||||
|
||||
class TestTruncateToolCallArgsJson:
    """Regression coverage for issue #11762.

    Slicing ``function.arguments`` in the middle of a string used to yield
    invalid JSON; strict providers (observed on MiniMax) answered with a
    non-retryable 400 and long sessions ended up re-sending the same broken
    history. Every test here checks that the helper's output is parseable
    JSON with the same shape as the input — shortened, never corrupted.
    """

    def _helper(self):
        # Imported lazily so a broken module import fails the individual
        # test instead of the whole collection.
        from agent.context_compressor import _truncate_tool_call_args_json

        return _truncate_tool_call_args_json

    def test_shrunken_args_remain_valid_json(self):
        import json as _json

        target_path = "~/.hermes/skills/shopping/browser-setup-notes.md"
        body = "# Shopping Browser Setup Notes\n\n" + "abc " * 400
        raw = _json.dumps({"path": target_path, "content": body})
        assert len(raw) > 500

        reduced = self._helper()(raw)

        decoded = _json.loads(reduced)  # must not raise
        assert decoded["path"] == target_path
        assert decoded["content"].endswith("...[truncated]")
        assert len(reduced) < len(raw)

    def test_non_json_arguments_pass_through(self):
        garbage = "this is not json at all, " * 50
        assert self._helper()(garbage) == garbage

    def test_short_string_leaves_unchanged(self):
        import json as _json

        expected = {"command": "ls -la", "cwd": "/tmp"}
        encoded = _json.dumps({"command": "ls -la", "cwd": "/tmp"})
        assert _json.loads(self._helper()(encoded)) == expected

    def test_nested_structures_are_walked(self):
        import json as _json

        conversation = [
            {"role": "user", "content": "x" * 500},
            {"role": "assistant", "content": "ok"},
        ]
        encoded = _json.dumps({"messages": conversation, "meta": {"note": "y" * 500}})

        decoded = _json.loads(self._helper()(encoded))

        first, second = decoded["messages"][0], decoded["messages"][1]
        assert first["content"].endswith("...[truncated]")
        assert second["content"] == "ok"
        assert decoded["meta"]["note"].endswith("...[truncated]")

    def test_non_string_leaves_preserved(self):
        import json as _json

        fields = {
            "retries": 3,
            "enabled": True,
            "timeout": None,
            "items": [1, 2, 3],
            "note": "z" * 500,
        }

        decoded = _json.loads(self._helper()(_json.dumps(fields)))

        assert decoded["retries"] == 3
        assert decoded["enabled"] is True
        assert decoded["timeout"] is None
        assert decoded["items"] == [1, 2, 3]
        assert decoded["note"].endswith("...[truncated]")

    def test_scalar_json_string_gets_shrunk(self):
        import json as _json

        encoded = _json.dumps("q" * 500)
        decoded = _json.loads(self._helper()(encoded))
        assert isinstance(decoded, str)
        assert decoded.endswith("...[truncated]")

    def test_unicode_preserved(self):
        import json as _json

        encoded = _json.dumps({"content": "非德满" + ("a" * 500)})
        rendered = self._helper()(encoded)
        # ensure_ascii=False keeps CJK intact rather than emitting \uXXXX
        assert "非德满" in rendered

    def test_pass3_emits_valid_json_for_downstream_provider(self):
        """End-to-end: Pass 3 must never produce the exact failure payload
        that caused the 400 loop (unterminated string, missing brace)."""
        import json as _json

        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
            compressor = ContextCompressor(
                model="test/model",
                threshold_percent=0.85,
                protect_first_n=1,
                protect_last_n=1,
                quiet_mode=True,
            )

        target_path = "~/.hermes/skills/shopping/browser-setup-notes.md"
        huge_content = "# Shopping Browser Setup Notes\n\n## Overview\n" + "x " * 400
        args_payload = _json.dumps({"path": target_path, "content": huge_content})
        assert len(args_payload) > 500  # triggers the Pass-3 shrink

        write_call = {
            "id": "call_1",
            "type": "function",
            "function": {"name": "write_file", "arguments": args_payload},
        }
        history = [
            {"role": "user", "content": "please write two files"},
            {"role": "assistant", "content": None, "tool_calls": [write_call]},
            {"role": "tool", "tool_call_id": "call_1",
             "content": '{"bytes_written": 727}'},
            {"role": "user", "content": "ok"},
            {"role": "assistant", "content": "done"},
        ]

        pruned_messages, _ = compressor._prune_old_tool_results(history, protect_tail_count=2)
        emitted_args = pruned_messages[1]["tool_calls"][0]["function"]["arguments"]

        # Must parse — otherwise downstream provider returns 400
        decoded = _json.loads(emitted_args)
        assert decoded["path"] == target_path
        assert decoded["content"].endswith("...[truncated]")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue