mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-09 08:21:50 +00:00
fix(volcengine): strip XML attribute fragments from tool_use.name (#33007)
VolcEngine's api/plan endpoint occasionally leaks raw XML attribute fragments into tool_use.name when its protocol-translation layer converts the model's native XML-style tool emission to Anthropic Messages tool_use blocks, producing names like: `terminal" parameter="command" string="true`, `execute_code" parameter="code" string="true`, and `session_search" parameter="session_id" string="true`. The corruption happens server-side at the provider, but it breaks every tool call for affected users — no normalization rule in repair_tool_call can rescue them, so each request runs through three retries and then aborts as partial. Add an early sanitizer in agent_runtime_helpers.repair_tool_call that trims at the first `"`, `'`, `<`, or `>` character (idx > 0 only) so the rest of the existing repair pipeline (lowercase / snake_case / fuzzy match) can resolve the cleaned name normally. Whitespace is deliberately NOT a separator — the legitimate "write file" -> write_file repair path (covered by test_space_to_underscore) must keep working. Tests: 11 new regression cases in TestVolcEngineXmlPollution covering all three observed polluted names, CamelCase + pollution mix, single-quote variants, angle-bracket variants, clean-name passthrough, and the whitespace-preservation guard. All 18 pre-existing repair tests still pass (29 total in the file).
This commit is contained in:
parent
f5bd09af4b
commit
132d6fe6d6
2 changed files with 92 additions and 0 deletions
|
|
@ -1846,6 +1846,27 @@ def repair_tool_call(agent, tool_name: str) -> str | None:
|
|||
if not tool_name:
|
||||
return None
|
||||
|
||||
# VolcEngine api/plan workaround (issue #33007): the endpoint's
|
||||
# protocol-translation layer occasionally leaks raw XML attribute
|
||||
# fragments into tool_use.name, e.g.
|
||||
# `terminal" parameter="command" string="true`
|
||||
# `execute_code" parameter="code" string="true`
|
||||
# `session_search" parameter="session_id" string="true`
|
||||
# We trim at the first unambiguous XML/quote character so the rest
|
||||
# of the repair pipeline (lowercase / snake_case / fuzzy match)
|
||||
# can resolve the cleaned name to a real tool.
|
||||
#
|
||||
# Crucially we DO NOT split on whitespace: legitimate inputs like
|
||||
# "write file" must keep flowing through ``_norm`` -> ``write_file``
|
||||
# (covered by test_space_to_underscore in
|
||||
# tests/run_agent/test_repair_tool_call_name.py).
|
||||
for _xml_sep in ('"', "'", "<", ">"):
|
||||
_idx = tool_name.find(_xml_sep)
|
||||
if _idx > 0:
|
||||
tool_name = tool_name[:_idx]
|
||||
if not tool_name:
|
||||
return None
|
||||
|
||||
def _norm(s: str) -> str:
|
||||
return s.lower().replace("-", "_").replace(" ", "_")
|
||||
|
||||
|
|
|
|||
|
|
@ -25,6 +25,8 @@ VALID = {
|
|||
"read_file",
|
||||
"write_file",
|
||||
"terminal",
|
||||
"execute_code",
|
||||
"session_search",
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -115,3 +117,72 @@ class TestEdgeCases:
|
|||
def test_very_long_name_does_not_match_by_accident(self, repair):
    """A long, obviously unrelated name must not be claimed by fuzzy matching."""
    unrelated_name = "ThisIsNotRemotelyARealToolName_tool"
    assert repair(unrelated_name) is None
|
||||
|
||||
|
||||
class TestVolcEngineXmlPollution:
    """Regression tests for #33007: XML attribute fragments in ``tool_use.name``.

    The VolcEngine ``api/plan`` endpoint was observed in production (with the
    ``anthropic_messages`` API mode) returning tool names such as:

        terminal" parameter="command" string="true
        execute_code" parameter="code" string="true
        session_search" parameter="session_id" string="true

    The repair step cuts the name at the first ``"``/``'``/``<``/``>`` so the
    remaining repair pipeline can map the cleaned name onto a real tool.
    """

    def test_terminal_with_xml_attribute_pollution(self, repair):
        # Verbatim pattern from the bug report for a terminal call.
        assert repair('terminal" parameter="command" string="true') == "terminal"

    def test_execute_code_with_xml_attribute_pollution(self, repair):
        # Observed production pattern for execute_code.
        assert repair('execute_code" parameter="code" string="true') == "execute_code"

    def test_session_search_with_xml_attribute_pollution(self, repair):
        # Observed production pattern for session_search.
        raw_name = 'session_search" parameter="session_id" string="true'
        assert repair(raw_name) == "session_search"

    def test_camel_case_tool_with_xml_pollution(self, repair):
        # After trimming, a CamelCase / ``_tool``-suffixed prefix must still
        # go through the later steps (CamelCase -> snake_case, _tool strip).
        raw_name = 'BrowserClick_tool" parameter="selector" string="true'
        assert repair(raw_name) == "browser_click"

    def test_tool_name_with_trailing_quote_only(self, repair):
        # Smallest possible leak: one stray trailing quote, no attribute text.
        repaired = repair('terminal"')
        assert repaired == "terminal"

    def test_tool_name_with_angle_bracket_pollution(self, repair):
        # Defensive case for the same root cause: a raw '<' leaking through.
        repaired = repair("terminal<parameter=command")
        assert repaired == "terminal"

    def test_tool_name_with_single_quote_pollution(self, repair):
        # Defensive case for the same root cause: single-quoted attributes.
        raw_name = "terminal' parameter='command' string='true"
        assert repair(raw_name) == "terminal"

    def test_clean_tool_name_unaffected_by_sanitizer(self, repair):
        # Names without XML/quote characters must come back unchanged.
        for clean_name in ("execute_code", "session_search"):
            assert repair(clean_name) == clean_name

    def test_space_separated_name_still_normalizes(self, repair):
        # Whitespace must survive the XML trim; otherwise the legitimate
        # ``"write file" -> write_file`` repair path would break.
        repaired = repair("write file")
        assert repaired == "write_file"

    def test_pollution_with_unknown_tool_root_still_fails(self, repair):
        # Trimming must not turn an invalid tool name into an accepted one.
        raw_name = 'no_such_tool" parameter="x" string="true'
        assert repair(raw_name) is None

    def test_leading_quote_falls_through_to_fuzzy_match(self, repair):
        # The trim only applies when the XML character sits at idx > 0, so a
        # name that *starts* with a quote is passed on untouched and the
        # later fuzzy match (0.7 cutoff) is expected to recover the target.
        repaired = repair('"terminal"')
        assert repaired == "terminal"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue