From 132d6fe6d6af0d2218494e133b775f12f7bf9f53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mart=C3=ADn=20Alcal=C3=A1=20Rub=C3=AD?= Date: Wed, 27 May 2026 09:13:09 -0300 Subject: [PATCH] fix(volcengine): strip XML attribute fragments from tool_use.name (#33007) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit VolcEngine's api/plan endpoint occasionally leaks raw XML attribute fragments into tool_use.name when its protocol-translation layer converts the model's native XML-style tool emission to Anthropic Messages tool_use blocks, producing names like: terminal" parameter="command" string="true execute_code" parameter="code" string="true session_search" parameter="session_id" string="true The corruption happens server-side at the provider, but it breaks every tool call for affected users — no normalization rule in repair_tool_call can rescue them, so each request runs through three retries and then aborts as partial. Add an early sanitizer in agent_runtime_helpers.repair_tool_call that trims at the first ' " ', " ' ", '<', or '>' character (idx > 0 only) so the rest of the existing repair pipeline (lowercase / snake_case / fuzzy match) can resolve the cleaned name normally. Whitespace is deliberately NOT a separator — the legitimate "write file" -> write_file repair path (covered by test_space_to_underscore) must keep working. Tests: 11 new regression cases in TestVolcEngineXmlPollution covering all three observed polluted names, CamelCase + pollution mix, single-quote variants, angle-bracket variants, clean-name passthrough, and the whitespace-preservation guard. All 18 pre-existing repair tests still pass (29 total in the file).
--- agent/agent_runtime_helpers.py | 21 ++++++ tests/run_agent/test_repair_tool_call_name.py | 71 +++++++++++++++++++ 2 files changed, 92 insertions(+) diff --git a/agent/agent_runtime_helpers.py b/agent/agent_runtime_helpers.py index 3e4e92a33a8..f9bfb7a4319 100644 --- a/agent/agent_runtime_helpers.py +++ b/agent/agent_runtime_helpers.py @@ -1846,6 +1846,27 @@ def repair_tool_call(agent, tool_name: str) -> str | None: if not tool_name: return None + # VolcEngine api/plan workaround (issue #33007): the endpoint's + # protocol-translation layer occasionally leaks raw XML attribute + # fragments into tool_use.name, e.g. + # `terminal" parameter="command" string="true` + # `execute_code" parameter="code" string="true` + # `session_search" parameter="session_id" string="true` + # We trim at the first unambiguous XML/quote character so the rest + # of the repair pipeline (lowercase / snake_case / fuzzy match) + # can resolve the cleaned name to a real tool. + # + # Crucially we DO NOT split on whitespace: legitimate inputs like + # "write file" must keep flowing through ``_norm`` -> ``write_file`` + # (covered by test_space_to_underscore in + # tests/run_agent/test_repair_tool_call_name.py). + for _xml_sep in ('"', "'", "<", ">"): + _idx = tool_name.find(_xml_sep) + if _idx > 0: + tool_name = tool_name[:_idx] + if not tool_name: + return None + def _norm(s: str) -> str: return s.lower().replace("-", "_").replace(" ", "_") diff --git a/tests/run_agent/test_repair_tool_call_name.py b/tests/run_agent/test_repair_tool_call_name.py index 15dfcccad24..0cacdbf0f61 100644 --- a/tests/run_agent/test_repair_tool_call_name.py +++ b/tests/run_agent/test_repair_tool_call_name.py @@ -25,6 +25,8 @@ VALID = { "read_file", "write_file", "terminal", + "execute_code", + "session_search", } @@ -115,3 +117,72 @@ class TestEdgeCases: def test_very_long_name_does_not_match_by_accident(self, repair): # Fuzzy match should not claim a tool for something obviously unrelated. 
assert repair("ThisIsNotRemotelyARealToolName_tool") is None + + +class TestVolcEngineXmlPollution: + """Regression coverage for #33007 — VolcEngine ``api/plan`` endpoint + leaks raw XML attribute fragments into ``tool_use.name``. + + Observed in production with the ``anthropic_messages`` API mode: + + terminal" parameter="command" string="true + execute_code" parameter="code" string="true + session_search" parameter="session_id" string="true + + The fix trims at the first ``"``/``'``/``<``/``>`` so the rest of + the repair pipeline can resolve the cleaned name to a real tool. + """ + + def test_terminal_with_xml_attribute_pollution(self, repair): + # Exact pattern from the bug report (terminal call). + polluted = 'terminal" parameter="command" string="true' + assert repair(polluted) == "terminal" + + def test_execute_code_with_xml_attribute_pollution(self, repair): + polluted = 'execute_code" parameter="code" string="true' + assert repair(polluted) == "execute_code" + + def test_session_search_with_xml_attribute_pollution(self, repair): + polluted = 'session_search" parameter="session_id" string="true' + assert repair(polluted) == "session_search" + + def test_camel_case_tool_with_xml_pollution(self, repair): + # If the polluted prefix is CamelCase / suffixed, the rest of + # the pipeline (CamelCase -> snake_case, _tool strip) still runs. + polluted = 'BrowserClick_tool" parameter="selector" string="true' + assert repair(polluted) == "browser_click" + + def test_tool_name_with_trailing_quote_only(self, repair): + # Minimal leak — just a stray trailing quote, no full attribute. + assert repair('terminal"') == "terminal" + + def test_tool_name_with_angle_bracket_pollution(self, repair): + # Defensive — same root cause, raw '<' bleeding through. + assert repair("terminal