From 132d6fe6d6af0d2218494e133b775f12f7bf9f53 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mart=C3=ADn=20Alcal=C3=A1=20Rub=C3=AD?=
 <martin.alca@gmail.com>
Date: Wed, 27 May 2026 09:13:09 -0300
Subject: [PATCH] fix(volcengine): strip XML attribute fragments from
 tool_use.name (#33007)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

VolcEngine's api/plan endpoint occasionally leaks raw XML attribute
fragments into tool_use.name when its protocol-translation layer
converts the model's native XML-style tool emission to Anthropic
Messages tool_use blocks, producing names like:

  terminal" parameter="command" string="true
  execute_code" parameter="code" string="true
  session_search" parameter="session_id" string="true

The corruption happens server-side at the provider, but it breaks
every tool call for affected users — no normalization rule in
repair_tool_call can rescue them, so each request runs through three
retries and then aborts as partial.

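Add an early sanitizer in agent_runtime_helpers.repair_tool_call that
truncates the name at the first double quote ("), single quote ('),
'<', or '>' character. A separator whose first occurrence is at
index 0 is ignored, so a name that starts with one of these is never
reduced to an empty string. The rest of the existing repair pipeline
(lowercase / snake_case / fuzzy match) can then resolve the cleaned
name normally.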

Whitespace is deliberately NOT a separator — the legitimate
"write file" -> write_file repair path (covered by
test_space_to_underscore) must keep working.

Tests: 11 new regression cases in TestVolcEngineXmlPollution
covering all three observed polluted names, CamelCase + pollution
mix, a trailing-quote-only leak, single-quote variants, angle-bracket
variants, clean-name passthrough, the whitespace-preservation guard,
a polluted name with an unknown tool root (must still fail), and a
leading-quote name (left to the fuzzy matcher). All 18 pre-existing
repair tests still pass (29 total in the file).
---
 agent/agent_runtime_helpers.py                | 21 ++++++
 tests/run_agent/test_repair_tool_call_name.py | 71 +++++++++++++++++++
 2 files changed, 92 insertions(+)

diff --git a/agent/agent_runtime_helpers.py b/agent/agent_runtime_helpers.py
index 3e4e92a33a8..f9bfb7a4319 100644
--- a/agent/agent_runtime_helpers.py
+++ b/agent/agent_runtime_helpers.py
@@ -1846,6 +1846,27 @@ def repair_tool_call(agent, tool_name: str) -> str | None:
     if not tool_name:
         return None
 
+    # VolcEngine api/plan workaround (issue #33007): the endpoint's
+    # protocol-translation layer occasionally leaks raw XML attribute
+    # fragments into tool_use.name, e.g.
+    #   `terminal" parameter="command" string="true`
+    #   `execute_code" parameter="code" string="true`
+    #   `session_search" parameter="session_id" string="true`
+    # We trim at the first unambiguous XML/quote character so the rest
+    # of the repair pipeline (lowercase / snake_case / fuzzy match)
+    # can resolve the cleaned name to a real tool.
+    #
+    # Crucially we DO NOT split on whitespace: legitimate inputs like
+    # "write file" must keep flowing through ``_norm`` -> ``write_file``
+    # (covered by test_space_to_underscore in
+    # tests/run_agent/test_repair_tool_call_name.py).
+    for _xml_sep in ('"', "'", "<", ">"):
+        _idx = tool_name.find(_xml_sep)
+        if _idx > 0:
+            tool_name = tool_name[:_idx]
+    if not tool_name:
+        return None
+
     def _norm(s: str) -> str:
         return s.lower().replace("-", "_").replace(" ", "_")
 
diff --git a/tests/run_agent/test_repair_tool_call_name.py b/tests/run_agent/test_repair_tool_call_name.py
index 15dfcccad24..0cacdbf0f61 100644
--- a/tests/run_agent/test_repair_tool_call_name.py
+++ b/tests/run_agent/test_repair_tool_call_name.py
@@ -25,6 +25,8 @@ VALID = {
     "read_file",
     "write_file",
     "terminal",
+    "execute_code",
+    "session_search",
 }
 
 
@@ -115,3 +117,72 @@ class TestEdgeCases:
     def test_very_long_name_does_not_match_by_accident(self, repair):
         # Fuzzy match should not claim a tool for something obviously unrelated.
         assert repair("ThisIsNotRemotelyARealToolName_tool") is None
+
+
+class TestVolcEngineXmlPollution:
+    """Regression coverage for #33007 — VolcEngine ``api/plan`` endpoint
+    leaks raw XML attribute fragments into ``tool_use.name``.
+
+    Observed in production with the ``anthropic_messages`` API mode:
+
+        terminal" parameter="command" string="true
+        execute_code" parameter="code" string="true
+        session_search" parameter="session_id" string="true
+
+    The fix trims at the first ``"``/``'``/``<``/``>`` so the rest of
+    the repair pipeline can resolve the cleaned name to a real tool.
+    """
+
+    def test_terminal_with_xml_attribute_pollution(self, repair):
+        # Exact pattern from the bug report (terminal call).
+        polluted = 'terminal" parameter="command" string="true'
+        assert repair(polluted) == "terminal"
+
+    def test_execute_code_with_xml_attribute_pollution(self, repair):
+        polluted = 'execute_code" parameter="code" string="true'
+        assert repair(polluted) == "execute_code"
+
+    def test_session_search_with_xml_attribute_pollution(self, repair):
+        polluted = 'session_search" parameter="session_id" string="true'
+        assert repair(polluted) == "session_search"
+
+    def test_camel_case_tool_with_xml_pollution(self, repair):
+        # If the polluted prefix is CamelCase / suffixed, the rest of
+        # the pipeline (CamelCase -> snake_case, _tool strip) still runs.
+        polluted = 'BrowserClick_tool" parameter="selector" string="true'
+        assert repair(polluted) == "browser_click"
+
+    def test_tool_name_with_trailing_quote_only(self, repair):
+        # Minimal leak — just a stray trailing quote, no full attribute.
+        assert repair('terminal"') == "terminal"
+
+    def test_tool_name_with_angle_bracket_pollution(self, repair):
+        # Defensive — same root cause, raw '<' bleeding through.
+        assert repair("terminal<parameter=command") == "terminal"
+
+    def test_tool_name_with_single_quote_pollution(self, repair):
+        # Defensive — same root cause, single-quoted attribute style.
+        assert repair("terminal' parameter='command' string='true") == "terminal"
+
+    def test_clean_tool_name_unaffected_by_sanitizer(self, repair):
+        # Pure passthrough — no XML/quote chars, no change.
+        assert repair("execute_code") == "execute_code"
+        assert repair("session_search") == "session_search"
+
+    def test_space_separated_name_still_normalizes(self, repair):
+        # Critical: the XML strip must NOT consume whitespace, or the
+        # legitimate ``"write file" -> write_file`` repair path breaks.
+        assert repair("write file") == "write_file"
+
+    def test_pollution_with_unknown_tool_root_still_fails(self, repair):
+        # Sanitizer must not mask invalid tool names by laundering them
+        # through the cleaner.
+        polluted = 'no_such_tool" parameter="x" string="true'
+        assert repair(polluted) is None
+
+    def test_leading_quote_falls_through_to_fuzzy_match(self, repair):
+        # Sanitizer only trims when the XML char is at idx > 0 — a
+        # name that *starts* with a quote is left untouched so the
+        # rest of the pipeline (fuzzy match at 0.7 cutoff) can still
+        # recover the obvious target.
+        assert repair('"terminal"') == "terminal"