mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-13 09:01:54 +00:00
VolcEngine's api/plan endpoint occasionally leaks raw XML attribute fragments into tool_use.name when its protocol-translation layer converts the model's native XML-style tool emission to Anthropic Messages tool_use blocks, producing names like:

    terminal" parameter="command" string="true
    execute_code" parameter="code" string="true
    session_search" parameter="session_id" string="true

The corruption happens server-side at the provider, but it breaks every tool call for affected users — no normalization rule in repair_tool_call can rescue them, so each request runs through three retries and then aborts as partial.

Add an early sanitizer in agent_runtime_helpers.repair_tool_call that trims the name at the first `"`, `'`, `<`, or `>` character (only when that character occurs at index > 0) so the rest of the existing repair pipeline (lowercase / snake_case / fuzzy match) can resolve the cleaned name normally. Whitespace is deliberately NOT a separator — the legitimate "write file" -> write_file repair path (covered by test_space_to_underscore) must keep working.

Tests: 11 new regression cases in TestVolcEngineXmlPollution covering all three observed polluted names, CamelCase + pollution mix, single-quote variants, angle-bracket variants, clean-name passthrough, and the whitespace-preservation guard. All 18 pre-existing repair tests still pass (29 total in the file).
188 lines
7.2 KiB
Python
188 lines
7.2 KiB
Python
"""Tests for AIAgent._repair_tool_call — tool-name normalization.

Regression guard for #14784: Claude-style models sometimes emit
class-like tool-call names (``TodoTool_tool``, ``Patch_tool``,
``BrowserClick_tool``, ``PatchTool``). Before the fix they returned
"Unknown tool" even though the target tool was registered under a
snake_case name. The repair routine now normalizes CamelCase,
strips trailing ``_tool`` / ``-tool`` / ``tool`` suffixes (up to
twice to handle double-tacked suffixes like ``TodoTool_tool``), and
falls back to fuzzy match.

The #33007 regression cases (tool names polluted by leaked XML
attribute fragments) live in ``TestVolcEngineXmlPollution``.
"""
|
|
from __future__ import annotations
|
|
|
|
from types import SimpleNamespace
|
|
|
|
import pytest
|
|
|
|
|
|
# Tool names handed to the stub agent as ``valid_tool_names``.  Every
# non-None result expected by the tests in this module is a member of
# this set.
VALID = {
    "todo",
    "patch",
    "browser_click",
    "browser_navigate",
    "web_search",
    "read_file",
    "write_file",
    "terminal",
    "execute_code",
    "session_search",
}
|
|
|
|
|
|
@pytest.fixture
def repair():
    """Return a bound ``_repair_tool_call`` built on a minimal shell agent.

    We avoid constructing a real AIAgent (which pulls in credential
    resolution, session DB, etc.) because the repair routine only
    reads ``self.valid_tool_names``.  A ``SimpleNamespace`` stub carrying
    that single attribute is enough to bind the unbound function.

    Returns:
        A callable taking the raw tool name; the tests in this module
        expect it to return the repaired name or ``None``.
    """
    # Imported lazily so that merely importing this test module does not
    # import ``run_agent``.
    from run_agent import AIAgent

    stub = SimpleNamespace(valid_tool_names=VALID)
    # Plain functions are descriptors: ``__get__`` binds the function to the
    # stub as ``self`` without going through ``AIAgent.__init__``.
    return AIAgent._repair_tool_call.__get__(stub, AIAgent)
|
|
|
|
|
|
class TestExistingBehaviorStillWorks:
    """Pre-existing repairs must keep working (no regressions).

    Each test receives the ``repair`` fixture: a callable that maps a raw
    tool name to the expected registered name, or to ``None``.
    """

    def test_lowercase_already_matches(self, repair):
        # A name that is already valid comes back unchanged.
        assert repair("browser_click") == "browser_click"

    def test_uppercase_simple(self, repair):
        # Case differences alone must not prevent a match.
        assert repair("TERMINAL") == "terminal"

    def test_dash_to_underscore(self, repair):
        # Dashes are treated as underscores.
        assert repair("web-search") == "web_search"

    def test_space_to_underscore(self, repair):
        # Spaces are treated as underscores.
        assert repair("write file") == "write_file"

    def test_fuzzy_near_miss(self, repair):
        # One-character typo — fuzzy match at 0.7 cutoff
        assert repair("terminall") == "terminal"

    def test_unknown_returns_none(self, repair):
        # Nothing close enough to any valid tool: no repair is offered.
        assert repair("xyz_no_such_tool") is None
|
|
|
|
|
|
class TestClassLikeEmissions:
    """Regression coverage for #14784 — CamelCase + _tool suffix variants.

    Each test receives the ``repair`` fixture: a callable that maps a raw
    tool name to the expected registered name, or to ``None``.
    """

    def test_camel_case_no_suffix(self, repair):
        # Pure CamelCase, nothing to strip.
        assert repair("BrowserClick") == "browser_click"

    def test_camel_case_with_underscore_tool_suffix(self, repair):
        # CamelCase plus a trailing '_tool'.
        assert repair("BrowserClick_tool") == "browser_click"

    def test_camel_case_with_Tool_class_suffix(self, repair):
        # Class-style 'Tool' suffix fused onto the name.
        assert repair("PatchTool") == "patch"

    def test_double_tacked_class_and_snake_suffix(self, repair):
        # Hardest case from the report: TodoTool_tool — strip both
        # '_tool' (trailing) and 'Tool' (CamelCase embedded) to reach 'todo'.
        assert repair("TodoTool_tool") == "todo"

    def test_simple_name_with_tool_suffix(self, repair):
        # Capitalized single word plus '_tool'.
        assert repair("Patch_tool") == "patch"

    def test_simple_name_with_dash_tool_suffix(self, repair):
        # Dash-separated '-tool' suffix.
        assert repair("patch-tool") == "patch"

    def test_camel_case_preserves_multi_word_match(self, repair):
        # Multi-word CamelCase names keep their word boundaries once the
        # suffix is removed, in both suffix styles.
        assert repair("ReadFile_tool") == "read_file"
        assert repair("WriteFileTool") == "write_file"

    def test_mixed_separators_and_suffix(self, repair):
        # Dash separator combined with a capitalized '_Tool' suffix.
        assert repair("write-file_Tool") == "write_file"
|
|
|
|
|
|
class TestEdgeCases:
    """Edge inputs that must not crash or produce surprising results.

    Each test receives the ``repair`` fixture: a callable that maps a raw
    tool name to the expected registered name, or to ``None``.
    """

    def test_empty_string(self, repair):
        # An empty name has nothing to repair.
        assert repair("") is None

    def test_only_tool_suffix(self, repair):
        # '_tool' by itself is not a valid tool name — must not match
        # anything plausible.
        assert repair("_tool") is None

    def test_none_passed_as_name(self, repair):
        # Defensive: real callers always pass str, but guard against
        # a bug upstream that sends None.
        assert repair(None) is None

    def test_very_long_name_does_not_match_by_accident(self, repair):
        # Fuzzy match should not claim a tool for something obviously unrelated.
        assert repair("ThisIsNotRemotelyARealToolName_tool") is None
|
|
|
|
|
|
class TestVolcEngineXmlPollution:
    """Regression coverage for #33007 — VolcEngine ``api/plan`` endpoint
    leaks raw XML attribute fragments into ``tool_use.name``.

    Observed in production with the ``anthropic_messages`` API mode:

        terminal" parameter="command" string="true
        execute_code" parameter="code" string="true
        session_search" parameter="session_id" string="true

    The fix trims at the first ``"``/``'``/``<``/``>`` so the rest of
    the repair pipeline can resolve the cleaned name to a real tool.

    Each test receives the ``repair`` fixture: a callable that maps a raw
    tool name to the expected registered name, or to ``None``.
    """

    def test_terminal_with_xml_attribute_pollution(self, repair):
        # Exact pattern from the bug report (terminal call).
        polluted = 'terminal" parameter="command" string="true'
        assert repair(polluted) == "terminal"

    def test_execute_code_with_xml_attribute_pollution(self, repair):
        # Second observed pattern (execute_code call).
        polluted = 'execute_code" parameter="code" string="true'
        assert repair(polluted) == "execute_code"

    def test_session_search_with_xml_attribute_pollution(self, repair):
        # Third observed pattern (session_search call).
        polluted = 'session_search" parameter="session_id" string="true'
        assert repair(polluted) == "session_search"

    def test_camel_case_tool_with_xml_pollution(self, repair):
        # If the polluted prefix is CamelCase / suffixed, the rest of
        # the pipeline (CamelCase -> snake_case, _tool strip) still runs.
        polluted = 'BrowserClick_tool" parameter="selector" string="true'
        assert repair(polluted) == "browser_click"

    def test_tool_name_with_trailing_quote_only(self, repair):
        # Minimal leak — just a stray trailing quote, no full attribute.
        assert repair('terminal"') == "terminal"

    def test_tool_name_with_angle_bracket_pollution(self, repair):
        # Defensive — same root cause, raw '<' bleeding through.
        assert repair("terminal<parameter=command") == "terminal"

    def test_tool_name_with_closing_angle_bracket_pollution(self, repair):
        # Defensive — '>' is listed as a trim character alongside '<' in
        # the class docstring, so a stray closing bracket must be cut off
        # the same way.
        assert repair("terminal>parameter=command") == "terminal"

    def test_tool_name_with_single_quote_pollution(self, repair):
        # Defensive — same root cause, single-quoted attribute style.
        assert repair("terminal' parameter='command' string='true") == "terminal"

    def test_clean_tool_name_unaffected_by_sanitizer(self, repair):
        # Pure passthrough — no XML/quote chars, no change.
        assert repair("execute_code") == "execute_code"
        assert repair("session_search") == "session_search"

    def test_space_separated_name_still_normalizes(self, repair):
        # Critical: the XML strip must NOT consume whitespace, or the
        # legitimate ``"write file" -> write_file`` repair path breaks.
        assert repair("write file") == "write_file"

    def test_pollution_with_unknown_tool_root_still_fails(self, repair):
        # Sanitizer must not mask invalid tool names by laundering them
        # through the cleaner.
        polluted = 'no_such_tool" parameter="x" string="true'
        assert repair(polluted) is None

    def test_leading_quote_falls_through_to_fuzzy_match(self, repair):
        # Sanitizer only trims when the XML char is at idx > 0 — a
        # name that *starts* with a quote is left untouched so the
        # rest of the pipeline (fuzzy match at 0.7 cutoff) can still
        # recover the obvious target.
        assert repair('"terminal"') == "terminal"
|