fix(agent): repair CamelCase + _tool suffix tool-call emissions (#15124)

Claude-style and some Anthropic-tuned models occasionally emit tool names as class-like identifiers: TodoTool_tool, Patch_tool, BrowserClick_tool, PatchTool. These failed strict-dict lookup in valid_tool_names and triggered the 'Unknown tool' self-correction loop, wasting a full turn of iteration and tokens. _repair_tool_call already handled lowercase / separator / fuzzy matches but couldn't bridge the CamelCase-to-snake_case gap or the trailing '_tool' suffix that Claude sometimes tacks on. Extend it with two bounded normalization passes: 1. CamelCase -> snake_case (via regex lookbehind). 2. Strip trailing _tool / -tool / tool suffix (case-insensitive, applied twice so TodoTool_tool reduces all the way: strip _tool -> TodoTool, snake -> todo_tool, strip 'tool' -> todo). Cheap fast-paths (lowercase / separator-normalized) still run first so the common case stays zero-cost. Fuzzy match remains the last resort unchanged. Tests: tests/run_agent/test_repair_tool_call_name.py covers the three original reports (TodoTool_tool, Patch_tool, BrowserClick_tool), plus PatchTool, WriteFileTool, ReadFile_tool, write-file_Tool, patch-tool, and edge cases (empty, None, '_tool' alone, genuinely unknown names). 18 new tests + 17 existing arg-repair tests = 35/35 pass. Closes #14784
2026-04-25 00:51:20 +00:00 · 2026-04-24 05:32:08 -07:00 · 2026-04-24 05:32:08 -07:00 · a1caec1088
commit a1caec1088
parent 05394f2f28
2 changed files with 169 additions and 8 deletions
--- a/run_agent.py
+++ b/run_agent.py
@ -4471,25 +4471,69 @@ class AIAgent:
    def _repair_tool_call(self, tool_name: str) -> str | None:
        """Attempt to repair a mismatched tool name before aborting.
-        1. Try lowercase
+        Models sometimes emit variants of a tool name that differ only
-        2. Try normalized (lowercase + hyphens/spaces -> underscores)
+        in casing, separators, or class-like suffixes. Normalize
-        3. Try fuzzy match (difflib, cutoff=0.7)
+        aggressively before falling back to fuzzy match:
        1. Lowercase direct match.
        2. Lowercase + hyphens/spaces -> underscores.
        3. CamelCase -> snake_case (TodoTool -> todo_tool).
        4. Strip trailing ``_tool`` / ``-tool`` / ``tool`` suffix that
           Claude-style models sometimes tack on (TodoTool_tool ->
           TodoTool -> Todo -> todo). Applied twice so double-tacked
           suffixes like ``TodoTool_tool`` reduce all the way.
        5. Fuzzy match (difflib, cutoff=0.7).
        See #14784 for the original reports (TodoTool_tool, Patch_tool,
        BrowserClick_tool were all returning "Unknown tool" before).
        Returns the repaired name if found in valid_tool_names, else None.
        """
        import re
        from difflib import get_close_matches
-        # 1. Lowercase
+        if not tool_name:
            return None
        def _norm(s: str) -> str:
            return s.lower().replace("-", "_").replace(" ", "_")
        def _camel_snake(s: str) -> str:
            return re.sub(r"(?<!^)(?=[A-Z])", "_", s).lower()
        def _strip_tool_suffix(s: str) -> str | None:
            lc = s.lower()
            for suffix in ("_tool", "-tool", "tool"):
                if lc.endswith(suffix):
                    return s[: -len(suffix)].rstrip("_-")
            return None
        # Cheap fast-paths first — these cover the common case.
        lowered = tool_name.lower()
        if lowered in self.valid_tool_names:
            return lowered
-
+        normalized = _norm(tool_name)
        # 2. Normalize
        normalized = lowered.replace("-", "_").replace(" ", "_")
        if normalized in self.valid_tool_names:
            return normalized
-        # 3. Fuzzy match
+        # Build the full candidate set for class-like emissions.
        cands: set[str] = {tool_name, lowered, normalized, _camel_snake(tool_name)}
        # Strip trailing tool-suffix up to twice — TodoTool_tool needs it.
        for _ in range(2):
            extra: set[str] = set()
            for c in cands:
                stripped = _strip_tool_suffix(c)
                if stripped:
                    extra.add(stripped)
                    extra.add(_norm(stripped))
                    extra.add(_camel_snake(stripped))
            cands |= extra
        for c in cands:
            if c and c in self.valid_tool_names:
                return c
        # Fuzzy match as last resort.
        matches = get_close_matches(lowered, self.valid_tool_names, n=1, cutoff=0.7)
        if matches:
            return matches[0]
--- a/tests/run_agent/test_repair_tool_call_name.py
+++ b/tests/run_agent/test_repair_tool_call_name.py
@ -0,0 +1,117 @@
 """Tests for AIAgent._repair_tool_call — tool-name normalization.
 Regression guard for #14784: Claude-style models sometimes emit
 class-like tool-call names (``TodoTool_tool``, ``Patch_tool``,
 ``BrowserClick_tool``, ``PatchTool``). Before the fix they returned
 "Unknown tool" even though the target tool was registered under a
 snake_case name. The repair routine now normalizes CamelCase,
 strips trailing ``_tool`` / ``-tool`` / ``tool`` suffixes (up to
 twice to handle double-tacked suffixes like ``TodoTool_tool``), and
 falls back to fuzzy match.
 """
 from __future__ import annotations
 from types import SimpleNamespace
 import pytest
 VALID = {
    "todo",
    "patch",
    "browser_click",
    "browser_navigate",
    "web_search",
    "read_file",
    "write_file",
    "terminal",
 }
@pytest.fixture
 def repair():
    """Return a bound _repair_tool_call built on a minimal shell agent.
    We avoid constructing a real AIAgent (which pulls in credential
    resolution, session DB, etc.) because the repair routine only
    reads self.valid_tool_names. A SimpleNamespace stub is enough to
    bind the unbound function.
    """
    from run_agent import AIAgent
    stub = SimpleNamespace(valid_tool_names=VALID)
    return AIAgent._repair_tool_call.__get__(stub, AIAgent)
 class TestExistingBehaviorStillWorks:
    """Pre-existing repairs must keep working (no regressions)."""
    def test_lowercase_already_matches(self, repair):
        assert repair("browser_click") == "browser_click"
    def test_uppercase_simple(self, repair):
        assert repair("TERMINAL") == "terminal"
    def test_dash_to_underscore(self, repair):
        assert repair("web-search") == "web_search"
    def test_space_to_underscore(self, repair):
        assert repair("write file") == "write_file"
    def test_fuzzy_near_miss(self, repair):
        # One-character typo — fuzzy match at 0.7 cutoff
        assert repair("terminall") == "terminal"
    def test_unknown_returns_none(self, repair):
        assert repair("xyz_no_such_tool") is None
 class TestClassLikeEmissions:
    """Regression coverage for #14784 — CamelCase + _tool suffix variants."""
    def test_camel_case_no_suffix(self, repair):
        assert repair("BrowserClick") == "browser_click"
    def test_camel_case_with_underscore_tool_suffix(self, repair):
        assert repair("BrowserClick_tool") == "browser_click"
    def test_camel_case_with_Tool_class_suffix(self, repair):
        assert repair("PatchTool") == "patch"
    def test_double_tacked_class_and_snake_suffix(self, repair):
        # Hardest case from the report: TodoTool_tool — strip both
        # '_tool' (trailing) and 'Tool' (CamelCase embedded) to reach 'todo'.
        assert repair("TodoTool_tool") == "todo"
    def test_simple_name_with_tool_suffix(self, repair):
        assert repair("Patch_tool") == "patch"
    def test_simple_name_with_dash_tool_suffix(self, repair):
        assert repair("patch-tool") == "patch"
    def test_camel_case_preserves_multi_word_match(self, repair):
        assert repair("ReadFile_tool") == "read_file"
        assert repair("WriteFileTool") == "write_file"
    def test_mixed_separators_and_suffix(self, repair):
        assert repair("write-file_Tool") == "write_file"
 class TestEdgeCases:
    """Edge inputs that must not crash or produce surprising results."""
    def test_empty_string(self, repair):
        assert repair("") is None
    def test_only_tool_suffix(self, repair):
        # '_tool' by itself is not a valid tool name — must not match
        # anything plausible.
        assert repair("_tool") is None
    def test_none_passed_as_name(self, repair):
        # Defensive: real callers always pass str, but guard against
        # a bug upstream that sends None.
        assert repair(None) is None
    def test_very_long_name_does_not_match_by_accident(self, repair):
        # Fuzzy match should not claim a tool for something obviously unrelated.
        assert repair("ThisIsNotRemotelyARealToolName_tool") is None