From a1caec1088a2eb0d9dea55cc52d2e173d8395b5b Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Fri, 24 Apr 2026 05:32:08 -0700
Subject: [PATCH] fix(agent): repair CamelCase + _tool suffix tool-call
 emissions (#15124)

Claude-style and some Anthropic-tuned models occasionally emit tool
names as class-like identifiers: TodoTool_tool, Patch_tool,
BrowserClick_tool, PatchTool. These failed strict-dict lookup in
valid_tool_names and triggered the 'Unknown tool' self-correction
loop, wasting a full turn of iteration and tokens.

_repair_tool_call already handled lowercase / separator / fuzzy
matches but couldn't bridge the CamelCase-to-snake_case gap or the
trailing '_tool' suffix that Claude sometimes tacks on. Extend it
with two bounded normalization passes:

  1. CamelCase -> snake_case (via regex lookbehind).
  2. Strip trailing _tool / -tool / tool suffix (case-insensitive,
     applied twice so TodoTool_tool reduces all the way: strip
     _tool -> TodoTool, snake -> todo_tool, strip 'tool' -> todo).

Cheap fast-paths (lowercase / separator-normalized) still run first
so the common case stays zero-cost. Fuzzy match remains the last
resort unchanged.

Tests: tests/run_agent/test_repair_tool_call_name.py covers the
three original reports (TodoTool_tool, Patch_tool, BrowserClick_tool),
plus PatchTool, WriteFileTool, ReadFile_tool, write-file_Tool,
patch-tool, and edge cases (empty, None, '_tool' alone, genuinely
unknown names).

18 new tests + 17 existing arg-repair tests = 35/35 pass.

Closes #14784
---
 run_agent.py                                  |  60 +++++++--
 tests/run_agent/test_repair_tool_call_name.py | 117 ++++++++++++++++++
 2 files changed, 169 insertions(+), 8 deletions(-)
 create mode 100644 tests/run_agent/test_repair_tool_call_name.py

diff --git a/run_agent.py b/run_agent.py
index b605585f2..4b1429bc7 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -4471,25 +4471,69 @@ class AIAgent:
     def _repair_tool_call(self, tool_name: str) -> str | None:
         """Attempt to repair a mismatched tool name before aborting.
 
-        1. Try lowercase
-        2. Try normalized (lowercase + hyphens/spaces -> underscores)
-        3. Try fuzzy match (difflib, cutoff=0.7)
+        Models sometimes emit variants of a tool name that differ only
+        in casing, separators, or class-like suffixes. Normalize
+        aggressively before falling back to fuzzy match:
+
+        1. Lowercase direct match.
+        2. Lowercase + hyphens/spaces -> underscores.
+        3. CamelCase -> snake_case (TodoTool -> todo_tool).
+        4. Strip trailing ``_tool`` / ``-tool`` / ``tool`` suffix that
+           Claude-style models sometimes tack on (TodoTool_tool ->
+           TodoTool -> Todo -> todo). Applied twice so double-tacked
+           suffixes like ``TodoTool_tool`` reduce all the way.
+        5. Fuzzy match (difflib, cutoff=0.7).
+
+        See #14784 for the original reports (TodoTool_tool, Patch_tool,
+        BrowserClick_tool were all returning "Unknown tool" before).
 
         Returns the repaired name if found in valid_tool_names, else None.
         """
+        import re
         from difflib import get_close_matches
 
-        # 1. Lowercase
+        if not tool_name:
+            return None
+
+        def _norm(s: str) -> str:
+            return s.lower().replace("-", "_").replace(" ", "_")
+
+        def _camel_snake(s: str) -> str:
+            return re.sub(r"(?<!^)(?=[A-Z])", "_", s).lower()
+
+        def _strip_tool_suffix(s: str) -> str | None:
+            lc = s.lower()
+            for suffix in ("_tool", "-tool", "tool"):
+                if lc.endswith(suffix):
+                    return s[: -len(suffix)].rstrip("_-")
+            return None
+
+        # Cheap fast-paths first — these cover the common case.
         lowered = tool_name.lower()
         if lowered in self.valid_tool_names:
             return lowered
-
-        # 2. Normalize
-        normalized = lowered.replace("-", "_").replace(" ", "_")
+        normalized = _norm(tool_name)
         if normalized in self.valid_tool_names:
             return normalized
 
-        # 3. Fuzzy match
+        # Build the full candidate set for class-like emissions.
+        cands: set[str] = {tool_name, lowered, normalized, _camel_snake(tool_name)}
+        # Strip trailing tool-suffix up to twice — TodoTool_tool needs it.
+        for _ in range(2):
+            extra: set[str] = set()
+            for c in cands:
+                stripped = _strip_tool_suffix(c)
+                if stripped:
+                    extra.add(stripped)
+                    extra.add(_norm(stripped))
+                    extra.add(_camel_snake(stripped))
+            cands |= extra
+
+        for c in cands:
+            if c and c in self.valid_tool_names:
+                return c
+
+        # Fuzzy match as last resort.
         matches = get_close_matches(lowered, self.valid_tool_names, n=1, cutoff=0.7)
         if matches:
             return matches[0]
diff --git a/tests/run_agent/test_repair_tool_call_name.py b/tests/run_agent/test_repair_tool_call_name.py
new file mode 100644
index 000000000..15dfcccad
--- /dev/null
+++ b/tests/run_agent/test_repair_tool_call_name.py
@@ -0,0 +1,117 @@
+"""Tests for AIAgent._repair_tool_call — tool-name normalization.
+
+Regression guard for #14784: Claude-style models sometimes emit
+class-like tool-call names (``TodoTool_tool``, ``Patch_tool``,
+``BrowserClick_tool``, ``PatchTool``). Before the fix they returned
+"Unknown tool" even though the target tool was registered under a
+snake_case name. The repair routine now normalizes CamelCase,
+strips trailing ``_tool`` / ``-tool`` / ``tool`` suffixes (up to
+twice to handle double-tacked suffixes like ``TodoTool_tool``), and
+falls back to fuzzy match.
+"""
+from __future__ import annotations
+
+from types import SimpleNamespace
+
+import pytest
+
+
+VALID = {
+    "todo",
+    "patch",
+    "browser_click",
+    "browser_navigate",
+    "web_search",
+    "read_file",
+    "write_file",
+    "terminal",
+}
+
+
+@pytest.fixture
+def repair():
+    """Return a bound _repair_tool_call built on a minimal shell agent.
+
+    We avoid constructing a real AIAgent (which pulls in credential
+    resolution, session DB, etc.) because the repair routine only
+    reads self.valid_tool_names. A SimpleNamespace stub is enough to
+    bind the unbound function.
+    """
+    from run_agent import AIAgent
+    stub = SimpleNamespace(valid_tool_names=VALID)
+    return AIAgent._repair_tool_call.__get__(stub, AIAgent)
+
+
+class TestExistingBehaviorStillWorks:
+    """Pre-existing repairs must keep working (no regressions)."""
+
+    def test_lowercase_already_matches(self, repair):
+        assert repair("browser_click") == "browser_click"
+
+    def test_uppercase_simple(self, repair):
+        assert repair("TERMINAL") == "terminal"
+
+    def test_dash_to_underscore(self, repair):
+        assert repair("web-search") == "web_search"
+
+    def test_space_to_underscore(self, repair):
+        assert repair("write file") == "write_file"
+
+    def test_fuzzy_near_miss(self, repair):
+        # One-character typo — fuzzy match at 0.7 cutoff
+        assert repair("terminall") == "terminal"
+
+    def test_unknown_returns_none(self, repair):
+        assert repair("xyz_no_such_tool") is None
+
+
+class TestClassLikeEmissions:
+    """Regression coverage for #14784 — CamelCase + _tool suffix variants."""
+
+    def test_camel_case_no_suffix(self, repair):
+        assert repair("BrowserClick") == "browser_click"
+
+    def test_camel_case_with_underscore_tool_suffix(self, repair):
+        assert repair("BrowserClick_tool") == "browser_click"
+
+    def test_camel_case_with_Tool_class_suffix(self, repair):
+        assert repair("PatchTool") == "patch"
+
+    def test_double_tacked_class_and_snake_suffix(self, repair):
+        # Hardest case from the report: TodoTool_tool — strip both
+        # '_tool' (trailing) and 'Tool' (CamelCase embedded) to reach 'todo'.
+        assert repair("TodoTool_tool") == "todo"
+
+    def test_simple_name_with_tool_suffix(self, repair):
+        assert repair("Patch_tool") == "patch"
+
+    def test_simple_name_with_dash_tool_suffix(self, repair):
+        assert repair("patch-tool") == "patch"
+
+    def test_camel_case_preserves_multi_word_match(self, repair):
+        assert repair("ReadFile_tool") == "read_file"
+        assert repair("WriteFileTool") == "write_file"
+
+    def test_mixed_separators_and_suffix(self, repair):
+        assert repair("write-file_Tool") == "write_file"
+
+
+class TestEdgeCases:
+    """Edge inputs that must not crash or produce surprising results."""
+
+    def test_empty_string(self, repair):
+        assert repair("") is None
+
+    def test_only_tool_suffix(self, repair):
+        # '_tool' by itself is not a valid tool name — must not match
+        # anything plausible.
+        assert repair("_tool") is None
+
+    def test_none_passed_as_name(self, repair):
+        # Defensive: real callers always pass str, but guard against
+        # a bug upstream that sends None.
+        assert repair(None) is None
+
+    def test_very_long_name_does_not_match_by_accident(self, repair):
+        # Fuzzy match should not claim a tool for something obviously unrelated.
+        assert repair("ThisIsNotRemotelyARealToolName_tool") is None