From a1caec1088a2eb0d9dea55cc52d2e173d8395b5b Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Fri, 24 Apr 2026 05:32:08 -0700 Subject: [PATCH] fix(agent): repair CamelCase + _tool suffix tool-call emissions (#15124) Claude-style and some Anthropic-tuned models occasionally emit tool names as class-like identifiers: TodoTool_tool, Patch_tool, BrowserClick_tool, PatchTool. These failed strict-dict lookup in valid_tool_names and triggered the 'Unknown tool' self-correction loop, wasting a full turn of iteration and tokens. _repair_tool_call already handled lowercase / separator / fuzzy matches but couldn't bridge the CamelCase-to-snake_case gap or the trailing '_tool' suffix that Claude sometimes tacks on. Extend it with two bounded normalization passes: 1. CamelCase -> snake_case (via regex lookbehind). 2. Strip trailing _tool / -tool / tool suffix (case-insensitive, applied twice so TodoTool_tool reduces all the way: strip _tool -> TodoTool, snake -> todo_tool, strip 'tool' -> todo). Cheap fast-paths (lowercase / separator-normalized) still run first so the common case stays zero-cost. Fuzzy match remains the last resort unchanged. Tests: tests/run_agent/test_repair_tool_call_name.py covers the three original reports (TodoTool_tool, Patch_tool, BrowserClick_tool), plus PatchTool, WriteFileTool, ReadFile_tool, write-file_Tool, patch-tool, and edge cases (empty, None, '_tool' alone, genuinely unknown names). 18 new tests + 17 existing arg-repair tests = 35/35 pass. Closes #14784 --- run_agent.py | 60 +++++++-- tests/run_agent/test_repair_tool_call_name.py | 117 ++++++++++++++++++ 2 files changed, 169 insertions(+), 8 deletions(-) create mode 100644 tests/run_agent/test_repair_tool_call_name.py diff --git a/run_agent.py b/run_agent.py index b605585f2..4b1429bc7 100644 --- a/run_agent.py +++ b/run_agent.py @@ -4471,25 +4471,69 @@ class AIAgent: def _repair_tool_call(self, tool_name: str) -> str | None: """Attempt to repair a mismatched tool name before aborting. - 1. Try lowercase - 2. Try normalized (lowercase + hyphens/spaces -> underscores) - 3. Try fuzzy match (difflib, cutoff=0.7) + Models sometimes emit variants of a tool name that differ only + in casing, separators, or class-like suffixes. Normalize + aggressively before falling back to fuzzy match: + + 1. Lowercase direct match. + 2. Lowercase + hyphens/spaces -> underscores. + 3. CamelCase -> snake_case (TodoTool -> todo_tool). + 4. Strip trailing ``_tool`` / ``-tool`` / ``tool`` suffix that + Claude-style models sometimes tack on (TodoTool_tool -> + TodoTool -> Todo -> todo). Applied twice so double-tacked + suffixes like ``TodoTool_tool`` reduce all the way. + 5. Fuzzy match (difflib, cutoff=0.7). + + See #14784 for the original reports (TodoTool_tool, Patch_tool, + BrowserClick_tool were all returning "Unknown tool" before). Returns the repaired name if found in valid_tool_names, else None. """ + import re from difflib import get_close_matches - # 1. Lowercase + if not tool_name: + return None + + def _norm(s: str) -> str: + return s.lower().replace("-", "_").replace(" ", "_") + + def _camel_snake(s: str) -> str: + return re.sub(r"(? str | None: + lc = s.lower() + for suffix in ("_tool", "-tool", "tool"): + if lc.endswith(suffix): + return s[: -len(suffix)].rstrip("_-") + return None + + # Cheap fast-paths first — these cover the common case. lowered = tool_name.lower() if lowered in self.valid_tool_names: return lowered - - # 2. Normalize - normalized = lowered.replace("-", "_").replace(" ", "_") + normalized = _norm(tool_name) if normalized in self.valid_tool_names: return normalized - # 3. Fuzzy match + # Build the full candidate set for class-like emissions. + cands: set[str] = {tool_name, lowered, normalized, _camel_snake(tool_name)} + # Strip trailing tool-suffix up to twice — TodoTool_tool needs it. + for _ in range(2): + extra: set[str] = set() + for c in cands: + stripped = _strip_tool_suffix(c) + if stripped: + extra.add(stripped) + extra.add(_norm(stripped)) + extra.add(_camel_snake(stripped)) + cands |= extra + + for c in cands: + if c and c in self.valid_tool_names: + return c + + # Fuzzy match as last resort. matches = get_close_matches(lowered, self.valid_tool_names, n=1, cutoff=0.7) if matches: return matches[0] diff --git a/tests/run_agent/test_repair_tool_call_name.py b/tests/run_agent/test_repair_tool_call_name.py new file mode 100644 index 000000000..15dfcccad --- /dev/null +++ b/tests/run_agent/test_repair_tool_call_name.py @@ -0,0 +1,117 @@ +"""Tests for AIAgent._repair_tool_call — tool-name normalization. + +Regression guard for #14784: Claude-style models sometimes emit +class-like tool-call names (``TodoTool_tool``, ``Patch_tool``, +``BrowserClick_tool``, ``PatchTool``). Before the fix they returned +"Unknown tool" even though the target tool was registered under a +snake_case name. The repair routine now normalizes CamelCase, +strips trailing ``_tool`` / ``-tool`` / ``tool`` suffixes (up to +twice to handle double-tacked suffixes like ``TodoTool_tool``), and +falls back to fuzzy match. +""" +from __future__ import annotations + +from types import SimpleNamespace + +import pytest + + +VALID = { + "todo", + "patch", + "browser_click", + "browser_navigate", + "web_search", + "read_file", + "write_file", + "terminal", +} + + +@pytest.fixture +def repair(): + """Return a bound _repair_tool_call built on a minimal shell agent. + + We avoid constructing a real AIAgent (which pulls in credential + resolution, session DB, etc.) because the repair routine only + reads self.valid_tool_names. A SimpleNamespace stub is enough to + bind the unbound function. + """ + from run_agent import AIAgent + stub = SimpleNamespace(valid_tool_names=VALID) + return AIAgent._repair_tool_call.__get__(stub, AIAgent) + + +class TestExistingBehaviorStillWorks: + """Pre-existing repairs must keep working (no regressions).""" + + def test_lowercase_already_matches(self, repair): + assert repair("browser_click") == "browser_click" + + def test_uppercase_simple(self, repair): + assert repair("TERMINAL") == "terminal" + + def test_dash_to_underscore(self, repair): + assert repair("web-search") == "web_search" + + def test_space_to_underscore(self, repair): + assert repair("write file") == "write_file" + + def test_fuzzy_near_miss(self, repair): + # One-character typo — fuzzy match at 0.7 cutoff + assert repair("terminall") == "terminal" + + def test_unknown_returns_none(self, repair): + assert repair("xyz_no_such_tool") is None + + +class TestClassLikeEmissions: + """Regression coverage for #14784 — CamelCase + _tool suffix variants.""" + + def test_camel_case_no_suffix(self, repair): + assert repair("BrowserClick") == "browser_click" + + def test_camel_case_with_underscore_tool_suffix(self, repair): + assert repair("BrowserClick_tool") == "browser_click" + + def test_camel_case_with_Tool_class_suffix(self, repair): + assert repair("PatchTool") == "patch" + + def test_double_tacked_class_and_snake_suffix(self, repair): + # Hardest case from the report: TodoTool_tool — strip both + # '_tool' (trailing) and 'Tool' (CamelCase embedded) to reach 'todo'. + assert repair("TodoTool_tool") == "todo" + + def test_simple_name_with_tool_suffix(self, repair): + assert repair("Patch_tool") == "patch" + + def test_simple_name_with_dash_tool_suffix(self, repair): + assert repair("patch-tool") == "patch" + + def test_camel_case_preserves_multi_word_match(self, repair): + assert repair("ReadFile_tool") == "read_file" + assert repair("WriteFileTool") == "write_file" + + def test_mixed_separators_and_suffix(self, repair): + assert repair("write-file_Tool") == "write_file" + + +class TestEdgeCases: + """Edge inputs that must not crash or produce surprising results.""" + + def test_empty_string(self, repair): + assert repair("") is None + + def test_only_tool_suffix(self, repair): + # '_tool' by itself is not a valid tool name — must not match + # anything plausible. + assert repair("_tool") is None + + def test_none_passed_as_name(self, repair): + # Defensive: real callers always pass str, but guard against + # a bug upstream that sends None. + assert repair(None) is None + + def test_very_long_name_does_not_match_by_accident(self, repair): + # Fuzzy match should not claim a tool for something obviously unrelated. + assert repair("ThisIsNotRemotelyARealToolName_tool") is None