fix(agent): repair CamelCase + _tool suffix tool-call emissions (#15124)

Claude-style and some Anthropic-tuned models occasionally emit tool
names as class-like identifiers: TodoTool_tool, Patch_tool,
BrowserClick_tool, PatchTool. These failed strict-dict lookup in
valid_tool_names and triggered the 'Unknown tool' self-correction
loop, wasting a full turn of iteration and tokens.

_repair_tool_call already handled lowercase / separator / fuzzy
matches but couldn't bridge the CamelCase-to-snake_case gap or the
trailing '_tool' suffix that Claude sometimes tacks on. Extend it
with two bounded normalization passes:

  1. CamelCase -> snake_case (via regex lookbehind).
  2. Strip trailing _tool / -tool / tool suffix (case-insensitive,
     applied twice so TodoTool_tool reduces all the way: strip
     _tool -> TodoTool, snake -> todo_tool, strip 'tool' -> todo).

Cheap fast-paths (lowercase / separator-normalized) still run first
so the common case stays zero-cost. Fuzzy match remains the last
resort unchanged.

Tests: tests/run_agent/test_repair_tool_call_name.py covers the
three original reports (TodoTool_tool, Patch_tool, BrowserClick_tool),
plus PatchTool, WriteFileTool, ReadFile_tool, write-file_Tool,
patch-tool, and edge cases (empty, None, '_tool' alone, genuinely
unknown names).

18 new tests + 17 existing arg-repair tests = 35/35 pass.

Closes #14784
This commit is contained in:
Teknium 2026-04-24 05:32:08 -07:00 committed by GitHub
parent 05394f2f28
commit a1caec1088
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 169 additions and 8 deletions

View file

@ -0,0 +1,117 @@
"""Tests for AIAgent._repair_tool_call — tool-name normalization.
Regression guard for #14784: Claude-style models sometimes emit
class-like tool-call names (``TodoTool_tool``, ``Patch_tool``,
``BrowserClick_tool``, ``PatchTool``). Before the fix they returned
"Unknown tool" even though the target tool was registered under a
snake_case name. The repair routine now normalizes CamelCase,
strips trailing ``_tool`` / ``-tool`` / ``tool`` suffixes (up to
twice to handle double-tacked suffixes like ``TodoTool_tool``), and
falls back to fuzzy match.
"""
from __future__ import annotations
from types import SimpleNamespace
import pytest
VALID = {
"todo",
"patch",
"browser_click",
"browser_navigate",
"web_search",
"read_file",
"write_file",
"terminal",
}
@pytest.fixture
def repair():
"""Return a bound _repair_tool_call built on a minimal shell agent.
We avoid constructing a real AIAgent (which pulls in credential
resolution, session DB, etc.) because the repair routine only
reads self.valid_tool_names. A SimpleNamespace stub is enough to
bind the unbound function.
"""
from run_agent import AIAgent
stub = SimpleNamespace(valid_tool_names=VALID)
return AIAgent._repair_tool_call.__get__(stub, AIAgent)
class TestExistingBehaviorStillWorks:
"""Pre-existing repairs must keep working (no regressions)."""
def test_lowercase_already_matches(self, repair):
assert repair("browser_click") == "browser_click"
def test_uppercase_simple(self, repair):
assert repair("TERMINAL") == "terminal"
def test_dash_to_underscore(self, repair):
assert repair("web-search") == "web_search"
def test_space_to_underscore(self, repair):
assert repair("write file") == "write_file"
def test_fuzzy_near_miss(self, repair):
# One-character typo — fuzzy match at 0.7 cutoff
assert repair("terminall") == "terminal"
def test_unknown_returns_none(self, repair):
assert repair("xyz_no_such_tool") is None
class TestClassLikeEmissions:
"""Regression coverage for #14784 — CamelCase + _tool suffix variants."""
def test_camel_case_no_suffix(self, repair):
assert repair("BrowserClick") == "browser_click"
def test_camel_case_with_underscore_tool_suffix(self, repair):
assert repair("BrowserClick_tool") == "browser_click"
def test_camel_case_with_Tool_class_suffix(self, repair):
assert repair("PatchTool") == "patch"
def test_double_tacked_class_and_snake_suffix(self, repair):
# Hardest case from the report: TodoTool_tool — strip both
# '_tool' (trailing) and 'Tool' (CamelCase embedded) to reach 'todo'.
assert repair("TodoTool_tool") == "todo"
def test_simple_name_with_tool_suffix(self, repair):
assert repair("Patch_tool") == "patch"
def test_simple_name_with_dash_tool_suffix(self, repair):
assert repair("patch-tool") == "patch"
def test_camel_case_preserves_multi_word_match(self, repair):
assert repair("ReadFile_tool") == "read_file"
assert repair("WriteFileTool") == "write_file"
def test_mixed_separators_and_suffix(self, repair):
assert repair("write-file_Tool") == "write_file"
class TestEdgeCases:
"""Edge inputs that must not crash or produce surprising results."""
def test_empty_string(self, repair):
assert repair("") is None
def test_only_tool_suffix(self, repair):
# '_tool' by itself is not a valid tool name — must not match
# anything plausible.
assert repair("_tool") is None
def test_none_passed_as_name(self, repair):
# Defensive: real callers always pass str, but guard against
# a bug upstream that sends None.
assert repair(None) is None
def test_very_long_name_does_not_match_by_accident(self, repair):
# Fuzzy match should not claim a tool for something obviously unrelated.
assert repair("ThisIsNotRemotelyARealToolName_tool") is None