mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix(agent): repair CamelCase + _tool suffix tool-call emissions (#15124)
Claude-style and some Anthropic-tuned models occasionally emit tool
names as class-like identifiers: TodoTool_tool, Patch_tool,
BrowserClick_tool, PatchTool. These failed strict-dict lookup in
valid_tool_names and triggered the 'Unknown tool' self-correction
loop, wasting a full turn of iteration and tokens.
_repair_tool_call already handled lowercase / separator / fuzzy
matches but couldn't bridge the CamelCase-to-snake_case gap or the
trailing '_tool' suffix that Claude sometimes tacks on. Extend it
with two bounded normalization passes:
1. CamelCase -> snake_case (via regex lookbehind).
2. Strip trailing _tool / -tool / tool suffix (case-insensitive,
applied twice so TodoTool_tool reduces all the way: strip
_tool -> TodoTool, snake -> todo_tool, strip 'tool' -> todo).
Cheap fast-paths (lowercase / separator-normalized) still run first
so the common case stays zero-cost. Fuzzy match remains the last
resort unchanged.
Tests: tests/run_agent/test_repair_tool_call_name.py covers the
three original reports (TodoTool_tool, Patch_tool, BrowserClick_tool),
plus PatchTool, WriteFileTool, ReadFile_tool, write-file_Tool,
patch-tool, and edge cases (empty, None, '_tool' alone, genuinely
unknown names).
18 new tests + 17 existing arg-repair tests = 35/35 pass.
Closes #14784
This commit is contained in:
parent
05394f2f28
commit
a1caec1088
2 changed files with 169 additions and 8 deletions
60
run_agent.py
60
run_agent.py
|
|
@ -4471,25 +4471,69 @@ class AIAgent:
|
||||||
def _repair_tool_call(self, tool_name: str) -> str | None:
|
def _repair_tool_call(self, tool_name: str) -> str | None:
|
||||||
"""Attempt to repair a mismatched tool name before aborting.
|
"""Attempt to repair a mismatched tool name before aborting.
|
||||||
|
|
||||||
1. Try lowercase
|
Models sometimes emit variants of a tool name that differ only
|
||||||
2. Try normalized (lowercase + hyphens/spaces -> underscores)
|
in casing, separators, or class-like suffixes. Normalize
|
||||||
3. Try fuzzy match (difflib, cutoff=0.7)
|
aggressively before falling back to fuzzy match:
|
||||||
|
|
||||||
|
1. Lowercase direct match.
|
||||||
|
2. Lowercase + hyphens/spaces -> underscores.
|
||||||
|
3. CamelCase -> snake_case (TodoTool -> todo_tool).
|
||||||
|
4. Strip trailing ``_tool`` / ``-tool`` / ``tool`` suffix that
|
||||||
|
Claude-style models sometimes tack on (TodoTool_tool ->
|
||||||
|
TodoTool -> Todo -> todo). Applied twice so double-tacked
|
||||||
|
suffixes like ``TodoTool_tool`` reduce all the way.
|
||||||
|
5. Fuzzy match (difflib, cutoff=0.7).
|
||||||
|
|
||||||
|
See #14784 for the original reports (TodoTool_tool, Patch_tool,
|
||||||
|
BrowserClick_tool were all returning "Unknown tool" before).
|
||||||
|
|
||||||
Returns the repaired name if found in valid_tool_names, else None.
|
Returns the repaired name if found in valid_tool_names, else None.
|
||||||
"""
|
"""
|
||||||
|
import re
|
||||||
from difflib import get_close_matches
|
from difflib import get_close_matches
|
||||||
|
|
||||||
# 1. Lowercase
|
if not tool_name:
|
||||||
|
return None
|
||||||
|
|
||||||
|
def _norm(s: str) -> str:
|
||||||
|
return s.lower().replace("-", "_").replace(" ", "_")
|
||||||
|
|
||||||
|
def _camel_snake(s: str) -> str:
|
||||||
|
return re.sub(r"(?<!^)(?=[A-Z])", "_", s).lower()
|
||||||
|
|
||||||
|
def _strip_tool_suffix(s: str) -> str | None:
|
||||||
|
lc = s.lower()
|
||||||
|
for suffix in ("_tool", "-tool", "tool"):
|
||||||
|
if lc.endswith(suffix):
|
||||||
|
return s[: -len(suffix)].rstrip("_-")
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Cheap fast-paths first — these cover the common case.
|
||||||
lowered = tool_name.lower()
|
lowered = tool_name.lower()
|
||||||
if lowered in self.valid_tool_names:
|
if lowered in self.valid_tool_names:
|
||||||
return lowered
|
return lowered
|
||||||
|
normalized = _norm(tool_name)
|
||||||
# 2. Normalize
|
|
||||||
normalized = lowered.replace("-", "_").replace(" ", "_")
|
|
||||||
if normalized in self.valid_tool_names:
|
if normalized in self.valid_tool_names:
|
||||||
return normalized
|
return normalized
|
||||||
|
|
||||||
# 3. Fuzzy match
|
# Build the full candidate set for class-like emissions.
|
||||||
|
cands: set[str] = {tool_name, lowered, normalized, _camel_snake(tool_name)}
|
||||||
|
# Strip trailing tool-suffix up to twice — TodoTool_tool needs it.
|
||||||
|
for _ in range(2):
|
||||||
|
extra: set[str] = set()
|
||||||
|
for c in cands:
|
||||||
|
stripped = _strip_tool_suffix(c)
|
||||||
|
if stripped:
|
||||||
|
extra.add(stripped)
|
||||||
|
extra.add(_norm(stripped))
|
||||||
|
extra.add(_camel_snake(stripped))
|
||||||
|
cands |= extra
|
||||||
|
|
||||||
|
for c in cands:
|
||||||
|
if c and c in self.valid_tool_names:
|
||||||
|
return c
|
||||||
|
|
||||||
|
# Fuzzy match as last resort.
|
||||||
matches = get_close_matches(lowered, self.valid_tool_names, n=1, cutoff=0.7)
|
matches = get_close_matches(lowered, self.valid_tool_names, n=1, cutoff=0.7)
|
||||||
if matches:
|
if matches:
|
||||||
return matches[0]
|
return matches[0]
|
||||||
|
|
|
||||||
117
tests/run_agent/test_repair_tool_call_name.py
Normal file
117
tests/run_agent/test_repair_tool_call_name.py
Normal file
|
|
@ -0,0 +1,117 @@
|
||||||
|
"""Tests for AIAgent._repair_tool_call — tool-name normalization.
|
||||||
|
|
||||||
|
Regression guard for #14784: Claude-style models sometimes emit
|
||||||
|
class-like tool-call names (``TodoTool_tool``, ``Patch_tool``,
|
||||||
|
``BrowserClick_tool``, ``PatchTool``). Before the fix they returned
|
||||||
|
"Unknown tool" even though the target tool was registered under a
|
||||||
|
snake_case name. The repair routine now normalizes CamelCase,
|
||||||
|
strips trailing ``_tool`` / ``-tool`` / ``tool`` suffixes (up to
|
||||||
|
twice to handle double-tacked suffixes like ``TodoTool_tool``), and
|
||||||
|
falls back to fuzzy match.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from types import SimpleNamespace
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
VALID = {
|
||||||
|
"todo",
|
||||||
|
"patch",
|
||||||
|
"browser_click",
|
||||||
|
"browser_navigate",
|
||||||
|
"web_search",
|
||||||
|
"read_file",
|
||||||
|
"write_file",
|
||||||
|
"terminal",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def repair():
|
||||||
|
"""Return a bound _repair_tool_call built on a minimal shell agent.
|
||||||
|
|
||||||
|
We avoid constructing a real AIAgent (which pulls in credential
|
||||||
|
resolution, session DB, etc.) because the repair routine only
|
||||||
|
reads self.valid_tool_names. A SimpleNamespace stub is enough to
|
||||||
|
bind the unbound function.
|
||||||
|
"""
|
||||||
|
from run_agent import AIAgent
|
||||||
|
stub = SimpleNamespace(valid_tool_names=VALID)
|
||||||
|
return AIAgent._repair_tool_call.__get__(stub, AIAgent)
|
||||||
|
|
||||||
|
|
||||||
|
class TestExistingBehaviorStillWorks:
|
||||||
|
"""Pre-existing repairs must keep working (no regressions)."""
|
||||||
|
|
||||||
|
def test_lowercase_already_matches(self, repair):
|
||||||
|
assert repair("browser_click") == "browser_click"
|
||||||
|
|
||||||
|
def test_uppercase_simple(self, repair):
|
||||||
|
assert repair("TERMINAL") == "terminal"
|
||||||
|
|
||||||
|
def test_dash_to_underscore(self, repair):
|
||||||
|
assert repair("web-search") == "web_search"
|
||||||
|
|
||||||
|
def test_space_to_underscore(self, repair):
|
||||||
|
assert repair("write file") == "write_file"
|
||||||
|
|
||||||
|
def test_fuzzy_near_miss(self, repair):
|
||||||
|
# One-character typo — fuzzy match at 0.7 cutoff
|
||||||
|
assert repair("terminall") == "terminal"
|
||||||
|
|
||||||
|
def test_unknown_returns_none(self, repair):
|
||||||
|
assert repair("xyz_no_such_tool") is None
|
||||||
|
|
||||||
|
|
||||||
|
class TestClassLikeEmissions:
|
||||||
|
"""Regression coverage for #14784 — CamelCase + _tool suffix variants."""
|
||||||
|
|
||||||
|
def test_camel_case_no_suffix(self, repair):
|
||||||
|
assert repair("BrowserClick") == "browser_click"
|
||||||
|
|
||||||
|
def test_camel_case_with_underscore_tool_suffix(self, repair):
|
||||||
|
assert repair("BrowserClick_tool") == "browser_click"
|
||||||
|
|
||||||
|
def test_camel_case_with_Tool_class_suffix(self, repair):
|
||||||
|
assert repair("PatchTool") == "patch"
|
||||||
|
|
||||||
|
def test_double_tacked_class_and_snake_suffix(self, repair):
|
||||||
|
# Hardest case from the report: TodoTool_tool — strip both
|
||||||
|
# '_tool' (trailing) and 'Tool' (CamelCase embedded) to reach 'todo'.
|
||||||
|
assert repair("TodoTool_tool") == "todo"
|
||||||
|
|
||||||
|
def test_simple_name_with_tool_suffix(self, repair):
|
||||||
|
assert repair("Patch_tool") == "patch"
|
||||||
|
|
||||||
|
def test_simple_name_with_dash_tool_suffix(self, repair):
|
||||||
|
assert repair("patch-tool") == "patch"
|
||||||
|
|
||||||
|
def test_camel_case_preserves_multi_word_match(self, repair):
|
||||||
|
assert repair("ReadFile_tool") == "read_file"
|
||||||
|
assert repair("WriteFileTool") == "write_file"
|
||||||
|
|
||||||
|
def test_mixed_separators_and_suffix(self, repair):
|
||||||
|
assert repair("write-file_Tool") == "write_file"
|
||||||
|
|
||||||
|
|
||||||
|
class TestEdgeCases:
|
||||||
|
"""Edge inputs that must not crash or produce surprising results."""
|
||||||
|
|
||||||
|
def test_empty_string(self, repair):
|
||||||
|
assert repair("") is None
|
||||||
|
|
||||||
|
def test_only_tool_suffix(self, repair):
|
||||||
|
# '_tool' by itself is not a valid tool name — must not match
|
||||||
|
# anything plausible.
|
||||||
|
assert repair("_tool") is None
|
||||||
|
|
||||||
|
def test_none_passed_as_name(self, repair):
|
||||||
|
# Defensive: real callers always pass str, but guard against
|
||||||
|
# a bug upstream that sends None.
|
||||||
|
assert repair(None) is None
|
||||||
|
|
||||||
|
def test_very_long_name_does_not_match_by_accident(self, repair):
|
||||||
|
# Fuzzy match should not claim a tool for something obviously unrelated.
|
||||||
|
assert repair("ThisIsNotRemotelyARealToolName_tool") is None
|
||||||
Loading…
Add table
Add a link
Reference in a new issue