hermes-agent/tests/tools/test_fuzzy_match.py
teyrebaz33 15abf4ed8f feat(patch): add 'did you mean?' feedback when patch fails to match
When patch_replace() cannot find old_string in a file, the error message
now includes the closest matching lines from the file with line numbers
and context. This helps the LLM self-correct without a separate read_file
call.

Implements Phase 1 of #536: enhanced patch error feedback with no
architectural changes.

- tools/fuzzy_match.py: new find_closest_lines() using SequenceMatcher
- tools/file_operations.py: attach closest-lines hint to patch errors
- tests/tools/test_fuzzy_match.py: 5 new tests for find_closest_lines
2026-04-21 02:03:46 -07:00

264 lines
11 KiB
Python

"""Tests for the fuzzy matching module."""
from tools.fuzzy_match import fuzzy_find_and_replace
class TestExactMatch:
def test_single_replacement(self):
content = "hello world"
new, count, _, err = fuzzy_find_and_replace(content, "hello", "hi")
assert err is None
assert count == 1
assert new == "hi world"
def test_no_match(self):
content = "hello world"
new, count, _, err = fuzzy_find_and_replace(content, "xyz", "abc")
assert count == 0
assert err is not None
assert new == content
def test_empty_old_string(self):
new, count, _, err = fuzzy_find_and_replace("abc", "", "x")
assert count == 0
assert err is not None
def test_identical_strings(self):
new, count, _, err = fuzzy_find_and_replace("abc", "abc", "abc")
assert count == 0
assert "identical" in err
def test_multiline_exact(self):
content = "line1\nline2\nline3"
new, count, _, err = fuzzy_find_and_replace(content, "line1\nline2", "replaced")
assert err is None
assert count == 1
assert new == "replaced\nline3"
class TestWhitespaceDifference:
def test_extra_spaces_match(self):
content = "def foo( x, y ):"
new, count, _, err = fuzzy_find_and_replace(content, "def foo( x, y ):", "def bar(x, y):")
assert count == 1
assert "bar" in new
class TestIndentDifference:
def test_different_indentation(self):
content = " def foo():\n pass"
new, count, _, err = fuzzy_find_and_replace(content, "def foo():\n pass", "def bar():\n return 1")
assert count == 1
assert "bar" in new
class TestReplaceAll:
def test_multiple_matches_without_flag_errors(self):
content = "aaa bbb aaa"
new, count, _, err = fuzzy_find_and_replace(content, "aaa", "ccc", replace_all=False)
assert count == 0
assert "Found 2 matches" in err
def test_multiple_matches_with_flag(self):
content = "aaa bbb aaa"
new, count, _, err = fuzzy_find_and_replace(content, "aaa", "ccc", replace_all=True)
assert err is None
assert count == 2
assert new == "ccc bbb ccc"
class TestUnicodeNormalized:
"""Tests for the unicode_normalized strategy (Bug 5)."""
def test_em_dash_matched(self):
"""Em-dash in content should match ASCII '--' in pattern."""
content = "return value\u2014fallback"
new, count, strategy, err = fuzzy_find_and_replace(
content, "return value--fallback", "return value or fallback"
)
assert count == 1, f"Expected match via unicode_normalized, got err={err}"
assert strategy == "unicode_normalized"
assert "return value or fallback" in new
def test_smart_quotes_matched(self):
"""Smart double quotes in content should match straight quotes in pattern."""
content = 'print(\u201chello\u201d)'
new, count, strategy, err = fuzzy_find_and_replace(
content, 'print("hello")', 'print("world")'
)
assert count == 1, f"Expected match via unicode_normalized, got err={err}"
assert "world" in new
def test_no_unicode_skips_strategy(self):
"""When content and pattern have no Unicode variants, strategy is skipped."""
content = "hello world"
# Should match via exact, not unicode_normalized
new, count, strategy, err = fuzzy_find_and_replace(content, "hello", "hi")
assert count == 1
assert strategy == "exact"
class TestBlockAnchorThreshold:
"""Tests for the raised block_anchor threshold (Bug 4)."""
def test_high_similarity_matches(self):
"""A block with >50% middle similarity should match."""
content = "def foo():\n x = 1\n y = 2\n return x + y\n"
pattern = "def foo():\n x = 1\n y = 9\n return x + y"
new, count, strategy, err = fuzzy_find_and_replace(content, pattern, "def foo():\n return 0\n")
# Should match via block_anchor or earlier strategy
assert count == 1
def test_completely_different_middle_does_not_match(self):
"""A block where only first+last lines match but middle is completely different
should NOT match under the raised 0.50 threshold."""
content = (
"class Foo:\n"
" completely = 'unrelated'\n"
" content = 'here'\n"
" nothing = 'in common'\n"
" pass\n"
)
# Pattern has same first/last lines but completely different middle
pattern = (
"class Foo:\n"
" x = 1\n"
" y = 2\n"
" z = 3\n"
" pass"
)
new, count, strategy, err = fuzzy_find_and_replace(content, pattern, "replaced")
# With threshold=0.50, this near-zero-similarity middle should not match
assert count == 0, (
f"Block with unrelated middle should not match under threshold=0.50, "
f"but matched via strategy={strategy}"
)
class TestStrategyNameSurfaced:
"""Tests for the strategy name in the 4-tuple return (Bug 6)."""
def test_exact_strategy_name(self):
new, count, strategy, err = fuzzy_find_and_replace("hello", "hello", "world")
assert strategy == "exact"
assert count == 1
def test_failed_match_returns_none_strategy(self):
new, count, strategy, err = fuzzy_find_and_replace("hello", "xyz", "world")
assert count == 0
assert strategy is None
class TestEscapeDriftGuard:
"""Tests for the escape-drift guard that catches bash/JSON serialization
artifacts where an apostrophe gets prefixed with a spurious backslash
in tool-call transport.
"""
def test_drift_blocked_apostrophe(self):
"""File has ', old_string and new_string both have \\' — classic
tool-call drift. Guard must block with a helpful error instead of
writing \\' literals into source code."""
content = "x = \"hello there\"\n"
# Simulate transport-corrupted old_string and new_string where an
# apostrophe-like context got prefixed with a backslash. The content
# itself has no apostrophe, but both strings do — matching via
# whitespace/anchor strategies would otherwise succeed.
old_string = "x = \"hello there\" # don\\'t edit\n"
new_string = "x = \"hi there\" # don\\'t edit\n"
# This particular pair won't match anything, so it exits via
# no-match path. Build a case where a non-exact strategy DOES match.
content = "line\n x = 1\nline"
old_string = "line\n x = \\'a\\'\nline"
new_string = "line\n x = \\'b\\'\nline"
new, count, strategy, err = fuzzy_find_and_replace(content, old_string, new_string)
assert count == 0
assert err is not None and "Escape-drift" in err
assert "backslash" in err.lower()
assert new == content # file untouched
def test_drift_blocked_double_quote(self):
"""Same idea but with \\" drift instead of \\'."""
content = 'line\n x = 1\nline'
old_string = 'line\n x = \\"a\\"\nline'
new_string = 'line\n x = \\"b\\"\nline'
new, count, strategy, err = fuzzy_find_and_replace(content, old_string, new_string)
assert count == 0
assert err is not None and "Escape-drift" in err
def test_drift_allowed_when_file_genuinely_has_backslash_escapes(self):
"""If the file already contains \\' (e.g. inside an existing escaped
string), the model is legitimately preserving it. Guard must NOT
fire."""
content = "line\n x = \\'a\\'\nline"
old_string = "line\n x = \\'a\\'\nline"
new_string = "line\n x = \\'b\\'\nline"
new, count, strategy, err = fuzzy_find_and_replace(content, old_string, new_string)
assert err is None
assert count == 1
assert "\\'b\\'" in new
def test_drift_allowed_on_exact_match(self):
"""Exact matches bypass the drift guard entirely — if the file
really contains the exact bytes old_string specified, it's not
drift."""
content = "hello \\'world\\'"
new, count, strategy, err = fuzzy_find_and_replace(
content, "hello \\'world\\'", "hello \\'there\\'"
)
assert err is None
assert count == 1
assert strategy == "exact"
def test_drift_allowed_when_adding_escaped_strings(self):
"""Model is adding new content with \\' that wasn't in the original.
old_string has no \\', so guard doesn't fire."""
content = "line1\nline2\nline3"
old_string = "line1\nline2\nline3"
new_string = "line1\nprint(\\'added\\')\nline2\nline3"
new, count, strategy, err = fuzzy_find_and_replace(content, old_string, new_string)
assert err is None
assert count == 1
assert "\\'added\\'" in new
def test_no_drift_check_when_new_string_lacks_suspect_chars(self):
"""Fast-path: if new_string has no \\' or \\", guard must not
fire even on fuzzy match."""
content = "def foo():\n pass" # extra space ignored by line_trimmed
old_string = "def foo():\n pass"
new_string = "def bar():\n return 1"
new, count, strategy, err = fuzzy_find_and_replace(content, old_string, new_string)
assert err is None
assert count == 1
class TestFindClosestLines:
def setup_method(self):
from tools.fuzzy_match import find_closest_lines
self.find_closest_lines = find_closest_lines
def test_finds_similar_line(self):
content = "def foo():\n pass\ndef bar():\n return 1\n"
result = self.find_closest_lines("def baz():", content)
assert "def foo" in result or "def bar" in result
def test_returns_empty_for_no_match(self):
content = "completely different content here"
result = self.find_closest_lines("xyzzy_no_match_possible_!!!", content)
assert result == ""
def test_returns_empty_for_empty_inputs(self):
assert self.find_closest_lines("", "some content") == ""
assert self.find_closest_lines("old string", "") == ""
def test_includes_context_lines(self):
content = "line1\nline2\ndef target():\n pass\nline5\n"
result = self.find_closest_lines("def target():", content)
assert "target" in result
def test_includes_line_numbers(self):
content = "line1\nline2\ndef foo():\n pass\n"
result = self.find_closest_lines("def foo():", content)
# Should include line numbers in format "N| content"
assert "|" in result