Merge pull request #52678 from kshitijk4poor/salvage/52502-fuzzy-boundary

fix(fuzzy-match): preserve boundary space after whitespace-normalized match (#52491)
This commit is contained in:
kshitij 2026-06-26 10:59:14 +05:30 committed by GitHub
commit a28b939092
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 49 additions and 3 deletions

View file

@ -43,6 +43,47 @@ class TestWhitespaceDifference:
assert count == 1
assert "bar" in new
def test_boundary_space_preserved_after_match(self):
    """A whitespace_normalized match ending on a non-space character must
    leave the word-boundary space that follows it untouched.

    Regression test for
    https://github.com/NousResearch/hermes-agent/issues/52491
    """
    # Simple word boundary: the replacement has to stay separated from "baz".
    # NOTE(review): as written here the pattern occurs verbatim in the
    # content; confirm the spacing inside these literals is as intended,
    # since the test expects the "whitespace_normalized" strategy.
    outcome = fuzzy_find_and_replace("foo bar baz", "foo bar", "XY")
    updated, replaced, used_strategy, error = outcome

    assert error is None
    assert replaced == 1
    assert used_strategy == "whitespace_normalized"
    assert updated == "XY baz", f"Boundary space deleted: {updated!r}"
def test_boundary_space_preserved_in_code_edit(self):
    """Code-edit flavour of the boundary-space regression: the space in
    front of the operator that follows the edited call must survive a
    whitespace-normalized match.
    """
    # NOTE(review): as written here the pattern occurs verbatim in the
    # source line; confirm the spacing inside these literals is as intended,
    # since the test expects the "whitespace_normalized" strategy.
    source_line = "result = compute(a, b) + tail"
    old_call = "compute(a, b)"
    new_call = "compute(a, b, c)"

    updated, replaced, used_strategy, error = fuzzy_find_and_replace(
        source_line, old_call, new_call
    )

    assert error is None
    assert replaced == 1
    assert used_strategy == "whitespace_normalized"
    assert updated == "result = compute(a, b, c) + tail", f"Boundary space deleted: {updated!r}"
def test_trailing_ws_still_consumed_when_match_ends_with_space(self):
    """Check that a replacement inside an expression succeeds and keeps
    the text that follows the match.

    NOTE(review): despite the test name, the pattern passed below
    ("foo +") ends with "+", not with whitespace, so this case does not
    obviously exercise a match that ends in whitespace. Confirm whether
    the pattern or the test name should change.
    """
    new, count, strategy, err = fuzzy_find_and_replace(
        "a = foo + bar", "foo +", "XY",
    )
    assert err is None
    assert count == 1
    # Only the presence of the replacement and of the trailing operand is
    # pinned here; the exact spacing between them is deliberately not
    # asserted. Separate asserts make it clear which expectation failed.
    assert "XY" in new, f"Replacement text missing: {new!r}"
    assert "bar" in new, f"Text after the match was lost: {new!r}"
class TestIndentDifference:
def test_different_indentation(self):

View file

@ -768,9 +768,14 @@ def _map_normalized_positions(original: str, normalized: str,
else:
orig_end = orig_start + (norm_end - norm_start)
# Expand to include trailing whitespace that was normalized
while orig_end < len(original) and original[orig_end] in ' \t':
orig_end += 1
# Expand to include trailing whitespace that was normalized,
# but only when the normalized match itself ended with whitespace.
# When the match ends with a non-space character, the first
# whitespace in the original is a word boundary and must not be
# consumed. See https://github.com/NousResearch/hermes-agent/issues/52491
if norm_end < len(normalized) and normalized[norm_end - 1] == ' ':
while orig_end < len(original) and original[orig_end] in ' \t':
orig_end += 1
original_matches.append((orig_start, min(orig_end, len(original))))