From f23d077b5f04facf78e895eb41788fc6ca148951 Mon Sep 17 00:00:00 2001
From: liuhao1024 <sunsky.lau@gmail.com>
Date: Thu, 25 Jun 2026 21:46:15 +0800
Subject: [PATCH] fix(fuzzy-match): preserve boundary space after
 whitespace-normalized match
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The trailing-whitespace expansion in _map_normalized_positions
unconditionally consumed whitespace after the matched region — including
the word-boundary space that separates the match from the next token.
This caused silent file corruption when the fuzzy matcher fell back to
the whitespace_normalized strategy.

Guard the expansion on the normalized match actually ending with
whitespace (i.e. the original had a run of spaces that were collapsed).
When the match ends with a non-space character, the first whitespace in
the original is a boundary and must not be consumed.

Fixes #52491
---
 tests/tools/test_fuzzy_match.py | 41 +++++++++++++++++++++++++++++++++
 tools/fuzzy_match.py            | 11 ++++++---
 2 files changed, 49 insertions(+), 3 deletions(-)

diff --git a/tests/tools/test_fuzzy_match.py b/tests/tools/test_fuzzy_match.py
index f81d0437434..0a7ce464f44 100644
--- a/tests/tools/test_fuzzy_match.py
+++ b/tests/tools/test_fuzzy_match.py
@@ -43,6 +43,47 @@ class TestWhitespaceDifference:
         assert count == 1
         assert "bar" in new
 
+    def test_boundary_space_preserved_after_match(self):
+        """Regression: whitespace_normalized match ending with a non-space
+        character must NOT consume the word-boundary space that follows.
+        https://github.com/NousResearch/hermes-agent/issues/52491"""
+        # Simple word boundary: the space between "bar" and "baz" must remain.
+        new, count, strategy, err = fuzzy_find_and_replace(
+            "foo   bar baz", "foo bar", "XY",
+        )
+        assert err is None
+        assert count == 1
+        assert strategy == "whitespace_normalized"
+        assert new == "XY baz", f"Boundary space deleted: {new!r}"
+
+    def test_boundary_space_preserved_in_code_edit(self):
+        """Regression: the double space in ``compute(a,  b)`` forces the
+        whitespace_normalized strategy; the space before ``+`` must survive."""
+        content = "result = compute(a,  b) + tail"
+        new, count, strategy, err = fuzzy_find_and_replace(
+            content, "compute(a, b)", "compute(a, b, c)",
+        )
+        assert err is None
+        assert count == 1
+        assert strategy == "whitespace_normalized"
+        assert new == "result = compute(a, b, c) + tail", f"Boundary space deleted: {new!r}"
+
+    def test_trailing_ws_still_consumed_when_match_ends_with_space(self):
+        """A whitespace run *inside* the match is replaced in full, while
+        the single space that follows the match is left untouched.
+        NOTE(review): the pattern ends with "+", not a space, despite the name."""
+        # The content has three spaces between "foo" and "+", the pattern
+        # has one, so only the whitespace-normalized form "foo +" matches.
+        # The replaced span is expected to be "foo   +" (all three spaces).
+        new, count, strategy, err = fuzzy_find_and_replace(
+            "a = foo   + bar", "foo +", "XY",
+        )
+        assert err is None
+        assert count == 1
+        assert strategy == "whitespace_normalized"
+        # The space between "+" and "bar" is a boundary and must survive.
+        assert new == "a = XY bar", f"Unexpected result: {new!r}"
+
 
 class TestIndentDifference:
     def test_different_indentation(self):
diff --git a/tools/fuzzy_match.py b/tools/fuzzy_match.py
index 5ebb2b8b26f..709cde10fc3 100644
--- a/tools/fuzzy_match.py
+++ b/tools/fuzzy_match.py
@@ -768,9 +768,14 @@ def _map_normalized_positions(original: str, normalized: str,
         else:
             orig_end = orig_start + (norm_end - norm_start)
         
-        # Expand to include trailing whitespace that was normalized
-        while orig_end < len(original) and original[orig_end] in ' \t':
-            orig_end += 1
+        # Expand to include trailing whitespace that was normalized,
+        # but only when the normalized match itself ended with whitespace.
+        # When the match ends with a non-space character, the first
+        # whitespace in the original is a word boundary and must not be
+        # consumed.  See https://github.com/NousResearch/hermes-agent/issues/52491
+        if norm_end < len(normalized) and normalized[norm_end - 1] == ' ':
+            while orig_end < len(original) and original[orig_end] in ' \t':
+                orig_end += 1
         
         original_matches.append((orig_start, min(orig_end, len(original))))