fix(tools): unescape common sequences in new_string when escape_normalized matches

When the patch tool matches via the escape_normalized strategy, old_string contains literal \t, \n, or \r sequences (a backslash followed by a letter) that are unescaped so they match the real control characters in the file. However, new_string was previously written as-is, leaving literal backslash sequences in the output. Add a _unescape_common_sequences() helper and apply it to new_string whenever the matching strategy is escape_normalized, so that LLM-generated tab/newline/carriage-return sequences become real control characters in the patched file. Fixes #33733
2026-06-06 07:51:53 +00:00 · 2026-05-28 16:10:00 +08:00 · 2026-05-28 16:10:00 +08:00 · e9f3f2b34a
commit e9f3f2b34a
parent 10ee4a729b
2 changed files with 94 additions and 1 deletion
--- a/tools/fuzzy_match.py
+++ b/tools/fuzzy_match.py
@@ -113,8 +113,16 @@ def fuzzy_find_and_replace(content: str, old_string: str, new_string: str,
            # old_string/new_string — e.g. LLM used 2-space indent but the
            # file is 4-space. Shift new_string by the indentation delta so
            # the replacement matches the file's actual indent pattern.
+            effective_new = new_string
+            if strategy_name == "escape_normalized":
+                # The escape_normalized strategy matched because old_string
+                # contained literal \t/\n/\r that were unescaped to match
+                # real control characters in the file. Apply the same
+                # unescaping to new_string so we don't write literal
+                # backslash sequences where the file has real tabs/newlines.
+                effective_new = _unescape_common_sequences(new_string)
            new_content = _apply_replacements(
-                content, matches, new_string,
+                content, matches, effective_new,
                old_string=old_string if strategy_name != "exact" else None,
            )
            return new_content, len(matches), strategy_name, None
@@ -247,6 +255,19 @@ def _reindent_replacement(file_region: str, old_string: str, new_string: str) ->
    return "\n".join(out_lines)


+def _unescape_common_sequences(s: str) -> str:
+    """Replace literal ``\\t``, ``\\n`` and ``\\r`` pairs with real control chars.
+
+    Each backslash + letter pair becomes the matching tab, newline or carriage
+    return. Substitution is purely textual: an escaped backslash followed by
+    such a letter (``\\\\t``) is rewritten too, yielding backslash + tab.
+
+    Only call this when the matching strategy confirmed that the file already
+    contains real control characters (i.e. ``escape_normalized`` matched).
+    """
+    return s.replace('\\t', '\t').replace('\\n', '\n').replace('\\r', '\r')
+
+
 def _apply_replacements(content: str, matches: List[Tuple[int, int]],
                        new_string: str, old_string: Optional[str] = None) -> str:
    """