mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-29 06:31:32 +00:00
fix(tools): unescape common sequences in new_string when escape_normalized matches
When the patch tool matches via the escape_normalized strategy, old_string contains literal \t, \n, \r sequences that get unescaped to match real control characters in the file. However, new_string was written as-is, leaving literal backslash sequences in the output. Add _unescape_common_sequences() helper and apply it to new_string when the matching strategy is escape_normalized. This ensures LLM-generated tab/newline sequences become real bytes in the patched file. Fixes #33733
This commit is contained in:
parent
10ee4a729b
commit
e9f3f2b34a
2 changed files with 94 additions and 1 deletions
|
|
@ -429,3 +429,75 @@ class TestFormatNoMatchHint:
|
|||
)
|
||||
assert result == ""
|
||||
|
||||
|
||||
class TestEscapeNormalizedNewString:
|
||||
"""Regression tests for unescaping common sequences in new_string
|
||||
when the match succeeded via the escape_normalized strategy.
|
||||
|
||||
Issue #33733: LLMs overwhelmingly represent tabs as the two-character
|
||||
sequence \\t (backslash + t) in JSON tool-call arguments. When the file
|
||||
contains real tab bytes (0x09), the escape_normalized strategy matches
|
||||
old_string by unescaping it — but new_string was written as-is, leaving
|
||||
literal \\t characters in the file.
|
||||
"""
|
||||
|
||||
def test_tab_in_new_string_unescaped(self):
|
||||
"""File has real tab, model sends literal \\t in new_string."""
|
||||
content = "def hello():\n\tprint(\"before\")\n"
|
||||
old_string = "def hello():\n\\tprint(\"before\")\n"
|
||||
new_string = "def hello():\n\\tprint(\"after\")\n"
|
||||
new, count, strategy, err = fuzzy_find_and_replace(content, old_string, new_string)
|
||||
assert err is None, f"Unexpected error: {err}"
|
||||
assert count == 1
|
||||
assert strategy == "escape_normalized"
|
||||
# Must contain a real tab, not literal \t
|
||||
assert "\tprint(\"after\")" in new
|
||||
assert "\\t" not in new
|
||||
|
||||
def test_newline_in_new_string_unescaped(self):
|
||||
"""File has real newlines, model sends literal \\n in new_string."""
|
||||
content = "line1\nline2\nline3\n"
|
||||
old_string = "line1\\nline2\\n"
|
||||
new_string = "line1\\nreplaced\\n"
|
||||
new, count, strategy, err = fuzzy_find_and_replace(content, old_string, new_string)
|
||||
assert err is None, f"Unexpected error: {err}"
|
||||
assert count == 1
|
||||
assert strategy == "escape_normalized"
|
||||
assert "replaced" in new
|
||||
assert "\\n" not in new
|
||||
|
||||
def test_carriage_return_in_new_string_unescaped(self):
|
||||
"""File has real CR, model sends literal \\r in new_string."""
|
||||
content = "line1\r\nline2\r\n"
|
||||
old_string = "line1\\r\\nline2\\r\\n"
|
||||
new_string = "replaced\\r\\n"
|
||||
new, count, strategy, err = fuzzy_find_and_replace(content, old_string, new_string)
|
||||
assert err is None, f"Unexpected error: {err}"
|
||||
assert count == 1
|
||||
assert strategy == "escape_normalized"
|
||||
assert "replaced\r" in new
|
||||
|
||||
def test_mixed_escape_sequences_in_new_string(self):
|
||||
"""Model sends \\t and \\n together in new_string."""
|
||||
content = "def foo():\n\tpass\n"
|
||||
old_string = "def foo():\\n\\tpass\\n"
|
||||
new_string = "def bar():\\n\\treturn 1\\n"
|
||||
new, count, strategy, err = fuzzy_find_and_replace(content, old_string, new_string)
|
||||
assert err is None, f"Unexpected error: {err}"
|
||||
assert count == 1
|
||||
assert strategy == "escape_normalized"
|
||||
assert "\treturn 1" in new
|
||||
assert "\\t" not in new
|
||||
assert "\\n" not in new
|
||||
|
||||
def test_exact_match_preserves_literal_escapes(self):
|
||||
"""When matching via exact strategy, literal \\t in new_string should
|
||||
NOT be unescaped — the file genuinely contains backslash-t characters."""
|
||||
content = "path = \"C:\\\\Users\\\"\n"
|
||||
old_string = "path = \"C:\\\\Users\\\"\n"
|
||||
new_string = "path = \"D:\\\\data\\\"\n"
|
||||
new, count, strategy, err = fuzzy_find_and_replace(content, old_string, new_string)
|
||||
assert err is None
|
||||
assert strategy == "exact"
|
||||
# Literal backslashes should be preserved
|
||||
assert "\\\\" in new
|
||||
|
|
|
|||
|
|
@ -113,8 +113,16 @@ def fuzzy_find_and_replace(content: str, old_string: str, new_string: str,
|
|||
# old_string/new_string — e.g. LLM used 2-space indent but the
|
||||
# file is 4-space. Shift new_string by the indentation delta so
|
||||
# the replacement matches the file's actual indent pattern.
|
||||
effective_new = new_string
|
||||
if strategy_name == "escape_normalized":
|
||||
# The escape_normalized strategy matched because old_string
|
||||
# contained literal \t/\n/\r that were unescaped to match
|
||||
# real control characters in the file. Apply the same
|
||||
# unescaping to new_string so we don't write literal
|
||||
# backslash sequences where the file has real tabs/newlines.
|
||||
effective_new = _unescape_common_sequences(new_string)
|
||||
new_content = _apply_replacements(
|
||||
content, matches, new_string,
|
||||
content, matches, effective_new,
|
||||
old_string=old_string if strategy_name != "exact" else None,
|
||||
)
|
||||
return new_content, len(matches), strategy_name, None
|
||||
|
|
@ -247,6 +255,19 @@ def _reindent_replacement(file_region: str, old_string: str, new_string: str) ->
|
|||
return "\n".join(out_lines)
|
||||
|
||||
|
||||
def _unescape_common_sequences(s: str) -> str:
|
||||
"""Unescape common C-style escape sequences that LLMs produce literally.
|
||||
|
||||
When the model sends ``\\t`` (two characters: backslash + t) instead of a
|
||||
real tab byte (0x09), the patch tool would write the literal characters.
|
||||
This helper converts common escape sequences to their actual byte values.
|
||||
|
||||
Only call this when the matching strategy confirmed that the file already
|
||||
contains real control characters (i.e. ``escape_normalized`` matched).
|
||||
"""
|
||||
return s.replace('\\t', '\t').replace('\\n', '\n').replace('\\r', '\r')
|
||||
|
||||
|
||||
def _apply_replacements(content: str, matches: List[Tuple[int, int]],
|
||||
new_string: str, old_string: Optional[str] = None) -> str:
|
||||
"""
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue