mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-08 08:11:38 +00:00
feat(patch): indentation preservation, CRLF preservation, per-file failure escalation (#507) (#32273)
Three granular patch-tool refinements from the Roo Code deep-dive (#507). ## Indentation preservation (fuzzy_match.py) When fuzzy_find_and_replace matches via a non-exact strategy, the file's indentation may differ from what the LLM sent in old_string/new_string (common case: model sends zero-indent old/new for a method body that lives inside an 8-space-indented class). Before this commit the replacement was spliced in verbatim, producing a file with a broken indent level that may still parse but is logically wrong. The fix computes the indent delta between old_string's first meaningful line and the matched region's first meaningful line, then re-indents every line of new_string by that delta. Exact-strategy matches are untouched (passthrough). Same approach as Roo Code's multi-search-replace.ts:466-500. ## CRLF preservation (file_operations.py) Models nearly always send tool args with bare LF endings (JSON-encoded), but the file on disk may have CRLF (Windows-line-ending configs, .bat, .cmd, .ini files). Before this commit: - write_file silently normalized CRLF to LF on every overwrite - patch produced mixed-ending files: the substituted region had LF, the surrounding context kept CRLF The fix detects the file's existing line endings (via pre_content if already read for lint/LSP, otherwise a tiny head -c 4096 probe), and normalizes the entire write to that ending. New files are written verbatim (no detection possible). ## Per-file failure escalation (file_tools.py) When the agent fails to patch the same file 3+ times in a row, the existing 'old_string not found' hint isn't strong enough — the model keeps retrying with variations against a stale view of the file. The fix tracks consecutive failures per (task_id, resolved_path) and injects an escalating hint after 3 failures: 'This is failure #N patching X. Stop retrying. Either re-read fresh, use longer context, or fall back to write_file.' Counter resets on a successful patch to the same path. ## Validation - 22 new tests across tests/tools/test_fuzzy_match.py (5), test_line_ending_preservation.py (12), test_patch_failure_tracking.py (5) - All existing tests pass (165/165 in the touched files) - E2E verified with real _handle_patch / _handle_write_file calls against real CRLF files and real failure loops Closes part of #507. The remaining open items in #507 (2b start_line hint, behavioral rules) were declined after audit: - 2b adds schema bloat for a problem the existing 'multiple matches' contract already handles - Behavioral rules conflict with the personality system Items 1, 2d, 2e, 3, 4 of #507 were already landed in earlier work.
This commit is contained in:
parent
c2aa235328
commit
6bd0be30be
6 changed files with 824 additions and 10 deletions
|
|
@ -52,6 +52,106 @@ class TestIndentDifference:
|
|||
assert "bar" in new
|
||||
|
||||
|
||||
class TestIndentationPreservation:
|
||||
"""When a non-exact strategy matches, ``new_string`` should be re-indented
|
||||
so it lands at the file's actual indent depth — not at whatever indent the
|
||||
LLM happened to send in the tool args. Without this fix the file gets a
|
||||
silently-broken indent level that may even still parse but is logically
|
||||
wrong."""
|
||||
|
||||
def test_unindented_input_reindented_to_match_file(self):
|
||||
# File: 8-space-indented method body inside a class.
|
||||
content = (
|
||||
"class Calculator:\n"
|
||||
" def add(self, a, b):\n"
|
||||
" result = a + b\n"
|
||||
" return result\n"
|
||||
)
|
||||
# LLM sends zero-indent old/new — common bug from frontier models
|
||||
# that "remember" code instead of reading it.
|
||||
old = "result = a + b\nreturn result"
|
||||
new = "result = a + b\nresult *= 2\nreturn result"
|
||||
out, count, strategy, err = fuzzy_find_and_replace(content, old, new)
|
||||
assert err is None and count == 1
|
||||
assert strategy != "exact" # must have gone through a fuzzy strategy
|
||||
# Every replaced line should be at 8-space indent.
|
||||
for marker in ("result = a + b", "result *= 2", "return result"):
|
||||
line = next(line for line in out.split("\n") if marker in line)
|
||||
indent = len(line) - len(line.lstrip())
|
||||
assert indent == 8, f"Expected 8-space indent for {marker!r}, got {indent}: {line!r}"
|
||||
# Resulting file must still be valid Python.
|
||||
import ast
|
||||
ast.parse(out)
|
||||
|
||||
def test_dedent_at_start_anchors_to_file_base(self):
|
||||
# File: 2-space-indented function body. LLM sends zero-indent
|
||||
# old/new where new_string contains a dedent (the new structure
|
||||
# adds a top-level class wrapper). After re-indent, every line
|
||||
# of new_string should be anchored to the file's 2-space base.
|
||||
content = " return 1\n return 2\n"
|
||||
old = "return 1\nreturn 2" # zero-indent — forces line_trimmed
|
||||
new = "class X:\n return 99\n return 100"
|
||||
out, count, strategy, err = fuzzy_find_and_replace(content, old, new)
|
||||
assert err is None and count == 1
|
||||
assert strategy != "exact"
|
||||
lines = out.split("\n")
|
||||
# 'class X:' anchored to file's 2-space base.
|
||||
assert lines[0] == " class X:", repr(lines[0])
|
||||
# Indented body lines lift to 4-space (file base + LLM's +2).
|
||||
assert lines[1] == " return 99", repr(lines[1])
|
||||
assert lines[2] == " return 100", repr(lines[2])
|
||||
|
||||
def test_exact_match_no_reindent(self):
|
||||
# Exact strategy should be a pure passthrough — no shift logic
|
||||
# should touch the result.
|
||||
content = " def foo():\n return 1\n"
|
||||
old = " def foo():\n return 1"
|
||||
new = " def foo():\n return 2"
|
||||
out, count, strategy, err = fuzzy_find_and_replace(content, old, new)
|
||||
assert err is None and strategy == "exact"
|
||||
assert out == " def foo():\n return 2\n"
|
||||
|
||||
def test_llm_zero_indent_shifts_to_file_two_space(self):
|
||||
# LLM sent zero-indent old/new; file has 2-space indent. The
|
||||
# re-indent shifts the whole replacement so 'def x()' lands at
|
||||
# 2-space and the body keeps its relative +2 from new_string.
|
||||
content = " def x():\n return 1\n"
|
||||
old = "def x():\n return 1"
|
||||
new = "def x():\n return 99"
|
||||
out, count, _, err = fuzzy_find_and_replace(content, old, new)
|
||||
assert err is None and count == 1
|
||||
lines = out.strip("\n").split("\n")
|
||||
assert lines[0] == " def x():"
|
||||
assert lines[1] == " return 99"
|
||||
|
||||
def test_indent_already_matches_passthrough(self):
|
||||
# When old_string's base indent already equals file_region's base
|
||||
# indent, _reindent_replacement returns new_string unchanged.
|
||||
# Verify with whitespace_normalized strategy (collapsed spaces).
|
||||
content = " def x( ):\n return 1\n"
|
||||
old = " def x():\n return 1" # same base indent (2), different inner whitespace
|
||||
new = " def x():\n return 42"
|
||||
out, count, strategy, err = fuzzy_find_and_replace(content, old, new)
|
||||
assert err is None and count == 1
|
||||
assert strategy != "exact" # non-exact strategy matched
|
||||
# Body retains its 4-space indent (passthrough — no shift).
|
||||
assert " return 42" in out
|
||||
|
||||
def test_blank_lines_left_alone(self):
|
||||
# Blank lines in new_string should keep whatever whitespace they
|
||||
# had — we never strip or pad them.
|
||||
content = " a = 1\n b = 2\n"
|
||||
old = "a = 1\nb = 2"
|
||||
new = "a = 1\n\nb = 99"
|
||||
out, count, _, err = fuzzy_find_and_replace(content, old, new)
|
||||
assert err is None and count == 1
|
||||
# blank line is preserved (empty), indented lines anchored.
|
||||
lines = out.split("\n")
|
||||
assert lines[0] == " a = 1"
|
||||
assert lines[1] == ""
|
||||
assert lines[2] == " b = 99"
|
||||
|
||||
|
||||
class TestReplaceAll:
|
||||
def test_multiple_matches_without_flag_errors(self):
|
||||
content = "aaa bbb aaa"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue