mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-07 08:02:23 +00:00
feat(patch): indentation preservation, CRLF preservation, per-file failure escalation (#507) (#32273)
Three granular patch-tool refinements from the Roo Code deep-dive (#507).

## Indentation preservation (fuzzy_match.py)

When fuzzy_find_and_replace matches via a non-exact strategy, the file's indentation may differ from what the LLM sent in old_string/new_string (common case: model sends zero-indent old/new for a method body that lives inside an 8-space-indented class). Before this commit the replacement was spliced in verbatim, producing a file with a broken indent level that may still parse but is logically wrong.

The fix computes the indent delta between old_string's first meaningful line and the matched region's first meaningful line, then re-indents every line of new_string by that delta. Exact-strategy matches are untouched (passthrough). Same approach as Roo Code's multi-search-replace.ts:466-500.

## CRLF preservation (file_operations.py)

Models nearly always send tool args with bare LF endings (JSON-encoded), but the file on disk may have CRLF (Windows-line-ending configs, .bat, .cmd, .ini files). Before this commit:

- write_file silently normalized CRLF to LF on every overwrite
- patch produced mixed-ending files: the substituted region had LF, the surrounding context kept CRLF

The fix detects the file's existing line endings (via pre_content if already read for lint/LSP, otherwise a tiny head -c 4096 probe), and normalizes the entire write to that ending. New files are written verbatim (no detection possible).

## Per-file failure escalation (file_tools.py)

When the agent fails to patch the same file 3+ times in a row, the existing 'old_string not found' hint isn't strong enough — the model keeps retrying with variations against a stale view of the file.

The fix tracks consecutive failures per (task_id, resolved_path) and injects an escalating hint after 3 failures: 'This is failure #N patching X. Stop retrying. Either re-read fresh, use longer context, or fall back to write_file.' Counter resets on a successful patch to the same path.
## Validation

- 22 new tests across tests/tools/test_fuzzy_match.py (5), test_line_ending_preservation.py (12), test_patch_failure_tracking.py (5)
- All existing tests pass (165/165 in the touched files)
- E2E verified with real _handle_patch / _handle_write_file calls against real CRLF files and real failure loops

Closes part of #507. The remaining open items in #507 (2b start_line hint, behavioral rules) were declined after audit:

- 2b adds schema bloat for a problem the existing 'multiple matches' contract already handles
- Behavioral rules conflict with the personality system

Items 1, 2d, 2e, 3, 4 of #507 were already landed in earlier work.
This commit is contained in:
parent
c2aa235328
commit
6bd0be30be
6 changed files with 824 additions and 10 deletions
|
|
@ -108,8 +108,15 @@ def fuzzy_find_and_replace(content: str, old_string: str, new_string: str,
|
|||
if drift_err:
|
||||
return content, 0, None, drift_err
|
||||
|
||||
# Perform replacement
|
||||
new_content = _apply_replacements(content, matches, new_string)
|
||||
# Perform replacement. When the matched strategy is NOT `exact`,
|
||||
# the file's indentation may differ from what the LLM sent in
|
||||
# old_string/new_string — e.g. LLM used 2-space indent but the
|
||||
# file is 4-space. Shift new_string by the indentation delta so
|
||||
# the replacement matches the file's actual indent pattern.
|
||||
new_content = _apply_replacements(
|
||||
content, matches, new_string,
|
||||
old_string=old_string if strategy_name != "exact" else None,
|
||||
)
|
||||
return new_content, len(matches), strategy_name, None
|
||||
|
||||
# No strategy found a match
|
||||
|
|
@ -156,26 +163,119 @@ def _detect_escape_drift(content: str, matches: List[Tuple[int, int]],
|
|||
return None
|
||||
|
||||
|
||||
def _apply_replacements(content: str, matches: List[Tuple[int, int]], new_string: str) -> str:
|
||||
def _leading_whitespace(line: str) -> str:
|
||||
"""Return the leading whitespace prefix of a line (spaces/tabs)."""
|
||||
i = 0
|
||||
while i < len(line) and line[i] in (" ", "\t"):
|
||||
i += 1
|
||||
return line[:i]
|
||||
|
||||
|
||||
def _first_meaningful_line(text: str) -> Optional[str]:
|
||||
"""Return the first line of ``text`` that has any non-whitespace content.
|
||||
|
||||
Returns ``None`` if no such line exists (text is empty or all whitespace).
|
||||
"""
|
||||
for line in text.split("\n"):
|
||||
if line.strip():
|
||||
return line
|
||||
return None
|
||||
|
||||
|
||||
def _reindent_replacement(file_region: str, old_string: str, new_string: str) -> str:
    """Adjust ``new_string`` so its indentation matches ``file_region``.

    Used after a non-exact fuzzy match: the LLM may have sent old_string and
    new_string with a different base indent than the file actually has (e.g.
    zero-indent tool args for code that sits inside an indented class). The
    fuzzy strategy matched anyway, but writing ``new_string`` verbatim would
    corrupt the file's indentation.

    Approach:

    1. The LLM's base indent is the leading whitespace of the first
       non-blank line of ``old_string``; the file's base indent is the
       leading whitespace of the first non-blank line of ``file_region``.
    2. A line of ``new_string`` whose indent starts with the LLM base has
       that base prefix swapped for the file base; any extra indent the
       LLM added on top is kept as-is.
    3. A line whose indent is a strict prefix of the LLM base (a dedent
       below the base) gets the file base shortened by the same number of
       characters, clamped at column zero, so the relative dedent survives.
    4. Any other non-blank line (its indent is neither an extension nor a
       prefix of the LLM base, e.g. tabs vs spaces) is anchored directly
       to the file base.

    Blank and whitespace-only lines are passed through untouched.

    No-op cases (returns ``new_string`` unchanged):
      - new_string is empty
      - file_region or old_string has no non-blank line
      - LLM base indent equals file base indent

    Args:
        file_region: The text in the file that the fuzzy strategy matched.
        old_string: The search text as sent by the LLM.
        new_string: The replacement text as sent by the LLM.

    Returns:
        ``new_string`` with each non-blank line re-anchored to the file's
        base indent, joined with ``"\\n"``.
    """
    if not new_string:
        return new_string

    old_first = _first_meaningful_line(old_string)
    file_first = _first_meaningful_line(file_region)
    if old_first is None or file_first is None:
        return new_string

    old_indent = _leading_whitespace(old_first)
    file_indent = _leading_whitespace(file_first)
    if old_indent == file_indent:
        return new_string

    out_lines: List[str] = []
    for line in new_string.split("\n"):
        if not line.strip():
            # Blank lines: leave whitespace untouched.
            out_lines.append(line)
            continue

        line_indent = _leading_whitespace(line)
        if line_indent.startswith(old_indent):
            # Common case: line carries the LLM's base indent (possibly plus
            # extra). Swap the base prefix for the file's base prefix.
            out_lines.append(file_indent + line[len(old_indent):])
            continue

        if old_indent.startswith(line_indent):
            # Genuine dedent below the LLM's base, in the same whitespace
            # style: pull the file base back by the same amount so the
            # line keeps its position relative to the anchor line.
            deficit = len(old_indent) - len(line_indent)
            anchor = file_indent[: max(0, len(file_indent) - deficit)]
        else:
            # Indent style is incomparable with the LLM base; the safest
            # choice is the file's base indent.
            anchor = file_indent
        out_lines.append(anchor + line.lstrip(" \t"))

    return "\n".join(out_lines)
|
||||
|
||||
|
||||
def _apply_replacements(content: str, matches: List[Tuple[int, int]],
|
||||
new_string: str, old_string: Optional[str] = None) -> str:
|
||||
"""
|
||||
Apply replacements at the given positions.
|
||||
|
||||
|
||||
Args:
|
||||
content: Original content
|
||||
matches: List of (start, end) positions to replace
|
||||
new_string: Replacement text
|
||||
|
||||
old_string: When non-None, signals that the match came from a
|
||||
non-exact fuzzy strategy; ``new_string`` is re-indented to
|
||||
match the file's actual indentation before substitution.
|
||||
|
||||
Returns:
|
||||
Content with replacements applied
|
||||
"""
|
||||
# Sort matches by position (descending) to replace from end to start
|
||||
# This preserves positions of earlier matches
|
||||
sorted_matches = sorted(matches, key=lambda x: x[0], reverse=True)
|
||||
|
||||
|
||||
result = content
|
||||
for start, end in sorted_matches:
|
||||
result = result[:start] + new_string + result[end:]
|
||||
|
||||
if old_string is not None:
|
||||
file_region = content[start:end]
|
||||
adjusted = _reindent_replacement(file_region, old_string, new_string)
|
||||
else:
|
||||
adjusted = new_string
|
||||
result = result[:start] + adjusted + result[end:]
|
||||
|
||||
return result
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue