mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-07 08:02:23 +00:00
feat(patch): indentation preservation, CRLF preservation, per-file failure escalation (#507) (#32273)
Three granular patch-tool refinements from the Roo Code deep-dive (#507). ## Indentation preservation (fuzzy_match.py) When fuzzy_find_and_replace matches via a non-exact strategy, the file's indentation may differ from what the LLM sent in old_string/new_string (common case: model sends zero-indent old/new for a method body that lives inside an 8-space-indented class). Before this commit the replacement was spliced in verbatim, producing a file with a broken indent level that may still parse but is logically wrong. The fix computes the indent delta between old_string's first meaningful line and the matched region's first meaningful line, then re-indents every line of new_string by that delta. Exact-strategy matches are untouched (passthrough). Same approach as Roo Code's multi-search-replace.ts:466-500. ## CRLF preservation (file_operations.py) Models nearly always send tool args with bare LF endings (JSON-encoded), but the file on disk may have CRLF (Windows-line-ending configs, .bat, .cmd, .ini files). Before this commit: - write_file silently normalized CRLF to LF on every overwrite - patch produced mixed-ending files: the substituted region had LF, the surrounding context kept CRLF The fix detects the file's existing line endings (via pre_content if already read for lint/LSP, otherwise a tiny head -c 4096 probe), and normalizes the entire write to that ending. New files are written verbatim (no detection possible). ## Per-file failure escalation (file_tools.py) When the agent fails to patch the same file 3+ times in a row, the existing 'old_string not found' hint isn't strong enough — the model keeps retrying with variations against a stale view of the file. The fix tracks consecutive failures per (task_id, resolved_path) and injects an escalating hint after 3 failures: 'This is failure #N patching X. Stop retrying. Either re-read fresh, use longer context, or fall back to write_file.' Counter resets on a successful patch to the same path. ## Validation - 22 new tests across tests/tools/test_fuzzy_match.py (5), test_line_ending_preservation.py (12), test_patch_failure_tracking.py (5) - All existing tests pass (165/165 in the touched files) - E2E verified with real _handle_patch / _handle_write_file calls against real CRLF files and real failure loops Closes part of #507. The remaining open items in #507 (2b start_line hint, behavioral rules) were declined after audit: - 2b adds schema bloat for a problem the existing 'multiple matches' contract already handles - Behavioral rules conflict with the personality system Items 1, 2d, 2e, 3, 4 of #507 were already landed in earlier work.
This commit is contained in:
parent
c2aa235328
commit
6bd0be30be
6 changed files with 824 additions and 10 deletions
|
|
@ -74,6 +74,46 @@ def _strip_terminal_fence_leaks(text: str) -> str:
|
|||
return "".join(cleaned_lines)
|
||||
|
||||
|
||||
def _detect_line_ending(sample: str) -> Optional[str]:
|
||||
"""Return the dominant line ending in ``sample`` or None if undetermined.
|
||||
|
||||
Looks at the first few line breaks and picks ``\\r\\n`` if any are
|
||||
present (Windows / DOS), otherwise ``\\n`` (Unix). Returns ``None``
|
||||
for empty / single-line content where we can't tell. Used to
|
||||
preserve the file's original line endings across write_file and
|
||||
patch operations — without this the agent's bare-LF tool args
|
||||
silently normalize Windows-line-ending files, and patch produces
|
||||
mixed endings when only a substituted region changes.
|
||||
"""
|
||||
if not sample:
|
||||
return None
|
||||
# Look at the first chunk — enough to tell, cheap to scan.
|
||||
head = sample[:4096]
|
||||
if "\r\n" in head:
|
||||
return "\r\n"
|
||||
if "\n" in head:
|
||||
return "\n"
|
||||
return None
|
||||
|
||||
|
||||
def _normalize_line_endings(text: str, target: str) -> str:
|
||||
"""Convert all line endings in ``text`` to ``target`` (``\\n`` or ``\\r\\n``).
|
||||
|
||||
Idempotent: ``_normalize_line_endings(_normalize_line_endings(x, "\\r\\n"), "\\r\\n") == _normalize_line_endings(x, "\\r\\n")``.
|
||||
Strips lone ``\\r`` characters as well, so mixed-ending content is
|
||||
homogenized in a single pass.
|
||||
"""
|
||||
# First collapse to LF (handle CRLF and lone CR), then expand if target
|
||||
# is CRLF. Order matters: doing the replacements separately would
|
||||
# double-convert a CRLF -> LFLF.
|
||||
lf_normalized = text.replace("\r\n", "\n").replace("\r", "\n")
|
||||
if target == "\n":
|
||||
return lf_normalized
|
||||
if target == "\r\n":
|
||||
return lf_normalized.replace("\n", "\r\n")
|
||||
return text
|
||||
|
||||
|
||||
def _get_safe_write_root() -> Optional[str]:
|
||||
"""Return the resolved HERMES_WRITE_SAFE_ROOT path, or None if unset.
|
||||
|
||||
|
|
@ -697,7 +737,29 @@ class ShellFileOperations(FileOperations):
|
|||
"""Escape a string for safe use in shell commands."""
|
||||
# Use single quotes and escape any single quotes in the string
|
||||
return "'" + arg.replace("'", "'\"'\"'") + "'"
|
||||
|
||||
|
||||
def _detect_file_line_ending(self, path: str, pre_content: Optional[str] = None) -> Optional[str]:
|
||||
"""Detect the dominant line ending of a file on disk.
|
||||
|
||||
If ``pre_content`` is already available (we just read the file
|
||||
for lint/LSP purposes), inspect that — zero extra exec calls.
|
||||
Otherwise issue a tiny ``head -c 4096`` to sample the first 4KB.
|
||||
|
||||
Returns ``"\\r\\n"`` for CRLF (Windows), ``"\\n"`` for LF (Unix),
|
||||
or ``None`` if undetermined (new file, empty file, single-line
|
||||
file with no line break in the first chunk).
|
||||
"""
|
||||
if pre_content:
|
||||
return _detect_line_ending(pre_content)
|
||||
# File may not exist (new write) — `head` exits 0 with empty
|
||||
# stdout in that case which yields None below. Cheap probe.
|
||||
head_cmd = f"head -c 4096 {self._escape_shell_arg(path)} 2>/dev/null"
|
||||
head_result = self._exec(head_cmd)
|
||||
if head_result.exit_code != 0 or not head_result.stdout:
|
||||
return None
|
||||
return _detect_line_ending(head_result.stdout)
|
||||
|
||||
|
||||
def _unified_diff(self, old_content: str, new_content: str, filename: str) -> str:
|
||||
"""Generate unified diff between old and new content."""
|
||||
old_lines = old_content.splitlines(keepends=True)
|
||||
|
|
@ -975,6 +1037,17 @@ class ShellFileOperations(FileOperations):
|
|||
if read_result.exit_code == 0 and read_result.stdout:
|
||||
pre_content = read_result.stdout
|
||||
|
||||
# ── Line-ending preservation (Roo Code pattern) ──────────────
|
||||
# If the file existed with CRLF endings and the agent's content
|
||||
# has bare LFs, convert to CRLF before writing. Otherwise the
|
||||
# write silently normalizes a Windows-line-ending file (and patch
|
||||
# produces mixed endings when only a substituted region changes).
|
||||
# Detect from a small head sample to avoid reading the full file
|
||||
# for line-ending purposes alone.
|
||||
original_ending = self._detect_file_line_ending(path, pre_content)
|
||||
if original_ending == "\r\n":
|
||||
content = _normalize_line_endings(content, "\r\n")
|
||||
|
||||
# Snapshot LSP diagnostics for this file (best-effort) so the
|
||||
# post-write LSP layer can return only diagnostics introduced
|
||||
# by this specific edit. Mirrors claude-code's
|
||||
|
|
@ -1082,6 +1155,19 @@ class ShellFileOperations(FileOperations):
|
|||
except Exception:
|
||||
pass
|
||||
return PatchResult(error=err_msg)
|
||||
|
||||
# ── Line-ending preservation ──────────────────────────────────
|
||||
# Models nearly always send old_string/new_string with bare LF
|
||||
# in tool args (JSON-encoded), but the file may have CRLF on
|
||||
# disk. After fuzzy_find_and_replace, ``new_content`` is a
|
||||
# mixed-ending string: the substituted region is LF, surrounding
|
||||
# text keeps the file's CRLF. Normalize the whole thing to the
|
||||
# file's detected line ending so the on-disk file is consistent
|
||||
# and the unified diff below reflects the actual change.
|
||||
file_ending = _detect_line_ending(content)
|
||||
if file_ending:
|
||||
new_content = _normalize_line_endings(new_content, file_ending)
|
||||
|
||||
# Write back
|
||||
write_result = self.write_file(path, new_content)
|
||||
if write_result.error:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue