mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-29 06:31:32 +00:00
feat(patch): indentation preservation, CRLF preservation, per-file failure escalation (#507) (#32273)
Three granular patch-tool refinements from the Roo Code deep-dive (#507).

## Indentation preservation (fuzzy_match.py)

When fuzzy_find_and_replace matches via a non-exact strategy, the file's indentation may differ from what the LLM sent in old_string/new_string (common case: model sends zero-indent old/new for a method body that lives inside an 8-space-indented class). Before this commit the replacement was spliced in verbatim, producing a file with a broken indent level that may still parse but is logically wrong.

The fix computes the indent delta between old_string's first meaningful line and the matched region's first meaningful line, then re-indents every line of new_string by that delta. Exact-strategy matches are untouched (passthrough). Same approach as Roo Code's multi-search-replace.ts:466-500.

## CRLF preservation (file_operations.py)

Models nearly always send tool args with bare LF endings (JSON-encoded), but the file on disk may have CRLF (Windows-line-ending configs, .bat, .cmd, .ini files). Before this commit:

- write_file silently normalized CRLF to LF on every overwrite
- patch produced mixed-ending files: the substituted region had LF, the surrounding context kept CRLF

The fix detects the file's existing line endings (via pre_content if already read for lint/LSP, otherwise a tiny head -c 4096 probe), and normalizes the entire write to that ending. New files are written verbatim (no detection possible).

## Per-file failure escalation (file_tools.py)

When the agent fails to patch the same file 3+ times in a row, the existing 'old_string not found' hint isn't strong enough — the model keeps retrying with variations against a stale view of the file.

The fix tracks consecutive failures per (task_id, resolved_path) and injects an escalating hint after 3 failures: 'This is failure #N patching X. Stop retrying. Either re-read fresh, use longer context, or fall back to write_file.' Counter resets on a successful patch to the same path.
## Validation

- 22 new tests across tests/tools/test_fuzzy_match.py (5), test_line_ending_preservation.py (12), test_patch_failure_tracking.py (5)
- All existing tests pass (165/165 in the touched files)
- E2E verified with real _handle_patch / _handle_write_file calls against real CRLF files and real failure loops

Closes part of #507. The remaining open items in #507 (2b start_line hint, behavioral rules) were declined after audit:

- 2b adds schema bloat for a problem the existing 'multiple matches' contract already handles
- Behavioral rules conflict with the personality system

Items 1, 2d, 2e, 3, 4 of #507 were already landed in earlier work.
This commit is contained in:
parent
c2aa235328
commit
6bd0be30be
6 changed files with 824 additions and 10 deletions
|
|
@@ -254,6 +254,43 @@ _file_ops_cache: dict = {}
|
|||
_read_tracker_lock = threading.Lock()
|
||||
_read_tracker: dict = {}
|
||||
|
||||
# Track consecutive patch failures per (task_id, resolved_path). Used to
|
||||
# escalate the hint when the model repeatedly fails to patch the same file
|
||||
# (typical cause: stale view of file contents, ambiguous old_string, or
|
||||
# the file was modified externally between the agent's read and patch
|
||||
# attempt). Reset on a successful patch to that path.
|
||||
_patch_failure_lock = threading.Lock()
|
||||
_patch_failure_tracker: dict = {} # {task_id: {resolved_path: count}}
|
||||
|
||||
|
||||
def _record_patch_failure(task_id: str, resolved_path: str) -> int:
    """Increment and return the consecutive-failure count for this path.

    Args:
        task_id: Key of the per-task bucket inside ``_patch_failure_tracker``.
        resolved_path: Path whose failure counter is bumped.

    Returns:
        The updated failure count for ``resolved_path`` (1 for the first
        recorded failure on that path).
    """
    with _patch_failure_lock:
        task_failures = _patch_failure_tracker.setdefault(task_id, {})
        # Pop-then-reinsert moves the path to the end of the dict's
        # insertion order, so the front of the dict is always the path
        # that has gone longest without a recorded failure.
        previous = task_failures.pop(resolved_path, None)
        # Cap dict size per task to avoid unbounded growth in long sessions
        # where the agent fails on many distinct files. 64 distinct
        # failing files per task is generous; the least recently failing
        # entry is evicted to make room for a new path. The dict cannot be
        # empty here (len >= 64), so next(iter(...)) always yields a key.
        if previous is None and len(task_failures) >= 64:
            del task_failures[next(iter(task_failures))]
        count = (previous or 0) + 1
        task_failures[resolved_path] = count
        return count
|
||||
|
||||
|
||||
def _reset_patch_failures(task_id: str, resolved_paths: list) -> None:
    """Clear consecutive-failure counts for the given paths.

    Args:
        task_id: Key of the per-task bucket inside ``_patch_failure_tracker``.
        resolved_paths: Paths whose counters should be removed. Paths with
            no recorded failures are ignored; an empty or ``None`` value
            makes the call a no-op.
    """
    if not resolved_paths:
        return
    with _patch_failure_lock:
        task_failures = _patch_failure_tracker.get(task_id)
        if task_failures:
            for resolved in resolved_paths:
                task_failures.pop(resolved, None)
        if not task_failures:
            # Drop the bucket once it holds no counters so the tracker does
            # not keep one empty dict per task_id forever. The recorder
            # re-creates the bucket via setdefault on the next failure.
            _patch_failure_tracker.pop(task_id, None)
|
||||
|
||||
# Per-task bounds for the containers inside each _read_tracker[task_id].
|
||||
# A CLI session uses one stable task_id for its lifetime; without these
|
||||
# caps, a 10k-read session would accumulate ~1.5MB of dict/set state that
|
||||
|
|
@@ -1020,12 +1057,43 @@ def patch_tool(mode: str = "replace", path: str = None, old_string: str = None,
|
|||
_r = _path_to_resolved.get(_p)
|
||||
if _r:
|
||||
file_state.note_write(task_id, _r)
|
||||
# Successful patch: clear any prior consecutive-failure
|
||||
# counters for the touched paths so a future failure on
|
||||
# the same path starts the escalation cycle fresh.
|
||||
_reset_patch_failures(task_id, [
|
||||
_r for _r in (_path_to_resolved.get(_p) for _p in _paths_to_check) if _r
|
||||
])
|
||||
# Hint when old_string not found — saves iterations where the agent
|
||||
# retries with stale content instead of re-reading the file.
|
||||
# Suppressed when patch_replace already attached a rich "Did you mean?"
|
||||
# snippet (which is strictly more useful than the generic hint).
|
||||
if result_dict.get("error") and "Could not find" in str(result_dict["error"]):
|
||||
if "Did you mean one of these sections?" not in str(result_dict["error"]):
|
||||
# Track per-file consecutive failures for replace mode. The
|
||||
# ``path`` arg only exists for replace mode; for V4A patches
|
||||
# we'd need to walk the headers, but in practice V4A failures
|
||||
# are far rarer and the existing _hint covers them adequately.
|
||||
failure_count = 0
|
||||
if mode == "replace" and path:
|
||||
resolved = _path_to_resolved.get(path) or path
|
||||
failure_count = _record_patch_failure(task_id, resolved)
|
||||
|
||||
if failure_count >= 3:
|
||||
# Escalating hint after multiple consecutive failures on the
|
||||
# same path. Most common cause is a stale view of the file —
|
||||
# the model is retrying with the same old_string against
|
||||
# content that has since changed. Surface the failure count
|
||||
# so the model recognises it's in a loop and breaks out by
|
||||
# re-reading or falling back to write_file.
|
||||
result_dict["_hint"] = (
|
||||
f"This is failure #{failure_count} patching {path!r}. "
|
||||
"Stop retrying with variations of the same old_string. "
|
||||
"Either: (1) re-read the file fresh to verify current "
|
||||
"content, (2) use a longer / more unique old_string with "
|
||||
"surrounding context lines, or (3) use write_file to "
|
||||
"replace the entire file if the targeted region is hard "
|
||||
"to anchor."
|
||||
)
|
||||
elif "Did you mean one of these sections?" not in str(result_dict["error"]):
|
||||
result_dict["_hint"] = (
|
||||
"old_string not found. Use read_file to verify the current "
|
||||
"content, or search_files to locate the text."
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue