mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-29 06:31:32 +00:00
Three granular patch-tool refinements from the Roo Code deep-dive (#507).

## Indentation preservation (fuzzy_match.py)

When fuzzy_find_and_replace matches via a non-exact strategy, the file's indentation may differ from what the LLM sent in old_string/new_string (common case: model sends zero-indent old/new for a method body that lives inside an 8-space-indented class). Before this commit the replacement was spliced in verbatim, producing a file with a broken indent level that may still parse but is logically wrong.

The fix computes the indent delta between old_string's first meaningful line and the matched region's first meaningful line, then re-indents every line of new_string by that delta. Exact-strategy matches are untouched (passthrough). Same approach as Roo Code's multi-search-replace.ts:466-500.

## CRLF preservation (file_operations.py)

Models nearly always send tool args with bare LF endings (JSON-encoded), but the file on disk may have CRLF (Windows-line-ending configs, .bat, .cmd, .ini files). Before this commit:

- write_file silently normalized CRLF to LF on every overwrite
- patch produced mixed-ending files: the substituted region had LF, the surrounding context kept CRLF

The fix detects the file's existing line endings (via pre_content if already read for lint/LSP, otherwise a tiny head -c 4096 probe), and normalizes the entire write to that ending. New files are written verbatim (no detection possible).

## Per-file failure escalation (file_tools.py)

When the agent fails to patch the same file 3+ times in a row, the existing 'old_string not found' hint isn't strong enough — the model keeps retrying with variations against a stale view of the file.

The fix tracks consecutive failures per (task_id, resolved_path) and injects an escalating hint after 3 failures: 'This is failure #N patching X. Stop retrying. Either re-read fresh, use longer context, or fall back to write_file.' Counter resets on a successful patch to the same path.
## Validation

- 22 new tests across tests/tools/test_fuzzy_match.py (5), test_line_ending_preservation.py (12), test_patch_failure_tracking.py (5)
- All existing tests pass (165/165 in the touched files)
- E2E verified with real _handle_patch / _handle_write_file calls against real CRLF files and real failure loops

Closes part of #507. The remaining open items in #507 (2b start_line hint, behavioral rules) were declined after audit:

- 2b adds schema bloat for a problem the existing 'multiple matches' contract already handles
- Behavioral rules conflict with the personality system

Items 1, 2d, 2e, 3, 4 of #507 were already landed in earlier work.
238 lines
7.9 KiB
Python
238 lines
7.9 KiB
Python
"""Tests for CRLF line-ending preservation in write_file and patch.
|
|
|
|
Without this, the agent silently normalizes Windows-line-ending files
|
|
to LF whenever it edits them — and patch produces a mixed-ending file
|
|
when only a substituted region changes (the rest of the file keeps its
|
|
CRLF endings while the replacement is LF-only).
|
|
|
|
See issue #507 (Roo Code deep-dive, item 2c).
|
|
"""
|
|
|
|
import json
|
|
import os
|
|
import tempfile
|
|
|
|
import pytest
|
|
|
|
|
|
@pytest.fixture
def hermes_home(monkeypatch, tmp_path):
    """Give one test a private HERMES_HOME directory, then reset caches.

    Setup creates ``<tmp_path>/hermes`` and exports it as ``HERMES_HOME``
    so the real config is never touched; the directory is what the test
    receives.

    Teardown runs after the ``yield`` and is best-effort. It empties the
    module-level state these tests leave behind (cached file_ops, the
    file-staleness read tracker, active terminal environments). The
    handlers shell out for real, which creates file_ops and terminal
    environments under task_id='default' via _resolve_container_task_id,
    and later tests in the same pytest process must not inherit them.
    """
    isolated_home = tmp_path / "hermes"
    isolated_home.mkdir()
    monkeypatch.setenv("HERMES_HOME", str(isolated_home))

    yield isolated_home

    # Teardown step 1: forget the cached file_ops and read-tracker entries.
    # If they linger, _get_live_tracking_cwd keeps returning this test's
    # stale cwd, which breaks tests such as test_resolve_path that depend
    # on the TERMINAL_CWD env var.
    try:
        from tools.file_tools import (
            clear_file_ops_cache,
            _read_tracker_lock,
            _read_tracker,
        )

        clear_file_ops_cache()
        with _read_tracker_lock:
            _read_tracker.clear()
    except Exception:
        # Deliberately swallowed: cleanup must never fail the test itself.
        pass

    # Teardown step 2: drop any terminal environments that were spun up.
    try:
        from tools.terminal_tool import _active_environments, _env_lock

        with _env_lock:
            _active_environments.clear()
    except Exception:
        # Same best-effort policy as step 1.
        pass
|
|
|
|
|
|
def _crlf_count(b: bytes) -> int:
|
|
return b.count(b"\r\n")
|
|
|
|
|
|
def _bare_lf_count(b: bytes) -> int:
|
|
return b.count(b"\n") - b.count(b"\r\n")
|
|
|
|
|
|
class TestPatchCRLFPreservation:
    """Integration tests: patch-mode edits keep the file's line endings."""

    def test_patch_on_crlf_file_stays_pure_crlf(self, hermes_home, tmp_path):
        """A bare-LF old/new pair applied to a CRLF file must leave the
        file entirely CRLF, with no mixed endings."""
        from tools.file_tools import _handle_patch

        ini_path = tmp_path / "config.ini"
        ini_path.write_bytes(b"[a]\r\nkey=1\r\n\r\n[b]\r\nkey=2\r\n")

        patch_args = {
            "mode": "replace",
            "path": str(ini_path),
            "old_string": "key=1",
            "new_string": "key=99",
        }
        response = json.loads(_handle_patch(patch_args, task_id="crlf_patch_1"))
        assert not response.get("error"), response

        raw = ini_path.read_bytes()
        assert _bare_lf_count(raw) == 0, (
            f"Mixed line endings after patch: {raw!r}"
        )
        # Only a value was swapped, so the line-break count is unchanged.
        assert _crlf_count(raw) == 5
        assert b"key=99\r\n" in raw

    def test_patch_on_lf_file_stays_lf(self, hermes_home, tmp_path):
        """Patching an LF file with an LF new_string must not introduce
        any CRLF."""
        from tools.file_tools import _handle_patch

        ini_path = tmp_path / "config.ini"
        ini_path.write_bytes(b"[a]\nkey=1\n\n[b]\nkey=2\n")

        patch_args = {
            "mode": "replace",
            "path": str(ini_path),
            "old_string": "key=1",
            "new_string": "key=99",
        }
        response = json.loads(_handle_patch(patch_args, task_id="crlf_patch_2"))
        assert not response.get("error"), response

        raw = ini_path.read_bytes()
        assert _crlf_count(raw) == 0, (
            f"Spurious CRLF added to LF file: {raw!r}"
        )

    def test_patch_multiline_replacement_on_crlf(self, hermes_home, tmp_path):
        """A multi-line new_string carrying bare LFs is expected to be
        converted to CRLF before it is written."""
        from tools.file_tools import _handle_patch

        source_path = tmp_path / "f.py"
        source_path.write_bytes(b"def foo():\r\n return 1\r\n")

        patch_args = {
            "mode": "replace",
            "path": str(source_path),
            "old_string": "def foo():\n return 1",
            "new_string": "def foo():\n x = 1\n return x",
        }
        response = json.loads(_handle_patch(patch_args, task_id="crlf_patch_3"))
        assert not response.get("error"), response

        raw = source_path.read_bytes()
        assert _bare_lf_count(raw) == 0, (
            f"Mixed endings after multi-line patch: {raw!r}"
        )
        assert raw == b"def foo():\r\n x = 1\r\n return x\r\n"
|
|
|
|
|
|
class TestWriteFileCRLFPreservation:
    """Integration tests: write_file respects a pre-existing file's
    line endings and leaves brand-new files untouched."""

    def test_overwrite_crlf_file_with_lf_content_preserves_crlf(
        self, hermes_home, tmp_path
    ):
        """Bare-LF content is what the agent usually sends; overwriting a
        CRLF file with it should produce CRLF output instead of silently
        flipping the endings."""
        from tools.file_tools import _handle_write_file

        bat_path = tmp_path / "config.bat"
        bat_path.write_bytes(b"@echo off\r\nset X=1\r\n")

        write_args = {
            "path": str(bat_path),
            "content": "@echo off\nset X=99\nset Y=42\n",
        }
        response = json.loads(
            _handle_write_file(write_args, task_id="crlf_write_1")
        )
        assert "error" not in response, response

        raw = bat_path.read_bytes()
        assert _bare_lf_count(raw) == 0, (
            f"CRLF file got normalized to LF: {raw!r}"
        )
        assert _crlf_count(raw) == 3

    def test_new_file_written_as_is(self, hermes_home, tmp_path):
        """With no file on disk beforehand, content is written verbatim
        (LF by default)."""
        from tools.file_tools import _handle_write_file

        new_path = tmp_path / "new.txt"
        write_args = {"path": str(new_path), "content": "a\nb\nc\n"}
        response = json.loads(
            _handle_write_file(write_args, task_id="crlf_write_2")
        )
        assert "error" not in response, response

        assert new_path.read_bytes() == b"a\nb\nc\n"

    def test_overwrite_lf_file_stays_lf(self, hermes_home, tmp_path):
        """Overwriting an existing LF file must not add spurious CRLFs."""
        from tools.file_tools import _handle_write_file

        lf_path = tmp_path / "lf.txt"
        lf_path.write_bytes(b"line1\nline2\n")

        write_args = {"path": str(lf_path), "content": "X\nY\nZ\n"}
        response = json.loads(
            _handle_write_file(write_args, task_id="crlf_write_3")
        )
        assert "error" not in response, response

        raw = lf_path.read_bytes()
        assert _crlf_count(raw) == 0
        assert raw == b"X\nY\nZ\n"
|
|
|
|
|
|
class TestLineEndingHelpers:
    """Unit tests that call the pure line-ending helpers directly —
    simpler to debug than the integration tests for patch/write_file."""

    def test_detect_crlf(self):
        """All-CRLF text is reported as CRLF."""
        from tools.file_operations import _detect_line_ending

        detected = _detect_line_ending("a\r\nb\r\n")
        assert detected == "\r\n"

    def test_detect_lf(self):
        """All-LF text is reported as LF."""
        from tools.file_operations import _detect_line_ending

        detected = _detect_line_ending("a\nb\n")
        assert detected == "\n"

    def test_detect_empty(self):
        """Text without any newline yields None."""
        from tools.file_operations import _detect_line_ending

        for sample in ("", "no newline here"):
            assert _detect_line_ending(sample) is None

    def test_detect_mixed_picks_crlf(self):
        """Mixed-ending content (any CRLF in the head) is reported as
        CRLF — normalizing TO CRLF is preferred over normalizing away
        from it, since a single CRLF in the file is usually a
        Windows-origin marker."""
        from tools.file_operations import _detect_line_ending

        detected = _detect_line_ending("a\nb\r\nc\n")
        assert detected == "\r\n"

    def test_normalize_to_lf_strips_cr(self):
        """Normalizing to LF rewrites both CRLF and lone CR as LF."""
        from tools.file_operations import _normalize_line_endings

        normalized = _normalize_line_endings("a\r\nb\rc\n", "\n")
        assert normalized == "a\nb\nc\n"

    def test_normalize_to_crlf_idempotent(self):
        """Normalizing to CRLF a second time changes nothing."""
        from tools.file_operations import _normalize_line_endings

        once = _normalize_line_endings("a\nb\n", "\r\n")
        twice = _normalize_line_endings(once, "\r\n")
        assert once == twice
        assert twice == "a\r\nb\r\n"
|