mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-27 01:11:40 +00:00
fix(patch): catch silent persistence failures and escape-drift in tool-call transport (#12669)
Two hardening layers in the patch tool, triggered by a real silent failure in the previous session: (1) Post-write verification in patch_replace — after write_file succeeds, re-read the file and confirm the bytes on disk match the intended write. If not, return an error instead of the current success-with-diff. Catches silent persistence failures from any cause (backend FS oddities, stdin pipe truncation, concurrent task races, mount drift). (2) Escape-drift guard in fuzzy_find_and_replace — when a non-exact strategy matches and both old_string and new_string contain literal \' or \" sequences but the matched file region does not, reject the patch with a clear error pointing at the likely cause (tool-call serialization adding a spurious backslash around apostrophes/quotes). Exact matches bypass the guard, and legitimate edits that add or preserve escape sequences in files that already have them still work. Why: in a prior tool call, old_string was sent with \' where the file has ' (tool-call transport drift). The fuzzy matcher's block_anchor strategy matched anyway and produced a diff the tool reported as successful — but the file was never modified on disk. The agent moved on believing the edit landed when it hadn't. Tests: added TestPatchReplacePostWriteVerification (3 cases) and TestEscapeDriftGuard (6 cases). All pass, existing fuzzy match and file_operations tests unaffected.
This commit is contained in:
parent
db60c98276
commit
d2c2e34469
4 changed files with 254 additions and 2 deletions
|
|
@ -355,3 +355,101 @@ class TestShellFileOpsWriteDenied:
|
|||
result = ops.move_file("/tmp/nonexistent.txt", "/tmp/dest.txt")
|
||||
assert result.error is not None
|
||||
assert "Failed to move" in result.error
|
||||
|
||||
|
||||
class TestPatchReplacePostWriteVerification:
|
||||
"""Tests for the post-write verification added in patch_replace.
|
||||
|
||||
Confirms that a silent persistence failure (where write_file's command
|
||||
appears to succeed but the bytes on disk don't match new_content) is
|
||||
surfaced as an error instead of being reported as a successful patch.
|
||||
"""
|
||||
|
||||
def test_patch_replace_fails_when_file_not_persisted(self, mock_env):
|
||||
"""write_file reports success but the re-read returns old content:
|
||||
patch_replace must return an error, not success-with-diff."""
|
||||
file_contents = {"/tmp/test/a.py": "hello world\n"}
|
||||
|
||||
def side_effect(command, **kwargs):
|
||||
# cat reads the file — both the initial read and the verify read
|
||||
if command.startswith("cat "):
|
||||
# Extract path from cat command (strip quotes)
|
||||
for path in file_contents:
|
||||
if path in command:
|
||||
return {"output": file_contents[path], "returncode": 0}
|
||||
return {"output": "", "returncode": 1}
|
||||
# mkdir for parent dir
|
||||
if command.startswith("mkdir "):
|
||||
return {"output": "", "returncode": 0}
|
||||
# wc -c for byte count after write
|
||||
if command.startswith("wc -c"):
|
||||
for path in file_contents:
|
||||
if path in command:
|
||||
return {"output": str(len(file_contents[path].encode())), "returncode": 0}
|
||||
return {"output": "0", "returncode": 0}
|
||||
# Everything else (including the write itself) pretends to succeed
|
||||
# but DOESN'T update file_contents — simulates silent failure
|
||||
return {"output": "", "returncode": 0}
|
||||
|
||||
mock_env.execute.side_effect = side_effect
|
||||
ops = ShellFileOperations(mock_env)
|
||||
result = ops.patch_replace("/tmp/test/a.py", "hello", "hi")
|
||||
assert result.error is not None, (
|
||||
"Silent persistence failure must surface as error, got: "
|
||||
f"success={result.success}, diff={result.diff}"
|
||||
)
|
||||
assert "verification failed" in result.error.lower()
|
||||
assert "did not persist" in result.error.lower()
|
||||
|
||||
def test_patch_replace_succeeds_when_file_persisted(self, mock_env):
|
||||
"""Normal success path: write persists, verify read returns new bytes."""
|
||||
state = {"content": "hello world\n"}
|
||||
|
||||
def side_effect(command, stdin_data=None, **kwargs):
|
||||
# Write is `cat > path` — detect by the `>` redirect, NOT just `cat `
|
||||
if command.startswith("cat >"):
|
||||
if stdin_data is not None:
|
||||
state["content"] = stdin_data
|
||||
return {"output": "", "returncode": 0}
|
||||
if command.startswith("cat "): # read
|
||||
return {"output": state["content"], "returncode": 0}
|
||||
if command.startswith("mkdir "):
|
||||
return {"output": "", "returncode": 0}
|
||||
if command.startswith("wc -c"):
|
||||
return {"output": str(len(state["content"].encode())), "returncode": 0}
|
||||
return {"output": "", "returncode": 0}
|
||||
|
||||
mock_env.execute.side_effect = side_effect
|
||||
ops = ShellFileOperations(mock_env)
|
||||
result = ops.patch_replace("/tmp/test/a.py", "hello", "hi")
|
||||
assert result.error is None, f"Unexpected error: {result.error}"
|
||||
assert result.success is True
|
||||
assert state["content"] == "hi world\n", f"File not actually updated: {state['content']!r}"
|
||||
|
||||
def test_patch_replace_fails_when_verify_read_errors(self, mock_env):
|
||||
"""If the verify-read step itself fails (exit code != 0), return an error."""
|
||||
call_count = {"cat": 0}
|
||||
state = {"content": "hello world\n"}
|
||||
|
||||
def side_effect(command, stdin_data=None, **kwargs):
|
||||
if command.startswith("cat >"): # write
|
||||
if stdin_data is not None:
|
||||
state["content"] = stdin_data
|
||||
return {"output": "", "returncode": 0}
|
||||
if command.startswith("cat "): # read
|
||||
call_count["cat"] += 1
|
||||
# First read (initial fetch) succeeds; second read (verify) fails
|
||||
if call_count["cat"] == 1:
|
||||
return {"output": state["content"], "returncode": 0}
|
||||
return {"output": "", "returncode": 1}
|
||||
if command.startswith("mkdir "):
|
||||
return {"output": "", "returncode": 0}
|
||||
if command.startswith("wc -c"):
|
||||
return {"output": str(len(state["content"].encode())), "returncode": 0}
|
||||
return {"output": "", "returncode": 0}
|
||||
|
||||
mock_env.execute.side_effect = side_effect
|
||||
ops = ShellFileOperations(mock_env)
|
||||
result = ops.patch_replace("/tmp/test/a.py", "hello", "hi")
|
||||
assert result.error is not None
|
||||
assert "could not re-read" in result.error.lower()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue