fix(patch): catch silent persistence failures and escape-drift in tool-call transport (#12669)

Two hardening layers in the patch tool, triggered by a real silent failure in the previous session: (1) Post-write verification in patch_replace — after write_file succeeds, re-read the file and confirm the bytes on disk match the intended write. If not, return an error instead of the current success-with-diff. Catches silent persistence failures from any cause (backend FS oddities, stdin pipe truncation, concurrent task races, mount drift). (2) Escape-drift guard in fuzzy_find_and_replace — when a non-exact strategy matches and both old_string and new_string contain literal \' or \" sequences but the matched file region does not, reject the patch with a clear error pointing at the likely cause (tool-call serialization adding a spurious backslash around apostrophes/quotes). Exact matches bypass the guard, and legitimate edits that add or preserve escape sequences in files that already have them still work. Why: in a prior tool call, old_string was sent with \' where the file has ' (tool-call transport drift). The fuzzy matcher's block_anchor strategy matched anyway and produced a diff the tool reported as successful — but the file was never modified on disk. The agent moved on believing the edit landed when it hadn't. Tests: added TestPatchReplacePostWriteVerification (3 cases) and TestEscapeDriftGuard (6 cases). All pass, existing fuzzy match and file_operations tests unaffected.
2026-04-27 01:11:40 +00:00 · 2026-04-19 12:27:34 -07:00 · 2026-04-19 12:27:34 -07:00 · d2c2e34469
commit d2c2e34469
parent db60c98276
4 changed files with 254 additions and 2 deletions
--- a/tests/tools/test_file_operations.py
+++ b/tests/tools/test_file_operations.py
@ -355,3 +355,101 @@ class TestShellFileOpsWriteDenied:
        result = ops.move_file("/tmp/nonexistent.txt", "/tmp/dest.txt")
        assert result.error is not None
        assert "Failed to move" in result.error
+
+
+class TestPatchReplacePostWriteVerification:
+    """Tests for the post-write verification added in patch_replace.
+
+    Confirms that a silent persistence failure (where write_file's command
+    appears to succeed but the bytes on disk don't match new_content) is
+    surfaced as an error instead of being reported as a successful patch.
+    """
+
+    def test_patch_replace_fails_when_file_not_persisted(self, mock_env):
+        """write_file reports success but the re-read returns old content:
+        patch_replace must return an error, not success-with-diff."""
+        file_contents = {"/tmp/test/a.py": "hello world\n"}
+
+        def side_effect(command, **kwargs):
+            # cat reads the file — both the initial read and the verify read
+            if command.startswith("cat "):
+                # Extract path from cat command (strip quotes)
+                for path in file_contents:
+                    if path in command:
+                        return {"output": file_contents[path], "returncode": 0}
+                return {"output": "", "returncode": 1}
+            # mkdir for parent dir
+            if command.startswith("mkdir "):
+                return {"output": "", "returncode": 0}
+            # wc -c for byte count after write
+            if command.startswith("wc -c"):
+                for path in file_contents:
+                    if path in command:
+                        return {"output": str(len(file_contents[path].encode())), "returncode": 0}
+                return {"output": "0", "returncode": 0}
+            # Everything else (including the write itself) pretends to succeed
+            # but DOESN'T update file_contents — simulates silent failure
+            return {"output": "", "returncode": 0}
+
+        mock_env.execute.side_effect = side_effect
+        ops = ShellFileOperations(mock_env)
+        result = ops.patch_replace("/tmp/test/a.py", "hello", "hi")
+        assert result.error is not None, (
+            "Silent persistence failure must surface as error, got: "
+            f"success={result.success}, diff={result.diff}"
+        )
+        assert "verification failed" in result.error.lower()
+        assert "did not persist" in result.error.lower()
+
+    def test_patch_replace_succeeds_when_file_persisted(self, mock_env):
+        """Normal success path: write persists, verify read returns new bytes."""
+        state = {"content": "hello world\n"}
+
+        def side_effect(command, stdin_data=None, **kwargs):
+            # Write is `cat > path` — detect by the `>` redirect, NOT just `cat `
+            if command.startswith("cat >"):
+                if stdin_data is not None:
+                    state["content"] = stdin_data
+                return {"output": "", "returncode": 0}
+            if command.startswith("cat "):  # read
+                return {"output": state["content"], "returncode": 0}
+            if command.startswith("mkdir "):
+                return {"output": "", "returncode": 0}
+            if command.startswith("wc -c"):
+                return {"output": str(len(state["content"].encode())), "returncode": 0}
+            return {"output": "", "returncode": 0}
+
+        mock_env.execute.side_effect = side_effect
+        ops = ShellFileOperations(mock_env)
+        result = ops.patch_replace("/tmp/test/a.py", "hello", "hi")
+        assert result.error is None, f"Unexpected error: {result.error}"
+        assert result.success is True
+        assert state["content"] == "hi world\n", f"File not actually updated: {state['content']!r}"
+
+    def test_patch_replace_fails_when_verify_read_errors(self, mock_env):
+        """If the verify-read step itself fails (exit code != 0), return an error."""
+        call_count = {"cat": 0}
+        state = {"content": "hello world\n"}
+
+        def side_effect(command, stdin_data=None, **kwargs):
+            if command.startswith("cat >"):  # write
+                if stdin_data is not None:
+                    state["content"] = stdin_data
+                return {"output": "", "returncode": 0}
+            if command.startswith("cat "):  # read
+                call_count["cat"] += 1
+                # First read (initial fetch) succeeds; second read (verify) fails
+                if call_count["cat"] == 1:
+                    return {"output": state["content"], "returncode": 0}
+                return {"output": "", "returncode": 1}
+            if command.startswith("mkdir "):
+                return {"output": "", "returncode": 0}
+            if command.startswith("wc -c"):
+                return {"output": str(len(state["content"].encode())), "returncode": 0}
+            return {"output": "", "returncode": 0}
+
+        mock_env.execute.side_effect = side_effect
+        ops = ShellFileOperations(mock_env)
+        result = ops.patch_replace("/tmp/test/a.py", "hello", "hi")
+        assert result.error is not None
+        assert "could not re-read" in result.error.lower()