diff --git a/agent/display.py b/agent/display.py
index e9a19ff6192..6c5c970aeff 100644
--- a/agent/display.py
+++ b/agent/display.py
@@ -14,6 +14,7 @@ from difflib import unified_diff
from pathlib import Path
from utils import safe_json_loads
+from agent.tool_result_classification import file_mutation_result_landed
# ANSI escape codes for coloring tool failure indicators
_RED = "\033[31m"
@@ -810,6 +811,8 @@ def _detect_tool_failure(tool_name: str, result: str | None) -> tuple[bool, str]
"""
if result is None:
return False, ""
+ if file_mutation_result_landed(tool_name, result):
+ return False, ""
if tool_name == "terminal":
data = safe_json_loads(result)
diff --git a/agent/tool_guardrails.py b/agent/tool_guardrails.py
index 3c85d782090..5a9ddd507ba 100644
--- a/agent/tool_guardrails.py
+++ b/agent/tool_guardrails.py
@@ -14,6 +14,7 @@ from dataclasses import dataclass, field
from typing import Any, Mapping
from utils import safe_json_loads
+from agent.tool_result_classification import file_mutation_result_landed
IDEMPOTENT_TOOL_NAMES = frozenset(
@@ -196,6 +197,8 @@ def classify_tool_failure(tool_name: str, result: str | None) -> tuple[bool, str
"""
if result is None:
return False, ""
+ if file_mutation_result_landed(tool_name, result):
+ return False, ""
if tool_name == "terminal":
data = safe_json_loads(result)
diff --git a/agent/tool_result_classification.py b/agent/tool_result_classification.py
new file mode 100644
index 00000000000..e136e2964da
--- /dev/null
+++ b/agent/tool_result_classification.py
@@ -0,0 +1,26 @@
+"""Shared helpers for classifying tool result payloads."""
+
+from __future__ import annotations
+
+import json
+from typing import Any
+
+
+FILE_MUTATING_TOOL_NAMES = frozenset({"write_file", "patch"})
+
+
+def file_mutation_result_landed(tool_name: str, result: Any) -> bool:
+ """Return True when a file mutation result proves the write landed."""
+ if tool_name not in FILE_MUTATING_TOOL_NAMES or not isinstance(result, str):
+ return False
+ try:
+ data = json.loads(result.strip())
+ except Exception:
+ return False
+ if not isinstance(data, dict) or data.get("error"):
+ return False
+ if tool_name == "write_file":
+ return "bytes_written" in data
+ if tool_name == "patch":
+ return data.get("success") is True
+ return False
diff --git a/run_agent.py b/run_agent.py
index f0597c90880..a2185300931 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -181,6 +181,7 @@ from agent.tool_guardrails import (
append_toolguard_guidance,
toolguard_synthetic_result,
)
+from agent.tool_result_classification import file_mutation_result_landed
from agent.trajectory import (
convert_scratchpad_to_think, has_incomplete_scratchpad,
save_trajectory as _save_trajectory_to_file,
@@ -5347,7 +5348,8 @@ class AIAgent:
targets = _extract_file_mutation_targets(tool_name, args)
if not targets:
return
- if is_error:
+ landed = file_mutation_result_landed(tool_name, result)
+ if is_error and not landed:
preview = _extract_error_preview(result)
for path in targets:
# Keep the FIRST error we saw for a given path unless we
diff --git a/tests/agent/test_display.py b/tests/agent/test_display.py
index c6ad837af97..5e18fa17e0c 100644
--- a/tests/agent/test_display.py
+++ b/tests/agent/test_display.py
@@ -1,6 +1,7 @@
"""Tests for agent/display.py — build_tool_preview() and inline diff previews."""
import os
+import json
import pytest
from unittest.mock import MagicMock, patch
@@ -149,6 +150,27 @@ class TestCuteToolMessagePreviewLength:
assert path in line
assert "..." not in line
+ def test_write_file_lint_error_result_is_not_marked_failed(self):
+ result = json.dumps({
+ "bytes_written": 12,
+ "lint": {"status": "error", "output": "SyntaxError: invalid syntax"},
+ })
+
+ line = get_cute_tool_message("write_file", {"path": "/tmp/a.py"}, 0.1, result=result)
+
+ assert "[error]" not in line
+
+ def test_patch_lsp_diagnostics_result_is_not_marked_failed(self):
+ result = json.dumps({
+ "success": True,
+ "diff": "--- a/tmp.py\n+++ b/tmp.py\n",
+ "lsp_diagnostics": "ERROR [1:1] type mismatch",
+ })
+
+ line = get_cute_tool_message("patch", {"path": "/tmp/a.py"}, 0.1, result=result)
+
+ assert "[error]" not in line
+
class TestEditDiffPreview:
def test_extract_edit_diff_for_patch(self):
diff --git a/tests/agent/test_tool_guardrails.py b/tests/agent/test_tool_guardrails.py
index c50be56f43e..26593b7ef62 100644
--- a/tests/agent/test_tool_guardrails.py
+++ b/tests/agent/test_tool_guardrails.py
@@ -7,6 +7,7 @@ from agent.tool_guardrails import (
ToolCallGuardrailController,
ToolCallSignature,
canonical_tool_args,
+ classify_tool_failure,
)
@@ -131,6 +132,21 @@ def test_success_resets_exact_signature_failure_streak():
assert controller.before_call("web_search", args).action == "allow"
+def test_file_mutation_lint_error_result_is_not_a_tool_failure():
+ write_result = json.dumps({
+ "bytes_written": 12,
+ "lint": {"status": "error", "output": "SyntaxError: invalid syntax"},
+ })
+ patch_result = json.dumps({
+ "success": True,
+ "diff": "--- a/tmp.py\n+++ b/tmp.py\n",
+ "lsp_diagnostics": "ERROR [1:1] type mismatch",
+ })
+
+ assert classify_tool_failure("write_file", write_result) == (False, "")
+ assert classify_tool_failure("patch", patch_result) == (False, "")
+
+
def test_same_tool_varying_args_warns_by_default_without_halting():
controller = ToolCallGuardrailController(
ToolCallGuardrailConfig(same_tool_failure_warn_after=2, same_tool_failure_halt_after=3)
diff --git a/tests/agent/test_tool_result_classification.py b/tests/agent/test_tool_result_classification.py
new file mode 100644
index 00000000000..2b4b5b150cf
--- /dev/null
+++ b/tests/agent/test_tool_result_classification.py
@@ -0,0 +1,30 @@
+"""Tests for shared tool result classification helpers."""
+
+import json
+
+from agent.tool_result_classification import file_mutation_result_landed
+
+
+def test_write_file_with_nested_lint_error_counts_as_landed():
+ result = json.dumps({
+ "bytes_written": 12,
+ "lint": {"status": "error", "output": "SyntaxError: invalid syntax"},
+ })
+
+ assert file_mutation_result_landed("write_file", result) is True
+
+
+def test_patch_with_nested_lsp_diagnostics_counts_as_landed():
+ result = json.dumps({
+ "success": True,
+ "diff": "--- a/tmp.py\n+++ b/tmp.py\n",
+ "lsp_diagnostics": "ERROR [1:1] type mismatch",
+ })
+
+ assert file_mutation_result_landed("patch", result) is True
+
+
+def test_top_level_file_mutation_error_does_not_count_as_landed():
+ result = json.dumps({"success": True, "error": "post-write verification failed"})
+
+ assert file_mutation_result_landed("patch", result) is False
diff --git a/tests/run_agent/test_file_mutation_verifier.py b/tests/run_agent/test_file_mutation_verifier.py
index fca002d2314..73684ad1c2e 100644
--- a/tests/run_agent/test_file_mutation_verifier.py
+++ b/tests/run_agent/test_file_mutation_verifier.py
@@ -166,6 +166,56 @@ class TestRecordFileMutationResult:
)
assert agent._turn_failed_file_mutations == {}
+ def test_write_file_with_lint_error_counts_as_landed(self):
+ agent = _bare_agent()
+ agent._record_file_mutation_result(
+ "write_file",
+ {"path": "/tmp/a.py", "content": "bad"},
+ json.dumps({"error": "write failed"}),
+ is_error=True,
+ )
+ assert "/tmp/a.py" in agent._turn_failed_file_mutations
+
+ result = json.dumps({
+ "bytes_written": 24,
+ "lint": {"status": "error", "output": "SyntaxError: invalid syntax"},
+ })
+
+ agent._record_file_mutation_result(
+ "write_file",
+ {"path": "/tmp/a.py", "content": "def nope(:\n"},
+ result,
+ is_error=True,
+ )
+
+ assert agent._turn_failed_file_mutations == {}
+
+ def test_patch_with_lsp_diagnostics_counts_as_landed(self):
+ agent = _bare_agent()
+ agent._record_file_mutation_result(
+ "patch",
+ {"mode": "replace", "path": "/tmp/a.py", "old_string": "x", "new_string": "y"},
+ json.dumps({"error": "Could not find old_string"}),
+ is_error=True,
+ )
+ assert "/tmp/a.py" in agent._turn_failed_file_mutations
+
+ result = json.dumps({
+ "success": True,
+ "diff": "--- a/tmp.py\n+++ b/tmp.py\n",
+ "files_modified": ["/tmp/a.py"],
+ "lsp_diagnostics": "ERROR [1:1] type mismatch",
+ })
+
+ agent._record_file_mutation_result(
+ "patch",
+ {"mode": "replace", "path": "/tmp/a.py", "old_string": "x", "new_string": "y"},
+ result,
+ is_error=True,
+ )
+
+ assert agent._turn_failed_file_mutations == {}
+
def test_repeated_failure_keeps_first_error(self):
agent = _bare_agent()
agent._record_file_mutation_result(