fix: classify landed file mutations with diagnostics

This commit is contained in:
GodsBoy 2026-05-13 11:36:07 +02:00 committed by kshitij
parent 71c6dd0dcf
commit da0ddbf88a
8 changed files with 153 additions and 1 deletions

View file

@ -1,6 +1,7 @@
"""Tests for agent/display.py — build_tool_preview() and inline diff previews."""
import os
import json
import pytest
from unittest.mock import MagicMock, patch
@ -149,6 +150,27 @@ class TestCuteToolMessagePreviewLength:
assert path in line
assert "..." not in line
def test_write_file_lint_error_result_is_not_marked_failed(self):
    """A write_file result with a nested lint error must not show ``[error]``."""
    # The error status lives under "lint", not at the top level of the result.
    payload = {
        "bytes_written": 12,
        "lint": {"status": "error", "output": "SyntaxError: invalid syntax"},
    }
    rendered = get_cute_tool_message(
        "write_file",
        {"path": "/tmp/a.py"},
        0.1,
        result=json.dumps(payload),
    )
    assert "[error]" not in rendered
def test_patch_lsp_diagnostics_result_is_not_marked_failed(self):
    """A successful patch result carrying LSP diagnostics must not show ``[error]``."""
    # "ERROR" only appears inside the diagnostics text; "success" is True.
    payload = {
        "success": True,
        "diff": "--- a/tmp.py\n+++ b/tmp.py\n",
        "lsp_diagnostics": "<diagnostics>ERROR [1:1] type mismatch</diagnostics>",
    }
    rendered = get_cute_tool_message(
        "patch",
        {"path": "/tmp/a.py"},
        0.1,
        result=json.dumps(payload),
    )
    assert "[error]" not in rendered
class TestEditDiffPreview:
def test_extract_edit_diff_for_patch(self):

View file

@ -7,6 +7,7 @@ from agent.tool_guardrails import (
ToolCallGuardrailController,
ToolCallSignature,
canonical_tool_args,
classify_tool_failure,
)
@ -131,6 +132,21 @@ def test_success_resets_exact_signature_failure_streak():
assert controller.before_call("web_search", args).action == "allow"
def test_file_mutation_lint_error_result_is_not_a_tool_failure():
    """Results with nested lint/LSP diagnostics classify as ``(False, "")``."""
    # Keyed by tool name; insertion order keeps write_file checked before patch.
    results_by_tool = {
        "write_file": json.dumps({
            "bytes_written": 12,
            "lint": {"status": "error", "output": "SyntaxError: invalid syntax"},
        }),
        "patch": json.dumps({
            "success": True,
            "diff": "--- a/tmp.py\n+++ b/tmp.py\n",
            "lsp_diagnostics": "<diagnostics>ERROR [1:1] type mismatch</diagnostics>",
        }),
    }

    for tool_name, raw_result in results_by_tool.items():
        assert classify_tool_failure(tool_name, raw_result) == (False, "")
def test_same_tool_varying_args_warns_by_default_without_halting():
controller = ToolCallGuardrailController(
ToolCallGuardrailConfig(same_tool_failure_warn_after=2, same_tool_failure_halt_after=3)

View file

@ -0,0 +1,30 @@
"""Tests for shared tool result classification helpers."""
import json
from agent.tool_result_classification import file_mutation_result_landed
def test_write_file_with_nested_lint_error_counts_as_landed():
    """A write_file result with a nested lint error is reported as landed."""
    payload = {
        "bytes_written": 12,
        "lint": {"status": "error", "output": "SyntaxError: invalid syntax"},
    }
    landed = file_mutation_result_landed("write_file", json.dumps(payload))

    assert landed is True
def test_patch_with_nested_lsp_diagnostics_counts_as_landed():
    """A successful patch result with LSP diagnostics is reported as landed."""
    payload = {
        "success": True,
        "diff": "--- a/tmp.py\n+++ b/tmp.py\n",
        "lsp_diagnostics": "<diagnostics>ERROR [1:1] type mismatch</diagnostics>",
    }
    landed = file_mutation_result_landed("patch", json.dumps(payload))

    assert landed is True
def test_top_level_file_mutation_error_does_not_count_as_landed():
    """A top-level ``error`` key means not landed, even with ``success`` True."""
    payload = {"success": True, "error": "post-write verification failed"}
    landed = file_mutation_result_landed("patch", json.dumps(payload))

    assert landed is False

View file

@ -166,6 +166,56 @@ class TestRecordFileMutationResult:
)
assert agent._turn_failed_file_mutations == {}
def test_write_file_with_lint_error_counts_as_landed(self):
    """A later write_file result with a lint error clears the recorded failure.

    The first call records a real failure for ``/tmp/a.py``. The second call
    passes a result containing only a nested lint error, still flagged with
    ``is_error=True``; afterwards no failed mutation may remain.
    """
    target = "/tmp/a.py"
    agent = _bare_agent()

    # Step 1: a genuine write failure is tracked against the path.
    failed_result = json.dumps({"error": "write failed"})
    agent._record_file_mutation_result(
        "write_file",
        {"path": target, "content": "bad"},
        failed_result,
        is_error=True,
    )
    assert target in agent._turn_failed_file_mutations

    # Step 2: bytes were written; only the lint section reports an error.
    lint_error_result = json.dumps({
        "bytes_written": 24,
        "lint": {"status": "error", "output": "SyntaxError: invalid syntax"},
    })
    agent._record_file_mutation_result(
        "write_file",
        {"path": target, "content": "def nope(:\n"},
        lint_error_result,
        is_error=True,
    )
    assert agent._turn_failed_file_mutations == {}
def test_patch_with_lsp_diagnostics_counts_as_landed(self):
    """A later patch result with LSP diagnostics clears the recorded failure.

    The first call records a real patch failure for ``/tmp/a.py``. The second
    call passes a successful result that also carries LSP diagnostics, still
    flagged with ``is_error=True``; afterwards no failed mutation may remain.
    """
    target = "/tmp/a.py"
    agent = _bare_agent()

    # Step 1: the patch cannot be applied, so the path is tracked as failed.
    failed_result = json.dumps({"error": "Could not find old_string"})
    agent._record_file_mutation_result(
        "patch",
        {"mode": "replace", "path": target, "old_string": "x", "new_string": "y"},
        failed_result,
        is_error=True,
    )
    assert target in agent._turn_failed_file_mutations

    # Step 2: the same patch succeeds; diagnostics are attached to the result.
    diagnostics_result = json.dumps({
        "success": True,
        "diff": "--- a/tmp.py\n+++ b/tmp.py\n",
        "files_modified": ["/tmp/a.py"],
        "lsp_diagnostics": "<diagnostics>ERROR [1:1] type mismatch</diagnostics>",
    })
    agent._record_file_mutation_result(
        "patch",
        {"mode": "replace", "path": target, "old_string": "x", "new_string": "y"},
        diagnostics_result,
        is_error=True,
    )
    assert agent._turn_failed_file_mutations == {}
def test_repeated_failure_keeps_first_error(self):
agent = _bare_agent()
agent._record_file_mutation_result(