mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-18 04:41:56 +00:00
fix: classify landed file mutations with diagnostics
This commit is contained in:
parent
71c6dd0dcf
commit
da0ddbf88a
8 changed files with 153 additions and 1 deletions
|
|
@ -1,6 +1,7 @@
|
|||
"""Tests for agent/display.py — build_tool_preview() and inline diff previews."""
|
||||
|
||||
import os
|
||||
import json
|
||||
import pytest
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
|
|
@ -149,6 +150,27 @@ class TestCuteToolMessagePreviewLength:
|
|||
assert path in line
|
||||
assert "..." not in line
|
||||
|
||||
def test_write_file_lint_error_result_is_not_marked_failed(self):
|
||||
result = json.dumps({
|
||||
"bytes_written": 12,
|
||||
"lint": {"status": "error", "output": "SyntaxError: invalid syntax"},
|
||||
})
|
||||
|
||||
line = get_cute_tool_message("write_file", {"path": "/tmp/a.py"}, 0.1, result=result)
|
||||
|
||||
assert "[error]" not in line
|
||||
|
||||
def test_patch_lsp_diagnostics_result_is_not_marked_failed(self):
|
||||
result = json.dumps({
|
||||
"success": True,
|
||||
"diff": "--- a/tmp.py\n+++ b/tmp.py\n",
|
||||
"lsp_diagnostics": "<diagnostics>ERROR [1:1] type mismatch</diagnostics>",
|
||||
})
|
||||
|
||||
line = get_cute_tool_message("patch", {"path": "/tmp/a.py"}, 0.1, result=result)
|
||||
|
||||
assert "[error]" not in line
|
||||
|
||||
|
||||
class TestEditDiffPreview:
|
||||
def test_extract_edit_diff_for_patch(self):
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ from agent.tool_guardrails import (
|
|||
ToolCallGuardrailController,
|
||||
ToolCallSignature,
|
||||
canonical_tool_args,
|
||||
classify_tool_failure,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -131,6 +132,21 @@ def test_success_resets_exact_signature_failure_streak():
|
|||
assert controller.before_call("web_search", args).action == "allow"
|
||||
|
||||
|
||||
def test_file_mutation_lint_error_result_is_not_a_tool_failure():
|
||||
write_result = json.dumps({
|
||||
"bytes_written": 12,
|
||||
"lint": {"status": "error", "output": "SyntaxError: invalid syntax"},
|
||||
})
|
||||
patch_result = json.dumps({
|
||||
"success": True,
|
||||
"diff": "--- a/tmp.py\n+++ b/tmp.py\n",
|
||||
"lsp_diagnostics": "<diagnostics>ERROR [1:1] type mismatch</diagnostics>",
|
||||
})
|
||||
|
||||
assert classify_tool_failure("write_file", write_result) == (False, "")
|
||||
assert classify_tool_failure("patch", patch_result) == (False, "")
|
||||
|
||||
|
||||
def test_same_tool_varying_args_warns_by_default_without_halting():
|
||||
controller = ToolCallGuardrailController(
|
||||
ToolCallGuardrailConfig(same_tool_failure_warn_after=2, same_tool_failure_halt_after=3)
|
||||
|
|
|
|||
30
tests/agent/test_tool_result_classification.py
Normal file
30
tests/agent/test_tool_result_classification.py
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
"""Tests for shared tool result classification helpers."""
|
||||
|
||||
import json
|
||||
|
||||
from agent.tool_result_classification import file_mutation_result_landed
|
||||
|
||||
|
||||
def test_write_file_with_nested_lint_error_counts_as_landed():
|
||||
result = json.dumps({
|
||||
"bytes_written": 12,
|
||||
"lint": {"status": "error", "output": "SyntaxError: invalid syntax"},
|
||||
})
|
||||
|
||||
assert file_mutation_result_landed("write_file", result) is True
|
||||
|
||||
|
||||
def test_patch_with_nested_lsp_diagnostics_counts_as_landed():
|
||||
result = json.dumps({
|
||||
"success": True,
|
||||
"diff": "--- a/tmp.py\n+++ b/tmp.py\n",
|
||||
"lsp_diagnostics": "<diagnostics>ERROR [1:1] type mismatch</diagnostics>",
|
||||
})
|
||||
|
||||
assert file_mutation_result_landed("patch", result) is True
|
||||
|
||||
|
||||
def test_top_level_file_mutation_error_does_not_count_as_landed():
|
||||
result = json.dumps({"success": True, "error": "post-write verification failed"})
|
||||
|
||||
assert file_mutation_result_landed("patch", result) is False
|
||||
|
|
@ -166,6 +166,56 @@ class TestRecordFileMutationResult:
|
|||
)
|
||||
assert agent._turn_failed_file_mutations == {}
|
||||
|
||||
def test_write_file_with_lint_error_counts_as_landed(self):
|
||||
agent = _bare_agent()
|
||||
agent._record_file_mutation_result(
|
||||
"write_file",
|
||||
{"path": "/tmp/a.py", "content": "bad"},
|
||||
json.dumps({"error": "write failed"}),
|
||||
is_error=True,
|
||||
)
|
||||
assert "/tmp/a.py" in agent._turn_failed_file_mutations
|
||||
|
||||
result = json.dumps({
|
||||
"bytes_written": 24,
|
||||
"lint": {"status": "error", "output": "SyntaxError: invalid syntax"},
|
||||
})
|
||||
|
||||
agent._record_file_mutation_result(
|
||||
"write_file",
|
||||
{"path": "/tmp/a.py", "content": "def nope(:\n"},
|
||||
result,
|
||||
is_error=True,
|
||||
)
|
||||
|
||||
assert agent._turn_failed_file_mutations == {}
|
||||
|
||||
def test_patch_with_lsp_diagnostics_counts_as_landed(self):
|
||||
agent = _bare_agent()
|
||||
agent._record_file_mutation_result(
|
||||
"patch",
|
||||
{"mode": "replace", "path": "/tmp/a.py", "old_string": "x", "new_string": "y"},
|
||||
json.dumps({"error": "Could not find old_string"}),
|
||||
is_error=True,
|
||||
)
|
||||
assert "/tmp/a.py" in agent._turn_failed_file_mutations
|
||||
|
||||
result = json.dumps({
|
||||
"success": True,
|
||||
"diff": "--- a/tmp.py\n+++ b/tmp.py\n",
|
||||
"files_modified": ["/tmp/a.py"],
|
||||
"lsp_diagnostics": "<diagnostics>ERROR [1:1] type mismatch</diagnostics>",
|
||||
})
|
||||
|
||||
agent._record_file_mutation_result(
|
||||
"patch",
|
||||
{"mode": "replace", "path": "/tmp/a.py", "old_string": "x", "new_string": "y"},
|
||||
result,
|
||||
is_error=True,
|
||||
)
|
||||
|
||||
assert agent._turn_failed_file_mutations == {}
|
||||
|
||||
def test_repeated_failure_keeps_first_error(self):
|
||||
agent = _bare_agent()
|
||||
agent._record_file_mutation_result(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue