fix(acp): mark failed tool completions

2026-07-14 14:12:44 +00:00 · 2026-05-15 21:42:16 +01:00 · 2026-05-15 21:42:16 +01:00 · b38d2d133b
commit b38d2d133b
parent 375c7f9cc3
2 changed files with 47 additions and 1 deletions
--- a/acp_adapter/tools.py
+++ b/acp_adapter/tools.py
@@ -202,6 +202,28 @@ def _json_loads_maybe(value: Optional[str]) -> Any:
        return None


+def _tool_result_failed(result: Optional[str]) -> bool:
+    """Return True when a structured Hermes tool result clearly failed.
+
+    Keep this deliberately conservative. Plain text can contain words like
+    "error" because tests failed or a command printed diagnostics; Zed should
+    only receive ACP failed status for structured tool-level failures.
+    """
+    data = _json_loads_maybe(result)  # presumably the parsed JSON value, or None when parsing fails -- confirm against the helper
+    if not isinstance(data, dict):  # only a dict payload is inspected; anything else is never reported as failed
+        return False
+
+    for key in ("success", "ok"):
+        if data.get(key) is False:  # identity check: a missing key (None) or a falsy non-bool such as 0 does not count
+            return True
+
+    exit_code = data.get("exit_code", data.get("returncode"))  # "returncode" is used only when the "exit_code" key is missing; a present null is not replaced
+    if isinstance(exit_code, int) and exit_code != 0:  # NOTE(review): bool is an int subclass, so a True value here would read as a failure -- confirm intended
+        return True
+
+    return False
+
+
 def _truncate_text(text: str, limit: int = 5000) -> str:
    if len(text) <= limit:
        return text
@@ -1296,7 +1318,7 @@ def build_tool_complete(
    return acp.update_tool_call(
        tool_call_id,
        kind=kind,
-        status="completed",
+        status="failed" if _tool_result_failed(result) else "completed",
        content=content,
        raw_output=None if tool_name in _POLISHED_TOOLS or _is_structured_json_result(result) else result,
    )
--- a/tests/acp/test_tools.py
+++ b/tests/acp/test_tools.py
@@ -345,6 +345,30 @@ class TestBuildToolComplete:
        assert "hello" in text
        assert result.raw_output is None

+    def test_build_tool_complete_marks_success_false_as_failed(self):
+        result = build_tool_complete("tc-fail", "skill_manage", '{"success": false, "error": "boom"}')
+        assert result.status == "failed"  # a false "success" flag in the JSON result is expected to yield status "failed"
+
+    def test_build_tool_complete_marks_ok_false_as_failed(self):
+        result = build_tool_complete("tc-fail", "some_tool", '{"ok": false, "error": "boom"}')
+        assert result.status == "failed"  # a false "ok" flag is expected to yield status "failed" as well
+
+    def test_build_tool_complete_marks_exit_code_nonzero_as_failed(self):
+        result = build_tool_complete("tc-fail", "terminal", '{"output": "bad", "exit_code": 2}')
+        assert result.status == "failed"  # a non-zero "exit_code" is expected to yield status "failed"
+
+    def test_build_tool_complete_marks_returncode_nonzero_as_failed(self):
+        result = build_tool_complete("tc-fail", "execute_code", '{"output": "bad", "returncode": 2}')
+        assert result.status == "failed"  # a non-zero "returncode" (with no "exit_code" key) is expected to yield status "failed"
+
+    def test_build_tool_complete_keeps_plain_error_text_completed(self):
+        result = build_tool_complete("tc-ok", "terminal", "tests failed: 1 assertion error")
+        assert result.status == "completed"  # a plain-text result that merely mentions an error is expected to stay "completed"
+
+    def test_build_tool_complete_keeps_json_error_without_failure_flag_completed(self):
+        result = build_tool_complete("tc-ok", "some_tool", '{"error": "timeout while reading optional source"}')
+        assert result.status == "completed"  # an "error" key alone, with no failure flag or exit code, is expected to stay "completed"
+
    def test_build_tool_complete_for_skill_manage_summarizes_without_raw_json(self):
        result = build_tool_complete(
            "tc-skill-manage",