From 2b10024ee8eff2074ccd853d6968569acd3110df Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sat, 23 May 2026 20:56:18 -0700
Subject: [PATCH] test(display): cover failure-suffix rendering + update
 scrollback test

The original PR #17194 description claimed test_display_tool_preview.py
but only ever shipped test_display_todo_progress.py. Add the missing
coverage for the failure-suffix path:

- _trim_error: whitespace strip, length cap, File-not-found path collapse
- _detect_tool_failure: terminal exit codes, memory full, structured
  {error}/{message} extraction, malformed JSON, None result
- get_cute_tool_message E2E: read_file failure, terminal exit-only,
  terminal stderr message, memory full, success path, no-result path

Also update test_tool_progress_scrollback.test_error_suffix_on_failed_tool
to reflect the new behavior: the generic '[error]' fallback in cli.py
has been removed; failure suffixes now come from the result-aware
_detect_tool_failure (e.g. '[exit 1]', '[File not found: x]').
---
 tests/agent/test_display_tool_failure.py   | 185 +++++++++++++++++++++
 tests/cli/test_tool_progress_scrollback.py |  15 +-
 2 files changed, 196 insertions(+), 4 deletions(-)
 create mode 100644 tests/agent/test_display_tool_failure.py

diff --git a/tests/agent/test_display_tool_failure.py b/tests/agent/test_display_tool_failure.py
new file mode 100644
index 00000000000..ca56e20f3a1
--- /dev/null
+++ b/tests/agent/test_display_tool_failure.py
@@ -0,0 +1,185 @@
+"""Tests for _detect_tool_failure + _trim_error + get_cute_tool_message
+inline failure suffix rendering.
+
+Covers the user-visible promise: when a tool fails, the CLI shows a short,
+specific reason in square brackets at the end of the completion line —
+not a generic "[error]".
+"""
+
+import json
+import pytest
+
+from agent.display import (
+    _detect_tool_failure,
+    _trim_error,
+    _ERROR_SUFFIX_MAX_LEN,
+    get_cute_tool_message,
+)
+
+
+class TestTrimError:
+    """The helper that shrinks an error message for inline display."""
+
+    def test_short_message_unchanged(self):
+        assert _trim_error("nope") == "nope"
+
+    def test_whitespace_stripped(self):
+        assert _trim_error("  bad input  ") == "bad input"
+
+    def test_long_message_truncated_to_cap(self):
+        msg = "x" * 200
+        trimmed = _trim_error(msg)
+        assert len(trimmed) <= _ERROR_SUFFIX_MAX_LEN
+        assert trimmed.endswith("...")
+
+    def test_file_not_found_path_collapsed_to_filename(self):
+        long_path = "File not found: /home/teknium/.hermes/hermes-agent/very/deep/path/foo.py"
+        assert _trim_error(long_path) == "File not found: foo.py"
+
+    def test_file_not_found_already_short_unchanged(self):
+        assert _trim_error("File not found: foo.py") == "File not found: foo.py"
+
+    def test_file_not_found_relative_path_unchanged(self):
+        # Without a slash there's no path to trim.
+        assert _trim_error("File not found: foo.py") == "File not found: foo.py"
+
+
+class TestDetectToolFailureTerminal:
+    """terminal: non-zero exit_code is the canonical failure signal."""
+
+    def test_success_returns_no_suffix(self):
+        result = json.dumps({"output": "ok\n", "exit_code": 0})
+        assert _detect_tool_failure("terminal", result) == (False, "")
+
+    def test_nonzero_exit_with_no_error_shows_exit_code(self):
+        result = json.dumps({"output": "", "exit_code": 1})
+        is_failure, suffix = _detect_tool_failure("terminal", result)
+        assert is_failure is True
+        assert suffix == " [exit 1]"
+
+    def test_nonzero_exit_with_error_shows_message(self):
+        result = json.dumps({
+            "output": "",
+            "exit_code": 127,
+            "error": "ls: cannot access 'foo': No such file or directory",
+        })
+        is_failure, suffix = _detect_tool_failure("terminal", result)
+        assert is_failure is True
+        assert "cannot access" in suffix
+        # Trimmed to the cap, in brackets
+        assert suffix.startswith(" [")
+        assert suffix.endswith("]")
+
+    def test_malformed_json_returns_no_suffix(self):
+        # Terminal is special: only exit_code matters. Malformed JSON should
+        # not crash and should not be flagged as failure.
+        assert _detect_tool_failure("terminal", "not json") == (False, "")
+
+    def test_none_result_returns_no_suffix(self):
+        assert _detect_tool_failure("terminal", None) == (False, "")
+
+
+class TestDetectToolFailureMemory:
+    """memory: 'full' is distinct from real errors."""
+
+    def test_memory_full_returns_full_suffix(self):
+        result = json.dumps({"success": False, "error": "would exceed the limit"})
+        assert _detect_tool_failure("memory", result) == (True, " [full]")
+
+    def test_memory_other_error_returns_specific_message(self):
+        # An error that's NOT a "full" overflow falls through to the
+        # structured-error path and surfaces the actual message.
+        result = json.dumps({"success": False, "error": "invalid action: zap"})
+        is_failure, suffix = _detect_tool_failure("memory", result)
+        assert is_failure is True
+        assert "invalid action" in suffix
+
+
+class TestDetectToolFailureStructured:
+    """Generic path: any tool that returns {"error": ...} JSON."""
+
+    def test_read_file_error_surfaced(self):
+        result = json.dumps({
+            "path": "/nope/missing.py",
+            "success": False,
+            "error": "File not found: /nope/missing.py",
+        })
+        is_failure, suffix = _detect_tool_failure("read_file", result)
+        assert is_failure is True
+        # _trim_error reduces the path to the basename.
+        assert suffix == " [File not found: missing.py]"
+
+    def test_error_without_success_key_still_flagged(self):
+        # Some tools return {"error": "..."} with no explicit success flag.
+        result = json.dumps({"error": "remote unavailable"})
+        is_failure, suffix = _detect_tool_failure("web_search", result)
+        assert is_failure is True
+        assert suffix == " [remote unavailable]"
+
+    def test_message_field_only_with_success_false_flagged(self):
+        # When success is False and only 'message' is set, surface it.
+        result = json.dumps({"success": False, "message": "rate limited"})
+        is_failure, suffix = _detect_tool_failure("web_search", result)
+        assert is_failure is True
+        assert "rate limited" in suffix
+
+    def test_successful_result_not_flagged(self):
+        result = json.dumps({"success": True, "data": "hello"})
+        assert _detect_tool_failure("web_search", result) == (False, "")
+
+    def test_dict_without_error_or_success_uses_generic_heuristic(self):
+        # Plain successful dict — should pass through the generic
+        # heuristic which only fires on the string "Error" / '"error"' / etc.
+        result = json.dumps({"data": "hello"})
+        is_failure, _ = _detect_tool_failure("web_search", result)
+        assert is_failure is False
+
+
+class TestGetCuteToolMessageFailureSuffix:
+    """End-to-end: failure suffix is appended by get_cute_tool_message."""
+
+    def test_read_file_failure_suffix_appended(self):
+        fail = json.dumps({
+            "path": "/etc/missing",
+            "success": False,
+            "error": "File not found: /etc/missing",
+        })
+        line = get_cute_tool_message("read_file", {"path": "/etc/missing"}, 0.1, result=fail)
+        assert "[File not found: missing]" in line
+
+    def test_terminal_exit_only_suffix(self):
+        fail = json.dumps({"output": "", "exit_code": 2})
+        line = get_cute_tool_message("terminal", {"command": "false"}, 0.1, result=fail)
+        assert "[exit 2]" in line
+
+    def test_terminal_with_stderr_uses_message(self):
+        fail = json.dumps({
+            "output": "",
+            "exit_code": 127,
+            "error": "command not found: notathing",
+        })
+        line = get_cute_tool_message("terminal", {"command": "notathing"}, 0.1, result=fail)
+        assert "command not found" in line
+        # No '[exit 127]' tag when we have a specific message
+        assert "exit 127" not in line
+
+    def test_memory_full_suffix(self):
+        fail = json.dumps({"success": False, "error": "would exceed the limit"})
+        line = get_cute_tool_message(
+            "memory",
+            {"action": "add", "target": "memory", "content": "x"},
+            0.05,
+            result=fail,
+        )
+        assert "[full]" in line
+
+    def test_success_has_no_suffix(self):
+        ok = json.dumps({"success": True, "data": "hi"})
+        line = get_cute_tool_message("web_search", {"query": "hi"}, 0.2, result=ok)
+        assert "[" not in line.split("0.2s", 1)[1]
+
+    def test_no_result_has_no_suffix(self):
+        # No result passed at all — display function should not invent a
+        # failure suffix.
+        line = get_cute_tool_message("terminal", {"command": "ls"}, 0.2)
+        assert "[" not in line.split("0.2s", 1)[1]
diff --git a/tests/cli/test_tool_progress_scrollback.py b/tests/cli/test_tool_progress_scrollback.py
index a8e807da2ec..f99358aa141 100644
--- a/tests/cli/test_tool_progress_scrollback.py
+++ b/tests/cli/test_tool_progress_scrollback.py
@@ -125,14 +125,21 @@ class TestToolProgressScrollback:
         mock_print.assert_not_called()
 
     def test_error_suffix_on_failed_tool(self):
-        """When is_error=True, the stacked line includes [error]."""
+        """When a failed tool's result is forwarded, the stacked line surfaces
+        the specific error (e.g. ``[exit 1]`` or ``[File not found: x]``)
+        instead of the legacy generic ``[error]`` suffix."""
+        import json
         cli = _make_cli(tool_progress="all")
-        cli._on_tool_progress("tool.started", "terminal", "bad cmd", {"command": "bad cmd"})
+        cli._on_tool_progress("tool.started", "terminal", "false", {"command": "false"})
         with patch.object(_cli_mod, "_cprint") as mock_print:
-            cli._on_tool_progress("tool.completed", "terminal", None, None, duration=0.5, is_error=True)
+            cli._on_tool_progress(
+                "tool.completed", "terminal", None, None,
+                duration=0.5, is_error=True,
+                result=json.dumps({"output": "", "exit_code": 1}),
+            )
 
         line = mock_print.call_args[0][0]
-        assert "[error]" in line
+        assert "[exit 1]" in line
 
     def test_spinner_still_updates_on_started(self):
         """tool.started still updates the spinner text for live display."""