From 094d732378059a341c7660ab97d1adbe782403fa Mon Sep 17 00:00:00 2001
From: "Albert.Zhou" <albert748@gmail.com>
Date: Sat, 23 May 2026 20:54:17 -0700
Subject: [PATCH] fix(cli): surface tool failures with specific error messages
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Improves the failure suffix on tool completion lines. Instead of showing
only '[error]' (or '[exit N]' for the terminal tool), this parses the
tool's JSON result and surfaces the actual error message:

  Before:  ┊ 📖 read      foo.py  0.1s [error]
  After:   ┊ 📖 read      foo.py  0.1s [File not found: foo.py]

  Before:  ┊ 💻 $         ls bad  0.1s [exit 2]
  After:   ┊ 💻 $         ls bad  0.1s [ls: cannot access 'bad'...]

Adds a _trim_error helper that shortens the path in "File not found:"
messages to the filename and caps the message at 48 chars so the suffix
stays readable on narrow terminals.

Threads the tool result through the tool.completed progress callback so
agent/display.get_cute_tool_message can inspect it. The cli.py [error]
post-suffix is removed in favor of the richer suffix _detect_tool_failure
now produces directly.

Originally proposed in PR #17194 by Albert.Zhou; salvaged onto current
main with the dead-code preview-length bumps dropped (tool_preview_length
config already strictly caps previews, so the per-tool n= defaults are
unreachable).

Co-authored-by: Albert.Zhou <albert748@gmail.com>
---
 agent/display.py       | 44 ++++++++++++++++++++++++++++++++++++------
 agent/tool_executor.py |  2 ++
 cli.py                 |  4 +---
 3 files changed, 41 insertions(+), 9 deletions(-)

diff --git a/agent/display.py b/agent/display.py
index cdfc88f46a3..d1460ea46fd 100644
--- a/agent/display.py
+++ b/agent/display.py
@@ -787,33 +787,65 @@ class KawaiiSpinner:
 # Cute tool message (completion line that replaces the spinner)
 # =========================================================================
 
+_ERROR_SUFFIX_MAX_LEN = 48
+
+
+def _trim_error(msg: str) -> str:
+    """Shrink an error message for inline display in a tool status line.
+
+    In a "File not found:" message the path is cut to what follows its last
+    "/"; a result over ``_ERROR_SUFFIX_MAX_LEN`` chars is truncated with "...".
+    """
+    msg = msg.strip()
+    # Common case: "File not found: /very/long/absolute/path/foo.py"
+    if "File not found:" in msg:
+        _, _, tail = msg.partition("File not found:")
+        tail = tail.strip()
+        if "/" in tail:
+            msg = f"File not found: {tail.rsplit('/', 1)[-1]}"
+    if len(msg) > _ERROR_SUFFIX_MAX_LEN:
+        msg = msg[: _ERROR_SUFFIX_MAX_LEN - 3] + "..."
+    return msg
+
+
 def _detect_tool_failure(tool_name: str, result: str | None) -> tuple[bool, str]:
     """Inspect a tool result string for signs of failure.
 
-    Returns ``(is_failure, suffix)`` where *suffix* is an informational tag
-    like ``" [exit 1]"`` for terminal failures, or ``" [error]"`` for generic
-    failures.  On success, returns ``(False, "")``.
+    Returns ``(is_failure, suffix)`` where *suffix* is a short informational
+    tag like ``" [exit 1]"`` for terminal failures, ``" [full]"`` for memory
+    overflow, or a trimmed error message (``" [File not found: foo.py]"``).
+    On success returns ``(False, "")``.
     """
     if result is None:
         return False, ""
     if file_mutation_result_landed(tool_name, result):
         return False, ""
 
+    data = safe_json_loads(result)
+
+    # Terminal: non-zero exit code is the canonical failure signal.
     if tool_name == "terminal":
-        data = safe_json_loads(result)
         if isinstance(data, dict):
             exit_code = data.get("exit_code")
             if exit_code is not None and exit_code != 0:
+                err_msg = data.get("error")
+                if err_msg:
+                    return True, f" [{_trim_error(str(err_msg))}]"
                 return True, f" [exit {exit_code}]"
         return False, ""
 
-    # Memory-specific: distinguish "full" from real errors
+    # Memory: distinguish "store full" from real errors.
     if tool_name == "memory":
-        data = safe_json_loads(result)
         if isinstance(data, dict):
             if data.get("success") is False and "exceed the limit" in data.get("error", ""):
                 return True, " [full]"
 
+    # Structured error in JSON result (any tool that surfaces {"error": ...}).
+    if isinstance(data, dict):
+        err = data.get("error") or data.get("message")
+        if err and (data.get("success") is False or "error" in data):
+            return True, f" [{_trim_error(str(err))}]"
+
     # Generic heuristic for non-terminal tools
     # Multimodal tool results (dicts with _multimodal=True) are not strings —
     # treat them as successes since failures would be JSON-encoded strings.
diff --git a/agent/tool_executor.py b/agent/tool_executor.py
index e350994b4dc..438a6337074 100644
--- a/agent/tool_executor.py
+++ b/agent/tool_executor.py
@@ -388,6 +388,7 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe
                     agent.tool_progress_callback(
                         "tool.completed", function_name, None, None,
                         duration=tool_duration, is_error=is_error,
+                        result=function_result,
                     )
                 except Exception as cb_err:
                     logging.debug(f"Tool progress callback error: {cb_err}")
@@ -822,6 +823,7 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
                 agent.tool_progress_callback(
                     "tool.completed", function_name, None, None,
                     duration=tool_duration, is_error=_is_error_result,
+                    result=function_result,
                 )
             except Exception as cb_err:
                 logging.debug(f"Tool progress callback error: {cb_err}")
diff --git a/cli.py b/cli.py
index e20472c7623..53bf7bf5dea 100644
--- a/cli.py
+++ b/cli.py
@@ -10264,9 +10264,7 @@ class HermesCLI:
                 self._last_scrollback_tool = function_name
                 try:
                     from agent.display import get_cute_tool_message
-                    line = get_cute_tool_message(function_name, stored_args, duration)
-                    if is_error:
-                        line = f"{line} [error]"
+                    line = get_cute_tool_message(function_name, stored_args, duration, result=kwargs.get("result"))
                     _cprint(f"  {line}")
                 except Exception:
                     pass