From 094d732378059a341c7660ab97d1adbe782403fa Mon Sep 17 00:00:00 2001 From: "Albert.Zhou" Date: Sat, 23 May 2026 20:54:17 -0700 Subject: [PATCH] fix(cli): surface tool failures with specific error messages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Improves the failure suffix on tool completion lines. Instead of always showing '[error]' for non-terminal failures, parse the tool's JSON result and surface the actual message: Before: ┊ 📖 read foo.py 0.1s [error] After: ┊ 📖 read foo.py 0.1s [File not found: foo.py] Before: ┊ 💻 $ ls bad 0.1s [exit 2] After: ┊ 💻 $ ls bad 0.1s [ls: cannot access 'bad'...] Adds a _trim_error helper that strips long absolute paths down to the filename and caps the message at 48 chars so the suffix stays readable on narrow terminals. Threads the tool result through the tool.completed progress callback so agent/display.get_cute_tool_message can inspect it. The cli.py [error] post-suffix is removed in favor of the richer suffix _detect_tool_failure now produces directly. Originally proposed in PR #17194 by Albert.Zhou; salvaged onto current main with the dead-code preview-length bumps dropped (tool_preview_length config already strictly caps previews, so the per-tool n= defaults are unreachable). Co-authored-by: Albert.Zhou --- agent/display.py | 44 ++++++++++++++++++++++++++++++++++++------ agent/tool_executor.py | 2 ++ cli.py | 4 +--- 3 files changed, 41 insertions(+), 9 deletions(-) diff --git a/agent/display.py b/agent/display.py index cdfc88f46a3..d1460ea46fd 100644 --- a/agent/display.py +++ b/agent/display.py @@ -787,33 +787,65 @@ class KawaiiSpinner: # Cute tool message (completion line that replaces the spinner) # ========================================================================= +_ERROR_SUFFIX_MAX_LEN = 48 + + +def _trim_error(msg: str) -> str: + """Shrink an error message for inline display in a tool status line. 
+ + Strips overly long absolute paths down to just the filename so the + suffix stays readable on narrow terminals. + """ + msg = msg.strip() + # Common case: "File not found: /very/long/absolute/path/foo.py" + if "File not found:" in msg: + _, _, tail = msg.partition("File not found:") + tail = tail.strip() + if "/" in tail: + msg = f"File not found: {tail.rsplit('/', 1)[-1]}" + if len(msg) > _ERROR_SUFFIX_MAX_LEN: + msg = msg[: _ERROR_SUFFIX_MAX_LEN - 3] + "..." + return msg + + def _detect_tool_failure(tool_name: str, result: str | None) -> tuple[bool, str]: """Inspect a tool result string for signs of failure. - Returns ``(is_failure, suffix)`` where *suffix* is an informational tag - like ``" [exit 1]"`` for terminal failures, or ``" [error]"`` for generic - failures. On success, returns ``(False, "")``. + Returns ``(is_failure, suffix)`` where *suffix* is a short informational + tag like ``" [exit 1]"`` for terminal failures, ``" [full]"`` for memory + overflow, or a trimmed error message (``" [File not found: foo.py]"``). + On success returns ``(False, "")``. """ if result is None: return False, "" if file_mutation_result_landed(tool_name, result): return False, "" + data = safe_json_loads(result) + + # Terminal: non-zero exit code is the canonical failure signal. if tool_name == "terminal": - data = safe_json_loads(result) if isinstance(data, dict): exit_code = data.get("exit_code") if exit_code is not None and exit_code != 0: + err_msg = data.get("error") + if err_msg: + return True, f" [{_trim_error(str(err_msg))}]" return True, f" [exit {exit_code}]" return False, "" - # Memory-specific: distinguish "full" from real errors + # Memory: distinguish "store full" from real errors. if tool_name == "memory": - data = safe_json_loads(result) if isinstance(data, dict): if data.get("success") is False and "exceed the limit" in data.get("error", ""): return True, " [full]" + # Structured error in JSON result (any tool that surfaces {"error": ...}). 
+ if isinstance(data, dict): + err = data.get("error") or data.get("message") + if err and (data.get("success") is False or "error" in data): + return True, f" [{_trim_error(str(err))}]" + # Generic heuristic for non-terminal tools # Multimodal tool results (dicts with _multimodal=True) are not strings — # treat them as successes since failures would be JSON-encoded strings. diff --git a/agent/tool_executor.py b/agent/tool_executor.py index e350994b4dc..438a6337074 100644 --- a/agent/tool_executor.py +++ b/agent/tool_executor.py @@ -388,6 +388,7 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe agent.tool_progress_callback( "tool.completed", function_name, None, None, duration=tool_duration, is_error=is_error, + result=function_result, ) except Exception as cb_err: logging.debug(f"Tool progress callback error: {cb_err}") @@ -822,6 +823,7 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe agent.tool_progress_callback( "tool.completed", function_name, None, None, duration=tool_duration, is_error=_is_error_result, + result=function_result, ) except Exception as cb_err: logging.debug(f"Tool progress callback error: {cb_err}") diff --git a/cli.py b/cli.py index e20472c7623..53bf7bf5dea 100644 --- a/cli.py +++ b/cli.py @@ -10264,9 +10264,7 @@ class HermesCLI: self._last_scrollback_tool = function_name try: from agent.display import get_cute_tool_message - line = get_cute_tool_message(function_name, stored_args, duration) - if is_error: - line = f"{line} [error]" + line = get_cute_tool_message(function_name, stored_args, duration, result=kwargs.get("result")) _cprint(f" {line}") except Exception: pass