feat(display): friendly human-phrased tool labels for built-in tools (#55166)

* feat(display): friendly human-phrased tool labels for built-in tools

  Built-in tools now render ChatGPT-style status verbs ('Searching the web for ...', 'Reading <file>', 'Browsing <url>') on the CLI spinner and gateway/desktop tool-progress instead of the raw tool name.

  - agent/display.py: _TOOL_VERBS map + build_tool_label() + set/get friendly-labels flag (default on). Custom/plugin/MCP tools fall back to the raw preview; verbose gateway mode left untouched (debug surface).
  - tool_executor.py / tui_gateway / gateway: route the three spinner sites, the TUI _tool_ctx, and the gateway all/new progress line through the label.
  - config: display.friendly_tool_labels (default True, per-platform aware).

  Zero new core tool / schema footprint — pure display layer.

* docs: add PR infographic for friendly tool labels

* fix(display): preserve arg preview in gateway friendly labels + update tests

  The first gateway pass re-derived the label from the callback's `args`, which is empty ({}) at the gateway tool.started callsite — the command/query lives in the `preview` string, so terminal rendered as a bare '💻 Running' and dedup collapsed consecutive commands. Now the gateway prefixes the verb onto the already-computed preview via get_tool_verb/tool_verb_connector/verb_drops_preview, preserving the command/url/query. CLI spinner path (real args) keeps build_tool_label.

  Tests: update test_run_progress_topics exact-format assertions to the friendly form ('💻 Running pwd'), add a format-agnostic preview extractor for the truncation tests (works for both quoted-legacy and verb-prefixed output).

* test(tui): update resume-display context to friendly tool label

  _tool_ctx now uses build_tool_label, so the desktop resume-view context for a search_files turn reads 'Searching files for resume' instead of the bare 'resume' preview — consistent with live tool-progress. Update the assertion.

* test(tui): harden no-race worker test against sibling shard leakage

  test_session_create_no_race_keeps_worker_alive flaked under -j 8: a daemon build thread leaked from a prior session.create test in the same shard process fires close/unregister against its own (foreign) session_key after this test patches the global approval hooks, polluting the captured lists. Scope the assertions to this session's own session_key so the regression intent (this session's worker/notify must survive) is preserved while the test becomes immune to shard composition. Not related to friendly-tool-labels.
2026-06-30 11:52:04 +00:00 · 2026-06-29 20:31:17 -07:00 · 2026-06-29 20:31:17 -07:00 · 481caa66f2
commit 481caa66f2
parent 41c85fb946
10 changed files with 285 additions and 25 deletions
--- a/agent/display.py
+++ b/agent/display.py
@ -537,6 +537,122 @@ def build_tool_preview(tool_name: str, args: dict, max_len: int | None = None) -
    return preview


+# =========================================================================
+# Friendly tool labels (human-phrased verbs for built-in tools)
+#
+# Turns "web_search <query>" into "Searching the web for <query>" — the
+# ChatGPT-style "Searching…/Reading…" surface.  Curated and built-in only:
+# we know each core tool's semantics, so the verb is fixed, not computed.
+# Custom/plugin/MCP tools have no entry and fall back to the raw preview.
+# =========================================================================
+
+# Each entry maps a built-in tool name to its present-participle verb phrase.
+# build_tool_label() appends the tool's argument preview when one is
+# available, joined by a space (e.g. "Reading docs/api.md") or by " for "
+# for tools in _TOOL_VERBS_FOR_CONNECTOR; tools in _TOOL_VERBS_NO_PREVIEW
+# render the verb alone.
+_TOOL_VERBS: dict[str, str] = {
+    "web_search": "Searching the web",
+    "web_extract": "Reading",
+    "browser_navigate": "Browsing",
+    "browser_click": "Clicking",
+    "browser_type": "Typing",
+    "read_file": "Reading",
+    "write_file": "Writing",
+    "patch": "Editing",
+    "search_files": "Searching files",
+    "terminal": "Running",
+    "execute_code": "Running code",
+    "image_generate": "Generating image",
+    "video_generate": "Generating video",
+    "text_to_speech": "Generating speech",
+    "vision_analyze": "Looking at the image",
+    "session_search": "Searching past sessions",
+    "skill_view": "Reading skill",
+    "skills_list": "Listing skills",
+    "skill_manage": "Updating skill",
+    "delegate_task": "Delegating",
+    "cronjob": "Scheduling",
+    "clarify": "Asking",
+    "memory": "Updating memory",
+    "todo": "Updating tasks",
+}
+
+# Verbs that read better without the raw argument preview appended.
+_TOOL_VERBS_NO_PREVIEW: frozenset[str] = frozenset({
+    "skills_list",
+    "session_search",
+})
+
+# Verbs that take a "for" connector before the preview (search-style phrasing):
+# "Searching the web for <query>" reads better than "Searching the web <query>".
+_TOOL_VERBS_FOR_CONNECTOR: frozenset[str] = frozenset({
+    "web_search",
+    "search_files",
+})
+
+_friendly_tool_labels: bool = True
+
+
+def set_friendly_tool_labels(enabled: bool) -> None:
+    """Toggle friendly human-phrased tool labels (display.friendly_tool_labels)."""
+    global _friendly_tool_labels
+    _friendly_tool_labels = bool(enabled)
+
+
+def get_friendly_tool_labels() -> bool:
+    """Return whether friendly tool labels are enabled."""
+    return _friendly_tool_labels
+
+
+def get_tool_verb(tool_name: str) -> str | None:
+    """Return the friendly verb for a built-in tool, or None.
+
+    Returns None when friendly labels are disabled or the tool has no curated
+    verb (custom/plugin/MCP tools).  Callers that already hold a computed
+    argument preview can compose the label themselves as
+    ``f"{verb}{tool_verb_connector(tool_name)}{preview}"``, after checking
+    :func:`verb_drops_preview` for tools whose verb should render alone.
+    """
+    if not _friendly_tool_labels:
+        return None
+    return _TOOL_VERBS.get(tool_name)
+
+
+def tool_verb_connector(tool_name: str) -> str:
+    """Return the connector between a verb and its preview (" for " or " ")."""
+    return " for " if tool_name in _TOOL_VERBS_FOR_CONNECTOR else " "
+
+
+def verb_drops_preview(tool_name: str) -> bool:
+    """Whether the verb should render alone, without the argument preview."""
+    return tool_name in _TOOL_VERBS_NO_PREVIEW
+
+
+def build_tool_label(tool_name: str, args: dict, max_len: int | None = None) -> str | None:
+    """Build a human-phrased status label for a tool call.
+
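+    For built-in tools with a known verb (``web_search`` -> "Searching the
+    web for ..."), returns the verb optionally followed by the argument
+    preview.  For everything else (custom/plugin/MCP tools, or when friendly
+    labels are disabled) returns the raw preview, so callers can use this as a
+    drop-in replacement for :func:`build_tool_preview`.
+
+    ``max_len`` is forwarded unchanged to :func:`build_tool_preview`; the verb
+    and connector are prepended afterwards, so the returned label can be
+    longer than the preview alone.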
+    """
+    if not _friendly_tool_labels:
+        return build_tool_preview(tool_name, args, max_len=max_len)
+
+    verb = _TOOL_VERBS.get(tool_name)
+    if not verb:
+        return build_tool_preview(tool_name, args, max_len=max_len)
+
+    if tool_name in _TOOL_VERBS_NO_PREVIEW:
+        return verb
+
+    preview = build_tool_preview(tool_name, args, max_len=max_len)
+    if not preview:
+        return verb
+    if tool_name in _TOOL_VERBS_FOR_CONNECTOR:
+        return f"{verb} for {preview}"
+    return f"{verb} {preview}"
+
+
 # =========================================================================
 # Inline diff previews for write actions
 # =========================================================================
--- a/agent/tool_executor.py
+++ b/agent/tool_executor.py
@ -24,6 +24,7 @@ from typing import Any, Optional
 from agent.display import (
    KawaiiSpinner,
    build_tool_preview as _build_tool_preview,
+    build_tool_label as _build_tool_label,
    get_cute_tool_message as _get_cute_tool_message_impl,
    get_tool_emoji as _get_tool_emoji,
    redact_tool_args_for_display as _redact_tool_args_for_display,
@ -1224,7 +1225,7 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
                face = random.choice(KawaiiSpinner.get_waiting_faces())
                emoji = _get_tool_emoji(function_name)
                display_args = _redact_tool_args_for_display(function_name, function_args) or function_args
-                preview = _build_tool_preview(function_name, display_args) or function_name
+                preview = _build_tool_label(function_name, display_args) or function_name
                spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots', print_fn=agent._print_fn)
                spinner.start()
            _ce_result = None
@ -1258,7 +1259,7 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
                face = random.choice(KawaiiSpinner.get_waiting_faces())
                emoji = _get_tool_emoji(function_name)
                display_args = _redact_tool_args_for_display(function_name, function_args) or function_args
-                preview = _build_tool_preview(function_name, display_args) or function_name
+                preview = _build_tool_label(function_name, display_args) or function_name
                spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots', print_fn=agent._print_fn)
                spinner.start()
            _mem_result = None
@ -1290,7 +1291,7 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
                face = random.choice(KawaiiSpinner.get_waiting_faces())
                emoji = _get_tool_emoji(function_name)
                display_args = _redact_tool_args_for_display(function_name, function_args) or function_args
-                preview = _build_tool_preview(function_name, display_args) or function_name
+                preview = _build_tool_label(function_name, display_args) or function_name
                spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots', print_fn=agent._print_fn)
                spinner.start()
            _spinner_result = None
--- a/cli.py
+++ b/cli.py
@ -751,6 +751,14 @@ try:
 except Exception:
    pass

+# Initialize friendly tool labels from config (default on)
+try:
+    from agent.display import set_friendly_tool_labels
+    _ftl = CLI_CONFIG.get("display", {}).get("friendly_tool_labels", True)
+    set_friendly_tool_labels(bool(_ftl))
+except Exception:
+    pass
+
 # Neuter AsyncHttpxClientWrapper.__del__ before any AsyncOpenAI clients are
 # created.  The SDK's __del__ schedules aclose() on asyncio.get_running_loop()
 # which, during CLI idle time, finds prompt_toolkit's event loop and tries to
--- a/gateway/run.py
+++ b/gateway/run.py
@ -15385,6 +15385,14 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
        except Exception:
            pass

+        # Apply friendly tool labels config (default on) — per-platform aware
+        try:
+            from agent.display import set_friendly_tool_labels
+            _ftl = resolve_display_setting(user_config, platform_key, "friendly_tool_labels", True)
+            set_friendly_tool_labels(bool(_ftl))
+        except Exception:
+            pass
+
        # Tool progress mode — resolved per-platform with env var fallback
        _resolved_tp = resolve_display_setting(user_config, platform_key, "tool_progress")
        _env_tp = os.getenv("HERMES_TOOL_PROGRESS_MODE")
@ -15678,12 +15686,29 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
                msg = _code_block_short
                last_was_terminal_block[0] = True
            elif preview:
-                from agent.display import get_tool_preview_max_len
+                from agent.display import (
+                    get_tool_preview_max_len,
+                    get_tool_verb,
+                    tool_verb_connector,
+                    verb_drops_preview,
+                )
                _pl = get_tool_preview_max_len()
                _cap = _pl if _pl > 0 else 40
                if len(preview) > _cap:
                    preview = preview[:_cap - 3] + "..."
-                msg = f"{emoji} {tool_name}: \"{preview}\""
+                # Friendly labels: render a human-phrased line for built-in
+                # tools ("🔍 Searching the web for ...") by prefixing the verb
+                # onto the preview the callback already computed (so the
+                # command/url/query is preserved).  Custom/plugin/MCP tools
+                # have no verb and fall back to the raw "tool_name: ..." form.
+                _verb = get_tool_verb(tool_name)
+                if _verb:
+                    if verb_drops_preview(tool_name):
+                        msg = f"{emoji} {_verb}"
+                    else:
+                        msg = f"{emoji} {_verb}{tool_verb_connector(tool_name)}{preview}"
+                else:
+                    msg = f"{emoji} {tool_name}: \"{preview}\""
                last_was_terminal_block[0] = False
            else:
                msg = f"{emoji} {tool_name}..."
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@ -1725,6 +1725,11 @@ DEFAULT_CONFIG = {
        "tool_progress_command": False,  # Enable /verbose command in messaging gateway
        "tool_progress_overrides": {},  # DEPRECATED — use display.platforms instead
        "tool_preview_length": 0,  # Max chars for tool call previews (0 = no limit, show full paths/commands)
+        # Human-phrased tool status labels for built-in tools: "Searching the
+        # web for ...", "Reading <file>", "Browsing <url>" instead of the raw
+        # tool name. Applies to CLI spinner + gateway/desktop tool-progress.
+        # Custom/plugin/MCP tools always fall back to the raw preview.
+        "friendly_tool_labels": True,
        # How gateway tool-progress is grouped on platforms that support message
        # editing: "accumulate" (default) edits one bubble in place; "separate"
        # sends one message per tool (the pre-v0.9 behavior, noisier). Only
--- a/infographic/friendly-tool-labels/infographic.png
+++ b/infographic/friendly-tool-labels/infographic.png
--- a/tests/agent/test_display.py
+++ b/tests/agent/test_display.py
@ -401,3 +401,82 @@ class TestEditDiffPreview:
        assert any("a/file2.py" in line for line in rendered)
        assert not any("a/file7.py" in line for line in rendered)
        assert "additional file" in rendered[-1]
+
+
+class TestBuildToolLabel:
+    """Friendly human-phrased tool labels for built-in tools."""
+
+    @pytest.fixture(autouse=True)
+    def _enable_friendly(self):
+        from agent.display import set_friendly_tool_labels
+        set_friendly_tool_labels(True)
+        yield
+        set_friendly_tool_labels(True)
+
+    def test_web_search_uses_for_connector(self):
+        from agent.display import build_tool_label
+        label = build_tool_label("web_search", {"query": "weather in NYC"})
+        assert label == 'Searching the web for weather in NYC'
+
+    def test_web_extract_reads_url(self):
+        from agent.display import build_tool_label
+        label = build_tool_label("web_extract", {"urls": ["https://example.com/page"]})
+        assert label is not None
+        assert label.startswith("Reading ")
+        assert "example.com/page" in label
+
+    def test_browser_navigate_browses_url(self):
+        from agent.display import build_tool_label
+        label = build_tool_label("browser_navigate", {"url": "https://news.site"})
+        assert label == "Browsing https://news.site"
+
+    def test_read_file_uses_basename(self):
+        from agent.display import build_tool_label
+        label = build_tool_label("read_file", {"path": "/home/u/project/main.py"})
+        assert label is not None
+        assert label.startswith("Reading ")
+        assert "main.py" in label
+
+    def test_search_files_uses_for_connector(self):
+        from agent.display import build_tool_label
+        label = build_tool_label("search_files", {"pattern": "TODO"})
+        assert label == "Searching files for TODO"
+
+    def test_verb_only_for_no_preview_tools(self):
+        from agent.display import build_tool_label
+        # session_search is verb-only — no redundant query echo
+        label = build_tool_label("session_search", {"query": "auth refactor"})
+        assert label == "Searching past sessions"
+
+    def test_verb_only_when_no_preview_available(self):
+        from agent.display import build_tool_label
+        # image_generate with empty args still yields the verb (no preview)
+        label = build_tool_label("image_generate", {})
+        assert label == "Generating image"
+
+    def test_unknown_tool_falls_back_to_preview(self):
+        from agent.display import build_tool_label, build_tool_preview
+        args = {"some_arg": "value"}
+        # A custom/plugin/MCP tool with no verb entry → raw preview behavior
+        label = build_tool_label("custom_mcp_tool", args)
+        assert label == build_tool_preview("custom_mcp_tool", args)
+
+    def test_disabled_falls_back_to_preview(self):
+        from agent.display import (
+            build_tool_label,
+            build_tool_preview,
+            set_friendly_tool_labels,
+        )
+        set_friendly_tool_labels(False)
+        args = {"query": "weather in NYC"}
+        label = build_tool_label("web_search", args)
+        # With the feature off, must match the raw preview exactly
+        assert label == build_tool_preview("web_search", args)
+        assert "Searching the web" not in (label or "")
+
+    def test_every_known_verb_renders_without_error(self):
+        from agent.display import build_tool_label, _TOOL_VERBS
+        # Each built-in verb must produce a non-empty label given minimal args.
+        for tool_name in _TOOL_VERBS:
+            label = build_tool_label(tool_name, {"query": "x", "path": "x", "url": "x"})
+            assert label, f"{tool_name} produced empty label"
--- a/tests/gateway/test_run_progress_topics.py
+++ b/tests/gateway/test_run_progress_topics.py
@ -308,7 +308,7 @@ async def test_run_agent_progress_stays_in_originating_topic(monkeypatch, tmp_pa
    assert adapter.sent == [
        {
            "chat_id": "-1001",
-            "content": '💻 terminal: "pwd"',
+            "content": '💻 Running pwd',
            "reply_to": None,
            "metadata": {"thread_id": "17585"},
        }
@ -496,6 +496,27 @@ async def test_run_agent_feishu_progress_replies_inside_existing_thread(monkeypa
 # ---------------------------------------------------------------------------


+def _extract_progress_preview(content: str) -> str | None:
+    """Extract the argument-preview portion from a tool-progress message.
+
+    Handles both render styles:
+    - Legacy / custom tools:  ``🔧 tool_name: "<preview>"`` (quoted)
+    - Friendly built-in verb: ``💻 Running <preview>`` (verb prefix, no quotes)
+    """
+    import re
+
+    # Legacy quoted form takes precedence when present.
+    match = re.search(r'"(.+)"', content)
+    if match:
+        return match.group(1)
+    # Friendly form: "<emoji> <verb> <preview>". The terminal verb is "Running".
+    marker = " Running "
+    idx = content.find(marker)
+    if idx != -1:
+        return content[idx + len(marker):].strip()
+    return None
+
+
 def _run_long_preview_helper(monkeypatch, tmp_path, preview_length=0):
    """Shared setup for long-preview truncation tests.

@ -552,13 +573,10 @@ def test_all_mode_default_truncation_40_chars(monkeypatch, tmp_path):
    assert result["final_response"] == "done"
    assert adapter.sent
    content = adapter.sent[0]["content"]
-    # The long command should be truncated — total preview <= 40 chars
+    # The long command should be truncated — the preview portion <= 40 chars.
    assert "..." in content
-    # Extract the preview part between quotes
-    import re
-    match = re.search(r'"(.+)"', content)
-    assert match, f"No quoted preview found in: {content}"
-    preview_text = match.group(1)
+    preview_text = _extract_progress_preview(content)
+    assert preview_text is not None, f"No preview found in: {content}"
    assert len(preview_text) <= 40, f"Preview too long ({len(preview_text)}): {preview_text}"


@ -568,11 +586,9 @@ def test_all_mode_respects_custom_preview_length(monkeypatch, tmp_path):
    assert result["final_response"] == "done"
    assert adapter.sent
    content = adapter.sent[0]["content"]
-    # With 120-char cap, the command (165 chars) should still be truncated but longer
-    import re
-    match = re.search(r'"(.+)"', content)
-    assert match, f"No quoted preview found in: {content}"
-    preview_text = match.group(1)
+    # With 120-char cap, the command (165 chars) should still be truncated but longer.
+    preview_text = _extract_progress_preview(content)
+    assert preview_text is not None, f"No preview found in: {content}"
    # Should be longer than the 40-char default
    assert len(preview_text) > 40, f"Preview suspiciously short ({len(preview_text)}): {preview_text}"
    # But still capped at 120
--- a/tests/test_tui_gateway_server.py
+++ b/tests/test_tui_gateway_server.py
@ -854,7 +854,7 @@ def test_history_to_messages_preserves_tool_calls_for_resume_display():

    assert server._history_to_messages(history) == [
        {"role": "user", "text": "first prompt"},
-        {"context": "resume", "name": "search_files", "role": "tool"},
+        {"context": "Searching files for resume", "name": "search_files", "role": "tool"},
        {"role": "assistant", "text": "first answer"},
        {"role": "user", "text": "second prompt"},
    ]
@ -5461,13 +5461,23 @@ def test_session_create_no_race_keeps_worker_alive(monkeypatch):
        assert built, "agent build did not complete within timeout"

        # Build finished without a close race — nothing should have been
-        # cleaned up by the orphan check.
+        # cleaned up by the orphan check.  Scope the assertions to THIS
+        # test's own session_key: a daemon build thread leaked from a prior
+        # session.create test in the same shard process can fire close/
+        # unregister against its own (foreign) key after we've patched the
+        # global hooks, polluting these lists.  Filtering by this session's
+        # key keeps the regression intent (this session's worker/notify must
+        # survive) while making the test immune to shard composition.
+        # (flaky under -j 8: foreign key e.g. 20260629_210208_d4f545)
+        own_key = session["session_key"]
+        own_closed = [k for k in closed_workers if k == own_key]
+        own_unregistered = [k for k in unregistered_keys if k == own_key]
        assert (
-            closed_workers == []
-        ), f"build thread closed its own worker despite no race: {closed_workers}"
+            own_closed == []
+        ), f"build thread closed its own worker despite no race: {own_closed}"
        assert (
-            unregistered_keys == []
-        ), f"build thread unregistered its own notify despite no race: {unregistered_keys}"
+            own_unregistered == []
+        ), f"build thread unregistered its own notify despite no race: {own_unregistered}"

        # Session should have the live worker installed.
        assert session.get("slash_worker") is not None
--- a/tui_gateway/server.py
+++ b/tui_gateway/server.py
@ -3160,9 +3160,9 @@ def _session_info(agent, session: dict | None = None) -> dict:

 def _tool_ctx(name: str, args: dict) -> str:
    try:
-        from agent.display import build_tool_preview
+        from agent.display import build_tool_label

-        return build_tool_preview(name, args, max_len=80) or ""
+        return build_tool_label(name, args, max_len=80) or ""
    except Exception:
        return ""