feat(display): friendly human-phrased tool labels for built-in tools (#55166)

* feat(display): friendly human-phrased tool labels for built-in tools

Built-in tools now render ChatGPT-style status verbs ('Searching the web
for ...', 'Reading <file>', 'Browsing <url>') on the CLI spinner and
gateway/desktop tool-progress instead of the raw tool name.

- agent/display.py: _TOOL_VERBS map + build_tool_label() + set/get
  friendly-labels flag (default on). Custom/plugin/MCP tools fall back to
  the raw preview; verbose gateway mode left untouched (debug surface).
- tool_executor.py / tui_gateway / gateway: route the three spinner sites,
  the TUI _tool_ctx, and the gateway all/new progress line through the label.
- config: display.friendly_tool_labels (default True, per-platform aware).

Zero new core tool / schema footprint — pure display layer.

* docs: add PR infographic for friendly tool labels

* fix(display): preserve arg preview in gateway friendly labels + update tests

The first gateway pass re-derived the label from the callback's `args`, which
is empty ({}) at the gateway tool.started callsite — the command/query lives in
the `preview` string, so terminal rendered as a bare '💻 Running' and dedup
collapsed consecutive commands. Now the gateway prefixes the verb onto the
already-computed preview via get_tool_verb/tool_verb_connector/verb_drops_preview,
preserving the command/url/query. CLI spinner path (real args) keeps build_tool_label.

Tests: update test_run_progress_topics exact-format assertions to the friendly
form ('💻 Running pwd'), add a format-agnostic preview extractor for the
truncation tests (works for both quoted-legacy and verb-prefixed output).

* test(tui): update resume-display context to friendly tool label

_tool_ctx now uses build_tool_label, so the desktop resume-view context for a
search_files turn reads 'Searching files for resume' instead of the bare
'resume' preview — consistent with live tool-progress. Update the assertion.

* test(tui): harden no-race worker test against sibling shard leakage

test_session_create_no_race_keeps_worker_alive flaked under -j 8: a daemon
build thread leaked from a prior session.create test in the same shard process
fires close/unregister against its own (foreign) session_key after this test
patches the global approval hooks, polluting the captured lists. Scope the
assertions to this session's own session_key so the regression intent
(this session's worker/notify must survive) is preserved while the test
becomes immune to shard composition. Not related to friendly-tool-labels.
This commit is contained in:
Teknium 2026-06-29 20:31:17 -07:00 committed by GitHub
parent 41c85fb946
commit 481caa66f2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 285 additions and 25 deletions

View file

@ -537,6 +537,122 @@ def build_tool_preview(tool_name: str, args: dict, max_len: int | None = None) -
return preview
# =========================================================================
# Friendly tool labels (human-phrased verbs for built-in tools)
#
# Turns "web_search <query>" into "Searching the web for <query>" — the
# ChatGPT-style "Searching…/Reading…" surface. Curated and built-in only:
# we know each core tool's semantics, so the verb is fixed, not computed.
# Custom/plugin/MCP tools have no entry and fall back to the raw preview.
# =========================================================================
# Curated verb table: built-in tool name -> present-participle verb phrase.
# build_tool_label() renders the phrase on its own, or joins it to the tool's
# argument preview when one is available (e.g. "Reading docs/api.md").
_TOOL_VERBS: dict[str, str] = {
    "web_search": "Searching the web",
    "web_extract": "Reading",
    "browser_navigate": "Browsing",
    "browser_click": "Clicking",
    "browser_type": "Typing",
    "read_file": "Reading",
    "write_file": "Writing",
    "patch": "Editing",
    "search_files": "Searching files",
    "terminal": "Running",
    "execute_code": "Running code",
    "image_generate": "Generating image",
    "video_generate": "Generating video",
    "text_to_speech": "Generating speech",
    "vision_analyze": "Looking at the image",
    "session_search": "Searching past sessions",
    "skill_view": "Reading skill",
    "skills_list": "Listing skills",
    "skill_manage": "Updating skill",
    "delegate_task": "Delegating",
    "cronjob": "Scheduling",
    "clarify": "Asking",
    "memory": "Updating memory",
    "todo": "Updating tasks",
}

# Tools whose verb is shown alone: the raw argument preview is not appended.
_TOOL_VERBS_NO_PREVIEW: frozenset[str] = frozenset((
    "skills_list",
    "session_search",
))

# Tools whose verb is joined to the preview with " for " instead of a space:
# "Searching the web for <query>" reads better than "Searching the web <query>".
_TOOL_VERBS_FOR_CONNECTOR: frozenset[str] = frozenset((
    "web_search",
    "search_files",
))

# Module-wide switch for friendly labels; on by default and changed through
# set_friendly_tool_labels().
_friendly_tool_labels: bool = True
def set_friendly_tool_labels(enabled: bool) -> None:
    """Switch friendly tool labels on or off (display.friendly_tool_labels).

    The argument is reduced to a strict ``bool`` by truthiness and stored in
    the module-level ``_friendly_tool_labels`` flag.
    """
    global _friendly_tool_labels
    _friendly_tool_labels = True if enabled else False
def get_friendly_tool_labels() -> bool:
    """Report the current value of the module-level friendly-labels flag."""
    return _friendly_tool_labels
def get_tool_verb(tool_name: str) -> str | None:
    """Look up the curated verb phrase for ``tool_name``.

    The result is ``None`` in two situations: friendly labels are switched
    off, or the tool has no entry in ``_TOOL_VERBS`` (custom/plugin/MCP
    tools).

    A caller that already holds a computed argument preview can assemble the
    label itself as ``verb + tool_verb_connector(tool_name) + preview``.
    """
    return _TOOL_VERBS.get(tool_name) if _friendly_tool_labels else None
def tool_verb_connector(tool_name: str) -> str:
    """Pick the text placed between a tool's verb and its argument preview.

    Tools listed in ``_TOOL_VERBS_FOR_CONNECTOR`` get ``" for "``; every
    other tool name gets a single space.
    """
    if tool_name in _TOOL_VERBS_FOR_CONNECTOR:
        return " for "
    return " "
def verb_drops_preview(tool_name: str) -> bool:
    """Tell whether ``tool_name``'s verb is rendered on its own.

    True exactly when the tool is listed in ``_TOOL_VERBS_NO_PREVIEW``; for
    those tools the argument preview is left out of the label.
    """
    verb_only = tool_name in _TOOL_VERBS_NO_PREVIEW
    return verb_only
def build_tool_label(tool_name: str, args: dict, max_len: int | None = None) -> str | None:
    """Produce a human-phrased status label for one tool call.

    When friendly labels are enabled and ``tool_name`` has an entry in
    ``_TOOL_VERBS``, the label is the verb phrase, followed by the argument
    preview when the tool keeps its preview and one is available
    (``web_search`` -> "Searching the web for ..."). In every other case
    (friendly labels disabled, or a tool without a curated verb such as a
    custom/plugin/MCP tool) the result is exactly what
    :func:`build_tool_preview` returns, so this function can stand in for it.

    ``max_len`` is forwarded unchanged to :func:`build_tool_preview`; it
    bounds the preview portion, not the verb prefix.
    """
    # The verb table is only consulted while the feature is switched on.
    verb = _TOOL_VERBS.get(tool_name) if _friendly_tool_labels else None
    if not verb:
        return build_tool_preview(tool_name, args, max_len=max_len)

    # Verb-only tools return before any preview is computed.
    if tool_name in _TOOL_VERBS_NO_PREVIEW:
        return verb

    preview = build_tool_preview(tool_name, args, max_len=max_len)
    if not preview:
        return verb

    joiner = " for " if tool_name in _TOOL_VERBS_FOR_CONNECTOR else " "
    return f"{verb}{joiner}{preview}"
# =========================================================================
# Inline diff previews for write actions
# =========================================================================

View file

@ -24,6 +24,7 @@ from typing import Any, Optional
from agent.display import (
KawaiiSpinner,
build_tool_preview as _build_tool_preview,
build_tool_label as _build_tool_label,
get_cute_tool_message as _get_cute_tool_message_impl,
get_tool_emoji as _get_tool_emoji,
redact_tool_args_for_display as _redact_tool_args_for_display,
@ -1224,7 +1225,7 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
face = random.choice(KawaiiSpinner.get_waiting_faces())
emoji = _get_tool_emoji(function_name)
display_args = _redact_tool_args_for_display(function_name, function_args) or function_args
preview = _build_tool_preview(function_name, display_args) or function_name
preview = _build_tool_label(function_name, display_args) or function_name
spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots', print_fn=agent._print_fn)
spinner.start()
_ce_result = None
@ -1258,7 +1259,7 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
face = random.choice(KawaiiSpinner.get_waiting_faces())
emoji = _get_tool_emoji(function_name)
display_args = _redact_tool_args_for_display(function_name, function_args) or function_args
preview = _build_tool_preview(function_name, display_args) or function_name
preview = _build_tool_label(function_name, display_args) or function_name
spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots', print_fn=agent._print_fn)
spinner.start()
_mem_result = None
@ -1290,7 +1291,7 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
face = random.choice(KawaiiSpinner.get_waiting_faces())
emoji = _get_tool_emoji(function_name)
display_args = _redact_tool_args_for_display(function_name, function_args) or function_args
preview = _build_tool_preview(function_name, display_args) or function_name
preview = _build_tool_label(function_name, display_args) or function_name
spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots', print_fn=agent._print_fn)
spinner.start()
_spinner_result = None

8
cli.py
View file

@ -751,6 +751,14 @@ try:
except Exception:
pass
# Initialize friendly tool labels from config (default on)
try:
from agent.display import set_friendly_tool_labels
_ftl = CLI_CONFIG.get("display", {}).get("friendly_tool_labels", True)
set_friendly_tool_labels(bool(_ftl))
except Exception:
pass
# Neuter AsyncHttpxClientWrapper.__del__ before any AsyncOpenAI clients are
# created. The SDK's __del__ schedules aclose() on asyncio.get_running_loop()
# which, during CLI idle time, finds prompt_toolkit's event loop and tries to

View file

@ -15385,6 +15385,14 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
except Exception:
pass
# Apply friendly tool labels config (default on) — per-platform aware
try:
from agent.display import set_friendly_tool_labels
_ftl = resolve_display_setting(user_config, platform_key, "friendly_tool_labels", True)
set_friendly_tool_labels(bool(_ftl))
except Exception:
pass
# Tool progress mode — resolved per-platform with env var fallback
_resolved_tp = resolve_display_setting(user_config, platform_key, "tool_progress")
_env_tp = os.getenv("HERMES_TOOL_PROGRESS_MODE")
@ -15678,12 +15686,29 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
msg = _code_block_short
last_was_terminal_block[0] = True
elif preview:
from agent.display import get_tool_preview_max_len
from agent.display import (
get_tool_preview_max_len,
get_tool_verb,
tool_verb_connector,
verb_drops_preview,
)
_pl = get_tool_preview_max_len()
_cap = _pl if _pl > 0 else 40
if len(preview) > _cap:
preview = preview[:_cap - 3] + "..."
msg = f"{emoji} {tool_name}: \"{preview}\""
# Friendly labels: render a human-phrased line for built-in
# tools ("🔍 Searching the web for ...") by prefixing the verb
# onto the preview the callback already computed (so the
# command/url/query is preserved). Custom/plugin/MCP tools
# have no verb and fall back to the raw "tool_name: ..." form.
_verb = get_tool_verb(tool_name)
if _verb:
if verb_drops_preview(tool_name):
msg = f"{emoji} {_verb}"
else:
msg = f"{emoji} {_verb}{tool_verb_connector(tool_name)}{preview}"
else:
msg = f"{emoji} {tool_name}: \"{preview}\""
last_was_terminal_block[0] = False
else:
msg = f"{emoji} {tool_name}..."

View file

@ -1725,6 +1725,11 @@ DEFAULT_CONFIG = {
"tool_progress_command": False, # Enable /verbose command in messaging gateway
"tool_progress_overrides": {}, # DEPRECATED — use display.platforms instead
"tool_preview_length": 0, # Max chars for tool call previews (0 = no limit, show full paths/commands)
# Human-phrased tool status labels for built-in tools: "Searching the
# web for ...", "Reading <file>", "Browsing <url>" instead of the raw
# tool name. Applies to CLI spinner + gateway/desktop tool-progress.
# Custom/plugin/MCP tools always fall back to the raw preview.
"friendly_tool_labels": True,
# How gateway tool-progress is grouped on platforms that support message
# editing: "accumulate" (default) edits one bubble in place; "separate"
# sends one message per tool (the pre-v0.9 behavior, noisier). Only

Binary file not shown.

After

Width:  |  Height:  |  Size: 900 KiB

View file

@ -401,3 +401,82 @@ class TestEditDiffPreview:
assert any("a/file2.py" in line for line in rendered)
assert not any("a/file7.py" in line for line in rendered)
assert "additional file" in rendered[-1]
class TestBuildToolLabel:
    """Checks for build_tool_label(): verb-phrased labels for built-in tools."""

    @pytest.fixture(autouse=True)
    def _enable_friendly(self):
        # Force the flag on before every test and again afterwards, so a test
        # that switches it off cannot affect whichever test runs next.
        from agent.display import set_friendly_tool_labels as toggle

        toggle(True)
        yield
        toggle(True)

    def test_web_search_uses_for_connector(self):
        from agent.display import build_tool_label

        result = build_tool_label("web_search", {"query": "weather in NYC"})
        assert result == 'Searching the web for weather in NYC'

    def test_web_extract_reads_url(self):
        from agent.display import build_tool_label

        call_args = {"urls": ["https://example.com/page"]}
        result = build_tool_label("web_extract", call_args)
        assert result is not None
        assert result.startswith("Reading ")
        assert "example.com/page" in result

    def test_browser_navigate_browses_url(self):
        from agent.display import build_tool_label

        result = build_tool_label("browser_navigate", {"url": "https://news.site"})
        assert result == "Browsing https://news.site"

    def test_read_file_uses_basename(self):
        from agent.display import build_tool_label

        call_args = {"path": "/home/u/project/main.py"}
        result = build_tool_label("read_file", call_args)
        assert result is not None
        assert result.startswith("Reading ")
        assert "main.py" in result

    def test_search_files_uses_for_connector(self):
        from agent.display import build_tool_label

        result = build_tool_label("search_files", {"pattern": "TODO"})
        assert result == "Searching files for TODO"

    def test_verb_only_for_no_preview_tools(self):
        from agent.display import build_tool_label

        # session_search renders the verb alone; the query is not echoed.
        result = build_tool_label("session_search", {"query": "auth refactor"})
        assert result == "Searching past sessions"

    def test_verb_only_when_no_preview_available(self):
        from agent.display import build_tool_label

        # With empty args there is no preview, so only the verb is returned.
        result = build_tool_label("image_generate", {})
        assert result == "Generating image"

    def test_unknown_tool_falls_back_to_preview(self):
        from agent.display import build_tool_label, build_tool_preview

        # A custom/plugin/MCP tool has no verb entry -> raw preview behavior.
        call_args = {"some_arg": "value"}
        result = build_tool_label("custom_mcp_tool", call_args)
        assert result == build_tool_preview("custom_mcp_tool", call_args)

    def test_disabled_falls_back_to_preview(self):
        from agent.display import (
            build_tool_label,
            build_tool_preview,
            set_friendly_tool_labels,
        )

        set_friendly_tool_labels(False)
        call_args = {"query": "weather in NYC"}
        result = build_tool_label("web_search", call_args)
        # With the feature off the label must equal the raw preview exactly.
        assert result == build_tool_preview("web_search", call_args)
        assert "Searching the web" not in (result or "")

    def test_every_known_verb_renders_without_error(self):
        from agent.display import build_tool_label, _TOOL_VERBS

        # Every curated tool must yield a non-empty label for minimal args.
        for tool_name in _TOOL_VERBS:
            result = build_tool_label(tool_name, {"query": "x", "path": "x", "url": "x"})
            assert result, f"{tool_name} produced empty label"

View file

@ -308,7 +308,7 @@ async def test_run_agent_progress_stays_in_originating_topic(monkeypatch, tmp_pa
assert adapter.sent == [
{
"chat_id": "-1001",
"content": '💻 terminal: "pwd"',
"content": '💻 Running pwd',
"reply_to": None,
"metadata": {"thread_id": "17585"},
}
@ -496,6 +496,27 @@ async def test_run_agent_feishu_progress_replies_inside_existing_thread(monkeypa
# ---------------------------------------------------------------------------
def _extract_progress_preview(content: str) -> str | None:
"""Extract the argument-preview portion from a tool-progress message.
Handles both render styles:
- Legacy / custom tools: ``🔧 tool_name: "<preview>"`` (quoted)
- Friendly built-in verb: ``💻 Running <preview>`` (verb prefix, no quotes)
"""
import re
# Legacy quoted form takes precedence when present.
match = re.search(r'"(.+)"', content)
if match:
return match.group(1)
# Friendly form: "<emoji> <verb> <preview>". The terminal verb is "Running".
marker = " Running "
idx = content.find(marker)
if idx != -1:
return content[idx + len(marker):].strip()
return None
def _run_long_preview_helper(monkeypatch, tmp_path, preview_length=0):
"""Shared setup for long-preview truncation tests.
@ -552,13 +573,10 @@ def test_all_mode_default_truncation_40_chars(monkeypatch, tmp_path):
assert result["final_response"] == "done"
assert adapter.sent
content = adapter.sent[0]["content"]
# The long command should be truncated — total preview <= 40 chars
# The long command should be truncated — the preview portion <= 40 chars.
assert "..." in content
# Extract the preview part between quotes
import re
match = re.search(r'"(.+)"', content)
assert match, f"No quoted preview found in: {content}"
preview_text = match.group(1)
preview_text = _extract_progress_preview(content)
assert preview_text is not None, f"No preview found in: {content}"
assert len(preview_text) <= 40, f"Preview too long ({len(preview_text)}): {preview_text}"
@ -568,11 +586,9 @@ def test_all_mode_respects_custom_preview_length(monkeypatch, tmp_path):
assert result["final_response"] == "done"
assert adapter.sent
content = adapter.sent[0]["content"]
# With 120-char cap, the command (165 chars) should still be truncated but longer
import re
match = re.search(r'"(.+)"', content)
assert match, f"No quoted preview found in: {content}"
preview_text = match.group(1)
# With 120-char cap, the command (165 chars) should still be truncated but longer.
preview_text = _extract_progress_preview(content)
assert preview_text is not None, f"No preview found in: {content}"
# Should be longer than the 40-char default
assert len(preview_text) > 40, f"Preview suspiciously short ({len(preview_text)}): {preview_text}"
# But still capped at 120

View file

@ -854,7 +854,7 @@ def test_history_to_messages_preserves_tool_calls_for_resume_display():
assert server._history_to_messages(history) == [
{"role": "user", "text": "first prompt"},
{"context": "resume", "name": "search_files", "role": "tool"},
{"context": "Searching files for resume", "name": "search_files", "role": "tool"},
{"role": "assistant", "text": "first answer"},
{"role": "user", "text": "second prompt"},
]
@ -5461,13 +5461,23 @@ def test_session_create_no_race_keeps_worker_alive(monkeypatch):
assert built, "agent build did not complete within timeout"
# Build finished without a close race — nothing should have been
# cleaned up by the orphan check.
# cleaned up by the orphan check. Scope the assertions to THIS
# test's own session_key: a daemon build thread leaked from a prior
# session.create test in the same shard process can fire close/
# unregister against its own (foreign) key after we've patched the
# global hooks, polluting these lists. Filtering by this session's
# key keeps the regression intent (this session's worker/notify must
# survive) while making the test immune to shard composition.
# (flaky under -j 8: foreign key e.g. 20260629_210208_d4f545)
own_key = session["session_key"]
own_closed = [k for k in closed_workers if k == own_key]
own_unregistered = [k for k in unregistered_keys if k == own_key]
assert (
closed_workers == []
), f"build thread closed its own worker despite no race: {closed_workers}"
own_closed == []
), f"build thread closed its own worker despite no race: {own_closed}"
assert (
unregistered_keys == []
), f"build thread unregistered its own notify despite no race: {unregistered_keys}"
own_unregistered == []
), f"build thread unregistered its own notify despite no race: {own_unregistered}"
# Session should have the live worker installed.
assert session.get("slash_worker") is not None

View file

@ -3160,9 +3160,9 @@ def _session_info(agent, session: dict | None = None) -> dict:
def _tool_ctx(name: str, args: dict) -> str:
try:
from agent.display import build_tool_preview
from agent.display import build_tool_label
return build_tool_preview(name, args, max_len=80) or ""
return build_tool_label(name, args, max_len=80) or ""
except Exception:
return ""