mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-07-01 12:02:05 +00:00
* feat(display): friendly human-phrased tool labels for built-in tools
Built-in tools now render ChatGPT-style status verbs ('Searching the web
for ...', 'Reading <file>', 'Browsing <url>') on the CLI spinner and
gateway/desktop tool-progress instead of the raw tool name.
- agent/display.py: _TOOL_VERBS map + build_tool_label() + set/get
friendly-labels flag (default on). Custom/plugin/MCP tools fall back to
the raw preview; verbose gateway mode left untouched (debug surface).
- tool_executor.py / tui_gateway / gateway: route the three spinner sites,
the TUI _tool_ctx, and the gateway all/new progress line through the label.
- config: display.friendly_tool_labels (default True, per-platform aware).
Zero new core tool / schema footprint — pure display layer.
* docs: add PR infographic for friendly tool labels
* fix(display): preserve arg preview in gateway friendly labels + update tests
The first gateway pass re-derived the label from the callback's `args`, which
is empty ({}) at the gateway tool.started callsite — the command/query lives in
the `preview` string, so terminal rendered as a bare '💻 Running' and dedup
collapsed consecutive commands. Now the gateway prefixes the verb onto the
already-computed preview via get_tool_verb/tool_verb_connector/verb_drops_preview,
preserving the command/url/query. CLI spinner path (real args) keeps build_tool_label.
Tests: update test_run_progress_topics exact-format assertions to the friendly
form ('💻 Running pwd'), add a format-agnostic preview extractor for the
truncation tests (works for both quoted-legacy and verb-prefixed output).
* test(tui): update resume-display context to friendly tool label
_tool_ctx now uses build_tool_label, so the desktop resume-view context for a
search_files turn reads 'Searching files for resume' instead of the bare
'resume' preview — consistent with live tool-progress. Update the assertion.
* test(tui): harden no-race worker test against sibling shard leakage
test_session_create_no_race_keeps_worker_alive flaked under -j 8: a daemon
build thread leaked from a prior session.create test in the same shard process
fires close/unregister against its own (foreign) session_key after this test
patches the global approval hooks, polluting the captured lists. Scope the
assertions to this session's own session_key so the regression intent
(this session's worker/notify must survive) is preserved while the test
becomes immune to shard composition. Not related to friendly-tool-labels.
482 lines
18 KiB
Python
482 lines
18 KiB
Python
"""Tests for agent/display.py — build_tool_preview() and inline diff previews."""
|
|
|
|
import json
|
|
import pytest
|
|
from unittest.mock import MagicMock
|
|
|
|
from agent.display import (
|
|
build_tool_preview,
|
|
capture_local_edit_snapshot,
|
|
extract_edit_diff,
|
|
get_cute_tool_message,
|
|
redact_tool_args_for_display,
|
|
set_tool_preview_max_len,
|
|
_render_inline_unified_diff,
|
|
_summarize_rendered_diff_sections,
|
|
render_edit_diff_with_delta,
|
|
)
|
|
|
|
|
|
@pytest.fixture(autouse=True)
def reset_tool_preview_max_len():
    """Set the tool-preview max length to 0 before and after every test.

    Individual tests call ``set_tool_preview_max_len`` with other values;
    resetting on both sides of the ``yield`` keeps that setting from
    carrying over between tests.
    """
    baseline_limit = 0
    set_tool_preview_max_len(baseline_limit)
    yield
    set_tool_preview_max_len(baseline_limit)
|
|
|
|
|
|
class TestBuildToolPreview:
    """Tests for build_tool_preview defensive handling and normal operation."""

    def test_none_args_returns_none(self):
        """PR #453: None args should not crash, should return None."""
        preview = build_tool_preview("terminal", None)
        assert preview is None

    def test_empty_dict_returns_none(self):
        """Empty dict has no keys to preview."""
        preview = build_tool_preview("terminal", {})
        assert preview is None

    def test_known_tool_with_primary_arg(self):
        """Known tool with its primary arg should return a preview string."""
        preview = build_tool_preview("terminal", {"command": "ls -la"})
        assert preview is not None
        assert "ls -la" in preview

    def test_terminal_preview_compacts_shell_plumbing(self):
        """A cd/redirect/tail/echo-wrapped command previews as the core command."""
        wrapped_command = (
            'cd /Users/brooklyn/www/bb-rainbows && pnpm run lint 2>&1 '
            '| tail -20; echo "lint_exit=${PIPESTATUS[0]}"'
        )
        preview = build_tool_preview("terminal", {"command": wrapped_command})
        assert preview == "pnpm run lint"

    def test_terminal_preview_compacts_multi_command_probe(self):
        """A multi-command probe previews as the first command plus a count."""
        probe_command = (
            'which node pnpm corepack; node -v; echo "---"; '
            'corepack --version 2>&1; echo "---pnpm via corepack---"; '
            'pnpm --version 2>&1 | tail -5'
        )
        preview = build_tool_preview("terminal", {"command": probe_command})
        assert preview == "which node pnpm corepack + 3 commands"

    def test_execute_code_preview_uses_same_shell_summary(self):
        """execute_code's ``code`` arg is summarized like a terminal command."""
        tool_args = {
            "code": 'cd /tmp/demo && python -m pytest -q 2>&1 | tail -5; echo "exit=$?"'
        }
        preview = build_tool_preview("execute_code", tool_args)
        assert preview == "python -m pytest -q"

    def test_web_search_preview(self):
        """The web_search preview contains the query text."""
        preview = build_tool_preview("web_search", {"query": "hello world"})
        assert preview is not None
        assert "hello world" in preview

    def test_read_file_preview(self):
        """read_file with only an offset previews as basename plus start line."""
        tool_args = {"path": "/tmp/test.py", "offset": 1}
        preview = build_tool_preview("read_file", tool_args)
        assert preview is not None
        assert preview == "test.py L1"

    def test_read_file_preview_includes_requested_line_range(self):
        """read_file with offset and limit previews as basename plus line range."""
        tool_args = {"path": "./package.json", "offset": 1, "limit": 5}
        preview = build_tool_preview("read_file", tool_args)
        assert preview == "package.json L1-5"

    def test_browser_type_preview_redacts_api_key(self):
        """An API-key-shaped ``text`` value must not appear verbatim."""
        api_key = "sk-proj-ABCD1234567890EFGH"
        preview = build_tool_preview("browser_type", {"ref": "@e3", "text": api_key})
        assert preview is not None
        assert api_key not in preview
        # Only a short prefix and an elision marker are expected to remain.
        assert "sk-pro" in preview
        assert "..." in preview

    def test_browser_type_preview_keeps_normal_text(self):
        """Ordinary typed text is shown unredacted in the preview."""
        typed_text = "hello world search query"
        preview = build_tool_preview("browser_type", {"ref": "@e3", "text": typed_text})
        assert preview is not None
        assert typed_text in preview

    def test_browser_type_display_args_redact_api_key(self):
        """Display args mask a token-shaped ``text`` and leave ``ref`` alone."""
        token = "ghp_ABCDEFGHIJ1234567890"
        display_args = redact_tool_args_for_display(
            "browser_type",
            {"ref": "@e3", "text": token},
        )
        assert token not in str(display_args)
        assert display_args["ref"] == "@e3"
        assert display_args["text"].startswith("ghp_AB")

    def test_browser_type_display_args_keep_normal_text(self):
        """Display args are returned unchanged for this non-token text."""
        typed_text = "my_normal_password_123"
        original_args = {"ref": "@e3", "text": typed_text}
        display_args = redact_tool_args_for_display("browser_type", original_args)
        assert display_args == {"ref": "@e3", "text": typed_text}

    def test_unknown_tool_with_fallback_key(self):
        """Unknown tool but with a recognized fallback key should still preview."""
        preview = build_tool_preview("custom_tool", {"query": "test query"})
        assert preview is not None
        assert "test query" in preview

    def test_unknown_tool_no_matching_key(self):
        """Unknown tool with no recognized keys should return None."""
        preview = build_tool_preview("custom_tool", {"foo": "bar"})
        assert preview is None

    def test_long_value_truncated(self):
        """Preview should truncate long values."""
        oversized_command = "a" * 100
        preview = build_tool_preview(
            "terminal",
            {"command": oversized_command},
            max_len=40,
        )
        assert preview is not None
        assert len(preview) <= 43  # max_len + "..."

    def test_process_tool_with_none_args(self):
        """Process tool special case should also handle None args."""
        preview = build_tool_preview("process", None)
        assert preview is None

    def test_process_tool_normal(self):
        """The process preview mentions the requested action."""
        tool_args = {"action": "poll", "session_id": "abc123"}
        preview = build_tool_preview("process", tool_args)
        assert preview is not None
        assert "poll" in preview

    def test_todo_tool_read(self):
        """todo without a ``todos`` list previews as a read."""
        preview = build_tool_preview("todo", {"merge": False})
        assert preview is not None
        assert "reading" in preview

    def test_todo_tool_with_todos(self):
        """todo with one entry previews with a task count."""
        single_task = {"id": "1", "content": "test", "status": "pending"}
        preview = build_tool_preview("todo", {"todos": [single_task]})
        assert preview is not None
        assert "1 task" in preview

    def test_memory_tool_add(self):
        """The memory add preview mentions the target."""
        tool_args = {"action": "add", "target": "user", "content": "test note"}
        preview = build_tool_preview("memory", tool_args)
        assert preview is not None
        assert "user" in preview

    def test_memory_replace_missing_old_text_marked(self):
        """Missing or None ``old_text`` is shown as a placeholder, not empty quotes."""
        # Avoid empty quotes "" in the preview when old_text is missing/None.
        replace_preview = build_tool_preview(
            "memory",
            {"action": "replace", "target": "memory"},
        )
        assert replace_preview == '~memory: "<missing old_text>"'

        remove_preview = build_tool_preview(
            "memory",
            {"action": "remove", "target": "memory", "old_text": None},
        )
        assert remove_preview == '-memory: "<missing old_text>"'

    def test_session_search_preview(self):
        """The session_search preview contains the query text."""
        preview = build_tool_preview("session_search", {"query": "find something"})
        assert preview is not None
        assert "find something" in preview

    def test_delegate_task_single_goal_preview(self):
        """A single delegate_task goal is previewed verbatim."""
        preview = build_tool_preview("delegate_task", {"goal": "Review gateway status"})
        assert preview == "Review gateway status"

    def test_delegate_task_batch_goal_preview(self):
        """A batch previews as a task count followed by pipe-joined goals."""
        batch = {"tasks": [{"goal": "Review PR A"}, {"goal": "Review PR B"}]}
        preview = build_tool_preview("delegate_task", batch)
        assert preview == "2 tasks: Review PR A | Review PR B"

    def test_delegate_task_batch_preview_handles_missing_non_string_goals(self):
        """None goals, non-string goals and non-dict entries do not break the preview."""
        batch = {"tasks": [{"goal": None}, {"goal": 123}, "not-a-task"]}
        preview = build_tool_preview("delegate_task", batch)
        assert preview == "2 tasks: ? | 123"

    def test_delegate_task_batch_preview_respects_max_len(self):
        """With max_len=30 the batch preview is exactly 30 characters long."""
        batch = {"tasks": [{"goal": "A" * 80}, {"goal": "B" * 80}]}
        preview = build_tool_preview("delegate_task", batch, max_len=30)
        assert preview == "2 tasks: AAAAAAAAAAAAAAAAAA..."
        assert len(preview) == 30

    def test_false_like_args_zero(self):
        """Non-dict falsy values should return None, not crash."""
        for falsy_args in (0, "", []):
            assert build_tool_preview("terminal", falsy_args) is None
|
|
|
|
|
|
class TestCuteToolMessagePreviewLength:
    """Tests for get_cute_tool_message preview length, failure markers and redaction."""

    def test_terminal_preview_unlimited_when_config_is_zero(self):
        """With the limit set to 0, the terminal line carries no ellipsis."""
        set_tool_preview_max_len(0)
        shell_command = "curl -s http://localhost:9222/json/list | jq -r '.[] | select(.type==\"page\")' | head -5"

        message = get_cute_tool_message("terminal", {"command": shell_command}, 0.1)

        assert "curl -s http://localhost:9222/json/list | jq -r '.[] | select(.type==\"page\")'" in message
        assert "head -5" not in message
        assert "..." not in message

    def test_terminal_preview_uses_positive_configured_limit(self):
        """With the limit set to 80, the same command is shown without an ellipsis."""
        set_tool_preview_max_len(80)
        shell_command = "curl -s http://localhost:9222/json/list | jq -r '.[] | select(.type==\"page\")' | head -5"

        message = get_cute_tool_message("terminal", {"command": shell_command}, 0.1)

        assert "curl -s http://localhost:9222/json/list | jq -r '.[] | select(.type==\"page\")'" in message
        assert "..." not in message
        assert "head -5" not in message

    def test_search_files_preview_uses_positive_configured_limit_not_default(self):
        """A long search pattern is shown in full when the limit is 80."""
        set_tool_preview_max_len(80)
        search_pattern = "function.formatToolCall.context.preview.compactPreview.maxLength.truncate"

        message = get_cute_tool_message("search_files", {"pattern": search_pattern}, 0.1)

        assert search_pattern in message
        assert "..." not in message

    def test_path_preview_uses_positive_configured_limit_not_default(self):
        """A deeply nested path shows its file name without an ellipsis at limit 80."""
        set_tool_preview_max_len(80)
        nested_path = "/tmp/hermes-test-preview-length/deeply/nested/path/test-output.txt"

        message = get_cute_tool_message("read_file", {"path": nested_path}, 0.1)

        assert "test-output.txt" in message
        assert "..." not in message

    def test_write_file_lint_error_result_is_not_marked_failed(self):
        """A lint error inside a write_file result does not add an [error] marker."""
        payload = {
            "bytes_written": 12,
            "lint": {"status": "error", "output": "SyntaxError: invalid syntax"},
        }
        tool_result = json.dumps(payload)

        message = get_cute_tool_message(
            "write_file", {"path": "/tmp/a.py"}, 0.1, result=tool_result
        )

        assert "[error]" not in message

    def test_patch_lsp_diagnostics_result_is_not_marked_failed(self):
        """LSP diagnostics inside a successful patch result do not add an [error] marker."""
        payload = {
            "success": True,
            "diff": "--- a/tmp.py\n+++ b/tmp.py\n",
            "lsp_diagnostics": "<diagnostics>ERROR [1:1] type mismatch</diagnostics>",
        }
        tool_result = json.dumps(payload)

        message = get_cute_tool_message(
            "patch", {"path": "/tmp/a.py"}, 0.1, result=tool_result
        )

        assert "[error]" not in message

    def test_delegate_task_batch_message_includes_goals(self):
        """A batch delegate_task line shows the count and the pipe-joined goals."""
        batch = {"tasks": [{"goal": "Review PR A"}, {"goal": "Review PR B"}]}
        message = get_cute_tool_message("delegate_task", batch, 1.2)
        assert "2x: Review PR A | Review PR B" in message

    def test_browser_type_cute_message_redacts_api_key(self):
        """An API-key-shaped ``text`` value must not appear verbatim in the line."""
        api_key = "sk-proj-ABCD1234567890EFGH"
        message = get_cute_tool_message(
            "browser_type",
            {"ref": "@password", "text": api_key},
            0.1,
            result='{"success": true, "typed": "sk-pro...EFGH"}',
        )

        assert api_key not in message
        assert "sk-pro" in message

    def test_browser_type_cute_message_keeps_normal_text(self):
        """Ordinary typed text is shown unredacted in the line."""
        typed_text = "hello world"
        message = get_cute_tool_message(
            "browser_type",
            {"ref": "@search", "text": typed_text},
            0.1,
            result='{"success": true, "typed": "hello world"}',
        )

        assert typed_text in message
|
|
|
|
|
|
class TestEditDiffPreview:
    """Tests for edit-diff extraction, inline rendering and summarization."""

    def test_extract_edit_diff_for_patch(self):
        """A patch result's ``diff`` field is returned by extract_edit_diff."""
        patch_result = '{"success": true, "diff": "--- a/x\\n+++ b/x\\n"}'
        extracted = extract_edit_diff("patch", patch_result)
        assert extracted is not None
        assert "+++ b/x" in extracted

    def test_render_inline_unified_diff_colors_added_and_removed_lines(self):
        """Rendered output names both files first and includes a background color code."""
        unified_diff = (
            "--- a/cli.py\n"
            "+++ b/cli.py\n"
            "@@ -1,2 +1,2 @@\n"
            "-old line\n"
            "+new line\n"
            " context\n"
        )
        output_lines = _render_inline_unified_diff(unified_diff)

        header = output_lines[0]
        assert "a/cli.py" in header
        assert "b/cli.py" in header
        assert any("old line" in text for text in output_lines)
        assert any("new line" in text for text in output_lines)
        # "48;2;" is the fragment of an ANSI truecolor background escape.
        assert any("48;2;" in text for text in output_lines)

    def test_extract_edit_diff_ignores_non_edit_tools(self):
        """A ``diff`` field on a web_search result is not treated as an edit diff."""
        extracted = extract_edit_diff("web_search", '{"diff": "--- a\\n+++ b\\n"}')
        assert extracted is None

    def test_extract_edit_diff_uses_local_snapshot_for_write_file(self, tmp_path):
        """write_file results carry no diff; one is built from the pre-edit snapshot."""
        note_file = tmp_path / "note.txt"
        note_file.write_text("old\n", encoding="utf-8")
        tool_args = {"path": str(note_file)}

        # The snapshot must be captured before the file content changes.
        before_edit = capture_local_edit_snapshot("write_file", tool_args)

        note_file.write_text("new\n", encoding="utf-8")

        extracted = extract_edit_diff(
            "write_file",
            '{"bytes_written": 4}',
            function_args={"path": str(note_file)},
            snapshot=before_edit,
        )

        assert extracted is not None
        assert "--- a/" in extracted
        assert "+++ b/" in extracted
        assert "-old" in extracted
        assert "+new" in extracted

    def test_render_edit_diff_with_delta_invokes_printer(self):
        """A result with a diff returns True and sends the lines to print_fn."""
        print_spy = MagicMock()
        patch_result = '{"diff": "--- a/x\\n+++ b/x\\n@@ -1 +1 @@\\n-old\\n+new\\n"}'

        was_rendered = render_edit_diff_with_delta(
            "patch",
            patch_result,
            print_fn=print_spy,
        )

        assert was_rendered is True
        assert print_spy.call_count >= 2
        printed_lines = [recorded.args[0] for recorded in print_spy.call_args_list]
        assert any("a/x" in text and "b/x" in text for text in printed_lines)
        assert any("old" in text for text in printed_lines)
        assert any("new" in text for text in printed_lines)

    def test_render_edit_diff_with_delta_skips_without_diff(self):
        """A result with no diff returns False."""
        was_rendered = render_edit_diff_with_delta("patch", '{"success": true}')
        assert was_rendered is False

    def test_render_edit_diff_with_delta_handles_renderer_errors(self, monkeypatch):
        """An exception from the summarizer yields False and prints nothing."""
        print_spy = MagicMock()
        failing_summarizer = MagicMock(side_effect=RuntimeError("boom"))

        monkeypatch.setattr(
            "agent.display._summarize_rendered_diff_sections",
            failing_summarizer,
        )

        was_rendered = render_edit_diff_with_delta(
            "patch",
            '{"diff": "--- a/x\\n+++ b/x\\n"}',
            print_fn=print_spy,
        )

        assert was_rendered is False
        assert print_spy.call_count == 0

    def test_summarize_rendered_diff_sections_truncates_large_diff(self):
        """A 120-line diff capped at 20 lines yields 21 lines ending in an omission note."""
        added_lines = "".join(f"+line{i}\n" for i in range(120))
        large_diff = "--- a/x.py\n+++ b/x.py\n" + added_lines

        summary = _summarize_rendered_diff_sections(large_diff, max_lines=20)

        assert len(summary) == 21
        assert "omitted" in summary[-1]

    def test_summarize_rendered_diff_sections_limits_file_count(self):
        """With max_files=3 the first three files are shown and the last is not."""
        multi_file_diff = "".join(
            f"--- a/file{i}.py\n+++ b/file{i}.py\n+line{i}\n"
            for i in range(8)
        )

        summary = _summarize_rendered_diff_sections(
            multi_file_diff, max_files=3, max_lines=50
        )

        assert any("a/file0.py" in text for text in summary)
        assert any("a/file1.py" in text for text in summary)
        assert any("a/file2.py" in text for text in summary)
        assert not any("a/file7.py" in text for text in summary)
        assert "additional file" in summary[-1]
|
|
|
|
|
|
class TestBuildToolLabel:
    """Friendly human-phrased tool labels for built-in tools."""

    @pytest.fixture(autouse=True)
    def _enable_friendly(self):
        """Turn friendly labels on before each test and back on afterwards.

        One test switches the flag off; re-enabling it after the ``yield``
        keeps that from affecting later tests.
        """
        from agent.display import set_friendly_tool_labels

        set_friendly_tool_labels(True)
        yield
        set_friendly_tool_labels(True)

    def test_web_search_uses_for_connector(self):
        """web_search joins the verb and the query with "for"."""
        from agent.display import build_tool_label

        friendly = build_tool_label("web_search", {"query": "weather in NYC"})
        assert friendly == 'Searching the web for weather in NYC'

    def test_web_extract_reads_url(self):
        """web_extract renders as "Reading " followed by the URL."""
        from agent.display import build_tool_label

        tool_args = {"urls": ["https://example.com/page"]}
        friendly = build_tool_label("web_extract", tool_args)
        assert friendly is not None
        assert friendly.startswith("Reading ")
        assert "example.com/page" in friendly

    def test_browser_navigate_browses_url(self):
        """browser_navigate renders as "Browsing <url>"."""
        from agent.display import build_tool_label

        friendly = build_tool_label("browser_navigate", {"url": "https://news.site"})
        assert friendly == "Browsing https://news.site"

    def test_read_file_uses_basename(self):
        """read_file renders as "Reading " and includes the file name."""
        from agent.display import build_tool_label

        tool_args = {"path": "/home/u/project/main.py"}
        friendly = build_tool_label("read_file", tool_args)
        assert friendly is not None
        assert friendly.startswith("Reading ")
        assert "main.py" in friendly

    def test_search_files_uses_for_connector(self):
        """search_files joins the verb and the pattern with "for"."""
        from agent.display import build_tool_label

        friendly = build_tool_label("search_files", {"pattern": "TODO"})
        assert friendly == "Searching files for TODO"

    def test_verb_only_for_no_preview_tools(self):
        """session_search shows only its verb even when a query is supplied."""
        from agent.display import build_tool_label

        # session_search is verb-only — no redundant query echo
        friendly = build_tool_label("session_search", {"query": "auth refactor"})
        assert friendly == "Searching past sessions"

    def test_verb_only_when_no_preview_available(self):
        """A known tool with empty args still yields its bare verb."""
        from agent.display import build_tool_label

        # image_generate with empty args still yields the verb (no preview)
        friendly = build_tool_label("image_generate", {})
        assert friendly == "Generating image"

    def test_unknown_tool_falls_back_to_preview(self):
        """A tool without a verb entry gets exactly what build_tool_preview returns."""
        from agent.display import build_tool_label, build_tool_preview

        tool_args = {"some_arg": "value"}
        # A custom/plugin/MCP tool with no verb entry → raw preview behavior
        friendly = build_tool_label("custom_mcp_tool", tool_args)
        raw_preview = build_tool_preview("custom_mcp_tool", tool_args)
        assert friendly == raw_preview

    def test_disabled_falls_back_to_preview(self):
        """With friendly labels off, a built-in tool renders as its raw preview."""
        from agent.display import (
            build_tool_label,
            build_tool_preview,
            set_friendly_tool_labels,
        )

        set_friendly_tool_labels(False)
        tool_args = {"query": "weather in NYC"}
        friendly = build_tool_label("web_search", tool_args)
        # With the feature off, must match the raw preview exactly
        raw_preview = build_tool_preview("web_search", tool_args)
        assert friendly == raw_preview
        assert "Searching the web" not in (friendly or "")

    def test_every_known_verb_renders_without_error(self):
        """Every tool in _TOOL_VERBS yields a non-empty label for minimal args."""
        from agent.display import build_tool_label, _TOOL_VERBS

        # Each built-in verb must produce a non-empty label given minimal args.
        for known_tool in _TOOL_VERBS:
            friendly = build_tool_label(
                known_tool,
                {"query": "x", "path": "x", "url": "x"},
            )
            assert friendly, f"{known_tool} produced empty label"
|