mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-07-01 12:02:05 +00:00
When a MoA preset is selected, each reference model's answer now renders in the CLI as a thinking-style block labelled with its source model, BEFORE the aggregator responds — so the mixture-of-agents process is visible instead of a silent pause. The aggregator's response (and its tool actions) follow as normal. Mechanism (shared seam, all surfaces): - MoAChatCompletions/MoAClient take an optional reference_callback and emit 'moa.reference' (index/count/label/text) per reference, then 'moa.aggregating' (aggregator label) once. agent_init wires this to the agent's tool_progress_callback, which every surface already consumes — so the events reach CLI/TUI/desktop/gateway with no new plumbing. - CLI _on_tool_progress renders 'moa.reference' as a labelled '┊ ◇ Reference i/n — <model>' header + a thinking-style preview (reusing _emit_reasoning_preview), and 'moa.aggregating' as a spinner transition. Display-only; never touches message history (cache-safe). Turn-scoped reference cache: the agent loop calls the facade once per tool-loop iteration, but the advisory message view is identical across iterations within a turn, so references are now run AND displayed once per user turn (keyed by the advisory view's signature) instead of re-running/re-spamming on every iteration. This also cuts reference API cost from O(iterations) back to O(turns). Verified live via interactive PTY on the opus-gpt preset (gpt-5.5 + opus refs): reference blocks render once per turn, labelled by model, before the aggregator; fresh blocks on each new turn; aggregator tool actions still execute. Follow-up: TUI/desktop rich rendering + gateway batched-summary already receive the events via tool_progress_callback; their surface-specific renderers are a separate change.
295 lines
14 KiB
Python
295 lines
14 KiB
Python
"""Tests for stacked tool progress scrollback lines in the CLI TUI.

When tool_progress_mode is "all" or "new", _on_tool_progress should print
persistent lines to scrollback on tool.completed, restoring the stacked
tool history that was lost when the TUI switched to a single-line spinner.
"""
|
|
|
|
import os
|
|
import sys
|
|
import importlib
|
|
from unittest.mock import MagicMock, patch
|
|
|
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
|
|
|
# Module-level reference to the cli module (set by _make_cli on first call)
|
|
_cli_mod = None
|
|
_UNSET = object()
|
|
|
|
|
|
def _make_cli(tool_progress="all", verbose=_UNSET):
    """Create a HermesCLI instance with minimal mocking.

    The ``cli`` module is imported and reloaded while every ``prompt_toolkit``
    submodule it may import is replaced by a ``MagicMock`` and while
    ``LLM_MODEL`` / ``HERMES_MAX_ITERATIONS`` are blanked in the environment.
    The instance is then built with ``get_tool_definitions`` stubbed to
    return ``[]`` and ``CLI_CONFIG`` replaced by a small fixed config.

    Args:
        tool_progress: Value stored at ``display.tool_progress`` in the
            patched ``CLI_CONFIG``.
        verbose: Forwarded as ``HermesCLI(verbose=...)`` when supplied. When
            left at the ``_UNSET`` sentinel, ``HermesCLI()`` is called with
            no arguments so the constructor's own default applies.

    Returns:
        The ``HermesCLI`` instance created from the reloaded ``cli`` module.

    Side effects:
        Rebinds the module-level ``_cli_mod`` to the reloaded ``cli`` module.
    """
    global _cli_mod

    _clean_config = {
        "model": {
            "default": "anthropic/claude-opus-4.6",
            "base_url": "https://openrouter.ai/api/v1",
            "provider": "auto",
        },
        "display": {"compact": False, "tool_progress": tool_progress},
        "agent": {},
        "terminal": {"env_type": "local"},
    }
    clean_env = {"LLM_MODEL": "", "HERMES_MAX_ITERATIONS": ""}
    prompt_toolkit_stubs = {
        "prompt_toolkit": MagicMock(),
        "prompt_toolkit.history": MagicMock(),
        "prompt_toolkit.styles": MagicMock(),
        "prompt_toolkit.patch_stdout": MagicMock(),
        "prompt_toolkit.application": MagicMock(),
        "prompt_toolkit.layout": MagicMock(),
        "prompt_toolkit.layout.processors": MagicMock(),
        "prompt_toolkit.filters": MagicMock(),
        "prompt_toolkit.layout.dimension": MagicMock(),
        "prompt_toolkit.layout.menus": MagicMock(),
        "prompt_toolkit.widgets": MagicMock(),
        "prompt_toolkit.key_binding": MagicMock(),
        "prompt_toolkit.completion": MagicMock(),
        "prompt_toolkit.formatted_text": MagicMock(),
        "prompt_toolkit.auto_suggest": MagicMock(),
    }

    with patch.dict(sys.modules, prompt_toolkit_stubs), \
            patch.dict("os.environ", clean_env, clear=False):
        import cli as mod

        # Reload so module-level state is rebuilt under the stubs/env above,
        # even when ``cli`` was already imported by an earlier test.
        mod = importlib.reload(mod)
        _cli_mod = mod

        # NOTE(review): the instance is constructed while the prompt_toolkit
        # stubs and cleaned environment are still active — confirm this
        # nesting matches the upstream file.
        with patch.object(mod, "get_tool_definitions", return_value=[]), \
                patch.dict(mod.__dict__, {"CLI_CONFIG": _clean_config}):
            if verbose is _UNSET:
                return mod.HermesCLI()
            return mod.HermesCLI(verbose=verbose)
|
|
|
|
|
|
class TestToolProgressScrollback:
    """Stacked scrollback lines for 'all', 'new', 'verbose' and 'off' modes.

    Each test builds a CLI through ``_make_cli`` and drives
    ``_on_tool_progress`` directly, patching ``_cprint`` on the reloaded
    ``cli`` module to observe what would be committed to scrollback.
    """

    def test_all_mode_prints_scrollback_on_completed(self):
        """In 'all' mode, tool.completed prints a stacked line."""
        cli = _make_cli(tool_progress="all")
        # Simulate tool.started
        cli._on_tool_progress("tool.started", "terminal", "git log", {"command": "git log"})
        # Simulate tool.completed
        with patch.object(_cli_mod, "_cprint") as mock_print:
            cli._on_tool_progress("tool.completed", "terminal", None, None, duration=1.5, is_error=False)

        mock_print.assert_called_once()
        line = mock_print.call_args[0][0]
        # Should contain tool info (the cute message format has "git log" for terminal)
        assert "git log" in line or "$" in line

    def test_all_mode_prints_every_call(self):
        """In 'all' mode, consecutive calls to the same tool each get a line."""
        cli = _make_cli(tool_progress="all")
        with patch.object(_cli_mod, "_cprint") as mock_print:
            # First call
            cli._on_tool_progress("tool.started", "read_file", "cli.py", {"path": "cli.py"})
            cli._on_tool_progress("tool.completed", "read_file", None, None, duration=0.1, is_error=False)
            # Second call (same tool)
            cli._on_tool_progress("tool.started", "read_file", "run_agent.py", {"path": "run_agent.py"})
            cli._on_tool_progress("tool.completed", "read_file", None, None, duration=0.2, is_error=False)

        assert mock_print.call_count == 2

    def test_new_mode_skips_consecutive_repeats(self):
        """In 'new' mode, consecutive calls to the same tool only print once."""
        cli = _make_cli(tool_progress="new")
        with patch.object(_cli_mod, "_cprint") as mock_print:
            cli._on_tool_progress("tool.started", "read_file", "cli.py", {"path": "cli.py"})
            cli._on_tool_progress("tool.completed", "read_file", None, None, duration=0.1, is_error=False)
            cli._on_tool_progress("tool.started", "read_file", "run_agent.py", {"path": "run_agent.py"})
            cli._on_tool_progress("tool.completed", "read_file", None, None, duration=0.2, is_error=False)

        assert mock_print.call_count == 1  # Only the first read_file

    def test_new_mode_prints_when_tool_changes(self):
        """In 'new' mode, a different tool name triggers a new line."""
        cli = _make_cli(tool_progress="new")
        with patch.object(_cli_mod, "_cprint") as mock_print:
            cli._on_tool_progress("tool.started", "read_file", "cli.py", {"path": "cli.py"})
            cli._on_tool_progress("tool.completed", "read_file", None, None, duration=0.1, is_error=False)
            cli._on_tool_progress("tool.started", "search_files", "pattern", {"pattern": "test"})
            cli._on_tool_progress("tool.completed", "search_files", None, None, duration=0.3, is_error=False)
            cli._on_tool_progress("tool.started", "read_file", "run_agent.py", {"path": "run_agent.py"})
            cli._on_tool_progress("tool.completed", "read_file", None, None, duration=0.2, is_error=False)

        # read_file, search_files, read_file (3rd prints because search_files broke the streak)
        assert mock_print.call_count == 3

    def test_off_mode_no_scrollback(self):
        """In 'off' mode, no stacked lines are printed."""
        cli = _make_cli(tool_progress="off")
        with patch.object(_cli_mod, "_cprint") as mock_print:
            cli._on_tool_progress("tool.started", "terminal", "ls", {"command": "ls"})
            cli._on_tool_progress("tool.completed", "terminal", None, None, duration=0.5, is_error=False)

        mock_print.assert_not_called()

    def test_error_suffix_on_failed_tool(self):
        """When a failed tool's result is forwarded, the stacked line surfaces
        the specific error (e.g. ``[exit 1]`` or ``[File not found: x]``)
        instead of the legacy generic ``[error]`` suffix."""
        import json

        cli = _make_cli(tool_progress="all")
        cli._on_tool_progress("tool.started", "terminal", "false", {"command": "false"})
        with patch.object(_cli_mod, "_cprint") as mock_print:
            cli._on_tool_progress(
                "tool.completed", "terminal", None, None,
                duration=0.5, is_error=True,
                result=json.dumps({"output": "", "exit_code": 1}),
            )

        # Fail with a clear assertion (rather than a TypeError from indexing
        # ``None``) if nothing was printed at all.
        mock_print.assert_called()
        line = mock_print.call_args[0][0]
        assert "[exit 1]" in line

    def test_spinner_still_updates_on_started(self):
        """tool.started still updates the spinner text for live display."""
        cli = _make_cli(tool_progress="all")
        cli._on_tool_progress("tool.started", "terminal", "git status", {"command": "git status"})
        assert "git status" in cli._spinner_text

    def test_spinner_timer_clears_on_completed(self):
        """tool.completed still clears the tool timer."""
        cli = _make_cli(tool_progress="all")
        cli._on_tool_progress("tool.started", "terminal", "git status", {"command": "git status"})
        assert cli._tool_start_time > 0
        with patch.object(_cli_mod, "_cprint"):
            cli._on_tool_progress("tool.completed", "terminal", None, None, duration=0.5, is_error=False)
        assert cli._tool_start_time == 0.0

    def test_concurrent_tools_produce_stacked_lines(self):
        """Multiple tool.started followed by multiple tool.completed all produce lines."""
        cli = _make_cli(tool_progress="all")
        with patch.object(_cli_mod, "_cprint") as mock_print:
            # All start first (concurrent pattern)
            cli._on_tool_progress("tool.started", "web_search", "query 1", {"query": "test 1"})
            cli._on_tool_progress("tool.started", "web_search", "query 2", {"query": "test 2"})
            # All complete
            cli._on_tool_progress("tool.completed", "web_search", None, None, duration=1.0, is_error=False)
            cli._on_tool_progress("tool.completed", "web_search", None, None, duration=1.5, is_error=False)

        assert mock_print.call_count == 2

    def test_verbose_mode_commits_scrollback_line(self):
        """In 'verbose' mode, tool.completed commits a persistent scrollback line.

        Regression: 'verbose' used to be omitted from the scrollback gate on
        the premise that run_agent renders verbose output. That premise is
        false in the interactive CLI — run_agent's verbose prints are gated on
        ``not quiet_mode`` and the interactive CLI runs quiet_mode=True. So a
        non-streaming model call (MoA aggregator, copilot-acp) under 'verbose'
        rendered each tool only into the self-overwriting spinner, building no
        scrollable history. 'verbose' is strictly more than 'all', so it must
        commit at least the same line.
        """
        cli = _make_cli(tool_progress="verbose")
        with patch.object(_cli_mod, "_cprint") as mock_print:
            cli._on_tool_progress("tool.started", "terminal", "ls", {"command": "ls"})
            cli._on_tool_progress("tool.completed", "terminal", None, None, duration=0.5, is_error=False)

        mock_print.assert_called_once()

    def test_verbose_mode_commits_every_call(self):
        """In 'verbose' mode, consecutive same-tool calls each commit a line.

        Mirrors 'all' (no consecutive-repeat suppression — that is 'new'-only),
        so a multi-step turn builds a full scrollable tool history.
        """
        cli = _make_cli(tool_progress="verbose")
        with patch.object(_cli_mod, "_cprint") as mock_print:
            cli._on_tool_progress("tool.started", "terminal", "echo one", {"command": "echo one"})
            cli._on_tool_progress("tool.completed", "terminal", None, None, duration=0.1, is_error=False)
            cli._on_tool_progress("tool.started", "terminal", "echo two", {"command": "echo two"})
            cli._on_tool_progress("tool.completed", "terminal", None, None, duration=0.1, is_error=False)

        assert mock_print.call_count == 2

    def test_verbose_mode_config_does_not_enable_global_debug_logging(self):
        """display.tool_progress=verbose controls TOOL-CALL DISPLAY ONLY.

        It must NOT auto-flip self.verbose, which controls root-logger DEBUG
        level for the entire process (every module spews to console). PR
        #6a1aa420e had coupled them, causing all debug logs to flood the
        terminal whenever a user picked tool_progress: verbose for richer
        per-tool rendering.
        """
        cli = _make_cli(tool_progress="verbose")

        assert cli.tool_progress_mode == "verbose"
        assert cli.verbose is False

    def test_explicit_verbose_argument_wins_over_config(self):
        """Explicit verbose=True from the CLI flag still enables DEBUG logging
        regardless of tool_progress_mode."""
        cli = _make_cli(tool_progress="off", verbose=True)

        assert cli.tool_progress_mode == "off"
        assert cli.verbose is True

    def test_explicit_non_verbose_argument_keeps_debug_logging_off(self):
        """Explicit verbose=False overrides any default to enable DEBUG."""
        cli = _make_cli(tool_progress="verbose", verbose=False)

        assert cli.tool_progress_mode == "verbose"
        assert cli.verbose is False

    def test_pending_info_stores_on_started(self):
        """tool.started stores args for later use by tool.completed."""
        cli = _make_cli(tool_progress="all")
        cli._on_tool_progress("tool.started", "terminal", "ls", {"command": "ls"})
        assert "terminal" in cli._pending_tool_info
        assert len(cli._pending_tool_info["terminal"]) == 1
        assert cli._pending_tool_info["terminal"][0] == {"command": "ls"}

    def test_pending_info_consumed_on_completed(self):
        """tool.completed consumes stored args (FIFO for concurrent)."""
        cli = _make_cli(tool_progress="all")
        cli._on_tool_progress("tool.started", "terminal", "ls", {"command": "ls"})
        cli._on_tool_progress("tool.started", "terminal", "pwd", {"command": "pwd"})
        assert len(cli._pending_tool_info["terminal"]) == 2
        with patch.object(_cli_mod, "_cprint"):
            cli._on_tool_progress("tool.completed", "terminal", None, None, duration=0.1, is_error=False)
        # First entry consumed, second remains
        assert len(cli._pending_tool_info.get("terminal", [])) == 1
        assert cli._pending_tool_info["terminal"][0] == {"command": "pwd"}
|
|
|
|
|
|
class TestMoAReferenceBlocks:
    """moa.reference renders a labelled thinking-style block; moa.aggregating
    updates the spinner. Both are display-only and must commit regardless of
    tool_progress_mode (MoA is non-streaming)."""

    def test_reference_event_prints_labelled_block(self):
        """A moa.reference event prints the model label, index/count and text."""
        cli = _make_cli(tool_progress="all")
        with patch.object(_cli_mod, "_cprint") as mock_print:
            cli._on_tool_progress(
                "moa.reference",
                "openrouter:openai/gpt-5.5",
                "Paris is the capital.",
                None,
                moa_index=1,
                moa_count=2,
            )

        # Join every printed fragment so the assertions do not depend on how
        # many _cprint calls the header and body are split across.
        printed = " ".join(str(c.args[0]) for c in mock_print.call_args_list)
        # Header names the source model + index/count; body carries the text.
        assert "openrouter:openai/gpt-5.5" in printed
        assert "Reference 1/2" in printed
        assert "Paris is the capital." in printed

    def test_reference_event_prints_even_when_progress_off(self):
        """Reference blocks are the MoA process view, not tool progress — they
        must show even with tool_progress: off."""
        cli = _make_cli(tool_progress="off")
        with patch.object(_cli_mod, "_cprint") as mock_print:
            cli._on_tool_progress(
                "moa.reference", "openrouter:anthropic/claude-opus-4.8", "Four.", None,
                moa_index=2, moa_count=2,
            )

        assert mock_print.called

    def test_aggregating_event_updates_spinner_only(self):
        """A moa.aggregating event changes the spinner text and prints nothing."""
        cli = _make_cli(tool_progress="all")
        with patch.object(_cli_mod, "_cprint") as mock_print:
            cli._on_tool_progress("moa.aggregating", "openrouter:anthropic/claude-opus-4.8", None, None)

        assert "aggregating" in cli._spinner_text
        # aggregating is a spinner-only transition; no committed scrollback line.
        mock_print.assert_not_called()
|