diff --git a/cli.py b/cli.py
index 1ca28606e5..4a4ce15b77 100644
--- a/cli.py
+++ b/cli.py
@@ -8724,6 +8724,7 @@ def main(
                 route_label=turn_route["label"],
             ):
                 cli.agent.quiet_mode = True
+                cli.agent.suppress_status_output = True
                 result = cli.agent.run_conversation(
                     user_message=query,
                     conversation_history=cli.conversation_history,
diff --git a/run_agent.py b/run_agent.py
index fcaa67f6e8..bbd5a854fc 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -622,6 +622,7 @@ class AIAgent:
         self.tool_progress_callback = tool_progress_callback
         self.tool_start_callback = tool_start_callback
         self.tool_complete_callback = tool_complete_callback
+        self.suppress_status_output = False
         self.thinking_callback = thinking_callback
         self.reasoning_callback = reasoning_callback
         self._reasoning_deltas_fired = False  # Set by _fire_reasoning_delta, reset per API call
@@ -1460,7 +1461,14 @@
         After the main response has been delivered and the remaining tool
         calls are post-response housekeeping (``_mute_post_response``), all
         non-forced output is suppressed.
+
+        ``suppress_status_output`` is a stricter CLI automation mode used by
+        parseable single-query flows such as ``hermes chat -q``. In that mode,
+        all status/diagnostic prints routed through ``_vprint`` are suppressed
+        so stdout stays machine-readable.
         """
+        if getattr(self, "suppress_status_output", False):
+            return
         if not force and getattr(self, "_mute_post_response", False):
             return
         if not force and self._has_stream_consumers() and not self._executing_tools:
diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py
index e58170c801..438612a3fa 100644
--- a/tests/run_agent/test_run_agent.py
+++ b/tests/run_agent/test_run_agent.py
@@ -1090,6 +1090,46 @@ class TestExecuteToolCalls:
         assert len(messages) == 1
         assert messages[0]["role"] == "tool"
 
+    def test_vprint_suppressed_in_parseable_quiet_mode(self, agent):
+        agent.suppress_status_output = True
+
+        with patch.object(agent, "_safe_print") as mock_print:
+            agent._vprint("status line", force=True)
+            agent._vprint("normal line")
+
+        mock_print.assert_not_called()
+
+    def test_run_conversation_suppresses_retry_noise_in_parseable_quiet_mode(self, agent):
+        class _RateLimitError(Exception):
+            status_code = 429
+
+            def __str__(self):
+                return "Error code: 429 - Rate limit exceeded."
+
+        responses = [_RateLimitError(), _mock_response(content="Recovered")]
+
+        def _fake_api_call(api_kwargs):
+            result = responses.pop(0)
+            if isinstance(result, Exception):
+                raise result
+            return result
+
+        agent.suppress_status_output = True
+        agent._interruptible_api_call = _fake_api_call
+        agent._persist_session = lambda *args, **kwargs: None
+        agent._save_trajectory = lambda *args, **kwargs: None
+        agent._save_session_log = lambda *args, **kwargs: None
+
+        with patch("run_agent.time.sleep", return_value=None), \
+             patch.object(agent, "_vprint") as mock_vprint:
+            result = agent.run_conversation("hello")
+
+        assert result["completed"] is True
+        assert result["final_response"] == "Recovered"
+        rendered = [" ".join(str(arg) for arg in call.args) for call in mock_vprint.call_args_list]
+        assert not any("API call failed" in line for line in rendered)
+        assert not any("Rate limit reached" in line for line in rendered)
+
 
 class TestConcurrentToolExecution:
     """Tests for _execute_tool_calls_concurrent and dispatch logic."""