diff --git a/cli.py b/cli.py
index 1ca28606e5..4a4ce15b77 100644
--- a/cli.py
+++ b/cli.py
@@ -8724,6 +8724,7 @@ def main(
                 route_label=turn_route["label"],
             ):
                 cli.agent.quiet_mode = True
+                cli.agent.suppress_status_output = True
                 result = cli.agent.run_conversation(
                     user_message=query,
                     conversation_history=cli.conversation_history,
diff --git a/run_agent.py b/run_agent.py
index fcaa67f6e8..bbd5a854fc 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -622,6 +622,7 @@ class AIAgent:
         self.tool_progress_callback = tool_progress_callback
         self.tool_start_callback = tool_start_callback
         self.tool_complete_callback = tool_complete_callback
+        self.suppress_status_output = False
         self.thinking_callback = thinking_callback
         self.reasoning_callback = reasoning_callback
         self._reasoning_deltas_fired = False  # Set by _fire_reasoning_delta, reset per API call
@@ -1460,7 +1461,14 @@
         After the main response has been delivered and the remaining tool
         calls are post-response housekeeping (``_mute_post_response``), all
         non-forced output is suppressed.
+
+        ``suppress_status_output`` is a stricter CLI automation mode used by
+        parseable single-query flows such as ``hermes chat -q``. In that mode,
+        all status/diagnostic prints routed through ``_vprint`` are suppressed
+        so stdout stays machine-readable.
         """
+        if getattr(self, "suppress_status_output", False):
+            return
         if not force and getattr(self, "_mute_post_response", False):
             return
         if not force and self._has_stream_consumers() and not self._executing_tools:
diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py
index e58170c801..438612a3fa 100644
--- a/tests/run_agent/test_run_agent.py
+++ b/tests/run_agent/test_run_agent.py
@@ -1090,6 +1090,46 @@ class TestExecuteToolCalls:
         assert len(messages) == 1
         assert messages[0]["role"] == "tool"
 
+    def test_vprint_suppressed_in_parseable_quiet_mode(self, agent):
+        agent.suppress_status_output = True
+
+        with patch.object(agent, "_safe_print") as mock_print:
+            agent._vprint("status line", force=True)
+            agent._vprint("normal line")
+
+        mock_print.assert_not_called()
+
+    def test_run_conversation_suppresses_retry_noise_in_parseable_quiet_mode(self, agent):
+        class _RateLimitError(Exception):
+            status_code = 429
+
+            def __str__(self):
+                return "Error code: 429 - Rate limit exceeded."
+
+        responses = [_RateLimitError(), _mock_response(content="Recovered")]
+
+        def _fake_api_call(api_kwargs):
+            result = responses.pop(0)
+            if isinstance(result, Exception):
+                raise result
+            return result
+
+        agent.suppress_status_output = True
+        agent._interruptible_api_call = _fake_api_call
+        agent._persist_session = lambda *args, **kwargs: None
+        agent._save_trajectory = lambda *args, **kwargs: None
+        agent._save_session_log = lambda *args, **kwargs: None
+
+        with patch("run_agent.time.sleep", return_value=None), \
+             patch.object(agent, "_vprint") as mock_vprint:
+            result = agent.run_conversation("hello")
+
+        assert result["completed"] is True
+        assert result["final_response"] == "Recovered"
+        rendered = [" ".join(str(arg) for arg in call.args) for call in mock_vprint.call_args_list]
+        assert not any("API call failed" in line for line in rendered)
+        assert not any("Rate limit reached" in line for line in rendered)
+
 
 class TestConcurrentToolExecution:
     """Tests for _execute_tool_calls_concurrent and dispatch logic."""