diff --git a/gateway/run.py b/gateway/run.py index 11f0c5056e6..02fdfa1b540 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -16278,7 +16278,15 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew # Per-message state — callbacks and reasoning config change every # turn and must not be baked into the cached agent constructor. - agent.tool_progress_callback = progress_callback if tool_progress_enabled else None + # Gate on needs_progress_queue (tool_progress OR thinking_progress) + # rather than tool_progress alone: the progress_callback also relays + # _thinking assistant scratch text, which is gated on + # thinking_progress and is intentionally independent of tool + # progress. With the old `tool_progress_enabled`-only gate, a user + # who set thinking_progress:true but kept tool_progress:off got a + # None callback — so _thinking scratch bubbles never relayed even + # though the progress queue was created for them. + agent.tool_progress_callback = progress_callback if needs_progress_queue else None # Discord voice verbal-ack hook (fires once per turn on first tool # call; armed only when in a voice channel with the mixer running). agent.tool_start_callback = ( @@ -17004,9 +17012,14 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew "response_transformed": result.get("response_transformed", False), } - # Start progress message sender if enabled + # Start progress message sender if enabled. Gate on needs_progress_queue + # (tool_progress OR thinking_progress), not tool_progress alone: the + # sender drains BOTH tool-progress lines and _thinking scratch bubbles. + # With the old tool_progress-only gate, a thinking_progress:true / + # tool_progress:off user had the callback queue _thinking messages that + # no task ever drained — so they silently never appeared. progress_task = None - if tool_progress_enabled: + if needs_progress_queue: progress_task = asyncio.create_task(send_progress_messages()) # Start stream consumer task — polls for consumer creation since it diff --git a/tests/gateway/test_run_progress_topics.py b/tests/gateway/test_run_progress_topics.py index fed22fa7782..ba97e570c26 100644 --- a/tests/gateway/test_run_progress_topics.py +++ b/tests/gateway/test_run_progress_topics.py @@ -145,6 +145,29 @@ class FakeAgent: } +class ThinkingAgent: + """Agent that emits _thinking scratch text (no tool calls). + + Used to prove the progress callback relays _thinking bubbles when + thinking_progress is enabled but tool_progress is off. + """ + + def __init__(self, **kwargs): + self.tool_progress_callback = kwargs.get("tool_progress_callback") + self.tools = [] + + def run_conversation(self, message, conversation_history=None, task_id=None): + cb = self.tool_progress_callback + if cb is not None: + cb("_thinking", "weighing the options here") + time.sleep(0.35) + return { + "final_response": "done", + "messages": [], + "api_calls": 1, + } + + class LongPreviewAgent: """Agent that emits a tool call with a very long preview string.""" LONG_CMD = "cd /home/teknium/.hermes/hermes-agent/.worktrees/hermes-d8860339 && source .venv/bin/activate && python -m pytest tests/gateway/test_run_progress_topics.py -n0 -q" @@ -1565,3 +1588,49 @@ async def test_consecutive_terminal_progress_collapses_headers(monkeypatch, tmp_ # Exactly TWO terminal headers: one for the first run of three calls, # one for the terminal call after web_search broke the streak. assert final.count("terminal\n```") == 2 + + +@pytest.mark.asyncio +async def test_run_agent_relays_thinking_when_tool_progress_off(monkeypatch, tmp_path): + """_thinking scratch text relays as a bubble when thinking_progress is on, + even with tool_progress off. + + Regression: agent.tool_progress_callback used to be gated on + tool_progress_enabled alone, so enabling only thinking_progress left the + callback None and _thinking never relayed — despite the progress queue + being created for it (needs_progress_queue = tool OR thinking). + """ + monkeypatch.setenv("HERMES_TOOL_PROGRESS_MODE", "off") + adapter, result = await _run_with_agent( + monkeypatch, + tmp_path, + ThinkingAgent, + session_id="sess-thinking-on", + config_data={"display": {"thinking_progress": True, "tool_progress": "off"}}, + ) + + assert result["final_response"] == "done" + blob = "\n".join( + [c["content"] for c in adapter.sent] + [c["content"] for c in adapter.edits] + ) + assert "weighing the options here" in blob + + +@pytest.mark.asyncio +async def test_run_agent_suppresses_thinking_when_thinking_off(monkeypatch, tmp_path): + """With thinking_progress off and tool_progress off, _thinking is suppressed + (no callback wired → no relay).""" + monkeypatch.setenv("HERMES_TOOL_PROGRESS_MODE", "off") + adapter, result = await _run_with_agent( + monkeypatch, + tmp_path, + ThinkingAgent, + session_id="sess-thinking-off", + config_data={"display": {"thinking_progress": False, "tool_progress": "off"}}, + ) + + assert result["final_response"] == "done" + blob = "\n".join( + [c["content"] for c in adapter.sent] + [c["content"] for c in adapter.edits] + ) + assert "weighing the options here" not in blob