From 1d32e5d98c766d925ab2e4ebedc415d50cf97616 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sat, 27 Jun 2026 15:48:20 -0700
Subject: [PATCH] fix(gateway): relay _thinking bubbles when thinking_progress
 is on but tool_progress is off (#53849)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

display.thinking_progress is documented as independent of tool_progress —
users can keep tool progress quiet while opting into mid-turn assistant
scratch-text bubbles. But two gates were keyed on tool_progress_enabled alone,
so with tool_progress:off the _thinking relay was silently dead even when
thinking_progress:true:

1. agent.tool_progress_callback was set to None unless tool_progress_enabled,
   so the callback that queues _thinking text never fired.
2. The send_progress_messages drain task was only started when
   tool_progress_enabled, so even queued messages had no consumer.

Both now gate on needs_progress_queue (tool_progress OR thinking_progress) —
the same condition that already decides whether to create the progress queue
at all. No effect when both are off (queue is None) or when tool_progress is
on (unchanged).

Tests: _thinking relays with thinking_progress:true / tool_progress:off, and
is suppressed when thinking_progress:false. Full progress-topics suite: 35 pass.
---
 gateway/run.py                            | 19 ++++++-
 tests/gateway/test_run_progress_topics.py | 69 +++++++++++++++++++++++
 2 files changed, 85 insertions(+), 3 deletions(-)

diff --git a/gateway/run.py b/gateway/run.py
index 11f0c5056e6..02fdfa1b540 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -16278,7 +16278,15 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
 
             # Per-message state — callbacks and reasoning config change every
             # turn and must not be baked into the cached agent constructor.
-            agent.tool_progress_callback = progress_callback if tool_progress_enabled else None
+            # Gate on needs_progress_queue (tool_progress OR thinking_progress)
+            # rather than tool_progress alone: the progress_callback also relays
+            # _thinking assistant scratch text, which is gated on
+            # thinking_progress and is intentionally independent of tool
+            # progress. With the old `tool_progress_enabled`-only gate, a user
+            # who set thinking_progress:true but kept tool_progress:off got a
+            # None callback — so _thinking scratch bubbles never relayed even
+            # though the progress queue was created for them.
+            agent.tool_progress_callback = progress_callback if needs_progress_queue else None
             # Discord voice verbal-ack hook (fires once per turn on first tool
             # call; armed only when in a voice channel with the mixer running).
             agent.tool_start_callback = (
@@ -17004,9 +17012,14 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
                 "response_transformed": result.get("response_transformed", False),
             }
         
-        # Start progress message sender if enabled
+        # Start progress message sender if enabled. Gate on needs_progress_queue
+        # (tool_progress OR thinking_progress), not tool_progress alone: the
+        # sender drains BOTH tool-progress lines and _thinking scratch bubbles.
+        # With the old tool_progress-only gate, a thinking_progress:true /
+        # tool_progress:off user had the callback queue _thinking messages that
+        # no task ever drained — so they silently never appeared.
         progress_task = None
-        if tool_progress_enabled:
+        if needs_progress_queue:
             progress_task = asyncio.create_task(send_progress_messages())
 
         # Start stream consumer task — polls for consumer creation since it
diff --git a/tests/gateway/test_run_progress_topics.py b/tests/gateway/test_run_progress_topics.py
index fed22fa7782..ba97e570c26 100644
--- a/tests/gateway/test_run_progress_topics.py
+++ b/tests/gateway/test_run_progress_topics.py
@@ -145,6 +145,29 @@ class FakeAgent:
         }
 
 
+class ThinkingAgent:
+    """Agent double that emits one _thinking scratch message (no tool calls).
+
+    Used to prove the progress callback relays _thinking bubbles when
+    thinking_progress is enabled but tool_progress is off.
+    """
+
+    def __init__(self, **kwargs):
+        self.tool_progress_callback = kwargs.get("tool_progress_callback")  # None if not passed
+        self.tools = []
+
+    def run_conversation(self, message, conversation_history=None, task_id=None):
+        cb = self.tool_progress_callback
+        if cb is not None:  # no callback wired: emit nothing and skip the wait
+            cb("_thinking", "weighing the options here")
+            time.sleep(0.35)  # NOTE(review): presumably lets the progress sender drain — confirm
+        return {
+            "final_response": "done",
+            "messages": [],
+            "api_calls": 1,
+        }
+
+
 class LongPreviewAgent:
     """Agent that emits a tool call with a very long preview string."""
     LONG_CMD = "cd /home/teknium/.hermes/hermes-agent/.worktrees/hermes-d8860339 && source .venv/bin/activate && python -m pytest tests/gateway/test_run_progress_topics.py -n0 -q"
@@ -1565,3 +1588,49 @@ async def test_consecutive_terminal_progress_collapses_headers(monkeypatch, tmp_
     # Exactly TWO terminal headers: one for the first run of three calls,
     # one for the terminal call after web_search broke the streak.
     assert final.count("terminal\n```") == 2
+
+
+@pytest.mark.asyncio
+async def test_run_agent_relays_thinking_when_tool_progress_off(monkeypatch, tmp_path):
+    """_thinking scratch text relays as a bubble when thinking_progress is on,
+    even with tool_progress off.
+
+    Regression: agent.tool_progress_callback used to be gated on
+    tool_progress_enabled alone, so enabling only thinking_progress left the
+    callback None and _thinking never relayed — despite the progress queue
+    being created for it (needs_progress_queue = tool OR thinking).
+    """
+    monkeypatch.setenv("HERMES_TOOL_PROGRESS_MODE", "off")  # NOTE(review): presumably the env-level override — confirm
+    adapter, result = await _run_with_agent(
+        monkeypatch,
+        tmp_path,
+        ThinkingAgent,
+        session_id="sess-thinking-on",
+        config_data={"display": {"thinking_progress": True, "tool_progress": "off"}},
+    )
+
+    assert result["final_response"] == "done"
+    blob = "\n".join(  # every sent and edited message body, joined for a substring check
+        [c["content"] for c in adapter.sent] + [c["content"] for c in adapter.edits]
+    )
+    assert "weighing the options here" in blob  # the exact text ThinkingAgent emits
+
+
+@pytest.mark.asyncio
+async def test_run_agent_suppresses_thinking_when_thinking_off(monkeypatch, tmp_path):
+    """With thinking_progress off and tool_progress off, _thinking is suppressed
+    (no callback wired → no relay)."""
+    monkeypatch.setenv("HERMES_TOOL_PROGRESS_MODE", "off")  # NOTE(review): presumably the env-level override — confirm
+    adapter, result = await _run_with_agent(
+        monkeypatch,
+        tmp_path,
+        ThinkingAgent,
+        session_id="sess-thinking-off",
+        config_data={"display": {"thinking_progress": False, "tool_progress": "off"}},
+    )
+
+    assert result["final_response"] == "done"  # the turn itself still completes normally
+    blob = "\n".join(  # every sent and edited message body, joined for a substring check
+        [c["content"] for c in adapter.sent] + [c["content"] for c in adapter.edits]
+    )
+    assert "weighing the options here" not in blob  # scratch text must not reach the adapter