From bd01ec7885f9cc05ef44d8e3e71ce043617b0dda Mon Sep 17 00:00:00 2001
From: yeyitech <yeyitech@users.noreply.github.com>
Date: Sat, 18 Apr 2026 19:18:14 -0700
Subject: [PATCH] fix(cli): strip all reasoning tag variants from /resume recap
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

HermesCLI._display_resumed_history() calls the module-level _strip_reasoning_tags() to clean assistant content before rendering the recap panel.  The tag list was missing <thought> (Gemma 4) and there was no pass for stray orphan </tag> closes, so those variants leaked internal reasoning into the recap display (#11316).

- Add <thought> to _REASONING_TAGS.
- Add a third regex pass that strips orphan close tags (e.g. 'stuff</think>answer' → 'stuffanswer').
- Apply IGNORECASE to closed-pair and unclosed-pair passes so mixed-case variants (<THINK>, <Thinking>) are handled uniformly — previously both 'THINKING' and 'thinking' had to be listed explicitly as distinct tuple entries, which missed <Thinking>.

7 new regression tests in tests/cli/test_resume_display.py covering: <think>, <thinking>, <reasoning>, <thought>, unclosed <think>, multiple interleaved blocks, and orphan </think> close.

Resolves #11316.

Originally proposed as PR #11366.
---
 cli.py                           |  42 ++++++++++-
 tests/cli/test_resume_display.py | 121 +++++++++++++++++++++++++++++++
 2 files changed, 159 insertions(+), 4 deletions(-)
diff --git a/cli.py b/cli.py
index 02c1a4f7e..c9ce95e9f 100644
--- a/cli.py
+++ b/cli.py
@@ -83,17 +83,51 @@ load_hermes_dotenv(hermes_home=_hermes_home, project_env=_project_env)
 _REASONING_TAGS = (
     "REASONING_SCRATCHPAD",
     "think",
-    "reasoning",
-    "THINKING",
     "thinking",
+    "reasoning",
+    "thought",
 )
 
 
 def _strip_reasoning_tags(text: str) -> str:
+    """Remove reasoning/thinking blocks from displayed text.
+
+    Handles every case:
+      * Closed pairs ``<tag>…</tag>`` (case-insensitive, multi-line).
+      * Unterminated open tags that run to end-of-text (e.g. truncated
+        generations on NIM/MiniMax where the close tag is dropped).
+      * Stray orphan close tags (``stuff</think>answer``) left behind by
+        partial-content dumps.
+
+    Covers the variants emitted by reasoning models today: ``<think>``,
+    ``<thinking>``, ``<reasoning>``, ``<REASONING_SCRATCHPAD>``, and
+    ``<thought>`` (Gemma 4).  Must stay in sync with
+    ``run_agent.py::_strip_think_blocks`` and the stream consumer's
+    ``_OPEN_THINK_TAGS`` / ``_CLOSE_THINK_TAGS`` tuples.
+    """
     cleaned = text
     for tag in _REASONING_TAGS:
-        cleaned = re.sub(rf"<{tag}>.*?</{tag}>\s*", "", cleaned, flags=re.DOTALL)
-        cleaned = re.sub(rf"<{tag}>.*$", "", cleaned, flags=re.DOTALL)
+        # Closed pair — case-insensitive so <THINK>…</THINK> is handled too.
+        cleaned = re.sub(
+            rf"<{tag}>.*?</{tag}>\s*",
+            "",
+            cleaned,
+            flags=re.DOTALL | re.IGNORECASE,
+        )
+        # Unterminated open tag — strip from the tag to end of text.
+        cleaned = re.sub(
+            rf"<{tag}>.*$",
+            "",
+            cleaned,
+            flags=re.DOTALL | re.IGNORECASE,
+        )
+        # Stray orphan close tag left behind by partial dumps.
+        cleaned = re.sub(
+            rf"</{tag}>\s*",
+            "",
+            cleaned,
+            flags=re.IGNORECASE,
+        )
     return cleaned.strip()
 
 
diff --git a/tests/cli/test_resume_display.py b/tests/cli/test_resume_display.py
index d183e48b2..bb931bb1f 100644
--- a/tests/cli/test_resume_display.py
+++ b/tests/cli/test_resume_display.py
@@ -344,6 +344,127 @@ class TestDisplayResumedHistory:
         assert "Just thinking" not in output
         assert "Hi there!" in output
 
+    def test_think_tags_stripped(self):
+        """<think>...</think> blocks should be stripped from display (#11316)."""
+        cli = _make_cli()
+        cli.conversation_history = [
+            {"role": "user", "content": "Solve this"},
+            {
+                "role": "assistant",
+                "content": "<think>\nI need to reason carefully here.\n</think>\n\nThe answer is 7.",
+            },
+        ]
+        output = self._capture_display(cli)
+
+        assert "<think>" not in output
+        assert "</think>" not in output
+        assert "I need to reason carefully here" not in output
+        assert "The answer is 7" in output
+
+    def test_thinking_tags_stripped(self):
+        """<thinking>...</thinking> blocks should be stripped from display."""
+        cli = _make_cli()
+        cli.conversation_history = [
+            {"role": "user", "content": "What is 2+2?"},
+            {
+                "role": "assistant",
+                "content": "<thinking>\nLet me compute: 2 + 2 = 4\n</thinking>\n\nThe answer is 4.",
+            },
+        ]
+        output = self._capture_display(cli)
+
+        assert "<thinking>" not in output
+        assert "Let me compute" not in output
+        assert "The answer is 4" in output
+
+    def test_reasoning_tags_stripped(self):
+        """<reasoning>...</reasoning> blocks should be stripped from display."""
+        cli = _make_cli()
+        cli.conversation_history = [
+            {"role": "user", "content": "Explain gravity"},
+            {
+                "role": "assistant",
+                "content": (
+                    "<reasoning>\nGravity is a fundamental force...\n</reasoning>\n\n"
+                    "Gravity pulls objects together."
+                ),
+            },
+        ]
+        output = self._capture_display(cli)
+
+        assert "<reasoning>" not in output
+        assert "fundamental force" not in output
+        assert "Gravity pulls objects together" in output
+
+    def test_thought_tags_stripped(self):
+        """<thought>...</thought> blocks (Gemma 4) should be stripped."""
+        cli = _make_cli()
+        cli.conversation_history = [
+            {"role": "user", "content": "Say hello"},
+            {
+                "role": "assistant",
+                "content": "<thought>\nInternal thought here.\n</thought>\n\nHello!",
+            },
+        ]
+        output = self._capture_display(cli)
+
+        assert "<thought>" not in output
+        assert "Internal thought here" not in output
+        assert "Hello!" in output
+
+    def test_unclosed_think_tag_stripped(self):
+        """Unclosed <think> (truncated generation) should not leak reasoning."""
+        cli = _make_cli()
+        cli.conversation_history = [
+            {"role": "user", "content": "Truncated response"},
+            {
+                "role": "assistant",
+                "content": "Some text before.\n<think>\nUnfinished reasoning...",
+            },
+        ]
+        output = self._capture_display(cli)
+
+        assert "<think>" not in output
+        assert "Unfinished reasoning" not in output
+        assert "Some text before" in output
+
+    def test_multiple_reasoning_blocks_all_stripped(self):
+        """Multiple interleaved reasoning blocks are all stripped."""
+        cli = _make_cli()
+        cli.conversation_history = [
+            {"role": "user", "content": "Complex question"},
+            {
+                "role": "assistant",
+                "content": (
+                    "<think>\nFirst thought.\n</think>\n"
+                    "Partial text.\n"
+                    "<reasoning>\nSecond thought.\n</reasoning>\n"
+                    "Final answer."
+                ),
+            },
+        ]
+        output = self._capture_display(cli)
+
+        assert "First thought" not in output
+        assert "Second thought" not in output
+        assert "Partial text" in output
+        assert "Final answer" in output
+
+    def test_orphan_closing_think_tag_stripped(self):
+        """A stray </think> with no matching open should not render to user."""
+        cli = _make_cli()
+        cli.conversation_history = [
+            {"role": "user", "content": "Broken output"},
+            {
+                "role": "assistant",
+                "content": "some leftover reasoning</think>Visible answer.",
+            },
+        ]
+        output = self._capture_display(cli)
+
+        assert "</think>" not in output
+        assert "Visible answer" in output
+
     def test_assistant_with_text_and_tool_calls(self):
         """When an assistant message has both text content AND tool_calls."""
         cli = _make_cli()