diff --git a/cli.py b/cli.py
index 02c1a4f7e..c9ce95e9f 100644
--- a/cli.py
+++ b/cli.py
@@ -83,17 +83,51 @@ load_hermes_dotenv(hermes_home=_hermes_home, project_env=_project_env)
_REASONING_TAGS = (
"REASONING_SCRATCHPAD",
"think",
- "reasoning",
- "THINKING",
"thinking",
+ "reasoning",
+ "thought",
)
def _strip_reasoning_tags(text: str) -> str:
+ """Remove reasoning/thinking blocks from displayed text.
+
+ Handles three cases:
+ * Closed pairs ``<tag>…</tag>`` (case-insensitive, multi-line).
+ * Unterminated open tags that run to end-of-text (e.g. truncated
+ generations on NIM/MiniMax where the close tag is dropped).
+ * Stray orphan close tags (``stuff</think>answer``) left behind by
+ partial-content dumps.
+
+ Covers the variants emitted by reasoning models today: ``<think>``,
+ ``<thinking>``, ``<reasoning>``, ``<REASONING_SCRATCHPAD>``, and
+ ``<thought>`` (Gemma 4). Must stay in sync with
+ ``run_agent.py::_strip_think_blocks`` and the stream consumer's
+ ``_OPEN_THINK_TAGS`` / ``_CLOSE_THINK_TAGS`` tuples.
+ """
cleaned = text
for tag in _REASONING_TAGS:
- cleaned = re.sub(rf"<{tag}>.*?</{tag}>\s*", "", cleaned, flags=re.DOTALL)
- cleaned = re.sub(rf"<{tag}>.*$", "", cleaned, flags=re.DOTALL)
+ # Closed pair — case-insensitive so <THINKING>…</THINKING> is handled too.
+ cleaned = re.sub(
+ rf"<{tag}>.*?</{tag}>\s*",
+ "",
+ cleaned,
+ flags=re.DOTALL | re.IGNORECASE,
+ )
+ # Unterminated open tag — strip from the tag to end of text.
+ cleaned = re.sub(
+ rf"<{tag}>.*$",
+ "",
+ cleaned,
+ flags=re.DOTALL | re.IGNORECASE,
+ )
+ # Stray orphan close tag left behind by partial dumps.
+ cleaned = re.sub(
+ rf"</{tag}>\s*",
+ "",
+ cleaned,
+ flags=re.IGNORECASE,
+ )
return cleaned.strip()
diff --git a/tests/cli/test_resume_display.py b/tests/cli/test_resume_display.py
index d183e48b2..bb931bb1f 100644
--- a/tests/cli/test_resume_display.py
+++ b/tests/cli/test_resume_display.py
@@ -344,6 +344,127 @@ class TestDisplayResumedHistory:
assert "Just thinking" not in output
assert "Hi there!" in output
+ def test_think_tags_stripped(self):
+ """<think>...</think> blocks should be stripped from display (#11316)."""
+ cli = _make_cli()
+ cli.conversation_history = [
+ {"role": "user", "content": "Solve this"},
+ {
+ "role": "assistant",
+ "content": "<think>\nI need to reason carefully here.\n</think>\n\nThe answer is 7.",
+ },
+ ]
+ output = self._capture_display(cli)
+
+ assert "<think>" not in output
+ assert "</think>" not in output
+ assert "I need to reason carefully here" not in output
+ assert "The answer is 7" in output
+
+ def test_thinking_tags_stripped(self):
+ """<thinking>...</thinking> blocks should be stripped from display."""
+ cli = _make_cli()
+ cli.conversation_history = [
+ {"role": "user", "content": "What is 2+2?"},
+ {
+ "role": "assistant",
+ "content": "<thinking>\nLet me compute: 2 + 2 = 4\n</thinking>\n\nThe answer is 4.",
+ },
+ ]
+ output = self._capture_display(cli)
+
+ assert "<thinking>" not in output
+ assert "Let me compute" not in output
+ assert "The answer is 4" in output
+
+ def test_reasoning_tags_stripped(self):
+ """<reasoning>...</reasoning> blocks should be stripped from display."""
+ cli = _make_cli()
+ cli.conversation_history = [
+ {"role": "user", "content": "Explain gravity"},
+ {
+ "role": "assistant",
+ "content": (
+ "<reasoning>\nGravity is a fundamental force...\n</reasoning>\n\n"
+ "Gravity pulls objects together."
+ ),
+ },
+ ]
+ output = self._capture_display(cli)
+
+ assert "<reasoning>" not in output
+ assert "fundamental force" not in output
+ assert "Gravity pulls objects together" in output
+
+ def test_thought_tags_stripped(self):
+ """<thought>...</thought> blocks (Gemma 4) should be stripped."""
+ cli = _make_cli()
+ cli.conversation_history = [
+ {"role": "user", "content": "Say hello"},
+ {
+ "role": "assistant",
+ "content": "<thought>\nInternal thought here.\n</thought>\n\nHello!",
+ },
+ ]
+ output = self._capture_display(cli)
+
+ assert "<thought>" not in output
+ assert "Internal thought here" not in output
+ assert "Hello!" in output
+
+ def test_unclosed_think_tag_stripped(self):
+ """Unclosed <think> (truncated generation) should not leak reasoning."""
+ cli = _make_cli()
+ cli.conversation_history = [
+ {"role": "user", "content": "Truncated response"},
+ {
+ "role": "assistant",
+ "content": "Some text before.\n<think>\nUnfinished reasoning...",
+ },
+ ]
+ output = self._capture_display(cli)
+
+ assert "<think>" not in output
+ assert "Unfinished reasoning" not in output
+ assert "Some text before" in output
+
+ def test_multiple_reasoning_blocks_all_stripped(self):
+ """Multiple interleaved reasoning blocks are all stripped."""
+ cli = _make_cli()
+ cli.conversation_history = [
+ {"role": "user", "content": "Complex question"},
+ {
+ "role": "assistant",
+ "content": (
+ "<think>\nFirst thought.\n</think>\n"
+ "Partial text.\n"
+ "<thinking>\nSecond thought.\n</thinking>\n"
+ "Final answer."
+ ),
+ },
+ ]
+ output = self._capture_display(cli)
+
+ assert "First thought" not in output
+ assert "Second thought" not in output
+ assert "Partial text" in output
+ assert "Final answer" in output
+
+ def test_orphan_closing_think_tag_stripped(self):
+ """A stray </think> with no matching open should not render to user."""
+ cli = _make_cli()
+ cli.conversation_history = [
+ {"role": "user", "content": "Broken output"},
+ {
+ "role": "assistant",
+ "content": "some leftover reasoning</think>Visible answer.",
+ },
+ ]
+ output = self._capture_display(cli)
+
+ assert "</think>" not in output
+ assert "Visible answer" in output
+
def test_assistant_with_text_and_tool_calls(self):
"""When an assistant message has both text content AND tool_calls."""
cli = _make_cli()