diff --git a/cli.py b/cli.py index 02c1a4f7e..c9ce95e9f 100644 --- a/cli.py +++ b/cli.py @@ -83,17 +83,51 @@ load_hermes_dotenv(hermes_home=_hermes_home, project_env=_project_env) _REASONING_TAGS = ( "REASONING_SCRATCHPAD", "think", - "reasoning", - "THINKING", "thinking", + "reasoning", + "thought", ) def _strip_reasoning_tags(text: str) -> str: + """Remove reasoning/thinking blocks from displayed text. + + Handles every case: + * Closed pairs ```` (case-insensitive, multi-line). + * Unterminated open tags that run to end-of-text (e.g. truncated + generations on NIM/MiniMax where the close tag is dropped). + * Stray orphan close tags (``stuffanswer``) left behind by + partial-content dumps. + + Covers the variants emitted by reasoning models today: ````, + ````, ````, ````, and + ```` (Gemma 4). Must stay in sync with + ``run_agent.py::_strip_think_blocks`` and the stream consumer's + ``_OPEN_THINK_TAGS`` / ``_CLOSE_THINK_TAGS`` tuples. + """ cleaned = text for tag in _REASONING_TAGS: - cleaned = re.sub(rf"<{tag}>.*?\s*", "", cleaned, flags=re.DOTALL) - cleaned = re.sub(rf"<{tag}>.*$", "", cleaned, flags=re.DOTALL) + # Closed pair — case-insensitive so is handled too. + cleaned = re.sub( + rf"<{tag}>.*?\s*", + "", + cleaned, + flags=re.DOTALL | re.IGNORECASE, + ) + # Unterminated open tag — strip from the tag to end of text. + cleaned = re.sub( + rf"<{tag}>.*$", + "", + cleaned, + flags=re.DOTALL | re.IGNORECASE, + ) + # Stray orphan close tag left behind by partial dumps. + cleaned = re.sub( + rf"\s*", + "", + cleaned, + flags=re.IGNORECASE, + ) return cleaned.strip() diff --git a/tests/cli/test_resume_display.py b/tests/cli/test_resume_display.py index d183e48b2..bb931bb1f 100644 --- a/tests/cli/test_resume_display.py +++ b/tests/cli/test_resume_display.py @@ -344,6 +344,127 @@ class TestDisplayResumedHistory: assert "Just thinking" not in output assert "Hi there!" in output + def test_think_tags_stripped(self): + """... blocks should be stripped from display (#11316).""" + cli = _make_cli() + cli.conversation_history = [ + {"role": "user", "content": "Solve this"}, + { + "role": "assistant", + "content": "\nI need to reason carefully here.\n\n\nThe answer is 7.", + }, + ] + output = self._capture_display(cli) + + assert "" not in output + assert "" not in output + assert "I need to reason carefully here" not in output + assert "The answer is 7" in output + + def test_thinking_tags_stripped(self): + """... blocks should be stripped from display.""" + cli = _make_cli() + cli.conversation_history = [ + {"role": "user", "content": "What is 2+2?"}, + { + "role": "assistant", + "content": "\nLet me compute: 2 + 2 = 4\n\n\nThe answer is 4.", + }, + ] + output = self._capture_display(cli) + + assert "" not in output + assert "Let me compute" not in output + assert "The answer is 4" in output + + def test_reasoning_tags_stripped(self): + """... blocks should be stripped from display.""" + cli = _make_cli() + cli.conversation_history = [ + {"role": "user", "content": "Explain gravity"}, + { + "role": "assistant", + "content": ( + "\nGravity is a fundamental force...\n\n\n" + "Gravity pulls objects together." + ), + }, + ] + output = self._capture_display(cli) + + assert "" not in output + assert "fundamental force" not in output + assert "Gravity pulls objects together" in output + + def test_thought_tags_stripped(self): + """... blocks (Gemma 4) should be stripped.""" + cli = _make_cli() + cli.conversation_history = [ + {"role": "user", "content": "Say hello"}, + { + "role": "assistant", + "content": "\nInternal thought here.\n\n\nHello!", + }, + ] + output = self._capture_display(cli) + + assert "" not in output + assert "Internal thought here" not in output + assert "Hello!" in output + + def test_unclosed_think_tag_stripped(self): + """Unclosed (truncated generation) should not leak reasoning.""" + cli = _make_cli() + cli.conversation_history = [ + {"role": "user", "content": "Truncated response"}, + { + "role": "assistant", + "content": "Some text before.\n\nUnfinished reasoning...", + }, + ] + output = self._capture_display(cli) + + assert "" not in output + assert "Unfinished reasoning" not in output + assert "Some text before" in output + + def test_multiple_reasoning_blocks_all_stripped(self): + """Multiple interleaved reasoning blocks are all stripped.""" + cli = _make_cli() + cli.conversation_history = [ + {"role": "user", "content": "Complex question"}, + { + "role": "assistant", + "content": ( + "\nFirst thought.\n\n" + "Partial text.\n" + "\nSecond thought.\n\n" + "Final answer." + ), + }, + ] + output = self._capture_display(cli) + + assert "First thought" not in output + assert "Second thought" not in output + assert "Partial text" in output + assert "Final answer" in output + + def test_orphan_closing_think_tag_stripped(self): + """A stray with no matching open should not render to user.""" + cli = _make_cli() + cli.conversation_history = [ + {"role": "user", "content": "Broken output"}, + { + "role": "assistant", + "content": "some leftover reasoningVisible answer.", + }, + ] + output = self._capture_display(cli) + + assert "" not in output + assert "Visible answer" in output + def test_assistant_with_text_and_tool_calls(self): """When an assistant message has both text content AND tool_calls.""" cli = _make_cli()