From bd01ec7885f9cc05ef44d8e3e71ce043617b0dda Mon Sep 17 00:00:00 2001 From: yeyitech Date: Sat, 18 Apr 2026 19:18:14 -0700 Subject: [PATCH] fix(cli): strip all reasoning tag variants from /resume recap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit HermesCLI._display_resumed_history() calls the module-level _strip_reasoning_tags() to clean assistant content before rendering the recap panel. The tag list was missing (Gemma 4) and there was no pass for stray orphan closes, so those variants leaked internal reasoning into the recap display (#11316). - Add to _REASONING_TAGS. - Add a third regex pass that strips orphan close tags (e.g. 'stuffanswer' → 'stuffanswer'). - Apply IGNORECASE to closed-pair and unclosed-pair passes so mixed-case variants (, ) are handled uniformly — previously both 'THINKING' and 'thinking' had to be listed explicitly as distinct tuple entries, which missed . 7 new regression tests in tests/cli/test_resume_display.py covering: , , , , unclosed , multiple interleaved blocks, and orphan close. Resolves #11316. Originally proposed as PR #11366. --- cli.py | 42 ++++++++++- tests/cli/test_resume_display.py | 121 +++++++++++++++++++++++++++++++ 2 files changed, 159 insertions(+), 4 deletions(-) diff --git a/cli.py b/cli.py index 02c1a4f7e..c9ce95e9f 100644 --- a/cli.py +++ b/cli.py @@ -83,17 +83,51 @@ load_hermes_dotenv(hermes_home=_hermes_home, project_env=_project_env) _REASONING_TAGS = ( "REASONING_SCRATCHPAD", "think", - "reasoning", - "THINKING", "thinking", + "reasoning", + "thought", ) def _strip_reasoning_tags(text: str) -> str: + """Remove reasoning/thinking blocks from displayed text. + + Handles every case: + * Closed pairs ```` (case-insensitive, multi-line). + * Unterminated open tags that run to end-of-text (e.g. truncated + generations on NIM/MiniMax where the close tag is dropped). + * Stray orphan close tags (``stuffanswer``) left behind by + partial-content dumps. + + Covers the variants emitted by reasoning models today: ````, + ````, ````, ````, and + ```` (Gemma 4). Must stay in sync with + ``run_agent.py::_strip_think_blocks`` and the stream consumer's + ``_OPEN_THINK_TAGS`` / ``_CLOSE_THINK_TAGS`` tuples. + """ cleaned = text for tag in _REASONING_TAGS: - cleaned = re.sub(rf"<{tag}>.*?\s*", "", cleaned, flags=re.DOTALL) - cleaned = re.sub(rf"<{tag}>.*$", "", cleaned, flags=re.DOTALL) + # Closed pair — case-insensitive so is handled too. + cleaned = re.sub( + rf"<{tag}>.*?\s*", + "", + cleaned, + flags=re.DOTALL | re.IGNORECASE, + ) + # Unterminated open tag — strip from the tag to end of text. + cleaned = re.sub( + rf"<{tag}>.*$", + "", + cleaned, + flags=re.DOTALL | re.IGNORECASE, + ) + # Stray orphan close tag left behind by partial dumps. + cleaned = re.sub( + rf"\s*", + "", + cleaned, + flags=re.IGNORECASE, + ) return cleaned.strip() diff --git a/tests/cli/test_resume_display.py b/tests/cli/test_resume_display.py index d183e48b2..bb931bb1f 100644 --- a/tests/cli/test_resume_display.py +++ b/tests/cli/test_resume_display.py @@ -344,6 +344,127 @@ class TestDisplayResumedHistory: assert "Just thinking" not in output assert "Hi there!" in output + def test_think_tags_stripped(self): + """... blocks should be stripped from display (#11316).""" + cli = _make_cli() + cli.conversation_history = [ + {"role": "user", "content": "Solve this"}, + { + "role": "assistant", + "content": "\nI need to reason carefully here.\n\n\nThe answer is 7.", + }, + ] + output = self._capture_display(cli) + + assert "" not in output + assert "" not in output + assert "I need to reason carefully here" not in output + assert "The answer is 7" in output + + def test_thinking_tags_stripped(self): + """... blocks should be stripped from display.""" + cli = _make_cli() + cli.conversation_history = [ + {"role": "user", "content": "What is 2+2?"}, + { + "role": "assistant", + "content": "\nLet me compute: 2 + 2 = 4\n\n\nThe answer is 4.", + }, + ] + output = self._capture_display(cli) + + assert "" not in output + assert "Let me compute" not in output + assert "The answer is 4" in output + + def test_reasoning_tags_stripped(self): + """... blocks should be stripped from display.""" + cli = _make_cli() + cli.conversation_history = [ + {"role": "user", "content": "Explain gravity"}, + { + "role": "assistant", + "content": ( + "\nGravity is a fundamental force...\n\n\n" + "Gravity pulls objects together." + ), + }, + ] + output = self._capture_display(cli) + + assert "" not in output + assert "fundamental force" not in output + assert "Gravity pulls objects together" in output + + def test_thought_tags_stripped(self): + """... blocks (Gemma 4) should be stripped.""" + cli = _make_cli() + cli.conversation_history = [ + {"role": "user", "content": "Say hello"}, + { + "role": "assistant", + "content": "\nInternal thought here.\n\n\nHello!", + }, + ] + output = self._capture_display(cli) + + assert "" not in output + assert "Internal thought here" not in output + assert "Hello!" in output + + def test_unclosed_think_tag_stripped(self): + """Unclosed (truncated generation) should not leak reasoning.""" + cli = _make_cli() + cli.conversation_history = [ + {"role": "user", "content": "Truncated response"}, + { + "role": "assistant", + "content": "Some text before.\n\nUnfinished reasoning...", + }, + ] + output = self._capture_display(cli) + + assert "" not in output + assert "Unfinished reasoning" not in output + assert "Some text before" in output + + def test_multiple_reasoning_blocks_all_stripped(self): + """Multiple interleaved reasoning blocks are all stripped.""" + cli = _make_cli() + cli.conversation_history = [ + {"role": "user", "content": "Complex question"}, + { + "role": "assistant", + "content": ( + "\nFirst thought.\n\n" + "Partial text.\n" + "\nSecond thought.\n\n" + "Final answer." + ), + }, + ] + output = self._capture_display(cli) + + assert "First thought" not in output + assert "Second thought" not in output + assert "Partial text" in output + assert "Final answer" in output + + def test_orphan_closing_think_tag_stripped(self): + """A stray with no matching open should not render to user.""" + cli = _make_cli() + cli.conversation_history = [ + {"role": "user", "content": "Broken output"}, + { + "role": "assistant", + "content": "some leftover reasoningVisible answer.", + }, + ] + output = self._capture_display(cli) + + assert "" not in output + assert "Visible answer" in output + def test_assistant_with_text_and_tool_calls(self): """When an assistant message has both text content AND tool_calls.""" cli = _make_cli()