diff --git a/cli.py b/cli.py index 0e5e9ff660..b6c027ebbf 100644 --- a/cli.py +++ b/cli.py @@ -104,6 +104,11 @@ def _strip_reasoning_tags(text: str) -> str: ```` (Gemma 4). Must stay in sync with ``run_agent.py::_strip_think_blocks`` and the stream consumer's ``_OPEN_THINK_TAGS`` / ``_CLOSE_THINK_TAGS`` tuples. + + Also strips tool-call XML blocks some open models leak into visible + content (````, ````, Gemma-style + ````). Ported from + openclaw/openclaw#67318. """ cleaned = text for tag in _REASONING_TAGS: @@ -128,6 +133,31 @@ def _strip_reasoning_tags(text: str) -> str: cleaned, flags=re.IGNORECASE, ) + # Tool-call XML blocks (openclaw/openclaw#67318). + for tc_tag in ("tool_call", "tool_calls", "tool_result", + "function_call", "function_calls"): + cleaned = re.sub( + rf"<{tc_tag}\b[^>]*>.*?\s*", + "", + cleaned, + flags=re.DOTALL | re.IGNORECASE, + ) + # — boundary + attribute gated to avoid prose FPs. + cleaned = re.sub( + r'(?:(?<=^)|(?<=[\n\r.!?:]))[ \t]*' + r']*\bname\s*=[^>]*>' + r'(?:(?:(?!).)*)\s*', + '', + cleaned, + flags=re.DOTALL | re.IGNORECASE, + ) + # Stray tool-call close tags. + cleaned = re.sub( + r'\s*', + '', + cleaned, + flags=re.IGNORECASE, + ) return cleaned.strip() diff --git a/run_agent.py b/run_agent.py index 85eaad1b37..602e69fc6d 100644 --- a/run_agent.py +++ b/run_agent.py @@ -2226,6 +2226,20 @@ class AIAgent: 4. Tag variants: ````, ````, ````, ````, ```` (Gemma 4), all case-insensitive. + + Additionally strips standalone tool-call XML blocks that some open + models (notably Gemma variants on OpenRouter) emit inside assistant + content instead of via the structured ``tool_calls`` field: + * ```` + * ```` + * ```` + * ```` + * ```` + * ```` (Gemma style) + Ported from openclaw/openclaw#67318. The ```` variant is + boundary-gated (only strips when the tag sits at start-of-line or + after punctuation and carries a ``name="..."`` attribute) so prose + mentions like "Use in JavaScript" are preserved. """ if not content: return "" @@ -2237,6 +2251,30 @@ class AIAgent: content = re.sub(r'.*?', '', content, flags=re.DOTALL | re.IGNORECASE) content = re.sub(r'.*?', '', content, flags=re.DOTALL | re.IGNORECASE) content = re.sub(r'.*?', '', content, flags=re.DOTALL | re.IGNORECASE) + # 1b. Tool-call XML blocks (openclaw/openclaw#67318). Handle the + # generic tag names first — they have no attribute gating since + # a literal in prose is already vanishingly rare. + for _tc_name in ("tool_call", "tool_calls", "tool_result", + "function_call", "function_calls"): + content = re.sub( + rf'<{_tc_name}\b[^>]*>.*?', + '', + content, + flags=re.DOTALL | re.IGNORECASE, + ) + # 1c. ... — Gemma-style standalone + # tool call. Only strip when the tag sits at a block boundary + # (start of text, after a newline, or after sentence-ending + # punctuation) AND carries a name="..." attribute. This keeps + # prose mentions like "Use to declare" safe. + content = re.sub( + r'(?:(?<=^)|(?<=[\n\r.!?:]))[ \t]*' + r']*\bname\s*=[^>]*>' + r'(?:(?:(?!).)*)', + '', + content, + flags=re.DOTALL | re.IGNORECASE, + ) # 2. Unterminated reasoning block — open tag at a block boundary # (start of text, or after a newline) with no matching close. # Strip from the tag to end of string. Fixes #8878 / #9568 @@ -2254,6 +2292,16 @@ class AIAgent: content, flags=re.IGNORECASE, ) + # 3b. Stray tool-call closers. (We do NOT strip bare or + # unterminated because a truncated tail + # during streaming may still be valuable to the user; matches + # OpenClaw's intentional asymmetry.) + content = re.sub( + r'\s*', + '', + content, + flags=re.IGNORECASE, + ) return content @staticmethod diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py index 9bc637135c..9d7e891cb8 100644 --- a/tests/run_agent/test_run_agent.py +++ b/tests/run_agent/test_run_agent.py @@ -371,6 +371,91 @@ class TestStripThinkBlocks: assert "mixed" not in result assert "final" in result + # ─── Tool-call XML block stripping (openclaw/openclaw#67318) ───────── + # Some open models (notably Gemma variants via OpenRouter) emit + # standalone tool-call XML inside assistant content instead of via the + # structured `tool_calls` field. Left unstripped, raw XML leaks to + # gateway users (Discord/Telegram/Matrix) and the CLI. + + def test_tool_call_block_stripped(self, agent): + text = '{"name": "read_file", "arguments": {"path": "/tmp/x"}} done' + result = agent._strip_think_blocks(text) + assert "" not in result + assert "read_file" not in result + assert "done" in result + + def test_function_calls_block_stripped(self, agent): + text = '[{"name":"x"}]after' + result = agent._strip_think_blocks(text) + assert "" not in result + assert "after" in result + + def test_gemma_function_name_block_stripped(self, agent): + """Gemma-style: ....""" + text = ( + 'Let me check the file.\n' + '/tmp/x.md\n' + 'Here is the result.' + ) + result = agent._strip_think_blocks(text) + assert '' not in result + assert "/tmp/x.md" not in result + assert "Let me check the file." in result + assert "Here is the result." in result + + def test_gemma_function_multiline_payload_stripped(self, agent): + text = ( + 'Reading now.\n' + '\n' + ' /etc/passwd\n' + '\n' + 'Done.' + ) + result = agent._strip_think_blocks(text) + assert "/etc/passwd" not in result + assert "Reading now." in result + assert "Done." in result + + def test_function_mention_in_prose_preserved(self, agent): + """'Use in JavaScript.' — no name attr, not at block boundary + in a way that suggests tool call. Must survive.""" + text = "In JS you can use declarations for hoisting." + result = agent._strip_think_blocks(text) + # Prose mention has no name="..." attribute -> not stripped + assert "declarations for hoisting" in result + + def test_function_with_attr_in_middle_of_sentence_preserved(self, agent): + """Docs example: 'Use ... in docs.' + The sentence-middle position without a preceding punctuation block + boundary means it is NOT stripped. Prose context remains.""" + text = 'You can write y inline.' + result = agent._strip_think_blocks(text) + # Without a leading block boundary (no punctuation before), leaves intact + assert "You can write" in result + assert "inline" in result + + def test_stray_function_close_tag_removed(self, agent): + text = "answer trailing" + result = agent._strip_think_blocks(text) + assert "" not in result + assert "answer" in result + assert "trailing" in result + + def test_dangling_function_open_tag_preserved(self, agent): + """A streamed-but-truncated block with no close + is intentionally NOT stripped (OpenClaw's asymmetry). The tail of a + streaming reply may still be valuable to the user.""" + text = 'Checking: ' + result = agent._strip_think_blocks(text) + assert "Checking:" in result + + def test_mixed_reasoning_and_tool_call_both_stripped(self, agent): + text = 'let me plan{"name":"x"}final answer' + result = agent._strip_think_blocks(text) + assert "let me plan" not in result + assert "" not in result + assert "final answer" in result + class TestExtractReasoning: def test_reasoning_field(self, agent): diff --git a/tests/run_agent/test_strip_reasoning_tags_cli.py b/tests/run_agent/test_strip_reasoning_tags_cli.py new file mode 100644 index 0000000000..7eb15daf43 --- /dev/null +++ b/tests/run_agent/test_strip_reasoning_tags_cli.py @@ -0,0 +1,69 @@ +"""Tests for cli.py::_strip_reasoning_tags — specifically the tool-call +XML stripping added in openclaw/openclaw#67318 port. + +The CLI has its own copy of the stripper because it needs to run on the +final displayed assistant text (after streaming) without depending on the +AIAgent instance. It must stay in sync with run_agent.py::_strip_think_blocks +for tool-call tag coverage.""" + +import pytest + +from cli import _strip_reasoning_tags + + +class TestToolCallStripping: + def test_tool_call_block_stripped(self): + text = '{"name": "x"}result' + result = _strip_reasoning_tags(text) + assert "" not in result + assert "result" in result + + def test_function_calls_block_stripped(self): + text = '[{}]\nanswer' + result = _strip_reasoning_tags(text) + assert "" not in result + assert "answer" in result + + def test_gemma_function_name_block_stripped(self): + text = ( + 'Reading.\n' + '/tmp/x\n' + 'Done.' + ) + result = _strip_reasoning_tags(text) + assert '' not in result + assert "/tmp/x" not in result + assert "Reading." in result + assert "Done." in result + + def test_prose_mention_of_function_preserved(self): + text = "Use declarations in JavaScript." + result = _strip_reasoning_tags(text) + assert "JavaScript" in result + + def test_reasoning_still_stripped(self): + """Regression: make sure existing think-tag stripping still works.""" + text = "reasoning answer" + result = _strip_reasoning_tags(text) + assert "reasoning" not in result + assert "answer" in result + + def test_mixed_reasoning_and_tool_call(self): + text = 'plan{"x":1}final' + result = _strip_reasoning_tags(text) + assert "plan" not in result + assert "" not in result + assert "final" in result + + def test_stray_function_close(self): + text = "visible tail" + result = _strip_reasoning_tags(text) + assert "" not in result + assert "visible" in result + assert "tail" in result + + def test_empty_string(self): + assert _strip_reasoning_tags("") == "" + + def test_plain_text_unchanged(self): + assert _strip_reasoning_tags("just text") == "just text"