fix(display): strip standalone tool-call XML tags from visible text

Port from openclaw/openclaw#67318.

Some open models (notably Gemma variants served via OpenRouter) emit tool calls as XML blocks inside assistant content instead of via the structured tool_calls field:

    <function name="read_file"><parameter name="path">/tmp/x</parameter></function>
    <tool_call>{"name":"x"}</tool_call>
    <function_calls>[{...}]</function_calls>

Left unstripped, this raw XML leaked to gateway users (Discord, Telegram, Matrix, Feishu, Signal, WhatsApp, etc.) and the CLI, since hermes-agent's existing reasoning-tag stripper handled only <think>/<thinking>/<thought> variants.

Extend _strip_think_blocks (run_agent.py) and _strip_reasoning_tags (cli.py) to cover:

  * <tool_call>, <tool_calls>, <tool_result>
  * <function_call>, <function_calls>
  * <function name="..."> ... </function> (Gemma-style)

The <function> variant is boundary-gated (only strips when the tag sits at start-of-line or after sentence punctuation AND carries a name="..." attribute) so prose mentions like 'Use <function> declarations in JS' are preserved. Dangling <function name="..."> with no close is intentionally left visible — matches OpenClaw's asymmetry so a truncated streaming tail still reaches the user.

Tests: 9 new cases in TestStripThinkBlocks (run_agent) + 9 in new file tests/run_agent/test_strip_reasoning_tags_cli.py. Covers Qwen-style <tool_call>, Gemma-style <function name="...">, multi-line payloads, prose preservation, stray close tags, dangling open tags, and mixed reasoning+tool_call content.

Note: this port covers the post-streaming final-text path, which is what gateway adapters and CLI display consume. Extending the per-delta stream filter in gateway/stream_consumer.py to hide these tags live as they stream is a separate follow-up; for now users may see raw XML briefly during a stream before the final cleaned text replaces it.

Refs: openclaw/openclaw#67318
2026-04-25 00:51:20 +00:00 · 2026-04-19 17:22:26 -07:00 · 2026-04-19 17:22:26 -07:00 · ef1a7b690d
commit ef1a7b690d
parent 73d0b08351
4 changed files with 232 additions and 0 deletions
--- a/cli.py
+++ b/cli.py
@ -104,6 +104,11 @@ def _strip_reasoning_tags(text: str) -> str:
    ``<thought>`` (Gemma 4).  Must stay in sync with
    ``run_agent.py::_strip_think_blocks`` and the stream consumer's
    ``_OPEN_THINK_TAGS`` / ``_CLOSE_THINK_TAGS`` tuples.
+
+    Also strips tool-call XML blocks some open models leak into visible
+    content (``<tool_call>``, ``<function_calls>``, Gemma-style
+    ``<function name="…">…</function>``). Ported from
+    openclaw/openclaw#67318.
    """
    cleaned = text
    for tag in _REASONING_TAGS:
@ -128,6 +133,31 @@ def _strip_reasoning_tags(text: str) -> str:
            cleaned,
            flags=re.IGNORECASE,
        )
+    # Tool-call XML blocks (openclaw/openclaw#67318).
+    for tc_tag in ("tool_call", "tool_calls", "tool_result",
+                   "function_call", "function_calls"):
+        cleaned = re.sub(
+            rf"<{tc_tag}\b[^>]*>.*?</{tc_tag}>\s*",
+            "",
+            cleaned,
+            flags=re.DOTALL | re.IGNORECASE,
+        )
+    # <function name="..."> — boundary + attribute gated to avoid false positives in prose.
+    cleaned = re.sub(
+        r'(?:(?<=^)|(?<=[\n\r.!?:]))[ \t]*'
+        r'<function\b[^>]*\bname\s*=[^>]*>'
+        r'(?:(?:(?!</function>).)*)</function>\s*',
+        '',
+        cleaned,
+        flags=re.DOTALL | re.IGNORECASE,
+    )
+    # Stray tool-call close tags.
+    cleaned = re.sub(
+        r'</(?:tool_call|tool_calls|tool_result|function_call|function_calls|function)>\s*',
+        '',
+        cleaned,
+        flags=re.IGNORECASE,
+    )
    return cleaned.strip()


--- a/run_agent.py
+++ b/run_agent.py
@ -2226,6 +2226,20 @@ class AIAgent:
          4. Tag variants: ``<think>``, ``<thinking>``, ``<reasoning>``,
             ``<REASONING_SCRATCHPAD>``, ``<thought>`` (Gemma 4), all
             case-insensitive.
+
+        Additionally strips standalone tool-call XML blocks that some open
+        models (notably Gemma variants on OpenRouter) emit inside assistant
+        content instead of via the structured ``tool_calls`` field:
+          * ``<tool_call>…</tool_call>``
+          * ``<tool_calls>…</tool_calls>``
+          * ``<tool_result>…</tool_result>``
+          * ``<function_call>…</function_call>``
+          * ``<function_calls>…</function_calls>``
+          * ``<function name="…">…</function>`` (Gemma style)
+        Ported from openclaw/openclaw#67318. The ``<function>`` variant is
+        boundary-gated (only strips when the tag sits at start-of-line or
+        after punctuation and carries a ``name="..."`` attribute) so prose
+        mentions like "Use <function> in JavaScript" are preserved.
        """
        if not content:
            return ""
@ -2237,6 +2251,30 @@ class AIAgent:
        content = re.sub(r'<reasoning>.*?</reasoning>', '', content, flags=re.DOTALL | re.IGNORECASE)
        content = re.sub(r'<REASONING_SCRATCHPAD>.*?</REASONING_SCRATCHPAD>', '', content, flags=re.DOTALL | re.IGNORECASE)
        content = re.sub(r'<thought>.*?</thought>', '', content, flags=re.DOTALL | re.IGNORECASE)
+        # 1b. Tool-call XML blocks (openclaw/openclaw#67318). Handle the
+        #     generic tag names first — they have no attribute gating since
+        #     a literal <tool_call> in prose is already vanishingly rare.
+        for _tc_name in ("tool_call", "tool_calls", "tool_result",
+                          "function_call", "function_calls"):
+            content = re.sub(
+                rf'<{_tc_name}\b[^>]*>.*?</{_tc_name}>',
+                '',
+                content,
+                flags=re.DOTALL | re.IGNORECASE,
+            )
+        # 1c. <function name="...">...</function> — Gemma-style standalone
+        #     tool call. Only strip when the tag sits at a block boundary
+        #     (start of text, or right after a newline or one of ". ! ? :",
+        #     optionally followed by spaces/tabs) AND carries a name=
+        #     attribute. Keeps prose like "Use <function> to declare" safe.
+        content = re.sub(
+            r'(?:(?<=^)|(?<=[\n\r.!?:]))[ \t]*'
+            r'<function\b[^>]*\bname\s*=[^>]*>'
+            r'(?:(?:(?!</function>).)*)</function>',
+            '',
+            content,
+            flags=re.DOTALL | re.IGNORECASE,
+        )
        # 2. Unterminated reasoning block — open tag at a block boundary
        #    (start of text, or after a newline) with no matching close.
        #    Strip from the tag to end of string.  Fixes #8878 / #9568
@ -2254,6 +2292,16 @@ class AIAgent:
            content,
            flags=re.IGNORECASE,
        )
+        # 3b. Stray tool-call closers. (We do NOT strip bare <function> or
+        #     unterminated <function name="..."> because a truncated tail
+        #     during streaming may still be valuable to the user; matches
+        #     OpenClaw's intentional asymmetry.)
+        content = re.sub(
+            r'</(?:tool_call|tool_calls|tool_result|function_call|function_calls|function)>\s*',
+            '',
+            content,
+            flags=re.IGNORECASE,
+        )
        return content

    @staticmethod
--- a/tests/run_agent/test_run_agent.py
+++ b/tests/run_agent/test_run_agent.py
@ -371,6 +371,91 @@ class TestStripThinkBlocks:
        assert "mixed" not in result
        assert "final" in result

+    # ─── Tool-call XML block stripping (openclaw/openclaw#67318) ─────────
+    # Some open models (notably Gemma variants via OpenRouter) emit
+    # standalone tool-call XML inside assistant content instead of via the
+    # structured `tool_calls` field. Left unstripped, raw XML leaks to
+    # gateway users (Discord/Telegram/Matrix) and the CLI.
+
+    def test_tool_call_block_stripped(self, agent):
+        text = '<tool_call>{"name": "read_file", "arguments": {"path": "/tmp/x"}}</tool_call> done'
+        result = agent._strip_think_blocks(text)
+        assert "<tool_call>" not in result
+        assert "read_file" not in result
+        assert "done" in result
+
+    def test_function_calls_block_stripped(self, agent):
+        text = '<function_calls>[{"name":"x"}]</function_calls>after'
+        result = agent._strip_think_blocks(text)
+        assert "<function_calls>" not in result
+        assert "after" in result
+
+    def test_gemma_function_name_block_stripped(self, agent):
+        """Gemma-style: <function name="read"><parameter>...</parameter></function>."""
+        text = (
+            'Let me check the file.\n'
+            '<function name="read_file"><parameter name="path">/tmp/x.md</parameter></function>\n'
+            'Here is the result.'
+        )
+        result = agent._strip_think_blocks(text)
+        assert '<function name="read_file">' not in result
+        assert "/tmp/x.md" not in result
+        assert "Let me check the file." in result
+        assert "Here is the result." in result
+
+    def test_gemma_function_multiline_payload_stripped(self, agent):
+        text = (
+            'Reading now.\n'
+            '<function name="read_file">\n'
+            '  <parameter name="path">/etc/passwd</parameter>\n'
+            '</function>\n'
+            'Done.'
+        )
+        result = agent._strip_think_blocks(text)
+        assert "/etc/passwd" not in result
+        assert "Reading now." in result
+        assert "Done." in result
+
+    def test_function_mention_in_prose_preserved(self, agent):
+        """'Use <function> in JavaScript.' — no name attr, not at block boundary
+        in a way that suggests tool call. Must survive."""
+        text = "In JS you can use <function> declarations for hoisting."
+        result = agent._strip_think_blocks(text)
+        # Prose mention has no name="..." attribute -> not stripped
+        assert "declarations for hoisting" in result
+
+    def test_function_with_attr_in_middle_of_sentence_preserved(self, agent):
+        """Docs example: 'Use <function name="x">...</function> in docs.'
+        Mid-sentence, with no block boundary before the tag, the block regex
+        does not fire; only the stray ``</function>`` closer is removed."""
+        text = 'You can write <function name="x">y</function> inline.'
+        result = agent._strip_think_blocks(text)
+        # No leading block boundary: open tag + payload survive, but the stray-closer pass still eats "</function> "
+        assert "You can write" in result
+        assert "inline" in result
+
+    def test_stray_function_close_tag_removed(self, agent):
+        text = "answer</function> trailing"
+        result = agent._strip_think_blocks(text)
+        assert "</function>" not in result
+        assert "answer" in result
+        assert "trailing" in result
+
+    def test_dangling_function_open_tag_preserved(self, agent):
+        """A streamed-but-truncated <function name="..."> block with no close
+        is intentionally NOT stripped (OpenClaw's asymmetry). The tail of a
+        streaming reply may still be valuable to the user."""
+        text = 'Checking: <function name="read">'
+        result = agent._strip_think_blocks(text)
+        assert "Checking:" in result
+
+    def test_mixed_reasoning_and_tool_call_both_stripped(self, agent):
+        text = '<think>let me plan</think><tool_call>{"name":"x"}</tool_call>final answer'
+        result = agent._strip_think_blocks(text)
+        assert "let me plan" not in result
+        assert "<tool_call>" not in result
+        assert "final answer" in result
+

 class TestExtractReasoning:
    def test_reasoning_field(self, agent):
--- a/tests/run_agent/test_strip_reasoning_tags_cli.py
+++ b/tests/run_agent/test_strip_reasoning_tags_cli.py
@ -0,0 +1,69 @@
+"""Tests for cli.py::_strip_reasoning_tags — specifically the tool-call
+XML stripping added in openclaw/openclaw#67318 port.
+
+The CLI has its own copy of the stripper because it needs to run on the
+final displayed assistant text (after streaming) without depending on the
+AIAgent instance. It must stay in sync with run_agent.py::_strip_think_blocks
+for tool-call tag coverage."""
+
+import pytest
+
+from cli import _strip_reasoning_tags
+
+
+class TestToolCallStripping:
+    def test_tool_call_block_stripped(self):
+        text = '<tool_call>{"name": "x"}</tool_call>result'
+        result = _strip_reasoning_tags(text)
+        assert "<tool_call>" not in result
+        assert "result" in result
+
+    def test_function_calls_block_stripped(self):
+        text = '<function_calls>[{}]</function_calls>\nanswer'
+        result = _strip_reasoning_tags(text)
+        assert "<function_calls>" not in result
+        assert "answer" in result
+
+    def test_gemma_function_name_block_stripped(self):
+        text = (
+            'Reading.\n'
+            '<function name="r"><parameter name="p">/tmp/x</parameter></function>\n'
+            'Done.'
+        )
+        result = _strip_reasoning_tags(text)
+        assert '<function name="r">' not in result
+        assert "/tmp/x" not in result
+        assert "Reading." in result
+        assert "Done." in result
+
+    def test_prose_mention_of_function_preserved(self):
+        text = "Use <function> declarations in JavaScript."
+        result = _strip_reasoning_tags(text)
+        assert "JavaScript" in result
+
+    def test_reasoning_still_stripped(self):
+        """Regression: make sure existing think-tag stripping still works."""
+        text = "<think>reasoning</think> answer"
+        result = _strip_reasoning_tags(text)
+        assert "reasoning" not in result
+        assert "answer" in result
+
+    def test_mixed_reasoning_and_tool_call(self):
+        text = '<think>plan</think><tool_call>{"x":1}</tool_call>final'
+        result = _strip_reasoning_tags(text)
+        assert "plan" not in result
+        assert "<tool_call>" not in result
+        assert "final" in result
+
+    def test_stray_function_close(self):
+        text = "visible</function> tail"
+        result = _strip_reasoning_tags(text)
+        assert "</function>" not in result
+        assert "visible" in result
+        assert "tail" in result
+
+    def test_empty_string(self):
+        assert _strip_reasoning_tags("") == ""
+
+    def test_plain_text_unchanged(self):
+        assert _strip_reasoning_tags("just text") == "just text"