fix(anthropic): smart thinking block signature management (#6112)

Anthropic signs thinking blocks against the full turn content. Any upstream mutation (context compression, session truncation, orphan stripping, message merging) invalidates the signature, causing HTTP 400 'Invalid signature in thinking block' — especially in long-lived gateway sessions. Strategy (following clawdbot/OpenClaw pattern): 1. Strip thinking/redacted_thinking from all assistant messages EXCEPT the last one — preserves reasoning continuity on the current tool-use chain while avoiding stale signature errors on older turns. 2. Downgrade unsigned thinking blocks to plain text — Anthropic can't validate them, but the reasoning content is preserved. 3. Strip cache_control from thinking/redacted_thinking blocks to prevent cache markers from interfering with signature validation. 4. Drop thinking blocks from the second message when merging consecutive assistant messages (role alternation enforcement). 5. Error recovery: on HTTP 400 mentioning 'signature' and 'thinking', strip all reasoning_details from the conversation and retry once. This is the safety net for edge cases the proactive stripping misses. Addresses the issue reported in PR #6086 by @mingginwan while preserving reasoning continuity (their PR stripped ALL thinking blocks unconditionally). Files changed: - agent/anthropic_adapter.py: thinking block management in convert_messages_to_anthropic (strip old turns, downgrade unsigned, strip cache_control, merge-time strip) - run_agent.py: one-shot signature error recovery in retry loop - tests/test_anthropic_adapter.py: 10 new tests covering all cases
2026-04-25 00:51:20 +00:00 · 2026-04-08 03:38:08 -07:00 · 2026-04-08 03:38:08 -07:00 · 1368caf66f
commit 1368caf66f
parent 30ea423ce8
3 changed files with 356 additions and 3 deletions
--- a/agent/anthropic_adapter.py
+++ b/agent/anthropic_adapter.py
@ -1102,7 +1102,15 @@ def convert_messages_to_anthropic(
                        curr_content = [{"type": "text", "text": curr_content}]
                    fixed[-1]["content"] = prev_content + curr_content
            else:
-                # Consecutive assistant messages — merge text content
+                # Consecutive assistant messages — merge text content.
+                # Drop thinking blocks from the *second* message: their
+                # signature was computed against a different turn boundary
+                # and becomes invalid once merged.
+                if isinstance(m["content"], list):
+                    m["content"] = [
+                        b for b in m["content"]
+                        if not (isinstance(b, dict) and b.get("type") in ("thinking", "redacted_thinking"))
+                    ]
                prev_blocks = fixed[-1]["content"]
                curr_blocks = m["content"]
                if isinstance(prev_blocks, list) and isinstance(curr_blocks, list):
@ -1120,6 +1128,68 @@ def convert_messages_to_anthropic(
            fixed.append(m)
    result = fixed

+    # ── Thinking block signature management ──────────────────────────
+    # Anthropic signs thinking blocks against the full turn content.
+    # Any upstream mutation (context compression, session truncation,
+    # orphan stripping, message merging) invalidates the signature,
+    # causing HTTP 400 "Invalid signature in thinking block".
+    #
+    # Strategy (following clawdbot/OpenClaw pattern):
+    # 1. Strip thinking/redacted_thinking from all assistant messages
+    #    EXCEPT the last one — preserves reasoning continuity on the
+    #    current tool-use chain while avoiding stale signature errors.
+    # 2. Downgrade unsigned thinking blocks (no signature) to text —
+    #    Anthropic can't validate them and will reject them.
+    # 3. Strip cache_control from thinking/redacted_thinking blocks —
+    #    cache markers can interfere with signature validation.
+    _THINKING_TYPES = frozenset(("thinking", "redacted_thinking"))
+
+    last_assistant_idx = None
+    for i in range(len(result) - 1, -1, -1):
+        if result[i].get("role") == "assistant":
+            last_assistant_idx = i
+            break
+
+    for idx, m in enumerate(result):
+        if m.get("role") != "assistant" or not isinstance(m.get("content"), list):
+            continue
+
+        if idx != last_assistant_idx:
+            # Strip ALL thinking blocks from non-latest assistant messages
+            stripped = [
+                b for b in m["content"]
+                if not (isinstance(b, dict) and b.get("type") in _THINKING_TYPES)
+            ]
+            m["content"] = stripped or [{"type": "text", "text": "(thinking elided)"}]
+        else:
+            # Latest assistant: keep signed thinking blocks for reasoning
+            # continuity; downgrade unsigned ones to plain text.
+            new_content = []
+            for b in m["content"]:
+                if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES:
+                    new_content.append(b)
+                    continue
+                if b.get("type") == "redacted_thinking":
+                    # Redacted blocks use 'data' for the signature payload
+                    if b.get("data"):
+                        new_content.append(b)
+                    # else: drop — no data means it can't be validated
+                elif b.get("signature"):
+                    # Signed thinking block — keep it
+                    new_content.append(b)
+                else:
+                    # Unsigned thinking — downgrade to text so it's not lost
+                    thinking_text = b.get("thinking", "")
+                    if thinking_text:
+                        new_content.append({"type": "text", "text": thinking_text})
+            m["content"] = new_content or [{"type": "text", "text": "(empty)"}]
+
+        # Strip cache_control from any remaining thinking/redacted_thinking
+        # blocks — cache markers interfere with signature validation.
+        for b in m["content"]:
+            if isinstance(b, dict) and b.get("type") in _THINKING_TYPES:
+                b.pop("cache_control", None)
+
    return system, result


--- a/run_agent.py
+++ b/run_agent.py
@ -7288,6 +7288,7 @@ class AIAgent:
            codex_auth_retry_attempted=False
            anthropic_auth_retry_attempted=False
            nous_auth_retry_attempted=False
+            thinking_sig_retry_attempted = False
            has_retried_429 = False
            restart_with_compressed_messages = False
            restart_with_length_continuation = False
@ -7877,8 +7878,38 @@ class AIAgent:
                        print(f"{self.log_prefix}     • Check ANTHROPIC_API_KEY in {_dhh}/.env for API keys or legacy token values")
                        print(f"{self.log_prefix}     • For API keys: verify at https://console.anthropic.com/settings/keys")
                        print(f"{self.log_prefix}     • For Claude Code: run 'claude /login' to refresh, then retry")
-                        print(f"{self.log_prefix}     • Clear stale keys: hermes config set ANTHROPIC_TOKEN \"\"")
-                        print(f"{self.log_prefix}     • Legacy cleanup: hermes config set ANTHROPIC_API_KEY \"\"")
+                        print(f"{self.log_prefix}     • Legacy cleanup: hermes config set ANTHROPIC_TOKEN \"\"")
+                        print(f"{self.log_prefix}     • Clear stale keys: hermes config set ANTHROPIC_API_KEY \"\"")
+
+                    # ── Thinking block signature recovery ─────────────────
+                    # Anthropic signs thinking blocks against the full turn
+                    # content.  Any upstream mutation (context compression,
+                    # session truncation, message merging) invalidates the
+                    # signature → HTTP 400.  Recovery: strip reasoning_details
+                    # from all messages so the next retry sends no thinking
+                    # blocks at all.  One-shot — don't retry infinitely.
+                    if (
+                        self.api_mode == "anthropic_messages"
+                        and status_code == 400
+                        and not thinking_sig_retry_attempted
+                    ):
+                        _err_msg_lower = str(api_error).lower()
+                        if "signature" in _err_msg_lower and "thinking" in _err_msg_lower:
+                            thinking_sig_retry_attempted = True
+                            for _m in messages:
+                                if isinstance(_m, dict):
+                                    _m.pop("reasoning_details", None)
+                            self._vprint(
+                                f"{self.log_prefix}⚠️  Thinking block signature invalid — "
+                                f"stripped all thinking blocks, retrying...",
+                                force=True,
+                            )
+                            logging.warning(
+                                "%sThinking block signature recovery: stripped "
+                                "reasoning_details from %d messages",
+                                self.log_prefix, len(messages),
+                            )
+                            continue

                    retry_count += 1
                    elapsed_time = time.time() - api_start_time
--- a/tests/agent/test_anthropic_adapter.py
+++ b/tests/agent/test_anthropic_adapter.py
@ -1276,6 +1276,258 @@ class TestRoleAlternation:
        assert [m["role"] for m in result] == ["user", "assistant", "user"]


+# ---------------------------------------------------------------------------
+# Thinking block signature management
+# ---------------------------------------------------------------------------
+
+
+class TestThinkingBlockSignatureManagement:
+    """Tests for the thinking block handling strategy:
+    strip from old turns, preserve latest signed, downgrade unsigned."""
+
+    def test_thinking_stripped_from_non_last_assistant(self):
+        """Thinking blocks are removed from all assistant messages except the last."""
+        messages = [
+            {
+                "role": "assistant",
+                "content": "",
+                "tool_calls": [
+                    {"id": "tc_1", "function": {"name": "tool1", "arguments": "{}"}},
+                ],
+                "reasoning_details": [
+                    {"type": "thinking", "thinking": "Old reasoning.", "signature": "sig_old"},
+                ],
+            },
+            {"role": "tool", "tool_call_id": "tc_1", "content": "result 1"},
+            {
+                "role": "assistant",
+                "content": "",
+                "tool_calls": [
+                    {"id": "tc_2", "function": {"name": "tool2", "arguments": "{}"}},
+                ],
+                "reasoning_details": [
+                    {"type": "thinking", "thinking": "Latest reasoning.", "signature": "sig_new"},
+                ],
+            },
+            {"role": "tool", "tool_call_id": "tc_2", "content": "result 2"},
+        ]
+        _, result = convert_messages_to_anthropic(messages)
+
+        # Find both assistant messages
+        assistants = [m for m in result if m["role"] == "assistant"]
+        assert len(assistants) == 2
+
+        # First (non-last) assistant: no thinking blocks
+        first_types = [b.get("type") for b in assistants[0]["content"]]
+        assert "thinking" not in first_types
+        assert "redacted_thinking" not in first_types
+        assert "tool_use" in first_types  # tool_use should survive
+
+        # Last assistant: thinking block preserved with signature
+        last_blocks = assistants[1]["content"]
+        thinking_blocks = [b for b in last_blocks if b.get("type") == "thinking"]
+        assert len(thinking_blocks) == 1
+        assert thinking_blocks[0]["thinking"] == "Latest reasoning."
+        assert thinking_blocks[0]["signature"] == "sig_new"
+
+    def test_signed_thinking_preserved_on_last_turn(self):
+        """A signed thinking block on the last assistant message is kept."""
+        messages = [
+            {
+                "role": "assistant",
+                "content": "The answer is 42.",
+                "reasoning_details": [
+                    {"type": "thinking", "thinking": "Deep thought.", "signature": "sig_valid"},
+                ],
+            },
+        ]
+        _, result = convert_messages_to_anthropic(messages)
+        blocks = result[0]["content"]
+        thinking = [b for b in blocks if b.get("type") == "thinking"]
+        assert len(thinking) == 1
+        assert thinking[0]["signature"] == "sig_valid"
+
+    def test_unsigned_thinking_downgraded_to_text_on_last_turn(self):
+        """Unsigned thinking blocks on the last turn become text blocks."""
+        messages = [
+            {
+                "role": "assistant",
+                "content": "Response text.",
+                "reasoning_details": [
+                    {"type": "thinking", "thinking": "Unsigned reasoning."},
+                    # No 'signature' field
+                ],
+            },
+        ]
+        _, result = convert_messages_to_anthropic(messages)
+        blocks = result[0]["content"]
+
+        # No thinking blocks should remain
+        assert not any(b.get("type") == "thinking" for b in blocks)
+        # The reasoning text should be preserved as a text block
+        text_contents = [b.get("text", "") for b in blocks if b.get("type") == "text"]
+        assert "Unsigned reasoning." in text_contents
+
+    def test_redacted_thinking_with_data_preserved(self):
+        """Redacted thinking with 'data' field is kept on last turn."""
+        messages = [
+            {
+                "role": "assistant",
+                "content": "Response.",
+                "reasoning_details": [
+                    {"type": "redacted_thinking", "data": "opaque_signature_data"},
+                ],
+            },
+        ]
+        _, result = convert_messages_to_anthropic(messages)
+        blocks = result[0]["content"]
+        redacted = [b for b in blocks if b.get("type") == "redacted_thinking"]
+        assert len(redacted) == 1
+        assert redacted[0]["data"] == "opaque_signature_data"
+
+    def test_redacted_thinking_without_data_dropped(self):
+        """Redacted thinking without 'data' is dropped — can't be validated."""
+        messages = [
+            {
+                "role": "assistant",
+                "content": "Response.",
+                "reasoning_details": [
+                    {"type": "redacted_thinking"},
+                    # No 'data' field
+                ],
+            },
+        ]
+        _, result = convert_messages_to_anthropic(messages)
+        blocks = result[0]["content"]
+        assert not any(b.get("type") == "redacted_thinking" for b in blocks)
+
+    def test_cache_control_stripped_from_thinking_blocks(self):
+        """cache_control markers are removed from thinking/redacted_thinking blocks."""
+        messages = [
+            {
+                "role": "assistant",
+                "content": "",
+                "tool_calls": [
+                    {"id": "tc_1", "function": {"name": "t", "arguments": "{}"}},
+                ],
+                "reasoning_details": [
+                    {
+                        "type": "thinking",
+                        "thinking": "Reasoning.",
+                        "signature": "sig_1",
+                        "cache_control": {"type": "ephemeral"},
+                    },
+                ],
+            },
+            {"role": "tool", "tool_call_id": "tc_1", "content": "result"},
+        ]
+        _, result = convert_messages_to_anthropic(messages)
+        assistant = next(m for m in result if m["role"] == "assistant")
+        for block in assistant["content"]:
+            if block.get("type") in ("thinking", "redacted_thinking"):
+                assert "cache_control" not in block
+
+    def test_thinking_stripped_from_merged_consecutive_assistants(self):
+        """When consecutive assistants are merged, second one's thinking is dropped."""
+        messages = [
+            {
+                "role": "assistant",
+                "content": "First response.",
+                "reasoning_details": [
+                    {"type": "thinking", "thinking": "First thought.", "signature": "sig_1"},
+                ],
+            },
+            {
+                "role": "assistant",
+                "content": "Second response.",
+                "reasoning_details": [
+                    {"type": "thinking", "thinking": "Second thought.", "signature": "sig_2"},
+                ],
+            },
+        ]
+        _, result = convert_messages_to_anthropic(messages)
+
+        # Should be merged into one assistant message
+        assistants = [m for m in result if m["role"] == "assistant"]
+        assert len(assistants) == 1
+
+        # Only the first thinking block should remain (signed, on the last/only assistant)
+        blocks = assistants[0]["content"]
+        thinking = [b for b in blocks if b.get("type") == "thinking"]
+        assert len(thinking) == 1
+        assert thinking[0]["thinking"] == "First thought."
+
+    def test_empty_content_after_strip_gets_placeholder(self):
+        """If stripping thinking leaves an empty message, a placeholder is added."""
+        messages = [
+            {
+                "role": "assistant",
+                "content": "",
+                "reasoning_details": [
+                    {"type": "thinking", "thinking": "Only thinking, no text."},
+                    # Unsigned — will be downgraded, but content was empty string
+                ],
+            },
+            {"role": "user", "content": "Next message."},
+            {"role": "assistant", "content": "Final."},
+        ]
+        _, result = convert_messages_to_anthropic(messages)
+        # First assistant is non-last, so thinking is stripped completely.
+        # The original content was empty and thinking was unsigned → placeholder
+        first_assistant = result[0]
+        assert first_assistant["role"] == "assistant"
+        assert len(first_assistant["content"]) >= 1
+
+    def test_multi_turn_conversation_preserves_only_last(self):
+        """Full multi-turn conversation: only last assistant keeps thinking."""
+        messages = [
+            {"role": "user", "content": "Question 1"},
+            {
+                "role": "assistant",
+                "content": "Answer 1",
+                "reasoning_details": [
+                    {"type": "thinking", "thinking": "Thought 1", "signature": "sig_1"},
+                ],
+            },
+            {"role": "user", "content": "Question 2"},
+            {
+                "role": "assistant",
+                "content": "Answer 2",
+                "reasoning_details": [
+                    {"type": "thinking", "thinking": "Thought 2", "signature": "sig_2"},
+                ],
+            },
+            {"role": "user", "content": "Question 3"},
+            {
+                "role": "assistant",
+                "content": "Answer 3",
+                "reasoning_details": [
+                    {"type": "thinking", "thinking": "Thought 3", "signature": "sig_3"},
+                ],
+            },
+        ]
+        _, result = convert_messages_to_anthropic(messages)
+
+        assistants = [m for m in result if m["role"] == "assistant"]
+        assert len(assistants) == 3
+
+        # First two: no thinking blocks
+        for a in assistants[:2]:
+            assert not any(
+                b.get("type") in ("thinking", "redacted_thinking")
+                for b in a["content"]
+                if isinstance(b, dict)
+            )
+
+        # Last one: thinking preserved
+        last_thinking = [
+            b for b in assistants[2]["content"]
+            if isinstance(b, dict) and b.get("type") == "thinking"
+        ]
+        assert len(last_thinking) == 1
+        assert last_thinking[0]["signature"] == "sig_3"
+
+
 # ---------------------------------------------------------------------------
 # Tool choice
 # ---------------------------------------------------------------------------