fix(anthropic): smart thinking block signature management (#6112)

Anthropic signs thinking blocks against the full turn content. Any
upstream mutation (context compression, session truncation, orphan
stripping, message merging) invalidates the signature, causing HTTP 400
'Invalid signature in thinking block' — especially in long-lived
gateway sessions.

Strategy (following clawdbot/OpenClaw pattern):

1. Strip thinking/redacted_thinking from all assistant messages EXCEPT
   the last one — preserves reasoning continuity on the current
   tool-use chain while avoiding stale signature errors on older turns.

2. Downgrade unsigned thinking blocks to plain text — Anthropic can't
   validate them, but the reasoning content is preserved.

3. Strip cache_control from thinking/redacted_thinking blocks to
   prevent cache markers from interfering with signature validation.

4. Drop thinking blocks from the second message when merging
   consecutive assistant messages (role alternation enforcement).

5. Error recovery: on HTTP 400 mentioning 'signature' and 'thinking',
   strip all reasoning_details from the conversation and retry once.
   This is the safety net for edge cases the proactive stripping
   misses.

Addresses the issue reported in PR #6086 by @mingginwan while
preserving reasoning continuity (their PR stripped ALL thinking
blocks unconditionally).

Files changed:
- agent/anthropic_adapter.py: thinking block management in
  convert_messages_to_anthropic (strip old turns, downgrade unsigned,
  strip cache_control, merge-time strip)
- run_agent.py: one-shot signature error recovery in retry loop
- tests/test_anthropic_adapter.py: 9 new tests covering all cases
This commit is contained in:
Teknium 2026-04-08 03:38:08 -07:00 committed by GitHub
parent 30ea423ce8
commit 1368caf66f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 356 additions and 3 deletions

View file

@ -1102,7 +1102,15 @@ def convert_messages_to_anthropic(
curr_content = [{"type": "text", "text": curr_content}]
fixed[-1]["content"] = prev_content + curr_content
else:
# Consecutive assistant messages — merge text content
# Consecutive assistant messages — merge text content.
# Drop thinking blocks from the *second* message: their
# signature was computed against a different turn boundary
# and becomes invalid once merged.
if isinstance(m["content"], list):
m["content"] = [
b for b in m["content"]
if not (isinstance(b, dict) and b.get("type") in ("thinking", "redacted_thinking"))
]
prev_blocks = fixed[-1]["content"]
curr_blocks = m["content"]
if isinstance(prev_blocks, list) and isinstance(curr_blocks, list):
@ -1120,6 +1128,68 @@ def convert_messages_to_anthropic(
fixed.append(m)
result = fixed
# ── Thinking block signature management ──────────────────────────
# Anthropic signs thinking blocks against the full turn content.
# Any upstream mutation (context compression, session truncation,
# orphan stripping, message merging) invalidates the signature,
# causing HTTP 400 "Invalid signature in thinking block".
#
# Strategy (following clawdbot/OpenClaw pattern):
# 1. Strip thinking/redacted_thinking from all assistant messages
# EXCEPT the last one — preserves reasoning continuity on the
# current tool-use chain while avoiding stale signature errors.
# 2. Downgrade unsigned thinking blocks (no signature) to text —
# Anthropic can't validate them and will reject them.
# 3. Strip cache_control from thinking/redacted_thinking blocks —
# cache markers can interfere with signature validation.
_THINKING_TYPES = frozenset(("thinking", "redacted_thinking"))
last_assistant_idx = None
for i in range(len(result) - 1, -1, -1):
if result[i].get("role") == "assistant":
last_assistant_idx = i
break
for idx, m in enumerate(result):
if m.get("role") != "assistant" or not isinstance(m.get("content"), list):
continue
if idx != last_assistant_idx:
# Strip ALL thinking blocks from non-latest assistant messages
stripped = [
b for b in m["content"]
if not (isinstance(b, dict) and b.get("type") in _THINKING_TYPES)
]
m["content"] = stripped or [{"type": "text", "text": "(thinking elided)"}]
else:
# Latest assistant: keep signed thinking blocks for reasoning
# continuity; downgrade unsigned ones to plain text.
new_content = []
for b in m["content"]:
if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES:
new_content.append(b)
continue
if b.get("type") == "redacted_thinking":
# Redacted blocks use 'data' for the signature payload
if b.get("data"):
new_content.append(b)
# else: drop — no data means it can't be validated
elif b.get("signature"):
# Signed thinking block — keep it
new_content.append(b)
else:
# Unsigned thinking — downgrade to text so it's not lost
thinking_text = b.get("thinking", "")
if thinking_text:
new_content.append({"type": "text", "text": thinking_text})
m["content"] = new_content or [{"type": "text", "text": "(empty)"}]
# Strip cache_control from any remaining thinking/redacted_thinking
# blocks — cache markers interfere with signature validation.
for b in m["content"]:
if isinstance(b, dict) and b.get("type") in _THINKING_TYPES:
b.pop("cache_control", None)
return system, result

View file

@ -7288,6 +7288,7 @@ class AIAgent:
codex_auth_retry_attempted=False
anthropic_auth_retry_attempted=False
nous_auth_retry_attempted=False
thinking_sig_retry_attempted = False
has_retried_429 = False
restart_with_compressed_messages = False
restart_with_length_continuation = False
@ -7877,8 +7878,38 @@ class AIAgent:
print(f"{self.log_prefix} • Check ANTHROPIC_API_KEY in {_dhh}/.env for API keys or legacy token values")
print(f"{self.log_prefix} • For API keys: verify at https://console.anthropic.com/settings/keys")
print(f"{self.log_prefix} • For Claude Code: run 'claude /login' to refresh, then retry")
print(f"{self.log_prefix} • Clear stale keys: hermes config set ANTHROPIC_TOKEN \"\"")
print(f"{self.log_prefix} • Legacy cleanup: hermes config set ANTHROPIC_API_KEY \"\"")
print(f"{self.log_prefix} • Legacy cleanup: hermes config set ANTHROPIC_TOKEN \"\"")
print(f"{self.log_prefix} • Clear stale keys: hermes config set ANTHROPIC_API_KEY \"\"")
# ── Thinking block signature recovery ─────────────────
# Anthropic signs thinking blocks against the full turn
# content. Any upstream mutation (context compression,
# session truncation, message merging) invalidates the
# signature → HTTP 400. Recovery: strip reasoning_details
# from all messages so the next retry sends no thinking
# blocks at all. One-shot — don't retry infinitely.
if (
self.api_mode == "anthropic_messages"
and status_code == 400
and not thinking_sig_retry_attempted
):
_err_msg_lower = str(api_error).lower()
if "signature" in _err_msg_lower and "thinking" in _err_msg_lower:
thinking_sig_retry_attempted = True
for _m in messages:
if isinstance(_m, dict):
_m.pop("reasoning_details", None)
self._vprint(
f"{self.log_prefix}⚠️ Thinking block signature invalid — "
f"stripped all thinking blocks, retrying...",
force=True,
)
logging.warning(
"%sThinking block signature recovery: stripped "
"reasoning_details from %d messages",
self.log_prefix, len(messages),
)
continue
retry_count += 1
elapsed_time = time.time() - api_start_time

View file

@ -1276,6 +1276,258 @@ class TestRoleAlternation:
assert [m["role"] for m in result] == ["user", "assistant", "user"]
# ---------------------------------------------------------------------------
# Thinking block signature management
# ---------------------------------------------------------------------------
class TestThinkingBlockSignatureManagement:
    """Tests for thinking-block handling in ``convert_messages_to_anthropic``.

    Strategy under test:
      * strip thinking/redacted_thinking from every assistant message
        except the last one;
      * on the last assistant message keep signed thinking blocks, keep
        redacted_thinking blocks that carry ``data``, and downgrade unsigned
        thinking blocks to plain text;
      * strip ``cache_control`` from any surviving thinking block;
      * drop thinking blocks from the second message when consecutive
        assistant messages are merged.
    """

    # Block types that carry (or stand in for) model reasoning.
    _THINKING_TYPES = ("thinking", "redacted_thinking")

    def test_thinking_stripped_from_non_last_assistant(self):
        """Thinking blocks are removed from all assistant messages except the last."""
        messages = [
            {
                "role": "assistant",
                "content": "",
                "tool_calls": [
                    {"id": "tc_1", "function": {"name": "tool1", "arguments": "{}"}},
                ],
                "reasoning_details": [
                    {"type": "thinking", "thinking": "Old reasoning.", "signature": "sig_old"},
                ],
            },
            {"role": "tool", "tool_call_id": "tc_1", "content": "result 1"},
            {
                "role": "assistant",
                "content": "",
                "tool_calls": [
                    {"id": "tc_2", "function": {"name": "tool2", "arguments": "{}"}},
                ],
                "reasoning_details": [
                    {"type": "thinking", "thinking": "Latest reasoning.", "signature": "sig_new"},
                ],
            },
            {"role": "tool", "tool_call_id": "tc_2", "content": "result 2"},
        ]
        _, result = convert_messages_to_anthropic(messages)

        # Find both assistant messages
        assistants = [m for m in result if m["role"] == "assistant"]
        assert len(assistants) == 2

        # First (non-last) assistant: no thinking blocks
        first_types = [b.get("type") for b in assistants[0]["content"]]
        assert "thinking" not in first_types
        assert "redacted_thinking" not in first_types
        assert "tool_use" in first_types  # tool_use should survive

        # Last assistant: thinking block preserved with signature
        last_blocks = assistants[1]["content"]
        thinking_blocks = [b for b in last_blocks if b.get("type") == "thinking"]
        assert len(thinking_blocks) == 1
        assert thinking_blocks[0]["thinking"] == "Latest reasoning."
        assert thinking_blocks[0]["signature"] == "sig_new"

    def test_signed_thinking_preserved_on_last_turn(self):
        """A signed thinking block on the last assistant message is kept."""
        messages = [
            {
                "role": "assistant",
                "content": "The answer is 42.",
                "reasoning_details": [
                    {"type": "thinking", "thinking": "Deep thought.", "signature": "sig_valid"},
                ],
            },
        ]
        _, result = convert_messages_to_anthropic(messages)

        blocks = result[0]["content"]
        thinking = [b for b in blocks if b.get("type") == "thinking"]
        assert len(thinking) == 1
        assert thinking[0]["signature"] == "sig_valid"

    def test_unsigned_thinking_downgraded_to_text_on_last_turn(self):
        """Unsigned thinking blocks on the last turn become text blocks."""
        messages = [
            {
                "role": "assistant",
                "content": "Response text.",
                "reasoning_details": [
                    {"type": "thinking", "thinking": "Unsigned reasoning."},
                    # No 'signature' field
                ],
            },
        ]
        _, result = convert_messages_to_anthropic(messages)

        blocks = result[0]["content"]
        # No thinking blocks should remain
        assert not any(b.get("type") == "thinking" for b in blocks)
        # The reasoning text should be preserved as a text block
        text_contents = [b.get("text", "") for b in blocks if b.get("type") == "text"]
        assert "Unsigned reasoning." in text_contents

    def test_redacted_thinking_with_data_preserved(self):
        """Redacted thinking with 'data' field is kept on last turn."""
        messages = [
            {
                "role": "assistant",
                "content": "Response.",
                "reasoning_details": [
                    {"type": "redacted_thinking", "data": "opaque_signature_data"},
                ],
            },
        ]
        _, result = convert_messages_to_anthropic(messages)

        blocks = result[0]["content"]
        redacted = [b for b in blocks if b.get("type") == "redacted_thinking"]
        assert len(redacted) == 1
        assert redacted[0]["data"] == "opaque_signature_data"

    def test_redacted_thinking_without_data_dropped(self):
        """Redacted thinking without 'data' is dropped — can't be validated."""
        messages = [
            {
                "role": "assistant",
                "content": "Response.",
                "reasoning_details": [
                    {"type": "redacted_thinking"},
                    # No 'data' field
                ],
            },
        ]
        _, result = convert_messages_to_anthropic(messages)

        blocks = result[0]["content"]
        assert not any(b.get("type") == "redacted_thinking" for b in blocks)

    def test_cache_control_stripped_from_thinking_blocks(self):
        """cache_control markers are removed from thinking/redacted_thinking blocks."""
        messages = [
            {
                "role": "assistant",
                "content": "",
                "tool_calls": [
                    {"id": "tc_1", "function": {"name": "t", "arguments": "{}"}},
                ],
                "reasoning_details": [
                    {
                        "type": "thinking",
                        "thinking": "Reasoning.",
                        "signature": "sig_1",
                        "cache_control": {"type": "ephemeral"},
                    },
                ],
            },
            {"role": "tool", "tool_call_id": "tc_1", "content": "result"},
        ]
        _, result = convert_messages_to_anthropic(messages)

        assistant = next(m for m in result if m["role"] == "assistant")
        thinking_blocks = [
            block for block in assistant["content"]
            if block.get("type") in self._THINKING_TYPES
        ]
        # The signed block sits on the last assistant turn, so it must survive;
        # without this check the loop below would pass vacuously if the block
        # were dropped instead of cleaned.
        assert thinking_blocks
        for block in thinking_blocks:
            assert "cache_control" not in block

    def test_thinking_stripped_from_merged_consecutive_assistants(self):
        """When consecutive assistants are merged, second one's thinking is dropped."""
        messages = [
            {
                "role": "assistant",
                "content": "First response.",
                "reasoning_details": [
                    {"type": "thinking", "thinking": "First thought.", "signature": "sig_1"},
                ],
            },
            {
                "role": "assistant",
                "content": "Second response.",
                "reasoning_details": [
                    {"type": "thinking", "thinking": "Second thought.", "signature": "sig_2"},
                ],
            },
        ]
        _, result = convert_messages_to_anthropic(messages)

        # Should be merged into one assistant message
        assistants = [m for m in result if m["role"] == "assistant"]
        assert len(assistants) == 1

        # Only the first thinking block should remain (signed, on the last/only assistant)
        blocks = assistants[0]["content"]
        thinking = [b for b in blocks if b.get("type") == "thinking"]
        assert len(thinking) == 1
        assert thinking[0]["thinking"] == "First thought."

    def test_empty_content_after_strip_gets_placeholder(self):
        """If stripping thinking leaves an empty message, a placeholder is added."""
        messages = [
            {
                "role": "assistant",
                "content": "",
                "reasoning_details": [
                    {"type": "thinking", "thinking": "Only thinking, no text."},
                    # Unsigned, and on a non-last assistant turn, so it is
                    # stripped rather than downgraded; content was empty string.
                ],
            },
            {"role": "user", "content": "Next message."},
            {"role": "assistant", "content": "Final."},
        ]
        _, result = convert_messages_to_anthropic(messages)

        # First assistant is non-last, so thinking is stripped completely.
        # The original content was empty, so the message must still carry
        # at least one (placeholder) block rather than empty content.
        first_assistant = result[0]
        assert first_assistant["role"] == "assistant"
        assert len(first_assistant["content"]) >= 1
        # No thinking block may survive on a non-last assistant turn.
        assert not any(
            isinstance(b, dict) and b.get("type") in self._THINKING_TYPES
            for b in first_assistant["content"]
        )

    def test_multi_turn_conversation_preserves_only_last(self):
        """Full multi-turn conversation: only last assistant keeps thinking."""
        messages = [
            {"role": "user", "content": "Question 1"},
            {
                "role": "assistant",
                "content": "Answer 1",
                "reasoning_details": [
                    {"type": "thinking", "thinking": "Thought 1", "signature": "sig_1"},
                ],
            },
            {"role": "user", "content": "Question 2"},
            {
                "role": "assistant",
                "content": "Answer 2",
                "reasoning_details": [
                    {"type": "thinking", "thinking": "Thought 2", "signature": "sig_2"},
                ],
            },
            {"role": "user", "content": "Question 3"},
            {
                "role": "assistant",
                "content": "Answer 3",
                "reasoning_details": [
                    {"type": "thinking", "thinking": "Thought 3", "signature": "sig_3"},
                ],
            },
        ]
        _, result = convert_messages_to_anthropic(messages)

        assistants = [m for m in result if m["role"] == "assistant"]
        assert len(assistants) == 3

        # First two: no thinking blocks
        for a in assistants[:2]:
            assert not any(
                b.get("type") in ("thinking", "redacted_thinking")
                for b in a["content"]
                if isinstance(b, dict)
            )

        # Last one: thinking preserved
        last_thinking = [
            b for b in assistants[2]["content"]
            if isinstance(b, dict) and b.get("type") == "thinking"
        ]
        assert len(last_thinking) == 1
        assert last_thinking[0]["signature"] == "sig_3"
# ---------------------------------------------------------------------------
# Tool choice
# ---------------------------------------------------------------------------