fix(anthropic): preserve interleaved thinking/tool_use block order on replay

Interleaved-thinking turns (adaptive thinking, Claude 4.6+/Opus 4.8) emit content blocks in an order like: thinking_1(signed), tool_use_1, thinking_2(signed), tool_use_2. Anthropic signs each thinking block against the turn content preceding it at its position. normalize_response split the turn into two parallel lists (reasoning_details + tool_calls), discarding cross-type order, and _convert_assistant_message rebuilt it as [all thinking][text][all tool_use]. That moved thinking_2 ahead of tool_use_1, invalidating its signature, so Anthropic rejected the latest assistant message with HTTP 400: "messages.N.content.M: `thinking` or `redacted_thinking` blocks in the latest assistant message cannot be modified." This was observed repeatedly in agent.conversation_loop against api.anthropic.com / claude-opus-4-8, recurring across sessions on multi-thinking-block turns. Fix: carry a verbatim, order-preserving copy of the turn's content blocks (anthropic_content_blocks) end-to-end: capture it in normalize_response, persist/restore it through state.db, and replay it unchanged for the latest assistant message. Gated to turns that contain both signed thinking and tool_use (the only turns that can interleave them), so other turns are unaffected. Adds 3 regression tests, including a SQLite round-trip covering the crash-recovery reload path.
2026-07-26 17:38:36 +00:00 · 2026-05-30 19:37:27 -04:00 · 2026-05-30 19:37:27 -04:00 · aaccaada28
commit aaccaada28
parent ad9012097b
7 changed files with 344 additions and 7 deletions
--- a/agent/anthropic_adapter.py
+++ b/agent/anthropic_adapter.py
@ -1692,6 +1692,29 @@ def _convert_assistant_message(m: Dict[str, Any]) -> Dict[str, Any]:
    reasoning_content injection for Kimi/DeepSeek endpoints.
    """
    content = m.get("content", "")
+    # Anthropic interleaved-thinking fast path: when this turn carries a
+    # verbatim, order-preserving block list (set by normalize_response only
+    # for turns that interleave SIGNED thinking with tool_use), replay it
+    # unchanged. Reconstructing from the parallel reasoning_details +
+    # tool_calls fields front-loads thinking and reorders signed blocks,
+    # which Anthropic rejects with HTTP 400 ("thinking ... blocks in the
+    # latest assistant message cannot be modified"). Block order — and thus
+    # each thinking block's signature — must survive verbatim. tool_use IDs
+    # are sanitized to match the tool_result IDs produced elsewhere; the
+    # downstream mcp_ prefixing pass handles tool names on these blocks.
+    ordered_blocks = m.get("anthropic_content_blocks")
+    if isinstance(ordered_blocks, list) and ordered_blocks:
+        replayed: List[Dict[str, Any]] = []
+        for b in ordered_blocks:
+            if not isinstance(b, dict):
+                continue
+            blk = copy.deepcopy(b)
+            if blk.get("type") == "tool_use" and "id" in blk:
+                blk["id"] = _sanitize_tool_id(blk.get("id", ""))
+            replayed.append(blk)
+        if replayed:
+            return {"role": "assistant", "content": replayed}
+
    blocks = _extract_preserved_thinking_blocks(m)
    if content:
        if isinstance(content, list):
--- a/agent/chat_completion_helpers.py
+++ b/agent/chat_completion_helpers.py
@ -952,6 +952,18 @@ def build_assistant_message(agent, assistant_message, finish_reason: str) -> dic
        if preserved:
            msg["reasoning_details"] = preserved

+    # Anthropic interleaved-thinking replay: when a turn interleaves signed
+    # thinking blocks with tool_use, the parallel reasoning_details +
+    # tool_calls fields lose the cross-type ordering, and reconstruction
+    # front-loads thinking — reordering signed blocks and triggering HTTP 400
+    # ("thinking ... blocks in the latest assistant message cannot be
+    # modified"). Carry the verbatim ordered block list so the adapter can
+    # replay the latest assistant message unchanged. See
+    # agent/transports/anthropic.py and agent/anthropic_adapter.py.
+    ordered_blocks = getattr(assistant_message, "anthropic_content_blocks", None)
+    if ordered_blocks:
+        msg["anthropic_content_blocks"] = ordered_blocks
+
    # Codex Responses API: preserve encrypted reasoning items for
    # multi-turn continuity. These get replayed as input on the next turn.
    codex_items = getattr(assistant_message, "codex_reasoning_items", None)
--- a/agent/transports/anthropic.py
+++ b/agent/transports/anthropic.py
@ -94,13 +94,27 @@ class AnthropicTransport(ProviderTransport):
        reasoning_parts = []
        reasoning_details = []
        tool_calls = []
+        # Verbatim, order-preserving copy of every content block in the turn.
+        # Anthropic signs each thinking block against the turn content that
+        # PRECEDES it at its position; when a turn interleaves thinking and
+        # tool_use (adaptive/interleaved thinking, Claude 4.6+), the parallel
+        # reasoning_details + tool_calls lists below lose that cross-type
+        # ordering. Replaying the latest assistant message in the wrong order
+        # invalidates the signatures -> HTTP 400 "thinking ... blocks in the
+        # latest assistant message cannot be modified". Preserve the exact
+        # block sequence here so the adapter can replay it unchanged. See
+        # tests/agent/test_anthropic_thinking_block_order.py.
+        ordered_blocks = []

        for block in response.content:
+            block_dict = _to_plain_data(block)
+            if isinstance(block_dict, dict):
+                ordered_blocks.append(block_dict)
            if block.type == "text":
                text_parts.append(block.text)
-            elif block.type == "thinking":
-                reasoning_parts.append(block.thinking)
-                block_dict = _to_plain_data(block)
+            elif block.type in ("thinking", "redacted_thinking"):
+                if block.type == "thinking":
+                    reasoning_parts.append(block.thinking)
                if isinstance(block_dict, dict):
                    reasoning_details.append(block_dict)
            elif block.type == "tool_use":
@ -130,6 +144,23 @@ class AnthropicTransport(ProviderTransport):
        provider_data = {}
        if reasoning_details:
            provider_data["reasoning_details"] = reasoning_details
+        # The ordered-blocks channel only matters when signed thinking is
+        # interleaved with tool_use — the one shape the parallel lists
+        # reconstruct incorrectly. The check below is a cheap superset of
+        # that: any turn with both a signed thinking block and a tool_use
+        # block carries the channel; turns lacking either one do not.
+        _has_signed_thinking = any(
+            isinstance(b, dict)
+            and b.get("type") in ("thinking", "redacted_thinking")
+            and (b.get("signature") or b.get("data"))
+            for b in ordered_blocks
+        )
+        _has_tool_use = any(
+            isinstance(b, dict) and b.get("type") == "tool_use"
+            for b in ordered_blocks
+        )
+        if _has_signed_thinking and _has_tool_use:
+            provider_data["anthropic_content_blocks"] = ordered_blocks

        return NormalizedResponse(
            content="\n".join(text_parts) if text_parts else None,
--- a/agent/transports/types.py
+++ b/agent/transports/types.py
@ -121,6 +121,18 @@ class NormalizedResponse:
        pd = self.provider_data or {}
        return pd.get("reasoning_details")

+    @property
+    def anthropic_content_blocks(self):
+        """Verbatim, order-preserving Anthropic content blocks for a turn.
+
+        Present only when an Anthropic turn interleaves signed thinking with
+        tool_use — the one shape the parallel reasoning_details + tool_calls
+        lists reconstruct in the wrong order, invalidating thinking-block
+        signatures on replay. See agent/transports/anthropic.py.
+        """
+        pd = self.provider_data or {}
+        return pd.get("anthropic_content_blocks")
+
    @property
    def codex_reasoning_items(self):
        pd = self.provider_data or {}
--- a/hermes_state.py
+++ b/hermes_state.py
@ -488,6 +488,7 @@ CREATE TABLE IF NOT EXISTS messages (
    reasoning TEXT,
    reasoning_content TEXT,
    reasoning_details TEXT,
+    anthropic_content_blocks TEXT,
    codex_reasoning_items TEXT,
    codex_message_items TEXT,
    platform_message_id TEXT,
@ -2240,6 +2241,7 @@ class SessionDB:
        reasoning: str = None,
        reasoning_content: str = None,
        reasoning_details: Any = None,
+        anthropic_content_blocks: Any = None,
        codex_reasoning_items: Any = None,
        codex_message_items: Any = None,
        platform_message_id: str = None,
@ -2262,6 +2264,10 @@ class SessionDB:
            json.dumps(reasoning_details)
            if reasoning_details else None
        )
+        anthropic_content_blocks_json = (
+            json.dumps(anthropic_content_blocks)
+            if anthropic_content_blocks else None
+        )
        codex_items_json = (
            json.dumps(codex_reasoning_items)
            if codex_reasoning_items else None
@ -2284,9 +2290,10 @@ class SessionDB:
            cursor = conn.execute(
                """INSERT INTO messages (session_id, role, content, tool_call_id,
                   tool_calls, tool_name, timestamp, token_count, finish_reason,
-                   reasoning, reasoning_content, reasoning_details, codex_reasoning_items,
+                   reasoning, reasoning_content, reasoning_details, anthropic_content_blocks,
+                   codex_reasoning_items,
                   codex_message_items, platform_message_id, observed)
-                   VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
+                   VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
                (
                    session_id,
                    role,
@ -2300,6 +2307,7 @@ class SessionDB:
                    reasoning,
                    reasoning_content,
                    reasoning_details_json,
+                    anthropic_content_blocks_json,
                    codex_items_json,
                    codex_message_items_json,
                    platform_message_id,
@ -2348,6 +2356,9 @@ class SessionDB:
                role = msg.get("role", "unknown")
                tool_calls = msg.get("tool_calls")
                reasoning_details = msg.get("reasoning_details") if role == "assistant" else None
+                anthropic_content_blocks = (
+                    msg.get("anthropic_content_blocks") if role == "assistant" else None
+                )
                codex_reasoning_items = (
                    msg.get("codex_reasoning_items") if role == "assistant" else None
                )
@ -2358,6 +2369,9 @@ class SessionDB:
                reasoning_details_json = (
                    json.dumps(reasoning_details) if reasoning_details else None
                )
+                anthropic_content_blocks_json = (
+                    json.dumps(anthropic_content_blocks) if anthropic_content_blocks else None
+                )
                codex_items_json = (
                    json.dumps(codex_reasoning_items) if codex_reasoning_items else None
                )
@ -2374,9 +2388,10 @@ class SessionDB:
                conn.execute(
                    """INSERT INTO messages (session_id, role, content, tool_call_id,
                       tool_calls, tool_name, timestamp, token_count, finish_reason,
-                       reasoning, reasoning_content, reasoning_details, codex_reasoning_items,
+                       reasoning, reasoning_content, reasoning_details, anthropic_content_blocks,
+                       codex_reasoning_items,
                       codex_message_items, platform_message_id, observed)
-                       VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
+                       VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
                    (
                        session_id,
                        role,
@ -2390,6 +2405,7 @@ class SessionDB:
                        msg.get("reasoning") if role == "assistant" else None,
                        msg.get("reasoning_content") if role == "assistant" else None,
                        reasoning_details_json,
+                        anthropic_content_blocks_json,
                        codex_items_json,
                        codex_message_items_json,
                        platform_msg_id,
@ -2732,6 +2748,7 @@ class SessionDB:
            rows = self._conn.execute(
                "SELECT role, content, tool_call_id, tool_calls, tool_name, "
                "finish_reason, reasoning, reasoning_content, reasoning_details, "
+                "anthropic_content_blocks, "
                "codex_reasoning_items, codex_message_items, platform_message_id, observed "
                f"FROM messages WHERE session_id IN ({placeholders})"
                f"{active_clause} ORDER BY id",
@ -2779,6 +2796,12 @@ class SessionDB:
                    except (json.JSONDecodeError, TypeError):
                        logger.warning("Failed to deserialize reasoning_details, falling back to None")
                        msg["reasoning_details"] = None
+                if row["anthropic_content_blocks"]:
+                    try:
+                        msg["anthropic_content_blocks"] = json.loads(row["anthropic_content_blocks"])
+                    except (json.JSONDecodeError, TypeError):
+                        logger.warning("Failed to deserialize anthropic_content_blocks, falling back to None")
+                        msg["anthropic_content_blocks"] = None
                if row["codex_reasoning_items"]:
                    try:
                        msg["codex_reasoning_items"] = json.loads(row["codex_reasoning_items"])
--- a/run_agent.py
+++ b/run_agent.py
@ -1597,6 +1597,7 @@ class AIAgent:
                    reasoning=msg.get("reasoning") if role == "assistant" else None,
                    reasoning_content=msg.get("reasoning_content") if role == "assistant" else None,
                    reasoning_details=msg.get("reasoning_details") if role == "assistant" else None,
+                    anthropic_content_blocks=msg.get("anthropic_content_blocks") if role == "assistant" else None,
                    codex_reasoning_items=msg.get("codex_reasoning_items") if role == "assistant" else None,
                    codex_message_items=msg.get("codex_message_items") if role == "assistant" else None,
                )
--- a/tests/agent/test_anthropic_thinking_block_order.py
+++ b/tests/agent/test_anthropic_thinking_block_order.py
@ -0,0 +1,235 @@
+"""Regression test for the Anthropic interleaved thinking-block 400.
+
+Reproduces: HTTP 400 ``messages.N.content.M: thinking or redacted_thinking
+blocks in the latest assistant message cannot be modified. These blocks must
+remain as they were in the original response.``
+
+Root cause under test
+----------------------
+With adaptive / interleaved thinking (Claude 4.6+, e.g. Opus 4.8), a single
+assistant turn can emit content blocks in an interleaved order::
+
+    thinking_1 (signed) · tool_use_1 · thinking_2 (signed) · tool_use_2
+
+Anthropic signs each thinking block against the turn content that precedes it
+at its position.  ``thinking_2`` is signed with ``tool_use_1`` before it.
+
+``AnthropicTransport.normalize_response`` (agent/transports/anthropic.py)
+splits the turn into two *parallel* lists — ``reasoning_details`` (thinking
+blocks) and ``tool_calls`` (tool_use blocks) — discarding the cross-type
+ordering.  ``run_agent`` stores those as separate fields on the assistant
+message.  On replay, ``_convert_assistant_message`` (agent/anthropic_adapter.py)
+rebuilds the content as ``[all thinking][text][all tool_use]``, which reorders
+``thinking_2`` ahead of ``tool_use_1``.  The signature no longer matches its
+original position, so Anthropic rejects the latest assistant message with the
+400 above.
+
+This test asserts that an interleaved turn round-trips through
+normalize_response -> stored message -> convert_messages_to_anthropic with its
+block order preserved.  It FAILED before the ordered-block channel
+(``anthropic_content_blocks``) was added and now guards against regressions.
+"""
+
+import json
+from types import SimpleNamespace
+
+import pytest
+
+from agent.transports import get_transport
+from agent.anthropic_adapter import convert_messages_to_anthropic
+
+
+def _thinking_block(text: str, signature: str) -> SimpleNamespace:
+    """A signed Anthropic thinking block, shaped like the SDK object."""
+    return SimpleNamespace(type="thinking", thinking=text, signature=signature)
+
+
+def _tool_use_block(block_id: str, name: str, payload: dict) -> SimpleNamespace:
+    return SimpleNamespace(type="tool_use", id=block_id, name=name, input=payload)
+
+
+def _interleaved_response() -> SimpleNamespace:
+    """An assistant turn with thinking interleaved between two tool_use blocks."""
+    return SimpleNamespace(
+        content=[
+            _thinking_block("Plan: inspect file A first.", "sig-AAA"),
+            _tool_use_block("toolu_1", "read_file", {"path": "a.py"}),
+            _thinking_block("A looked fine; now inspect B.", "sig-BBB"),
+            _tool_use_block("toolu_2", "read_file", {"path": "b.py"}),
+        ],
+        stop_reason="tool_use",
+        usage=None,
+    )
+
+
+def _stored_assistant_message(normalized) -> dict:
+    """Reconstruct the OpenAI-style assistant message the way run_agent stores it.
+
+    run_agent.py persists assistant turns as separate fields: content,
+    reasoning_details (from provider_data), and tool_calls.  See
+    run_agent.py L1513-1516 and hermes_state.py.
+    """
+    provider_data = normalized.provider_data or {}
+    tool_calls = []
+    for tc in (normalized.tool_calls or []):
+        tool_calls.append({
+            "id": tc.id,
+            "type": "function",
+            "function": {"name": tc.name, "arguments": tc.arguments},
+        })
+    msg = {
+        "role": "assistant",
+        "content": normalized.content or "",
+        "reasoning_details": provider_data.get("reasoning_details"),
+        "tool_calls": tool_calls,
+    }
+    # build_assistant_message lifts the verbatim ordered-block channel onto
+    # the stored message; mirror that here.
+    blocks = provider_data.get("anthropic_content_blocks")
+    if blocks:
+        msg["anthropic_content_blocks"] = blocks
+    return msg
+
+
+def _original_block_order(response) -> list:
+    """The (type, key) sequence of the original interleaved response."""
+    order = []
+    for b in response.content:
+        if b.type == "thinking":
+            order.append(("thinking", b.signature))
+        elif b.type == "tool_use":
+            order.append(("tool_use", b.id))
+    return order
+
+
+def _replayed_block_order(assistant_content) -> list:
+    order = []
+    for b in assistant_content:
+        if not isinstance(b, dict):
+            continue
+        if b.get("type") in ("thinking", "redacted_thinking"):
+            order.append(("thinking", b.get("signature")))
+        elif b.get("type") == "tool_use":
+            order.append(("tool_use", b.get("id")))
+    return order
+
+
+class TestInterleavedThinkingBlockOrder:
+    def test_normalize_response_loses_interleaving(self):
+        """Confirm the lossy split: reasoning_details and tool_calls are
+        independent fields with no positional linkage between them."""
+        transport = get_transport("anthropic_messages")
+        normalized = transport.normalize_response(_interleaved_response())
+
+        # Both thinking blocks are captured...
+        details = (normalized.provider_data or {}).get("reasoning_details")
+        assert details is not None and len(details) == 2
+        # ...and both tool calls...
+        assert normalized.tool_calls is not None and len(normalized.tool_calls) == 2
+        # ...but they live in separate fields. Neither list records that
+        # thinking_2 sat between the two tool calls, which is why the separate
+        # anthropic_content_blocks channel is needed to replay in order.
+
+    def test_interleaved_order_preserved_on_replay(self):
+        """The latest assistant message must replay blocks in their ORIGINAL
+        order, or Anthropic rejects the signed thinking blocks with a 400.
+
+        Without the ordered-block channel, _convert_assistant_message
+        front-loads all thinking blocks, producing
+            thinking_1 · thinking_2 · tool_use_1 · tool_use_2
+        instead of the original
+            thinking_1 · tool_use_1 · thinking_2 · tool_use_2
+        """
+        response = _interleaved_response()
+        original_order = _original_block_order(response)
+
+        transport = get_transport("anthropic_messages")
+        normalized = transport.normalize_response(response)
+        assistant_msg = _stored_assistant_message(normalized)
+
+        # Build a minimal conversation where this assistant turn is the LATEST
+        # assistant message (the one whose signed blocks are sent verbatim).
+        messages = [
+            {"role": "user", "content": "Inspect a.py and b.py."},
+            assistant_msg,
+            {"role": "tool", "tool_call_id": "toolu_1", "content": "a.py: ok"},
+            {"role": "tool", "tool_call_id": "toolu_2", "content": "b.py: ok"},
+        ]
+
+        _system, anthropic_messages = convert_messages_to_anthropic(
+            messages,
+            base_url=None,             # direct Anthropic
+            model="claude-opus-4-8",   # adaptive thinking family
+        )
+
+        # Find the (latest) assistant message in the converted output.
+        assistant_out = [m for m in anthropic_messages if m.get("role") == "assistant"]
+        assert assistant_out, "no assistant message in converted output"
+        replayed_order = _replayed_block_order(assistant_out[-1]["content"])
+
+        assert replayed_order == original_order, (
+            "Interleaved thinking/tool_use order was not preserved on replay.\n"
+            f"  original: {original_order}\n"
+            f"  replayed: {replayed_order}\n"
+            "Anthropic signs thinking blocks against their original position; "
+            "reordering invalidates the signature -> HTTP 400 'thinking blocks "
+            "in the latest assistant message cannot be modified'."
+        )
+
+    def test_interleaved_order_survives_db_roundtrip(self, tmp_path):
+        """The ordered-block channel must survive SQLite persistence + reload.
+
+        This is the exact path that fails after a gateway crash: the session
+        is reloaded from state.db via get_messages_as_conversation, then
+        replayed. If the verbatim block list is dropped or not deserialized,
+        the reconstruction reorders signed thinking blocks -> HTTP 400.
+        """
+        import hermes_state
+
+        response = _interleaved_response()
+        original_order = _original_block_order(response)
+
+        transport = get_transport("anthropic_messages")
+        normalized = transport.normalize_response(response)
+        assistant_msg = _stored_assistant_message(normalized)
+
+        db = hermes_state.SessionDB(tmp_path / "state.db")
+        sid = "sess_roundtrip"
+        db.create_session(sid, source="test")
+        db.append_message(
+            session_id=sid,
+            role="assistant",
+            content=assistant_msg["content"],
+            tool_calls=assistant_msg["tool_calls"],
+            reasoning_details=assistant_msg.get("reasoning_details"),
+            anthropic_content_blocks=assistant_msg.get("anthropic_content_blocks"),
+        )
+        db.append_message(session_id=sid, role="tool", tool_call_id="toolu_1", content="a ok")
+        db.append_message(session_id=sid, role="tool", tool_call_id="toolu_2", content="b ok")
+
+        # Reload via the conversation-restore path used on resume / crash recovery.
+        loaded = db.get_messages_as_conversation(sid)
+        reloaded_assistant = [m for m in loaded if m.get("role") == "assistant"]
+        assert reloaded_assistant, "no assistant message after DB reload"
+        # The ordered-block channel must come back as a deserialized list.
+        blocks = reloaded_assistant[0].get("anthropic_content_blocks")
+        assert isinstance(blocks, list) and len(blocks) == 4, (
+            "anthropic_content_blocks was not persisted/deserialized correctly"
+        )
+
+        _system, anthropic_messages = convert_messages_to_anthropic(
+            loaded, base_url=None, model="claude-opus-4-8",
+        )
+        assistant_out = [m for m in anthropic_messages if m.get("role") == "assistant"]
+        assert assistant_out, "no assistant message in converted output"
+        replayed_order = _replayed_block_order(assistant_out[-1]["content"])
+
+        assert replayed_order == original_order, (
+            "Interleaved block order was lost across the SQLite round-trip.\n"
+            f"  original: {original_order}\n"
+            f"  replayed: {replayed_order}"
+        )
+
+
+if __name__ == "__main__":
+    raise SystemExit(pytest.main([__file__, "-v"]))