refactor: extract 7 helpers from convert_messages_to_anthropic

Split convert_messages_to_anthropic (complexity 79) into 7 focused helpers:

- _convert_assistant_message — assistant msg to content blocks
- _convert_tool_message_to_result — tool msg to tool_result + merge
- _convert_user_message — user msg validation + conversion
- _strip_orphaned_tool_blocks — orphan tool_use + tool_result removal
- _merge_consecutive_roles — role alternation enforcement
- _manage_thinking_signatures — strip/preserve/downgrade by endpoint
- _evict_old_screenshots — keep only 3 most recent images

Main function complexity: 79 → 10 (below C901 threshold).
Zero logic changes — pure extraction.

Net -4 lines (refactor itself); +45/-17 follow-up polish for annotation tightening (List[Dict] → List[Dict[str, Any]]), restored rationale comments in _manage_thinking_signatures (third-party endpoint examples, #13848/#16748 issue refs, redacted_thinking 'data'-as-signature note), and "Mutates ``result`` in place." docstring lines on the four mutating helpers.
2026-07-13 14:02:16 +00:00 · 2026-05-12 00:36:51 +05:30 · 2026-05-12 00:36:51 +05:30 · 9d61408837
commit 9d61408837
parent ec2ab5bfaf
1 changed files with 254 additions and 230 deletions
--- a/agent/anthropic_adapter.py
+++ b/agent/anthropic_adapter.py
@ -1606,182 +1606,155 @@ def _content_parts_to_anthropic_blocks(parts: Any) -> List[Dict[str, Any]]:
    return out


-def convert_messages_to_anthropic(
-    messages: List[Dict],
-    base_url: str | None = None,
-    model: str | None = None,
-) -> Tuple[Optional[Any], List[Dict]]:
-    """Convert OpenAI-format messages to Anthropic format.
+def _convert_assistant_message(m: Dict[str, Any]) -> Dict[str, Any]:
+    """Convert an assistant message to Anthropic content blocks.

-    Returns (system_prompt, anthropic_messages).
-    System messages are extracted since Anthropic takes them as a separate param.
-    system_prompt is a string or list of content blocks (when cache_control present).
-
-    When *base_url* is provided and points to a third-party Anthropic-compatible
-    endpoint, all thinking block signatures are stripped.  Signatures are
-    Anthropic-proprietary — third-party endpoints cannot validate them and will
-    reject them with HTTP 400 "Invalid signature in thinking block".
-
-    When *model* is provided and matches the Kimi / Moonshot family (or
-    *base_url* is a Kimi / Moonshot host), unsigned thinking blocks
-    synthesised from ``reasoning_content`` are preserved on replayed
-    assistant tool-call messages — Kimi requires the field to exist, even
-    if empty.
+    Handles thinking blocks, regular content, tool calls, and
+    reasoning_content injection for Kimi/DeepSeek endpoints.
    """
-    system = None
-    result = []
-
-    for m in messages:
-        role = m.get("role", "user")
-        content = m.get("content", "")
-
-        if role == "system":
-            if isinstance(content, list):
-                # Preserve cache_control markers on content blocks
-                has_cache = any(
-                    p.get("cache_control") for p in content if isinstance(p, dict)
-                )
-                if has_cache:
-                    system = [p for p in content if isinstance(p, dict)]
-                else:
-                    system = "\n".join(
-                        p["text"] for p in content if p.get("type") == "text"
-                    )
-            else:
-                system = content
-            continue
-
-        if role == "assistant":
-            blocks = _extract_preserved_thinking_blocks(m)
-            if content:
-                if isinstance(content, list):
-                    converted_content = _convert_content_to_anthropic(content)
-                    if isinstance(converted_content, list):
-                        blocks.extend(converted_content)
-                else:
-                    blocks.append({"type": "text", "text": str(content)})
-            for tc in m.get("tool_calls", []):
-                if not tc or not isinstance(tc, dict):
-                    continue
-                fn = tc.get("function", {})
-                args = fn.get("arguments", "{}")
-                try:
-                    parsed_args = json.loads(args) if isinstance(args, str) else args
-                except (json.JSONDecodeError, ValueError):
-                    parsed_args = {}
-                blocks.append({
-                    "type": "tool_use",
-                    "id": _sanitize_tool_id(tc.get("id", "")),
-                    "name": fn.get("name", ""),
-                    "input": parsed_args,
-                })
-            # Kimi's /coding endpoint (Anthropic protocol) requires assistant
-            # tool-call messages to carry reasoning_content when thinking is
-            # enabled server-side.  Preserve it as a thinking block so Kimi
-            # can validate the message history.  See hermes-agent#13848.
-            #
-            # Accept empty string "" — _copy_reasoning_content_for_api()
-            # injects "" as a tier-3 fallback for Kimi tool-call messages
-            # that had no reasoning.  Kimi requires the field to exist, even
-            # if empty.
-            #
-            # Prepend (not append): Anthropic protocol requires thinking
-            # blocks before text and tool_use blocks.
-            #
-            # Guard: only add when reasoning_details didn't already contribute
-            # thinking blocks.  On native Anthropic, reasoning_details produces
-            # signed thinking blocks — adding another unsigned one from
-            # reasoning_content would create a duplicate (same text) that gets
-            # downgraded to a spurious text block on the last assistant message.
-            reasoning_content = m.get("reasoning_content")
-            _already_has_thinking = any(
-                isinstance(b, dict) and b.get("type") in {"thinking", "redacted_thinking"}
-                for b in blocks
-            )
-            if isinstance(reasoning_content, str) and not _already_has_thinking:
-                blocks.insert(0, {"type": "thinking", "thinking": reasoning_content})
-            # Anthropic rejects empty assistant content
-            effective = blocks or content
-            if not effective or effective == "":
-                effective = [{"type": "text", "text": "(empty)"}]
-            result.append({"role": "assistant", "content": effective})
-            continue
-
-        if role == "tool":
-            # Sanitize tool_use_id and ensure non-empty content.
-            # Computer-use (and other multimodal) tool results arrive as
-            # either a list of OpenAI-style content parts, or a dict
-            # marked `_multimodal` with an embedded `content` list. Convert
-            # both into Anthropic `tool_result` inner blocks (text + image).
-            multimodal_blocks: Optional[List[Dict[str, Any]]] = None
-            if isinstance(content, dict) and content.get("_multimodal"):
-                multimodal_blocks = _content_parts_to_anthropic_blocks(
-                    content.get("content") or []
-                )
-                # Fallback text if the conversion produced nothing usable.
-                if not multimodal_blocks and content.get("text_summary"):
-                    multimodal_blocks = [
-                        {"type": "text", "text": str(content["text_summary"])}
-                    ]
-            elif isinstance(content, list):
-                converted = _content_parts_to_anthropic_blocks(content)
-                if any(b.get("type") == "image" for b in converted):
-                    multimodal_blocks = converted
-            # Back-compat: some callers stash blocks under a private key.
-            if multimodal_blocks is None:
-                stashed = m.get("_anthropic_content_blocks")
-                if isinstance(stashed, list) and stashed:
-                    text_content = content if isinstance(content, str) and content.strip() else None
-                    multimodal_blocks = (
-                        [{"type": "text", "text": text_content}] + stashed
-                        if text_content else list(stashed)
-                    )
-
-            if multimodal_blocks:
-                result_content: Any = multimodal_blocks
-            elif isinstance(content, str):
-                result_content = content
-            else:
-                result_content = json.dumps(content) if content else "(no output)"
-            if not result_content:
-                result_content = "(no output)"
-            tool_result = {
-                "type": "tool_result",
-                "tool_use_id": _sanitize_tool_id(m.get("tool_call_id", "")),
-                "content": result_content,
-            }
-            if isinstance(m.get("cache_control"), dict):
-                tool_result["cache_control"] = dict(m["cache_control"])
-            # Merge consecutive tool results into one user message
-            if (
-                result
-                and result[-1]["role"] == "user"
-                and isinstance(result[-1]["content"], list)
-                and result[-1]["content"]
-                and result[-1]["content"][0].get("type") == "tool_result"
-            ):
-                result[-1]["content"].append(tool_result)
-            else:
-                result.append({"role": "user", "content": [tool_result]})
-            continue
-
-        # Regular user message — validate non-empty content (Anthropic rejects empty)
+    content = m.get("content", "")
+    blocks = _extract_preserved_thinking_blocks(m)
+    if content:
        if isinstance(content, list):
-            converted_blocks = _convert_content_to_anthropic(content)
-            # Check if all text blocks are empty
-            if not converted_blocks or all(
-                b.get("text", "").strip() == ""
-                for b in converted_blocks
-                if isinstance(b, dict) and b.get("type") == "text"
-            ):
-                converted_blocks = [{"type": "text", "text": "(empty message)"}]
-            result.append({"role": "user", "content": converted_blocks})
+            converted_content = _convert_content_to_anthropic(content)
+            if isinstance(converted_content, list):
+                blocks.extend(converted_content)
        else:
-            # Validate string content is non-empty
-            if not content or (isinstance(content, str) and not content.strip()):
-                content = "(empty message)"
-            result.append({"role": "user", "content": content})
+            blocks.append({"type": "text", "text": str(content)})
+    for tc in m.get("tool_calls", []):
+        if not tc or not isinstance(tc, dict):
+            continue
+        fn = tc.get("function", {})
+        args = fn.get("arguments", "{}")
+        try:
+            parsed_args = json.loads(args) if isinstance(args, str) else args
+        except (json.JSONDecodeError, ValueError):
+            parsed_args = {}
+        blocks.append({
+            "type": "tool_use",
+            "id": _sanitize_tool_id(tc.get("id", "")),
+            "name": fn.get("name", ""),
+            "input": parsed_args,
+        })
+    # Kimi's /coding endpoint (Anthropic protocol) requires assistant
+    # tool-call messages to carry reasoning_content when thinking is
+    # enabled server-side.  Preserve it as a thinking block so Kimi
+    # can validate the message history.  See hermes-agent#13848.
+    #
+    # Accept empty string "" — _copy_reasoning_content_for_api()
+    # injects "" as a tier-3 fallback for Kimi tool-call messages
+    # that had no reasoning.  Kimi requires the field to exist, even
+    # if empty.
+    #
+    # Prepend (not append): Anthropic protocol requires thinking
+    # blocks before text and tool_use blocks.
+    #
+    # Guard: only add when reasoning_details didn't already contribute
+    # thinking blocks.  On native Anthropic, reasoning_details produces
+    # signed thinking blocks — adding another unsigned one from
+    # reasoning_content would create a duplicate (same text) that gets
+    # downgraded to a spurious text block on the last assistant message.
+    reasoning_content = m.get("reasoning_content")
+    _already_has_thinking = any(
+        isinstance(b, dict) and b.get("type") in {"thinking", "redacted_thinking"}
+        for b in blocks
+    )
+    if isinstance(reasoning_content, str) and not _already_has_thinking:
+        blocks.insert(0, {"type": "thinking", "thinking": reasoning_content})
+    # Anthropic rejects empty assistant content
+    effective = blocks or content
+    if not effective or effective == "":
+        effective = [{"type": "text", "text": "(empty)"}]
+    return {"role": "assistant", "content": effective}

+
+def _convert_tool_message_to_result(
+    result: List[Dict[str, Any]], m: Dict[str, Any]
+) -> None:
+    """Convert a tool message to an Anthropic tool_result, merging consecutive
+    results into one user message.
+
+    Mutates ``result`` in place — either appends a new user message or extends
+    the trailing user message's tool_result list.
+    """
+    content = m.get("content", "")
+    multimodal_blocks: Optional[List[Dict[str, Any]]] = None
+    if isinstance(content, dict) and content.get("_multimodal"):
+        multimodal_blocks = _content_parts_to_anthropic_blocks(
+            content.get("content") or []
+        )
+        # Fallback text if the conversion produced nothing usable.
+        if not multimodal_blocks and content.get("text_summary"):
+            multimodal_blocks = [
+                {"type": "text", "text": str(content["text_summary"])}
+            ]
+    elif isinstance(content, list):
+        converted = _content_parts_to_anthropic_blocks(content)
+        if any(b.get("type") == "image" for b in converted):
+            multimodal_blocks = converted
+    # Back-compat: some callers stash blocks under a private key.
+    if multimodal_blocks is None:
+        stashed = m.get("_anthropic_content_blocks")
+        if isinstance(stashed, list) and stashed:
+            text_content = content if isinstance(content, str) and content.strip() else None
+            multimodal_blocks = (
+                [{"type": "text", "text": text_content}] + stashed
+                if text_content else list(stashed)
+            )
+
+    if multimodal_blocks:
+        result_content: Any = multimodal_blocks
+    elif isinstance(content, str):
+        result_content = content
+    else:
+        result_content = json.dumps(content) if content else "(no output)"
+    if not result_content:
+        result_content = "(no output)"
+    tool_result = {
+        "type": "tool_result",
+        "tool_use_id": _sanitize_tool_id(m.get("tool_call_id", "")),
+        "content": result_content,
+    }
+    if isinstance(m.get("cache_control"), dict):
+        tool_result["cache_control"] = dict(m["cache_control"])
+    # Merge consecutive tool results into one user message
+    if (
+        result
+        and result[-1]["role"] == "user"
+        and isinstance(result[-1]["content"], list)
+        and result[-1]["content"]
+        and result[-1]["content"][0].get("type") == "tool_result"
+    ):
+        result[-1]["content"].append(tool_result)
+    else:
+        result.append({"role": "user", "content": [tool_result]})
+
+
+def _convert_user_message(content: Any) -> Dict[str, Any]:
+    """Validate and convert a user message to anthropic format."""
+    if isinstance(content, list):
+        converted_blocks = _convert_content_to_anthropic(content)
+        if not converted_blocks or all(
+            b.get("text", "").strip() == ""
+            for b in converted_blocks
+            if isinstance(b, dict) and b.get("type") == "text"
+        ):
+            converted_blocks = [{"type": "text", "text": "(empty message)"}]
+        return {"role": "user", "content": converted_blocks}
+    else:
+        if not content or (isinstance(content, str) and not content.strip()):
+            content = "(empty message)"
+        return {"role": "user", "content": content}
+
+
+def _strip_orphaned_tool_blocks(result: List[Dict[str, Any]]) -> None:
+    """Strip tool_use blocks with no matching tool_result, and vice versa.
+
+    Context compression or session truncation can remove either side of a
+    tool-call pair.  Anthropic rejects both orphans with HTTP 400.
+
+    Mutates ``result`` in place.
+    """
    # Strip orphaned tool_use blocks (no matching tool_result follows)
    tool_result_ids = set()
    for m in result:
@ -1799,10 +1772,7 @@ def convert_messages_to_anthropic(
            if not m["content"]:
                m["content"] = [{"type": "text", "text": "(tool call removed)"}]

-    # Strip orphaned tool_result blocks (no matching tool_use precedes them).
-    # This is the mirror of the above: context compression or session truncation
-    # can remove an assistant message containing a tool_use while leaving the
-    # subsequent tool_result intact.  Anthropic rejects these with a 400.
+    # Strip orphaned tool_result blocks (no matching tool_use precedes them)
    tool_use_ids = set()
    for m in result:
        if m["role"] == "assistant" and isinstance(m["content"], list):
@ -1819,12 +1789,16 @@ def convert_messages_to_anthropic(
            if not m["content"]:
                m["content"] = [{"type": "text", "text": "(tool result removed)"}]

-    # Enforce strict role alternation (Anthropic rejects consecutive same-role messages)
+
+def _merge_consecutive_roles(result: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    """Merge consecutive same-role messages to enforce Anthropic alternation.
+
+    Returns a new list (rebind ``result``); merged message dicts are mutated in place.
+    """
    fixed = []
    for m in result:
        if fixed and fixed[-1]["role"] == m["role"]:
            if m["role"] == "user":
-                # Merge consecutive user messages
                prev_content = fixed[-1]["content"]
                curr_content = m["content"]
                if isinstance(prev_content, str) and isinstance(curr_content, str):
@ -1832,7 +1806,6 @@ def convert_messages_to_anthropic(
                elif isinstance(prev_content, list) and isinstance(curr_content, list):
                    fixed[-1]["content"] = prev_content + curr_content
                else:
-                    # Mixed types — wrap string in list
                    if isinstance(prev_content, str):
                        prev_content = [{"type": "text", "text": prev_content}]
                    if isinstance(curr_content, str):
@ -1855,7 +1828,6 @@ def convert_messages_to_anthropic(
                elif isinstance(prev_blocks, str) and isinstance(curr_blocks, str):
                    fixed[-1]["content"] = prev_blocks + "\n" + curr_blocks
                else:
-                    # Mixed types — normalize both to list and merge
                    if isinstance(prev_blocks, str):
                        prev_blocks = [{"type": "text", "text": prev_blocks}]
                    if isinstance(curr_blocks, str):
@ -1863,37 +1835,34 @@ def convert_messages_to_anthropic(
                    fixed[-1]["content"] = prev_blocks + curr_blocks
        else:
            fixed.append(m)
-    result = fixed
+    return fixed

-    # ── Thinking block signature management ──────────────────────────
-    # Anthropic signs thinking blocks against the full turn content.
-    # Any upstream mutation (context compression, session truncation,
-    # orphan stripping, message merging) invalidates the signature,
-    # causing HTTP 400 "Invalid signature in thinking block".
-    #
-    # Signatures are Anthropic-proprietary.  Third-party endpoints
-    # (MiniMax, Microsoft Foundry, self-hosted proxies) cannot validate
-    # them and will reject them outright.  When targeting a third-party
-    # endpoint, strip ALL thinking/redacted_thinking blocks from every
-    # assistant message — the third-party will generate its own
-    # thinking blocks if it supports extended thinking.
-    #
-    # For direct Anthropic (strategy following clawdbot/OpenClaw):
-    # 1. Strip thinking/redacted_thinking from all assistant messages
-    #    EXCEPT the last one — preserves reasoning continuity on the
-    #    current tool-use chain while avoiding stale signature errors.
-    # 2. Downgrade unsigned thinking blocks (no signature) to text —
-    #    Anthropic can't validate them and will reject them.
-    # 3. Strip cache_control from thinking/redacted_thinking blocks —
-    #    cache markers can interfere with signature validation.
+
+def _manage_thinking_signatures(
+    result: List[Dict[str, Any]], base_url: str | None, model: str | None
+) -> None:
+    """Strip or preserve thinking blocks based on endpoint type.
+
+    Anthropic signs thinking blocks against the full turn content.
+    Any upstream mutation (context compression, session truncation, orphan
+    stripping, message merging) invalidates the signature, causing HTTP 400
+    "Invalid signature in thinking block".
+
+    Signatures are Anthropic-proprietary.  Third-party endpoints (MiniMax,
+    Azure AI Foundry, AWS Bedrock, self-hosted proxies) cannot validate them
+    and will reject them outright.  Kimi's /coding and DeepSeek's /anthropic
+    endpoints speak the Anthropic protocol upstream but require unsigned
+    thinking blocks (synthesised from ``reasoning_content``) to round-trip on
+    replayed assistant tool-call messages.  See hermes-agent#13848 (Kimi) and
+    hermes-agent#16748 (DeepSeek).
+
+    Mutates ``result`` in place.
+    """
    _THINKING_TYPES = frozenset(("thinking", "redacted_thinking"))
    _is_third_party = _is_third_party_anthropic_endpoint(base_url)
-    # Kimi /coding and DeepSeek /anthropic share a contract: both speak the
-    # Anthropic Messages protocol upstream but require that thinking blocks
-    # synthesised from reasoning_content round-trip on subsequent turns when
-    # thinking is enabled.  Signed Anthropic blocks still have to be stripped
-    # (neither endpoint can validate Anthropic's signatures); unsigned blocks
-    # are preserved.  See hermes-agent#13848 (Kimi) and #16748 (DeepSeek).
+    # Kimi / DeepSeek share a contract: strip signed Anthropic blocks
+    # (neither upstream can validate Anthropic signatures), preserve unsigned
+    # ones synthesised from reasoning_content.  See #13848, #16748.
    _preserve_unsigned_thinking = (
        _is_kimi_family_endpoint(base_url, model)
        or _is_deepseek_anthropic_endpoint(base_url)
@ -1910,26 +1879,19 @@ def convert_messages_to_anthropic(
            continue

        if _preserve_unsigned_thinking:
-            # Kimi's /coding and DeepSeek's /anthropic endpoints both enable
-            # thinking server-side and require unsigned thinking blocks on
-            # replayed assistant tool-call messages.  Strip signed Anthropic
-            # blocks (neither upstream can validate Anthropic signatures) but
-            # preserve the unsigned ones we synthesised from reasoning_content.
+            # Kimi / DeepSeek: strip signed, preserve unsigned.
            new_content = []
            for b in m["content"]:
                if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES:
                    new_content.append(b)
                    continue
                if b.get("signature") or b.get("data"):
-                    # Anthropic-signed block — upstream can't validate, strip
+                    # Signed (or redacted-with-data) — upstream can't validate, strip.
                    continue
-                # Unsigned thinking (synthesised from reasoning_content) —
-                # keep it: the upstream needs it for message-history validation.
                new_content.append(b)
            m["content"] = new_content or [{"type": "text", "text": "(empty)"}]
        elif _is_third_party or idx != last_assistant_idx:
-            # Third-party endpoint: strip ALL thinking blocks from every
-            # assistant message — signatures are Anthropic-proprietary.
+            # Third-party: strip ALL thinking blocks (signatures are proprietary).
            # Direct Anthropic: strip from non-latest assistant messages only.
            stripped = [
                b for b in m["content"]
@ -1937,24 +1899,21 @@ def convert_messages_to_anthropic(
            ]
            m["content"] = stripped or [{"type": "text", "text": "(thinking elided)"}]
        else:
-            # Latest assistant on direct Anthropic: keep signed thinking
-            # blocks for reasoning continuity; downgrade unsigned ones to
-            # plain text.
+            # Latest assistant on direct Anthropic: keep signed, downgrade unsigned
+            # to text so the reasoning isn't lost.
            new_content = []
            for b in m["content"]:
                if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES:
                    new_content.append(b)
                    continue
                if b.get("type") == "redacted_thinking":
-                    # Redacted blocks use 'data' for the signature payload
+                    # Redacted blocks use 'data' for the signature payload —
+                    # drop the block when 'data' is missing (can't be validated).
                    if b.get("data"):
                        new_content.append(b)
-                    # else: drop — no data means it can't be validated
                elif b.get("signature"):
-                    # Signed thinking block — keep it
                    new_content.append(b)
                else:
-                    # Unsigned thinking — downgrade to text so it's not lost
                    thinking_text = b.get("thinking", "")
                    if thinking_text:
                        new_content.append({"type": "text", "text": thinking_text})
@ -1966,12 +1925,15 @@ def convert_messages_to_anthropic(
            if isinstance(b, dict) and b.get("type") in _THINKING_TYPES:
                b.pop("cache_control", None)

-    # ── Image eviction: keep only the most recent N screenshots ─────
-    # computer_use screenshots (base64 images) sit inside tool_result
-    # blocks: they accumulate and are sent with every API call. Each
-    # costs ~1,465 tokens; after 10+ the conversation becomes slow
-    # even for simple text queries. Walk backward, keep the most recent
-    # _MAX_KEEP_IMAGES, replace older ones with a text placeholder.
+
+def _evict_old_screenshots(result: List[Dict[str, Any]]) -> None:
+    """Keep only the most recent ``_MAX_KEEP_IMAGES`` computer-use screenshots.
+
+    Base64 images cost ~1,465 tokens each and accumulate across tool calls.
+    Walk backward, keep the most recent N, replace older ones with a placeholder.
+
+    Mutates ``result`` in place.
+    """
    _MAX_KEEP_IMAGES = 3
    _image_count = 0
    for msg in reversed(result):
@ -1998,6 +1960,68 @@ def convert_messages_to_anthropic(
                    for b in inner
                ]

+
+def convert_messages_to_anthropic(
+    messages: List[Dict],
+    base_url: str | None = None,
+    model: str | None = None,
+) -> Tuple[Optional[Any], List[Dict]]:
+    """Convert OpenAI-format messages to Anthropic format.
+
+    Returns (system_prompt, anthropic_messages).
+    System messages are extracted since Anthropic takes them as a separate param.
+    system_prompt is a string or list of content blocks (when cache_control present).
+
+    When *base_url* is provided and points to a third-party Anthropic-compatible
+    endpoint, all thinking blocks are stripped.  Signatures are
+    Anthropic-proprietary — third-party endpoints cannot validate them and will
+    reject them with HTTP 400 "Invalid signature in thinking block".
+
+    When *model* matches the Kimi / Moonshot family, or *base_url* is a
+    Kimi / Moonshot or DeepSeek ``/anthropic`` host, signed thinking blocks
+    are stripped but unsigned ones synthesised from ``reasoning_content``
+    are preserved on replayed assistant tool-call messages — Kimi requires
+    the field to exist, even if empty.
+    """
+    system = None
+    result: List[Dict[str, Any]] = []
+
+    for m in messages:
+        role = m.get("role", "user")
+        content = m.get("content", "")
+
+        if role == "system":
+            if isinstance(content, list):
+                # Preserve cache_control markers on content blocks
+                has_cache = any(
+                    p.get("cache_control") for p in content if isinstance(p, dict)
+                )
+                if has_cache:
+                    system = [p for p in content if isinstance(p, dict)]
+                else:
+                    system = "\n".join(
+                        p["text"] for p in content if p.get("type") == "text"
+                    )
+            else:
+                system = content
+            continue
+
+        if role == "assistant":
+            result.append(_convert_assistant_message(m))
+            continue
+
+        if role == "tool":
+            _convert_tool_message_to_result(result, m)
+            continue
+
+        # Regular user message
+        result.append(_convert_user_message(content))
+
+    _strip_orphaned_tool_blocks(result)
+    result = _merge_consecutive_roles(result)
+    _manage_thinking_signatures(result, base_url, model)
+    _evict_old_screenshots(result)
+
    return system, result