refactor: extract 7 helpers from convert_messages_to_anthropic

Split convert_messages_to_anthropic (complexity 79) into 7 focused helpers:

- _convert_assistant_message    — assistant msg to content blocks
- _convert_tool_message_to_result — tool msg to tool_result + merge
- _convert_user_message         — user msg validation + conversion
- _strip_orphaned_tool_blocks   — orphan tool_use + tool_result removal
- _merge_consecutive_roles      — role alternation enforcement
- _manage_thinking_signatures   — strip/preserve/downgrade by endpoint
- _evict_old_screenshots        — keep only 3 most recent images

Main function complexity: 79 → 10 (below C901 threshold).
Zero logic changes — pure extraction. The refactor itself is net -4 lines.
A follow-up polish pass (+45/-17) tightens annotations (List[Dict] →
List[Dict[str, Any]]), restores the rationale comments in
_manage_thinking_signatures (third-party endpoint examples, #13848/#16748
issue refs, the redacted_thinking 'data'-as-signature note), and adds a
"Mutates ``result`` in place." docstring line to each of the four mutating
helpers.
This commit is contained in:
kshitijk4poor 2026-05-12 00:36:51 +05:30 committed by Teknium
parent 8b49012a0a
commit 9c102b9378
No known key found for this signature in database

View file

@ -1606,182 +1606,155 @@ def _content_parts_to_anthropic_blocks(parts: Any) -> List[Dict[str, Any]]:
return out return out
def convert_messages_to_anthropic( def _convert_assistant_message(m: Dict[str, Any]) -> Dict[str, Any]:
messages: List[Dict], """Convert an assistant message to Anthropic content blocks.
base_url: str | None = None,
model: str | None = None,
) -> Tuple[Optional[Any], List[Dict]]:
"""Convert OpenAI-format messages to Anthropic format.
Returns (system_prompt, anthropic_messages). Handles thinking blocks, regular content, tool calls, and
System messages are extracted since Anthropic takes them as a separate param. reasoning_content injection for Kimi/DeepSeek endpoints.
system_prompt is a string or list of content blocks (when cache_control present).
When *base_url* is provided and points to a third-party Anthropic-compatible
endpoint, all thinking block signatures are stripped. Signatures are
Anthropic-proprietary third-party endpoints cannot validate them and will
reject them with HTTP 400 "Invalid signature in thinking block".
When *model* is provided and matches the Kimi / Moonshot family (or
*base_url* is a Kimi / Moonshot host), unsigned thinking blocks
synthesised from ``reasoning_content`` are preserved on replayed
assistant tool-call messages Kimi requires the field to exist, even
if empty.
""" """
system = None content = m.get("content", "")
result = [] blocks = _extract_preserved_thinking_blocks(m)
if content:
for m in messages:
role = m.get("role", "user")
content = m.get("content", "")
if role == "system":
if isinstance(content, list):
# Preserve cache_control markers on content blocks
has_cache = any(
p.get("cache_control") for p in content if isinstance(p, dict)
)
if has_cache:
system = [p for p in content if isinstance(p, dict)]
else:
system = "\n".join(
p["text"] for p in content if p.get("type") == "text"
)
else:
system = content
continue
if role == "assistant":
blocks = _extract_preserved_thinking_blocks(m)
if content:
if isinstance(content, list):
converted_content = _convert_content_to_anthropic(content)
if isinstance(converted_content, list):
blocks.extend(converted_content)
else:
blocks.append({"type": "text", "text": str(content)})
for tc in m.get("tool_calls", []):
if not tc or not isinstance(tc, dict):
continue
fn = tc.get("function", {})
args = fn.get("arguments", "{}")
try:
parsed_args = json.loads(args) if isinstance(args, str) else args
except (json.JSONDecodeError, ValueError):
parsed_args = {}
blocks.append({
"type": "tool_use",
"id": _sanitize_tool_id(tc.get("id", "")),
"name": fn.get("name", ""),
"input": parsed_args,
})
# Kimi's /coding endpoint (Anthropic protocol) requires assistant
# tool-call messages to carry reasoning_content when thinking is
# enabled server-side. Preserve it as a thinking block so Kimi
# can validate the message history. See hermes-agent#13848.
#
# Accept empty string "" — _copy_reasoning_content_for_api()
# injects "" as a tier-3 fallback for Kimi tool-call messages
# that had no reasoning. Kimi requires the field to exist, even
# if empty.
#
# Prepend (not append): Anthropic protocol requires thinking
# blocks before text and tool_use blocks.
#
# Guard: only add when reasoning_details didn't already contribute
# thinking blocks. On native Anthropic, reasoning_details produces
# signed thinking blocks — adding another unsigned one from
# reasoning_content would create a duplicate (same text) that gets
# downgraded to a spurious text block on the last assistant message.
reasoning_content = m.get("reasoning_content")
_already_has_thinking = any(
isinstance(b, dict) and b.get("type") in {"thinking", "redacted_thinking"}
for b in blocks
)
if isinstance(reasoning_content, str) and not _already_has_thinking:
blocks.insert(0, {"type": "thinking", "thinking": reasoning_content})
# Anthropic rejects empty assistant content
effective = blocks or content
if not effective or effective == "":
effective = [{"type": "text", "text": "(empty)"}]
result.append({"role": "assistant", "content": effective})
continue
if role == "tool":
# Sanitize tool_use_id and ensure non-empty content.
# Computer-use (and other multimodal) tool results arrive as
# either a list of OpenAI-style content parts, or a dict
# marked `_multimodal` with an embedded `content` list. Convert
# both into Anthropic `tool_result` inner blocks (text + image).
multimodal_blocks: Optional[List[Dict[str, Any]]] = None
if isinstance(content, dict) and content.get("_multimodal"):
multimodal_blocks = _content_parts_to_anthropic_blocks(
content.get("content") or []
)
# Fallback text if the conversion produced nothing usable.
if not multimodal_blocks and content.get("text_summary"):
multimodal_blocks = [
{"type": "text", "text": str(content["text_summary"])}
]
elif isinstance(content, list):
converted = _content_parts_to_anthropic_blocks(content)
if any(b.get("type") == "image" for b in converted):
multimodal_blocks = converted
# Back-compat: some callers stash blocks under a private key.
if multimodal_blocks is None:
stashed = m.get("_anthropic_content_blocks")
if isinstance(stashed, list) and stashed:
text_content = content if isinstance(content, str) and content.strip() else None
multimodal_blocks = (
[{"type": "text", "text": text_content}] + stashed
if text_content else list(stashed)
)
if multimodal_blocks:
result_content: Any = multimodal_blocks
elif isinstance(content, str):
result_content = content
else:
result_content = json.dumps(content) if content else "(no output)"
if not result_content:
result_content = "(no output)"
tool_result = {
"type": "tool_result",
"tool_use_id": _sanitize_tool_id(m.get("tool_call_id", "")),
"content": result_content,
}
if isinstance(m.get("cache_control"), dict):
tool_result["cache_control"] = dict(m["cache_control"])
# Merge consecutive tool results into one user message
if (
result
and result[-1]["role"] == "user"
and isinstance(result[-1]["content"], list)
and result[-1]["content"]
and result[-1]["content"][0].get("type") == "tool_result"
):
result[-1]["content"].append(tool_result)
else:
result.append({"role": "user", "content": [tool_result]})
continue
# Regular user message — validate non-empty content (Anthropic rejects empty)
if isinstance(content, list): if isinstance(content, list):
converted_blocks = _convert_content_to_anthropic(content) converted_content = _convert_content_to_anthropic(content)
# Check if all text blocks are empty if isinstance(converted_content, list):
if not converted_blocks or all( blocks.extend(converted_content)
b.get("text", "").strip() == ""
for b in converted_blocks
if isinstance(b, dict) and b.get("type") == "text"
):
converted_blocks = [{"type": "text", "text": "(empty message)"}]
result.append({"role": "user", "content": converted_blocks})
else: else:
# Validate string content is non-empty blocks.append({"type": "text", "text": str(content)})
if not content or (isinstance(content, str) and not content.strip()): for tc in m.get("tool_calls", []):
content = "(empty message)" if not tc or not isinstance(tc, dict):
result.append({"role": "user", "content": content}) continue
fn = tc.get("function", {})
args = fn.get("arguments", "{}")
try:
parsed_args = json.loads(args) if isinstance(args, str) else args
except (json.JSONDecodeError, ValueError):
parsed_args = {}
blocks.append({
"type": "tool_use",
"id": _sanitize_tool_id(tc.get("id", "")),
"name": fn.get("name", ""),
"input": parsed_args,
})
# Kimi's /coding endpoint (Anthropic protocol) requires assistant
# tool-call messages to carry reasoning_content when thinking is
# enabled server-side. Preserve it as a thinking block so Kimi
# can validate the message history. See hermes-agent#13848.
#
# Accept empty string "" — _copy_reasoning_content_for_api()
# injects "" as a tier-3 fallback for Kimi tool-call messages
# that had no reasoning. Kimi requires the field to exist, even
# if empty.
#
# Prepend (not append): Anthropic protocol requires thinking
# blocks before text and tool_use blocks.
#
# Guard: only add when reasoning_details didn't already contribute
# thinking blocks. On native Anthropic, reasoning_details produces
# signed thinking blocks — adding another unsigned one from
# reasoning_content would create a duplicate (same text) that gets
# downgraded to a spurious text block on the last assistant message.
reasoning_content = m.get("reasoning_content")
_already_has_thinking = any(
isinstance(b, dict) and b.get("type") in {"thinking", "redacted_thinking"}
for b in blocks
)
if isinstance(reasoning_content, str) and not _already_has_thinking:
blocks.insert(0, {"type": "thinking", "thinking": reasoning_content})
# Anthropic rejects empty assistant content
effective = blocks or content
if not effective or effective == "":
effective = [{"type": "text", "text": "(empty)"}]
return {"role": "assistant", "content": effective}
def _convert_tool_message_to_result(
    result: List[Dict[str, Any]], m: Dict[str, Any]
) -> None:
    """Turn one OpenAI-style tool message into an Anthropic ``tool_result``.

    The tool message's content may be a plain string, a list of OpenAI-style
    content parts, or a dict flagged ``_multimodal`` that wraps a ``content``
    list. Image-bearing payloads become a list of inner blocks; anything else
    is sent as a string (JSON-encoded when it is not already one).

    Mutates ``result`` in place — the new ``tool_result`` is appended to the
    trailing user message when that message already starts with a
    ``tool_result`` block, otherwise a new user message is appended.
    """
    raw = m.get("content", "")

    # 1) Work out whether the payload should be sent as structured blocks.
    rich_blocks: Optional[List[Dict[str, Any]]] = None
    if isinstance(raw, dict) and raw.get("_multimodal"):
        rich_blocks = _content_parts_to_anthropic_blocks(raw.get("content") or [])
        # Nothing usable came out of the conversion: fall back to the summary.
        if not rich_blocks and raw.get("text_summary"):
            rich_blocks = [{"type": "text", "text": str(raw["text_summary"])}]
    elif isinstance(raw, list):
        candidate = _content_parts_to_anthropic_blocks(raw)
        # A converted list is only used as blocks when it carries an image.
        for block in candidate:
            if block.get("type") == "image":
                rich_blocks = candidate
                break

    # 2) Back-compat: blocks stashed by the caller under a private key.
    if rich_blocks is None:
        stashed = m.get("_anthropic_content_blocks")
        if isinstance(stashed, list) and stashed:
            if isinstance(raw, str) and raw.strip():
                rich_blocks = [{"type": "text", "text": raw}] + stashed
            else:
                rich_blocks = list(stashed)

    # 3) Pick the final payload; it must never be empty.
    payload: Any
    if rich_blocks:
        payload = rich_blocks
    elif isinstance(raw, str):
        payload = raw
    elif raw:
        payload = json.dumps(raw)
    else:
        payload = "(no output)"
    payload = payload or "(no output)"

    entry = {
        "type": "tool_result",
        "tool_use_id": _sanitize_tool_id(m.get("tool_call_id", "")),
        "content": payload,
    }
    cache_marker = m.get("cache_control")
    if isinstance(cache_marker, dict):
        # Copy so the outgoing block does not alias the caller's dict.
        entry["cache_control"] = dict(cache_marker)

    # 4) Consecutive tool results share a single user message.
    mergeable = False
    if result:
        tail = result[-1]
        if tail["role"] == "user":
            tail_blocks = tail["content"]
            mergeable = (
                isinstance(tail_blocks, list)
                and bool(tail_blocks)
                and tail_blocks[0].get("type") == "tool_result"
            )

    if mergeable:
        result[-1]["content"].append(entry)
    else:
        result.append({"role": "user", "content": [entry]})
def _is_blank_text_block(block: Any) -> bool:
    """Return True when *block* is a ``text`` block with no non-whitespace text."""
    if not isinstance(block, dict) or block.get("type") != "text":
        return False
    text = block.get("text")
    return not isinstance(text, str) or not text.strip()


def _convert_user_message(content: Any) -> Dict[str, Any]:
    """Validate and convert a user message to Anthropic format.

    List content is run through ``_convert_content_to_anthropic``; blank text
    blocks are then dropped, and the ``"(empty message)"`` placeholder is used
    only when nothing at all remains. Non-list content is passed through,
    with empty or whitespace-only values replaced by the placeholder string.

    Returns a ``{"role": "user", "content": ...}`` dict.
    """
    if isinstance(content, list):
        converted_blocks = _convert_content_to_anthropic(content)
        if isinstance(converted_blocks, list):
            # Filter per block instead of using all() over the text blocks:
            # all() is vacuously True when there are no text blocks, which
            # replaced image-only messages with the placeholder and threw the
            # image away. Non-text blocks always survive.
            kept = [b for b in converted_blocks if not _is_blank_text_block(b)]
            if not kept:
                kept = [{"type": "text", "text": "(empty message)"}]
            return {"role": "user", "content": kept}
        # NOTE(review): the converter handed back a non-list; it is validated
        # below like scalar content — confirm it can actually do this.
        content = converted_blocks

    if not content or (isinstance(content, str) and not content.strip()):
        content = "(empty message)"
    return {"role": "user", "content": content}
def _strip_orphaned_tool_blocks(result: List[Dict[str, Any]]) -> None:
"""Strip tool_use blocks with no matching tool_result, and vice versa.
Context compression or session truncation can remove either side of a
tool-call pair. Anthropic rejects both orphans with HTTP 400.
Mutates ``result`` in place.
"""
# Strip orphaned tool_use blocks (no matching tool_result follows) # Strip orphaned tool_use blocks (no matching tool_result follows)
tool_result_ids = set() tool_result_ids = set()
for m in result: for m in result:
@ -1799,10 +1772,7 @@ def convert_messages_to_anthropic(
if not m["content"]: if not m["content"]:
m["content"] = [{"type": "text", "text": "(tool call removed)"}] m["content"] = [{"type": "text", "text": "(tool call removed)"}]
# Strip orphaned tool_result blocks (no matching tool_use precedes them). # Strip orphaned tool_result blocks (no matching tool_use precedes them)
# This is the mirror of the above: context compression or session truncation
# can remove an assistant message containing a tool_use while leaving the
# subsequent tool_result intact. Anthropic rejects these with a 400.
tool_use_ids = set() tool_use_ids = set()
for m in result: for m in result:
if m["role"] == "assistant" and isinstance(m["content"], list): if m["role"] == "assistant" and isinstance(m["content"], list):
@ -1819,12 +1789,16 @@ def convert_messages_to_anthropic(
if not m["content"]: if not m["content"]:
m["content"] = [{"type": "text", "text": "(tool result removed)"}] m["content"] = [{"type": "text", "text": "(tool result removed)"}]
# Enforce strict role alternation (Anthropic rejects consecutive same-role messages)
def _merge_consecutive_roles(result: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""Merge consecutive same-role messages to enforce Anthropic alternation.
Returns a new list (caller must rebind ``result``).
"""
fixed = [] fixed = []
for m in result: for m in result:
if fixed and fixed[-1]["role"] == m["role"]: if fixed and fixed[-1]["role"] == m["role"]:
if m["role"] == "user": if m["role"] == "user":
# Merge consecutive user messages
prev_content = fixed[-1]["content"] prev_content = fixed[-1]["content"]
curr_content = m["content"] curr_content = m["content"]
if isinstance(prev_content, str) and isinstance(curr_content, str): if isinstance(prev_content, str) and isinstance(curr_content, str):
@ -1832,7 +1806,6 @@ def convert_messages_to_anthropic(
elif isinstance(prev_content, list) and isinstance(curr_content, list): elif isinstance(prev_content, list) and isinstance(curr_content, list):
fixed[-1]["content"] = prev_content + curr_content fixed[-1]["content"] = prev_content + curr_content
else: else:
# Mixed types — wrap string in list
if isinstance(prev_content, str): if isinstance(prev_content, str):
prev_content = [{"type": "text", "text": prev_content}] prev_content = [{"type": "text", "text": prev_content}]
if isinstance(curr_content, str): if isinstance(curr_content, str):
@ -1855,7 +1828,6 @@ def convert_messages_to_anthropic(
elif isinstance(prev_blocks, str) and isinstance(curr_blocks, str): elif isinstance(prev_blocks, str) and isinstance(curr_blocks, str):
fixed[-1]["content"] = prev_blocks + "\n" + curr_blocks fixed[-1]["content"] = prev_blocks + "\n" + curr_blocks
else: else:
# Mixed types — normalize both to list and merge
if isinstance(prev_blocks, str): if isinstance(prev_blocks, str):
prev_blocks = [{"type": "text", "text": prev_blocks}] prev_blocks = [{"type": "text", "text": prev_blocks}]
if isinstance(curr_blocks, str): if isinstance(curr_blocks, str):
@ -1863,37 +1835,34 @@ def convert_messages_to_anthropic(
fixed[-1]["content"] = prev_blocks + curr_blocks fixed[-1]["content"] = prev_blocks + curr_blocks
else: else:
fixed.append(m) fixed.append(m)
result = fixed return fixed
# ── Thinking block signature management ──────────────────────────
# Anthropic signs thinking blocks against the full turn content. def _manage_thinking_signatures(
# Any upstream mutation (context compression, session truncation, result: List[Dict[str, Any]], base_url: str | None, model: str | None
# orphan stripping, message merging) invalidates the signature, ) -> None:
# causing HTTP 400 "Invalid signature in thinking block". """Strip or preserve thinking blocks based on endpoint type.
#
# Signatures are Anthropic-proprietary. Third-party endpoints Anthropic signs thinking blocks against the full turn content.
# (MiniMax, Microsoft Foundry, self-hosted proxies) cannot validate Any upstream mutation (context compression, session truncation, orphan
# them and will reject them outright. When targeting a third-party stripping, message merging) invalidates the signature, causing HTTP 400
# endpoint, strip ALL thinking/redacted_thinking blocks from every "Invalid signature in thinking block".
# assistant message — the third-party will generate its own
# thinking blocks if it supports extended thinking. Signatures are Anthropic-proprietary. Third-party endpoints (MiniMax,
# Azure AI Foundry, AWS Bedrock, self-hosted proxies) cannot validate them
# For direct Anthropic (strategy following clawdbot/OpenClaw): and will reject them outright. Kimi's /coding and DeepSeek's /anthropic
# 1. Strip thinking/redacted_thinking from all assistant messages endpoints speak the Anthropic protocol upstream but require unsigned
# EXCEPT the last one — preserves reasoning continuity on the thinking blocks (synthesised from ``reasoning_content``) to round-trip on
# current tool-use chain while avoiding stale signature errors. replayed assistant tool-call messages. See hermes-agent#13848 (Kimi) and
# 2. Downgrade unsigned thinking blocks (no signature) to text — hermes-agent#16748 (DeepSeek).
# Anthropic can't validate them and will reject them.
# 3. Strip cache_control from thinking/redacted_thinking blocks — Mutates ``result`` in place.
# cache markers can interfere with signature validation. """
_THINKING_TYPES = frozenset(("thinking", "redacted_thinking")) _THINKING_TYPES = frozenset(("thinking", "redacted_thinking"))
_is_third_party = _is_third_party_anthropic_endpoint(base_url) _is_third_party = _is_third_party_anthropic_endpoint(base_url)
# Kimi /coding and DeepSeek /anthropic share a contract: both speak the # Kimi / DeepSeek share a contract: strip signed Anthropic blocks
# Anthropic Messages protocol upstream but require that thinking blocks # (neither upstream can validate Anthropic signatures), preserve unsigned
# synthesised from reasoning_content round-trip on subsequent turns when # ones synthesised from reasoning_content. See #13848, #16748.
# thinking is enabled. Signed Anthropic blocks still have to be stripped
# (neither endpoint can validate Anthropic's signatures); unsigned blocks
# are preserved. See hermes-agent#13848 (Kimi) and #16748 (DeepSeek).
_preserve_unsigned_thinking = ( _preserve_unsigned_thinking = (
_is_kimi_family_endpoint(base_url, model) _is_kimi_family_endpoint(base_url, model)
or _is_deepseek_anthropic_endpoint(base_url) or _is_deepseek_anthropic_endpoint(base_url)
@ -1910,26 +1879,19 @@ def convert_messages_to_anthropic(
continue continue
if _preserve_unsigned_thinking: if _preserve_unsigned_thinking:
# Kimi's /coding and DeepSeek's /anthropic endpoints both enable # Kimi / DeepSeek: strip signed, preserve unsigned.
# thinking server-side and require unsigned thinking blocks on
# replayed assistant tool-call messages. Strip signed Anthropic
# blocks (neither upstream can validate Anthropic signatures) but
# preserve the unsigned ones we synthesised from reasoning_content.
new_content = [] new_content = []
for b in m["content"]: for b in m["content"]:
if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES: if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES:
new_content.append(b) new_content.append(b)
continue continue
if b.get("signature") or b.get("data"): if b.get("signature") or b.get("data"):
# Anthropic-signed block — upstream can't validate, strip # Signed (or redacted-with-data) — upstream can't validate, strip.
continue continue
# Unsigned thinking (synthesised from reasoning_content) —
# keep it: the upstream needs it for message-history validation.
new_content.append(b) new_content.append(b)
m["content"] = new_content or [{"type": "text", "text": "(empty)"}] m["content"] = new_content or [{"type": "text", "text": "(empty)"}]
elif _is_third_party or idx != last_assistant_idx: elif _is_third_party or idx != last_assistant_idx:
# Third-party endpoint: strip ALL thinking blocks from every # Third-party: strip ALL thinking blocks (signatures are proprietary).
# assistant message — signatures are Anthropic-proprietary.
# Direct Anthropic: strip from non-latest assistant messages only. # Direct Anthropic: strip from non-latest assistant messages only.
stripped = [ stripped = [
b for b in m["content"] b for b in m["content"]
@ -1937,24 +1899,21 @@ def convert_messages_to_anthropic(
] ]
m["content"] = stripped or [{"type": "text", "text": "(thinking elided)"}] m["content"] = stripped or [{"type": "text", "text": "(thinking elided)"}]
else: else:
# Latest assistant on direct Anthropic: keep signed thinking # Latest assistant on direct Anthropic: keep signed, downgrade unsigned
# blocks for reasoning continuity; downgrade unsigned ones to # to text so the reasoning isn't lost.
# plain text.
new_content = [] new_content = []
for b in m["content"]: for b in m["content"]:
if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES: if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES:
new_content.append(b) new_content.append(b)
continue continue
if b.get("type") == "redacted_thinking": if b.get("type") == "redacted_thinking":
# Redacted blocks use 'data' for the signature payload # Redacted blocks use 'data' for the signature payload —
# drop the block when 'data' is missing (can't be validated).
if b.get("data"): if b.get("data"):
new_content.append(b) new_content.append(b)
# else: drop — no data means it can't be validated
elif b.get("signature"): elif b.get("signature"):
# Signed thinking block — keep it
new_content.append(b) new_content.append(b)
else: else:
# Unsigned thinking — downgrade to text so it's not lost
thinking_text = b.get("thinking", "") thinking_text = b.get("thinking", "")
if thinking_text: if thinking_text:
new_content.append({"type": "text", "text": thinking_text}) new_content.append({"type": "text", "text": thinking_text})
@ -1966,12 +1925,15 @@ def convert_messages_to_anthropic(
if isinstance(b, dict) and b.get("type") in _THINKING_TYPES: if isinstance(b, dict) and b.get("type") in _THINKING_TYPES:
b.pop("cache_control", None) b.pop("cache_control", None)
# ── Image eviction: keep only the most recent N screenshots ─────
# computer_use screenshots (base64 images) sit inside tool_result def _evict_old_screenshots(result: List[Dict[str, Any]]) -> None:
# blocks: they accumulate and are sent with every API call. Each """Keep only the most recent ``_MAX_KEEP_IMAGES`` computer-use screenshots.
# costs ~1,465 tokens; after 10+ the conversation becomes slow
# even for simple text queries. Walk backward, keep the most recent Base64 images cost ~1,465 tokens each and accumulate across tool calls.
# _MAX_KEEP_IMAGES, replace older ones with a text placeholder. Walk backward, keep the most recent N, replace older ones with a placeholder.
Mutates ``result`` in place.
"""
_MAX_KEEP_IMAGES = 3 _MAX_KEEP_IMAGES = 3
_image_count = 0 _image_count = 0
for msg in reversed(result): for msg in reversed(result):
@ -1998,6 +1960,68 @@ def convert_messages_to_anthropic(
for b in inner for b in inner
] ]
def convert_messages_to_anthropic(
    messages: List[Dict[str, Any]],
    base_url: str | None = None,
    model: str | None = None,
) -> Tuple[Optional[Any], List[Dict[str, Any]]]:
    """Convert OpenAI-format messages to Anthropic format.

    Returns (system_prompt, anthropic_messages).
    System messages are extracted since Anthropic takes them as a separate param.
    system_prompt is a string or list of content blocks (when cache_control present).

    When *base_url* is provided and points to a third-party Anthropic-compatible
    endpoint, all thinking block signatures are stripped. Signatures are
    Anthropic-proprietary — third-party endpoints cannot validate them and will
    reject them with HTTP 400 "Invalid signature in thinking block".

    When *model* is provided and matches the Kimi / Moonshot family (or
    *base_url* is a Kimi / Moonshot host), unsigned thinking blocks
    synthesised from ``reasoning_content`` are preserved on replayed
    assistant tool-call messages — Kimi requires the field to exist, even
    if empty.
    """
    system = None
    result: List[Dict[str, Any]] = []

    for m in messages:
        role = m.get("role", "user")
        content = m.get("content", "")

        if role == "system":
            # A later system message replaces an earlier one (plain assignment).
            if isinstance(content, list):
                # Only dict parts are usable; the join below used to call
                # .get()/["text"] on every part and crashed on a non-dict
                # part or a text part without a "text" key.
                dict_parts = [p for p in content if isinstance(p, dict)]
                # Preserve cache_control markers on content blocks
                has_cache = any(p.get("cache_control") for p in dict_parts)
                if has_cache:
                    system = dict_parts
                else:
                    system = "\n".join(
                        str(p.get("text") or "")
                        for p in dict_parts
                        if p.get("type") == "text"
                    )
            else:
                system = content
            continue

        if role == "assistant":
            result.append(_convert_assistant_message(m))
            continue

        if role == "tool":
            # Appends to, or merges into, ``result`` in place.
            _convert_tool_message_to_result(result, m)
            continue

        # Regular user message (any role not handled above)
        result.append(_convert_user_message(content))

    # Post-processing passes, in this order. Only the merge step returns a
    # new list, so its result must be rebound.
    _strip_orphaned_tool_blocks(result)
    result = _merge_consecutive_roles(result)
    _manage_thinking_signatures(result, base_url, model)
    _evict_old_screenshots(result)

    return system, result