mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-29 06:31:32 +00:00
refactor: extract 7 helpers from convert_messages_to_anthropic
Split convert_messages_to_anthropic (complexity 79) into 7 focused helpers: - _convert_assistant_message — assistant msg to content blocks - _convert_tool_message_to_result — tool msg to tool_result + merge - _convert_user_message — user msg validation + conversion - _strip_orphaned_tool_blocks — orphan tool_use + tool_result removal - _merge_consecutive_roles — role alternation enforcement - _manage_thinking_signatures — strip/preserve/downgrade by endpoint - _evict_old_screenshots — keep only 3 most recent images Main function complexity: 79 → 10 (below C901 threshold). Zero logic changes — pure extraction. Net -4 lines (refactor itself); +45/-17 follow-up polish for annotation tightening (List[Dict] → List[Dict[str, Any]]), restored rationale comments in _manage_thinking_signatures (third-party endpoint examples, #13848/#16748 issue refs, redacted_thinking 'data'-as-signature note), and "Mutates ``result`` in place." docstring lines on the four mutating helpers.
This commit is contained in:
parent
ec2ab5bfaf
commit
9d61408837
1 changed files with 254 additions and 230 deletions
|
|
@ -1606,182 +1606,155 @@ def _content_parts_to_anthropic_blocks(parts: Any) -> List[Dict[str, Any]]:
|
|||
return out
|
||||
|
||||
|
||||
def convert_messages_to_anthropic(
|
||||
messages: List[Dict],
|
||||
base_url: str | None = None,
|
||||
model: str | None = None,
|
||||
) -> Tuple[Optional[Any], List[Dict]]:
|
||||
"""Convert OpenAI-format messages to Anthropic format.
|
||||
def _convert_assistant_message(m: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Convert an assistant message to Anthropic content blocks.
|
||||
|
||||
Returns (system_prompt, anthropic_messages).
|
||||
System messages are extracted since Anthropic takes them as a separate param.
|
||||
system_prompt is a string or list of content blocks (when cache_control present).
|
||||
|
||||
When *base_url* is provided and points to a third-party Anthropic-compatible
|
||||
endpoint, all thinking block signatures are stripped. Signatures are
|
||||
Anthropic-proprietary — third-party endpoints cannot validate them and will
|
||||
reject them with HTTP 400 "Invalid signature in thinking block".
|
||||
|
||||
When *model* is provided and matches the Kimi / Moonshot family (or
|
||||
*base_url* is a Kimi / Moonshot host), unsigned thinking blocks
|
||||
synthesised from ``reasoning_content`` are preserved on replayed
|
||||
assistant tool-call messages — Kimi requires the field to exist, even
|
||||
if empty.
|
||||
Handles thinking blocks, regular content, tool calls, and
|
||||
reasoning_content injection for Kimi/DeepSeek endpoints.
|
||||
"""
|
||||
system = None
|
||||
result = []
|
||||
|
||||
for m in messages:
|
||||
role = m.get("role", "user")
|
||||
content = m.get("content", "")
|
||||
|
||||
if role == "system":
|
||||
if isinstance(content, list):
|
||||
# Preserve cache_control markers on content blocks
|
||||
has_cache = any(
|
||||
p.get("cache_control") for p in content if isinstance(p, dict)
|
||||
)
|
||||
if has_cache:
|
||||
system = [p for p in content if isinstance(p, dict)]
|
||||
else:
|
||||
system = "\n".join(
|
||||
p["text"] for p in content if p.get("type") == "text"
|
||||
)
|
||||
else:
|
||||
system = content
|
||||
continue
|
||||
|
||||
if role == "assistant":
|
||||
blocks = _extract_preserved_thinking_blocks(m)
|
||||
if content:
|
||||
if isinstance(content, list):
|
||||
converted_content = _convert_content_to_anthropic(content)
|
||||
if isinstance(converted_content, list):
|
||||
blocks.extend(converted_content)
|
||||
else:
|
||||
blocks.append({"type": "text", "text": str(content)})
|
||||
for tc in m.get("tool_calls", []):
|
||||
if not tc or not isinstance(tc, dict):
|
||||
continue
|
||||
fn = tc.get("function", {})
|
||||
args = fn.get("arguments", "{}")
|
||||
try:
|
||||
parsed_args = json.loads(args) if isinstance(args, str) else args
|
||||
except (json.JSONDecodeError, ValueError):
|
||||
parsed_args = {}
|
||||
blocks.append({
|
||||
"type": "tool_use",
|
||||
"id": _sanitize_tool_id(tc.get("id", "")),
|
||||
"name": fn.get("name", ""),
|
||||
"input": parsed_args,
|
||||
})
|
||||
# Kimi's /coding endpoint (Anthropic protocol) requires assistant
|
||||
# tool-call messages to carry reasoning_content when thinking is
|
||||
# enabled server-side. Preserve it as a thinking block so Kimi
|
||||
# can validate the message history. See hermes-agent#13848.
|
||||
#
|
||||
# Accept empty string "" — _copy_reasoning_content_for_api()
|
||||
# injects "" as a tier-3 fallback for Kimi tool-call messages
|
||||
# that had no reasoning. Kimi requires the field to exist, even
|
||||
# if empty.
|
||||
#
|
||||
# Prepend (not append): Anthropic protocol requires thinking
|
||||
# blocks before text and tool_use blocks.
|
||||
#
|
||||
# Guard: only add when reasoning_details didn't already contribute
|
||||
# thinking blocks. On native Anthropic, reasoning_details produces
|
||||
# signed thinking blocks — adding another unsigned one from
|
||||
# reasoning_content would create a duplicate (same text) that gets
|
||||
# downgraded to a spurious text block on the last assistant message.
|
||||
reasoning_content = m.get("reasoning_content")
|
||||
_already_has_thinking = any(
|
||||
isinstance(b, dict) and b.get("type") in {"thinking", "redacted_thinking"}
|
||||
for b in blocks
|
||||
)
|
||||
if isinstance(reasoning_content, str) and not _already_has_thinking:
|
||||
blocks.insert(0, {"type": "thinking", "thinking": reasoning_content})
|
||||
# Anthropic rejects empty assistant content
|
||||
effective = blocks or content
|
||||
if not effective or effective == "":
|
||||
effective = [{"type": "text", "text": "(empty)"}]
|
||||
result.append({"role": "assistant", "content": effective})
|
||||
continue
|
||||
|
||||
if role == "tool":
|
||||
# Sanitize tool_use_id and ensure non-empty content.
|
||||
# Computer-use (and other multimodal) tool results arrive as
|
||||
# either a list of OpenAI-style content parts, or a dict
|
||||
# marked `_multimodal` with an embedded `content` list. Convert
|
||||
# both into Anthropic `tool_result` inner blocks (text + image).
|
||||
multimodal_blocks: Optional[List[Dict[str, Any]]] = None
|
||||
if isinstance(content, dict) and content.get("_multimodal"):
|
||||
multimodal_blocks = _content_parts_to_anthropic_blocks(
|
||||
content.get("content") or []
|
||||
)
|
||||
# Fallback text if the conversion produced nothing usable.
|
||||
if not multimodal_blocks and content.get("text_summary"):
|
||||
multimodal_blocks = [
|
||||
{"type": "text", "text": str(content["text_summary"])}
|
||||
]
|
||||
elif isinstance(content, list):
|
||||
converted = _content_parts_to_anthropic_blocks(content)
|
||||
if any(b.get("type") == "image" for b in converted):
|
||||
multimodal_blocks = converted
|
||||
# Back-compat: some callers stash blocks under a private key.
|
||||
if multimodal_blocks is None:
|
||||
stashed = m.get("_anthropic_content_blocks")
|
||||
if isinstance(stashed, list) and stashed:
|
||||
text_content = content if isinstance(content, str) and content.strip() else None
|
||||
multimodal_blocks = (
|
||||
[{"type": "text", "text": text_content}] + stashed
|
||||
if text_content else list(stashed)
|
||||
)
|
||||
|
||||
if multimodal_blocks:
|
||||
result_content: Any = multimodal_blocks
|
||||
elif isinstance(content, str):
|
||||
result_content = content
|
||||
else:
|
||||
result_content = json.dumps(content) if content else "(no output)"
|
||||
if not result_content:
|
||||
result_content = "(no output)"
|
||||
tool_result = {
|
||||
"type": "tool_result",
|
||||
"tool_use_id": _sanitize_tool_id(m.get("tool_call_id", "")),
|
||||
"content": result_content,
|
||||
}
|
||||
if isinstance(m.get("cache_control"), dict):
|
||||
tool_result["cache_control"] = dict(m["cache_control"])
|
||||
# Merge consecutive tool results into one user message
|
||||
if (
|
||||
result
|
||||
and result[-1]["role"] == "user"
|
||||
and isinstance(result[-1]["content"], list)
|
||||
and result[-1]["content"]
|
||||
and result[-1]["content"][0].get("type") == "tool_result"
|
||||
):
|
||||
result[-1]["content"].append(tool_result)
|
||||
else:
|
||||
result.append({"role": "user", "content": [tool_result]})
|
||||
continue
|
||||
|
||||
# Regular user message — validate non-empty content (Anthropic rejects empty)
|
||||
content = m.get("content", "")
|
||||
blocks = _extract_preserved_thinking_blocks(m)
|
||||
if content:
|
||||
if isinstance(content, list):
|
||||
converted_blocks = _convert_content_to_anthropic(content)
|
||||
# Check if all text blocks are empty
|
||||
if not converted_blocks or all(
|
||||
b.get("text", "").strip() == ""
|
||||
for b in converted_blocks
|
||||
if isinstance(b, dict) and b.get("type") == "text"
|
||||
):
|
||||
converted_blocks = [{"type": "text", "text": "(empty message)"}]
|
||||
result.append({"role": "user", "content": converted_blocks})
|
||||
converted_content = _convert_content_to_anthropic(content)
|
||||
if isinstance(converted_content, list):
|
||||
blocks.extend(converted_content)
|
||||
else:
|
||||
# Validate string content is non-empty
|
||||
if not content or (isinstance(content, str) and not content.strip()):
|
||||
content = "(empty message)"
|
||||
result.append({"role": "user", "content": content})
|
||||
blocks.append({"type": "text", "text": str(content)})
|
||||
for tc in m.get("tool_calls", []):
|
||||
if not tc or not isinstance(tc, dict):
|
||||
continue
|
||||
fn = tc.get("function", {})
|
||||
args = fn.get("arguments", "{}")
|
||||
try:
|
||||
parsed_args = json.loads(args) if isinstance(args, str) else args
|
||||
except (json.JSONDecodeError, ValueError):
|
||||
parsed_args = {}
|
||||
blocks.append({
|
||||
"type": "tool_use",
|
||||
"id": _sanitize_tool_id(tc.get("id", "")),
|
||||
"name": fn.get("name", ""),
|
||||
"input": parsed_args,
|
||||
})
|
||||
# Kimi's /coding endpoint (Anthropic protocol) requires assistant
|
||||
# tool-call messages to carry reasoning_content when thinking is
|
||||
# enabled server-side. Preserve it as a thinking block so Kimi
|
||||
# can validate the message history. See hermes-agent#13848.
|
||||
#
|
||||
# Accept empty string "" — _copy_reasoning_content_for_api()
|
||||
# injects "" as a tier-3 fallback for Kimi tool-call messages
|
||||
# that had no reasoning. Kimi requires the field to exist, even
|
||||
# if empty.
|
||||
#
|
||||
# Prepend (not append): Anthropic protocol requires thinking
|
||||
# blocks before text and tool_use blocks.
|
||||
#
|
||||
# Guard: only add when reasoning_details didn't already contribute
|
||||
# thinking blocks. On native Anthropic, reasoning_details produces
|
||||
# signed thinking blocks — adding another unsigned one from
|
||||
# reasoning_content would create a duplicate (same text) that gets
|
||||
# downgraded to a spurious text block on the last assistant message.
|
||||
reasoning_content = m.get("reasoning_content")
|
||||
_already_has_thinking = any(
|
||||
isinstance(b, dict) and b.get("type") in {"thinking", "redacted_thinking"}
|
||||
for b in blocks
|
||||
)
|
||||
if isinstance(reasoning_content, str) and not _already_has_thinking:
|
||||
blocks.insert(0, {"type": "thinking", "thinking": reasoning_content})
|
||||
# Anthropic rejects empty assistant content
|
||||
effective = blocks or content
|
||||
if not effective or effective == "":
|
||||
effective = [{"type": "text", "text": "(empty)"}]
|
||||
return {"role": "assistant", "content": effective}
|
||||
|
||||
|
||||
def _convert_tool_message_to_result(
|
||||
result: List[Dict[str, Any]], m: Dict[str, Any]
|
||||
) -> None:
|
||||
"""Convert a tool message to an Anthropic tool_result, merging consecutive
|
||||
results into one user message.
|
||||
|
||||
Mutates ``result`` in place — either appends a new user message or extends
|
||||
the trailing user message's tool_result list.
|
||||
"""
|
||||
content = m.get("content", "")
|
||||
multimodal_blocks: Optional[List[Dict[str, Any]]] = None
|
||||
if isinstance(content, dict) and content.get("_multimodal"):
|
||||
multimodal_blocks = _content_parts_to_anthropic_blocks(
|
||||
content.get("content") or []
|
||||
)
|
||||
# Fallback text if the conversion produced nothing usable.
|
||||
if not multimodal_blocks and content.get("text_summary"):
|
||||
multimodal_blocks = [
|
||||
{"type": "text", "text": str(content["text_summary"])}
|
||||
]
|
||||
elif isinstance(content, list):
|
||||
converted = _content_parts_to_anthropic_blocks(content)
|
||||
if any(b.get("type") == "image" for b in converted):
|
||||
multimodal_blocks = converted
|
||||
# Back-compat: some callers stash blocks under a private key.
|
||||
if multimodal_blocks is None:
|
||||
stashed = m.get("_anthropic_content_blocks")
|
||||
if isinstance(stashed, list) and stashed:
|
||||
text_content = content if isinstance(content, str) and content.strip() else None
|
||||
multimodal_blocks = (
|
||||
[{"type": "text", "text": text_content}] + stashed
|
||||
if text_content else list(stashed)
|
||||
)
|
||||
|
||||
if multimodal_blocks:
|
||||
result_content: Any = multimodal_blocks
|
||||
elif isinstance(content, str):
|
||||
result_content = content
|
||||
else:
|
||||
result_content = json.dumps(content) if content else "(no output)"
|
||||
if not result_content:
|
||||
result_content = "(no output)"
|
||||
tool_result = {
|
||||
"type": "tool_result",
|
||||
"tool_use_id": _sanitize_tool_id(m.get("tool_call_id", "")),
|
||||
"content": result_content,
|
||||
}
|
||||
if isinstance(m.get("cache_control"), dict):
|
||||
tool_result["cache_control"] = dict(m["cache_control"])
|
||||
# Merge consecutive tool results into one user message
|
||||
if (
|
||||
result
|
||||
and result[-1]["role"] == "user"
|
||||
and isinstance(result[-1]["content"], list)
|
||||
and result[-1]["content"]
|
||||
and result[-1]["content"][0].get("type") == "tool_result"
|
||||
):
|
||||
result[-1]["content"].append(tool_result)
|
||||
else:
|
||||
result.append({"role": "user", "content": [tool_result]})
|
||||
|
||||
|
||||
def _convert_user_message(content: Any) -> Dict[str, Any]:
|
||||
"""Validate and convert a user message to anthropic format."""
|
||||
if isinstance(content, list):
|
||||
converted_blocks = _convert_content_to_anthropic(content)
|
||||
if not converted_blocks or all(
|
||||
b.get("text", "").strip() == ""
|
||||
for b in converted_blocks
|
||||
if isinstance(b, dict) and b.get("type") == "text"
|
||||
):
|
||||
converted_blocks = [{"type": "text", "text": "(empty message)"}]
|
||||
return {"role": "user", "content": converted_blocks}
|
||||
else:
|
||||
if not content or (isinstance(content, str) and not content.strip()):
|
||||
content = "(empty message)"
|
||||
return {"role": "user", "content": content}
|
||||
|
||||
|
||||
def _strip_orphaned_tool_blocks(result: List[Dict[str, Any]]) -> None:
|
||||
"""Strip tool_use blocks with no matching tool_result, and vice versa.
|
||||
|
||||
Context compression or session truncation can remove either side of a
|
||||
tool-call pair. Anthropic rejects both orphans with HTTP 400.
|
||||
|
||||
Mutates ``result`` in place.
|
||||
"""
|
||||
# Strip orphaned tool_use blocks (no matching tool_result follows)
|
||||
tool_result_ids = set()
|
||||
for m in result:
|
||||
|
|
@ -1799,10 +1772,7 @@ def convert_messages_to_anthropic(
|
|||
if not m["content"]:
|
||||
m["content"] = [{"type": "text", "text": "(tool call removed)"}]
|
||||
|
||||
# Strip orphaned tool_result blocks (no matching tool_use precedes them).
|
||||
# This is the mirror of the above: context compression or session truncation
|
||||
# can remove an assistant message containing a tool_use while leaving the
|
||||
# subsequent tool_result intact. Anthropic rejects these with a 400.
|
||||
# Strip orphaned tool_result blocks (no matching tool_use precedes them)
|
||||
tool_use_ids = set()
|
||||
for m in result:
|
||||
if m["role"] == "assistant" and isinstance(m["content"], list):
|
||||
|
|
@ -1819,12 +1789,16 @@ def convert_messages_to_anthropic(
|
|||
if not m["content"]:
|
||||
m["content"] = [{"type": "text", "text": "(tool result removed)"}]
|
||||
|
||||
# Enforce strict role alternation (Anthropic rejects consecutive same-role messages)
|
||||
|
||||
def _merge_consecutive_roles(result: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
||||
"""Merge consecutive same-role messages to enforce Anthropic alternation.
|
||||
|
||||
Returns a new list (caller must rebind ``result``).
|
||||
"""
|
||||
fixed = []
|
||||
for m in result:
|
||||
if fixed and fixed[-1]["role"] == m["role"]:
|
||||
if m["role"] == "user":
|
||||
# Merge consecutive user messages
|
||||
prev_content = fixed[-1]["content"]
|
||||
curr_content = m["content"]
|
||||
if isinstance(prev_content, str) and isinstance(curr_content, str):
|
||||
|
|
@ -1832,7 +1806,6 @@ def convert_messages_to_anthropic(
|
|||
elif isinstance(prev_content, list) and isinstance(curr_content, list):
|
||||
fixed[-1]["content"] = prev_content + curr_content
|
||||
else:
|
||||
# Mixed types — wrap string in list
|
||||
if isinstance(prev_content, str):
|
||||
prev_content = [{"type": "text", "text": prev_content}]
|
||||
if isinstance(curr_content, str):
|
||||
|
|
@ -1855,7 +1828,6 @@ def convert_messages_to_anthropic(
|
|||
elif isinstance(prev_blocks, str) and isinstance(curr_blocks, str):
|
||||
fixed[-1]["content"] = prev_blocks + "\n" + curr_blocks
|
||||
else:
|
||||
# Mixed types — normalize both to list and merge
|
||||
if isinstance(prev_blocks, str):
|
||||
prev_blocks = [{"type": "text", "text": prev_blocks}]
|
||||
if isinstance(curr_blocks, str):
|
||||
|
|
@ -1863,37 +1835,34 @@ def convert_messages_to_anthropic(
|
|||
fixed[-1]["content"] = prev_blocks + curr_blocks
|
||||
else:
|
||||
fixed.append(m)
|
||||
result = fixed
|
||||
return fixed
|
||||
|
||||
# ── Thinking block signature management ──────────────────────────
|
||||
# Anthropic signs thinking blocks against the full turn content.
|
||||
# Any upstream mutation (context compression, session truncation,
|
||||
# orphan stripping, message merging) invalidates the signature,
|
||||
# causing HTTP 400 "Invalid signature in thinking block".
|
||||
#
|
||||
# Signatures are Anthropic-proprietary. Third-party endpoints
|
||||
# (MiniMax, Microsoft Foundry, self-hosted proxies) cannot validate
|
||||
# them and will reject them outright. When targeting a third-party
|
||||
# endpoint, strip ALL thinking/redacted_thinking blocks from every
|
||||
# assistant message — the third-party will generate its own
|
||||
# thinking blocks if it supports extended thinking.
|
||||
#
|
||||
# For direct Anthropic (strategy following clawdbot/OpenClaw):
|
||||
# 1. Strip thinking/redacted_thinking from all assistant messages
|
||||
# EXCEPT the last one — preserves reasoning continuity on the
|
||||
# current tool-use chain while avoiding stale signature errors.
|
||||
# 2. Downgrade unsigned thinking blocks (no signature) to text —
|
||||
# Anthropic can't validate them and will reject them.
|
||||
# 3. Strip cache_control from thinking/redacted_thinking blocks —
|
||||
# cache markers can interfere with signature validation.
|
||||
|
||||
def _manage_thinking_signatures(
|
||||
result: List[Dict[str, Any]], base_url: str | None, model: str | None
|
||||
) -> None:
|
||||
"""Strip or preserve thinking blocks based on endpoint type.
|
||||
|
||||
Anthropic signs thinking blocks against the full turn content.
|
||||
Any upstream mutation (context compression, session truncation, orphan
|
||||
stripping, message merging) invalidates the signature, causing HTTP 400
|
||||
"Invalid signature in thinking block".
|
||||
|
||||
Signatures are Anthropic-proprietary. Third-party endpoints (MiniMax,
|
||||
Azure AI Foundry, AWS Bedrock, self-hosted proxies) cannot validate them
|
||||
and will reject them outright. Kimi's /coding and DeepSeek's /anthropic
|
||||
endpoints speak the Anthropic protocol upstream but require unsigned
|
||||
thinking blocks (synthesised from ``reasoning_content``) to round-trip on
|
||||
replayed assistant tool-call messages. See hermes-agent#13848 (Kimi) and
|
||||
hermes-agent#16748 (DeepSeek).
|
||||
|
||||
Mutates ``result`` in place.
|
||||
"""
|
||||
_THINKING_TYPES = frozenset(("thinking", "redacted_thinking"))
|
||||
_is_third_party = _is_third_party_anthropic_endpoint(base_url)
|
||||
# Kimi /coding and DeepSeek /anthropic share a contract: both speak the
|
||||
# Anthropic Messages protocol upstream but require that thinking blocks
|
||||
# synthesised from reasoning_content round-trip on subsequent turns when
|
||||
# thinking is enabled. Signed Anthropic blocks still have to be stripped
|
||||
# (neither endpoint can validate Anthropic's signatures); unsigned blocks
|
||||
# are preserved. See hermes-agent#13848 (Kimi) and #16748 (DeepSeek).
|
||||
# Kimi / DeepSeek share a contract: strip signed Anthropic blocks
|
||||
# (neither upstream can validate Anthropic signatures), preserve unsigned
|
||||
# ones synthesised from reasoning_content. See #13848, #16748.
|
||||
_preserve_unsigned_thinking = (
|
||||
_is_kimi_family_endpoint(base_url, model)
|
||||
or _is_deepseek_anthropic_endpoint(base_url)
|
||||
|
|
@ -1910,26 +1879,19 @@ def convert_messages_to_anthropic(
|
|||
continue
|
||||
|
||||
if _preserve_unsigned_thinking:
|
||||
# Kimi's /coding and DeepSeek's /anthropic endpoints both enable
|
||||
# thinking server-side and require unsigned thinking blocks on
|
||||
# replayed assistant tool-call messages. Strip signed Anthropic
|
||||
# blocks (neither upstream can validate Anthropic signatures) but
|
||||
# preserve the unsigned ones we synthesised from reasoning_content.
|
||||
# Kimi / DeepSeek: strip signed, preserve unsigned.
|
||||
new_content = []
|
||||
for b in m["content"]:
|
||||
if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES:
|
||||
new_content.append(b)
|
||||
continue
|
||||
if b.get("signature") or b.get("data"):
|
||||
# Anthropic-signed block — upstream can't validate, strip
|
||||
# Signed (or redacted-with-data) — upstream can't validate, strip.
|
||||
continue
|
||||
# Unsigned thinking (synthesised from reasoning_content) —
|
||||
# keep it: the upstream needs it for message-history validation.
|
||||
new_content.append(b)
|
||||
m["content"] = new_content or [{"type": "text", "text": "(empty)"}]
|
||||
elif _is_third_party or idx != last_assistant_idx:
|
||||
# Third-party endpoint: strip ALL thinking blocks from every
|
||||
# assistant message — signatures are Anthropic-proprietary.
|
||||
# Third-party: strip ALL thinking blocks (signatures are proprietary).
|
||||
# Direct Anthropic: strip from non-latest assistant messages only.
|
||||
stripped = [
|
||||
b for b in m["content"]
|
||||
|
|
@ -1937,24 +1899,21 @@ def convert_messages_to_anthropic(
|
|||
]
|
||||
m["content"] = stripped or [{"type": "text", "text": "(thinking elided)"}]
|
||||
else:
|
||||
# Latest assistant on direct Anthropic: keep signed thinking
|
||||
# blocks for reasoning continuity; downgrade unsigned ones to
|
||||
# plain text.
|
||||
# Latest assistant on direct Anthropic: keep signed, downgrade unsigned
|
||||
# to text so the reasoning isn't lost.
|
||||
new_content = []
|
||||
for b in m["content"]:
|
||||
if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES:
|
||||
new_content.append(b)
|
||||
continue
|
||||
if b.get("type") == "redacted_thinking":
|
||||
# Redacted blocks use 'data' for the signature payload
|
||||
# Redacted blocks use 'data' for the signature payload —
|
||||
# drop the block when 'data' is missing (can't be validated).
|
||||
if b.get("data"):
|
||||
new_content.append(b)
|
||||
# else: drop — no data means it can't be validated
|
||||
elif b.get("signature"):
|
||||
# Signed thinking block — keep it
|
||||
new_content.append(b)
|
||||
else:
|
||||
# Unsigned thinking — downgrade to text so it's not lost
|
||||
thinking_text = b.get("thinking", "")
|
||||
if thinking_text:
|
||||
new_content.append({"type": "text", "text": thinking_text})
|
||||
|
|
@ -1966,12 +1925,15 @@ def convert_messages_to_anthropic(
|
|||
if isinstance(b, dict) and b.get("type") in _THINKING_TYPES:
|
||||
b.pop("cache_control", None)
|
||||
|
||||
# ── Image eviction: keep only the most recent N screenshots ─────
|
||||
# computer_use screenshots (base64 images) sit inside tool_result
|
||||
# blocks: they accumulate and are sent with every API call. Each
|
||||
# costs ~1,465 tokens; after 10+ the conversation becomes slow
|
||||
# even for simple text queries. Walk backward, keep the most recent
|
||||
# _MAX_KEEP_IMAGES, replace older ones with a text placeholder.
|
||||
|
||||
def _evict_old_screenshots(result: List[Dict[str, Any]]) -> None:
|
||||
"""Keep only the most recent ``_MAX_KEEP_IMAGES`` computer-use screenshots.
|
||||
|
||||
Base64 images cost ~1,465 tokens each and accumulate across tool calls.
|
||||
Walk backward, keep the most recent N, replace older ones with a placeholder.
|
||||
|
||||
Mutates ``result`` in place.
|
||||
"""
|
||||
_MAX_KEEP_IMAGES = 3
|
||||
_image_count = 0
|
||||
for msg in reversed(result):
|
||||
|
|
@ -1998,6 +1960,68 @@ def convert_messages_to_anthropic(
|
|||
for b in inner
|
||||
]
|
||||
|
||||
|
||||
def convert_messages_to_anthropic(
|
||||
messages: List[Dict],
|
||||
base_url: str | None = None,
|
||||
model: str | None = None,
|
||||
) -> Tuple[Optional[Any], List[Dict]]:
|
||||
"""Convert OpenAI-format messages to Anthropic format.
|
||||
|
||||
Returns (system_prompt, anthropic_messages).
|
||||
System messages are extracted since Anthropic takes them as a separate param.
|
||||
system_prompt is a string or list of content blocks (when cache_control present).
|
||||
|
||||
When *base_url* is provided and points to a third-party Anthropic-compatible
|
||||
endpoint, all thinking block signatures are stripped. Signatures are
|
||||
Anthropic-proprietary — third-party endpoints cannot validate them and will
|
||||
reject them with HTTP 400 "Invalid signature in thinking block".
|
||||
|
||||
When *model* is provided and matches the Kimi / Moonshot family (or
|
||||
*base_url* is a Kimi / Moonshot host), unsigned thinking blocks
|
||||
synthesised from ``reasoning_content`` are preserved on replayed
|
||||
assistant tool-call messages — Kimi requires the field to exist, even
|
||||
if empty.
|
||||
"""
|
||||
system = None
|
||||
result: List[Dict[str, Any]] = []
|
||||
|
||||
for m in messages:
|
||||
role = m.get("role", "user")
|
||||
content = m.get("content", "")
|
||||
|
||||
if role == "system":
|
||||
if isinstance(content, list):
|
||||
# Preserve cache_control markers on content blocks
|
||||
has_cache = any(
|
||||
p.get("cache_control") for p in content if isinstance(p, dict)
|
||||
)
|
||||
if has_cache:
|
||||
system = [p for p in content if isinstance(p, dict)]
|
||||
else:
|
||||
system = "\n".join(
|
||||
p["text"] for p in content if p.get("type") == "text"
|
||||
)
|
||||
else:
|
||||
system = content
|
||||
continue
|
||||
|
||||
if role == "assistant":
|
||||
result.append(_convert_assistant_message(m))
|
||||
continue
|
||||
|
||||
if role == "tool":
|
||||
_convert_tool_message_to_result(result, m)
|
||||
continue
|
||||
|
||||
# Regular user message
|
||||
result.append(_convert_user_message(content))
|
||||
|
||||
_strip_orphaned_tool_blocks(result)
|
||||
result = _merge_consecutive_roles(result)
|
||||
_manage_thinking_signatures(result, base_url, model)
|
||||
_evict_old_screenshots(result)
|
||||
|
||||
return system, result
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue