mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-05 07:41:39 +00:00
refactor: extract 7 helpers from convert_messages_to_anthropic
Split convert_messages_to_anthropic (complexity 79) into 7 focused helpers: - _convert_assistant_message — assistant msg to content blocks - _convert_tool_message_to_result — tool msg to tool_result + merge - _convert_user_message — user msg validation + conversion - _strip_orphaned_tool_blocks — orphan tool_use + tool_result removal - _merge_consecutive_roles — role alternation enforcement - _manage_thinking_signatures — strip/preserve/downgrade by endpoint - _evict_old_screenshots — keep only 3 most recent images Main function complexity: 79 → 10 (below C901 threshold). Zero logic changes — pure extraction. Net -4 lines (refactor itself); +45/-17 follow-up polish for annotation tightening (List[Dict] → List[Dict[str, Any]]), restored rationale comments in _manage_thinking_signatures (third-party endpoint examples, #13848/#16748 issue refs, redacted_thinking 'data'-as-signature note), and "Mutates ``result`` in place." docstring lines on the four mutating helpers.
This commit is contained in:
parent
8b49012a0a
commit
9c102b9378
1 changed files with 254 additions and 230 deletions
|
|
@ -1606,182 +1606,155 @@ def _content_parts_to_anthropic_blocks(parts: Any) -> List[Dict[str, Any]]:
|
||||||
return out
|
return out
|
||||||
|
|
||||||
|
|
||||||
def convert_messages_to_anthropic(
|
def _convert_assistant_message(m: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
messages: List[Dict],
|
"""Convert an assistant message to Anthropic content blocks.
|
||||||
base_url: str | None = None,
|
|
||||||
model: str | None = None,
|
|
||||||
) -> Tuple[Optional[Any], List[Dict]]:
|
|
||||||
"""Convert OpenAI-format messages to Anthropic format.
|
|
||||||
|
|
||||||
Returns (system_prompt, anthropic_messages).
|
Handles thinking blocks, regular content, tool calls, and
|
||||||
System messages are extracted since Anthropic takes them as a separate param.
|
reasoning_content injection for Kimi/DeepSeek endpoints.
|
||||||
system_prompt is a string or list of content blocks (when cache_control present).
|
|
||||||
|
|
||||||
When *base_url* is provided and points to a third-party Anthropic-compatible
|
|
||||||
endpoint, all thinking block signatures are stripped. Signatures are
|
|
||||||
Anthropic-proprietary — third-party endpoints cannot validate them and will
|
|
||||||
reject them with HTTP 400 "Invalid signature in thinking block".
|
|
||||||
|
|
||||||
When *model* is provided and matches the Kimi / Moonshot family (or
|
|
||||||
*base_url* is a Kimi / Moonshot host), unsigned thinking blocks
|
|
||||||
synthesised from ``reasoning_content`` are preserved on replayed
|
|
||||||
assistant tool-call messages — Kimi requires the field to exist, even
|
|
||||||
if empty.
|
|
||||||
"""
|
"""
|
||||||
system = None
|
content = m.get("content", "")
|
||||||
result = []
|
blocks = _extract_preserved_thinking_blocks(m)
|
||||||
|
if content:
|
||||||
for m in messages:
|
|
||||||
role = m.get("role", "user")
|
|
||||||
content = m.get("content", "")
|
|
||||||
|
|
||||||
if role == "system":
|
|
||||||
if isinstance(content, list):
|
|
||||||
# Preserve cache_control markers on content blocks
|
|
||||||
has_cache = any(
|
|
||||||
p.get("cache_control") for p in content if isinstance(p, dict)
|
|
||||||
)
|
|
||||||
if has_cache:
|
|
||||||
system = [p for p in content if isinstance(p, dict)]
|
|
||||||
else:
|
|
||||||
system = "\n".join(
|
|
||||||
p["text"] for p in content if p.get("type") == "text"
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
system = content
|
|
||||||
continue
|
|
||||||
|
|
||||||
if role == "assistant":
|
|
||||||
blocks = _extract_preserved_thinking_blocks(m)
|
|
||||||
if content:
|
|
||||||
if isinstance(content, list):
|
|
||||||
converted_content = _convert_content_to_anthropic(content)
|
|
||||||
if isinstance(converted_content, list):
|
|
||||||
blocks.extend(converted_content)
|
|
||||||
else:
|
|
||||||
blocks.append({"type": "text", "text": str(content)})
|
|
||||||
for tc in m.get("tool_calls", []):
|
|
||||||
if not tc or not isinstance(tc, dict):
|
|
||||||
continue
|
|
||||||
fn = tc.get("function", {})
|
|
||||||
args = fn.get("arguments", "{}")
|
|
||||||
try:
|
|
||||||
parsed_args = json.loads(args) if isinstance(args, str) else args
|
|
||||||
except (json.JSONDecodeError, ValueError):
|
|
||||||
parsed_args = {}
|
|
||||||
blocks.append({
|
|
||||||
"type": "tool_use",
|
|
||||||
"id": _sanitize_tool_id(tc.get("id", "")),
|
|
||||||
"name": fn.get("name", ""),
|
|
||||||
"input": parsed_args,
|
|
||||||
})
|
|
||||||
# Kimi's /coding endpoint (Anthropic protocol) requires assistant
|
|
||||||
# tool-call messages to carry reasoning_content when thinking is
|
|
||||||
# enabled server-side. Preserve it as a thinking block so Kimi
|
|
||||||
# can validate the message history. See hermes-agent#13848.
|
|
||||||
#
|
|
||||||
# Accept empty string "" — _copy_reasoning_content_for_api()
|
|
||||||
# injects "" as a tier-3 fallback for Kimi tool-call messages
|
|
||||||
# that had no reasoning. Kimi requires the field to exist, even
|
|
||||||
# if empty.
|
|
||||||
#
|
|
||||||
# Prepend (not append): Anthropic protocol requires thinking
|
|
||||||
# blocks before text and tool_use blocks.
|
|
||||||
#
|
|
||||||
# Guard: only add when reasoning_details didn't already contribute
|
|
||||||
# thinking blocks. On native Anthropic, reasoning_details produces
|
|
||||||
# signed thinking blocks — adding another unsigned one from
|
|
||||||
# reasoning_content would create a duplicate (same text) that gets
|
|
||||||
# downgraded to a spurious text block on the last assistant message.
|
|
||||||
reasoning_content = m.get("reasoning_content")
|
|
||||||
_already_has_thinking = any(
|
|
||||||
isinstance(b, dict) and b.get("type") in {"thinking", "redacted_thinking"}
|
|
||||||
for b in blocks
|
|
||||||
)
|
|
||||||
if isinstance(reasoning_content, str) and not _already_has_thinking:
|
|
||||||
blocks.insert(0, {"type": "thinking", "thinking": reasoning_content})
|
|
||||||
# Anthropic rejects empty assistant content
|
|
||||||
effective = blocks or content
|
|
||||||
if not effective or effective == "":
|
|
||||||
effective = [{"type": "text", "text": "(empty)"}]
|
|
||||||
result.append({"role": "assistant", "content": effective})
|
|
||||||
continue
|
|
||||||
|
|
||||||
if role == "tool":
|
|
||||||
# Sanitize tool_use_id and ensure non-empty content.
|
|
||||||
# Computer-use (and other multimodal) tool results arrive as
|
|
||||||
# either a list of OpenAI-style content parts, or a dict
|
|
||||||
# marked `_multimodal` with an embedded `content` list. Convert
|
|
||||||
# both into Anthropic `tool_result` inner blocks (text + image).
|
|
||||||
multimodal_blocks: Optional[List[Dict[str, Any]]] = None
|
|
||||||
if isinstance(content, dict) and content.get("_multimodal"):
|
|
||||||
multimodal_blocks = _content_parts_to_anthropic_blocks(
|
|
||||||
content.get("content") or []
|
|
||||||
)
|
|
||||||
# Fallback text if the conversion produced nothing usable.
|
|
||||||
if not multimodal_blocks and content.get("text_summary"):
|
|
||||||
multimodal_blocks = [
|
|
||||||
{"type": "text", "text": str(content["text_summary"])}
|
|
||||||
]
|
|
||||||
elif isinstance(content, list):
|
|
||||||
converted = _content_parts_to_anthropic_blocks(content)
|
|
||||||
if any(b.get("type") == "image" for b in converted):
|
|
||||||
multimodal_blocks = converted
|
|
||||||
# Back-compat: some callers stash blocks under a private key.
|
|
||||||
if multimodal_blocks is None:
|
|
||||||
stashed = m.get("_anthropic_content_blocks")
|
|
||||||
if isinstance(stashed, list) and stashed:
|
|
||||||
text_content = content if isinstance(content, str) and content.strip() else None
|
|
||||||
multimodal_blocks = (
|
|
||||||
[{"type": "text", "text": text_content}] + stashed
|
|
||||||
if text_content else list(stashed)
|
|
||||||
)
|
|
||||||
|
|
||||||
if multimodal_blocks:
|
|
||||||
result_content: Any = multimodal_blocks
|
|
||||||
elif isinstance(content, str):
|
|
||||||
result_content = content
|
|
||||||
else:
|
|
||||||
result_content = json.dumps(content) if content else "(no output)"
|
|
||||||
if not result_content:
|
|
||||||
result_content = "(no output)"
|
|
||||||
tool_result = {
|
|
||||||
"type": "tool_result",
|
|
||||||
"tool_use_id": _sanitize_tool_id(m.get("tool_call_id", "")),
|
|
||||||
"content": result_content,
|
|
||||||
}
|
|
||||||
if isinstance(m.get("cache_control"), dict):
|
|
||||||
tool_result["cache_control"] = dict(m["cache_control"])
|
|
||||||
# Merge consecutive tool results into one user message
|
|
||||||
if (
|
|
||||||
result
|
|
||||||
and result[-1]["role"] == "user"
|
|
||||||
and isinstance(result[-1]["content"], list)
|
|
||||||
and result[-1]["content"]
|
|
||||||
and result[-1]["content"][0].get("type") == "tool_result"
|
|
||||||
):
|
|
||||||
result[-1]["content"].append(tool_result)
|
|
||||||
else:
|
|
||||||
result.append({"role": "user", "content": [tool_result]})
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Regular user message — validate non-empty content (Anthropic rejects empty)
|
|
||||||
if isinstance(content, list):
|
if isinstance(content, list):
|
||||||
converted_blocks = _convert_content_to_anthropic(content)
|
converted_content = _convert_content_to_anthropic(content)
|
||||||
# Check if all text blocks are empty
|
if isinstance(converted_content, list):
|
||||||
if not converted_blocks or all(
|
blocks.extend(converted_content)
|
||||||
b.get("text", "").strip() == ""
|
|
||||||
for b in converted_blocks
|
|
||||||
if isinstance(b, dict) and b.get("type") == "text"
|
|
||||||
):
|
|
||||||
converted_blocks = [{"type": "text", "text": "(empty message)"}]
|
|
||||||
result.append({"role": "user", "content": converted_blocks})
|
|
||||||
else:
|
else:
|
||||||
# Validate string content is non-empty
|
blocks.append({"type": "text", "text": str(content)})
|
||||||
if not content or (isinstance(content, str) and not content.strip()):
|
for tc in m.get("tool_calls", []):
|
||||||
content = "(empty message)"
|
if not tc or not isinstance(tc, dict):
|
||||||
result.append({"role": "user", "content": content})
|
continue
|
||||||
|
fn = tc.get("function", {})
|
||||||
|
args = fn.get("arguments", "{}")
|
||||||
|
try:
|
||||||
|
parsed_args = json.loads(args) if isinstance(args, str) else args
|
||||||
|
except (json.JSONDecodeError, ValueError):
|
||||||
|
parsed_args = {}
|
||||||
|
blocks.append({
|
||||||
|
"type": "tool_use",
|
||||||
|
"id": _sanitize_tool_id(tc.get("id", "")),
|
||||||
|
"name": fn.get("name", ""),
|
||||||
|
"input": parsed_args,
|
||||||
|
})
|
||||||
|
# Kimi's /coding endpoint (Anthropic protocol) requires assistant
|
||||||
|
# tool-call messages to carry reasoning_content when thinking is
|
||||||
|
# enabled server-side. Preserve it as a thinking block so Kimi
|
||||||
|
# can validate the message history. See hermes-agent#13848.
|
||||||
|
#
|
||||||
|
# Accept empty string "" — _copy_reasoning_content_for_api()
|
||||||
|
# injects "" as a tier-3 fallback for Kimi tool-call messages
|
||||||
|
# that had no reasoning. Kimi requires the field to exist, even
|
||||||
|
# if empty.
|
||||||
|
#
|
||||||
|
# Prepend (not append): Anthropic protocol requires thinking
|
||||||
|
# blocks before text and tool_use blocks.
|
||||||
|
#
|
||||||
|
# Guard: only add when reasoning_details didn't already contribute
|
||||||
|
# thinking blocks. On native Anthropic, reasoning_details produces
|
||||||
|
# signed thinking blocks — adding another unsigned one from
|
||||||
|
# reasoning_content would create a duplicate (same text) that gets
|
||||||
|
# downgraded to a spurious text block on the last assistant message.
|
||||||
|
reasoning_content = m.get("reasoning_content")
|
||||||
|
_already_has_thinking = any(
|
||||||
|
isinstance(b, dict) and b.get("type") in {"thinking", "redacted_thinking"}
|
||||||
|
for b in blocks
|
||||||
|
)
|
||||||
|
if isinstance(reasoning_content, str) and not _already_has_thinking:
|
||||||
|
blocks.insert(0, {"type": "thinking", "thinking": reasoning_content})
|
||||||
|
# Anthropic rejects empty assistant content
|
||||||
|
effective = blocks or content
|
||||||
|
if not effective or effective == "":
|
||||||
|
effective = [{"type": "text", "text": "(empty)"}]
|
||||||
|
return {"role": "assistant", "content": effective}
|
||||||
|
|
||||||
|
|
||||||
|
def _convert_tool_message_to_result(
|
||||||
|
result: List[Dict[str, Any]], m: Dict[str, Any]
|
||||||
|
) -> None:
|
||||||
|
"""Convert a tool message to an Anthropic tool_result, merging consecutive
|
||||||
|
results into one user message.
|
||||||
|
|
||||||
|
Mutates ``result`` in place — either appends a new user message or extends
|
||||||
|
the trailing user message's tool_result list.
|
||||||
|
"""
|
||||||
|
content = m.get("content", "")
|
||||||
|
multimodal_blocks: Optional[List[Dict[str, Any]]] = None
|
||||||
|
if isinstance(content, dict) and content.get("_multimodal"):
|
||||||
|
multimodal_blocks = _content_parts_to_anthropic_blocks(
|
||||||
|
content.get("content") or []
|
||||||
|
)
|
||||||
|
# Fallback text if the conversion produced nothing usable.
|
||||||
|
if not multimodal_blocks and content.get("text_summary"):
|
||||||
|
multimodal_blocks = [
|
||||||
|
{"type": "text", "text": str(content["text_summary"])}
|
||||||
|
]
|
||||||
|
elif isinstance(content, list):
|
||||||
|
converted = _content_parts_to_anthropic_blocks(content)
|
||||||
|
if any(b.get("type") == "image" for b in converted):
|
||||||
|
multimodal_blocks = converted
|
||||||
|
# Back-compat: some callers stash blocks under a private key.
|
||||||
|
if multimodal_blocks is None:
|
||||||
|
stashed = m.get("_anthropic_content_blocks")
|
||||||
|
if isinstance(stashed, list) and stashed:
|
||||||
|
text_content = content if isinstance(content, str) and content.strip() else None
|
||||||
|
multimodal_blocks = (
|
||||||
|
[{"type": "text", "text": text_content}] + stashed
|
||||||
|
if text_content else list(stashed)
|
||||||
|
)
|
||||||
|
|
||||||
|
if multimodal_blocks:
|
||||||
|
result_content: Any = multimodal_blocks
|
||||||
|
elif isinstance(content, str):
|
||||||
|
result_content = content
|
||||||
|
else:
|
||||||
|
result_content = json.dumps(content) if content else "(no output)"
|
||||||
|
if not result_content:
|
||||||
|
result_content = "(no output)"
|
||||||
|
tool_result = {
|
||||||
|
"type": "tool_result",
|
||||||
|
"tool_use_id": _sanitize_tool_id(m.get("tool_call_id", "")),
|
||||||
|
"content": result_content,
|
||||||
|
}
|
||||||
|
if isinstance(m.get("cache_control"), dict):
|
||||||
|
tool_result["cache_control"] = dict(m["cache_control"])
|
||||||
|
# Merge consecutive tool results into one user message
|
||||||
|
if (
|
||||||
|
result
|
||||||
|
and result[-1]["role"] == "user"
|
||||||
|
and isinstance(result[-1]["content"], list)
|
||||||
|
and result[-1]["content"]
|
||||||
|
and result[-1]["content"][0].get("type") == "tool_result"
|
||||||
|
):
|
||||||
|
result[-1]["content"].append(tool_result)
|
||||||
|
else:
|
||||||
|
result.append({"role": "user", "content": [tool_result]})
|
||||||
|
|
||||||
|
|
||||||
|
def _convert_user_message(content: Any) -> Dict[str, Any]:
|
||||||
|
"""Validate and convert a user message to anthropic format."""
|
||||||
|
if isinstance(content, list):
|
||||||
|
converted_blocks = _convert_content_to_anthropic(content)
|
||||||
|
if not converted_blocks or all(
|
||||||
|
b.get("text", "").strip() == ""
|
||||||
|
for b in converted_blocks
|
||||||
|
if isinstance(b, dict) and b.get("type") == "text"
|
||||||
|
):
|
||||||
|
converted_blocks = [{"type": "text", "text": "(empty message)"}]
|
||||||
|
return {"role": "user", "content": converted_blocks}
|
||||||
|
else:
|
||||||
|
if not content or (isinstance(content, str) and not content.strip()):
|
||||||
|
content = "(empty message)"
|
||||||
|
return {"role": "user", "content": content}
|
||||||
|
|
||||||
|
|
||||||
|
def _strip_orphaned_tool_blocks(result: List[Dict[str, Any]]) -> None:
|
||||||
|
"""Strip tool_use blocks with no matching tool_result, and vice versa.
|
||||||
|
|
||||||
|
Context compression or session truncation can remove either side of a
|
||||||
|
tool-call pair. Anthropic rejects both orphans with HTTP 400.
|
||||||
|
|
||||||
|
Mutates ``result`` in place.
|
||||||
|
"""
|
||||||
# Strip orphaned tool_use blocks (no matching tool_result follows)
|
# Strip orphaned tool_use blocks (no matching tool_result follows)
|
||||||
tool_result_ids = set()
|
tool_result_ids = set()
|
||||||
for m in result:
|
for m in result:
|
||||||
|
|
@ -1799,10 +1772,7 @@ def convert_messages_to_anthropic(
|
||||||
if not m["content"]:
|
if not m["content"]:
|
||||||
m["content"] = [{"type": "text", "text": "(tool call removed)"}]
|
m["content"] = [{"type": "text", "text": "(tool call removed)"}]
|
||||||
|
|
||||||
# Strip orphaned tool_result blocks (no matching tool_use precedes them).
|
# Strip orphaned tool_result blocks (no matching tool_use precedes them)
|
||||||
# This is the mirror of the above: context compression or session truncation
|
|
||||||
# can remove an assistant message containing a tool_use while leaving the
|
|
||||||
# subsequent tool_result intact. Anthropic rejects these with a 400.
|
|
||||||
tool_use_ids = set()
|
tool_use_ids = set()
|
||||||
for m in result:
|
for m in result:
|
||||||
if m["role"] == "assistant" and isinstance(m["content"], list):
|
if m["role"] == "assistant" and isinstance(m["content"], list):
|
||||||
|
|
@ -1819,12 +1789,16 @@ def convert_messages_to_anthropic(
|
||||||
if not m["content"]:
|
if not m["content"]:
|
||||||
m["content"] = [{"type": "text", "text": "(tool result removed)"}]
|
m["content"] = [{"type": "text", "text": "(tool result removed)"}]
|
||||||
|
|
||||||
# Enforce strict role alternation (Anthropic rejects consecutive same-role messages)
|
|
||||||
|
def _merge_consecutive_roles(result: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
||||||
|
"""Merge consecutive same-role messages to enforce Anthropic alternation.
|
||||||
|
|
||||||
|
Returns a new list (caller must rebind ``result``).
|
||||||
|
"""
|
||||||
fixed = []
|
fixed = []
|
||||||
for m in result:
|
for m in result:
|
||||||
if fixed and fixed[-1]["role"] == m["role"]:
|
if fixed and fixed[-1]["role"] == m["role"]:
|
||||||
if m["role"] == "user":
|
if m["role"] == "user":
|
||||||
# Merge consecutive user messages
|
|
||||||
prev_content = fixed[-1]["content"]
|
prev_content = fixed[-1]["content"]
|
||||||
curr_content = m["content"]
|
curr_content = m["content"]
|
||||||
if isinstance(prev_content, str) and isinstance(curr_content, str):
|
if isinstance(prev_content, str) and isinstance(curr_content, str):
|
||||||
|
|
@ -1832,7 +1806,6 @@ def convert_messages_to_anthropic(
|
||||||
elif isinstance(prev_content, list) and isinstance(curr_content, list):
|
elif isinstance(prev_content, list) and isinstance(curr_content, list):
|
||||||
fixed[-1]["content"] = prev_content + curr_content
|
fixed[-1]["content"] = prev_content + curr_content
|
||||||
else:
|
else:
|
||||||
# Mixed types — wrap string in list
|
|
||||||
if isinstance(prev_content, str):
|
if isinstance(prev_content, str):
|
||||||
prev_content = [{"type": "text", "text": prev_content}]
|
prev_content = [{"type": "text", "text": prev_content}]
|
||||||
if isinstance(curr_content, str):
|
if isinstance(curr_content, str):
|
||||||
|
|
@ -1855,7 +1828,6 @@ def convert_messages_to_anthropic(
|
||||||
elif isinstance(prev_blocks, str) and isinstance(curr_blocks, str):
|
elif isinstance(prev_blocks, str) and isinstance(curr_blocks, str):
|
||||||
fixed[-1]["content"] = prev_blocks + "\n" + curr_blocks
|
fixed[-1]["content"] = prev_blocks + "\n" + curr_blocks
|
||||||
else:
|
else:
|
||||||
# Mixed types — normalize both to list and merge
|
|
||||||
if isinstance(prev_blocks, str):
|
if isinstance(prev_blocks, str):
|
||||||
prev_blocks = [{"type": "text", "text": prev_blocks}]
|
prev_blocks = [{"type": "text", "text": prev_blocks}]
|
||||||
if isinstance(curr_blocks, str):
|
if isinstance(curr_blocks, str):
|
||||||
|
|
@ -1863,37 +1835,34 @@ def convert_messages_to_anthropic(
|
||||||
fixed[-1]["content"] = prev_blocks + curr_blocks
|
fixed[-1]["content"] = prev_blocks + curr_blocks
|
||||||
else:
|
else:
|
||||||
fixed.append(m)
|
fixed.append(m)
|
||||||
result = fixed
|
return fixed
|
||||||
|
|
||||||
# ── Thinking block signature management ──────────────────────────
|
|
||||||
# Anthropic signs thinking blocks against the full turn content.
|
def _manage_thinking_signatures(
|
||||||
# Any upstream mutation (context compression, session truncation,
|
result: List[Dict[str, Any]], base_url: str | None, model: str | None
|
||||||
# orphan stripping, message merging) invalidates the signature,
|
) -> None:
|
||||||
# causing HTTP 400 "Invalid signature in thinking block".
|
"""Strip or preserve thinking blocks based on endpoint type.
|
||||||
#
|
|
||||||
# Signatures are Anthropic-proprietary. Third-party endpoints
|
Anthropic signs thinking blocks against the full turn content.
|
||||||
# (MiniMax, Microsoft Foundry, self-hosted proxies) cannot validate
|
Any upstream mutation (context compression, session truncation, orphan
|
||||||
# them and will reject them outright. When targeting a third-party
|
stripping, message merging) invalidates the signature, causing HTTP 400
|
||||||
# endpoint, strip ALL thinking/redacted_thinking blocks from every
|
"Invalid signature in thinking block".
|
||||||
# assistant message — the third-party will generate its own
|
|
||||||
# thinking blocks if it supports extended thinking.
|
Signatures are Anthropic-proprietary. Third-party endpoints (MiniMax,
|
||||||
#
|
Azure AI Foundry, AWS Bedrock, self-hosted proxies) cannot validate them
|
||||||
# For direct Anthropic (strategy following clawdbot/OpenClaw):
|
and will reject them outright. Kimi's /coding and DeepSeek's /anthropic
|
||||||
# 1. Strip thinking/redacted_thinking from all assistant messages
|
endpoints speak the Anthropic protocol upstream but require unsigned
|
||||||
# EXCEPT the last one — preserves reasoning continuity on the
|
thinking blocks (synthesised from ``reasoning_content``) to round-trip on
|
||||||
# current tool-use chain while avoiding stale signature errors.
|
replayed assistant tool-call messages. See hermes-agent#13848 (Kimi) and
|
||||||
# 2. Downgrade unsigned thinking blocks (no signature) to text —
|
hermes-agent#16748 (DeepSeek).
|
||||||
# Anthropic can't validate them and will reject them.
|
|
||||||
# 3. Strip cache_control from thinking/redacted_thinking blocks —
|
Mutates ``result`` in place.
|
||||||
# cache markers can interfere with signature validation.
|
"""
|
||||||
_THINKING_TYPES = frozenset(("thinking", "redacted_thinking"))
|
_THINKING_TYPES = frozenset(("thinking", "redacted_thinking"))
|
||||||
_is_third_party = _is_third_party_anthropic_endpoint(base_url)
|
_is_third_party = _is_third_party_anthropic_endpoint(base_url)
|
||||||
# Kimi /coding and DeepSeek /anthropic share a contract: both speak the
|
# Kimi / DeepSeek share a contract: strip signed Anthropic blocks
|
||||||
# Anthropic Messages protocol upstream but require that thinking blocks
|
# (neither upstream can validate Anthropic signatures), preserve unsigned
|
||||||
# synthesised from reasoning_content round-trip on subsequent turns when
|
# ones synthesised from reasoning_content. See #13848, #16748.
|
||||||
# thinking is enabled. Signed Anthropic blocks still have to be stripped
|
|
||||||
# (neither endpoint can validate Anthropic's signatures); unsigned blocks
|
|
||||||
# are preserved. See hermes-agent#13848 (Kimi) and #16748 (DeepSeek).
|
|
||||||
_preserve_unsigned_thinking = (
|
_preserve_unsigned_thinking = (
|
||||||
_is_kimi_family_endpoint(base_url, model)
|
_is_kimi_family_endpoint(base_url, model)
|
||||||
or _is_deepseek_anthropic_endpoint(base_url)
|
or _is_deepseek_anthropic_endpoint(base_url)
|
||||||
|
|
@ -1910,26 +1879,19 @@ def convert_messages_to_anthropic(
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if _preserve_unsigned_thinking:
|
if _preserve_unsigned_thinking:
|
||||||
# Kimi's /coding and DeepSeek's /anthropic endpoints both enable
|
# Kimi / DeepSeek: strip signed, preserve unsigned.
|
||||||
# thinking server-side and require unsigned thinking blocks on
|
|
||||||
# replayed assistant tool-call messages. Strip signed Anthropic
|
|
||||||
# blocks (neither upstream can validate Anthropic signatures) but
|
|
||||||
# preserve the unsigned ones we synthesised from reasoning_content.
|
|
||||||
new_content = []
|
new_content = []
|
||||||
for b in m["content"]:
|
for b in m["content"]:
|
||||||
if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES:
|
if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES:
|
||||||
new_content.append(b)
|
new_content.append(b)
|
||||||
continue
|
continue
|
||||||
if b.get("signature") or b.get("data"):
|
if b.get("signature") or b.get("data"):
|
||||||
# Anthropic-signed block — upstream can't validate, strip
|
# Signed (or redacted-with-data) — upstream can't validate, strip.
|
||||||
continue
|
continue
|
||||||
# Unsigned thinking (synthesised from reasoning_content) —
|
|
||||||
# keep it: the upstream needs it for message-history validation.
|
|
||||||
new_content.append(b)
|
new_content.append(b)
|
||||||
m["content"] = new_content or [{"type": "text", "text": "(empty)"}]
|
m["content"] = new_content or [{"type": "text", "text": "(empty)"}]
|
||||||
elif _is_third_party or idx != last_assistant_idx:
|
elif _is_third_party or idx != last_assistant_idx:
|
||||||
# Third-party endpoint: strip ALL thinking blocks from every
|
# Third-party: strip ALL thinking blocks (signatures are proprietary).
|
||||||
# assistant message — signatures are Anthropic-proprietary.
|
|
||||||
# Direct Anthropic: strip from non-latest assistant messages only.
|
# Direct Anthropic: strip from non-latest assistant messages only.
|
||||||
stripped = [
|
stripped = [
|
||||||
b for b in m["content"]
|
b for b in m["content"]
|
||||||
|
|
@ -1937,24 +1899,21 @@ def convert_messages_to_anthropic(
|
||||||
]
|
]
|
||||||
m["content"] = stripped or [{"type": "text", "text": "(thinking elided)"}]
|
m["content"] = stripped or [{"type": "text", "text": "(thinking elided)"}]
|
||||||
else:
|
else:
|
||||||
# Latest assistant on direct Anthropic: keep signed thinking
|
# Latest assistant on direct Anthropic: keep signed, downgrade unsigned
|
||||||
# blocks for reasoning continuity; downgrade unsigned ones to
|
# to text so the reasoning isn't lost.
|
||||||
# plain text.
|
|
||||||
new_content = []
|
new_content = []
|
||||||
for b in m["content"]:
|
for b in m["content"]:
|
||||||
if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES:
|
if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES:
|
||||||
new_content.append(b)
|
new_content.append(b)
|
||||||
continue
|
continue
|
||||||
if b.get("type") == "redacted_thinking":
|
if b.get("type") == "redacted_thinking":
|
||||||
# Redacted blocks use 'data' for the signature payload
|
# Redacted blocks use 'data' for the signature payload —
|
||||||
|
# drop the block when 'data' is missing (can't be validated).
|
||||||
if b.get("data"):
|
if b.get("data"):
|
||||||
new_content.append(b)
|
new_content.append(b)
|
||||||
# else: drop — no data means it can't be validated
|
|
||||||
elif b.get("signature"):
|
elif b.get("signature"):
|
||||||
# Signed thinking block — keep it
|
|
||||||
new_content.append(b)
|
new_content.append(b)
|
||||||
else:
|
else:
|
||||||
# Unsigned thinking — downgrade to text so it's not lost
|
|
||||||
thinking_text = b.get("thinking", "")
|
thinking_text = b.get("thinking", "")
|
||||||
if thinking_text:
|
if thinking_text:
|
||||||
new_content.append({"type": "text", "text": thinking_text})
|
new_content.append({"type": "text", "text": thinking_text})
|
||||||
|
|
@ -1966,12 +1925,15 @@ def convert_messages_to_anthropic(
|
||||||
if isinstance(b, dict) and b.get("type") in _THINKING_TYPES:
|
if isinstance(b, dict) and b.get("type") in _THINKING_TYPES:
|
||||||
b.pop("cache_control", None)
|
b.pop("cache_control", None)
|
||||||
|
|
||||||
# ── Image eviction: keep only the most recent N screenshots ─────
|
|
||||||
# computer_use screenshots (base64 images) sit inside tool_result
|
def _evict_old_screenshots(result: List[Dict[str, Any]]) -> None:
|
||||||
# blocks: they accumulate and are sent with every API call. Each
|
"""Keep only the most recent ``_MAX_KEEP_IMAGES`` computer-use screenshots.
|
||||||
# costs ~1,465 tokens; after 10+ the conversation becomes slow
|
|
||||||
# even for simple text queries. Walk backward, keep the most recent
|
Base64 images cost ~1,465 tokens each and accumulate across tool calls.
|
||||||
# _MAX_KEEP_IMAGES, replace older ones with a text placeholder.
|
Walk backward, keep the most recent N, replace older ones with a placeholder.
|
||||||
|
|
||||||
|
Mutates ``result`` in place.
|
||||||
|
"""
|
||||||
_MAX_KEEP_IMAGES = 3
|
_MAX_KEEP_IMAGES = 3
|
||||||
_image_count = 0
|
_image_count = 0
|
||||||
for msg in reversed(result):
|
for msg in reversed(result):
|
||||||
|
|
@ -1998,6 +1960,68 @@ def convert_messages_to_anthropic(
|
||||||
for b in inner
|
for b in inner
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def convert_messages_to_anthropic(
|
||||||
|
messages: List[Dict],
|
||||||
|
base_url: str | None = None,
|
||||||
|
model: str | None = None,
|
||||||
|
) -> Tuple[Optional[Any], List[Dict]]:
|
||||||
|
"""Convert OpenAI-format messages to Anthropic format.
|
||||||
|
|
||||||
|
Returns (system_prompt, anthropic_messages).
|
||||||
|
System messages are extracted since Anthropic takes them as a separate param.
|
||||||
|
system_prompt is a string or list of content blocks (when cache_control present).
|
||||||
|
|
||||||
|
When *base_url* is provided and points to a third-party Anthropic-compatible
|
||||||
|
endpoint, all thinking block signatures are stripped. Signatures are
|
||||||
|
Anthropic-proprietary — third-party endpoints cannot validate them and will
|
||||||
|
reject them with HTTP 400 "Invalid signature in thinking block".
|
||||||
|
|
||||||
|
When *model* is provided and matches the Kimi / Moonshot family (or
|
||||||
|
*base_url* is a Kimi / Moonshot host), unsigned thinking blocks
|
||||||
|
synthesised from ``reasoning_content`` are preserved on replayed
|
||||||
|
assistant tool-call messages — Kimi requires the field to exist, even
|
||||||
|
if empty.
|
||||||
|
"""
|
||||||
|
system = None
|
||||||
|
result: List[Dict[str, Any]] = []
|
||||||
|
|
||||||
|
for m in messages:
|
||||||
|
role = m.get("role", "user")
|
||||||
|
content = m.get("content", "")
|
||||||
|
|
||||||
|
if role == "system":
|
||||||
|
if isinstance(content, list):
|
||||||
|
# Preserve cache_control markers on content blocks
|
||||||
|
has_cache = any(
|
||||||
|
p.get("cache_control") for p in content if isinstance(p, dict)
|
||||||
|
)
|
||||||
|
if has_cache:
|
||||||
|
system = [p for p in content if isinstance(p, dict)]
|
||||||
|
else:
|
||||||
|
system = "\n".join(
|
||||||
|
p["text"] for p in content if p.get("type") == "text"
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
system = content
|
||||||
|
continue
|
||||||
|
|
||||||
|
if role == "assistant":
|
||||||
|
result.append(_convert_assistant_message(m))
|
||||||
|
continue
|
||||||
|
|
||||||
|
if role == "tool":
|
||||||
|
_convert_tool_message_to_result(result, m)
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Regular user message
|
||||||
|
result.append(_convert_user_message(content))
|
||||||
|
|
||||||
|
_strip_orphaned_tool_blocks(result)
|
||||||
|
result = _merge_consecutive_roles(result)
|
||||||
|
_manage_thinking_signatures(result, base_url, model)
|
||||||
|
_evict_old_screenshots(result)
|
||||||
|
|
||||||
return system, result
|
return system, result
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue