fix(anthropic): preserve third-party thinking continuity

Downgrade third-party thinking blocks to text so reasoning context survives across turns while removing redacted payloads and stale signatures. Add regression tests for third-party thinking conversion and keep z.ai preserved-thinking behavior server-driven by removing explicit clear_thinking injection.
This commit is contained in:
Teknium 2026-04-15 20:54:44 +08:00
parent 722331a57d
commit d2f043f9cf
4 changed files with 4007 additions and 1591 deletions

View file

@ -42,26 +42,26 @@ ADAPTIVE_EFFORT_MAP = {
# starves thinking-enabled models (thinking tokens count toward the limit).
_ANTHROPIC_OUTPUT_LIMITS = {
    # Output-token limit for each known model identifier. Every key is
    # listed exactly once: a repeated key in a dict literal is silently
    # collapsed to its last value, which hides typos and trips linters.
    #
    # NOTE(review): the lookup helper is outside this view. If it matches
    # by prefix/substring, the more specific keys (e.g. "claude-opus-4-6")
    # must stay ahead of the shorter ones ("claude-opus-4") — the original
    # ordering is preserved here for that reason; confirm before reordering.
    # Claude 4.6
    "claude-opus-4-6": 128_000,
    "claude-sonnet-4-6": 64_000,
    # Claude 4.5
    "claude-opus-4-5": 64_000,
    "claude-sonnet-4-5": 64_000,
    "claude-haiku-4-5": 64_000,
    # Claude 4
    "claude-opus-4": 32_000,
    "claude-sonnet-4": 64_000,
    # Claude 3.7
    "claude-3-7-sonnet": 128_000,
    # Claude 3.5
    "claude-3-5-sonnet": 8_192,
    "claude-3-5-haiku": 8_192,
    # Claude 3
    "claude-3-opus": 4_096,
    "claude-3-sonnet": 4_096,
    "claude-3-haiku": 4_096,
    # Third-party Anthropic-compatible providers
    "minimax": 131_072,
}
# For any model not in the table, assume the highest current limit.
@ -138,7 +138,9 @@ def _detect_claude_code_version() -> str:
try:
result = _sp.run(
[cmd, "--version"],
capture_output=True, text=True, timeout=5,
capture_output=True,
text=True,
timeout=5,
)
if result.returncode == 0 and result.stdout.strip():
# Output is like "2.1.74 (Claude Code)" or just "2.1.74"
@ -224,7 +226,9 @@ def _requires_bearer_auth(base_url: str | None) -> bool:
if not normalized:
return False
normalized = normalized.rstrip("/").lower()
return normalized.startswith(("https://api.minimax.io/anthropic", "https://api.minimaxi.com/anthropic"))
return normalized.startswith(
("https://api.minimax.io/anthropic", "https://api.minimaxi.com/anthropic")
)
def _common_betas_for_base_url(base_url: str | None) -> list[str]:
@ -357,7 +361,9 @@ def is_claude_code_token_valid(creds: Dict[str, Any]) -> bool:
return now_ms < (expires_at - 60_000)
def refresh_anthropic_oauth_pure(refresh_token: str, *, use_json: bool = False) -> Dict[str, Any]:
def refresh_anthropic_oauth_pure(
refresh_token: str, *, use_json: bool = False
) -> Dict[str, Any]:
"""Refresh an Anthropic OAuth token without mutating local credential files."""
import time
import urllib.parse
@ -368,18 +374,22 @@ def refresh_anthropic_oauth_pure(refresh_token: str, *, use_json: bool = False)
client_id = "9d1c250a-e61b-44d9-88ed-5944d1962f5e"
if use_json:
data = json.dumps({
"grant_type": "refresh_token",
"refresh_token": refresh_token,
"client_id": client_id,
}).encode()
data = json.dumps(
{
"grant_type": "refresh_token",
"refresh_token": refresh_token,
"client_id": client_id,
}
).encode()
content_type = "application/json"
else:
data = urllib.parse.urlencode({
"grant_type": "refresh_token",
"refresh_token": refresh_token,
"client_id": client_id,
}).encode()
data = urllib.parse.urlencode(
{
"grant_type": "refresh_token",
"refresh_token": refresh_token,
"client_id": client_id,
}
).encode()
content_type = "application/x-www-form-urlencoded"
token_endpoints = [
@ -485,7 +495,9 @@ def _write_claude_code_credentials(
logger.debug("Failed to write refreshed credentials: %s", e)
def _resolve_claude_code_token_from_credentials(creds: Optional[Dict[str, Any]] = None) -> Optional[str]:
def _resolve_claude_code_token_from_credentials(
creds: Optional[Dict[str, Any]] = None,
) -> Optional[str]:
"""Resolve a token from Claude Code credential files, refreshing if needed."""
creds = creds or read_claude_code_credentials()
if creds and is_claude_code_token_valid(creds):
@ -496,11 +508,15 @@ def _resolve_claude_code_token_from_credentials(creds: Optional[Dict[str, Any]]
refreshed = _refresh_oauth_token(creds)
if refreshed:
return refreshed
logger.debug("Token refresh failed — re-run 'claude setup-token' to reauthenticate")
logger.debug(
"Token refresh failed — re-run 'claude setup-token' to reauthenticate"
)
return None
def _prefer_refreshable_claude_code_token(env_token: str, creds: Optional[Dict[str, Any]]) -> Optional[str]:
def _prefer_refreshable_claude_code_token(
env_token: str, creds: Optional[Dict[str, Any]]
) -> Optional[str]:
"""Prefer Claude Code creds when a persisted env OAuth token would shadow refresh.
Hermes historically persisted setup tokens into ANTHROPIC_TOKEN. That makes
@ -624,9 +640,11 @@ def _generate_pkce() -> tuple:
import secrets
verifier = base64.urlsafe_b64encode(secrets.token_bytes(32)).rstrip(b"=").decode()
challenge = base64.urlsafe_b64encode(
hashlib.sha256(verifier.encode()).digest()
).rstrip(b"=").decode()
challenge = (
base64.urlsafe_b64encode(hashlib.sha256(verifier.encode()).digest())
.rstrip(b"=")
.decode()
)
return verifier, challenge
@ -687,14 +705,16 @@ def run_hermes_oauth_login_pure() -> Optional[Dict[str, Any]]:
try:
import urllib.request
exchange_data = json.dumps({
"grant_type": "authorization_code",
"client_id": _OAUTH_CLIENT_ID,
"code": code,
"state": state,
"redirect_uri": _OAUTH_REDIRECT_URI,
"code_verifier": verifier,
}).encode()
exchange_data = json.dumps(
{
"grant_type": "authorization_code",
"client_id": _OAUTH_CLIENT_ID,
"code": code,
"state": state,
"redirect_uri": _OAUTH_REDIRECT_URI,
"code_verifier": verifier,
}
).encode()
req = urllib.request.Request(
_OAUTH_TOKEN_URL,
@ -755,7 +775,7 @@ def normalize_model_name(model: str, preserve_dots: bool = False) -> str:
"""
lower = model.lower()
if lower.startswith("anthropic/"):
model = model[len("anthropic/"):]
model = model[len("anthropic/") :]
if not preserve_dots:
# OpenRouter uses dots for version separators (claude-opus-4.6),
# Anthropic uses hyphens (claude-opus-4-6). Convert dots to hyphens.
@ -770,6 +790,7 @@ def _sanitize_tool_id(tool_id: str) -> str:
characters with underscores and ensure non-empty.
"""
import re
if not tool_id:
return "tool_0"
sanitized = re.sub(r"[^a-zA-Z0-9_-]", "_", tool_id)
@ -783,11 +804,15 @@ def convert_tools_to_anthropic(tools: List[Dict]) -> List[Dict]:
result = []
for t in tools:
fn = t.get("function", {})
result.append({
"name": fn.get("name", ""),
"description": fn.get("description", ""),
"input_schema": fn.get("parameters", {"type": "object", "properties": {}}),
})
result.append(
{
"name": fn.get("name", ""),
"description": fn.get("description", ""),
"input_schema": fn.get(
"parameters", {"type": "object", "properties": {}}
),
}
)
return result
@ -801,7 +826,7 @@ def _image_source_from_openai_url(url: str) -> Dict[str, str]:
header, _, data = url.partition(",")
media_type = "image/jpeg"
if header.startswith("data:"):
mime_part = header[len("data:"):].split(";", 1)[0].strip()
mime_part = header[len("data:") :].split(";", 1)[0].strip()
if mime_part.startswith("image/"):
media_type = mime_part
return {
@ -828,7 +853,11 @@ def _convert_content_part_to_anthropic(part: Any) -> Optional[Dict[str, Any]]:
block: Dict[str, Any] = {"type": "text", "text": part.get("text", "")}
elif ptype in {"image_url", "input_image"}:
image_value = part.get("image_url", {})
url = image_value.get("url", "") if isinstance(image_value, dict) else str(image_value or "")
url = (
image_value.get("url", "")
if isinstance(image_value, dict)
else str(image_value or "")
)
block = {"type": "image", "source": _image_source_from_openai_url(url)}
else:
block = dict(part)
@ -864,7 +893,10 @@ def _to_plain_data(value: Any, *, _depth: int = 0, _path: Optional[set] = None)
return result
if isinstance(value, dict):
_path.add(obj_id)
result = {k: _to_plain_data(v, _depth=_depth + 1, _path=_path) for k, v in value.items()}
result = {
k: _to_plain_data(v, _depth=_depth + 1, _path=_path)
for k, v in value.items()
}
_path.discard(obj_id)
return result
if isinstance(value, (list, tuple)):
@ -925,9 +957,10 @@ def convert_messages_to_anthropic(
system_prompt is a string or list of content blocks (when cache_control present).
When *base_url* is provided and points to a third-party Anthropic-compatible
endpoint, all thinking block signatures are stripped. Signatures are
Anthropic-proprietary — third-party endpoints cannot validate them and will
reject them with HTTP 400 "Invalid signature in thinking block".
endpoint, Anthropic thinking signatures are removed. Signed thinking blocks
are downgraded to plain text to preserve useful reasoning context, while
redacted_thinking blocks are dropped. Third-party endpoints cannot validate
Anthropic signatures and may reject them with HTTP 400.
"""
system = None
result = []
@ -970,12 +1003,14 @@ def convert_messages_to_anthropic(
parsed_args = json.loads(args) if isinstance(args, str) else args
except (json.JSONDecodeError, ValueError):
parsed_args = {}
blocks.append({
"type": "tool_use",
"id": _sanitize_tool_id(tc.get("id", "")),
"name": fn.get("name", ""),
"input": parsed_args,
})
blocks.append(
{
"type": "tool_use",
"id": _sanitize_tool_id(tc.get("id", "")),
"name": fn.get("name", ""),
"input": parsed_args,
}
)
# Anthropic rejects empty assistant content
effective = blocks or content
if not effective or effective == "":
@ -985,7 +1020,9 @@ def convert_messages_to_anthropic(
if role == "tool":
# Sanitize tool_use_id and ensure non-empty content
result_content = content if isinstance(content, str) else json.dumps(content)
result_content = (
content if isinstance(content, str) else json.dumps(content)
)
if not result_content:
result_content = "(no output)"
tool_result = {
@ -1057,7 +1094,8 @@ def convert_messages_to_anthropic(
m["content"] = [
b
for b in m["content"]
if b.get("type") != "tool_result" or b.get("tool_use_id") in tool_use_ids
if b.get("type") != "tool_result"
or b.get("tool_use_id") in tool_use_ids
]
if not m["content"]:
m["content"] = [{"type": "text", "text": "(tool result removed)"}]
@ -1088,8 +1126,12 @@ def convert_messages_to_anthropic(
# and becomes invalid once merged.
if isinstance(m["content"], list):
m["content"] = [
b for b in m["content"]
if not (isinstance(b, dict) and b.get("type") in ("thinking", "redacted_thinking"))
b
for b in m["content"]
if not (
isinstance(b, dict)
and b.get("type") in ("thinking", "redacted_thinking")
)
]
prev_blocks = fixed[-1]["content"]
curr_blocks = m["content"]
@ -1117,9 +1159,8 @@ def convert_messages_to_anthropic(
# Signatures are Anthropic-proprietary. Third-party endpoints
# (MiniMax, Azure AI Foundry, self-hosted proxies) cannot validate
# them and will reject them outright. When targeting a third-party
# endpoint, strip ALL thinking/redacted_thinking blocks from every
# assistant message — the third-party will generate its own
# thinking blocks if it supports extended thinking.
# endpoint, downgrade thinking blocks to plain text and drop
# redacted_thinking blocks.
#
# For direct Anthropic (strategy following clawdbot/OpenClaw):
# 1. Strip thinking/redacted_thinking from all assistant messages
@ -1142,12 +1183,33 @@ def convert_messages_to_anthropic(
if m.get("role") != "assistant" or not isinstance(m.get("content"), list):
continue
if _is_third_party or idx != last_assistant_idx:
# Third-party endpoint: strip ALL thinking blocks from every
# assistant message — signatures are Anthropic-proprietary.
# Direct Anthropic: strip from non-latest assistant messages only.
if _is_third_party:
# Third-party endpoint: Anthropic signatures are proprietary
# and will be rejected. Downgrade thinking blocks to plain
# text so the model retains reasoning context across turns.
# (Direct Anthropic would validate signatures; third-party
# endpoints like z.ai / GLM-5.1 don't use signatures at all.)
_tp_content = []
for b in m["content"]:
if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES:
_tp_content.append(b)
continue
# redacted_thinking carries opaque data — drop it.
if b.get("type") == "redacted_thinking":
continue
# Regular thinking → plain text preserves reasoning for next turn.
thinking_text = b.get("thinking", "")
if thinking_text:
_tp_content.append({"type": "text", "text": thinking_text})
m["content"] = _tp_content or [
{"type": "text", "text": "(thinking elided)"}
]
elif idx != last_assistant_idx:
# Direct Anthropic: strip thinking from non-latest assistant
# messages to avoid stale-signature 400s.
stripped = [
b for b in m["content"]
b
for b in m["content"]
if not (isinstance(b, dict) and b.get("type") in _THINKING_TYPES)
]
m["content"] = stripped or [{"type": "text", "text": "(thinking elided)"}]
@ -1235,7 +1297,9 @@ def build_anthropic_kwargs(
Currently only supported on native Anthropic endpoints (not third-party
compatible ones).
"""
system, anthropic_messages = convert_messages_to_anthropic(messages, base_url=base_url)
system, anthropic_messages = convert_messages_to_anthropic(
messages, base_url=base_url
)
anthropic_tools = convert_tools_to_anthropic(tools) if tools else []
model = normalize_model_name(model, preserve_dots=preserve_dots)
@ -1287,7 +1351,10 @@ def build_anthropic_kwargs(
if block.get("type") == "tool_use" and "name" in block:
if not block["name"].startswith(_MCP_TOOL_PREFIX):
block["name"] = _MCP_TOOL_PREFIX + block["name"]
elif block.get("type") == "tool_result" and "tool_use_id" in block:
elif (
block.get("type") == "tool_result"
and "tool_use_id" in block
):
pass # tool_result uses ID, not name
kwargs: Dict[str, Any] = {
@ -1319,7 +1386,10 @@ def build_anthropic_kwargs(
# MiniMax Anthropic-compat endpoints support thinking (manual mode only,
# not adaptive). Haiku does NOT support extended thinking — skip entirely.
if reasoning_config and isinstance(reasoning_config, dict):
if reasoning_config.get("enabled") is not False and "haiku" not in model.lower():
if (
reasoning_config.get("enabled") is not False
and "haiku" not in model.lower()
):
effort = str(reasoning_config.get("effort", "medium")).lower()
budget = THINKING_BUDGET.get(effort, 8000)
if _supports_adaptive_thinking(model):
@ -1378,7 +1448,7 @@ def normalize_anthropic_response(
elif block.type == "tool_use":
name = block.name
if strip_tool_prefix and name.startswith(_MCP_TOOL_PREFIX):
name = name[len(_MCP_TOOL_PREFIX):]
name = name[len(_MCP_TOOL_PREFIX) :]
tool_calls.append(
SimpleNamespace(
id=block.id,