mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-21 05:11:26 +00:00
fix(anthropic): preserve third-party thinking continuity
Downgrade thinking blocks to plain text when targeting third-party Anthropic-compatible endpoints, so reasoning context survives across turns, while dropping redacted payloads and stale signatures. Add regression tests for the third-party thinking conversion, and leave z.ai preserved-thinking behavior to the server by removing the explicit clear_thinking injection.
This commit is contained in:
parent
722331a57d
commit
d2f043f9cf
4 changed files with 4007 additions and 1591 deletions
|
|
@ -42,26 +42,26 @@ ADAPTIVE_EFFORT_MAP = {
|
|||
# starves thinking-enabled models (thinking tokens count toward the limit).
|
||||
_ANTHROPIC_OUTPUT_LIMITS = {
|
||||
# Claude 4.6
|
||||
"claude-opus-4-6": 128_000,
|
||||
"claude-sonnet-4-6": 64_000,
|
||||
"claude-opus-4-6": 128_000,
|
||||
"claude-sonnet-4-6": 64_000,
|
||||
# Claude 4.5
|
||||
"claude-opus-4-5": 64_000,
|
||||
"claude-sonnet-4-5": 64_000,
|
||||
"claude-haiku-4-5": 64_000,
|
||||
"claude-opus-4-5": 64_000,
|
||||
"claude-sonnet-4-5": 64_000,
|
||||
"claude-haiku-4-5": 64_000,
|
||||
# Claude 4
|
||||
"claude-opus-4": 32_000,
|
||||
"claude-sonnet-4": 64_000,
|
||||
"claude-opus-4": 32_000,
|
||||
"claude-sonnet-4": 64_000,
|
||||
# Claude 3.7
|
||||
"claude-3-7-sonnet": 128_000,
|
||||
# Claude 3.5
|
||||
"claude-3-5-sonnet": 8_192,
|
||||
"claude-3-5-haiku": 8_192,
|
||||
"claude-3-5-sonnet": 8_192,
|
||||
"claude-3-5-haiku": 8_192,
|
||||
# Claude 3
|
||||
"claude-3-opus": 4_096,
|
||||
"claude-3-sonnet": 4_096,
|
||||
"claude-3-haiku": 4_096,
|
||||
"claude-3-opus": 4_096,
|
||||
"claude-3-sonnet": 4_096,
|
||||
"claude-3-haiku": 4_096,
|
||||
# Third-party Anthropic-compatible providers
|
||||
"minimax": 131_072,
|
||||
"minimax": 131_072,
|
||||
}
|
||||
|
||||
# For any model not in the table, assume the highest current limit.
|
||||
|
|
@ -138,7 +138,9 @@ def _detect_claude_code_version() -> str:
|
|||
try:
|
||||
result = _sp.run(
|
||||
[cmd, "--version"],
|
||||
capture_output=True, text=True, timeout=5,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=5,
|
||||
)
|
||||
if result.returncode == 0 and result.stdout.strip():
|
||||
# Output is like "2.1.74 (Claude Code)" or just "2.1.74"
|
||||
|
|
@ -224,7 +226,9 @@ def _requires_bearer_auth(base_url: str | None) -> bool:
|
|||
if not normalized:
|
||||
return False
|
||||
normalized = normalized.rstrip("/").lower()
|
||||
return normalized.startswith(("https://api.minimax.io/anthropic", "https://api.minimaxi.com/anthropic"))
|
||||
return normalized.startswith(
|
||||
("https://api.minimax.io/anthropic", "https://api.minimaxi.com/anthropic")
|
||||
)
|
||||
|
||||
|
||||
def _common_betas_for_base_url(base_url: str | None) -> list[str]:
|
||||
|
|
@ -357,7 +361,9 @@ def is_claude_code_token_valid(creds: Dict[str, Any]) -> bool:
|
|||
return now_ms < (expires_at - 60_000)
|
||||
|
||||
|
||||
def refresh_anthropic_oauth_pure(refresh_token: str, *, use_json: bool = False) -> Dict[str, Any]:
|
||||
def refresh_anthropic_oauth_pure(
|
||||
refresh_token: str, *, use_json: bool = False
|
||||
) -> Dict[str, Any]:
|
||||
"""Refresh an Anthropic OAuth token without mutating local credential files."""
|
||||
import time
|
||||
import urllib.parse
|
||||
|
|
@ -368,18 +374,22 @@ def refresh_anthropic_oauth_pure(refresh_token: str, *, use_json: bool = False)
|
|||
|
||||
client_id = "9d1c250a-e61b-44d9-88ed-5944d1962f5e"
|
||||
if use_json:
|
||||
data = json.dumps({
|
||||
"grant_type": "refresh_token",
|
||||
"refresh_token": refresh_token,
|
||||
"client_id": client_id,
|
||||
}).encode()
|
||||
data = json.dumps(
|
||||
{
|
||||
"grant_type": "refresh_token",
|
||||
"refresh_token": refresh_token,
|
||||
"client_id": client_id,
|
||||
}
|
||||
).encode()
|
||||
content_type = "application/json"
|
||||
else:
|
||||
data = urllib.parse.urlencode({
|
||||
"grant_type": "refresh_token",
|
||||
"refresh_token": refresh_token,
|
||||
"client_id": client_id,
|
||||
}).encode()
|
||||
data = urllib.parse.urlencode(
|
||||
{
|
||||
"grant_type": "refresh_token",
|
||||
"refresh_token": refresh_token,
|
||||
"client_id": client_id,
|
||||
}
|
||||
).encode()
|
||||
content_type = "application/x-www-form-urlencoded"
|
||||
|
||||
token_endpoints = [
|
||||
|
|
@ -485,7 +495,9 @@ def _write_claude_code_credentials(
|
|||
logger.debug("Failed to write refreshed credentials: %s", e)
|
||||
|
||||
|
||||
def _resolve_claude_code_token_from_credentials(creds: Optional[Dict[str, Any]] = None) -> Optional[str]:
|
||||
def _resolve_claude_code_token_from_credentials(
|
||||
creds: Optional[Dict[str, Any]] = None,
|
||||
) -> Optional[str]:
|
||||
"""Resolve a token from Claude Code credential files, refreshing if needed."""
|
||||
creds = creds or read_claude_code_credentials()
|
||||
if creds and is_claude_code_token_valid(creds):
|
||||
|
|
@ -496,11 +508,15 @@ def _resolve_claude_code_token_from_credentials(creds: Optional[Dict[str, Any]]
|
|||
refreshed = _refresh_oauth_token(creds)
|
||||
if refreshed:
|
||||
return refreshed
|
||||
logger.debug("Token refresh failed — re-run 'claude setup-token' to reauthenticate")
|
||||
logger.debug(
|
||||
"Token refresh failed — re-run 'claude setup-token' to reauthenticate"
|
||||
)
|
||||
return None
|
||||
|
||||
|
||||
def _prefer_refreshable_claude_code_token(env_token: str, creds: Optional[Dict[str, Any]]) -> Optional[str]:
|
||||
def _prefer_refreshable_claude_code_token(
|
||||
env_token: str, creds: Optional[Dict[str, Any]]
|
||||
) -> Optional[str]:
|
||||
"""Prefer Claude Code creds when a persisted env OAuth token would shadow refresh.
|
||||
|
||||
Hermes historically persisted setup tokens into ANTHROPIC_TOKEN. That makes
|
||||
|
|
@ -624,9 +640,11 @@ def _generate_pkce() -> tuple:
|
|||
import secrets
|
||||
|
||||
verifier = base64.urlsafe_b64encode(secrets.token_bytes(32)).rstrip(b"=").decode()
|
||||
challenge = base64.urlsafe_b64encode(
|
||||
hashlib.sha256(verifier.encode()).digest()
|
||||
).rstrip(b"=").decode()
|
||||
challenge = (
|
||||
base64.urlsafe_b64encode(hashlib.sha256(verifier.encode()).digest())
|
||||
.rstrip(b"=")
|
||||
.decode()
|
||||
)
|
||||
return verifier, challenge
|
||||
|
||||
|
||||
|
|
@ -687,14 +705,16 @@ def run_hermes_oauth_login_pure() -> Optional[Dict[str, Any]]:
|
|||
try:
|
||||
import urllib.request
|
||||
|
||||
exchange_data = json.dumps({
|
||||
"grant_type": "authorization_code",
|
||||
"client_id": _OAUTH_CLIENT_ID,
|
||||
"code": code,
|
||||
"state": state,
|
||||
"redirect_uri": _OAUTH_REDIRECT_URI,
|
||||
"code_verifier": verifier,
|
||||
}).encode()
|
||||
exchange_data = json.dumps(
|
||||
{
|
||||
"grant_type": "authorization_code",
|
||||
"client_id": _OAUTH_CLIENT_ID,
|
||||
"code": code,
|
||||
"state": state,
|
||||
"redirect_uri": _OAUTH_REDIRECT_URI,
|
||||
"code_verifier": verifier,
|
||||
}
|
||||
).encode()
|
||||
|
||||
req = urllib.request.Request(
|
||||
_OAUTH_TOKEN_URL,
|
||||
|
|
@ -755,7 +775,7 @@ def normalize_model_name(model: str, preserve_dots: bool = False) -> str:
|
|||
"""
|
||||
lower = model.lower()
|
||||
if lower.startswith("anthropic/"):
|
||||
model = model[len("anthropic/"):]
|
||||
model = model[len("anthropic/") :]
|
||||
if not preserve_dots:
|
||||
# OpenRouter uses dots for version separators (claude-opus-4.6),
|
||||
# Anthropic uses hyphens (claude-opus-4-6). Convert dots to hyphens.
|
||||
|
|
@ -770,6 +790,7 @@ def _sanitize_tool_id(tool_id: str) -> str:
|
|||
characters with underscores and ensure non-empty.
|
||||
"""
|
||||
import re
|
||||
|
||||
if not tool_id:
|
||||
return "tool_0"
|
||||
sanitized = re.sub(r"[^a-zA-Z0-9_-]", "_", tool_id)
|
||||
|
|
@ -783,11 +804,15 @@ def convert_tools_to_anthropic(tools: List[Dict]) -> List[Dict]:
|
|||
result = []
|
||||
for t in tools:
|
||||
fn = t.get("function", {})
|
||||
result.append({
|
||||
"name": fn.get("name", ""),
|
||||
"description": fn.get("description", ""),
|
||||
"input_schema": fn.get("parameters", {"type": "object", "properties": {}}),
|
||||
})
|
||||
result.append(
|
||||
{
|
||||
"name": fn.get("name", ""),
|
||||
"description": fn.get("description", ""),
|
||||
"input_schema": fn.get(
|
||||
"parameters", {"type": "object", "properties": {}}
|
||||
),
|
||||
}
|
||||
)
|
||||
return result
|
||||
|
||||
|
||||
|
|
@ -801,7 +826,7 @@ def _image_source_from_openai_url(url: str) -> Dict[str, str]:
|
|||
header, _, data = url.partition(",")
|
||||
media_type = "image/jpeg"
|
||||
if header.startswith("data:"):
|
||||
mime_part = header[len("data:"):].split(";", 1)[0].strip()
|
||||
mime_part = header[len("data:") :].split(";", 1)[0].strip()
|
||||
if mime_part.startswith("image/"):
|
||||
media_type = mime_part
|
||||
return {
|
||||
|
|
@ -828,7 +853,11 @@ def _convert_content_part_to_anthropic(part: Any) -> Optional[Dict[str, Any]]:
|
|||
block: Dict[str, Any] = {"type": "text", "text": part.get("text", "")}
|
||||
elif ptype in {"image_url", "input_image"}:
|
||||
image_value = part.get("image_url", {})
|
||||
url = image_value.get("url", "") if isinstance(image_value, dict) else str(image_value or "")
|
||||
url = (
|
||||
image_value.get("url", "")
|
||||
if isinstance(image_value, dict)
|
||||
else str(image_value or "")
|
||||
)
|
||||
block = {"type": "image", "source": _image_source_from_openai_url(url)}
|
||||
else:
|
||||
block = dict(part)
|
||||
|
|
@ -864,7 +893,10 @@ def _to_plain_data(value: Any, *, _depth: int = 0, _path: Optional[set] = None)
|
|||
return result
|
||||
if isinstance(value, dict):
|
||||
_path.add(obj_id)
|
||||
result = {k: _to_plain_data(v, _depth=_depth + 1, _path=_path) for k, v in value.items()}
|
||||
result = {
|
||||
k: _to_plain_data(v, _depth=_depth + 1, _path=_path)
|
||||
for k, v in value.items()
|
||||
}
|
||||
_path.discard(obj_id)
|
||||
return result
|
||||
if isinstance(value, (list, tuple)):
|
||||
|
|
@ -925,9 +957,10 @@ def convert_messages_to_anthropic(
|
|||
system_prompt is a string or list of content blocks (when cache_control present).
|
||||
|
||||
When *base_url* is provided and points to a third-party Anthropic-compatible
|
||||
endpoint, all thinking block signatures are stripped. Signatures are
|
||||
Anthropic-proprietary — third-party endpoints cannot validate them and will
|
||||
reject them with HTTP 400 "Invalid signature in thinking block".
|
||||
endpoint, Anthropic thinking signatures are removed. Signed thinking blocks
|
||||
are downgraded to plain text to preserve useful reasoning context, while
|
||||
redacted_thinking blocks are dropped. Third-party endpoints cannot validate
|
||||
Anthropic signatures and may reject them with HTTP 400.
|
||||
"""
|
||||
system = None
|
||||
result = []
|
||||
|
|
@ -970,12 +1003,14 @@ def convert_messages_to_anthropic(
|
|||
parsed_args = json.loads(args) if isinstance(args, str) else args
|
||||
except (json.JSONDecodeError, ValueError):
|
||||
parsed_args = {}
|
||||
blocks.append({
|
||||
"type": "tool_use",
|
||||
"id": _sanitize_tool_id(tc.get("id", "")),
|
||||
"name": fn.get("name", ""),
|
||||
"input": parsed_args,
|
||||
})
|
||||
blocks.append(
|
||||
{
|
||||
"type": "tool_use",
|
||||
"id": _sanitize_tool_id(tc.get("id", "")),
|
||||
"name": fn.get("name", ""),
|
||||
"input": parsed_args,
|
||||
}
|
||||
)
|
||||
# Anthropic rejects empty assistant content
|
||||
effective = blocks or content
|
||||
if not effective or effective == "":
|
||||
|
|
@ -985,7 +1020,9 @@ def convert_messages_to_anthropic(
|
|||
|
||||
if role == "tool":
|
||||
# Sanitize tool_use_id and ensure non-empty content
|
||||
result_content = content if isinstance(content, str) else json.dumps(content)
|
||||
result_content = (
|
||||
content if isinstance(content, str) else json.dumps(content)
|
||||
)
|
||||
if not result_content:
|
||||
result_content = "(no output)"
|
||||
tool_result = {
|
||||
|
|
@ -1057,7 +1094,8 @@ def convert_messages_to_anthropic(
|
|||
m["content"] = [
|
||||
b
|
||||
for b in m["content"]
|
||||
if b.get("type") != "tool_result" or b.get("tool_use_id") in tool_use_ids
|
||||
if b.get("type") != "tool_result"
|
||||
or b.get("tool_use_id") in tool_use_ids
|
||||
]
|
||||
if not m["content"]:
|
||||
m["content"] = [{"type": "text", "text": "(tool result removed)"}]
|
||||
|
|
@ -1088,8 +1126,12 @@ def convert_messages_to_anthropic(
|
|||
# and becomes invalid once merged.
|
||||
if isinstance(m["content"], list):
|
||||
m["content"] = [
|
||||
b for b in m["content"]
|
||||
if not (isinstance(b, dict) and b.get("type") in ("thinking", "redacted_thinking"))
|
||||
b
|
||||
for b in m["content"]
|
||||
if not (
|
||||
isinstance(b, dict)
|
||||
and b.get("type") in ("thinking", "redacted_thinking")
|
||||
)
|
||||
]
|
||||
prev_blocks = fixed[-1]["content"]
|
||||
curr_blocks = m["content"]
|
||||
|
|
@ -1117,9 +1159,8 @@ def convert_messages_to_anthropic(
|
|||
# Signatures are Anthropic-proprietary. Third-party endpoints
|
||||
# (MiniMax, Azure AI Foundry, self-hosted proxies) cannot validate
|
||||
# them and will reject them outright. When targeting a third-party
|
||||
# endpoint, strip ALL thinking/redacted_thinking blocks from every
|
||||
# assistant message — the third-party will generate its own
|
||||
# thinking blocks if it supports extended thinking.
|
||||
# endpoint, downgrade thinking blocks to plain text and drop
|
||||
# redacted_thinking blocks.
|
||||
#
|
||||
# For direct Anthropic (strategy following clawdbot/OpenClaw):
|
||||
# 1. Strip thinking/redacted_thinking from all assistant messages
|
||||
|
|
@ -1142,12 +1183,33 @@ def convert_messages_to_anthropic(
|
|||
if m.get("role") != "assistant" or not isinstance(m.get("content"), list):
|
||||
continue
|
||||
|
||||
if _is_third_party or idx != last_assistant_idx:
|
||||
# Third-party endpoint: strip ALL thinking blocks from every
|
||||
# assistant message — signatures are Anthropic-proprietary.
|
||||
# Direct Anthropic: strip from non-latest assistant messages only.
|
||||
if _is_third_party:
|
||||
# Third-party endpoint: Anthropic signatures are proprietary
|
||||
# and will be rejected. Downgrade thinking blocks to plain
|
||||
# text so the model retains reasoning context across turns.
|
||||
# (Direct Anthropic would validate signatures; third-party
|
||||
# endpoints like z.ai / GLM-5.1 don't use signatures at all.)
|
||||
_tp_content = []
|
||||
for b in m["content"]:
|
||||
if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES:
|
||||
_tp_content.append(b)
|
||||
continue
|
||||
# redacted_thinking carries opaque data — drop it.
|
||||
if b.get("type") == "redacted_thinking":
|
||||
continue
|
||||
# Regular thinking → plain text preserves reasoning for next turn.
|
||||
thinking_text = b.get("thinking", "")
|
||||
if thinking_text:
|
||||
_tp_content.append({"type": "text", "text": thinking_text})
|
||||
m["content"] = _tp_content or [
|
||||
{"type": "text", "text": "(thinking elided)"}
|
||||
]
|
||||
elif idx != last_assistant_idx:
|
||||
# Direct Anthropic: strip thinking from non-latest assistant
|
||||
# messages to avoid stale-signature 400s.
|
||||
stripped = [
|
||||
b for b in m["content"]
|
||||
b
|
||||
for b in m["content"]
|
||||
if not (isinstance(b, dict) and b.get("type") in _THINKING_TYPES)
|
||||
]
|
||||
m["content"] = stripped or [{"type": "text", "text": "(thinking elided)"}]
|
||||
|
|
@ -1235,7 +1297,9 @@ def build_anthropic_kwargs(
|
|||
Currently only supported on native Anthropic endpoints (not third-party
|
||||
compatible ones).
|
||||
"""
|
||||
system, anthropic_messages = convert_messages_to_anthropic(messages, base_url=base_url)
|
||||
system, anthropic_messages = convert_messages_to_anthropic(
|
||||
messages, base_url=base_url
|
||||
)
|
||||
anthropic_tools = convert_tools_to_anthropic(tools) if tools else []
|
||||
|
||||
model = normalize_model_name(model, preserve_dots=preserve_dots)
|
||||
|
|
@ -1287,7 +1351,10 @@ def build_anthropic_kwargs(
|
|||
if block.get("type") == "tool_use" and "name" in block:
|
||||
if not block["name"].startswith(_MCP_TOOL_PREFIX):
|
||||
block["name"] = _MCP_TOOL_PREFIX + block["name"]
|
||||
elif block.get("type") == "tool_result" and "tool_use_id" in block:
|
||||
elif (
|
||||
block.get("type") == "tool_result"
|
||||
and "tool_use_id" in block
|
||||
):
|
||||
pass # tool_result uses ID, not name
|
||||
|
||||
kwargs: Dict[str, Any] = {
|
||||
|
|
@ -1319,7 +1386,10 @@ def build_anthropic_kwargs(
|
|||
# MiniMax Anthropic-compat endpoints support thinking (manual mode only,
|
||||
# not adaptive). Haiku does NOT support extended thinking — skip entirely.
|
||||
if reasoning_config and isinstance(reasoning_config, dict):
|
||||
if reasoning_config.get("enabled") is not False and "haiku" not in model.lower():
|
||||
if (
|
||||
reasoning_config.get("enabled") is not False
|
||||
and "haiku" not in model.lower()
|
||||
):
|
||||
effort = str(reasoning_config.get("effort", "medium")).lower()
|
||||
budget = THINKING_BUDGET.get(effort, 8000)
|
||||
if _supports_adaptive_thinking(model):
|
||||
|
|
@ -1378,7 +1448,7 @@ def normalize_anthropic_response(
|
|||
elif block.type == "tool_use":
|
||||
name = block.name
|
||||
if strip_tool_prefix and name.startswith(_MCP_TOOL_PREFIX):
|
||||
name = name[len(_MCP_TOOL_PREFIX):]
|
||||
name = name[len(_MCP_TOOL_PREFIX) :]
|
||||
tool_calls.append(
|
||||
SimpleNamespace(
|
||||
id=block.id,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue