fix(anthropic): preserve third-party thinking continuity

Downgrade third-party thinking blocks to text so reasoning context survives across turns while removing redacted payloads and stale signatures. Add regression tests for third-party thinking conversion and keep z.ai preserved-thinking behavior server-driven by removing explicit clear_thinking injection.
This commit is contained in:
Teknium 2026-04-15 20:54:44 +08:00
parent 722331a57d
commit d2f043f9cf
4 changed files with 4007 additions and 1591 deletions

View file

@ -42,26 +42,26 @@ ADAPTIVE_EFFORT_MAP = {
# starves thinking-enabled models (thinking tokens count toward the limit).
_ANTHROPIC_OUTPUT_LIMITS = {
# Claude 4.6
"claude-opus-4-6": 128_000,
"claude-sonnet-4-6": 64_000,
"claude-opus-4-6": 128_000,
"claude-sonnet-4-6": 64_000,
# Claude 4.5
"claude-opus-4-5": 64_000,
"claude-sonnet-4-5": 64_000,
"claude-haiku-4-5": 64_000,
"claude-opus-4-5": 64_000,
"claude-sonnet-4-5": 64_000,
"claude-haiku-4-5": 64_000,
# Claude 4
"claude-opus-4": 32_000,
"claude-sonnet-4": 64_000,
"claude-opus-4": 32_000,
"claude-sonnet-4": 64_000,
# Claude 3.7
"claude-3-7-sonnet": 128_000,
# Claude 3.5
"claude-3-5-sonnet": 8_192,
"claude-3-5-haiku": 8_192,
"claude-3-5-sonnet": 8_192,
"claude-3-5-haiku": 8_192,
# Claude 3
"claude-3-opus": 4_096,
"claude-3-sonnet": 4_096,
"claude-3-haiku": 4_096,
"claude-3-opus": 4_096,
"claude-3-sonnet": 4_096,
"claude-3-haiku": 4_096,
# Third-party Anthropic-compatible providers
"minimax": 131_072,
"minimax": 131_072,
}
# For any model not in the table, assume the highest current limit.
@ -138,7 +138,9 @@ def _detect_claude_code_version() -> str:
try:
result = _sp.run(
[cmd, "--version"],
capture_output=True, text=True, timeout=5,
capture_output=True,
text=True,
timeout=5,
)
if result.returncode == 0 and result.stdout.strip():
# Output is like "2.1.74 (Claude Code)" or just "2.1.74"
@ -224,7 +226,9 @@ def _requires_bearer_auth(base_url: str | None) -> bool:
if not normalized:
return False
normalized = normalized.rstrip("/").lower()
return normalized.startswith(("https://api.minimax.io/anthropic", "https://api.minimaxi.com/anthropic"))
return normalized.startswith(
("https://api.minimax.io/anthropic", "https://api.minimaxi.com/anthropic")
)
def _common_betas_for_base_url(base_url: str | None) -> list[str]:
@ -357,7 +361,9 @@ def is_claude_code_token_valid(creds: Dict[str, Any]) -> bool:
return now_ms < (expires_at - 60_000)
def refresh_anthropic_oauth_pure(refresh_token: str, *, use_json: bool = False) -> Dict[str, Any]:
def refresh_anthropic_oauth_pure(
refresh_token: str, *, use_json: bool = False
) -> Dict[str, Any]:
"""Refresh an Anthropic OAuth token without mutating local credential files."""
import time
import urllib.parse
@ -368,18 +374,22 @@ def refresh_anthropic_oauth_pure(refresh_token: str, *, use_json: bool = False)
client_id = "9d1c250a-e61b-44d9-88ed-5944d1962f5e"
if use_json:
data = json.dumps({
"grant_type": "refresh_token",
"refresh_token": refresh_token,
"client_id": client_id,
}).encode()
data = json.dumps(
{
"grant_type": "refresh_token",
"refresh_token": refresh_token,
"client_id": client_id,
}
).encode()
content_type = "application/json"
else:
data = urllib.parse.urlencode({
"grant_type": "refresh_token",
"refresh_token": refresh_token,
"client_id": client_id,
}).encode()
data = urllib.parse.urlencode(
{
"grant_type": "refresh_token",
"refresh_token": refresh_token,
"client_id": client_id,
}
).encode()
content_type = "application/x-www-form-urlencoded"
token_endpoints = [
@ -485,7 +495,9 @@ def _write_claude_code_credentials(
logger.debug("Failed to write refreshed credentials: %s", e)
def _resolve_claude_code_token_from_credentials(creds: Optional[Dict[str, Any]] = None) -> Optional[str]:
def _resolve_claude_code_token_from_credentials(
creds: Optional[Dict[str, Any]] = None,
) -> Optional[str]:
"""Resolve a token from Claude Code credential files, refreshing if needed."""
creds = creds or read_claude_code_credentials()
if creds and is_claude_code_token_valid(creds):
@ -496,11 +508,15 @@ def _resolve_claude_code_token_from_credentials(creds: Optional[Dict[str, Any]]
refreshed = _refresh_oauth_token(creds)
if refreshed:
return refreshed
logger.debug("Token refresh failed — re-run 'claude setup-token' to reauthenticate")
logger.debug(
"Token refresh failed — re-run 'claude setup-token' to reauthenticate"
)
return None
def _prefer_refreshable_claude_code_token(env_token: str, creds: Optional[Dict[str, Any]]) -> Optional[str]:
def _prefer_refreshable_claude_code_token(
env_token: str, creds: Optional[Dict[str, Any]]
) -> Optional[str]:
"""Prefer Claude Code creds when a persisted env OAuth token would shadow refresh.
Hermes historically persisted setup tokens into ANTHROPIC_TOKEN. That makes
@ -624,9 +640,11 @@ def _generate_pkce() -> tuple:
import secrets
verifier = base64.urlsafe_b64encode(secrets.token_bytes(32)).rstrip(b"=").decode()
challenge = base64.urlsafe_b64encode(
hashlib.sha256(verifier.encode()).digest()
).rstrip(b"=").decode()
challenge = (
base64.urlsafe_b64encode(hashlib.sha256(verifier.encode()).digest())
.rstrip(b"=")
.decode()
)
return verifier, challenge
@ -687,14 +705,16 @@ def run_hermes_oauth_login_pure() -> Optional[Dict[str, Any]]:
try:
import urllib.request
exchange_data = json.dumps({
"grant_type": "authorization_code",
"client_id": _OAUTH_CLIENT_ID,
"code": code,
"state": state,
"redirect_uri": _OAUTH_REDIRECT_URI,
"code_verifier": verifier,
}).encode()
exchange_data = json.dumps(
{
"grant_type": "authorization_code",
"client_id": _OAUTH_CLIENT_ID,
"code": code,
"state": state,
"redirect_uri": _OAUTH_REDIRECT_URI,
"code_verifier": verifier,
}
).encode()
req = urllib.request.Request(
_OAUTH_TOKEN_URL,
@ -755,7 +775,7 @@ def normalize_model_name(model: str, preserve_dots: bool = False) -> str:
"""
lower = model.lower()
if lower.startswith("anthropic/"):
model = model[len("anthropic/"):]
model = model[len("anthropic/") :]
if not preserve_dots:
# OpenRouter uses dots for version separators (claude-opus-4.6),
# Anthropic uses hyphens (claude-opus-4-6). Convert dots to hyphens.
@ -770,6 +790,7 @@ def _sanitize_tool_id(tool_id: str) -> str:
characters with underscores and ensure non-empty.
"""
import re
if not tool_id:
return "tool_0"
sanitized = re.sub(r"[^a-zA-Z0-9_-]", "_", tool_id)
@ -783,11 +804,15 @@ def convert_tools_to_anthropic(tools: List[Dict]) -> List[Dict]:
result = []
for t in tools:
fn = t.get("function", {})
result.append({
"name": fn.get("name", ""),
"description": fn.get("description", ""),
"input_schema": fn.get("parameters", {"type": "object", "properties": {}}),
})
result.append(
{
"name": fn.get("name", ""),
"description": fn.get("description", ""),
"input_schema": fn.get(
"parameters", {"type": "object", "properties": {}}
),
}
)
return result
@ -801,7 +826,7 @@ def _image_source_from_openai_url(url: str) -> Dict[str, str]:
header, _, data = url.partition(",")
media_type = "image/jpeg"
if header.startswith("data:"):
mime_part = header[len("data:"):].split(";", 1)[0].strip()
mime_part = header[len("data:") :].split(";", 1)[0].strip()
if mime_part.startswith("image/"):
media_type = mime_part
return {
@ -828,7 +853,11 @@ def _convert_content_part_to_anthropic(part: Any) -> Optional[Dict[str, Any]]:
block: Dict[str, Any] = {"type": "text", "text": part.get("text", "")}
elif ptype in {"image_url", "input_image"}:
image_value = part.get("image_url", {})
url = image_value.get("url", "") if isinstance(image_value, dict) else str(image_value or "")
url = (
image_value.get("url", "")
if isinstance(image_value, dict)
else str(image_value or "")
)
block = {"type": "image", "source": _image_source_from_openai_url(url)}
else:
block = dict(part)
@ -864,7 +893,10 @@ def _to_plain_data(value: Any, *, _depth: int = 0, _path: Optional[set] = None)
return result
if isinstance(value, dict):
_path.add(obj_id)
result = {k: _to_plain_data(v, _depth=_depth + 1, _path=_path) for k, v in value.items()}
result = {
k: _to_plain_data(v, _depth=_depth + 1, _path=_path)
for k, v in value.items()
}
_path.discard(obj_id)
return result
if isinstance(value, (list, tuple)):
@ -925,9 +957,10 @@ def convert_messages_to_anthropic(
system_prompt is a string or list of content blocks (when cache_control present).
When *base_url* is provided and points to a third-party Anthropic-compatible
endpoint, all thinking block signatures are stripped. Signatures are
Anthropic-proprietary — third-party endpoints cannot validate them and will
reject them with HTTP 400 "Invalid signature in thinking block".
endpoint, Anthropic thinking signatures are removed. Signed thinking blocks
are downgraded to plain text to preserve useful reasoning context, while
redacted_thinking blocks are dropped. Third-party endpoints cannot validate
Anthropic signatures and may reject them with HTTP 400.
"""
system = None
result = []
@ -970,12 +1003,14 @@ def convert_messages_to_anthropic(
parsed_args = json.loads(args) if isinstance(args, str) else args
except (json.JSONDecodeError, ValueError):
parsed_args = {}
blocks.append({
"type": "tool_use",
"id": _sanitize_tool_id(tc.get("id", "")),
"name": fn.get("name", ""),
"input": parsed_args,
})
blocks.append(
{
"type": "tool_use",
"id": _sanitize_tool_id(tc.get("id", "")),
"name": fn.get("name", ""),
"input": parsed_args,
}
)
# Anthropic rejects empty assistant content
effective = blocks or content
if not effective or effective == "":
@ -985,7 +1020,9 @@ def convert_messages_to_anthropic(
if role == "tool":
# Sanitize tool_use_id and ensure non-empty content
result_content = content if isinstance(content, str) else json.dumps(content)
result_content = (
content if isinstance(content, str) else json.dumps(content)
)
if not result_content:
result_content = "(no output)"
tool_result = {
@ -1057,7 +1094,8 @@ def convert_messages_to_anthropic(
m["content"] = [
b
for b in m["content"]
if b.get("type") != "tool_result" or b.get("tool_use_id") in tool_use_ids
if b.get("type") != "tool_result"
or b.get("tool_use_id") in tool_use_ids
]
if not m["content"]:
m["content"] = [{"type": "text", "text": "(tool result removed)"}]
@ -1088,8 +1126,12 @@ def convert_messages_to_anthropic(
# and becomes invalid once merged.
if isinstance(m["content"], list):
m["content"] = [
b for b in m["content"]
if not (isinstance(b, dict) and b.get("type") in ("thinking", "redacted_thinking"))
b
for b in m["content"]
if not (
isinstance(b, dict)
and b.get("type") in ("thinking", "redacted_thinking")
)
]
prev_blocks = fixed[-1]["content"]
curr_blocks = m["content"]
@ -1117,9 +1159,8 @@ def convert_messages_to_anthropic(
# Signatures are Anthropic-proprietary. Third-party endpoints
# (MiniMax, Azure AI Foundry, self-hosted proxies) cannot validate
# them and will reject them outright. When targeting a third-party
# endpoint, strip ALL thinking/redacted_thinking blocks from every
# assistant message — the third-party will generate its own
# thinking blocks if it supports extended thinking.
# endpoint, downgrade thinking blocks to plain text and drop
# redacted_thinking blocks.
#
# For direct Anthropic (strategy following clawdbot/OpenClaw):
# 1. Strip thinking/redacted_thinking from all assistant messages
@ -1142,12 +1183,33 @@ def convert_messages_to_anthropic(
if m.get("role") != "assistant" or not isinstance(m.get("content"), list):
continue
if _is_third_party or idx != last_assistant_idx:
# Third-party endpoint: strip ALL thinking blocks from every
# assistant message — signatures are Anthropic-proprietary.
# Direct Anthropic: strip from non-latest assistant messages only.
if _is_third_party:
# Third-party endpoint: Anthropic signatures are proprietary
# and will be rejected. Downgrade thinking blocks to plain
# text so the model retains reasoning context across turns.
# (Direct Anthropic would validate signatures; third-party
# endpoints like z.ai / GLM-5.1 don't use signatures at all.)
_tp_content = []
for b in m["content"]:
if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES:
_tp_content.append(b)
continue
# redacted_thinking carries opaque data — drop it.
if b.get("type") == "redacted_thinking":
continue
# Regular thinking → plain text preserves reasoning for next turn.
thinking_text = b.get("thinking", "")
if thinking_text:
_tp_content.append({"type": "text", "text": thinking_text})
m["content"] = _tp_content or [
{"type": "text", "text": "(thinking elided)"}
]
elif idx != last_assistant_idx:
# Direct Anthropic: strip thinking from non-latest assistant
# messages to avoid stale-signature 400s.
stripped = [
b for b in m["content"]
b
for b in m["content"]
if not (isinstance(b, dict) and b.get("type") in _THINKING_TYPES)
]
m["content"] = stripped or [{"type": "text", "text": "(thinking elided)"}]
@ -1235,7 +1297,9 @@ def build_anthropic_kwargs(
Currently only supported on native Anthropic endpoints (not third-party
compatible ones).
"""
system, anthropic_messages = convert_messages_to_anthropic(messages, base_url=base_url)
system, anthropic_messages = convert_messages_to_anthropic(
messages, base_url=base_url
)
anthropic_tools = convert_tools_to_anthropic(tools) if tools else []
model = normalize_model_name(model, preserve_dots=preserve_dots)
@ -1287,7 +1351,10 @@ def build_anthropic_kwargs(
if block.get("type") == "tool_use" and "name" in block:
if not block["name"].startswith(_MCP_TOOL_PREFIX):
block["name"] = _MCP_TOOL_PREFIX + block["name"]
elif block.get("type") == "tool_result" and "tool_use_id" in block:
elif (
block.get("type") == "tool_result"
and "tool_use_id" in block
):
pass # tool_result uses ID, not name
kwargs: Dict[str, Any] = {
@ -1319,7 +1386,10 @@ def build_anthropic_kwargs(
# MiniMax Anthropic-compat endpoints support thinking (manual mode only,
# not adaptive). Haiku does NOT support extended thinking — skip entirely.
if reasoning_config and isinstance(reasoning_config, dict):
if reasoning_config.get("enabled") is not False and "haiku" not in model.lower():
if (
reasoning_config.get("enabled") is not False
and "haiku" not in model.lower()
):
effort = str(reasoning_config.get("effort", "medium")).lower()
budget = THINKING_BUDGET.get(effort, 8000)
if _supports_adaptive_thinking(model):
@ -1378,7 +1448,7 @@ def normalize_anthropic_response(
elif block.type == "tool_use":
name = block.name
if strip_tool_prefix and name.startswith(_MCP_TOOL_PREFIX):
name = name[len(_MCP_TOOL_PREFIX):]
name = name[len(_MCP_TOOL_PREFIX) :]
tool_calls.append(
SimpleNamespace(
id=block.id,

File diff suppressed because it is too large Load diff

View file

@ -120,13 +120,17 @@ class TestReadClaudeCodeCredentials:
def test_reads_valid_credentials(self, tmp_path, monkeypatch):
cred_file = tmp_path / ".claude" / ".credentials.json"
cred_file.parent.mkdir(parents=True)
cred_file.write_text(json.dumps({
"claudeAiOauth": {
"accessToken": "sk-ant-oat01-token",
"refreshToken": "sk-ant-oat01-refresh",
"expiresAt": int(time.time() * 1000) + 3600_000,
}
}))
cred_file.write_text(
json.dumps(
{
"claudeAiOauth": {
"accessToken": "sk-ant-oat01-token",
"refreshToken": "sk-ant-oat01-refresh",
"expiresAt": int(time.time() * 1000) + 3600_000,
}
}
)
)
monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path)
creds = read_claude_code_credentials()
assert creds is not None
@ -134,7 +138,9 @@ class TestReadClaudeCodeCredentials:
assert creds["refreshToken"] == "sk-ant-oat01-refresh"
assert creds["source"] == "claude_code_credentials_file"
def test_ignores_primary_api_key_for_native_anthropic_resolution(self, tmp_path, monkeypatch):
def test_ignores_primary_api_key_for_native_anthropic_resolution(
self, tmp_path, monkeypatch
):
claude_json = tmp_path / ".claude.json"
claude_json.write_text(json.dumps({"primaryApiKey": "sk-ant-api03-primary"}))
monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path)
@ -156,9 +162,9 @@ class TestReadClaudeCodeCredentials:
def test_returns_none_for_empty_access_token(self, tmp_path, monkeypatch):
cred_file = tmp_path / ".claude" / ".credentials.json"
cred_file.parent.mkdir(parents=True)
cred_file.write_text(json.dumps({
"claudeAiOauth": {"accessToken": "", "refreshToken": "x"}
}))
cred_file.write_text(
json.dumps({"claudeAiOauth": {"accessToken": "", "refreshToken": "x"}})
)
monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path)
assert read_claude_code_credentials() is None
@ -185,16 +191,22 @@ class TestResolveAnthropicToken:
monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path)
assert resolve_anthropic_token() == "sk-ant-oat01-mytoken"
def test_does_not_resolve_primary_api_key_as_native_anthropic_token(self, monkeypatch, tmp_path):
def test_does_not_resolve_primary_api_key_as_native_anthropic_token(
self, monkeypatch, tmp_path
):
monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
monkeypatch.delenv("ANTHROPIC_TOKEN", raising=False)
monkeypatch.delenv("CLAUDE_CODE_OAUTH_TOKEN", raising=False)
(tmp_path / ".claude.json").write_text(json.dumps({"primaryApiKey": "sk-ant-api03-primary"}))
(tmp_path / ".claude.json").write_text(
json.dumps({"primaryApiKey": "sk-ant-api03-primary"})
)
monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path)
assert resolve_anthropic_token() is None
def test_falls_back_to_api_key_when_no_oauth_sources_exist(self, monkeypatch, tmp_path):
def test_falls_back_to_api_key_when_no_oauth_sources_exist(
self, monkeypatch, tmp_path
):
monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-api03-mykey")
monkeypatch.delenv("ANTHROPIC_TOKEN", raising=False)
monkeypatch.delenv("CLAUDE_CODE_OAUTH_TOKEN", raising=False)
@ -228,39 +240,53 @@ class TestResolveAnthropicToken:
monkeypatch.delenv("CLAUDE_CODE_OAUTH_TOKEN", raising=False)
cred_file = tmp_path / ".claude" / ".credentials.json"
cred_file.parent.mkdir(parents=True)
cred_file.write_text(json.dumps({
"claudeAiOauth": {
"accessToken": "cc-auto-token",
"refreshToken": "refresh",
"expiresAt": int(time.time() * 1000) + 3600_000,
}
}))
cred_file.write_text(
json.dumps(
{
"claudeAiOauth": {
"accessToken": "cc-auto-token",
"refreshToken": "refresh",
"expiresAt": int(time.time() * 1000) + 3600_000,
}
}
)
)
monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path)
assert resolve_anthropic_token() == "cc-auto-token"
def test_prefers_refreshable_claude_code_credentials_over_static_anthropic_token(self, monkeypatch, tmp_path):
def test_prefers_refreshable_claude_code_credentials_over_static_anthropic_token(
self, monkeypatch, tmp_path
):
monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
monkeypatch.setenv("ANTHROPIC_TOKEN", "sk-ant-oat01-static-token")
monkeypatch.delenv("CLAUDE_CODE_OAUTH_TOKEN", raising=False)
cred_file = tmp_path / ".claude" / ".credentials.json"
cred_file.parent.mkdir(parents=True)
cred_file.write_text(json.dumps({
"claudeAiOauth": {
"accessToken": "cc-auto-token",
"refreshToken": "refresh-token",
"expiresAt": int(time.time() * 1000) + 3600_000,
}
}))
cred_file.write_text(
json.dumps(
{
"claudeAiOauth": {
"accessToken": "cc-auto-token",
"refreshToken": "refresh-token",
"expiresAt": int(time.time() * 1000) + 3600_000,
}
}
)
)
monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path)
assert resolve_anthropic_token() == "cc-auto-token"
def test_keeps_static_anthropic_token_when_only_non_refreshable_claude_key_exists(self, monkeypatch, tmp_path):
def test_keeps_static_anthropic_token_when_only_non_refreshable_claude_key_exists(
self, monkeypatch, tmp_path
):
monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
monkeypatch.setenv("ANTHROPIC_TOKEN", "sk-ant-oat01-static-token")
monkeypatch.delenv("CLAUDE_CODE_OAUTH_TOKEN", raising=False)
claude_json = tmp_path / ".claude.json"
claude_json.write_text(json.dumps({"primaryApiKey": "sk-ant-api03-managed-key"}))
claude_json.write_text(
json.dumps({"primaryApiKey": "sk-ant-api03-managed-key"})
)
monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path)
assert resolve_anthropic_token() == "sk-ant-oat01-static-token"
@ -280,17 +306,19 @@ class TestRefreshOauthToken:
"expiresAt": int(time.time() * 1000) - 3600_000,
}
mock_response = json.dumps({
"access_token": "new-token-abc",
"refresh_token": "new-refresh-456",
"expires_in": 7200,
}).encode()
mock_response = json.dumps(
{
"access_token": "new-token-abc",
"refresh_token": "new-refresh-456",
"expires_in": 7200,
}
).encode()
with patch("urllib.request.urlopen") as mock_urlopen:
mock_ctx = MagicMock()
mock_ctx.__enter__ = MagicMock(return_value=MagicMock(
read=MagicMock(return_value=mock_response)
))
mock_ctx.__enter__ = MagicMock(
return_value=MagicMock(read=MagicMock(return_value=mock_response))
)
mock_ctx.__exit__ = MagicMock(return_value=False)
mock_urlopen.return_value = mock_ctx
@ -348,38 +376,54 @@ class TestResolveWithRefresh:
# Set up expired creds with a refresh token
cred_file = tmp_path / ".claude" / ".credentials.json"
cred_file.parent.mkdir(parents=True)
cred_file.write_text(json.dumps({
"claudeAiOauth": {
"accessToken": "expired-tok",
"refreshToken": "valid-refresh",
"expiresAt": int(time.time() * 1000) - 3600_000,
}
}))
cred_file.write_text(
json.dumps(
{
"claudeAiOauth": {
"accessToken": "expired-tok",
"refreshToken": "valid-refresh",
"expiresAt": int(time.time() * 1000) - 3600_000,
}
}
)
)
monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path)
# Mock refresh to succeed
with patch("agent.anthropic_adapter._refresh_oauth_token", return_value="refreshed-token"):
with patch(
"agent.anthropic_adapter._refresh_oauth_token",
return_value="refreshed-token",
):
result = resolve_anthropic_token()
assert result == "refreshed-token"
def test_static_env_oauth_token_does_not_block_refreshable_claude_creds(self, monkeypatch, tmp_path):
def test_static_env_oauth_token_does_not_block_refreshable_claude_creds(
self, monkeypatch, tmp_path
):
monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
monkeypatch.setenv("ANTHROPIC_TOKEN", "sk-ant-oat01-expired-env-token")
monkeypatch.delenv("CLAUDE_CODE_OAUTH_TOKEN", raising=False)
cred_file = tmp_path / ".claude" / ".credentials.json"
cred_file.parent.mkdir(parents=True)
cred_file.write_text(json.dumps({
"claudeAiOauth": {
"accessToken": "expired-claude-creds-token",
"refreshToken": "valid-refresh",
"expiresAt": int(time.time() * 1000) - 3600_000,
}
}))
cred_file.write_text(
json.dumps(
{
"claudeAiOauth": {
"accessToken": "expired-claude-creds-token",
"refreshToken": "valid-refresh",
"expiresAt": int(time.time() * 1000) - 3600_000,
}
}
)
)
monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path)
with patch("agent.anthropic_adapter._refresh_oauth_token", return_value="refreshed-token"):
with patch(
"agent.anthropic_adapter._refresh_oauth_token",
return_value="refreshed-token",
):
result = resolve_anthropic_token()
assert result == "refreshed-token"
@ -400,13 +444,17 @@ class TestRunOauthSetupToken:
# Pre-create credential files that will be found after subprocess
cred_file = tmp_path / ".claude" / ".credentials.json"
cred_file.parent.mkdir(parents=True)
cred_file.write_text(json.dumps({
"claudeAiOauth": {
"accessToken": "from-cred-file",
"refreshToken": "refresh",
"expiresAt": int(time.time() * 1000) + 3600_000,
}
}))
cred_file.write_text(
json.dumps(
{
"claudeAiOauth": {
"accessToken": "from-cred-file",
"refreshToken": "refresh",
"expiresAt": int(time.time() * 1000) + 3600_000,
}
}
)
)
monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path)
with patch("subprocess.run") as mock_run:
@ -459,27 +507,45 @@ class TestRunOauthSetupToken:
class TestNormalizeModelName:
def test_strips_anthropic_prefix(self):
assert normalize_model_name("anthropic/claude-sonnet-4-20250514") == "claude-sonnet-4-20250514"
assert (
normalize_model_name("anthropic/claude-sonnet-4-20250514")
== "claude-sonnet-4-20250514"
)
def test_leaves_bare_name(self):
assert normalize_model_name("claude-sonnet-4-20250514") == "claude-sonnet-4-20250514"
assert (
normalize_model_name("claude-sonnet-4-20250514")
== "claude-sonnet-4-20250514"
)
def test_converts_dots_to_hyphens(self):
"""OpenRouter uses dots (4.6), Anthropic uses hyphens (4-6)."""
assert normalize_model_name("anthropic/claude-opus-4.6") == "claude-opus-4-6"
assert normalize_model_name("anthropic/claude-sonnet-4.5") == "claude-sonnet-4-5"
assert (
normalize_model_name("anthropic/claude-sonnet-4.5") == "claude-sonnet-4-5"
)
assert normalize_model_name("claude-opus-4.6") == "claude-opus-4-6"
def test_already_hyphenated_unchanged(self):
"""Names already in Anthropic format should pass through."""
assert normalize_model_name("claude-opus-4-6") == "claude-opus-4-6"
assert normalize_model_name("claude-opus-4-5-20251101") == "claude-opus-4-5-20251101"
assert (
normalize_model_name("claude-opus-4-5-20251101")
== "claude-opus-4-5-20251101"
)
def test_preserve_dots_for_alibaba_dashscope(self):
"""Alibaba/DashScope use dots in model names (e.g. qwen3.5-plus). Fixes #1739."""
assert normalize_model_name("qwen3.5-plus", preserve_dots=True) == "qwen3.5-plus"
assert normalize_model_name("anthropic/qwen3.5-plus", preserve_dots=True) == "qwen3.5-plus"
assert normalize_model_name("qwen3.5-flash", preserve_dots=True) == "qwen3.5-flash"
assert (
normalize_model_name("qwen3.5-plus", preserve_dots=True) == "qwen3.5-plus"
)
assert (
normalize_model_name("anthropic/qwen3.5-plus", preserve_dots=True)
== "qwen3.5-plus"
)
assert (
normalize_model_name("qwen3.5-flash", preserve_dots=True) == "qwen3.5-flash"
)
# ---------------------------------------------------------------------------
@ -536,7 +602,10 @@ class TestConvertMessages:
"role": "user",
"content": [
{"type": "text", "text": "Can you see this?"},
{"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
{
"type": "image_url",
"image_url": {"url": "https://example.com/cat.png"},
},
],
}
]
@ -548,7 +617,10 @@ class TestConvertMessages:
"role": "user",
"content": [
{"type": "text", "text": "Can you see this?"},
{"type": "image", "source": {"type": "url", "url": "https://example.com/cat.png"}},
{
"type": "image",
"source": {"type": "url", "url": "https://example.com/cat.png"},
},
],
}
]
@ -613,7 +685,10 @@ class TestConvertMessages:
"role": "assistant",
"content": "",
"tool_calls": [
{"id": "tc_1", "function": {"name": "test_tool", "arguments": "{}"}},
{
"id": "tc_1",
"function": {"name": "test_tool", "arguments": "{}"},
},
],
},
{"role": "tool", "tool_call_id": "tc_1", "content": "result data"},
@ -678,10 +753,9 @@ class TestConvertMessages:
# tc_gone has no matching tool_use — its tool_result should be stripped
for m in result:
if m["role"] == "user" and isinstance(m["content"], list):
assert all(
b.get("type") != "tool_result"
for b in m["content"]
), "Orphaned tool_result should have been stripped"
assert all(b.get("type") != "tool_result" for b in m["content"]), (
"Orphaned tool_result should have been stripped"
)
def test_strips_orphaned_tool_result_preserves_valid(self):
"""Orphaned tool_results are stripped while valid ones survive."""
@ -690,7 +764,10 @@ class TestConvertMessages:
"role": "assistant",
"content": "",
"tool_calls": [
{"id": "tc_valid", "function": {"name": "search", "arguments": "{}"}},
{
"id": "tc_valid",
"function": {"name": "search", "arguments": "{}"},
},
],
},
{"role": "tool", "tool_call_id": "tc_valid", "content": "good result"},
@ -709,7 +786,11 @@ class TestConvertMessages:
{
"role": "system",
"content": [
{"type": "text", "text": "System prompt", "cache_control": {"type": "ephemeral"}},
{
"type": "text",
"text": "System prompt",
"cache_control": {"type": "ephemeral"},
},
],
},
{"role": "user", "content": "Hi"},
@ -720,10 +801,12 @@ class TestConvertMessages:
assert system[0]["cache_control"] == {"type": "ephemeral"}
def test_assistant_cache_control_blocks_are_preserved(self):
messages = apply_anthropic_cache_control([
{"role": "system", "content": "System prompt"},
{"role": "assistant", "content": "Hello from assistant"},
])
messages = apply_anthropic_cache_control(
[
{"role": "system", "content": "System prompt"},
{"role": "assistant", "content": "Hello from assistant"},
]
)
_, result = convert_messages_to_anthropic(messages)
assistant_blocks = result[0]["content"]
@ -733,17 +816,23 @@ class TestConvertMessages:
assert assistant_blocks[0]["cache_control"] == {"type": "ephemeral"}
def test_tool_cache_control_is_preserved_on_tool_result_block(self):
messages = apply_anthropic_cache_control([
{"role": "system", "content": "System prompt"},
{
"role": "assistant",
"content": "",
"tool_calls": [
{"id": "tc_1", "function": {"name": "test_tool", "arguments": "{}"}},
],
},
{"role": "tool", "tool_call_id": "tc_1", "content": "result"},
], native_anthropic=True)
messages = apply_anthropic_cache_control(
[
{"role": "system", "content": "System prompt"},
{
"role": "assistant",
"content": "",
"tool_calls": [
{
"id": "tc_1",
"function": {"name": "test_tool", "arguments": "{}"},
},
],
},
{"role": "tool", "tool_call_id": "tc_1", "content": "result"},
],
native_anthropic=True,
)
_, result = convert_messages_to_anthropic(messages)
user_msg = [m for m in result if m["role"] == "user"][0]
@ -760,7 +849,10 @@ class TestConvertMessages:
"role": "assistant",
"content": "",
"tool_calls": [
{"id": "tc_1", "function": {"name": "test_tool", "arguments": "{}"}},
{
"id": "tc_1",
"function": {"name": "test_tool", "arguments": "{}"},
},
],
"reasoning_details": [
{
@ -774,10 +866,14 @@ class TestConvertMessages:
]
_, result = convert_messages_to_anthropic(messages)
assistant_blocks = next(msg for msg in result if msg["role"] == "assistant")["content"]
assistant_blocks = next(msg for msg in result if msg["role"] == "assistant")[
"content"
]
assert assistant_blocks[0]["type"] == "thinking"
assert assistant_blocks[0]["thinking"] == "Need to inspect the tool result first."
assert (
assistant_blocks[0]["thinking"] == "Need to inspect the tool result first."
)
assert assistant_blocks[0]["signature"] == "sig_123"
assert assistant_blocks[1]["type"] == "tool_use"
@ -832,25 +928,33 @@ class TestConvertMessages:
}
def test_empty_cached_assistant_tool_turn_converts_without_empty_text_block(self):
messages = apply_anthropic_cache_control([
{"role": "system", "content": "System prompt"},
{"role": "user", "content": "Find the skill"},
{
"role": "assistant",
"content": "",
"tool_calls": [
{"id": "tc_1", "function": {"name": "skill_view", "arguments": "{}"}},
],
},
{"role": "tool", "tool_call_id": "tc_1", "content": "result"},
])
messages = apply_anthropic_cache_control(
[
{"role": "system", "content": "System prompt"},
{"role": "user", "content": "Find the skill"},
{
"role": "assistant",
"content": "",
"tool_calls": [
{
"id": "tc_1",
"function": {"name": "skill_view", "arguments": "{}"},
},
],
},
{"role": "tool", "tool_call_id": "tc_1", "content": "result"},
]
)
_, result = convert_messages_to_anthropic(messages)
assistant_turn = next(msg for msg in result if msg["role"] == "assistant")
assistant_blocks = assistant_turn["content"]
assert all(not (b.get("type") == "text" and b.get("text") == "") for b in assistant_blocks)
assert all(
not (b.get("type") == "text" and b.get("text") == "")
for b in assistant_blocks
)
assert any(b.get("type") == "tool_use" for b in assistant_blocks)
def test_empty_user_message_string_gets_placeholder(self):
@ -888,7 +992,13 @@ class TestConvertMessages:
def test_user_message_with_empty_text_blocks_gets_placeholder(self):
"""User message with only empty text blocks should get placeholder."""
messages = [
{"role": "user", "content": [{"type": "text", "text": ""}, {"type": "text", "text": " "}]},
{
"role": "user",
"content": [
{"type": "text", "text": ""},
{"type": "text", "text": " "},
],
},
]
_, result = convert_messages_to_anthropic(messages)
assert result[0]["role"] == "user"
@ -1085,35 +1195,43 @@ class TestBuildAnthropicKwargs:
class TestGetAnthropicMaxOutput:
    """Checks for ``_get_anthropic_max_output`` model-id to output-limit lookups.

    The helper is imported inside each test, mirroring the existing layout of
    this class, so every test stands alone.
    """

    def test_opus_4_6(self):
        """A bare ``claude-opus-4-6`` id resolves to 128_000."""
        from agent.anthropic_adapter import _get_anthropic_max_output

        assert _get_anthropic_max_output("claude-opus-4-6") == 128_000

    def test_opus_4_6_variant(self):
        """Colon-separated suffixes after the model name do not change the limit."""
        from agent.anthropic_adapter import _get_anthropic_max_output

        assert _get_anthropic_max_output("claude-opus-4-6:1m:fast") == 128_000

    def test_sonnet_4_6(self):
        """A bare ``claude-sonnet-4-6`` id resolves to 64_000."""
        from agent.anthropic_adapter import _get_anthropic_max_output

        assert _get_anthropic_max_output("claude-sonnet-4-6") == 64_000

    def test_sonnet_4_date_stamped(self):
        """A date-stamped Sonnet 4 id resolves to the Sonnet 4 limit."""
        from agent.anthropic_adapter import _get_anthropic_max_output

        assert _get_anthropic_max_output("claude-sonnet-4-20250514") == 64_000

    def test_claude_3_5_sonnet(self):
        """A date-stamped Claude 3.5 Sonnet id resolves to 8_192."""
        from agent.anthropic_adapter import _get_anthropic_max_output

        assert _get_anthropic_max_output("claude-3-5-sonnet-20241022") == 8_192

    def test_claude_3_opus(self):
        """A date-stamped Claude 3 Opus id resolves to 4_096."""
        from agent.anthropic_adapter import _get_anthropic_max_output

        assert _get_anthropic_max_output("claude-3-opus-20240229") == 4_096

    def test_unknown_future_model(self):
        """A model id with no matching entry falls back to 128_000."""
        from agent.anthropic_adapter import _get_anthropic_max_output

        assert _get_anthropic_max_output("claude-ultra-5-20260101") == 128_000

    def test_longest_prefix_wins(self):
        """When several table keys match a model id, the most specific one wins."""
        from agent.anthropic_adapter import _get_anthropic_max_output

        # Original assertion, kept: date-stamped 3.5 Sonnet uses its own entry.
        assert _get_anthropic_max_output("claude-3-5-sonnet-20241022") == 8_192

        # Discriminating case: both "claude-opus-4" (32_000) and
        # "claude-opus-4-5" (64_000) match this id. The expected value differs
        # from the shorter key's limit AND from the 128_000 fallback, so neither
        # a shortest-match bug nor a silent fallback can make this pass.
        assert _get_anthropic_max_output("claude-opus-4-5-20251101") == 64_000

        # Only the shorter key matches here, so its limit must be returned.
        assert _get_anthropic_max_output("claude-opus-4-20250514") == 32_000
@ -1218,7 +1336,9 @@ class TestNormalizeResponse:
msg, reason = normalize_anthropic_response(self._make_response(blocks))
assert msg.content == "The answer is 42."
assert msg.reasoning == "Let me reason about this..."
assert msg.reasoning_details == [{"type": "thinking", "thinking": "Let me reason about this..."}]
assert msg.reasoning_details == [
{"type": "thinking", "thinking": "Let me reason about this..."}
]
def test_thinking_response_preserves_signature(self):
blocks = [
@ -1235,15 +1355,9 @@ class TestNormalizeResponse:
def test_stop_reason_mapping(self):
    """Anthropic stop reasons map onto the normalized finish reasons.

    Each stop reason is normalized exactly once; the earlier text ran every
    call twice and rebound the same names, which added nothing to the check.
    """
    block = SimpleNamespace(type="text", text="x")
    # Anthropic ``stop_reason`` -> finish reason expected from normalization.
    expected_reasons = {
        "end_turn": "stop",
        "tool_use": "tool_calls",
        "max_tokens": "length",
    }
    for stop_reason, finish_reason in expected_reasons.items():
        _, reason = normalize_anthropic_response(
            self._make_response([block], stop_reason)
        )
        # The stop reason is attached so a failure names the offending case.
        assert reason == finish_reason, stop_reason
@ -1306,7 +1420,11 @@ class TestThinkingBlockSignatureManagement:
{"id": "tc_1", "function": {"name": "tool1", "arguments": "{}"}},
],
"reasoning_details": [
{"type": "thinking", "thinking": "Old reasoning.", "signature": "sig_old"},
{
"type": "thinking",
"thinking": "Old reasoning.",
"signature": "sig_old",
},
],
},
{"role": "tool", "tool_call_id": "tc_1", "content": "result 1"},
@ -1317,7 +1435,11 @@ class TestThinkingBlockSignatureManagement:
{"id": "tc_2", "function": {"name": "tool2", "arguments": "{}"}},
],
"reasoning_details": [
{"type": "thinking", "thinking": "Latest reasoning.", "signature": "sig_new"},
{
"type": "thinking",
"thinking": "Latest reasoning.",
"signature": "sig_new",
},
],
},
{"role": "tool", "tool_call_id": "tc_2", "content": "result 2"},
@ -1348,7 +1470,11 @@ class TestThinkingBlockSignatureManagement:
"role": "assistant",
"content": "The answer is 42.",
"reasoning_details": [
{"type": "thinking", "thinking": "Deep thought.", "signature": "sig_valid"},
{
"type": "thinking",
"thinking": "Deep thought.",
"signature": "sig_valid",
},
],
},
]
@ -1445,14 +1571,22 @@ class TestThinkingBlockSignatureManagement:
"role": "assistant",
"content": "First response.",
"reasoning_details": [
{"type": "thinking", "thinking": "First thought.", "signature": "sig_1"},
{
"type": "thinking",
"thinking": "First thought.",
"signature": "sig_1",
},
],
},
{
"role": "assistant",
"content": "Second response.",
"reasoning_details": [
{"type": "thinking", "thinking": "Second thought.", "signature": "sig_2"},
{
"type": "thinking",
"thinking": "Second thought.",
"signature": "sig_2",
},
],
},
]
@ -1532,12 +1666,57 @@ class TestThinkingBlockSignatureManagement:
# Last one: thinking preserved
last_thinking = [
b for b in assistants[2]["content"]
b
for b in assistants[2]["content"]
if isinstance(b, dict) and b.get("type") == "thinking"
]
assert len(last_thinking) == 1
assert last_thinking[0]["signature"] == "sig_3"
def test_third_party_downgrades_thinking_to_text(self):
    """Thinking sent to a third-party Anthropic-compatible endpoint becomes text.

    An assistant turn carrying a signed ``thinking`` entry and a
    ``redacted_thinking`` entry is converted with a third-party ``base_url``.
    The converted content must contain neither block type, while both the
    reasoning text and the visible answer must appear as text blocks.
    """
    reasoning_details = [
        {
            "type": "thinking",
            "thinking": "Third-party-safe reasoning.",
            "signature": "sig",
        },
        {"type": "redacted_thinking", "data": "opaque"},
    ]
    assistant_turn = {
        "role": "assistant",
        "content": "Visible answer.",
        "reasoning_details": reasoning_details,
    }

    _, converted = convert_messages_to_anthropic(
        [assistant_turn],
        base_url="https://api.z.ai/api/paas/v4",
    )
    content = converted[0]["content"]

    # No native thinking block of either flavour may survive the conversion.
    block_types = [blk.get("type") for blk in content]
    assert "thinking" not in block_types
    assert "redacted_thinking" not in block_types

    # The reasoning and the answer are both still present, as plain text.
    texts = [blk.get("text", "") for blk in content if blk.get("type") == "text"]
    assert "Third-party-safe reasoning." in texts
    assert "Visible answer." in texts
def test_third_party_thinking_only_content_gets_placeholder(self):
    """A third-party turn holding only redacted thinking yields placeholder text.

    The assistant message has empty content and a single ``redacted_thinking``
    entry; after conversion its content must be exactly one placeholder text
    block.
    """
    redacted_only_turn = {
        "role": "assistant",
        "content": "",
        "reasoning_details": [
            {"type": "redacted_thinking", "data": "opaque"},
        ],
    }

    _, converted = convert_messages_to_anthropic(
        [redacted_only_turn],
        base_url="https://api.minimax.io/anthropic",
    )

    placeholder_block = {"type": "text", "text": "(thinking elided)"}
    assert converted[0]["content"] == [placeholder_block]
# ---------------------------------------------------------------------------
# Tool choice

File diff suppressed because it is too large Load diff