fix(anthropic): preserve third-party thinking continuity

When targeting a third-party Anthropic-compatible endpoint, downgrade thinking blocks in assistant messages to plain text blocks so reasoning context survives across turns, and drop redacted_thinking blocks and stale Anthropic signatures that such endpoints cannot validate. Add regression tests for the third-party thinking conversion, and keep z.ai preserved-thinking behavior server-driven by removing the explicit clear_thinking injection.
2026-07-03 12:23:08 +00:00 · 2026-04-15 20:54:44 +08:00 · 2026-04-15 20:54:44 +08:00 · d2f043f9cf
commit d2f043f9cf
parent 722331a57d
4 changed files with 4007 additions and 1591 deletions
--- a/agent/anthropic_adapter.py
+++ b/agent/anthropic_adapter.py
@ -42,26 +42,26 @@ ADAPTIVE_EFFORT_MAP = {
 # starves thinking-enabled models (thinking tokens count toward the limit).
 _ANTHROPIC_OUTPUT_LIMITS = {
    # Claude 4.6
-    "claude-opus-4-6":   128_000,
-    "claude-sonnet-4-6":  64_000,
+    "claude-opus-4-6": 128_000,
+    "claude-sonnet-4-6": 64_000,
    # Claude 4.5
-    "claude-opus-4-5":    64_000,
-    "claude-sonnet-4-5":  64_000,
-    "claude-haiku-4-5":   64_000,
+    "claude-opus-4-5": 64_000,
+    "claude-sonnet-4-5": 64_000,
+    "claude-haiku-4-5": 64_000,
    # Claude 4
-    "claude-opus-4":      32_000,
-    "claude-sonnet-4":    64_000,
+    "claude-opus-4": 32_000,
+    "claude-sonnet-4": 64_000,
    # Claude 3.7
    "claude-3-7-sonnet": 128_000,
    # Claude 3.5
-    "claude-3-5-sonnet":   8_192,
-    "claude-3-5-haiku":    8_192,
+    "claude-3-5-sonnet": 8_192,
+    "claude-3-5-haiku": 8_192,
    # Claude 3
-    "claude-3-opus":       4_096,
-    "claude-3-sonnet":     4_096,
-    "claude-3-haiku":      4_096,
+    "claude-3-opus": 4_096,
+    "claude-3-sonnet": 4_096,
+    "claude-3-haiku": 4_096,
    # Third-party Anthropic-compatible providers
-    "minimax":            131_072,
+    "minimax": 131_072,
 }

 # For any model not in the table, assume the highest current limit.
@ -138,7 +138,9 @@ def _detect_claude_code_version() -> str:
        try:
            result = _sp.run(
                [cmd, "--version"],
-                capture_output=True, text=True, timeout=5,
+                capture_output=True,
+                text=True,
+                timeout=5,
            )
            if result.returncode == 0 and result.stdout.strip():
                # Output is like "2.1.74 (Claude Code)" or just "2.1.74"
@ -224,7 +226,9 @@ def _requires_bearer_auth(base_url: str | None) -> bool:
    if not normalized:
        return False
    normalized = normalized.rstrip("/").lower()
-    return normalized.startswith(("https://api.minimax.io/anthropic", "https://api.minimaxi.com/anthropic"))
+    return normalized.startswith(
+        ("https://api.minimax.io/anthropic", "https://api.minimaxi.com/anthropic")
+    )


 def _common_betas_for_base_url(base_url: str | None) -> list[str]:
@ -357,7 +361,9 @@ def is_claude_code_token_valid(creds: Dict[str, Any]) -> bool:
    return now_ms < (expires_at - 60_000)


-def refresh_anthropic_oauth_pure(refresh_token: str, *, use_json: bool = False) -> Dict[str, Any]:
+def refresh_anthropic_oauth_pure(
+    refresh_token: str, *, use_json: bool = False
+) -> Dict[str, Any]:
    """Refresh an Anthropic OAuth token without mutating local credential files."""
    import time
    import urllib.parse
@ -368,18 +374,22 @@ def refresh_anthropic_oauth_pure(refresh_token: str, *, use_json: bool = False)

    client_id = "9d1c250a-e61b-44d9-88ed-5944d1962f5e"
    if use_json:
-        data = json.dumps({
-            "grant_type": "refresh_token",
-            "refresh_token": refresh_token,
-            "client_id": client_id,
-        }).encode()
+        data = json.dumps(
+            {
+                "grant_type": "refresh_token",
+                "refresh_token": refresh_token,
+                "client_id": client_id,
+            }
+        ).encode()
        content_type = "application/json"
    else:
-        data = urllib.parse.urlencode({
-            "grant_type": "refresh_token",
-            "refresh_token": refresh_token,
-            "client_id": client_id,
-        }).encode()
+        data = urllib.parse.urlencode(
+            {
+                "grant_type": "refresh_token",
+                "refresh_token": refresh_token,
+                "client_id": client_id,
+            }
+        ).encode()
        content_type = "application/x-www-form-urlencoded"

    token_endpoints = [
@ -485,7 +495,9 @@ def _write_claude_code_credentials(
        logger.debug("Failed to write refreshed credentials: %s", e)


-def _resolve_claude_code_token_from_credentials(creds: Optional[Dict[str, Any]] = None) -> Optional[str]:
+def _resolve_claude_code_token_from_credentials(
+    creds: Optional[Dict[str, Any]] = None,
+) -> Optional[str]:
    """Resolve a token from Claude Code credential files, refreshing if needed."""
    creds = creds or read_claude_code_credentials()
    if creds and is_claude_code_token_valid(creds):
@ -496,11 +508,15 @@ def _resolve_claude_code_token_from_credentials(creds: Optional[Dict[str, Any]]
        refreshed = _refresh_oauth_token(creds)
        if refreshed:
            return refreshed
-        logger.debug("Token refresh failed — re-run 'claude setup-token' to reauthenticate")
+        logger.debug(
+            "Token refresh failed — re-run 'claude setup-token' to reauthenticate"
+        )
    return None


-def _prefer_refreshable_claude_code_token(env_token: str, creds: Optional[Dict[str, Any]]) -> Optional[str]:
+def _prefer_refreshable_claude_code_token(
+    env_token: str, creds: Optional[Dict[str, Any]]
+) -> Optional[str]:
    """Prefer Claude Code creds when a persisted env OAuth token would shadow refresh.

    Hermes historically persisted setup tokens into ANTHROPIC_TOKEN. That makes
@ -624,9 +640,11 @@ def _generate_pkce() -> tuple:
    import secrets

    verifier = base64.urlsafe_b64encode(secrets.token_bytes(32)).rstrip(b"=").decode()
-    challenge = base64.urlsafe_b64encode(
-        hashlib.sha256(verifier.encode()).digest()
-    ).rstrip(b"=").decode()
+    challenge = (
+        base64.urlsafe_b64encode(hashlib.sha256(verifier.encode()).digest())
+        .rstrip(b"=")
+        .decode()
+    )
    return verifier, challenge


@ -687,14 +705,16 @@ def run_hermes_oauth_login_pure() -> Optional[Dict[str, Any]]:
    try:
        import urllib.request

-        exchange_data = json.dumps({
-            "grant_type": "authorization_code",
-            "client_id": _OAUTH_CLIENT_ID,
-            "code": code,
-            "state": state,
-            "redirect_uri": _OAUTH_REDIRECT_URI,
-            "code_verifier": verifier,
-        }).encode()
+        exchange_data = json.dumps(
+            {
+                "grant_type": "authorization_code",
+                "client_id": _OAUTH_CLIENT_ID,
+                "code": code,
+                "state": state,
+                "redirect_uri": _OAUTH_REDIRECT_URI,
+                "code_verifier": verifier,
+            }
+        ).encode()

        req = urllib.request.Request(
            _OAUTH_TOKEN_URL,
@ -755,7 +775,7 @@ def normalize_model_name(model: str, preserve_dots: bool = False) -> str:
    """
    lower = model.lower()
    if lower.startswith("anthropic/"):
-        model = model[len("anthropic/"):]
+        model = model[len("anthropic/") :]
    if not preserve_dots:
        # OpenRouter uses dots for version separators (claude-opus-4.6),
        # Anthropic uses hyphens (claude-opus-4-6). Convert dots to hyphens.
@ -770,6 +790,7 @@ def _sanitize_tool_id(tool_id: str) -> str:
    characters with underscores and ensure non-empty.
    """
    import re
+
    if not tool_id:
        return "tool_0"
    sanitized = re.sub(r"[^a-zA-Z0-9_-]", "_", tool_id)
@ -783,11 +804,15 @@ def convert_tools_to_anthropic(tools: List[Dict]) -> List[Dict]:
    result = []
    for t in tools:
        fn = t.get("function", {})
-        result.append({
-            "name": fn.get("name", ""),
-            "description": fn.get("description", ""),
-            "input_schema": fn.get("parameters", {"type": "object", "properties": {}}),
-        })
+        result.append(
+            {
+                "name": fn.get("name", ""),
+                "description": fn.get("description", ""),
+                "input_schema": fn.get(
+                    "parameters", {"type": "object", "properties": {}}
+                ),
+            }
+        )
    return result


@ -801,7 +826,7 @@ def _image_source_from_openai_url(url: str) -> Dict[str, str]:
        header, _, data = url.partition(",")
        media_type = "image/jpeg"
        if header.startswith("data:"):
-            mime_part = header[len("data:"):].split(";", 1)[0].strip()
+            mime_part = header[len("data:") :].split(";", 1)[0].strip()
            if mime_part.startswith("image/"):
                media_type = mime_part
        return {
@ -828,7 +853,11 @@ def _convert_content_part_to_anthropic(part: Any) -> Optional[Dict[str, Any]]:
        block: Dict[str, Any] = {"type": "text", "text": part.get("text", "")}
    elif ptype in {"image_url", "input_image"}:
        image_value = part.get("image_url", {})
-        url = image_value.get("url", "") if isinstance(image_value, dict) else str(image_value or "")
+        url = (
+            image_value.get("url", "")
+            if isinstance(image_value, dict)
+            else str(image_value or "")
+        )
        block = {"type": "image", "source": _image_source_from_openai_url(url)}
    else:
        block = dict(part)
@ -864,7 +893,10 @@ def _to_plain_data(value: Any, *, _depth: int = 0, _path: Optional[set] = None)
        return result
    if isinstance(value, dict):
        _path.add(obj_id)
-        result = {k: _to_plain_data(v, _depth=_depth + 1, _path=_path) for k, v in value.items()}
+        result = {
+            k: _to_plain_data(v, _depth=_depth + 1, _path=_path)
+            for k, v in value.items()
+        }
        _path.discard(obj_id)
        return result
    if isinstance(value, (list, tuple)):
@ -925,9 +957,10 @@ def convert_messages_to_anthropic(
    system_prompt is a string or list of content blocks (when cache_control present).

    When *base_url* is provided and points to a third-party Anthropic-compatible
-    endpoint, all thinking block signatures are stripped.  Signatures are
-    Anthropic-proprietary — third-party endpoints cannot validate them and will
-    reject them with HTTP 400 "Invalid signature in thinking block".
+    endpoint, Anthropic thinking signatures are removed. Signed thinking blocks
+    are downgraded to plain text to preserve useful reasoning context, while
+    redacted_thinking blocks are dropped. Third-party endpoints cannot validate
+    Anthropic signatures and may reject them with HTTP 400.
    """
    system = None
    result = []
@ -970,12 +1003,14 @@ def convert_messages_to_anthropic(
                    parsed_args = json.loads(args) if isinstance(args, str) else args
                except (json.JSONDecodeError, ValueError):
                    parsed_args = {}
-                blocks.append({
-                    "type": "tool_use",
-                    "id": _sanitize_tool_id(tc.get("id", "")),
-                    "name": fn.get("name", ""),
-                    "input": parsed_args,
-                })
+                blocks.append(
+                    {
+                        "type": "tool_use",
+                        "id": _sanitize_tool_id(tc.get("id", "")),
+                        "name": fn.get("name", ""),
+                        "input": parsed_args,
+                    }
+                )
            # Anthropic rejects empty assistant content
            effective = blocks or content
            if not effective or effective == "":
@ -985,7 +1020,9 @@ def convert_messages_to_anthropic(

        if role == "tool":
            # Sanitize tool_use_id and ensure non-empty content
-            result_content = content if isinstance(content, str) else json.dumps(content)
+            result_content = (
+                content if isinstance(content, str) else json.dumps(content)
+            )
            if not result_content:
                result_content = "(no output)"
            tool_result = {
@ -1057,7 +1094,8 @@ def convert_messages_to_anthropic(
            m["content"] = [
                b
                for b in m["content"]
-                if b.get("type") != "tool_result" or b.get("tool_use_id") in tool_use_ids
+                if b.get("type") != "tool_result"
+                or b.get("tool_use_id") in tool_use_ids
            ]
            if not m["content"]:
                m["content"] = [{"type": "text", "text": "(tool result removed)"}]
@ -1088,8 +1126,12 @@ def convert_messages_to_anthropic(
                # and becomes invalid once merged.
                if isinstance(m["content"], list):
                    m["content"] = [
-                        b for b in m["content"]
-                        if not (isinstance(b, dict) and b.get("type") in ("thinking", "redacted_thinking"))
+                        b
+                        for b in m["content"]
+                        if not (
+                            isinstance(b, dict)
+                            and b.get("type") in ("thinking", "redacted_thinking")
+                        )
                    ]
                prev_blocks = fixed[-1]["content"]
                curr_blocks = m["content"]
@ -1117,9 +1159,8 @@ def convert_messages_to_anthropic(
    # Signatures are Anthropic-proprietary.  Third-party endpoints
    # (MiniMax, Azure AI Foundry, self-hosted proxies) cannot validate
    # them and will reject them outright.  When targeting a third-party
-    # endpoint, strip ALL thinking/redacted_thinking blocks from every
-    # assistant message — the third-party will generate its own
-    # thinking blocks if it supports extended thinking.
+    # endpoint, downgrade thinking blocks to plain text and drop
+    # redacted_thinking blocks.
    #
    # For direct Anthropic (strategy following clawdbot/OpenClaw):
    # 1. Strip thinking/redacted_thinking from all assistant messages
@ -1142,12 +1183,33 @@ def convert_messages_to_anthropic(
        if m.get("role") != "assistant" or not isinstance(m.get("content"), list):
            continue

-        if _is_third_party or idx != last_assistant_idx:
-            # Third-party endpoint: strip ALL thinking blocks from every
-            # assistant message — signatures are Anthropic-proprietary.
-            # Direct Anthropic: strip from non-latest assistant messages only.
+        if _is_third_party:
+            # Third-party endpoint: Anthropic signatures are proprietary
+            # and will be rejected.  Downgrade thinking blocks to plain
+            # text so the model retains reasoning context across turns.
+            # (Direct Anthropic would validate signatures; third-party
+            # endpoints like z.ai / GLM-5.1 don't use signatures at all.)
+            _tp_content = []
+            for b in m["content"]:
+                if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES:
+                    _tp_content.append(b)
+                    continue
+                # redacted_thinking carries opaque data — drop it.
+                if b.get("type") == "redacted_thinking":
+                    continue
+                # Regular thinking → plain text preserves reasoning for next turn.
+                thinking_text = b.get("thinking", "")
+                if thinking_text:
+                    _tp_content.append({"type": "text", "text": thinking_text})
+            m["content"] = _tp_content or [
+                {"type": "text", "text": "(thinking elided)"}
+            ]
+        elif idx != last_assistant_idx:
+            # Direct Anthropic: strip thinking from non-latest assistant
+            # messages to avoid stale-signature 400s.
            stripped = [
-                b for b in m["content"]
+                b
+                for b in m["content"]
                if not (isinstance(b, dict) and b.get("type") in _THINKING_TYPES)
            ]
            m["content"] = stripped or [{"type": "text", "text": "(thinking elided)"}]
@ -1235,7 +1297,9 @@ def build_anthropic_kwargs(
    Currently only supported on native Anthropic endpoints (not third-party
    compatible ones).
    """
-    system, anthropic_messages = convert_messages_to_anthropic(messages, base_url=base_url)
+    system, anthropic_messages = convert_messages_to_anthropic(
+        messages, base_url=base_url
+    )
    anthropic_tools = convert_tools_to_anthropic(tools) if tools else []

    model = normalize_model_name(model, preserve_dots=preserve_dots)
@ -1287,7 +1351,10 @@ def build_anthropic_kwargs(
                        if block.get("type") == "tool_use" and "name" in block:
                            if not block["name"].startswith(_MCP_TOOL_PREFIX):
                                block["name"] = _MCP_TOOL_PREFIX + block["name"]
-                        elif block.get("type") == "tool_result" and "tool_use_id" in block:
+                        elif (
+                            block.get("type") == "tool_result"
+                            and "tool_use_id" in block
+                        ):
                            pass  # tool_result uses ID, not name

    kwargs: Dict[str, Any] = {
@ -1319,7 +1386,10 @@ def build_anthropic_kwargs(
    # MiniMax Anthropic-compat endpoints support thinking (manual mode only,
    # not adaptive).  Haiku does NOT support extended thinking — skip entirely.
    if reasoning_config and isinstance(reasoning_config, dict):
-        if reasoning_config.get("enabled") is not False and "haiku" not in model.lower():
+        if (
+            reasoning_config.get("enabled") is not False
+            and "haiku" not in model.lower()
+        ):
            effort = str(reasoning_config.get("effort", "medium")).lower()
            budget = THINKING_BUDGET.get(effort, 8000)
            if _supports_adaptive_thinking(model):
@ -1378,7 +1448,7 @@ def normalize_anthropic_response(
        elif block.type == "tool_use":
            name = block.name
            if strip_tool_prefix and name.startswith(_MCP_TOOL_PREFIX):
-                name = name[len(_MCP_TOOL_PREFIX):]
+                name = name[len(_MCP_TOOL_PREFIX) :]
            tool_calls.append(
                SimpleNamespace(
                    id=block.id,
--- a/run_agent.py
+++ b/run_agent.py
--- a/tests/agent/test_anthropic_adapter.py
+++ b/tests/agent/test_anthropic_adapter.py
@ -120,13 +120,17 @@ class TestReadClaudeCodeCredentials:
    def test_reads_valid_credentials(self, tmp_path, monkeypatch):
        cred_file = tmp_path / ".claude" / ".credentials.json"
        cred_file.parent.mkdir(parents=True)
-        cred_file.write_text(json.dumps({
-            "claudeAiOauth": {
-                "accessToken": "sk-ant-oat01-token",
-                "refreshToken": "sk-ant-oat01-refresh",
-                "expiresAt": int(time.time() * 1000) + 3600_000,
-            }
-        }))
+        cred_file.write_text(
+            json.dumps(
+                {
+                    "claudeAiOauth": {
+                        "accessToken": "sk-ant-oat01-token",
+                        "refreshToken": "sk-ant-oat01-refresh",
+                        "expiresAt": int(time.time() * 1000) + 3600_000,
+                    }
+                }
+            )
+        )
        monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path)
        creds = read_claude_code_credentials()
        assert creds is not None
@ -134,7 +138,9 @@ class TestReadClaudeCodeCredentials:
        assert creds["refreshToken"] == "sk-ant-oat01-refresh"
        assert creds["source"] == "claude_code_credentials_file"

-    def test_ignores_primary_api_key_for_native_anthropic_resolution(self, tmp_path, monkeypatch):
+    def test_ignores_primary_api_key_for_native_anthropic_resolution(
+        self, tmp_path, monkeypatch
+    ):
        claude_json = tmp_path / ".claude.json"
        claude_json.write_text(json.dumps({"primaryApiKey": "sk-ant-api03-primary"}))
        monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path)
@ -156,9 +162,9 @@ class TestReadClaudeCodeCredentials:
    def test_returns_none_for_empty_access_token(self, tmp_path, monkeypatch):
        cred_file = tmp_path / ".claude" / ".credentials.json"
        cred_file.parent.mkdir(parents=True)
-        cred_file.write_text(json.dumps({
-            "claudeAiOauth": {"accessToken": "", "refreshToken": "x"}
-        }))
+        cred_file.write_text(
+            json.dumps({"claudeAiOauth": {"accessToken": "", "refreshToken": "x"}})
+        )
        monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path)
        assert read_claude_code_credentials() is None

@ -185,16 +191,22 @@ class TestResolveAnthropicToken:
        monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path)
        assert resolve_anthropic_token() == "sk-ant-oat01-mytoken"

-    def test_does_not_resolve_primary_api_key_as_native_anthropic_token(self, monkeypatch, tmp_path):
+    def test_does_not_resolve_primary_api_key_as_native_anthropic_token(
+        self, monkeypatch, tmp_path
+    ):
        monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
        monkeypatch.delenv("ANTHROPIC_TOKEN", raising=False)
        monkeypatch.delenv("CLAUDE_CODE_OAUTH_TOKEN", raising=False)
-        (tmp_path / ".claude.json").write_text(json.dumps({"primaryApiKey": "sk-ant-api03-primary"}))
+        (tmp_path / ".claude.json").write_text(
+            json.dumps({"primaryApiKey": "sk-ant-api03-primary"})
+        )
        monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path)

        assert resolve_anthropic_token() is None

-    def test_falls_back_to_api_key_when_no_oauth_sources_exist(self, monkeypatch, tmp_path):
+    def test_falls_back_to_api_key_when_no_oauth_sources_exist(
+        self, monkeypatch, tmp_path
+    ):
        monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-api03-mykey")
        monkeypatch.delenv("ANTHROPIC_TOKEN", raising=False)
        monkeypatch.delenv("CLAUDE_CODE_OAUTH_TOKEN", raising=False)
@ -228,39 +240,53 @@ class TestResolveAnthropicToken:
        monkeypatch.delenv("CLAUDE_CODE_OAUTH_TOKEN", raising=False)
        cred_file = tmp_path / ".claude" / ".credentials.json"
        cred_file.parent.mkdir(parents=True)
-        cred_file.write_text(json.dumps({
-            "claudeAiOauth": {
-                "accessToken": "cc-auto-token",
-                "refreshToken": "refresh",
-                "expiresAt": int(time.time() * 1000) + 3600_000,
-            }
-        }))
+        cred_file.write_text(
+            json.dumps(
+                {
+                    "claudeAiOauth": {
+                        "accessToken": "cc-auto-token",
+                        "refreshToken": "refresh",
+                        "expiresAt": int(time.time() * 1000) + 3600_000,
+                    }
+                }
+            )
+        )
        monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path)
        assert resolve_anthropic_token() == "cc-auto-token"

-    def test_prefers_refreshable_claude_code_credentials_over_static_anthropic_token(self, monkeypatch, tmp_path):
+    def test_prefers_refreshable_claude_code_credentials_over_static_anthropic_token(
+        self, monkeypatch, tmp_path
+    ):
        monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
        monkeypatch.setenv("ANTHROPIC_TOKEN", "sk-ant-oat01-static-token")
        monkeypatch.delenv("CLAUDE_CODE_OAUTH_TOKEN", raising=False)
        cred_file = tmp_path / ".claude" / ".credentials.json"
        cred_file.parent.mkdir(parents=True)
-        cred_file.write_text(json.dumps({
-            "claudeAiOauth": {
-                "accessToken": "cc-auto-token",
-                "refreshToken": "refresh-token",
-                "expiresAt": int(time.time() * 1000) + 3600_000,
-            }
-        }))
+        cred_file.write_text(
+            json.dumps(
+                {
+                    "claudeAiOauth": {
+                        "accessToken": "cc-auto-token",
+                        "refreshToken": "refresh-token",
+                        "expiresAt": int(time.time() * 1000) + 3600_000,
+                    }
+                }
+            )
+        )
        monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path)

        assert resolve_anthropic_token() == "cc-auto-token"

-    def test_keeps_static_anthropic_token_when_only_non_refreshable_claude_key_exists(self, monkeypatch, tmp_path):
+    def test_keeps_static_anthropic_token_when_only_non_refreshable_claude_key_exists(
+        self, monkeypatch, tmp_path
+    ):
        monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
        monkeypatch.setenv("ANTHROPIC_TOKEN", "sk-ant-oat01-static-token")
        monkeypatch.delenv("CLAUDE_CODE_OAUTH_TOKEN", raising=False)
        claude_json = tmp_path / ".claude.json"
-        claude_json.write_text(json.dumps({"primaryApiKey": "sk-ant-api03-managed-key"}))
+        claude_json.write_text(
+            json.dumps({"primaryApiKey": "sk-ant-api03-managed-key"})
+        )
        monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path)

        assert resolve_anthropic_token() == "sk-ant-oat01-static-token"
@ -280,17 +306,19 @@ class TestRefreshOauthToken:
            "expiresAt": int(time.time() * 1000) - 3600_000,
        }

-        mock_response = json.dumps({
-            "access_token": "new-token-abc",
-            "refresh_token": "new-refresh-456",
-            "expires_in": 7200,
-        }).encode()
+        mock_response = json.dumps(
+            {
+                "access_token": "new-token-abc",
+                "refresh_token": "new-refresh-456",
+                "expires_in": 7200,
+            }
+        ).encode()

        with patch("urllib.request.urlopen") as mock_urlopen:
            mock_ctx = MagicMock()
-            mock_ctx.__enter__ = MagicMock(return_value=MagicMock(
-                read=MagicMock(return_value=mock_response)
-            ))
+            mock_ctx.__enter__ = MagicMock(
+                return_value=MagicMock(read=MagicMock(return_value=mock_response))
+            )
            mock_ctx.__exit__ = MagicMock(return_value=False)
            mock_urlopen.return_value = mock_ctx

@ -348,38 +376,54 @@ class TestResolveWithRefresh:
        # Set up expired creds with a refresh token
        cred_file = tmp_path / ".claude" / ".credentials.json"
        cred_file.parent.mkdir(parents=True)
-        cred_file.write_text(json.dumps({
-            "claudeAiOauth": {
-                "accessToken": "expired-tok",
-                "refreshToken": "valid-refresh",
-                "expiresAt": int(time.time() * 1000) - 3600_000,
-            }
-        }))
+        cred_file.write_text(
+            json.dumps(
+                {
+                    "claudeAiOauth": {
+                        "accessToken": "expired-tok",
+                        "refreshToken": "valid-refresh",
+                        "expiresAt": int(time.time() * 1000) - 3600_000,
+                    }
+                }
+            )
+        )
        monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path)

        # Mock refresh to succeed
-        with patch("agent.anthropic_adapter._refresh_oauth_token", return_value="refreshed-token"):
+        with patch(
+            "agent.anthropic_adapter._refresh_oauth_token",
+            return_value="refreshed-token",
+        ):
            result = resolve_anthropic_token()

        assert result == "refreshed-token"

-    def test_static_env_oauth_token_does_not_block_refreshable_claude_creds(self, monkeypatch, tmp_path):
+    def test_static_env_oauth_token_does_not_block_refreshable_claude_creds(
+        self, monkeypatch, tmp_path
+    ):
        monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
        monkeypatch.setenv("ANTHROPIC_TOKEN", "sk-ant-oat01-expired-env-token")
        monkeypatch.delenv("CLAUDE_CODE_OAUTH_TOKEN", raising=False)

        cred_file = tmp_path / ".claude" / ".credentials.json"
        cred_file.parent.mkdir(parents=True)
-        cred_file.write_text(json.dumps({
-            "claudeAiOauth": {
-                "accessToken": "expired-claude-creds-token",
-                "refreshToken": "valid-refresh",
-                "expiresAt": int(time.time() * 1000) - 3600_000,
-            }
-        }))
+        cred_file.write_text(
+            json.dumps(
+                {
+                    "claudeAiOauth": {
+                        "accessToken": "expired-claude-creds-token",
+                        "refreshToken": "valid-refresh",
+                        "expiresAt": int(time.time() * 1000) - 3600_000,
+                    }
+                }
+            )
+        )
        monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path)

-        with patch("agent.anthropic_adapter._refresh_oauth_token", return_value="refreshed-token"):
+        with patch(
+            "agent.anthropic_adapter._refresh_oauth_token",
+            return_value="refreshed-token",
+        ):
            result = resolve_anthropic_token()

        assert result == "refreshed-token"
@ -400,13 +444,17 @@ class TestRunOauthSetupToken:
        # Pre-create credential files that will be found after subprocess
        cred_file = tmp_path / ".claude" / ".credentials.json"
        cred_file.parent.mkdir(parents=True)
-        cred_file.write_text(json.dumps({
-            "claudeAiOauth": {
-                "accessToken": "from-cred-file",
-                "refreshToken": "refresh",
-                "expiresAt": int(time.time() * 1000) + 3600_000,
-            }
-        }))
+        cred_file.write_text(
+            json.dumps(
+                {
+                    "claudeAiOauth": {
+                        "accessToken": "from-cred-file",
+                        "refreshToken": "refresh",
+                        "expiresAt": int(time.time() * 1000) + 3600_000,
+                    }
+                }
+            )
+        )
        monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path)

        with patch("subprocess.run") as mock_run:
@ -459,27 +507,45 @@ class TestRunOauthSetupToken:

 class TestNormalizeModelName:
    def test_strips_anthropic_prefix(self):
-        assert normalize_model_name("anthropic/claude-sonnet-4-20250514") == "claude-sonnet-4-20250514"
+        assert (
+            normalize_model_name("anthropic/claude-sonnet-4-20250514")
+            == "claude-sonnet-4-20250514"
+        )

    def test_leaves_bare_name(self):
-        assert normalize_model_name("claude-sonnet-4-20250514") == "claude-sonnet-4-20250514"
+        assert (
+            normalize_model_name("claude-sonnet-4-20250514")
+            == "claude-sonnet-4-20250514"
+        )

    def test_converts_dots_to_hyphens(self):
        """OpenRouter uses dots (4.6), Anthropic uses hyphens (4-6)."""
        assert normalize_model_name("anthropic/claude-opus-4.6") == "claude-opus-4-6"
-        assert normalize_model_name("anthropic/claude-sonnet-4.5") == "claude-sonnet-4-5"
+        assert (
+            normalize_model_name("anthropic/claude-sonnet-4.5") == "claude-sonnet-4-5"
+        )
        assert normalize_model_name("claude-opus-4.6") == "claude-opus-4-6"

    def test_already_hyphenated_unchanged(self):
        """Names already in Anthropic format should pass through."""
        assert normalize_model_name("claude-opus-4-6") == "claude-opus-4-6"
-        assert normalize_model_name("claude-opus-4-5-20251101") == "claude-opus-4-5-20251101"
+        assert (
+            normalize_model_name("claude-opus-4-5-20251101")
+            == "claude-opus-4-5-20251101"
+        )

    def test_preserve_dots_for_alibaba_dashscope(self):
        """Alibaba/DashScope use dots in model names (e.g. qwen3.5-plus). Fixes #1739."""
-        assert normalize_model_name("qwen3.5-plus", preserve_dots=True) == "qwen3.5-plus"
-        assert normalize_model_name("anthropic/qwen3.5-plus", preserve_dots=True) == "qwen3.5-plus"
-        assert normalize_model_name("qwen3.5-flash", preserve_dots=True) == "qwen3.5-flash"
+        assert (
+            normalize_model_name("qwen3.5-plus", preserve_dots=True) == "qwen3.5-plus"
+        )
+        assert (
+            normalize_model_name("anthropic/qwen3.5-plus", preserve_dots=True)
+            == "qwen3.5-plus"
+        )
+        assert (
+            normalize_model_name("qwen3.5-flash", preserve_dots=True) == "qwen3.5-flash"
+        )


 # ---------------------------------------------------------------------------
@ -536,7 +602,10 @@ class TestConvertMessages:
                "role": "user",
                "content": [
                    {"type": "text", "text": "Can you see this?"},
-                    {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
+                    {
+                        "type": "image_url",
+                        "image_url": {"url": "https://example.com/cat.png"},
+                    },
                ],
            }
        ]
@ -548,7 +617,10 @@ class TestConvertMessages:
                "role": "user",
                "content": [
                    {"type": "text", "text": "Can you see this?"},
-                    {"type": "image", "source": {"type": "url", "url": "https://example.com/cat.png"}},
+                    {
+                        "type": "image",
+                        "source": {"type": "url", "url": "https://example.com/cat.png"},
+                    },
                ],
            }
        ]
@ -613,7 +685,10 @@ class TestConvertMessages:
                "role": "assistant",
                "content": "",
                "tool_calls": [
-                    {"id": "tc_1", "function": {"name": "test_tool", "arguments": "{}"}},
+                    {
+                        "id": "tc_1",
+                        "function": {"name": "test_tool", "arguments": "{}"},
+                    },
                ],
            },
            {"role": "tool", "tool_call_id": "tc_1", "content": "result data"},
@ -678,10 +753,9 @@ class TestConvertMessages:
        # tc_gone has no matching tool_use — its tool_result should be stripped
        for m in result:
            if m["role"] == "user" and isinstance(m["content"], list):
-                assert all(
-                    b.get("type") != "tool_result"
-                    for b in m["content"]
-                ), "Orphaned tool_result should have been stripped"
+                assert all(b.get("type") != "tool_result" for b in m["content"]), (
+                    "Orphaned tool_result should have been stripped"
+                )

    def test_strips_orphaned_tool_result_preserves_valid(self):
        """Orphaned tool_results are stripped while valid ones survive."""
@ -690,7 +764,10 @@ class TestConvertMessages:
                "role": "assistant",
                "content": "",
                "tool_calls": [
-                    {"id": "tc_valid", "function": {"name": "search", "arguments": "{}"}},
+                    {
+                        "id": "tc_valid",
+                        "function": {"name": "search", "arguments": "{}"},
+                    },
                ],
            },
            {"role": "tool", "tool_call_id": "tc_valid", "content": "good result"},
@ -709,7 +786,11 @@ class TestConvertMessages:
            {
                "role": "system",
                "content": [
-                    {"type": "text", "text": "System prompt", "cache_control": {"type": "ephemeral"}},
+                    {
+                        "type": "text",
+                        "text": "System prompt",
+                        "cache_control": {"type": "ephemeral"},
+                    },
                ],
            },
            {"role": "user", "content": "Hi"},
@ -720,10 +801,12 @@ class TestConvertMessages:
        assert system[0]["cache_control"] == {"type": "ephemeral"}

    def test_assistant_cache_control_blocks_are_preserved(self):
-        messages = apply_anthropic_cache_control([
-            {"role": "system", "content": "System prompt"},
-            {"role": "assistant", "content": "Hello from assistant"},
-        ])
+        messages = apply_anthropic_cache_control(
+            [
+                {"role": "system", "content": "System prompt"},
+                {"role": "assistant", "content": "Hello from assistant"},
+            ]
+        )

        _, result = convert_messages_to_anthropic(messages)
        assistant_blocks = result[0]["content"]
@ -733,17 +816,23 @@ class TestConvertMessages:
        assert assistant_blocks[0]["cache_control"] == {"type": "ephemeral"}

    def test_tool_cache_control_is_preserved_on_tool_result_block(self):
-        messages = apply_anthropic_cache_control([
-            {"role": "system", "content": "System prompt"},
-            {
-                "role": "assistant",
-                "content": "",
-                "tool_calls": [
-                    {"id": "tc_1", "function": {"name": "test_tool", "arguments": "{}"}},
-                ],
-            },
-            {"role": "tool", "tool_call_id": "tc_1", "content": "result"},
-        ], native_anthropic=True)
+        messages = apply_anthropic_cache_control(
+            [
+                {"role": "system", "content": "System prompt"},
+                {
+                    "role": "assistant",
+                    "content": "",
+                    "tool_calls": [
+                        {
+                            "id": "tc_1",
+                            "function": {"name": "test_tool", "arguments": "{}"},
+                        },
+                    ],
+                },
+                {"role": "tool", "tool_call_id": "tc_1", "content": "result"},
+            ],
+            native_anthropic=True,
+        )

        _, result = convert_messages_to_anthropic(messages)
        user_msg = [m for m in result if m["role"] == "user"][0]
@ -760,7 +849,10 @@ class TestConvertMessages:
                "role": "assistant",
                "content": "",
                "tool_calls": [
-                    {"id": "tc_1", "function": {"name": "test_tool", "arguments": "{}"}},
+                    {
+                        "id": "tc_1",
+                        "function": {"name": "test_tool", "arguments": "{}"},
+                    },
                ],
                "reasoning_details": [
                    {
@ -774,10 +866,14 @@ class TestConvertMessages:
        ]

        _, result = convert_messages_to_anthropic(messages)
-        assistant_blocks = next(msg for msg in result if msg["role"] == "assistant")["content"]
+        assistant_blocks = next(msg for msg in result if msg["role"] == "assistant")[
+            "content"
+        ]

        assert assistant_blocks[0]["type"] == "thinking"
-        assert assistant_blocks[0]["thinking"] == "Need to inspect the tool result first."
+        assert (
+            assistant_blocks[0]["thinking"] == "Need to inspect the tool result first."
+        )
        assert assistant_blocks[0]["signature"] == "sig_123"
        assert assistant_blocks[1]["type"] == "tool_use"

@ -832,25 +928,33 @@ class TestConvertMessages:
        }

    def test_empty_cached_assistant_tool_turn_converts_without_empty_text_block(self):
-        messages = apply_anthropic_cache_control([
-            {"role": "system", "content": "System prompt"},
-            {"role": "user", "content": "Find the skill"},
-            {
-                "role": "assistant",
-                "content": "",
-                "tool_calls": [
-                    {"id": "tc_1", "function": {"name": "skill_view", "arguments": "{}"}},
-                ],
-            },
-            {"role": "tool", "tool_call_id": "tc_1", "content": "result"},
-        ])
+        messages = apply_anthropic_cache_control(
+            [
+                {"role": "system", "content": "System prompt"},
+                {"role": "user", "content": "Find the skill"},
+                {
+                    "role": "assistant",
+                    "content": "",
+                    "tool_calls": [
+                        {
+                            "id": "tc_1",
+                            "function": {"name": "skill_view", "arguments": "{}"},
+                        },
+                    ],
+                },
+                {"role": "tool", "tool_call_id": "tc_1", "content": "result"},
+            ]
+        )

        _, result = convert_messages_to_anthropic(messages)

        assistant_turn = next(msg for msg in result if msg["role"] == "assistant")
        assistant_blocks = assistant_turn["content"]

-        assert all(not (b.get("type") == "text" and b.get("text") == "") for b in assistant_blocks)
+        assert all(
+            not (b.get("type") == "text" and b.get("text") == "")
+            for b in assistant_blocks
+        )
        assert any(b.get("type") == "tool_use" for b in assistant_blocks)

    def test_empty_user_message_string_gets_placeholder(self):
@ -888,7 +992,13 @@ class TestConvertMessages:
    def test_user_message_with_empty_text_blocks_gets_placeholder(self):
        """User message with only empty text blocks should get placeholder."""
        messages = [
-            {"role": "user", "content": [{"type": "text", "text": ""}, {"type": "text", "text": "  "}]},
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": ""},
+                    {"type": "text", "text": "  "},
+                ],
+            },
        ]
        _, result = convert_messages_to_anthropic(messages)
        assert result[0]["role"] == "user"
@ -1085,35 +1195,43 @@ class TestBuildAnthropicKwargs:
 class TestGetAnthropicMaxOutput:
    def test_opus_4_6(self):
        from agent.anthropic_adapter import _get_anthropic_max_output
+
        assert _get_anthropic_max_output("claude-opus-4-6") == 128_000

    def test_opus_4_6_variant(self):
        from agent.anthropic_adapter import _get_anthropic_max_output
+
        assert _get_anthropic_max_output("claude-opus-4-6:1m:fast") == 128_000

    def test_sonnet_4_6(self):
        from agent.anthropic_adapter import _get_anthropic_max_output
+
        assert _get_anthropic_max_output("claude-sonnet-4-6") == 64_000

    def test_sonnet_4_date_stamped(self):
        from agent.anthropic_adapter import _get_anthropic_max_output
+
        assert _get_anthropic_max_output("claude-sonnet-4-20250514") == 64_000

    def test_claude_3_5_sonnet(self):
        from agent.anthropic_adapter import _get_anthropic_max_output
+
        assert _get_anthropic_max_output("claude-3-5-sonnet-20241022") == 8_192

    def test_claude_3_opus(self):
        from agent.anthropic_adapter import _get_anthropic_max_output
+
        assert _get_anthropic_max_output("claude-3-opus-20240229") == 4_096

    def test_unknown_future_model(self):
        from agent.anthropic_adapter import _get_anthropic_max_output
+
        assert _get_anthropic_max_output("claude-ultra-5-20260101") == 128_000

    def test_longest_prefix_wins(self):
        """'claude-3-5-sonnet' should match before 'claude-3-5'."""
        from agent.anthropic_adapter import _get_anthropic_max_output
+
        # claude-3-5-sonnet (8192) should win over a hypothetical shorter match
        assert _get_anthropic_max_output("claude-3-5-sonnet-20241022") == 8_192

@ -1218,7 +1336,9 @@ class TestNormalizeResponse:
        msg, reason = normalize_anthropic_response(self._make_response(blocks))
        assert msg.content == "The answer is 42."
        assert msg.reasoning == "Let me reason about this..."
-        assert msg.reasoning_details == [{"type": "thinking", "thinking": "Let me reason about this..."}]
+        assert msg.reasoning_details == [
+            {"type": "thinking", "thinking": "Let me reason about this..."}
+        ]

    def test_thinking_response_preserves_signature(self):
        blocks = [
@ -1235,15 +1355,9 @@ class TestNormalizeResponse:

    def test_stop_reason_mapping(self):
        block = SimpleNamespace(type="text", text="x")
-        _, r1 = normalize_anthropic_response(
-            self._make_response([block], "end_turn")
-        )
-        _, r2 = normalize_anthropic_response(
-            self._make_response([block], "tool_use")
-        )
-        _, r3 = normalize_anthropic_response(
-            self._make_response([block], "max_tokens")
-        )
+        _, r1 = normalize_anthropic_response(self._make_response([block], "end_turn"))
+        _, r2 = normalize_anthropic_response(self._make_response([block], "tool_use"))
+        _, r3 = normalize_anthropic_response(self._make_response([block], "max_tokens"))
        assert r1 == "stop"
        assert r2 == "tool_calls"
        assert r3 == "length"
@ -1306,7 +1420,11 @@ class TestThinkingBlockSignatureManagement:
                    {"id": "tc_1", "function": {"name": "tool1", "arguments": "{}"}},
                ],
                "reasoning_details": [
-                    {"type": "thinking", "thinking": "Old reasoning.", "signature": "sig_old"},
+                    {
+                        "type": "thinking",
+                        "thinking": "Old reasoning.",
+                        "signature": "sig_old",
+                    },
                ],
            },
            {"role": "tool", "tool_call_id": "tc_1", "content": "result 1"},
@ -1317,7 +1435,11 @@ class TestThinkingBlockSignatureManagement:
                    {"id": "tc_2", "function": {"name": "tool2", "arguments": "{}"}},
                ],
                "reasoning_details": [
-                    {"type": "thinking", "thinking": "Latest reasoning.", "signature": "sig_new"},
+                    {
+                        "type": "thinking",
+                        "thinking": "Latest reasoning.",
+                        "signature": "sig_new",
+                    },
                ],
            },
            {"role": "tool", "tool_call_id": "tc_2", "content": "result 2"},
@ -1348,7 +1470,11 @@ class TestThinkingBlockSignatureManagement:
                "role": "assistant",
                "content": "The answer is 42.",
                "reasoning_details": [
-                    {"type": "thinking", "thinking": "Deep thought.", "signature": "sig_valid"},
+                    {
+                        "type": "thinking",
+                        "thinking": "Deep thought.",
+                        "signature": "sig_valid",
+                    },
                ],
            },
        ]
@ -1445,14 +1571,22 @@ class TestThinkingBlockSignatureManagement:
                "role": "assistant",
                "content": "First response.",
                "reasoning_details": [
-                    {"type": "thinking", "thinking": "First thought.", "signature": "sig_1"},
+                    {
+                        "type": "thinking",
+                        "thinking": "First thought.",
+                        "signature": "sig_1",
+                    },
                ],
            },
            {
                "role": "assistant",
                "content": "Second response.",
                "reasoning_details": [
-                    {"type": "thinking", "thinking": "Second thought.", "signature": "sig_2"},
+                    {
+                        "type": "thinking",
+                        "thinking": "Second thought.",
+                        "signature": "sig_2",
+                    },
                ],
            },
        ]
@ -1532,12 +1666,57 @@ class TestThinkingBlockSignatureManagement:

        # Last one: thinking preserved
        last_thinking = [
-            b for b in assistants[2]["content"]
+            b
+            for b in assistants[2]["content"]
            if isinstance(b, dict) and b.get("type") == "thinking"
        ]
        assert len(last_thinking) == 1
        assert last_thinking[0]["signature"] == "sig_3"

+    def test_third_party_downgrades_thinking_to_text(self):
+        """Third-party endpoints get thinking as text; redacted blocks are dropped."""
+        messages = [
+            {
+                "role": "assistant",
+                "content": "Visible answer.",
+                "reasoning_details": [
+                    {
+                        "type": "thinking",
+                        "thinking": "Third-party-safe reasoning.",
+                        "signature": "sig",
+                    },
+                    {"type": "redacted_thinking", "data": "opaque"},  # must be dropped
+                ],
+            }
+        ]
+        _, result = convert_messages_to_anthropic(
+            messages,
+            base_url="https://api.z.ai/api/paas/v4",  # non-Anthropic host
+        )
+        blocks = result[0]["content"]
+        assert not any(b.get("type") == "thinking" for b in blocks)
+        assert not any(b.get("type") == "redacted_thinking" for b in blocks)
+        text_blocks = [b.get("text", "") for b in blocks if b.get("type") == "text"]
+        assert "Third-party-safe reasoning." in text_blocks  # kept as text
+        assert "Visible answer." in text_blocks  # original reply kept
+
+    def test_third_party_thinking_only_content_gets_placeholder(self):
+        """A third-party turn holding only redacted_thinking becomes a placeholder."""
+        messages = [
+            {
+                "role": "assistant",
+                "content": "",  # no visible text
+                "reasoning_details": [
+                    {"type": "redacted_thinking", "data": "opaque"},
+                ],
+            }
+        ]
+        _, result = convert_messages_to_anthropic(
+            messages,
+            base_url="https://api.minimax.io/anthropic",  # non-Anthropic host
+        )
+        assert result[0]["content"] == [{"type": "text", "text": "(thinking elided)"}]
+

 # ---------------------------------------------------------------------------
 # Tool choice
--- a/tests/run_agent/test_run_agent.py
+++ b/tests/run_agent/test_run_agent.py