mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-28 01:21:43 +00:00
The AIAgent.flush_memories pre-compression save, the gateway _flush_memories_for_session, and everything feeding them are obsolete now that the background memory/skill review handles persistent memory extraction. Problems with flush_memories: - Pre-dates the background review loop. It was the only memory-save path when introduced; the background review now fires every 10 user turns on CLI and gateway alike, which is far more frequent than compression or session reset ever triggered flush. - Blocking and synchronous. Pre-compression flush ran on the live agent before compression, blocking the user-visible response. - Cache-breaking. Flush built a temporary conversation prefix (system prompt + memory-only tool list) that diverged from the live conversation's cached prefix, invalidating prompt caching. The gateway variant spawned a fresh AIAgent with its own clean prompt for each finalized session — still cache-breaking, just in a different process. - Redundant. Background review runs in the live conversation's session context, gets the same content, writes to the same memory store, and doesn't break the cache. Everything flush_memories claimed to preserve is already covered. 
What this removes: - AIAgent.flush_memories() method (~248 LOC in run_agent.py) - Pre-compression flush call in _compress_context - flush_memories call sites in cli.py (/new + exit) - GatewayRunner._flush_memories_for_session + _async_flush_memories (and the 3 call sites: session expiry watcher, /new, /resume) - 'flush_memories' entry from DEFAULT_CONFIG auxiliary tasks, hermes tools UI task list, auxiliary_client docstrings - _memory_flush_min_turns config + init - #15631's headroom-deduction math in _check_compression_model_feasibility (headroom was only needed because flush dragged the full main-agent system prompt along; the compression summariser sends a single user-role prompt so new_threshold = aux_context is safe again) - The dedicated test files and assertions that exercised flush-specific paths What this renames (with read-time backcompat on sessions.json): - SessionEntry.memory_flushed -> SessionEntry.expiry_finalized. The session-expiry watcher still uses the flag to avoid re-running finalize/eviction on the same expired session; the new name reflects what it now actually gates. from_dict() reads 'expiry_finalized' first, falls back to the legacy 'memory_flushed' key so existing sessions.json files upgrade seamlessly. Supersedes #15631 and #15638. Tested: 383 targeted tests pass across run_agent/, agent/, cli/, and gateway/ session-boundary suites. No behavior regressions — background memory review continues to handle persistent memory extraction on both CLI and gateway.
1706 lines
70 KiB
Python
1706 lines
70 KiB
Python
"""Anthropic Messages API adapter for Hermes Agent.
|
||
|
||
Translates between Hermes's internal OpenAI-style message format and
|
||
Anthropic's Messages API. Follows the same pattern as the codex_responses
|
||
adapter — all provider-specific logic is isolated here.
|
||
|
||
Auth supports:
|
||
- Regular API keys (sk-ant-api*) → x-api-key header
|
||
- OAuth setup-tokens (sk-ant-oat*) → Bearer auth + beta header
|
||
- Claude Code credentials (~/.claude.json or ~/.claude/.credentials.json) → Bearer auth
|
||
"""
|
||
|
||
import copy
|
||
import json
|
||
import logging
|
||
import os
|
||
import platform
|
||
import subprocess
|
||
from pathlib import Path
|
||
|
||
from hermes_constants import get_hermes_home
|
||
from typing import Any, Dict, List, Optional, Tuple
|
||
from utils import normalize_proxy_env_vars
|
||
|
||
try:
|
||
import anthropic as _anthropic_sdk
|
||
except ImportError:
|
||
_anthropic_sdk = None # type: ignore[assignment]
|
||
|
||
logger = logging.getLogger(__name__)
|
||
|
||
THINKING_BUDGET = {"xhigh": 32000, "high": 16000, "medium": 8000, "low": 4000}
|
||
# Hermes effort → Anthropic adaptive-thinking effort (output_config.effort).
|
||
# Anthropic exposes 5 levels on 4.7+: low, medium, high, xhigh, max.
|
||
# Opus/Sonnet 4.6 only expose 4 levels: low, medium, high, max — no xhigh.
|
||
# We preserve xhigh as xhigh on 4.7+ (the recommended default for coding/
|
||
# agentic work) and downgrade it to max on pre-4.7 adaptive models (which
|
||
# is the strongest level they accept). "minimal" is a legacy alias that
|
||
# maps to low on every model. See:
|
||
# https://platform.claude.com/docs/en/about-claude/models/migration-guide
|
||
ADAPTIVE_EFFORT_MAP = {
|
||
"max": "max",
|
||
"xhigh": "xhigh",
|
||
"high": "high",
|
||
"medium": "medium",
|
||
"low": "low",
|
||
"minimal": "low",
|
||
}
|
||
|
||
# Models that accept the "xhigh" output_config.effort level. Opus 4.7 added
|
||
# xhigh as a distinct level between high and max; older adaptive-thinking
|
||
# models (4.6) reject it with a 400. Keep this substring list in sync with
|
||
# the Anthropic migration guide as new model families ship.
|
||
_XHIGH_EFFORT_SUBSTRINGS = ("4-7", "4.7")
|
||
|
||
# Models where extended thinking is deprecated/removed (4.6+ behavior: adaptive
|
||
# is the only supported mode; 4.7 additionally forbids manual thinking entirely
|
||
# and drops temperature/top_p/top_k).
|
||
_ADAPTIVE_THINKING_SUBSTRINGS = ("4-6", "4.6", "4-7", "4.7")
|
||
|
||
# Models where temperature/top_p/top_k return 400 if set to non-default values.
|
||
# This is the Opus 4.7 contract; future 4.x+ models are expected to follow it.
|
||
_NO_SAMPLING_PARAMS_SUBSTRINGS = ("4-7", "4.7")
|
||
|
||
# ── Max output token limits per Anthropic model ───────────────────────
|
||
# Source: Anthropic docs + Cline model catalog. Anthropic's API requires
|
||
# max_tokens as a mandatory field. Previously we hardcoded 16384, which
|
||
# starves thinking-enabled models (thinking tokens count toward the limit).
|
||
_ANTHROPIC_OUTPUT_LIMITS = {
|
||
# Claude 4.7
|
||
"claude-opus-4-7": 128_000,
|
||
# Claude 4.6
|
||
"claude-opus-4-6": 128_000,
|
||
"claude-sonnet-4-6": 64_000,
|
||
# Claude 4.5
|
||
"claude-opus-4-5": 64_000,
|
||
"claude-sonnet-4-5": 64_000,
|
||
"claude-haiku-4-5": 64_000,
|
||
# Claude 4
|
||
"claude-opus-4": 32_000,
|
||
"claude-sonnet-4": 64_000,
|
||
# Claude 3.7
|
||
"claude-3-7-sonnet": 128_000,
|
||
# Claude 3.5
|
||
"claude-3-5-sonnet": 8_192,
|
||
"claude-3-5-haiku": 8_192,
|
||
# Claude 3
|
||
"claude-3-opus": 4_096,
|
||
"claude-3-sonnet": 4_096,
|
||
"claude-3-haiku": 4_096,
|
||
# Third-party Anthropic-compatible providers
|
||
"minimax": 131_072,
|
||
}
|
||
|
||
# For any model not in the table, assume the highest current limit.
|
||
# Future Anthropic models are unlikely to have *less* output capacity.
|
||
_ANTHROPIC_DEFAULT_OUTPUT_LIMIT = 128_000
|
||
|
||
|
||
def _get_anthropic_max_output(model: str) -> int:
|
||
"""Look up the max output token limit for an Anthropic model.
|
||
|
||
Uses substring matching against _ANTHROPIC_OUTPUT_LIMITS so date-stamped
|
||
model IDs (claude-sonnet-4-5-20250929) and variant suffixes (:1m, :fast)
|
||
resolve correctly. Longest-prefix match wins to avoid e.g. "claude-3-5"
|
||
matching before "claude-3-5-sonnet".
|
||
|
||
Normalizes dots to hyphens so that model names like
|
||
``anthropic/claude-opus-4.6`` match the ``claude-opus-4-6`` table key.
|
||
"""
|
||
m = model.lower().replace(".", "-")
|
||
best_key = ""
|
||
best_val = _ANTHROPIC_DEFAULT_OUTPUT_LIMIT
|
||
for key, val in _ANTHROPIC_OUTPUT_LIMITS.items():
|
||
if key in m and len(key) > len(best_key):
|
||
best_key = key
|
||
best_val = val
|
||
return best_val
|
||
|
||
|
||
def _resolve_positive_anthropic_max_tokens(value) -> Optional[int]:
|
||
"""Return ``value`` floored to a positive int, or ``None`` if it is not a
|
||
finite positive number. Ported from openclaw/openclaw#66664.
|
||
|
||
Anthropic's Messages API rejects ``max_tokens`` values that are 0,
|
||
negative, non-integer, or non-finite with HTTP 400. Python's ``or``
|
||
idiom (``max_tokens or fallback``) correctly catches ``0`` but lets
|
||
negative ints and fractional floats (``-1``, ``0.5``) through to the
|
||
API, producing a user-visible failure instead of a local error.
|
||
"""
|
||
# Booleans are a subclass of int — exclude explicitly so ``True`` doesn't
|
||
# silently become 1 and ``False`` doesn't become 0.
|
||
if isinstance(value, bool):
|
||
return None
|
||
if not isinstance(value, (int, float)):
|
||
return None
|
||
try:
|
||
import math
|
||
if not math.isfinite(value):
|
||
return None
|
||
except Exception:
|
||
return None
|
||
floored = int(value) # truncates toward zero for floats
|
||
return floored if floored > 0 else None
|
||
|
||
|
||
def _resolve_anthropic_messages_max_tokens(
    requested,
    model: str,
    context_length: Optional[int] = None,
) -> int:
    """Pick the ``max_tokens`` budget for an Anthropic Messages call.

    A positive, finite ``requested`` value wins; otherwise the model's
    output ceiling from the limits table is used. Raises ``ValueError``
    when neither yields a positive number (a guard against a future
    regression in ``_get_anthropic_max_output`` — current table defaults
    never hit it). Context-window clamping is the caller's job, keeping
    this positive-value contract independent of endpoint specifics.
    Ported from openclaw/openclaw#66664 (resolveAnthropicMessagesMaxTokens).
    """
    explicit = _resolve_positive_anthropic_max_tokens(requested)
    if explicit is not None:
        return explicit

    model_ceiling = _get_anthropic_max_output(model)
    if model_ceiling > 0:
        return model_ceiling

    raise ValueError(
        f"Anthropic Messages adapter requires a positive max_tokens value for "
        f"model {model!r}; got {requested!r} and no model default resolved."
    )
|
||
|
||
|
||
def _supports_adaptive_thinking(model: str) -> bool:
    """Return True for Claude 4.6+ models that support adaptive thinking."""
    for marker in _ADAPTIVE_THINKING_SUBSTRINGS:
        if marker in model:
            return True
    return False
|
||
|
||
|
||
def _supports_xhigh_effort(model: str) -> bool:
    """Return True when *model* accepts the 'xhigh' adaptive effort level.

    Opus 4.7 introduced xhigh as a distinct level between high and max.
    Pre-4.7 adaptive models (Opus/Sonnet 4.6) only accept
    low/medium/high/max and reject xhigh with an HTTP 400, so callers
    should downgrade xhigh→max when this returns False.
    """
    for marker in _XHIGH_EFFORT_SUBSTRINGS:
        if marker in model:
            return True
    return False
|
||
|
||
|
||
def _forbids_sampling_params(model: str) -> bool:
    """Return True for models that 400 on non-default temperature/top_p/top_k.

    Opus 4.7 explicitly rejects sampling parameters, and later Claude
    releases are expected to follow suit. Callers should omit these fields
    entirely rather than passing zero/default values — the API rejects
    anything non-null.
    """
    return any(map(model.__contains__, _NO_SAMPLING_PARAMS_SUBSTRINGS))
|
||
|
||
|
||
# Beta headers for enhanced features (sent with ALL auth types).
|
||
# As of Opus 4.7 (2026-04-16), both of these are GA on Claude 4.6+ — the
|
||
# beta headers are still accepted (harmless no-op) but not required. Kept
|
||
# here so older Claude (4.5, 4.1) + third-party Anthropic-compat endpoints
|
||
# that still gate on the headers continue to get the enhanced features.
|
||
# Migration guide: remove these if you no longer support ≤4.5 models.
|
||
_COMMON_BETAS = [
|
||
"interleaved-thinking-2025-05-14",
|
||
"fine-grained-tool-streaming-2025-05-14",
|
||
]
|
||
# MiniMax's Anthropic-compatible endpoints fail tool-use requests when
|
||
# the fine-grained tool streaming beta is present. Omit it so tool calls
|
||
# fall back to the provider's default response path.
|
||
_TOOL_STREAMING_BETA = "fine-grained-tool-streaming-2025-05-14"
|
||
|
||
# Fast mode beta — enables the ``speed: "fast"`` request parameter for
|
||
# significantly higher output token throughput on Opus 4.6 (~2.5x).
|
||
# See https://platform.claude.com/docs/en/build-with-claude/fast-mode
|
||
_FAST_MODE_BETA = "fast-mode-2026-02-01"
|
||
|
||
# Additional beta headers required for OAuth/subscription auth.
|
||
# Matches what Claude Code (and pi-ai / OpenCode) send.
|
||
_OAUTH_ONLY_BETAS = [
|
||
"claude-code-20250219",
|
||
"oauth-2025-04-20",
|
||
]
|
||
|
||
# Claude Code identity — required for OAuth requests to be routed correctly.
|
||
# Without these, Anthropic's infrastructure intermittently 500s OAuth traffic.
|
||
# The version must stay reasonably current — Anthropic rejects OAuth requests
|
||
# when the spoofed user-agent version is too far behind the actual release.
|
||
_CLAUDE_CODE_VERSION_FALLBACK = "2.1.74"
|
||
_claude_code_version_cache: Optional[str] = None
|
||
|
||
|
||
def _detect_claude_code_version() -> str:
|
||
"""Detect the installed Claude Code version, fall back to a static constant.
|
||
|
||
Anthropic's OAuth infrastructure validates the user-agent version and may
|
||
reject requests with a version that's too old. Detecting dynamically means
|
||
users who keep Claude Code updated never hit stale-version 400s.
|
||
"""
|
||
import subprocess as _sp
|
||
|
||
for cmd in ("claude", "claude-code"):
|
||
try:
|
||
result = _sp.run(
|
||
[cmd, "--version"],
|
||
capture_output=True, text=True, timeout=5,
|
||
)
|
||
if result.returncode == 0 and result.stdout.strip():
|
||
# Output is like "2.1.74 (Claude Code)" or just "2.1.74"
|
||
version = result.stdout.strip().split()[0]
|
||
if version and version[0].isdigit():
|
||
return version
|
||
except Exception:
|
||
pass
|
||
return _CLAUDE_CODE_VERSION_FALLBACK
|
||
|
||
|
||
_CLAUDE_CODE_SYSTEM_PREFIX = "You are Claude Code, Anthropic's official CLI for Claude."
_MCP_TOOL_PREFIX = "mcp_"


def _get_claude_code_version() -> str:
    """Return the Claude Code version, detecting it at most once per process.

    The probe shells out to the CLI, so the result is memoized in
    ``_claude_code_version_cache`` and reused by every OAuth header build.
    """
    global _claude_code_version_cache
    cached = _claude_code_version_cache
    if cached is None:
        cached = _detect_claude_code_version()
        _claude_code_version_cache = cached
    return cached
|
||
|
||
|
||
def _is_oauth_token(key: str) -> bool:
|
||
"""Check if the key is an Anthropic OAuth/setup token.
|
||
|
||
Positively identifies Anthropic OAuth tokens by their key format:
|
||
- ``sk-ant-`` prefix (but NOT ``sk-ant-api``) → setup tokens, managed keys
|
||
- ``eyJ`` prefix → JWTs from the Anthropic OAuth flow
|
||
- ``cc-`` prefix → Claude Code OAuth access tokens (from CLAUDE_CODE_OAUTH_TOKEN)
|
||
|
||
Non-Anthropic keys (MiniMax, Alibaba, etc.) don't match any pattern
|
||
and correctly return False.
|
||
"""
|
||
if not key:
|
||
return False
|
||
# Regular Anthropic Console API keys — x-api-key auth, never OAuth
|
||
if key.startswith("sk-ant-api"):
|
||
return False
|
||
# Anthropic-issued tokens (setup-tokens sk-ant-oat-*, managed keys)
|
||
if key.startswith("sk-ant-"):
|
||
return True
|
||
# JWTs from Anthropic OAuth flow
|
||
if key.startswith("eyJ"):
|
||
return True
|
||
# Claude Code OAuth access tokens (opaque, from CLAUDE_CODE_OAUTH_TOKEN)
|
||
if key.startswith("cc-"):
|
||
return True
|
||
return False
|
||
|
||
|
||
def _normalize_base_url_text(base_url) -> str:
|
||
"""Normalize SDK/base transport URL values to a plain string for inspection.
|
||
|
||
Some client objects expose ``base_url`` as an ``httpx.URL`` instead of a raw
|
||
string. Provider/auth detection should accept either shape.
|
||
"""
|
||
if not base_url:
|
||
return ""
|
||
return str(base_url).strip()
|
||
|
||
|
||
def _is_third_party_anthropic_endpoint(base_url: str | None) -> bool:
|
||
"""Return True for non-Anthropic endpoints using the Anthropic Messages API.
|
||
|
||
Third-party proxies (Azure AI Foundry, AWS Bedrock, self-hosted) authenticate
|
||
with their own API keys via x-api-key, not Anthropic OAuth tokens. OAuth
|
||
detection should be skipped for these endpoints.
|
||
"""
|
||
normalized = _normalize_base_url_text(base_url)
|
||
if not normalized:
|
||
return False # No base_url = direct Anthropic API
|
||
normalized = normalized.rstrip("/").lower()
|
||
if "anthropic.com" in normalized:
|
||
return False # Direct Anthropic API — OAuth applies
|
||
return True # Any other endpoint is a third-party proxy
|
||
|
||
|
||
def _is_kimi_coding_endpoint(base_url: str | None) -> bool:
|
||
"""Return True for Kimi's /coding endpoint that requires claude-code UA."""
|
||
normalized = _normalize_base_url_text(base_url)
|
||
if not normalized:
|
||
return False
|
||
return normalized.rstrip("/").lower().startswith("https://api.kimi.com/coding")
|
||
|
||
|
||
def _requires_bearer_auth(base_url: str | None) -> bool:
|
||
"""Return True for Anthropic-compatible providers that require Bearer auth.
|
||
|
||
Some third-party /anthropic endpoints implement Anthropic's Messages API but
|
||
require Authorization: Bearer *** of Anthropic's native x-api-key header.
|
||
MiniMax's global and China Anthropic-compatible endpoints follow this pattern.
|
||
"""
|
||
normalized = _normalize_base_url_text(base_url)
|
||
if not normalized:
|
||
return False
|
||
normalized = normalized.rstrip("/").lower()
|
||
return normalized.startswith(("https://api.minimax.io/anthropic", "https://api.minimaxi.com/anthropic"))
|
||
|
||
|
||
def _common_betas_for_base_url(base_url: str | None) -> list[str]:
    """Return the beta headers that are safe for the configured endpoint.

    MiniMax's Anthropic-compatible (Bearer-auth) endpoints reject requests
    carrying Anthropic's ``fine-grained-tool-streaming`` beta — every
    tool-use message triggers a connection error. Strip that single beta
    for Bearer-auth endpoints and keep everything else intact.
    """
    if not _requires_bearer_auth(base_url):
        return _COMMON_BETAS
    return [beta for beta in _COMMON_BETAS if beta != _TOOL_STREAMING_BETA]
|
||
|
||
|
||
def build_anthropic_client(api_key: str, base_url: Optional[str] = None, timeout: Optional[float] = None):
    """Create an Anthropic client, auto-detecting setup-tokens vs API keys.

    If *timeout* is provided it overrides the default 900s read timeout. The
    connect timeout stays at 10s. Callers pass this from the per-provider /
    per-model ``request_timeout_seconds`` config so Anthropic-native and
    Anthropic-compatible providers respect the same knob as OpenAI-wire
    providers.

    Auth routing happens in priority order: Kimi /coding endpoint, then
    Bearer-auth providers (MiniMax), then other third-party proxies, then
    Anthropic OAuth tokens, then plain API keys. Reordering these branches
    changes which auth style wins — the inline comments explain each guard.

    Returns an anthropic.Anthropic instance.
    """
    if _anthropic_sdk is None:
        raise ImportError(
            "The 'anthropic' package is required for the Anthropic provider. "
            "Install it with: pip install 'anthropic>=0.39.0'"
        )

    # NOTE(review): presumably canonicalizes HTTP(S)_PROXY env vars before
    # httpx reads them — semantics live in utils.normalize_proxy_env_vars.
    normalize_proxy_env_vars()

    from httpx import Timeout

    normalized_base_url = _normalize_base_url_text(base_url)
    # Only a positive numeric timeout overrides the 900s default read timeout.
    _read_timeout = timeout if (isinstance(timeout, (int, float)) and timeout > 0) else 900.0
    kwargs = {
        "timeout": Timeout(timeout=float(_read_timeout), connect=10.0),
    }
    if normalized_base_url:
        kwargs["base_url"] = normalized_base_url
    common_betas = _common_betas_for_base_url(normalized_base_url)

    if _is_kimi_coding_endpoint(base_url):
        # Kimi's /coding endpoint requires User-Agent: claude-code/0.1.0
        # to be recognized as a valid Coding Agent. Without it, returns 403.
        # Check this BEFORE _requires_bearer_auth since both match api.kimi.com/coding.
        kwargs["api_key"] = api_key
        kwargs["default_headers"] = {
            "User-Agent": "claude-code/0.1.0",
            **( {"anthropic-beta": ",".join(common_betas)} if common_betas else {} )
        }
    elif _requires_bearer_auth(normalized_base_url):
        # Some Anthropic-compatible providers (e.g. MiniMax) expect the API key in
        # Authorization: Bearer *** for regular API keys. Route those endpoints
        # through auth_token so the SDK sends Bearer auth instead of x-api-key.
        # Check this before OAuth token shape detection because MiniMax secrets do
        # not use Anthropic's sk-ant-api prefix and would otherwise be misread as
        # Anthropic OAuth/setup tokens.
        kwargs["auth_token"] = api_key
        if common_betas:
            kwargs["default_headers"] = {"anthropic-beta": ",".join(common_betas)}
    elif _is_third_party_anthropic_endpoint(base_url):
        # Third-party proxies (Azure AI Foundry, AWS Bedrock, etc.) use their
        # own API keys with x-api-key auth. Skip OAuth detection — their keys
        # don't follow Anthropic's sk-ant-* prefix convention and would be
        # misclassified as OAuth tokens.
        kwargs["api_key"] = api_key
        if common_betas:
            kwargs["default_headers"] = {"anthropic-beta": ",".join(common_betas)}
    elif _is_oauth_token(api_key):
        # OAuth access token / setup-token → Bearer auth + Claude Code identity.
        # Anthropic routes OAuth requests based on user-agent and headers;
        # without Claude Code's fingerprint, requests get intermittent 500s.
        all_betas = common_betas + _OAUTH_ONLY_BETAS
        kwargs["auth_token"] = api_key
        kwargs["default_headers"] = {
            "anthropic-beta": ",".join(all_betas),
            "user-agent": f"claude-cli/{_get_claude_code_version()} (external, cli)",
            "x-app": "cli",
        }
    else:
        # Regular API key → x-api-key header + common betas
        kwargs["api_key"] = api_key
        if common_betas:
            kwargs["default_headers"] = {"anthropic-beta": ",".join(common_betas)}

    return _anthropic_sdk.Anthropic(**kwargs)
|
||
|
||
|
||
def build_anthropic_bedrock_client(region: str):
    """Create an AnthropicBedrock client for Bedrock Claude models.

    Uses the Anthropic SDK's native Bedrock adapter, which provides full
    Claude feature parity (prompt caching, thinking budgets, adaptive
    thinking, fast mode) — features the Converse API does not expose.
    Auth uses the boto3 default credential chain (IAM roles, SSO, env vars).

    Raises ImportError when the ``anthropic`` package is missing or too old
    to ship the Bedrock adapter.
    """
    if _anthropic_sdk is None:
        raise ImportError(
            "The 'anthropic' package is required for the Bedrock provider. "
            "Install it with: pip install 'anthropic>=0.39.0'"
        )
    bedrock_cls = getattr(_anthropic_sdk, "AnthropicBedrock", None)
    if bedrock_cls is None:
        raise ImportError(
            "anthropic.AnthropicBedrock not available. "
            "Upgrade with: pip install 'anthropic>=0.39.0'"
        )

    from httpx import Timeout

    return bedrock_cls(
        aws_region=region,
        timeout=Timeout(timeout=900.0, connect=10.0),
    )
|
||
|
||
|
||
def _read_claude_code_credentials_from_keychain() -> Optional[Dict[str, Any]]:
    """Read Claude Code OAuth credentials from the macOS Keychain.

    Claude Code >=2.1.114 stores credentials in the macOS Keychain under
    the service name "Claude Code-credentials" rather than (or in addition
    to) ~/.claude/.credentials.json. The keychain password field holds a
    JSON string with the same claudeAiOauth structure as that file.

    Returns a dict with {accessToken, refreshToken?, expiresAt?, source}
    or None when unavailable (non-macOS, no entry, malformed payload).
    """
    import platform
    import subprocess

    if platform.system() != "Darwin":
        return None

    try:
        # -w prints only the password field of the generic-password entry.
        proc = subprocess.run(
            ["security", "find-generic-password",
             "-s", "Claude Code-credentials",
             "-w"],
            capture_output=True,
            text=True,
            timeout=5,
        )
    except (OSError, subprocess.TimeoutExpired):
        logger.debug("Keychain: security command not available or timed out")
        return None

    if proc.returncode != 0:
        logger.debug("Keychain: no entry found for 'Claude Code-credentials'")
        return None

    payload = proc.stdout.strip()
    if not payload:
        return None

    try:
        parsed = json.loads(payload)
    except json.JSONDecodeError:
        logger.debug("Keychain: credentials payload is not valid JSON")
        return None

    oauth = parsed.get("claudeAiOauth")
    if isinstance(oauth, dict) and oauth:
        token = oauth.get("accessToken", "")
        if token:
            return {
                "accessToken": token,
                "refreshToken": oauth.get("refreshToken", ""),
                "expiresAt": oauth.get("expiresAt", 0),
                "source": "macos_keychain",
            }

    return None
|
||
|
||
|
||
def read_claude_code_credentials() -> Optional[Dict[str, Any]]:
    """Read refreshable Claude Code OAuth credentials.

    Sources, in order:
    1. macOS Keychain (Darwin only) — "Claude Code-credentials" entry
    2. ~/.claude/.credentials.json file

    ~/.claude.json's primaryApiKey is deliberately excluded: the
    subscription flow is OAuth/setup-token based with refreshable
    credentials, and direct Anthropic usage should follow that path rather
    than auto-detecting Claude's first-party managed key.

    Returns a dict with {accessToken, refreshToken?, expiresAt?, source}
    or None.
    """
    # Keychain first — covers Claude Code >=2.1.114.
    keychain_creds = _read_claude_code_credentials_from_keychain()
    if keychain_creds:
        return keychain_creds

    # Then the legacy JSON credentials file.
    cred_path = Path.home() / ".claude" / ".credentials.json"
    if not cred_path.exists():
        return None

    try:
        payload = json.loads(cred_path.read_text(encoding="utf-8"))
        oauth = payload.get("claudeAiOauth")
        if isinstance(oauth, dict) and oauth:
            token = oauth.get("accessToken", "")
            if token:
                return {
                    "accessToken": token,
                    "refreshToken": oauth.get("refreshToken", ""),
                    "expiresAt": oauth.get("expiresAt", 0),
                    "source": "claude_code_credentials_file",
                }
    except (json.JSONDecodeError, OSError, IOError) as e:
        logger.debug("Failed to read ~/.claude/.credentials.json: %s", e)

    return None
|
||
|
||
|
||
def read_claude_managed_key() -> Optional[str]:
    """Read Claude's native managed key from ~/.claude.json for diagnostics only."""
    claude_json = Path.home() / ".claude.json"
    if not claude_json.exists():
        return None
    try:
        data = json.loads(claude_json.read_text(encoding="utf-8"))
        primary_key = data.get("primaryApiKey", "")
        if isinstance(primary_key, str):
            stripped = primary_key.strip()
            if stripped:
                return stripped
    except (json.JSONDecodeError, OSError, IOError) as e:
        logger.debug("Failed to read ~/.claude.json: %s", e)
    return None
|
||
|
||
|
||
def is_claude_code_token_valid(creds: Dict[str, Any]) -> bool:
    """Return True when *creds* holds a non-expired access token.

    Managed keys carry no ``expiresAt``; for those any present token counts
    as valid. Otherwise ``expiresAt`` (milliseconds since epoch) must be at
    least 60 seconds in the future.
    """
    import time

    expires_at = creds.get("expiresAt", 0)
    if not expires_at:
        # No expiry recorded (managed keys) — presence of a token suffices.
        return bool(creds.get("accessToken"))

    # expiresAt is in milliseconds; keep a 60-second safety buffer.
    now_ms = int(time.time() * 1000)
    return now_ms < expires_at - 60_000
|
||
|
||
|
||
def refresh_anthropic_oauth_pure(refresh_token: str, *, use_json: bool = False) -> Dict[str, Any]:
    """Refresh an Anthropic OAuth token without mutating local credential files.

    Posts the refresh grant to platform.claude.com first, then falls back to
    console.anthropic.com on transport errors. A response that lacks
    ``access_token`` raises immediately (no fallback — the endpoint answered).

    Returns {access_token, refresh_token, expires_at_ms}. Raises ValueError
    on a missing refresh token or an unusable response, or re-raises the last
    transport error when every endpoint failed.
    """
    import time
    import urllib.parse
    import urllib.request

    if not refresh_token:
        raise ValueError("refresh_token is required")

    client_id = "9d1c250a-e61b-44d9-88ed-5944d1962f5e"
    grant = {
        "grant_type": "refresh_token",
        "refresh_token": refresh_token,
        "client_id": client_id,
    }
    if use_json:
        body = json.dumps(grant).encode()
        content_type = "application/json"
    else:
        body = urllib.parse.urlencode(grant).encode()
        content_type = "application/x-www-form-urlencoded"

    token_endpoints = [
        "https://platform.claude.com/v1/oauth/token",
        "https://console.anthropic.com/v1/oauth/token",
    ]
    last_error = None
    for endpoint in token_endpoints:
        request = urllib.request.Request(
            endpoint,
            data=body,
            headers={
                "Content-Type": content_type,
                "User-Agent": f"claude-cli/{_get_claude_code_version()} (external, cli)",
            },
            method="POST",
        )
        try:
            with urllib.request.urlopen(request, timeout=10) as resp:
                parsed = json.loads(resp.read().decode())
        except Exception as exc:
            # Transport/HTTP failure — remember it and try the next endpoint.
            last_error = exc
            logger.debug("Anthropic token refresh failed at %s: %s", endpoint, exc)
            continue

        access_token = parsed.get("access_token", "")
        if not access_token:
            raise ValueError("Anthropic refresh response was missing access_token")
        expires_in = parsed.get("expires_in", 3600)
        return {
            "access_token": access_token,
            # Some responses rotate the refresh token; keep the old one otherwise.
            "refresh_token": parsed.get("refresh_token", refresh_token),
            "expires_at_ms": int(time.time() * 1000) + (expires_in * 1000),
        }

    if last_error is not None:
        raise last_error
    raise ValueError("Anthropic token refresh failed")
|
||
|
||
|
||
def _refresh_oauth_token(creds: Dict[str, Any]) -> Optional[str]:
    """Attempt to refresh an expired Claude Code OAuth token.

    Returns the fresh access token (also persisted to the credentials file)
    or None when no refresh token exists or the refresh fails.
    """
    token = creds.get("refreshToken", "")
    if not token:
        logger.debug("No refresh token available — cannot refresh")
        return None

    try:
        result = refresh_anthropic_oauth_pure(token, use_json=False)
        # Persist immediately so subsequent processes see the new token.
        _write_claude_code_credentials(
            result["access_token"],
            result["refresh_token"],
            result["expires_at_ms"],
        )
        logger.debug("Successfully refreshed Claude Code OAuth token")
        return result["access_token"]
    except Exception as e:
        logger.debug("Failed to refresh Claude Code token: %s", e)
        return None
|
||
|
||
|
||
def _write_claude_code_credentials(
    access_token: str,
    refresh_token: str,
    expires_at_ms: int,
    *,
    scopes: Optional[list] = None,
) -> None:
    """Write refreshed credentials back to ~/.claude/.credentials.json.

    The optional *scopes* list (e.g. ``["user:inference", "user:profile"]``)
    is persisted so Claude Code's own auth check recognises the credential
    as valid — Claude Code >=2.1.81 gates on ``"user:inference"`` being
    present in the stored scopes. Other top-level fields in the file are
    preserved; the write is atomic (tmp file + replace) and the final file
    is chmod'd to 0o600. I/O failures are logged at debug level, not raised.
    """
    cred_path = Path.home() / ".claude" / ".credentials.json"
    try:
        # Load whatever is already on disk so unrelated fields survive.
        existing = {}
        if cred_path.exists():
            existing = json.loads(cred_path.read_text(encoding="utf-8"))

        oauth_data: Dict[str, Any] = {
            "accessToken": access_token,
            "refreshToken": refresh_token,
            "expiresAt": expires_at_ms,
        }
        if scopes is not None:
            oauth_data["scopes"] = scopes
        elif "claudeAiOauth" in existing and "scopes" in existing["claudeAiOauth"]:
            # Refresh responses without a scope field must not drop the
            # previously-stored scopes.
            oauth_data["scopes"] = existing["claudeAiOauth"]["scopes"]

        existing["claudeAiOauth"] = oauth_data

        cred_path.parent.mkdir(parents=True, exist_ok=True)
        tmp_path = cred_path.with_suffix(".tmp")
        tmp_path.write_text(json.dumps(existing, indent=2), encoding="utf-8")
        tmp_path.replace(cred_path)
        # Credentials file — owner read/write only.
        cred_path.chmod(0o600)
    except (OSError, IOError) as e:
        logger.debug("Failed to write refreshed credentials: %s", e)
|
||
|
||
|
||
def _resolve_claude_code_token_from_credentials(creds: Optional[Dict[str, Any]] = None) -> Optional[str]:
    """Resolve a token from Claude Code credential files, refreshing if needed."""
    creds = creds or read_claude_code_credentials()
    if not creds:
        return None

    if is_claude_code_token_valid(creds):
        logger.debug("Using Claude Code credentials (auto-detected)")
        return creds["accessToken"]

    logger.debug("Claude Code credentials expired — attempting refresh")
    refreshed = _refresh_oauth_token(creds)
    if refreshed:
        return refreshed
    logger.debug("Token refresh failed — re-run 'claude setup-token' to reauthenticate")
    return None
|
||
|
||
|
||
def _prefer_refreshable_claude_code_token(env_token: str, creds: Optional[Dict[str, Any]]) -> Optional[str]:
|
||
"""Prefer Claude Code creds when a persisted env OAuth token would shadow refresh.
|
||
|
||
Hermes historically persisted setup tokens into ANTHROPIC_TOKEN. That makes
|
||
later refresh impossible because the static env token wins before we ever
|
||
inspect Claude Code's refreshable credential file. If we have a refreshable
|
||
Claude Code credential record, prefer it over the static env OAuth token.
|
||
"""
|
||
if not env_token or not _is_oauth_token(env_token) or not isinstance(creds, dict):
|
||
return None
|
||
if not creds.get("refreshToken"):
|
||
return None
|
||
|
||
resolved = _resolve_claude_code_token_from_credentials(creds)
|
||
if resolved and resolved != env_token:
|
||
logger.debug(
|
||
"Preferring Claude Code credential file over static env OAuth token so refresh can proceed"
|
||
)
|
||
return resolved
|
||
return None
|
||
|
||
|
||
def resolve_anthropic_token() -> Optional[str]:
    """Resolve an Anthropic token from all available sources.

    Priority:
    1. ANTHROPIC_TOKEN env var (OAuth/setup token saved by Hermes)
    2. CLAUDE_CODE_OAUTH_TOKEN env var
    3. Claude Code credentials (~/.claude.json or ~/.claude/.credentials.json)
       — with automatic refresh if expired and a refresh token is available
    4. ANTHROPIC_API_KEY env var (regular API key, or legacy fallback)

    Returns the token string or None.
    """
    creds = read_claude_code_credentials()

    # 1–2. Env-var tokens, in priority order. A refreshable Claude Code
    # credential record beats a static env OAuth token (so refresh works).
    for env_var in ("ANTHROPIC_TOKEN", "CLAUDE_CODE_OAUTH_TOKEN"):
        env_token = os.getenv(env_var, "").strip()
        if not env_token:
            continue
        preferred = _prefer_refreshable_claude_code_token(env_token, creds)
        return preferred or env_token

    # 3. Claude Code credential file (auto-refreshing if expired).
    resolved = _resolve_claude_code_token_from_credentials(creds)
    if resolved:
        return resolved

    # 4. Regular API key, or a legacy OAuth token saved in ANTHROPIC_API_KEY.
    # This remains as a compatibility fallback for pre-migration Hermes configs.
    api_key = os.getenv("ANTHROPIC_API_KEY", "").strip()
    return api_key or None
|
||
|
||
|
||
def run_oauth_setup_token() -> Optional[str]:
    """Run 'claude setup-token' interactively and return the resulting token.

    Checks multiple sources after the subprocess completes:
    1. Claude Code credential files (may be written by the subprocess)
    2. CLAUDE_CODE_OAUTH_TOKEN / ANTHROPIC_TOKEN env vars

    Returns the token string, or None if no credentials were obtained.
    Raises FileNotFoundError if the 'claude' CLI is not installed.
    """
    import shutil
    import subprocess

    cli = shutil.which("claude")
    if cli is None:
        raise FileNotFoundError(
            "The 'claude' CLI is not installed. "
            "Install it with: npm install -g @anthropic-ai/claude-code"
        )

    # Run interactively — stdin/stdout/stderr inherited so user can interact.
    try:
        subprocess.run([cli, "setup-token"])
    except (KeyboardInterrupt, EOFError):
        return None

    # The subprocess may have written credentials to Claude Code's config files.
    creds = read_claude_code_credentials()
    if creds and is_claude_code_token_valid(creds):
        return creds["accessToken"]

    # Fall back to env vars the setup flow may have populated.
    for name in ("CLAUDE_CODE_OAUTH_TOKEN", "ANTHROPIC_TOKEN"):
        value = os.getenv(name, "").strip()
        if value:
            return value
    return None
|
||
|
||
|
||
# ── Hermes-native PKCE OAuth flow ────────────────────────────────────────
# Mirrors the flow used by Claude Code, pi-ai, and OpenCode.
# Stores credentials in ~/.hermes/.anthropic_oauth.json (our own file).

# OAuth client ID sent with the claude.ai authorization request below.
_OAUTH_CLIENT_ID = "9d1c250a-e61b-44d9-88ed-5944d1962f5e"
# Endpoint that exchanges an authorization code for access/refresh tokens.
_OAUTH_TOKEN_URL = "https://console.anthropic.com/v1/oauth/token"
# Console-hosted callback page; it displays the code the user pastes back.
_OAUTH_REDIRECT_URI = "https://console.anthropic.com/oauth/code/callback"
# Scopes requested during authorization (includes "user:inference").
_OAUTH_SCOPES = "org:create_api_key user:profile user:inference"
# Hermes' own credential store, kept separate from Claude Code's files.
_HERMES_OAUTH_FILE = get_hermes_home() / ".anthropic_oauth.json"
|
||
|
||
|
||
def _generate_pkce() -> tuple:
|
||
"""Generate PKCE code_verifier and code_challenge (S256)."""
|
||
import base64
|
||
import hashlib
|
||
import secrets
|
||
|
||
verifier = base64.urlsafe_b64encode(secrets.token_bytes(32)).rstrip(b"=").decode()
|
||
challenge = base64.urlsafe_b64encode(
|
||
hashlib.sha256(verifier.encode()).digest()
|
||
).rstrip(b"=").decode()
|
||
return verifier, challenge
|
||
|
||
|
||
def run_hermes_oauth_login_pure() -> Optional[Dict[str, Any]]:
    """Run Hermes-native OAuth PKCE flow and return credential state.

    Interactive: prints (and tries to open) the claude.ai authorization URL,
    prompts the user to paste the resulting code, then exchanges it for
    tokens at the Anthropic console token endpoint.

    Returns a dict with ``access_token``, ``refresh_token`` and
    ``expires_at_ms`` keys, or None if the user aborted or the exchange
    failed.
    """
    import time
    import webbrowser

    verifier, challenge = _generate_pkce()

    params = {
        "code": "true",
        "client_id": _OAUTH_CLIENT_ID,
        "response_type": "code",
        "redirect_uri": _OAUTH_REDIRECT_URI,
        "scope": _OAUTH_SCOPES,
        "code_challenge": challenge,
        "code_challenge_method": "S256",
        # The PKCE verifier doubles as the OAuth state parameter; it is
        # echoed back inside the pasted code and re-sent on exchange.
        "state": verifier,
    }
    from urllib.parse import urlencode

    auth_url = f"https://claude.ai/oauth/authorize?{urlencode(params)}"

    print()
    print("Authorize Hermes with your Claude Pro/Max subscription.")
    print()
    print("╭─ Claude Pro/Max Authorization ────────────────────╮")
    print("│ │")
    print("│ Open this link in your browser: │")
    print("╰───────────────────────────────────────────────────╯")
    print()
    print(f" {auth_url}")
    print()

    try:
        webbrowser.open(auth_url)
        print(" (Browser opened automatically)")
    except Exception:
        # Headless / no browser available — the URL is already printed above.
        pass

    print()
    print("After authorizing, you'll see a code. Paste it below.")
    print()
    try:
        auth_code = input("Authorization code: ").strip()
    except (KeyboardInterrupt, EOFError):
        return None

    if not auth_code:
        print("No code entered.")
        return None

    # The pasted value is "code#state"; split the two parts apart.
    splits = auth_code.split("#")
    code = splits[0]
    state = splits[1] if len(splits) > 1 else ""

    try:
        import urllib.request

        exchange_data = json.dumps({
            "grant_type": "authorization_code",
            "client_id": _OAUTH_CLIENT_ID,
            "code": code,
            "state": state,
            "redirect_uri": _OAUTH_REDIRECT_URI,
            "code_verifier": verifier,
        }).encode()

        req = urllib.request.Request(
            _OAUTH_TOKEN_URL,
            data=exchange_data,
            headers={
                "Content-Type": "application/json",
                # Present as the Claude CLI; the token endpoint expects it.
                "User-Agent": f"claude-cli/{_get_claude_code_version()} (external, cli)",
            },
            method="POST",
        )

        with urllib.request.urlopen(req, timeout=15) as resp:
            result = json.loads(resp.read().decode())
    except Exception as e:
        print(f"Token exchange failed: {e}")
        return None

    access_token = result.get("access_token", "")
    refresh_token = result.get("refresh_token", "")
    expires_in = result.get("expires_in", 3600)

    if not access_token:
        print("No access token in response.")
        return None

    # Store an absolute expiry in epoch milliseconds (matches the
    # Claude Code credential format used elsewhere in this module).
    expires_at_ms = int(time.time() * 1000) + (expires_in * 1000)
    return {
        "access_token": access_token,
        "refresh_token": refresh_token,
        "expires_at_ms": expires_at_ms,
    }
|
||
|
||
|
||
def read_hermes_oauth_credentials() -> Optional[Dict[str, Any]]:
    """Read Hermes-managed OAuth credentials from ~/.hermes/.anthropic_oauth.json."""
    if not _HERMES_OAUTH_FILE.exists():
        return None
    try:
        payload = json.loads(_HERMES_OAUTH_FILE.read_text(encoding="utf-8"))
    except (json.JSONDecodeError, OSError, IOError) as e:
        logger.debug("Failed to read Hermes OAuth credentials: %s", e)
        return None
    # Only a record that actually carries an access token is useful.
    return payload if payload.get("accessToken") else None
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Message / tool / response format conversion
|
||
# ---------------------------------------------------------------------------
|
||
|
||
|
||
def _is_bedrock_model_id(model: str) -> bool:
|
||
"""Detect AWS Bedrock model IDs that use dots as namespace separators.
|
||
|
||
Bedrock model IDs come in two forms:
|
||
- Bare: ``anthropic.claude-opus-4-7``
|
||
- Regional (inference profiles): ``us.anthropic.claude-sonnet-4-5-v1:0``
|
||
|
||
In both cases the dots separate namespace components, not version
|
||
numbers, and must be preserved verbatim for the Bedrock API.
|
||
"""
|
||
lower = model.lower()
|
||
# Regional inference-profile prefixes
|
||
if any(lower.startswith(p) for p in ("global.", "us.", "eu.", "ap.", "jp.")):
|
||
return True
|
||
# Bare Bedrock model IDs: provider.model-family
|
||
if lower.startswith("anthropic."):
|
||
return True
|
||
return False
|
||
|
||
|
||
def normalize_model_name(model: str, preserve_dots: bool = False) -> str:
    """Normalize a model name for the Anthropic API.

    - Strips 'anthropic/' prefix (OpenRouter format, case-insensitive)
    - Converts dots to hyphens in version numbers (OpenRouter uses dots,
      Anthropic uses hyphens: claude-opus-4.6 → claude-opus-4-6), unless
      preserve_dots is True (e.g. for Alibaba/DashScope: qwen3.5-plus).
    - Preserves Bedrock model IDs (``anthropic.claude-opus-4-7``) and
      regional inference profiles (``us.anthropic.claude-*``) whose dots
      are namespace separators, not version separators.
    """
    if model.lower().startswith("anthropic/"):
        model = model[len("anthropic/"):]
    if preserve_dots:
        # Dots are meaningful in this provider's names — leave them alone.
        return model
    # Bedrock model IDs use dots as namespace separators
    # (e.g. "anthropic.claude-opus-4-7", "us.anthropic.claude-*").
    # These must not be converted to hyphens. See issue #12295.
    if _is_bedrock_model_id(model):
        return model
    # OpenRouter uses dots for version separators (claude-opus-4.6),
    # Anthropic uses hyphens (claude-opus-4-6). Convert dots to hyphens.
    return model.replace(".", "-")
|
||
|
||
|
||
def _sanitize_tool_id(tool_id: str) -> str:
|
||
"""Sanitize a tool call ID for the Anthropic API.
|
||
|
||
Anthropic requires IDs matching [a-zA-Z0-9_-]. Replace invalid
|
||
characters with underscores and ensure non-empty.
|
||
"""
|
||
import re
|
||
if not tool_id:
|
||
return "tool_0"
|
||
sanitized = re.sub(r"[^a-zA-Z0-9_-]", "_", tool_id)
|
||
return sanitized or "tool_0"
|
||
|
||
|
||
def convert_tools_to_anthropic(tools: List[Dict]) -> List[Dict]:
    """Convert OpenAI tool definitions to Anthropic format."""
    if not tools:
        return []
    # OpenAI nests the schema under "function"; Anthropic flattens it and
    # renames "parameters" to "input_schema".
    return [
        {
            "name": spec.get("name", ""),
            "description": spec.get("description", ""),
            "input_schema": spec.get("parameters", {"type": "object", "properties": {}}),
        }
        for spec in (t.get("function", {}) for t in tools)
    ]
|
||
|
||
|
||
def _image_source_from_openai_url(url: str) -> Dict[str, str]:
|
||
"""Convert an OpenAI-style image URL/data URL into Anthropic image source."""
|
||
url = str(url or "").strip()
|
||
if not url:
|
||
return {"type": "url", "url": ""}
|
||
|
||
if url.startswith("data:"):
|
||
header, _, data = url.partition(",")
|
||
media_type = "image/jpeg"
|
||
if header.startswith("data:"):
|
||
mime_part = header[len("data:"):].split(";", 1)[0].strip()
|
||
if mime_part.startswith("image/"):
|
||
media_type = mime_part
|
||
return {
|
||
"type": "base64",
|
||
"media_type": media_type,
|
||
"data": data,
|
||
}
|
||
|
||
return {"type": "url", "url": url}
|
||
|
||
|
||
def _convert_content_part_to_anthropic(part: Any) -> Optional[Dict[str, Any]]:
|
||
"""Convert a single OpenAI-style content part to Anthropic format."""
|
||
if part is None:
|
||
return None
|
||
if isinstance(part, str):
|
||
return {"type": "text", "text": part}
|
||
if not isinstance(part, dict):
|
||
return {"type": "text", "text": str(part)}
|
||
|
||
ptype = part.get("type")
|
||
|
||
if ptype == "input_text":
|
||
block: Dict[str, Any] = {"type": "text", "text": part.get("text", "")}
|
||
elif ptype in {"image_url", "input_image"}:
|
||
image_value = part.get("image_url", {})
|
||
url = image_value.get("url", "") if isinstance(image_value, dict) else str(image_value or "")
|
||
block = {"type": "image", "source": _image_source_from_openai_url(url)}
|
||
else:
|
||
block = dict(part)
|
||
|
||
if isinstance(part.get("cache_control"), dict) and "cache_control" not in block:
|
||
block["cache_control"] = dict(part["cache_control"])
|
||
return block
|
||
|
||
|
||
def _to_plain_data(value: Any, *, _depth: int = 0, _path: Optional[set] = None) -> Any:
|
||
"""Recursively convert SDK objects to plain Python data structures.
|
||
|
||
Guards against circular references (``_path`` tracks ``id()`` of objects
|
||
on the *current* recursion path) and runaway depth (capped at 20 levels).
|
||
Uses path-based tracking so shared (but non-cyclic) objects referenced by
|
||
multiple siblings are converted correctly rather than being stringified.
|
||
"""
|
||
_MAX_DEPTH = 20
|
||
if _depth > _MAX_DEPTH:
|
||
return str(value)
|
||
|
||
if _path is None:
|
||
_path = set()
|
||
|
||
obj_id = id(value)
|
||
if obj_id in _path:
|
||
return str(value)
|
||
|
||
if hasattr(value, "model_dump"):
|
||
_path.add(obj_id)
|
||
result = _to_plain_data(value.model_dump(), _depth=_depth + 1, _path=_path)
|
||
_path.discard(obj_id)
|
||
return result
|
||
if isinstance(value, dict):
|
||
_path.add(obj_id)
|
||
result = {k: _to_plain_data(v, _depth=_depth + 1, _path=_path) for k, v in value.items()}
|
||
_path.discard(obj_id)
|
||
return result
|
||
if isinstance(value, (list, tuple)):
|
||
_path.add(obj_id)
|
||
result = [_to_plain_data(v, _depth=_depth + 1, _path=_path) for v in value]
|
||
_path.discard(obj_id)
|
||
return result
|
||
if hasattr(value, "__dict__"):
|
||
_path.add(obj_id)
|
||
result = {
|
||
k: _to_plain_data(v, _depth=_depth + 1, _path=_path)
|
||
for k, v in vars(value).items()
|
||
if not k.startswith("_")
|
||
}
|
||
_path.discard(obj_id)
|
||
return result
|
||
return value
|
||
|
||
|
||
def _extract_preserved_thinking_blocks(message: Dict[str, Any]) -> List[Dict[str, Any]]:
|
||
"""Return Anthropic thinking blocks previously preserved on the message."""
|
||
raw_details = message.get("reasoning_details")
|
||
if not isinstance(raw_details, list):
|
||
return []
|
||
|
||
preserved: List[Dict[str, Any]] = []
|
||
for detail in raw_details:
|
||
if not isinstance(detail, dict):
|
||
continue
|
||
block_type = str(detail.get("type", "") or "").strip().lower()
|
||
if block_type not in {"thinking", "redacted_thinking"}:
|
||
continue
|
||
preserved.append(copy.deepcopy(detail))
|
||
return preserved
|
||
|
||
|
||
def _convert_content_to_anthropic(content: Any) -> Any:
|
||
"""Convert OpenAI-style multimodal content arrays to Anthropic blocks."""
|
||
if not isinstance(content, list):
|
||
return content
|
||
|
||
converted = []
|
||
for part in content:
|
||
block = _convert_content_part_to_anthropic(part)
|
||
if block is not None:
|
||
converted.append(block)
|
||
return converted
|
||
|
||
|
||
def convert_messages_to_anthropic(
    messages: List[Dict],
    base_url: str | None = None,
) -> Tuple[Optional[Any], List[Dict]]:
    """Convert OpenAI-format messages to Anthropic format.

    Returns (system_prompt, anthropic_messages).
    System messages are extracted since Anthropic takes them as a separate param.
    system_prompt is a string or list of content blocks (when cache_control present).

    When *base_url* is provided and points to a third-party Anthropic-compatible
    endpoint, all thinking block signatures are stripped. Signatures are
    Anthropic-proprietary — third-party endpoints cannot validate them and will
    reject them with HTTP 400 "Invalid signature in thinking block".

    Pipeline (order matters): per-message conversion → orphaned tool_use /
    tool_result stripping → role-alternation repair → thinking-block
    signature management.
    """
    system = None
    result = []

    # ── Pass 1: per-message role conversion ─────────────────────────
    for m in messages:
        role = m.get("role", "user")
        content = m.get("content", "")

        if role == "system":
            # System messages become the separate `system` param; the last
            # one seen wins.
            if isinstance(content, list):
                # Preserve cache_control markers on content blocks
                has_cache = any(
                    p.get("cache_control") for p in content if isinstance(p, dict)
                )
                if has_cache:
                    system = [p for p in content if isinstance(p, dict)]
                else:
                    system = "\n".join(
                        p["text"] for p in content if p.get("type") == "text"
                    )
            else:
                system = content
            continue

        if role == "assistant":
            # Thinking blocks preserved from a prior Anthropic turn come first.
            blocks = _extract_preserved_thinking_blocks(m)
            if content:
                if isinstance(content, list):
                    converted_content = _convert_content_to_anthropic(content)
                    if isinstance(converted_content, list):
                        blocks.extend(converted_content)
                else:
                    blocks.append({"type": "text", "text": str(content)})
            # OpenAI tool_calls → Anthropic tool_use blocks.
            for tc in m.get("tool_calls", []):
                if not tc or not isinstance(tc, dict):
                    continue
                fn = tc.get("function", {})
                args = fn.get("arguments", "{}")
                try:
                    parsed_args = json.loads(args) if isinstance(args, str) else args
                except (json.JSONDecodeError, ValueError):
                    # Malformed JSON arguments — send an empty input rather
                    # than failing the whole conversion.
                    parsed_args = {}
                blocks.append({
                    "type": "tool_use",
                    "id": _sanitize_tool_id(tc.get("id", "")),
                    "name": fn.get("name", ""),
                    "input": parsed_args,
                })
            # Kimi's /coding endpoint (Anthropic protocol) requires assistant
            # tool-call messages to carry reasoning_content when thinking is
            # enabled server-side. Preserve it as a thinking block so Kimi
            # can validate the message history. See hermes-agent#13848.
            #
            # Accept empty string "" — _copy_reasoning_content_for_api()
            # injects "" as a tier-3 fallback for Kimi tool-call messages
            # that had no reasoning. Kimi requires the field to exist, even
            # if empty.
            #
            # Prepend (not append): Anthropic protocol requires thinking
            # blocks before text and tool_use blocks.
            #
            # Guard: only add when reasoning_details didn't already contribute
            # thinking blocks. On native Anthropic, reasoning_details produces
            # signed thinking blocks — adding another unsigned one from
            # reasoning_content would create a duplicate (same text) that gets
            # downgraded to a spurious text block on the last assistant message.
            reasoning_content = m.get("reasoning_content")
            _already_has_thinking = any(
                isinstance(b, dict) and b.get("type") in ("thinking", "redacted_thinking")
                for b in blocks
            )
            if isinstance(reasoning_content, str) and not _already_has_thinking:
                blocks.insert(0, {"type": "thinking", "thinking": reasoning_content})
            # Anthropic rejects empty assistant content
            effective = blocks or content
            if not effective or effective == "":
                effective = [{"type": "text", "text": "(empty)"}]
            result.append({"role": "assistant", "content": effective})
            continue

        if role == "tool":
            # Sanitize tool_use_id and ensure non-empty content
            result_content = content if isinstance(content, str) else json.dumps(content)
            if not result_content:
                result_content = "(no output)"
            tool_result = {
                "type": "tool_result",
                "tool_use_id": _sanitize_tool_id(m.get("tool_call_id", "")),
                "content": result_content,
            }
            if isinstance(m.get("cache_control"), dict):
                tool_result["cache_control"] = dict(m["cache_control"])
            # Merge consecutive tool results into one user message
            if (
                result
                and result[-1]["role"] == "user"
                and isinstance(result[-1]["content"], list)
                and result[-1]["content"]
                and result[-1]["content"][0].get("type") == "tool_result"
            ):
                result[-1]["content"].append(tool_result)
            else:
                result.append({"role": "user", "content": [tool_result]})
            continue

        # Regular user message — validate non-empty content (Anthropic rejects empty)
        if isinstance(content, list):
            converted_blocks = _convert_content_to_anthropic(content)
            # Check if all text blocks are empty
            if not converted_blocks or all(
                b.get("text", "").strip() == ""
                for b in converted_blocks
                if isinstance(b, dict) and b.get("type") == "text"
            ):
                converted_blocks = [{"type": "text", "text": "(empty message)"}]
            result.append({"role": "user", "content": converted_blocks})
        else:
            # Validate string content is non-empty
            if not content or (isinstance(content, str) and not content.strip()):
                content = "(empty message)"
            result.append({"role": "user", "content": content})

    # ── Pass 2a: strip orphaned tool_use blocks ─────────────────────
    # Strip orphaned tool_use blocks (no matching tool_result follows)
    tool_result_ids = set()
    for m in result:
        if m["role"] == "user" and isinstance(m["content"], list):
            for block in m["content"]:
                if block.get("type") == "tool_result":
                    tool_result_ids.add(block.get("tool_use_id"))
    for m in result:
        if m["role"] == "assistant" and isinstance(m["content"], list):
            m["content"] = [
                b
                for b in m["content"]
                if b.get("type") != "tool_use" or b.get("id") in tool_result_ids
            ]
            if not m["content"]:
                m["content"] = [{"type": "text", "text": "(tool call removed)"}]

    # ── Pass 2b: strip orphaned tool_result blocks ──────────────────
    # Strip orphaned tool_result blocks (no matching tool_use precedes them).
    # This is the mirror of the above: context compression or session truncation
    # can remove an assistant message containing a tool_use while leaving the
    # subsequent tool_result intact. Anthropic rejects these with a 400.
    tool_use_ids = set()
    for m in result:
        if m["role"] == "assistant" and isinstance(m["content"], list):
            for block in m["content"]:
                if block.get("type") == "tool_use":
                    tool_use_ids.add(block.get("id"))
    for m in result:
        if m["role"] == "user" and isinstance(m["content"], list):
            m["content"] = [
                b
                for b in m["content"]
                if b.get("type") != "tool_result" or b.get("tool_use_id") in tool_use_ids
            ]
            if not m["content"]:
                m["content"] = [{"type": "text", "text": "(tool result removed)"}]

    # ── Pass 3: repair role alternation ─────────────────────────────
    # Enforce strict role alternation (Anthropic rejects consecutive same-role messages)
    fixed = []
    for m in result:
        if fixed and fixed[-1]["role"] == m["role"]:
            if m["role"] == "user":
                # Merge consecutive user messages
                prev_content = fixed[-1]["content"]
                curr_content = m["content"]
                if isinstance(prev_content, str) and isinstance(curr_content, str):
                    fixed[-1]["content"] = prev_content + "\n" + curr_content
                elif isinstance(prev_content, list) and isinstance(curr_content, list):
                    fixed[-1]["content"] = prev_content + curr_content
                else:
                    # Mixed types — wrap string in list
                    if isinstance(prev_content, str):
                        prev_content = [{"type": "text", "text": prev_content}]
                    if isinstance(curr_content, str):
                        curr_content = [{"type": "text", "text": curr_content}]
                    fixed[-1]["content"] = prev_content + curr_content
            else:
                # Consecutive assistant messages — merge text content.
                # Drop thinking blocks from the *second* message: their
                # signature was computed against a different turn boundary
                # and becomes invalid once merged.
                if isinstance(m["content"], list):
                    m["content"] = [
                        b for b in m["content"]
                        if not (isinstance(b, dict) and b.get("type") in ("thinking", "redacted_thinking"))
                    ]
                prev_blocks = fixed[-1]["content"]
                curr_blocks = m["content"]
                if isinstance(prev_blocks, list) and isinstance(curr_blocks, list):
                    fixed[-1]["content"] = prev_blocks + curr_blocks
                elif isinstance(prev_blocks, str) and isinstance(curr_blocks, str):
                    fixed[-1]["content"] = prev_blocks + "\n" + curr_blocks
                else:
                    # Mixed types — normalize both to list and merge
                    if isinstance(prev_blocks, str):
                        prev_blocks = [{"type": "text", "text": prev_blocks}]
                    if isinstance(curr_blocks, str):
                        curr_blocks = [{"type": "text", "text": curr_blocks}]
                    fixed[-1]["content"] = prev_blocks + curr_blocks
        else:
            fixed.append(m)
    result = fixed

    # ── Thinking block signature management ──────────────────────────
    # Anthropic signs thinking blocks against the full turn content.
    # Any upstream mutation (context compression, session truncation,
    # orphan stripping, message merging) invalidates the signature,
    # causing HTTP 400 "Invalid signature in thinking block".
    #
    # Signatures are Anthropic-proprietary. Third-party endpoints
    # (MiniMax, Azure AI Foundry, self-hosted proxies) cannot validate
    # them and will reject them outright. When targeting a third-party
    # endpoint, strip ALL thinking/redacted_thinking blocks from every
    # assistant message — the third-party will generate its own
    # thinking blocks if it supports extended thinking.
    #
    # For direct Anthropic (strategy following clawdbot/OpenClaw):
    # 1. Strip thinking/redacted_thinking from all assistant messages
    #    EXCEPT the last one — preserves reasoning continuity on the
    #    current tool-use chain while avoiding stale signature errors.
    # 2. Downgrade unsigned thinking blocks (no signature) to text —
    #    Anthropic can't validate them and will reject them.
    # 3. Strip cache_control from thinking/redacted_thinking blocks —
    #    cache markers can interfere with signature validation.
    _THINKING_TYPES = frozenset(("thinking", "redacted_thinking"))
    _is_third_party = _is_third_party_anthropic_endpoint(base_url)
    _is_kimi = _is_kimi_coding_endpoint(base_url)

    last_assistant_idx = None
    for i in range(len(result) - 1, -1, -1):
        if result[i].get("role") == "assistant":
            last_assistant_idx = i
            break

    for idx, m in enumerate(result):
        if m.get("role") != "assistant" or not isinstance(m.get("content"), list):
            continue

        if _is_kimi:
            # Kimi's /coding endpoint enables thinking server-side and
            # requires unsigned thinking blocks on replayed assistant
            # tool-call messages. Strip signed Anthropic blocks (Kimi
            # can't validate signatures) but preserve the unsigned ones
            # we synthesised from reasoning_content above.
            new_content = []
            for b in m["content"]:
                if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES:
                    new_content.append(b)
                    continue
                if b.get("signature") or b.get("data"):
                    # Anthropic-signed block — Kimi can't validate, strip
                    continue
                # Unsigned thinking (synthesised from reasoning_content) —
                # keep it: Kimi needs it for message-history validation.
                new_content.append(b)
            m["content"] = new_content or [{"type": "text", "text": "(empty)"}]
        elif _is_third_party or idx != last_assistant_idx:
            # Third-party endpoint: strip ALL thinking blocks from every
            # assistant message — signatures are Anthropic-proprietary.
            # Direct Anthropic: strip from non-latest assistant messages only.
            stripped = [
                b for b in m["content"]
                if not (isinstance(b, dict) and b.get("type") in _THINKING_TYPES)
            ]
            m["content"] = stripped or [{"type": "text", "text": "(thinking elided)"}]
        else:
            # Latest assistant on direct Anthropic: keep signed thinking
            # blocks for reasoning continuity; downgrade unsigned ones to
            # plain text.
            new_content = []
            for b in m["content"]:
                if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES:
                    new_content.append(b)
                    continue
                if b.get("type") == "redacted_thinking":
                    # Redacted blocks use 'data' for the signature payload
                    if b.get("data"):
                        new_content.append(b)
                    # else: drop — no data means it can't be validated
                elif b.get("signature"):
                    # Signed thinking block — keep it
                    new_content.append(b)
                else:
                    # Unsigned thinking — downgrade to text so it's not lost
                    thinking_text = b.get("thinking", "")
                    if thinking_text:
                        new_content.append({"type": "text", "text": thinking_text})
            m["content"] = new_content or [{"type": "text", "text": "(empty)"}]

        # Strip cache_control from any remaining thinking/redacted_thinking
        # blocks — cache markers interfere with signature validation.
        for b in m["content"]:
            if isinstance(b, dict) and b.get("type") in _THINKING_TYPES:
                b.pop("cache_control", None)

    return system, result
|
||
|
||
|
||
def build_anthropic_kwargs(
    model: str,
    messages: List[Dict],
    tools: Optional[List[Dict]],
    max_tokens: Optional[int],
    reasoning_config: Optional[Dict[str, Any]],
    tool_choice: Optional[str] = None,
    is_oauth: bool = False,
    preserve_dots: bool = False,
    context_length: Optional[int] = None,
    base_url: str | None = None,
    fast_mode: bool = False,
) -> Dict[str, Any]:
    """Build kwargs for anthropic.messages.create().

    Naming note — two distinct concepts, easily confused:
        max_tokens     = OUTPUT token cap for a single response.
                         Anthropic's API calls this "max_tokens" but it only
                         limits the *output*.  Anthropic's own native SDK
                         renamed it "max_output_tokens" for clarity.
        context_length = TOTAL context window (input tokens + output tokens).
                         The API enforces: input_tokens + max_tokens ≤ context_length.
                         Stored on the ContextCompressor; reduced on overflow errors.

    When *max_tokens* is None the model's native output ceiling is used
    (e.g. 128K for Opus 4.6, 64K for Sonnet 4.6).

    When *context_length* is provided and the model's native output ceiling
    exceeds it (e.g. a local endpoint with an 8K window), the output cap is
    clamped to context_length − 1.  This only kicks in for unusually small
    context windows; for full-size models the native output cap is always
    smaller than the context window so no clamping happens.
    NOTE: this clamping does not account for prompt size — if the prompt is
    large, Anthropic may still reject the request.  The caller must detect
    "max_tokens too large given prompt" errors and retry with a smaller cap
    (see parse_available_output_tokens_from_error + _ephemeral_max_output_tokens).

    When *is_oauth* is True, applies Claude Code compatibility transforms:
    system prompt prefix, tool name prefixing, and prompt sanitization.

    When *preserve_dots* is True, model name dots are not converted to hyphens
    (for Alibaba/DashScope anthropic-compatible endpoints: qwen3.5-plus).

    When *base_url* points to a third-party Anthropic-compatible endpoint,
    thinking block signatures are stripped (they are Anthropic-proprietary).

    When *fast_mode* is True, adds ``extra_body["speed"] = "fast"`` and the
    fast-mode beta header for ~2.5x faster output throughput on Opus 4.6.
    Currently only supported on native Anthropic endpoints (not third-party
    compatible ones).
    """
    system, anthropic_messages = convert_messages_to_anthropic(messages, base_url=base_url)
    anthropic_tools = convert_tools_to_anthropic(tools) if tools else []

    model = normalize_model_name(model, preserve_dots=preserve_dots)
    # effective_max_tokens = output cap for this call (≠ total context window)
    # Use the resolver helper so non-positive values (negative ints,
    # fractional floats, NaN, non-numeric) fail locally with a clear error
    # rather than 400-ing at the Anthropic API. See openclaw/openclaw#66664.
    effective_max_tokens = _resolve_anthropic_messages_max_tokens(
        max_tokens, model, context_length=context_length
    )

    # Clamp output cap to fit inside the total context window.
    # Only matters for small custom endpoints where context_length < native
    # output ceiling. For standard Anthropic models context_length (e.g.
    # 200K) is always larger than the output ceiling (e.g. 128K), so this
    # branch is not taken.
    if context_length and effective_max_tokens > context_length:
        effective_max_tokens = max(context_length - 1, 1)

    # ── OAuth: Claude Code identity ──────────────────────────────────
    if is_oauth:
        # 1. Prepend Claude Code system prompt identity
        cc_block = {"type": "text", "text": _CLAUDE_CODE_SYSTEM_PREFIX}
        if isinstance(system, list):
            system = [cc_block] + system
        elif isinstance(system, str) and system:
            system = [cc_block, {"type": "text", "text": system}]
        else:
            system = [cc_block]

        # 2. Sanitize system prompt — replace product name references
        #    to avoid Anthropic's server-side content filters.
        for block in system:
            if isinstance(block, dict) and block.get("type") == "text":
                text = block.get("text", "")
                text = text.replace("Hermes Agent", "Claude Code")
                text = text.replace("Hermes agent", "Claude Code")
                text = text.replace("hermes-agent", "claude-code")
                text = text.replace("Nous Research", "Anthropic")
                block["text"] = text

        # 3. Prefix tool names with mcp_ (Claude Code convention).
        #    (Iterating an empty list is a no-op, so no guard needed.)
        for tool in anthropic_tools:
            if "name" in tool:
                tool["name"] = _MCP_TOOL_PREFIX + tool["name"]

        # 4. Prefix tool names in message history.  Only tool_use blocks
        #    carry a name; tool_result blocks reference the call via
        #    tool_use_id, so they need no rewriting.
        for msg in anthropic_messages:
            content = msg.get("content")
            if not isinstance(content, list):
                continue
            for block in content:
                if (
                    isinstance(block, dict)
                    and block.get("type") == "tool_use"
                    and "name" in block
                    and not block["name"].startswith(_MCP_TOOL_PREFIX)
                ):
                    block["name"] = _MCP_TOOL_PREFIX + block["name"]

    kwargs: Dict[str, Any] = {
        "model": model,
        "messages": anthropic_messages,
        "max_tokens": effective_max_tokens,
    }

    if system:
        kwargs["system"] = system

    if anthropic_tools:
        kwargs["tools"] = anthropic_tools
        # Map OpenAI tool_choice to Anthropic format
        if tool_choice == "auto" or tool_choice is None:
            kwargs["tool_choice"] = {"type": "auto"}
        elif tool_choice == "required":
            kwargs["tool_choice"] = {"type": "any"}
        elif tool_choice == "none":
            # Anthropic has no tool_choice "none" — omit tools entirely to prevent use
            kwargs.pop("tools", None)
        elif isinstance(tool_choice, str):
            # Specific tool name
            kwargs["tool_choice"] = {"type": "tool", "name": tool_choice}

    # Map reasoning_config to Anthropic's thinking parameter.
    # Claude 4.6+ models use adaptive thinking + output_config.effort.
    # Older models use manual thinking with budget_tokens.
    # MiniMax Anthropic-compat endpoints support thinking (manual mode only,
    # not adaptive). Haiku does NOT support extended thinking — skip entirely.
    #
    # Kimi's /coding endpoint speaks the Anthropic Messages protocol but has
    # its own thinking semantics: when ``thinking.enabled`` is sent, Kimi
    # validates the message history and requires every prior assistant
    # tool-call message to carry OpenAI-style ``reasoning_content``. The
    # Anthropic path never populates that field, and
    # ``convert_messages_to_anthropic`` strips all Anthropic thinking blocks
    # on third-party endpoints — so the request fails with HTTP 400
    # "thinking is enabled but reasoning_content is missing in assistant
    # tool call message at index N". Kimi's reasoning is driven server-side
    # on the /coding route, so skip Anthropic's thinking parameter entirely
    # for that host. (Kimi on chat_completions enables thinking via
    # extra_body in the ChatCompletionsTransport — see #13503.)
    #
    # On 4.7+ the `thinking.display` field defaults to "omitted", which
    # silently hides reasoning text that Hermes surfaces in its CLI. We
    # request "summarized" so the reasoning blocks stay populated — matching
    # 4.6 behavior and preserving the activity-feed UX during long tool runs.
    _is_kimi_coding = _is_kimi_coding_endpoint(base_url)
    if reasoning_config and isinstance(reasoning_config, dict) and not _is_kimi_coding:
        if reasoning_config.get("enabled") is not False and "haiku" not in model.lower():
            effort = str(reasoning_config.get("effort", "medium")).lower()
            budget = THINKING_BUDGET.get(effort, 8000)
            if _supports_adaptive_thinking(model):
                kwargs["thinking"] = {
                    "type": "adaptive",
                    "display": "summarized",
                }
                adaptive_effort = ADAPTIVE_EFFORT_MAP.get(effort, "medium")
                # Downgrade xhigh→max on models that don't list xhigh as a
                # supported level (Opus/Sonnet 4.6). Opus 4.7+ keeps xhigh.
                if adaptive_effort == "xhigh" and not _supports_xhigh_effort(model):
                    adaptive_effort = "max"
                kwargs["output_config"] = {
                    "effort": adaptive_effort,
                }
            else:
                kwargs["thinking"] = {"type": "enabled", "budget_tokens": budget}
                # Anthropic requires temperature=1 when thinking is enabled on older models
                kwargs["temperature"] = 1
                # NOTE(review): this floor (budget + 4096) can raise
                # max_tokens back above a small context_length after the
                # clamp earlier in this function — confirm whether tiny
                # custom endpoints ever take this manual-thinking path.
                kwargs["max_tokens"] = max(effective_max_tokens, budget + 4096)

    # ── Strip sampling params on 4.7+ ─────────────────────────────────
    # Opus 4.7 rejects any non-default temperature/top_p/top_k with a 400.
    # Callers (auxiliary_client, etc.) may set these for older models;
    # drop them here as a safety net so upstream 4.6 → 4.7 migrations
    # don't require coordinated edits everywhere.
    if _forbids_sampling_params(model):
        for _sampling_key in ("temperature", "top_p", "top_k"):
            kwargs.pop(_sampling_key, None)

    # ── Fast mode (Opus 4.6 only) ────────────────────────────────────
    # Adds extra_body.speed="fast" + the fast-mode beta header for ~2.5x
    # output speed. Only for native Anthropic endpoints — third-party
    # providers would reject the unknown beta header and speed parameter.
    if fast_mode and not _is_third_party_anthropic_endpoint(base_url):
        kwargs.setdefault("extra_body", {})["speed"] = "fast"
        # Build extra_headers with ALL applicable betas (the per-request
        # extra_headers override the client-level anthropic-beta header).
        betas = list(_common_betas_for_base_url(base_url))
        if is_oauth:
            betas.extend(_OAUTH_ONLY_BETAS)
        betas.append(_FAST_MODE_BETA)
        kwargs["extra_headers"] = {"anthropic-beta": ",".join(betas)}

    return kwargs
|
||
|
||
|