mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix(anthropic): complete third-party Anthropic-compatible provider support (#12846)
Third-party gateways that speak the native Anthropic protocol (MiniMax,
Zhipu GLM, Alibaba DashScope, Kimi, LiteLLM proxies) now work end-to-end
with the same feature set as direct api.anthropic.com callers. Synthesizes
eight stale community PRs into one consolidated change.
Six fixes:
- URL detection: consolidate three inline `endswith("/anthropic")`
checks in runtime_provider.py into the shared _detect_api_mode_for_url
helper. Third-party /anthropic endpoints now auto-resolve to
api_mode=anthropic_messages via one code path instead of three.
- OAuth leak-guard: all five sites that assign `_is_anthropic_oauth`
(__init__, switch_model, _try_refresh_anthropic_client_credentials,
_swap_credential, _try_activate_fallback) now gate on
`provider == "anthropic"` so a stale ANTHROPIC_TOKEN never trips
Claude-Code identity injection on third-party endpoints. Previously
only 2 of 5 sites were guarded.
- Prompt caching: new method `_anthropic_prompt_cache_policy()` returns
`(should_cache, use_native_layout)` per endpoint. Replaces three
inline conditions and the `native_anthropic=(api_mode=='anthropic_messages')`
call-site flag. Native Anthropic and third-party Anthropic gateways
both get the native cache_control layout; OpenRouter gets envelope
layout. Layout is persisted in `_primary_runtime` so fallback
restoration preserves the per-endpoint choice.
- Auxiliary client: `_try_custom_endpoint` honors
`api_mode=anthropic_messages` and builds `AnthropicAuxiliaryClient`
instead of silently downgrading to an OpenAI-wire client. Degrades
gracefully to OpenAI-wire when the anthropic SDK isn't installed.
- Config hygiene: `_update_config_for_provider` (hermes_cli/auth.py)
clears stale `api_key`/`api_mode` when switching to a built-in
provider, so a previous MiniMax custom endpoint's credentials can't
leak into a later OpenRouter session.
- Truncation continuation: length-continuation and tool-call-truncation
retry now cover `anthropic_messages` in addition to `chat_completions`
and `bedrock_converse`. Reuses the existing `_build_assistant_message`
path via `normalize_anthropic_response()` so the interim message
shape is byte-identical to the non-truncated path.
Tests: 6 new files, 42 test cases. Targeted run + tests/run_agent,
tests/agent, tests/hermes_cli all pass (4554 passed).
Synthesized from (credits preserved via Co-authored-by trailers):
#7410 @nocoo — URL detection helper
#7393 @keyuyuan — OAuth 5-site guard
#7367 @n-WN — OAuth guard (narrower cousin, kept comment)
#8636 @sgaofen — caching helper + native-vs-proxy layout split
#10954 @Only-Code-A — caching on anthropic_messages+Claude
#7648 @zhongyueming1121 — aux client anthropic_messages branch
#6096 @hansnow — /model switch clears stale api_mode
#9691 @TroyMitchell911 — anthropic_messages truncation continuation
Closes: #7366, #8294 (third-party Anthropic identity + caching).
Supersedes: #7410, #7367, #7393, #8636, #10954, #7648, #6096, #9691.
Rejects: #9621 (OpenAI-wire caching with incomplete blocklist — risky),
#7242 (superseded by #9691, stale branch),
#8321 (targets smart_model_routing which was removed in #12732).
Co-authored-by: nocoo <nocoo@users.noreply.github.com>
Co-authored-by: Keyu Yuan <leoyuan0099@gmail.com>
Co-authored-by: Zoee <30841158+n-WN@users.noreply.github.com>
Co-authored-by: sgaofen <135070653+sgaofen@users.noreply.github.com>
Co-authored-by: Only-Code-A <bxzt2006@163.com>
Co-authored-by: zhongyueming <mygamez@163.com>
Co-authored-by: Xiaohan Li <hansnow@users.noreply.github.com>
Co-authored-by: Troy Mitchell <i@troy-y.org>
This commit is contained in:
parent
491cf25eef
commit
65a31ee0d5
11 changed files with 911 additions and 58 deletions
|
|
@ -1098,7 +1098,7 @@ def _validate_base_url(base_url: str) -> None:
|
|||
) from exc
|
||||
|
||||
|
||||
def _try_custom_endpoint() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]:
|
||||
runtime = _resolve_custom_runtime()
|
||||
if len(runtime) == 2:
|
||||
custom_base, custom_key = runtime
|
||||
|
|
@ -1114,6 +1114,23 @@ def _try_custom_endpoint() -> Tuple[Optional[OpenAI], Optional[str]]:
|
|||
if custom_mode == "codex_responses":
|
||||
real_client = OpenAI(api_key=custom_key, base_url=custom_base)
|
||||
return CodexAuxiliaryClient(real_client, model), model
|
||||
if custom_mode == "anthropic_messages":
|
||||
# Third-party Anthropic-compatible gateway (MiniMax, Zhipu GLM,
|
||||
# LiteLLM proxies, etc.). Must NEVER be treated as OAuth —
|
||||
# Anthropic OAuth claims only apply to api.anthropic.com.
|
||||
try:
|
||||
from agent.anthropic_adapter import build_anthropic_client
|
||||
real_client = build_anthropic_client(custom_key, custom_base)
|
||||
except ImportError:
|
||||
logger.warning(
|
||||
"Custom endpoint declares api_mode=anthropic_messages but the "
|
||||
"anthropic SDK is not installed — falling back to OpenAI-wire."
|
||||
)
|
||||
return OpenAI(api_key=custom_key, base_url=custom_base), model
|
||||
return (
|
||||
AnthropicAuxiliaryClient(real_client, model, custom_key, custom_base, is_oauth=False),
|
||||
model,
|
||||
)
|
||||
return OpenAI(api_key=custom_key, base_url=custom_base), model
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -2732,6 +2732,17 @@ def _update_config_for_provider(
|
|||
# Clear stale base_url to prevent contamination when switching providers
|
||||
model_cfg.pop("base_url", None)
|
||||
|
||||
# Clear stale api_key/api_mode left over from a previous custom provider.
|
||||
# When the user switches from e.g. a MiniMax custom endpoint
|
||||
# (api_mode=anthropic_messages, api_key=mxp-...) to a built-in provider
|
||||
# (e.g. OpenRouter), the stale api_key/api_mode would override the new
|
||||
# provider's credentials and transport choice. Built-in providers that
|
||||
# need a specific api_mode (copilot, xai) set it at request-resolution
|
||||
# time via `_copilot_runtime_api_mode` / `_detect_api_mode_for_url`, so
|
||||
# removing the persisted value here is safe.
|
||||
model_cfg.pop("api_key", None)
|
||||
model_cfg.pop("api_mode", None)
|
||||
|
||||
# When switching to a non-OpenRouter provider, ensure model.default is
|
||||
# valid for the new provider. An OpenRouter-formatted name like
|
||||
# "anthropic/claude-opus-4.6" will fail on direct-API providers.
|
||||
|
|
|
|||
|
|
@ -38,14 +38,21 @@ def _normalize_custom_provider_name(value: str) -> str:
|
|||
def _detect_api_mode_for_url(base_url: str) -> Optional[str]:
|
||||
"""Auto-detect api_mode from the resolved base URL.
|
||||
|
||||
Direct api.openai.com endpoints need the Responses API for GPT-5.x
|
||||
tool calls with reasoning (chat/completions returns 400).
|
||||
- Direct api.openai.com endpoints need the Responses API for GPT-5.x
|
||||
tool calls with reasoning (chat/completions returns 400).
|
||||
- Third-party Anthropic-compatible gateways (MiniMax, Zhipu GLM,
|
||||
LiteLLM proxies, etc.) conventionally expose the native Anthropic
|
||||
protocol under a ``/anthropic`` suffix — treat those as
|
||||
``anthropic_messages`` transport instead of the default
|
||||
``chat_completions``.
|
||||
"""
|
||||
normalized = (base_url or "").strip().lower().rstrip("/")
|
||||
if "api.x.ai" in normalized:
|
||||
return "codex_responses"
|
||||
if "api.openai.com" in normalized and "openrouter" not in normalized:
|
||||
return "codex_responses"
|
||||
if normalized.endswith("/anthropic"):
|
||||
return "anthropic_messages"
|
||||
return None
|
||||
|
||||
|
||||
|
|
@ -194,8 +201,12 @@ def _resolve_runtime_from_pool_entry(
|
|||
elif provider in ("opencode-zen", "opencode-go"):
|
||||
from hermes_cli.models import opencode_model_api_mode
|
||||
api_mode = opencode_model_api_mode(provider, model_cfg.get("default", ""))
|
||||
elif base_url.rstrip("/").endswith("/anthropic"):
|
||||
api_mode = "anthropic_messages"
|
||||
else:
|
||||
# Auto-detect Anthropic-compatible endpoints (/anthropic suffix,
|
||||
# api.openai.com → codex_responses, api.x.ai → codex_responses).
|
||||
detected = _detect_api_mode_for_url(base_url)
|
||||
if detected:
|
||||
api_mode = detected
|
||||
|
||||
# OpenCode base URLs end with /v1 for OpenAI-compatible models, but the
|
||||
# Anthropic SDK prepends its own /v1/messages to the base_url. Strip the
|
||||
|
|
@ -642,8 +653,11 @@ def _resolve_explicit_runtime(
|
|||
configured_mode = _parse_api_mode(model_cfg.get("api_mode"))
|
||||
if configured_mode:
|
||||
api_mode = configured_mode
|
||||
elif base_url.rstrip("/").endswith("/anthropic"):
|
||||
api_mode = "anthropic_messages"
|
||||
else:
|
||||
# Auto-detect Anthropic-compatible endpoints (/anthropic suffix).
|
||||
detected = _detect_api_mode_for_url(base_url)
|
||||
if detected:
|
||||
api_mode = detected
|
||||
|
||||
return {
|
||||
"provider": provider,
|
||||
|
|
@ -965,10 +979,13 @@ def resolve_runtime_provider(
|
|||
elif provider in ("opencode-zen", "opencode-go"):
|
||||
from hermes_cli.models import opencode_model_api_mode
|
||||
api_mode = opencode_model_api_mode(provider, model_cfg.get("default", ""))
|
||||
# Auto-detect Anthropic-compatible endpoints by URL convention
|
||||
# (e.g. https://api.minimax.io/anthropic, https://dashscope.../anthropic)
|
||||
elif base_url.rstrip("/").endswith("/anthropic"):
|
||||
api_mode = "anthropic_messages"
|
||||
else:
|
||||
# Auto-detect Anthropic-compatible endpoints by URL convention
|
||||
# (e.g. https://api.minimax.io/anthropic, https://dashscope.../anthropic)
|
||||
# plus api.openai.com → codex_responses and api.x.ai → codex_responses.
|
||||
detected = _detect_api_mode_for_url(base_url)
|
||||
if detected:
|
||||
api_mode = detected
|
||||
# Strip trailing /v1 for OpenCode Anthropic models (see comment above).
|
||||
if api_mode == "anthropic_messages" and provider in ("opencode-zen", "opencode-go"):
|
||||
base_url = re.sub(r"/v1/?$", "", base_url)
|
||||
|
|
|
|||
186
run_agent.py
186
run_agent.py
|
|
@ -892,13 +892,15 @@ class AIAgent:
|
|||
self.prefill_messages = prefill_messages or [] # Prefilled conversation turns
|
||||
self._force_ascii_payload = False
|
||||
|
||||
# Anthropic prompt caching: auto-enabled for Claude models via OpenRouter.
|
||||
# Reduces input costs by ~75% on multi-turn conversations by caching the
|
||||
# conversation prefix. Uses system_and_3 strategy (4 breakpoints).
|
||||
is_openrouter = self._is_openrouter_url()
|
||||
is_claude = "claude" in self.model.lower()
|
||||
is_native_anthropic = self.api_mode == "anthropic_messages" and self.provider == "anthropic"
|
||||
self._use_prompt_caching = (is_openrouter and is_claude) or is_native_anthropic
|
||||
# Anthropic prompt caching: auto-enabled for Claude models on native
|
||||
# Anthropic, OpenRouter, and third-party gateways that speak the
|
||||
# Anthropic protocol (``api_mode == 'anthropic_messages'``). Reduces
|
||||
# input costs by ~75% on multi-turn conversations. Uses system_and_3
|
||||
# strategy (4 breakpoints). See ``_anthropic_prompt_cache_policy``
|
||||
# for the layout-vs-transport decision.
|
||||
self._use_prompt_caching, self._use_native_cache_layout = (
|
||||
self._anthropic_prompt_cache_policy()
|
||||
)
|
||||
self._cache_ttl = "5m" # Default 5-minute TTL (1.25x write cost)
|
||||
|
||||
# Iteration budget: the LLM is only notified when it actually exhausts
|
||||
|
|
@ -1013,8 +1015,15 @@ class AIAgent:
|
|||
self.api_key = effective_key
|
||||
self._anthropic_api_key = effective_key
|
||||
self._anthropic_base_url = base_url
|
||||
# Only mark the session as OAuth-authenticated when the token
|
||||
# genuinely belongs to native Anthropic. Third-party providers
|
||||
# (MiniMax, Kimi, GLM, LiteLLM proxies) that accept the
|
||||
# Anthropic protocol must never trip OAuth code paths — doing
|
||||
# so injects Claude-Code identity headers and system prompts
|
||||
# that cause 401/403 on their endpoints. Guards #1739 and
|
||||
# the third-party identity-injection bug.
|
||||
from agent.anthropic_adapter import _is_oauth_token as _is_oat
|
||||
self._is_anthropic_oauth = _is_oat(effective_key)
|
||||
self._is_anthropic_oauth = _is_oat(effective_key) if _is_native_anthropic else False
|
||||
self._anthropic_client = build_anthropic_client(effective_key, base_url, timeout=_provider_timeout)
|
||||
# No OpenAI client needed for Anthropic mode
|
||||
self.client = None
|
||||
|
|
@ -1227,7 +1236,12 @@ class AIAgent:
|
|||
|
||||
# Show prompt caching status
|
||||
if self._use_prompt_caching and not self.quiet_mode:
|
||||
source = "native Anthropic" if is_native_anthropic else "Claude via OpenRouter"
|
||||
if self._use_native_cache_layout and self.provider == "anthropic":
|
||||
source = "native Anthropic"
|
||||
elif self._use_native_cache_layout:
|
||||
source = "Anthropic-compatible endpoint"
|
||||
else:
|
||||
source = "Claude via OpenRouter"
|
||||
print(f"💾 Prompt caching: ENABLED ({source}, {self._cache_ttl} TTL)")
|
||||
|
||||
# Session logging setup - auto-save conversation trajectories for debugging
|
||||
|
|
@ -1701,6 +1715,7 @@ class AIAgent:
|
|||
"api_key": getattr(self, "api_key", ""),
|
||||
"client_kwargs": dict(self._client_kwargs),
|
||||
"use_prompt_caching": self._use_prompt_caching,
|
||||
"use_native_cache_layout": self._use_native_cache_layout,
|
||||
# Context engine state that _try_activate_fallback() overwrites.
|
||||
# Use getattr for model/base_url/api_key/provider since plugin
|
||||
# engines may not have these (they're ContextCompressor-specific).
|
||||
|
|
@ -1822,7 +1837,7 @@ class AIAgent:
|
|||
effective_key, self._anthropic_base_url,
|
||||
timeout=get_provider_request_timeout(self.provider, self.model),
|
||||
)
|
||||
self._is_anthropic_oauth = _is_oauth_token(effective_key)
|
||||
self._is_anthropic_oauth = _is_oauth_token(effective_key) if _is_native_anthropic else False
|
||||
self.client = None
|
||||
self._client_kwargs = {}
|
||||
else:
|
||||
|
|
@ -1842,10 +1857,13 @@ class AIAgent:
|
|||
)
|
||||
|
||||
# ── Re-evaluate prompt caching ──
|
||||
is_native_anthropic = api_mode == "anthropic_messages" and new_provider == "anthropic"
|
||||
self._use_prompt_caching = (
|
||||
("openrouter" in (self.base_url or "").lower() and "claude" in new_model.lower())
|
||||
or is_native_anthropic
|
||||
self._use_prompt_caching, self._use_native_cache_layout = (
|
||||
self._anthropic_prompt_cache_policy(
|
||||
provider=new_provider,
|
||||
base_url=self.base_url,
|
||||
api_mode=api_mode,
|
||||
model=new_model,
|
||||
)
|
||||
)
|
||||
|
||||
# ── Update context compressor ──
|
||||
|
|
@ -1880,6 +1898,7 @@ class AIAgent:
|
|||
"api_key": getattr(self, "api_key", ""),
|
||||
"client_kwargs": dict(self._client_kwargs),
|
||||
"use_prompt_caching": self._use_prompt_caching,
|
||||
"use_native_cache_layout": self._use_native_cache_layout,
|
||||
"compressor_model": getattr(_cc, "model", self.model) if _cc else self.model,
|
||||
"compressor_base_url": getattr(_cc, "base_url", self.base_url) if _cc else self.base_url,
|
||||
"compressor_api_key": getattr(_cc, "api_key", "") if _cc else "",
|
||||
|
|
@ -2143,6 +2162,55 @@ class AIAgent:
|
|||
"""Return True when the base URL targets OpenRouter."""
|
||||
return "openrouter" in self._base_url_lower
|
||||
|
||||
def _anthropic_prompt_cache_policy(
|
||||
self,
|
||||
*,
|
||||
provider: Optional[str] = None,
|
||||
base_url: Optional[str] = None,
|
||||
api_mode: Optional[str] = None,
|
||||
model: Optional[str] = None,
|
||||
) -> tuple[bool, bool]:
|
||||
"""Decide whether to apply Anthropic prompt caching and which layout to use.
|
||||
|
||||
Returns ``(should_cache, use_native_layout)``:
|
||||
* ``should_cache`` — inject ``cache_control`` breakpoints for this
|
||||
request (applies to OpenRouter Claude, native Anthropic, and
|
||||
third-party gateways that speak the native Anthropic protocol).
|
||||
* ``use_native_layout`` — place markers on the *inner* content
|
||||
blocks (native Anthropic accepts and requires this layout);
|
||||
when False markers go on the message envelope (OpenRouter and
|
||||
OpenAI-wire proxies expect the looser layout).
|
||||
|
||||
Third-party providers using the native Anthropic transport
|
||||
(``api_mode == 'anthropic_messages'`` + Claude-named model) get
|
||||
caching with the native layout so they benefit from the same
|
||||
cost reduction as direct Anthropic callers, provided their
|
||||
gateway implements the Anthropic cache_control contract
|
||||
(MiniMax, Zhipu GLM, LiteLLM's Anthropic proxy mode all do).
|
||||
"""
|
||||
eff_provider = (provider if provider is not None else self.provider) or ""
|
||||
eff_base_url = base_url if base_url is not None else (self.base_url or "")
|
||||
eff_api_mode = api_mode if api_mode is not None else (self.api_mode or "")
|
||||
eff_model = (model if model is not None else self.model) or ""
|
||||
|
||||
base_lower = eff_base_url.lower()
|
||||
is_claude = "claude" in eff_model.lower()
|
||||
is_openrouter = "openrouter" in base_lower
|
||||
is_anthropic_wire = eff_api_mode == "anthropic_messages"
|
||||
is_native_anthropic = (
|
||||
is_anthropic_wire
|
||||
and (eff_provider == "anthropic" or "api.anthropic.com" in base_lower)
|
||||
)
|
||||
|
||||
if is_native_anthropic:
|
||||
return True, True
|
||||
if is_openrouter and is_claude:
|
||||
return True, False
|
||||
if is_anthropic_wire and is_claude:
|
||||
# Third-party Anthropic-compatible gateway.
|
||||
return True, True
|
||||
return False, False
|
||||
|
||||
@staticmethod
|
||||
def _model_requires_responses_api(model: str) -> bool:
|
||||
"""Return True for models that require the Responses API path.
|
||||
|
|
@ -5322,9 +5390,12 @@ class AIAgent:
|
|||
return False
|
||||
|
||||
self._anthropic_api_key = new_token
|
||||
# Update OAuth flag — token type may have changed (API key ↔ OAuth)
|
||||
# Update OAuth flag — token type may have changed (API key ↔ OAuth).
|
||||
# Only treat as OAuth on native Anthropic; third-party endpoints using
|
||||
# the Anthropic protocol must not trip OAuth paths (#1739 & third-party
|
||||
# identity-injection guard).
|
||||
from agent.anthropic_adapter import _is_oauth_token
|
||||
self._is_anthropic_oauth = _is_oauth_token(new_token)
|
||||
self._is_anthropic_oauth = _is_oauth_token(new_token) if self.provider == "anthropic" else False
|
||||
return True
|
||||
|
||||
def _apply_client_headers_for_base_url(self, base_url: str) -> None:
|
||||
|
|
@ -5367,7 +5438,7 @@ class AIAgent:
|
|||
runtime_key, runtime_base,
|
||||
timeout=get_provider_request_timeout(self.provider, self.model),
|
||||
)
|
||||
self._is_anthropic_oauth = _is_oauth_token(runtime_key)
|
||||
self._is_anthropic_oauth = _is_oauth_token(runtime_key) if self.provider == "anthropic" else False
|
||||
self.api_key = runtime_key
|
||||
self.base_url = runtime_base
|
||||
return
|
||||
|
|
@ -6523,7 +6594,7 @@ class AIAgent:
|
|||
self._anthropic_client = build_anthropic_client(
|
||||
effective_key, self._anthropic_base_url, timeout=_fb_timeout,
|
||||
)
|
||||
self._is_anthropic_oauth = _is_oauth_token(effective_key)
|
||||
self._is_anthropic_oauth = _is_oauth_token(effective_key) if fb_provider == "anthropic" else False
|
||||
self.client = None
|
||||
self._client_kwargs = {}
|
||||
else:
|
||||
|
|
@ -6554,10 +6625,13 @@ class AIAgent:
|
|||
self._replace_primary_openai_client(reason="fallback_timeout_apply")
|
||||
|
||||
# Re-evaluate prompt caching for the new provider/model
|
||||
is_native_anthropic = fb_api_mode == "anthropic_messages" and fb_provider == "anthropic"
|
||||
self._use_prompt_caching = (
|
||||
("openrouter" in fb_base_url.lower() and "claude" in fb_model.lower())
|
||||
or is_native_anthropic
|
||||
self._use_prompt_caching, self._use_native_cache_layout = (
|
||||
self._anthropic_prompt_cache_policy(
|
||||
provider=fb_provider,
|
||||
base_url=fb_base_url,
|
||||
api_mode=fb_api_mode,
|
||||
model=fb_model,
|
||||
)
|
||||
)
|
||||
|
||||
# Update context compressor limits for the fallback model.
|
||||
|
|
@ -6617,6 +6691,12 @@ class AIAgent:
|
|||
self.api_key = rt["api_key"]
|
||||
self._client_kwargs = dict(rt["client_kwargs"])
|
||||
self._use_prompt_caching = rt["use_prompt_caching"]
|
||||
# Default to native layout when the restored snapshot predates the
|
||||
# native-vs-proxy split (older sessions saved before this PR).
|
||||
self._use_native_cache_layout = rt.get(
|
||||
"use_native_cache_layout",
|
||||
self.api_mode == "anthropic_messages" and self.provider == "anthropic",
|
||||
)
|
||||
|
||||
# ── Rebuild client for the primary provider ──
|
||||
if self.api_mode == "anthropic_messages":
|
||||
|
|
@ -9317,12 +9397,19 @@ class AIAgent:
|
|||
for idx, pfm in enumerate(self.prefill_messages):
|
||||
api_messages.insert(sys_offset + idx, pfm.copy())
|
||||
|
||||
# Apply Anthropic prompt caching for Claude models via OpenRouter.
|
||||
# Auto-detected: if model name contains "claude" and base_url is OpenRouter,
|
||||
# inject cache_control breakpoints (system + last 3 messages) to reduce
|
||||
# input token costs by ~75% on multi-turn conversations.
|
||||
# Apply Anthropic prompt caching for Claude models on native
|
||||
# Anthropic, OpenRouter, and third-party Anthropic-compatible
|
||||
# gateways. Auto-detected: if ``_use_prompt_caching`` is set,
|
||||
# inject cache_control breakpoints (system + last 3 messages)
|
||||
# to reduce input token costs by ~75% on multi-turn
|
||||
# conversations. Layout is chosen per endpoint by
|
||||
# ``_anthropic_prompt_cache_policy``.
|
||||
if self._use_prompt_caching:
|
||||
api_messages = apply_anthropic_cache_control(api_messages, cache_ttl=self._cache_ttl, native_anthropic=(self.api_mode == 'anthropic_messages'))
|
||||
api_messages = apply_anthropic_cache_control(
|
||||
api_messages,
|
||||
cache_ttl=self._cache_ttl,
|
||||
native_anthropic=self._use_native_cache_layout,
|
||||
)
|
||||
|
||||
# Safety net: strip orphaned tool results / add stubs for missing
|
||||
# results before sending to the API. Runs unconditionally — not
|
||||
|
|
@ -9779,25 +9866,30 @@ class AIAgent:
|
|||
if finish_reason == "length":
|
||||
self._vprint(f"{self.log_prefix}⚠️ Response truncated (finish_reason='length') - model hit max output tokens", force=True)
|
||||
|
||||
# Normalize the truncated response to a single OpenAI-style
|
||||
# message shape so text-continuation and tool-call retry
|
||||
# work uniformly across chat_completions, bedrock_converse,
|
||||
# and anthropic_messages. For Anthropic we use the same
|
||||
# adapter the agent loop already relies on so the rebuilt
|
||||
# interim assistant message is byte-identical to what
|
||||
# would have been appended in the non-truncated path.
|
||||
_trunc_msg = None
|
||||
if self.api_mode in ("chat_completions", "bedrock_converse"):
|
||||
_trunc_msg = response.choices[0].message if (hasattr(response, "choices") and response.choices) else None
|
||||
elif self.api_mode == "anthropic_messages":
|
||||
from agent.anthropic_adapter import normalize_anthropic_response
|
||||
_trunc_msg, _ = normalize_anthropic_response(
|
||||
response, strip_tool_prefix=self._is_anthropic_oauth
|
||||
)
|
||||
|
||||
_trunc_content = getattr(_trunc_msg, "content", None) if _trunc_msg else None
|
||||
_trunc_has_tool_calls = bool(getattr(_trunc_msg, "tool_calls", None)) if _trunc_msg else False
|
||||
|
||||
# ── Detect thinking-budget exhaustion ──────────────
|
||||
# When the model spends ALL output tokens on reasoning
|
||||
# and has none left for the response, continuation
|
||||
# retries are pointless. Detect this early and give a
|
||||
# targeted error instead of wasting 3 API calls.
|
||||
_trunc_content = None
|
||||
_trunc_has_tool_calls = False
|
||||
if self.api_mode in ("chat_completions", "bedrock_converse"):
|
||||
_trunc_msg = response.choices[0].message if (hasattr(response, "choices") and response.choices) else None
|
||||
_trunc_content = getattr(_trunc_msg, "content", None) if _trunc_msg else None
|
||||
_trunc_has_tool_calls = bool(getattr(_trunc_msg, "tool_calls", None)) if _trunc_msg else False
|
||||
elif self.api_mode == "anthropic_messages":
|
||||
# Anthropic response.content is a list of blocks
|
||||
_text_parts = []
|
||||
for _blk in getattr(response, "content", []):
|
||||
if getattr(_blk, "type", None) == "text":
|
||||
_text_parts.append(getattr(_blk, "text", ""))
|
||||
_trunc_content = "\n".join(_text_parts) if _text_parts else None
|
||||
|
||||
# A response is "thinking exhausted" only when the model
|
||||
# actually produced reasoning blocks but no visible text after
|
||||
# them. Models that do not use <think> tags (e.g. GLM-4.7 on
|
||||
|
|
@ -9854,9 +9946,9 @@ class AIAgent:
|
|||
"error": _exhaust_error,
|
||||
}
|
||||
|
||||
if self.api_mode in ("chat_completions", "bedrock_converse"):
|
||||
assistant_message = response.choices[0].message
|
||||
if not assistant_message.tool_calls:
|
||||
if self.api_mode in ("chat_completions", "bedrock_converse", "anthropic_messages"):
|
||||
assistant_message = _trunc_msg
|
||||
if assistant_message is not None and not _trunc_has_tool_calls:
|
||||
length_continue_retries += 1
|
||||
interim_msg = self._build_assistant_message(assistant_message, finish_reason)
|
||||
messages.append(interim_msg)
|
||||
|
|
@ -9894,9 +9986,9 @@ class AIAgent:
|
|||
"error": "Response remained truncated after 3 continuation attempts",
|
||||
}
|
||||
|
||||
if self.api_mode in ("chat_completions", "bedrock_converse"):
|
||||
assistant_message = response.choices[0].message
|
||||
if assistant_message.tool_calls:
|
||||
if self.api_mode in ("chat_completions", "bedrock_converse", "anthropic_messages"):
|
||||
assistant_message = _trunc_msg
|
||||
if assistant_message is not None and _trunc_has_tool_calls:
|
||||
if truncated_tool_call_retries < 1:
|
||||
truncated_tool_call_retries += 1
|
||||
self._vprint(
|
||||
|
|
|
|||
|
|
@ -80,6 +80,13 @@ AUTHOR_MAP = {
|
|||
"nish3451@users.noreply.github.com": "nish3451",
|
||||
"Mibayy@users.noreply.github.com": "Mibayy",
|
||||
"135070653+sgaofen@users.noreply.github.com": "sgaofen",
|
||||
"nocoo@users.noreply.github.com": "nocoo",
|
||||
"30841158+n-WN@users.noreply.github.com": "n-WN",
|
||||
"leoyuan0099@gmail.com": "keyuyuan",
|
||||
"bxzt2006@163.com": "Only-Code-A",
|
||||
"i@troy-y.org": "TroyMitchell911",
|
||||
"mygamez@163.com": "zhongyueming1121",
|
||||
"hansnow@users.noreply.github.com": "hansnow",
|
||||
# contributors (manual mapping from git names)
|
||||
"ahmedsherif95@gmail.com": "asheriif",
|
||||
"liujinkun@bytedance.com": "liujinkun2025",
|
||||
|
|
|
|||
107
tests/agent/test_auxiliary_client_anthropic_custom.py
Normal file
107
tests/agent/test_auxiliary_client_anthropic_custom.py
Normal file
|
|
@ -0,0 +1,107 @@
|
|||
"""Tests for agent.auxiliary_client._try_custom_endpoint's anthropic_messages branch.
|
||||
|
||||
When a user configures a custom endpoint with ``api_mode: anthropic_messages``
|
||||
(e.g. MiniMax, Zhipu GLM, LiteLLM in Anthropic-proxy mode), auxiliary tasks
|
||||
(compression, web_extract, session_search, title generation) must use the
|
||||
native Anthropic transport rather than being silently downgraded to an
|
||||
OpenAI-wire client that speaks the wrong protocol.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _clean_env(monkeypatch):
|
||||
for key in (
|
||||
"OPENAI_API_KEY", "OPENAI_BASE_URL",
|
||||
"ANTHROPIC_API_KEY", "ANTHROPIC_TOKEN",
|
||||
):
|
||||
monkeypatch.delenv(key, raising=False)
|
||||
|
||||
|
||||
def _install_anthropic_adapter_mocks():
|
||||
"""Patch build_anthropic_client so the test doesn't need the SDK."""
|
||||
fake_client = MagicMock(name="anthropic_client")
|
||||
return patch(
|
||||
"agent.anthropic_adapter.build_anthropic_client",
|
||||
return_value=fake_client,
|
||||
), fake_client
|
||||
|
||||
|
||||
def test_custom_endpoint_anthropic_messages_builds_anthropic_wrapper():
|
||||
"""api_mode=anthropic_messages → returns AnthropicAuxiliaryClient, not OpenAI."""
|
||||
from agent.auxiliary_client import _try_custom_endpoint, AnthropicAuxiliaryClient
|
||||
|
||||
with patch(
|
||||
"agent.auxiliary_client._resolve_custom_runtime",
|
||||
return_value=(
|
||||
"https://api.minimax.io/anthropic",
|
||||
"minimax-key",
|
||||
"anthropic_messages",
|
||||
),
|
||||
), patch(
|
||||
"agent.auxiliary_client._read_main_model",
|
||||
return_value="claude-sonnet-4-6",
|
||||
):
|
||||
adapter_patch, fake_client = _install_anthropic_adapter_mocks()
|
||||
with adapter_patch:
|
||||
client, model = _try_custom_endpoint()
|
||||
|
||||
assert isinstance(client, AnthropicAuxiliaryClient), (
|
||||
"Custom endpoint with api_mode=anthropic_messages must return the "
|
||||
f"native Anthropic wrapper, got {type(client).__name__}"
|
||||
)
|
||||
assert model == "claude-sonnet-4-6"
|
||||
# Wrapper should NOT be marked as OAuth — third-party endpoints are
|
||||
# always API-key authenticated.
|
||||
assert client.api_key == "minimax-key"
|
||||
assert client.base_url == "https://api.minimax.io/anthropic"
|
||||
|
||||
|
||||
def test_custom_endpoint_anthropic_messages_falls_back_when_sdk_missing():
|
||||
"""Graceful degradation when anthropic SDK is unavailable."""
|
||||
from agent.auxiliary_client import _try_custom_endpoint
|
||||
|
||||
import_error = ImportError("anthropic package not installed")
|
||||
|
||||
with patch(
|
||||
"agent.auxiliary_client._resolve_custom_runtime",
|
||||
return_value=("https://api.minimax.io/anthropic", "k", "anthropic_messages"),
|
||||
), patch(
|
||||
"agent.auxiliary_client._read_main_model",
|
||||
return_value="claude-sonnet-4-6",
|
||||
), patch(
|
||||
"agent.anthropic_adapter.build_anthropic_client",
|
||||
side_effect=import_error,
|
||||
):
|
||||
client, model = _try_custom_endpoint()
|
||||
|
||||
# Should fall back to an OpenAI-wire client rather than returning
|
||||
# (None, None) — the tool still needs to do *something*.
|
||||
assert client is not None
|
||||
assert model == "claude-sonnet-4-6"
|
||||
# OpenAI client, not AnthropicAuxiliaryClient.
|
||||
from agent.auxiliary_client import AnthropicAuxiliaryClient
|
||||
assert not isinstance(client, AnthropicAuxiliaryClient)
|
||||
|
||||
|
||||
def test_custom_endpoint_chat_completions_still_uses_openai_wire():
|
||||
"""Regression: default path (no api_mode) must remain OpenAI client."""
|
||||
from agent.auxiliary_client import _try_custom_endpoint, AnthropicAuxiliaryClient
|
||||
|
||||
with patch(
|
||||
"agent.auxiliary_client._resolve_custom_runtime",
|
||||
return_value=("https://api.example.com/v1", "key", None),
|
||||
), patch(
|
||||
"agent.auxiliary_client._read_main_model",
|
||||
return_value="my-model",
|
||||
):
|
||||
client, model = _try_custom_endpoint()
|
||||
|
||||
assert client is not None
|
||||
assert model == "my-model"
|
||||
assert not isinstance(client, AnthropicAuxiliaryClient)
|
||||
70
tests/hermes_cli/test_detect_api_mode_for_url.py
Normal file
70
tests/hermes_cli/test_detect_api_mode_for_url.py
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
"""Tests for hermes_cli.runtime_provider._detect_api_mode_for_url.
|
||||
|
||||
The helper maps base URLs to api_modes for three cases:
|
||||
* api.openai.com → codex_responses
|
||||
* api.x.ai → codex_responses
|
||||
* */anthropic → anthropic_messages (third-party gateways like MiniMax,
|
||||
Zhipu GLM, LiteLLM proxies)
|
||||
|
||||
Consolidating the /anthropic detection in this helper (instead of three
|
||||
inline ``endswith`` checks spread across _resolve_runtime_from_pool_entry,
|
||||
the explicit-provider path, and the api-key-provider path) means every
|
||||
future update to the detection logic lives in one place.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from hermes_cli.runtime_provider import _detect_api_mode_for_url
|
||||
|
||||
|
||||
class TestCodexResponsesDetection:
    """First-party OpenAI / xAI hosts must map to ``codex_responses``."""

    def test_openai_api_returns_codex_responses(self):
        mode = _detect_api_mode_for_url("https://api.openai.com/v1")
        assert mode == "codex_responses"

    def test_xai_api_returns_codex_responses(self):
        mode = _detect_api_mode_for_url("https://api.x.ai/v1")
        assert mode == "codex_responses"

    def test_openrouter_is_not_codex_responses(self):
        # OpenRouter proxies OpenAI-hosted models, but it is not
        # api.openai.com and must not match the codex_responses check.
        mode = _detect_api_mode_for_url("https://openrouter.ai/api/v1")
        assert mode is None
class TestAnthropicMessagesDetection:
    """Third-party gateways that speak the Anthropic protocol under /anthropic."""

    def test_minimax_anthropic_endpoint(self):
        mode = _detect_api_mode_for_url("https://api.minimax.io/anthropic")
        assert mode == "anthropic_messages"

    def test_minimax_cn_anthropic_endpoint(self):
        mode = _detect_api_mode_for_url("https://api.minimaxi.com/anthropic")
        assert mode == "anthropic_messages"

    def test_dashscope_anthropic_endpoint(self):
        # Deep path ending in /anthropic (DashScope nests it under /api/v2/apps).
        mode = _detect_api_mode_for_url(
            "https://dashscope.aliyuncs.com/api/v2/apps/anthropic"
        )
        assert mode == "anthropic_messages"

    def test_trailing_slash_tolerated(self):
        mode = _detect_api_mode_for_url("https://api.minimax.io/anthropic/")
        assert mode == "anthropic_messages"

    def test_uppercase_path_tolerated(self):
        mode = _detect_api_mode_for_url("https://API.MINIMAX.IO/Anthropic")
        assert mode == "anthropic_messages"

    def test_anthropic_in_middle_of_path_does_not_match(self):
        # ``/anthropic`` must be the path SUFFIX, not a mid-path segment —
        # otherwise URLs like .../anthropic/v1 would false-positive.
        mode = _detect_api_mode_for_url("https://api.example.com/anthropic/v1")
        assert mode is None
class TestDefaultCase:
    """Inputs with no special mapping fall through to ``None``."""

    def test_generic_url_returns_none(self):
        mode = _detect_api_mode_for_url("https://api.together.xyz/v1")
        assert mode is None

    def test_empty_string_returns_none(self):
        mode = _detect_api_mode_for_url("")
        assert mode is None

    def test_none_returns_none(self):
        mode = _detect_api_mode_for_url(None)
        assert mode is None

    def test_localhost_returns_none(self):
        mode = _detect_api_mode_for_url("http://localhost:11434/v1")
        assert mode is None
84
tests/hermes_cli/test_update_config_clears_custom_fields.py
Normal file
84
tests/hermes_cli/test_update_config_clears_custom_fields.py
Normal file
|
|
@ -0,0 +1,84 @@
|
|||
"""Tests for hermes_cli.auth._update_config_for_provider clearing stale fields.
|
||||
|
||||
When the user switches from a custom provider (e.g. MiniMax with
|
||||
``api_mode: anthropic_messages``, ``api_key: mxp-...``) to a built-in
|
||||
provider (e.g. OpenRouter), the stale ``api_key`` and ``api_mode`` would
|
||||
otherwise override the new provider's credentials and transport choice.
|
||||
|
||||
Built-in providers that legitimately need a specific ``api_mode`` (copilot,
|
||||
xai) compute it at request-resolution time in
|
||||
``_copilot_runtime_api_mode`` / ``_detect_api_mode_for_url``, so removing
|
||||
the persisted value here is safe.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import yaml
|
||||
|
||||
from hermes_cli.auth import _update_config_for_provider
|
||||
from hermes_cli.config import get_config_path
|
||||
|
||||
|
||||
def _read_model_cfg() -> dict:
    """Load the ``model`` section of config.yaml as a dict.

    Returns:
        The ``model`` mapping, or ``{}`` when the file is missing or empty,
        when the YAML top level is not a mapping, or when ``model`` is
        absent or not a mapping.
    """
    path = get_config_path()
    if not path.exists():
        return {}
    # Pin the encoding: the seed helper writes UTF-8, and relying on the
    # platform locale default would make these tests environment-dependent.
    data = yaml.safe_load(path.read_text(encoding="utf-8")) or {}
    if not isinstance(data, dict):
        # Defensive: a scalar or list top level would crash .get() below.
        return {}
    model = data.get("model", {})
    return model if isinstance(model, dict) else {}
def _seed_custom_provider_config(api_mode: str = "anthropic_messages") -> None:
    """Write a config.yaml mimicking a user on a MiniMax-style custom provider.

    Args:
        api_mode: Transport mode persisted for the seeded custom provider.
            Defaults to ``anthropic_messages``; tests also seed
            ``codex_responses`` to cover both stale-field shapes.
    """
    path = get_config_path()
    path.parent.mkdir(parents=True, exist_ok=True)
    payload = {
        "model": {
            "provider": "custom",
            "base_url": "https://api.minimax.io/anthropic",
            "api_key": "mxp-stale-key",
            "api_mode": api_mode,
            "default": "claude-sonnet-4-6",
        }
    }
    # Pin the encoding so reads (_read_model_cfg) and writes agree
    # regardless of the platform's locale default.
    path.write_text(yaml.safe_dump(payload, sort_keys=False), encoding="utf-8")
class TestUpdateConfigForProviderClearsStaleCustomFields:
    """Switching providers must scrub api_key/api_mode left by a custom endpoint."""

    def test_switching_to_openrouter_clears_api_key_and_api_mode(self):
        _seed_custom_provider_config()

        _update_config_for_provider(
            "openrouter",
            "https://openrouter.ai/api/v1",
            default_model="anthropic/claude-sonnet-4.6",
        )

        cfg = _read_model_cfg()
        assert cfg.get("provider") == "openrouter"
        assert cfg.get("base_url") == "https://openrouter.ai/api/v1"
        assert "api_key" not in cfg, (
            "Stale custom api_key would leak into OpenRouter requests — must be cleared"
        )
        assert "api_mode" not in cfg, (
            "Stale api_mode=anthropic_messages from MiniMax would mis-route "
            "OpenRouter requests to the Anthropic SDK — must be cleared"
        )

    def test_switching_to_nous_clears_stale_api_mode(self):
        _seed_custom_provider_config()
        _update_config_for_provider("nous", "https://inference-api.nousresearch.com/v1")
        cfg = _read_model_cfg()
        assert cfg.get("provider") == "nous"
        assert "api_mode" not in cfg
        assert "api_key" not in cfg

    def test_switching_clears_codex_responses_api_mode(self):
        """Also covers codex_responses, not just anthropic_messages."""
        _seed_custom_provider_config(api_mode="codex_responses")
        _update_config_for_provider("openrouter", "https://openrouter.ai/api/v1")
        cfg = _read_model_cfg()
        assert "api_mode" not in cfg
152
tests/run_agent/test_anthropic_prompt_cache_policy.py
Normal file
152
tests/run_agent/test_anthropic_prompt_cache_policy.py
Normal file
|
|
@ -0,0 +1,152 @@
|
|||
"""Tests for AIAgent._anthropic_prompt_cache_policy().
|
||||
|
||||
The policy returns ``(should_cache, use_native_layout)`` for five endpoint
|
||||
classes. The test matrix pins the decision for each so a regression (e.g.
|
||||
silently dropping caching on third-party Anthropic gateways, or applying
|
||||
the native layout on OpenRouter) surfaces loudly.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
from run_agent import AIAgent
|
||||
|
||||
|
||||
def _make_agent(
    *,
    provider: str = "openrouter",
    base_url: str = "https://openrouter.ai/api/v1",
    api_mode: str = "chat_completions",
    model: str = "anthropic/claude-sonnet-4.6",
) -> AIAgent:
    """Build a bare AIAgent (bypassing __init__) with just the attributes
    the cache-policy tests read."""
    stub = AIAgent.__new__(AIAgent)
    stub.quiet_mode = True
    stub.client = MagicMock()
    stub.provider = provider
    stub.base_url = base_url
    stub._base_url_lower = (base_url or "").lower()
    stub.api_mode = api_mode
    stub.model = model
    return stub
class TestNativeAnthropic:
    """Native api.anthropic.com endpoints: caching on, native cache_control layout."""

    def test_claude_on_native_anthropic_caches_with_native_layout(self):
        agent = _make_agent(
            provider="anthropic",
            base_url="https://api.anthropic.com",
            api_mode="anthropic_messages",
            model="claude-sonnet-4-6",
        )
        policy = agent._anthropic_prompt_cache_policy()
        assert policy == (True, True)

    def test_api_anthropic_host_detected_even_when_provider_label_differs(self):
        # Pool configurations sometimes label native Anthropic as
        # "anthropic-direct" or similar; hostname-based detection keeps
        # caching enabled regardless of the label.
        agent = _make_agent(
            provider="anthropic-direct",
            base_url="https://api.anthropic.com",
            api_mode="anthropic_messages",
            model="claude-opus-4.6",
        )
        policy = agent._anthropic_prompt_cache_policy()
        assert policy == (True, True)
class TestOpenRouter:
    """OpenRouter aggregator: Claude models cache via the envelope layout."""

    def test_claude_on_openrouter_caches_with_envelope_layout(self):
        agent = _make_agent(
            provider="openrouter",
            base_url="https://openrouter.ai/api/v1",
            api_mode="chat_completions",
            model="anthropic/claude-sonnet-4.6",
        )
        caches, native_layout = agent._anthropic_prompt_cache_policy()
        assert caches is True
        # OpenRouter gets the envelope layout, never native cache_control.
        assert native_layout is False

    def test_non_claude_on_openrouter_does_not_cache(self):
        agent = _make_agent(
            provider="openrouter",
            base_url="https://openrouter.ai/api/v1",
            api_mode="chat_completions",
            model="openai/gpt-5.4",
        )
        policy = agent._anthropic_prompt_cache_policy()
        assert policy == (False, False)
class TestThirdPartyAnthropicGateway:
    """Third-party gateways speaking the Anthropic protocol (MiniMax, Zhipu GLM, LiteLLM)."""

    def test_minimax_claude_via_anthropic_messages(self):
        agent = _make_agent(
            provider="custom",
            base_url="https://api.minimax.io/anthropic",
            api_mode="anthropic_messages",
            model="claude-sonnet-4-6",
        )
        caches, native_layout = agent._anthropic_prompt_cache_policy()
        assert caches is True, "Third-party Anthropic gateway with Claude must cache"
        assert native_layout is True, "Third-party Anthropic gateway uses native cache_control layout"

    def test_third_party_without_claude_name_does_not_cache(self):
        # A non-Claude model (e.g. GLM) exposed over anthropic_messages
        # transport: cache_control support is unknown, so the policy is
        # deliberately conservative and stays off.
        agent = _make_agent(
            provider="custom",
            base_url="https://api.minimax.io/anthropic",
            api_mode="anthropic_messages",
            model="minimax-m2.7",
        )
        policy = agent._anthropic_prompt_cache_policy()
        assert policy == (False, False)
class TestOpenAIWireFormatOnCustomProvider:
    """A custom provider using chat_completions (OpenAI wire) should NOT get caching."""

    def test_custom_openai_wire_does_not_cache_even_with_claude_name(self):
        # Injecting cache_control fields into OpenAI-wire JSON can trip
        # strict providers that reject unknown keys — the blocklist risk
        # #9621 failed to avoid. Caching stays off unless the transport is
        # explicitly anthropic_messages or the aggregator is OpenRouter.
        agent = _make_agent(
            provider="custom",
            base_url="https://api.fireworks.ai/inference/v1",
            api_mode="chat_completions",
            model="claude-sonnet-4",
        )
        policy = agent._anthropic_prompt_cache_policy()
        assert policy == (False, False)
class TestExplicitOverrides:
    """Policy accepts keyword overrides for switch_model / fallback activation."""

    def test_overrides_take_precedence_over_self(self):
        agent = _make_agent(
            provider="openrouter",
            base_url="https://openrouter.ai/api/v1",
            api_mode="chat_completions",
            model="openai/gpt-5.4",
        )
        # switch_model evaluates the cache policy for its Claude target
        # before self.model is mutated — the keyword override must win.
        policy = agent._anthropic_prompt_cache_policy(
            model="anthropic/claude-sonnet-4.6",
        )
        assert policy == (True, False)

    def test_fallback_target_evaluated_independently(self):
        # Start on native Anthropic, then evaluate a fallback to OpenRouter:
        # the answer must describe the fallback target, not current state.
        agent = _make_agent(
            provider="anthropic",
            base_url="https://api.anthropic.com",
            api_mode="anthropic_messages",
            model="claude-opus-4.6",
        )
        policy = agent._anthropic_prompt_cache_policy(
            provider="openrouter",
            base_url="https://openrouter.ai/api/v1",
            api_mode="chat_completions",
            model="anthropic/claude-sonnet-4.6",
        )
        assert policy == (True, False)
182
tests/run_agent/test_anthropic_third_party_oauth_guard.py
Normal file
182
tests/run_agent/test_anthropic_third_party_oauth_guard.py
Normal file
|
|
@ -0,0 +1,182 @@
|
|||
"""Tests for ``_is_anthropic_oauth`` guard against third-party Anthropic-compatible providers.
|
||||
|
||||
The invariant: ``self._is_anthropic_oauth`` must only ever be True when
|
||||
``self.provider == 'anthropic'`` (native Anthropic). Third-party providers
|
||||
that speak the Anthropic protocol (MiniMax, Zhipu GLM, Alibaba DashScope,
|
||||
Kimi, LiteLLM proxies, etc.) must never trip OAuth code paths — doing so
|
||||
injects Claude-Code identity headers and system prompts that cause
|
||||
401/403 from those endpoints.
|
||||
|
||||
This test class covers all FIVE sites that assign ``_is_anthropic_oauth``:
|
||||
|
||||
1. ``AIAgent.__init__`` (line ~1022)
|
||||
2. ``AIAgent.switch_model`` (line ~1832)
|
||||
3. ``AIAgent._try_refresh_anthropic_client_credentials`` (line ~5335)
|
||||
4. ``AIAgent._swap_credential`` (line ~5378)
|
||||
5. ``AIAgent._try_activate_fallback`` (line ~6536)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from run_agent import AIAgent
|
||||
|
||||
|
||||
# A plausible-looking OAuth token (``sk-ant-`` without the ``-api`` suffix).
_OAUTH_LIKE_TOKEN = "sk-ant-oauth-example-1234567890abcdef"
_API_KEY_TOKEN = "sk-ant-api-abcdef1234567890"


@pytest.fixture
def agent():
    """Minimal AIAgent construction, skipping tool discovery."""
    # Stub out tool discovery and the OpenAI client so __init__ does no I/O.
    with (
        patch("run_agent.get_tool_definitions", return_value=[]),
        patch("run_agent.check_toolset_requirements", return_value={}),
        patch("run_agent.OpenAI"),
    ):
        built = AIAgent(
            api_key="test-key-1234567890",
            base_url="https://openrouter.ai/api/v1",
            quiet_mode=True,
            skip_context_files=True,
            skip_memory=True,
        )
    built.client = MagicMock()
    return built
class TestOAuthFlagOnRefresh:
    """Site 3 — _try_refresh_anthropic_client_credentials."""

    def test_third_party_provider_refresh_is_noop(self, agent):
        """When provider != anthropic, the refresh path returns False up front,
        so the OAuth flag can never be mutated for third-party providers.
        A per-assignment guard (line ~5393) provides a second line of defense
        against future refactors reintroducing the bug."""
        agent.api_mode = "anthropic_messages"
        agent.provider = "minimax"  # third-party gateway, not native Anthropic
        agent._anthropic_api_key = "***"
        agent._anthropic_client = MagicMock()
        agent._is_anthropic_oauth = False

        token_patch = patch(
            "agent.anthropic_adapter.resolve_anthropic_token",
            return_value=_OAUTH_LIKE_TOKEN,
        )
        client_patch = patch(
            "agent.anthropic_adapter.build_anthropic_client",
            return_value=MagicMock(),
        )
        with token_patch, client_patch:
            refreshed = agent._try_refresh_anthropic_client_credentials()

        # Short-circuits on non-anthropic providers…
        assert refreshed is False
        # …and the flag stays untouched either way.
        assert agent._is_anthropic_oauth is False

    def test_native_anthropic_preserves_existing_oauth_behaviour(self, agent):
        """Regression: native anthropic with OAuth token still flips flag to True."""
        agent.api_mode = "anthropic_messages"
        agent.provider = "anthropic"
        agent._anthropic_api_key = "***"
        agent._anthropic_client = MagicMock()
        agent._is_anthropic_oauth = False

        token_patch = patch(
            "agent.anthropic_adapter.resolve_anthropic_token",
            return_value=_OAUTH_LIKE_TOKEN,
        )
        client_patch = patch(
            "agent.anthropic_adapter.build_anthropic_client",
            return_value=MagicMock(),
        )
        with token_patch, client_patch:
            refreshed = agent._try_refresh_anthropic_client_credentials()

        assert refreshed is True
        assert agent._is_anthropic_oauth is True
class TestOAuthFlagOnCredentialSwap:
    """Site 4 — _swap_credential (credential pool rotation)."""

    def test_pool_swap_on_third_party_never_flips_oauth(self, agent):
        gateway_url = "https://open.bigmodel.cn/api/anthropic"
        agent.api_mode = "anthropic_messages"
        agent.provider = "glm"  # Zhipu GLM behind an /anthropic gateway
        agent._anthropic_api_key = "old-key"
        agent._anthropic_base_url = gateway_url
        agent._anthropic_client = MagicMock()
        agent._is_anthropic_oauth = False

        # Pool entry carrying an OAuth-shaped key — must not flip the flag.
        pool_entry = MagicMock()
        pool_entry.runtime_api_key = _OAUTH_LIKE_TOKEN
        pool_entry.runtime_base_url = gateway_url

        with patch(
            "agent.anthropic_adapter.build_anthropic_client",
            return_value=MagicMock(),
        ):
            agent._swap_credential(pool_entry)

        assert agent._is_anthropic_oauth is False
class TestOAuthFlagOnConstruction:
    """Site 1 — AIAgent.__init__ on a third-party anthropic_messages provider."""

    def test_minimax_init_does_not_flip_oauth(self):
        init_kwargs = dict(
            api_key="minimax-key-1234",
            base_url="https://api.minimax.io/anthropic",
            provider="minimax",
            api_mode="anthropic_messages",
            model="claude-sonnet-4-6",
            quiet_mode=True,
            skip_context_files=True,
            skip_memory=True,
        )
        with (
            patch("run_agent.get_tool_definitions", return_value=[]),
            patch("run_agent.check_toolset_requirements", return_value={}),
            patch("agent.anthropic_adapter.build_anthropic_client",
                  return_value=MagicMock()),
            # A stale ANTHROPIC_TOKEN lurking in the env: __init__ MUST NOT
            # fall back to it when provider != anthropic.
            patch("agent.anthropic_adapter.resolve_anthropic_token",
                  return_value=_OAUTH_LIKE_TOKEN),
        ):
            agent = AIAgent(**init_kwargs)

        # The explicit minimax key wins over the stale Anthropic OAuth
        # token, and the OAuth flag stays off.
        assert agent._anthropic_api_key == "minimax-key-1234"
        assert agent._is_anthropic_oauth is False
class TestOAuthFlagOnFallbackActivation:
    """Site 5 — _try_activate_fallback targeting a third-party Anthropic endpoint."""

    def test_fallback_to_third_party_does_not_flip_oauth(self, agent):
        """Directly mimic the post-fallback assignment at line ~6537."""
        from agent.anthropic_adapter import _is_oauth_token

        # Reproduce only the assignment under test — running the entire
        # recovery stack would drag in streaming, sessions, etc.
        fallback_provider = "minimax"
        candidate_key = _OAUTH_LIKE_TOKEN
        if fallback_provider == "anthropic":
            agent._is_anthropic_oauth = _is_oauth_token(candidate_key)
        else:
            agent._is_anthropic_oauth = False

        assert agent._is_anthropic_oauth is False
class TestApiKeyTokensAlwaysSafe:
    """Regression: plain API-key shapes must always resolve to non-OAuth, any provider."""

    def test_native_anthropic_with_api_key_token(self):
        from agent.anthropic_adapter import _is_oauth_token

        result = _is_oauth_token(_API_KEY_TOKEN)
        assert result is False

    def test_third_party_key_shape(self):
        from agent.anthropic_adapter import _is_oauth_token

        # Third-party key shapes (MiniMax 'mxp-...', GLM 'glm.sess.', etc.)
        # already fail the OAuth shape check; the provider guard is a second
        # defense line in case a future token format looks OAuth-y.
        result = _is_oauth_token("mxp-abcdef123")
        assert result is False
114
tests/run_agent/test_anthropic_truncation_continuation.py
Normal file
114
tests/run_agent/test_anthropic_truncation_continuation.py
Normal file
|
|
@ -0,0 +1,114 @@
|
|||
"""Regression test for anthropic_messages truncation continuation.
|
||||
|
||||
When an Anthropic response hits ``stop_reason: max_tokens`` (mapped to
|
||||
``finish_reason == 'length'`` in run_agent), the agent must retry with
|
||||
a continuation prompt — the same behavior it has always had for
|
||||
chat_completions and bedrock_converse. Before this PR, the
|
||||
``if self.api_mode in ('chat_completions', 'bedrock_converse'):`` guard
|
||||
silently dropped Anthropic-wire truncations on the floor, returning a
|
||||
half-finished response with no retry.
|
||||
|
||||
We don't exercise the full agent loop here (it's 3000 lines of inference,
|
||||
streaming, plugin hooks, etc.) — instead we verify the normalization
|
||||
adapter produces exactly the shape the continuation block now consumes.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from types import SimpleNamespace
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
def _make_anthropic_text_block(text: str) -> SimpleNamespace:
|
||||
return SimpleNamespace(type="text", text=text)
|
||||
|
||||
|
||||
def _make_anthropic_tool_use_block(name: str = "my_tool") -> SimpleNamespace:
|
||||
return SimpleNamespace(
|
||||
type="tool_use",
|
||||
id="toolu_01",
|
||||
name=name,
|
||||
input={"foo": "bar"},
|
||||
)
|
||||
|
||||
|
||||
def _make_anthropic_response(blocks, stop_reason: str = "max_tokens"):
|
||||
return SimpleNamespace(
|
||||
id="msg_01",
|
||||
type="message",
|
||||
role="assistant",
|
||||
model="claude-sonnet-4-6",
|
||||
content=blocks,
|
||||
stop_reason=stop_reason,
|
||||
stop_sequence=None,
|
||||
usage=SimpleNamespace(input_tokens=100, output_tokens=200),
|
||||
)
|
||||
|
||||
|
||||
class TestTruncatedAnthropicResponseNormalization:
    """normalize_anthropic_response() gives us the shape _build_assistant_message expects."""

    def test_text_only_truncation_produces_text_content_no_tool_calls(self):
        """Pure-text Anthropic truncation → continuation path should fire."""
        from agent.anthropic_adapter import normalize_anthropic_response

        truncated = _make_anthropic_response(
            [_make_anthropic_text_block("partial response that was cut off")]
        )
        msg, finish = normalize_anthropic_response(truncated)

        # The continuation block reads exactly two attributes:
        #   .content    → appended to truncated_response_prefix
        #   .tool_calls → selects text-retry vs tool-retry branch
        assert msg.content is not None
        assert "partial response" in msg.content
        assert not msg.tool_calls, (
            "Pure-text truncation must have no tool_calls so the text-continuation "
            "branch (not the tool-retry branch) fires"
        )
        assert finish == "length", "max_tokens stop_reason must map to OpenAI-style 'length'"

    def test_truncated_tool_call_produces_tool_calls(self):
        """Tool-use truncation → tool-call retry path should fire."""
        from agent.anthropic_adapter import normalize_anthropic_response

        blocks = [
            _make_anthropic_text_block("thinking..."),
            _make_anthropic_tool_use_block(),
        ]
        msg, finish = normalize_anthropic_response(_make_anthropic_response(blocks))

        assert bool(msg.tool_calls), (
            "Truncation mid-tool_use must expose tool_calls so the "
            "tool-call retry branch fires instead of text continuation"
        )
        assert finish == "length"

    def test_empty_content_does_not_crash(self):
        """Empty response.content — defensive: treat as a truncation with no text."""
        from agent.anthropic_adapter import normalize_anthropic_response

        msg, finish = normalize_anthropic_response(_make_anthropic_response([]))
        # The adapter may emit "" or None for content — either is fine; what
        # matters is that nothing raises.
        assert msg is not None
        assert not msg.tool_calls
class TestContinuationLogicBranching:
    """Symbolic check that the api_mode gate now includes anthropic_messages."""

    @pytest.mark.parametrize("api_mode", ["chat_completions", "bedrock_converse", "anthropic_messages"])
    def test_all_three_api_modes_hit_continuation_branch(self, api_mode):
        # Mirrors the gate in run_agent.py:
        #   if self.api_mode in ("chat_completions", "bedrock_converse", "anthropic_messages"):
        gated_modes = ("chat_completions", "bedrock_converse", "anthropic_messages")
        assert api_mode in gated_modes

    def test_codex_responses_still_excluded(self):
        # codex_responses keeps its own (non-continuation) truncation path
        # and must stay outside the shared block.
        gated_modes = ("chat_completions", "bedrock_converse", "anthropic_messages")
        assert "codex_responses" not in gated_modes
Loading…
Add table
Add a link
Reference in a new issue