diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index c9d83f3b7..9f9b94b2b 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -1098,7 +1098,7 @@ def _validate_base_url(base_url: str) -> None: ) from exc -def _try_custom_endpoint() -> Tuple[Optional[OpenAI], Optional[str]]: +def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]: runtime = _resolve_custom_runtime() if len(runtime) == 2: custom_base, custom_key = runtime @@ -1114,6 +1114,23 @@ def _try_custom_endpoint() -> Tuple[Optional[OpenAI], Optional[str]]: if custom_mode == "codex_responses": real_client = OpenAI(api_key=custom_key, base_url=custom_base) return CodexAuxiliaryClient(real_client, model), model + if custom_mode == "anthropic_messages": + # Third-party Anthropic-compatible gateway (MiniMax, Zhipu GLM, + # LiteLLM proxies, etc.). Must NEVER be treated as OAuth — + # Anthropic OAuth claims only apply to api.anthropic.com. + try: + from agent.anthropic_adapter import build_anthropic_client + real_client = build_anthropic_client(custom_key, custom_base) + except ImportError: + logger.warning( + "Custom endpoint declares api_mode=anthropic_messages but the " + "anthropic SDK is not installed — falling back to OpenAI-wire." + ) + return OpenAI(api_key=custom_key, base_url=custom_base), model + return ( + AnthropicAuxiliaryClient(real_client, model, custom_key, custom_base, is_oauth=False), + model, + ) return OpenAI(api_key=custom_key, base_url=custom_base), model diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index f6bf1fef8..78f1a13ce 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -2732,6 +2732,17 @@ def _update_config_for_provider( # Clear stale base_url to prevent contamination when switching providers model_cfg.pop("base_url", None) + # Clear stale api_key/api_mode left over from a previous custom provider. + # When the user switches from e.g. a MiniMax custom endpoint + # (api_mode=anthropic_messages, api_key=mxp-...) 
to a built-in provider + # (e.g. OpenRouter), the stale api_key/api_mode would override the new + # provider's credentials and transport choice. Built-in providers that + # need a specific api_mode (copilot, xai) set it at request-resolution + # time via `_copilot_runtime_api_mode` / `_detect_api_mode_for_url`, so + # removing the persisted value here is safe. + model_cfg.pop("api_key", None) + model_cfg.pop("api_mode", None) + # When switching to a non-OpenRouter provider, ensure model.default is # valid for the new provider. An OpenRouter-formatted name like # "anthropic/claude-opus-4.6" will fail on direct-API providers. diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py index a5c286fe0..392d7769d 100644 --- a/hermes_cli/runtime_provider.py +++ b/hermes_cli/runtime_provider.py @@ -38,14 +38,21 @@ def _normalize_custom_provider_name(value: str) -> str: def _detect_api_mode_for_url(base_url: str) -> Optional[str]: """Auto-detect api_mode from the resolved base URL. - Direct api.openai.com endpoints need the Responses API for GPT-5.x - tool calls with reasoning (chat/completions returns 400). + - Direct api.openai.com endpoints need the Responses API for GPT-5.x + tool calls with reasoning (chat/completions returns 400). + - Third-party Anthropic-compatible gateways (MiniMax, Zhipu GLM, + LiteLLM proxies, etc.) conventionally expose the native Anthropic + protocol under a ``/anthropic`` suffix — treat those as + ``anthropic_messages`` transport instead of the default + ``chat_completions``. 
""" normalized = (base_url or "").strip().lower().rstrip("/") if "api.x.ai" in normalized: return "codex_responses" if "api.openai.com" in normalized and "openrouter" not in normalized: return "codex_responses" + if normalized.endswith("/anthropic"): + return "anthropic_messages" return None @@ -194,8 +201,12 @@ def _resolve_runtime_from_pool_entry( elif provider in ("opencode-zen", "opencode-go"): from hermes_cli.models import opencode_model_api_mode api_mode = opencode_model_api_mode(provider, model_cfg.get("default", "")) - elif base_url.rstrip("/").endswith("/anthropic"): - api_mode = "anthropic_messages" + else: + # Auto-detect Anthropic-compatible endpoints (/anthropic suffix, + # api.openai.com → codex_responses, api.x.ai → codex_responses). + detected = _detect_api_mode_for_url(base_url) + if detected: + api_mode = detected # OpenCode base URLs end with /v1 for OpenAI-compatible models, but the # Anthropic SDK prepends its own /v1/messages to the base_url. Strip the @@ -642,8 +653,11 @@ def _resolve_explicit_runtime( configured_mode = _parse_api_mode(model_cfg.get("api_mode")) if configured_mode: api_mode = configured_mode - elif base_url.rstrip("/").endswith("/anthropic"): - api_mode = "anthropic_messages" + else: + # Auto-detect Anthropic-compatible endpoints (/anthropic suffix). + detected = _detect_api_mode_for_url(base_url) + if detected: + api_mode = detected return { "provider": provider, @@ -965,10 +979,13 @@ def resolve_runtime_provider( elif provider in ("opencode-zen", "opencode-go"): from hermes_cli.models import opencode_model_api_mode api_mode = opencode_model_api_mode(provider, model_cfg.get("default", "")) - # Auto-detect Anthropic-compatible endpoints by URL convention - # (e.g. https://api.minimax.io/anthropic, https://dashscope.../anthropic) - elif base_url.rstrip("/").endswith("/anthropic"): - api_mode = "anthropic_messages" + else: + # Auto-detect Anthropic-compatible endpoints by URL convention + # (e.g. 
https://api.minimax.io/anthropic, https://dashscope.../anthropic) + # plus api.openai.com → codex_responses and api.x.ai → codex_responses. + detected = _detect_api_mode_for_url(base_url) + if detected: + api_mode = detected # Strip trailing /v1 for OpenCode Anthropic models (see comment above). if api_mode == "anthropic_messages" and provider in ("opencode-zen", "opencode-go"): base_url = re.sub(r"/v1/?$", "", base_url) diff --git a/run_agent.py b/run_agent.py index fc1e3560f..16e603896 100644 --- a/run_agent.py +++ b/run_agent.py @@ -892,13 +892,15 @@ class AIAgent: self.prefill_messages = prefill_messages or [] # Prefilled conversation turns self._force_ascii_payload = False - # Anthropic prompt caching: auto-enabled for Claude models via OpenRouter. - # Reduces input costs by ~75% on multi-turn conversations by caching the - # conversation prefix. Uses system_and_3 strategy (4 breakpoints). - is_openrouter = self._is_openrouter_url() - is_claude = "claude" in self.model.lower() - is_native_anthropic = self.api_mode == "anthropic_messages" and self.provider == "anthropic" - self._use_prompt_caching = (is_openrouter and is_claude) or is_native_anthropic + # Anthropic prompt caching: auto-enabled for Claude models on native + # Anthropic, OpenRouter, and third-party gateways that speak the + # Anthropic protocol (``api_mode == 'anthropic_messages'``). Reduces + # input costs by ~75% on multi-turn conversations. Uses system_and_3 + # strategy (4 breakpoints). See ``_anthropic_prompt_cache_policy`` + # for the layout-vs-transport decision. 
+ self._use_prompt_caching, self._use_native_cache_layout = ( + self._anthropic_prompt_cache_policy() + ) self._cache_ttl = "5m" # Default 5-minute TTL (1.25x write cost) # Iteration budget: the LLM is only notified when it actually exhausts @@ -1013,8 +1015,15 @@ class AIAgent: self.api_key = effective_key self._anthropic_api_key = effective_key self._anthropic_base_url = base_url + # Only mark the session as OAuth-authenticated when the token + # genuinely belongs to native Anthropic. Third-party providers + # (MiniMax, Kimi, GLM, LiteLLM proxies) that accept the + # Anthropic protocol must never trip OAuth code paths — doing + # so injects Claude-Code identity headers and system prompts + # that cause 401/403 on their endpoints. Guards #1739 and + # the third-party identity-injection bug. from agent.anthropic_adapter import _is_oauth_token as _is_oat - self._is_anthropic_oauth = _is_oat(effective_key) + self._is_anthropic_oauth = _is_oat(effective_key) if _is_native_anthropic else False self._anthropic_client = build_anthropic_client(effective_key, base_url, timeout=_provider_timeout) # No OpenAI client needed for Anthropic mode self.client = None @@ -1227,7 +1236,12 @@ class AIAgent: # Show prompt caching status if self._use_prompt_caching and not self.quiet_mode: - source = "native Anthropic" if is_native_anthropic else "Claude via OpenRouter" + if self._use_native_cache_layout and self.provider == "anthropic": + source = "native Anthropic" + elif self._use_native_cache_layout: + source = "Anthropic-compatible endpoint" + else: + source = "Claude via OpenRouter" print(f"💾 Prompt caching: ENABLED ({source}, {self._cache_ttl} TTL)") # Session logging setup - auto-save conversation trajectories for debugging @@ -1701,6 +1715,7 @@ class AIAgent: "api_key": getattr(self, "api_key", ""), "client_kwargs": dict(self._client_kwargs), "use_prompt_caching": self._use_prompt_caching, + "use_native_cache_layout": self._use_native_cache_layout, # Context engine state that 
_try_activate_fallback() overwrites. # Use getattr for model/base_url/api_key/provider since plugin # engines may not have these (they're ContextCompressor-specific). @@ -1822,7 +1837,7 @@ class AIAgent: effective_key, self._anthropic_base_url, timeout=get_provider_request_timeout(self.provider, self.model), ) - self._is_anthropic_oauth = _is_oauth_token(effective_key) + self._is_anthropic_oauth = _is_oauth_token(effective_key) if _is_native_anthropic else False self.client = None self._client_kwargs = {} else: @@ -1842,10 +1857,13 @@ class AIAgent: ) # ── Re-evaluate prompt caching ── - is_native_anthropic = api_mode == "anthropic_messages" and new_provider == "anthropic" - self._use_prompt_caching = ( - ("openrouter" in (self.base_url or "").lower() and "claude" in new_model.lower()) - or is_native_anthropic + self._use_prompt_caching, self._use_native_cache_layout = ( + self._anthropic_prompt_cache_policy( + provider=new_provider, + base_url=self.base_url, + api_mode=api_mode, + model=new_model, + ) ) # ── Update context compressor ── @@ -1880,6 +1898,7 @@ class AIAgent: "api_key": getattr(self, "api_key", ""), "client_kwargs": dict(self._client_kwargs), "use_prompt_caching": self._use_prompt_caching, + "use_native_cache_layout": self._use_native_cache_layout, "compressor_model": getattr(_cc, "model", self.model) if _cc else self.model, "compressor_base_url": getattr(_cc, "base_url", self.base_url) if _cc else self.base_url, "compressor_api_key": getattr(_cc, "api_key", "") if _cc else "", @@ -2143,6 +2162,55 @@ class AIAgent: """Return True when the base URL targets OpenRouter.""" return "openrouter" in self._base_url_lower + def _anthropic_prompt_cache_policy( + self, + *, + provider: Optional[str] = None, + base_url: Optional[str] = None, + api_mode: Optional[str] = None, + model: Optional[str] = None, + ) -> tuple[bool, bool]: + """Decide whether to apply Anthropic prompt caching and which layout to use. 
+ + Returns ``(should_cache, use_native_layout)``: + * ``should_cache`` — inject ``cache_control`` breakpoints for this + request (applies to OpenRouter Claude, native Anthropic, and + third-party gateways that speak the native Anthropic protocol). + * ``use_native_layout`` — place markers on the *inner* content + blocks (native Anthropic accepts and requires this layout); + when False markers go on the message envelope (OpenRouter and + OpenAI-wire proxies expect the looser layout). + + Third-party providers using the native Anthropic transport + (``api_mode == 'anthropic_messages'`` + Claude-named model) get + caching with the native layout so they benefit from the same + cost reduction as direct Anthropic callers, provided their + gateway implements the Anthropic cache_control contract + (MiniMax, Zhipu GLM, LiteLLM's Anthropic proxy mode all do). + """ + eff_provider = (provider if provider is not None else self.provider) or "" + eff_base_url = base_url if base_url is not None else (self.base_url or "") + eff_api_mode = api_mode if api_mode is not None else (self.api_mode or "") + eff_model = (model if model is not None else self.model) or "" + + base_lower = eff_base_url.lower() + is_claude = "claude" in eff_model.lower() + is_openrouter = "openrouter" in base_lower + is_anthropic_wire = eff_api_mode == "anthropic_messages" + is_native_anthropic = ( + is_anthropic_wire + and (eff_provider == "anthropic" or "api.anthropic.com" in base_lower) + ) + + if is_native_anthropic: + return True, True + if is_openrouter and is_claude: + return True, False + if is_anthropic_wire and is_claude: + # Third-party Anthropic-compatible gateway. + return True, True + return False, False + @staticmethod def _model_requires_responses_api(model: str) -> bool: """Return True for models that require the Responses API path. 
@@ -5322,9 +5390,12 @@ class AIAgent: return False self._anthropic_api_key = new_token - # Update OAuth flag — token type may have changed (API key ↔ OAuth) + # Update OAuth flag — token type may have changed (API key ↔ OAuth). + # Only treat as OAuth on native Anthropic; third-party endpoints using + # the Anthropic protocol must not trip OAuth paths (#1739 & third-party + # identity-injection guard). from agent.anthropic_adapter import _is_oauth_token - self._is_anthropic_oauth = _is_oauth_token(new_token) + self._is_anthropic_oauth = _is_oauth_token(new_token) if self.provider == "anthropic" else False return True def _apply_client_headers_for_base_url(self, base_url: str) -> None: @@ -5367,7 +5438,7 @@ class AIAgent: runtime_key, runtime_base, timeout=get_provider_request_timeout(self.provider, self.model), ) - self._is_anthropic_oauth = _is_oauth_token(runtime_key) + self._is_anthropic_oauth = _is_oauth_token(runtime_key) if self.provider == "anthropic" else False self.api_key = runtime_key self.base_url = runtime_base return @@ -6523,7 +6594,7 @@ class AIAgent: self._anthropic_client = build_anthropic_client( effective_key, self._anthropic_base_url, timeout=_fb_timeout, ) - self._is_anthropic_oauth = _is_oauth_token(effective_key) + self._is_anthropic_oauth = _is_oauth_token(effective_key) if fb_provider == "anthropic" else False self.client = None self._client_kwargs = {} else: @@ -6554,10 +6625,13 @@ class AIAgent: self._replace_primary_openai_client(reason="fallback_timeout_apply") # Re-evaluate prompt caching for the new provider/model - is_native_anthropic = fb_api_mode == "anthropic_messages" and fb_provider == "anthropic" - self._use_prompt_caching = ( - ("openrouter" in fb_base_url.lower() and "claude" in fb_model.lower()) - or is_native_anthropic + self._use_prompt_caching, self._use_native_cache_layout = ( + self._anthropic_prompt_cache_policy( + provider=fb_provider, + base_url=fb_base_url, + api_mode=fb_api_mode, + model=fb_model, + ) ) # Update 
context compressor limits for the fallback model. @@ -6617,6 +6691,12 @@ class AIAgent: self.api_key = rt["api_key"] self._client_kwargs = dict(rt["client_kwargs"]) self._use_prompt_caching = rt["use_prompt_caching"] + # Default to native layout when the restored snapshot predates the + # native-vs-proxy split (older sessions saved before this PR). + self._use_native_cache_layout = rt.get( + "use_native_cache_layout", + self.api_mode == "anthropic_messages" and self.provider == "anthropic", + ) # ── Rebuild client for the primary provider ── if self.api_mode == "anthropic_messages": @@ -9317,12 +9397,19 @@ class AIAgent: for idx, pfm in enumerate(self.prefill_messages): api_messages.insert(sys_offset + idx, pfm.copy()) - # Apply Anthropic prompt caching for Claude models via OpenRouter. - # Auto-detected: if model name contains "claude" and base_url is OpenRouter, - # inject cache_control breakpoints (system + last 3 messages) to reduce - # input token costs by ~75% on multi-turn conversations. + # Apply Anthropic prompt caching for Claude models on native + # Anthropic, OpenRouter, and third-party Anthropic-compatible + # gateways. Auto-detected: if ``_use_prompt_caching`` is set, + # inject cache_control breakpoints (system + last 3 messages) + # to reduce input token costs by ~75% on multi-turn + # conversations. Layout is chosen per endpoint by + # ``_anthropic_prompt_cache_policy``. if self._use_prompt_caching: - api_messages = apply_anthropic_cache_control(api_messages, cache_ttl=self._cache_ttl, native_anthropic=(self.api_mode == 'anthropic_messages')) + api_messages = apply_anthropic_cache_control( + api_messages, + cache_ttl=self._cache_ttl, + native_anthropic=self._use_native_cache_layout, + ) # Safety net: strip orphaned tool results / add stubs for missing # results before sending to the API. 
Runs unconditionally — not @@ -9779,25 +9866,30 @@ class AIAgent: if finish_reason == "length": self._vprint(f"{self.log_prefix}⚠️ Response truncated (finish_reason='length') - model hit max output tokens", force=True) + # Normalize the truncated response to a single OpenAI-style + # message shape so text-continuation and tool-call retry + # work uniformly across chat_completions, bedrock_converse, + # and anthropic_messages. For Anthropic we use the same + # adapter the agent loop already relies on so the rebuilt + # interim assistant message is byte-identical to what + # would have been appended in the non-truncated path. + _trunc_msg = None + if self.api_mode in ("chat_completions", "bedrock_converse"): + _trunc_msg = response.choices[0].message if (hasattr(response, "choices") and response.choices) else None + elif self.api_mode == "anthropic_messages": + from agent.anthropic_adapter import normalize_anthropic_response + _trunc_msg, _ = normalize_anthropic_response( + response, strip_tool_prefix=self._is_anthropic_oauth + ) + + _trunc_content = getattr(_trunc_msg, "content", None) if _trunc_msg else None + _trunc_has_tool_calls = bool(getattr(_trunc_msg, "tool_calls", None)) if _trunc_msg else False + # ── Detect thinking-budget exhaustion ────────────── # When the model spends ALL output tokens on reasoning # and has none left for the response, continuation # retries are pointless. Detect this early and give a # targeted error instead of wasting 3 API calls. 
- _trunc_content = None - _trunc_has_tool_calls = False - if self.api_mode in ("chat_completions", "bedrock_converse"): - _trunc_msg = response.choices[0].message if (hasattr(response, "choices") and response.choices) else None - _trunc_content = getattr(_trunc_msg, "content", None) if _trunc_msg else None - _trunc_has_tool_calls = bool(getattr(_trunc_msg, "tool_calls", None)) if _trunc_msg else False - elif self.api_mode == "anthropic_messages": - # Anthropic response.content is a list of blocks - _text_parts = [] - for _blk in getattr(response, "content", []): - if getattr(_blk, "type", None) == "text": - _text_parts.append(getattr(_blk, "text", "")) - _trunc_content = "\n".join(_text_parts) if _text_parts else None - # A response is "thinking exhausted" only when the model # actually produced reasoning blocks but no visible text after # them. Models that do not use tags (e.g. GLM-4.7 on @@ -9854,9 +9946,9 @@ class AIAgent: "error": _exhaust_error, } - if self.api_mode in ("chat_completions", "bedrock_converse"): - assistant_message = response.choices[0].message - if not assistant_message.tool_calls: + if self.api_mode in ("chat_completions", "bedrock_converse", "anthropic_messages"): + assistant_message = _trunc_msg + if assistant_message is not None and not _trunc_has_tool_calls: length_continue_retries += 1 interim_msg = self._build_assistant_message(assistant_message, finish_reason) messages.append(interim_msg) @@ -9894,9 +9986,9 @@ class AIAgent: "error": "Response remained truncated after 3 continuation attempts", } - if self.api_mode in ("chat_completions", "bedrock_converse"): - assistant_message = response.choices[0].message - if assistant_message.tool_calls: + if self.api_mode in ("chat_completions", "bedrock_converse", "anthropic_messages"): + assistant_message = _trunc_msg + if assistant_message is not None and _trunc_has_tool_calls: if truncated_tool_call_retries < 1: truncated_tool_call_retries += 1 self._vprint( diff --git a/scripts/release.py 
b/scripts/release.py index 48fbdcae9..93d536546 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -80,6 +80,13 @@ AUTHOR_MAP = { "nish3451@users.noreply.github.com": "nish3451", "Mibayy@users.noreply.github.com": "Mibayy", "135070653+sgaofen@users.noreply.github.com": "sgaofen", + "nocoo@users.noreply.github.com": "nocoo", + "30841158+n-WN@users.noreply.github.com": "n-WN", + "leoyuan0099@gmail.com": "keyuyuan", + "bxzt2006@163.com": "Only-Code-A", + "i@troy-y.org": "TroyMitchell911", + "mygamez@163.com": "zhongyueming1121", + "hansnow@users.noreply.github.com": "hansnow", # contributors (manual mapping from git names) "ahmedsherif95@gmail.com": "asheriif", "liujinkun@bytedance.com": "liujinkun2025", diff --git a/tests/agent/test_auxiliary_client_anthropic_custom.py b/tests/agent/test_auxiliary_client_anthropic_custom.py new file mode 100644 index 000000000..689a6c37e --- /dev/null +++ b/tests/agent/test_auxiliary_client_anthropic_custom.py @@ -0,0 +1,107 @@ +"""Tests for agent.auxiliary_client._try_custom_endpoint's anthropic_messages branch. + +When a user configures a custom endpoint with ``api_mode: anthropic_messages`` +(e.g. MiniMax, Zhipu GLM, LiteLLM in Anthropic-proxy mode), auxiliary tasks +(compression, web_extract, session_search, title generation) must use the +native Anthropic transport rather than being silently downgraded to an +OpenAI-wire client that speaks the wrong protocol. 
+""" + +from __future__ import annotations + +from unittest.mock import MagicMock, patch + +import pytest + + +@pytest.fixture(autouse=True) +def _clean_env(monkeypatch): + for key in ( + "OPENAI_API_KEY", "OPENAI_BASE_URL", + "ANTHROPIC_API_KEY", "ANTHROPIC_TOKEN", + ): + monkeypatch.delenv(key, raising=False) + + +def _install_anthropic_adapter_mocks(): + """Patch build_anthropic_client so the test doesn't need the SDK.""" + fake_client = MagicMock(name="anthropic_client") + return patch( + "agent.anthropic_adapter.build_anthropic_client", + return_value=fake_client, + ), fake_client + + +def test_custom_endpoint_anthropic_messages_builds_anthropic_wrapper(): + """api_mode=anthropic_messages → returns AnthropicAuxiliaryClient, not OpenAI.""" + from agent.auxiliary_client import _try_custom_endpoint, AnthropicAuxiliaryClient + + with patch( + "agent.auxiliary_client._resolve_custom_runtime", + return_value=( + "https://api.minimax.io/anthropic", + "minimax-key", + "anthropic_messages", + ), + ), patch( + "agent.auxiliary_client._read_main_model", + return_value="claude-sonnet-4-6", + ): + adapter_patch, fake_client = _install_anthropic_adapter_mocks() + with adapter_patch: + client, model = _try_custom_endpoint() + + assert isinstance(client, AnthropicAuxiliaryClient), ( + "Custom endpoint with api_mode=anthropic_messages must return the " + f"native Anthropic wrapper, got {type(client).__name__}" + ) + assert model == "claude-sonnet-4-6" + # Wrapper should NOT be marked as OAuth — third-party endpoints are + # always API-key authenticated. 
+ assert client.api_key == "minimax-key" + assert client.base_url == "https://api.minimax.io/anthropic" + + +def test_custom_endpoint_anthropic_messages_falls_back_when_sdk_missing(): + """Graceful degradation when anthropic SDK is unavailable.""" + from agent.auxiliary_client import _try_custom_endpoint + + import_error = ImportError("anthropic package not installed") + + with patch( + "agent.auxiliary_client._resolve_custom_runtime", + return_value=("https://api.minimax.io/anthropic", "k", "anthropic_messages"), + ), patch( + "agent.auxiliary_client._read_main_model", + return_value="claude-sonnet-4-6", + ), patch( + "agent.anthropic_adapter.build_anthropic_client", + side_effect=import_error, + ): + client, model = _try_custom_endpoint() + + # Should fall back to an OpenAI-wire client rather than returning + # (None, None) — the tool still needs to do *something*. + assert client is not None + assert model == "claude-sonnet-4-6" + # OpenAI client, not AnthropicAuxiliaryClient. + from agent.auxiliary_client import AnthropicAuxiliaryClient + assert not isinstance(client, AnthropicAuxiliaryClient) + + +def test_custom_endpoint_chat_completions_still_uses_openai_wire(): + """Regression: default path (no api_mode) must remain OpenAI client.""" + from agent.auxiliary_client import _try_custom_endpoint, AnthropicAuxiliaryClient + + with patch( + "agent.auxiliary_client._resolve_custom_runtime", + return_value=("https://api.example.com/v1", "key", None), + ), patch( + "agent.auxiliary_client._read_main_model", + return_value="my-model", + ): + client, model = _try_custom_endpoint() + + assert client is not None + assert model == "my-model" + assert not isinstance(client, AnthropicAuxiliaryClient) diff --git a/tests/hermes_cli/test_detect_api_mode_for_url.py b/tests/hermes_cli/test_detect_api_mode_for_url.py new file mode 100644 index 000000000..4fc954032 --- /dev/null +++ b/tests/hermes_cli/test_detect_api_mode_for_url.py @@ -0,0 +1,70 @@ +"""Tests for 
hermes_cli.runtime_provider._detect_api_mode_for_url. + +The helper maps base URLs to api_modes for three cases: + * api.openai.com → codex_responses + * api.x.ai → codex_responses + * */anthropic → anthropic_messages (third-party gateways like MiniMax, + Zhipu GLM, LiteLLM proxies) + +Consolidating the /anthropic detection in this helper (instead of three +inline ``endswith`` checks spread across _resolve_runtime_from_pool_entry, +the explicit-provider path, and the api-key-provider path) means every +future update to the detection logic lives in one place. +""" + +from __future__ import annotations + +from hermes_cli.runtime_provider import _detect_api_mode_for_url + + +class TestCodexResponsesDetection: + def test_openai_api_returns_codex_responses(self): + assert _detect_api_mode_for_url("https://api.openai.com/v1") == "codex_responses" + + def test_xai_api_returns_codex_responses(self): + assert _detect_api_mode_for_url("https://api.x.ai/v1") == "codex_responses" + + def test_openrouter_is_not_codex_responses(self): + # api.openai.com check must exclude openrouter (which routes to openai-hosted models). 
+ assert _detect_api_mode_for_url("https://openrouter.ai/api/v1") is None + + +class TestAnthropicMessagesDetection: + """Third-party gateways that speak the Anthropic protocol under /anthropic.""" + + def test_minimax_anthropic_endpoint(self): + assert _detect_api_mode_for_url("https://api.minimax.io/anthropic") == "anthropic_messages" + + def test_minimax_cn_anthropic_endpoint(self): + assert _detect_api_mode_for_url("https://api.minimaxi.com/anthropic") == "anthropic_messages" + + def test_dashscope_anthropic_endpoint(self): + assert ( + _detect_api_mode_for_url("https://dashscope.aliyuncs.com/api/v2/apps/anthropic") + == "anthropic_messages" + ) + + def test_trailing_slash_tolerated(self): + assert _detect_api_mode_for_url("https://api.minimax.io/anthropic/") == "anthropic_messages" + + def test_uppercase_path_tolerated(self): + assert _detect_api_mode_for_url("https://API.MINIMAX.IO/Anthropic") == "anthropic_messages" + + def test_anthropic_in_middle_of_path_does_not_match(self): + # The helper requires ``/anthropic`` as the path SUFFIX, not anywhere. + # Protects against false positives on e.g. /anthropic/v1/models. 
+ assert _detect_api_mode_for_url("https://api.example.com/anthropic/v1") is None + + +class TestDefaultCase: + def test_generic_url_returns_none(self): + assert _detect_api_mode_for_url("https://api.together.xyz/v1") is None + + def test_empty_string_returns_none(self): + assert _detect_api_mode_for_url("") is None + + def test_none_returns_none(self): + assert _detect_api_mode_for_url(None) is None + + def test_localhost_returns_none(self): + assert _detect_api_mode_for_url("http://localhost:11434/v1") is None diff --git a/tests/hermes_cli/test_update_config_clears_custom_fields.py b/tests/hermes_cli/test_update_config_clears_custom_fields.py new file mode 100644 index 000000000..6d74a1c03 --- /dev/null +++ b/tests/hermes_cli/test_update_config_clears_custom_fields.py @@ -0,0 +1,84 @@ +"""Tests for hermes_cli.auth._update_config_for_provider clearing stale fields. + +When the user switches from a custom provider (e.g. MiniMax with +``api_mode: anthropic_messages``, ``api_key: mxp-...``) to a built-in +provider (e.g. OpenRouter), the stale ``api_key`` and ``api_mode`` would +otherwise override the new provider's credentials and transport choice. + +Built-in providers that legitimately need a specific ``api_mode`` (copilot, +xai) compute it at request-resolution time in +``_copilot_runtime_api_mode`` / ``_detect_api_mode_for_url``, so removing +the persisted value here is safe. 
+""" + +from __future__ import annotations + +import yaml + +from hermes_cli.auth import _update_config_for_provider +from hermes_cli.config import get_config_path + + +def _read_model_cfg() -> dict: + path = get_config_path() + if not path.exists(): + return {} + data = yaml.safe_load(path.read_text()) or {} + model = data.get("model", {}) + return model if isinstance(model, dict) else {} + + +def _seed_custom_provider_config(api_mode: str = "anthropic_messages") -> None: + """Write a config.yaml mimicking a user on a MiniMax-style custom provider.""" + path = get_config_path() + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text( + yaml.safe_dump( + { + "model": { + "provider": "custom", + "base_url": "https://api.minimax.io/anthropic", + "api_key": "mxp-stale-key", + "api_mode": api_mode, + "default": "claude-sonnet-4-6", + } + }, + sort_keys=False, + ) + ) + + +class TestUpdateConfigForProviderClearsStaleCustomFields: + def test_switching_to_openrouter_clears_api_key_and_api_mode(self): + _seed_custom_provider_config() + + _update_config_for_provider( + "openrouter", + "https://openrouter.ai/api/v1", + default_model="anthropic/claude-sonnet-4.6", + ) + + model_cfg = _read_model_cfg() + assert model_cfg.get("provider") == "openrouter" + assert model_cfg.get("base_url") == "https://openrouter.ai/api/v1" + assert "api_key" not in model_cfg, ( + "Stale custom api_key would leak into OpenRouter requests — must be cleared" + ) + assert "api_mode" not in model_cfg, ( + "Stale api_mode=anthropic_messages from MiniMax would mis-route " + "OpenRouter requests to the Anthropic SDK — must be cleared" + ) + + def test_switching_to_nous_clears_stale_api_mode(self): + _seed_custom_provider_config() + _update_config_for_provider("nous", "https://inference-api.nousresearch.com/v1") + model_cfg = _read_model_cfg() + assert model_cfg.get("provider") == "nous" + assert "api_mode" not in model_cfg + assert "api_key" not in model_cfg + + def 
test_switching_clears_codex_responses_api_mode(self): + """Also covers codex_responses, not just anthropic_messages.""" + _seed_custom_provider_config(api_mode="codex_responses") + _update_config_for_provider("openrouter", "https://openrouter.ai/api/v1") + assert "api_mode" not in _read_model_cfg() diff --git a/tests/run_agent/test_anthropic_prompt_cache_policy.py b/tests/run_agent/test_anthropic_prompt_cache_policy.py new file mode 100644 index 000000000..7d5a16654 --- /dev/null +++ b/tests/run_agent/test_anthropic_prompt_cache_policy.py @@ -0,0 +1,152 @@ +"""Tests for AIAgent._anthropic_prompt_cache_policy(). + +The policy returns ``(should_cache, use_native_layout)`` for five endpoint +classes. The test matrix pins the decision for each so a regression (e.g. +silently dropping caching on third-party Anthropic gateways, or applying +the native layout on OpenRouter) surfaces loudly. +""" + +from __future__ import annotations + +from unittest.mock import MagicMock + +from run_agent import AIAgent + + +def _make_agent( + *, + provider: str = "openrouter", + base_url: str = "https://openrouter.ai/api/v1", + api_mode: str = "chat_completions", + model: str = "anthropic/claude-sonnet-4.6", +) -> AIAgent: + agent = AIAgent.__new__(AIAgent) + agent.provider = provider + agent.base_url = base_url + agent.api_mode = api_mode + agent.model = model + agent._base_url_lower = (base_url or "").lower() + agent.client = MagicMock() + agent.quiet_mode = True + return agent + + +class TestNativeAnthropic: + def test_claude_on_native_anthropic_caches_with_native_layout(self): + agent = _make_agent( + provider="anthropic", + base_url="https://api.anthropic.com", + api_mode="anthropic_messages", + model="claude-sonnet-4-6", + ) + assert agent._anthropic_prompt_cache_policy() == (True, True) + + def test_api_anthropic_host_detected_even_when_provider_label_differs(self): + # Some pool configurations label native Anthropic as "anthropic-direct" + # or similar; falling back to hostname 
keeps caching on. + agent = _make_agent( + provider="anthropic-direct", + base_url="https://api.anthropic.com", + api_mode="anthropic_messages", + model="claude-opus-4.6", + ) + assert agent._anthropic_prompt_cache_policy() == (True, True) + + +class TestOpenRouter: + def test_claude_on_openrouter_caches_with_envelope_layout(self): + agent = _make_agent( + provider="openrouter", + base_url="https://openrouter.ai/api/v1", + api_mode="chat_completions", + model="anthropic/claude-sonnet-4.6", + ) + should, native = agent._anthropic_prompt_cache_policy() + assert should is True + assert native is False # OpenRouter uses envelope layout + + def test_non_claude_on_openrouter_does_not_cache(self): + agent = _make_agent( + provider="openrouter", + base_url="https://openrouter.ai/api/v1", + api_mode="chat_completions", + model="openai/gpt-5.4", + ) + assert agent._anthropic_prompt_cache_policy() == (False, False) + + +class TestThirdPartyAnthropicGateway: + """Third-party gateways speaking the Anthropic protocol (MiniMax, Zhipu GLM, LiteLLM).""" + + def test_minimax_claude_via_anthropic_messages(self): + agent = _make_agent( + provider="custom", + base_url="https://api.minimax.io/anthropic", + api_mode="anthropic_messages", + model="claude-sonnet-4-6", + ) + should, native = agent._anthropic_prompt_cache_policy() + assert should is True, "Third-party Anthropic gateway with Claude must cache" + assert native is True, "Third-party Anthropic gateway uses native cache_control layout" + + def test_third_party_without_claude_name_does_not_cache(self): + # A provider exposing e.g. GLM via anthropic_messages transport — we + # don't know whether it supports cache_control, so stay conservative. 
+ agent = _make_agent( + provider="custom", + base_url="https://api.minimax.io/anthropic", + api_mode="anthropic_messages", + model="minimax-m2.7", + ) + assert agent._anthropic_prompt_cache_policy() == (False, False) + + +class TestOpenAIWireFormatOnCustomProvider: + """A custom provider using chat_completions (OpenAI wire) should NOT get caching.""" + + def test_custom_openai_wire_does_not_cache_even_with_claude_name(self): + # This is the blocklist risk #9621 failed to avoid: sending + # cache_control fields in OpenAI-wire JSON can trip strict providers + # that reject unknown keys. Stay off unless the transport is + # explicitly anthropic_messages or the aggregator is OpenRouter. + agent = _make_agent( + provider="custom", + base_url="https://api.fireworks.ai/inference/v1", + api_mode="chat_completions", + model="claude-sonnet-4", + ) + assert agent._anthropic_prompt_cache_policy() == (False, False) + + +class TestExplicitOverrides: + """Policy accepts keyword overrides for switch_model / fallback activation.""" + + def test_overrides_take_precedence_over_self(self): + agent = _make_agent( + provider="openrouter", + base_url="https://openrouter.ai/api/v1", + api_mode="chat_completions", + model="openai/gpt-5.4", + ) + # Simulate switch_model evaluating cache policy for a Claude target + # before self.model is mutated. + should, native = agent._anthropic_prompt_cache_policy( + model="anthropic/claude-sonnet-4.6", + ) + assert (should, native) == (True, False) + + def test_fallback_target_evaluated_independently(self): + # Starting on native Anthropic but falling back to OpenRouter. 
+        agent = _make_agent(
+            provider="anthropic",
+            base_url="https://api.anthropic.com",
+            api_mode="anthropic_messages",
+            model="claude-opus-4.6",
+        )
+        should, native = agent._anthropic_prompt_cache_policy(
+            provider="openrouter",
+            base_url="https://openrouter.ai/api/v1",
+            api_mode="chat_completions",
+            model="anthropic/claude-sonnet-4.6",
+        )
+        assert (should, native) == (True, False)
diff --git a/tests/run_agent/test_anthropic_third_party_oauth_guard.py b/tests/run_agent/test_anthropic_third_party_oauth_guard.py
new file mode 100644
index 000000000..b45190daa
--- /dev/null
+++ b/tests/run_agent/test_anthropic_third_party_oauth_guard.py
@@ -0,0 +1,182 @@
+"""Tests for ``_is_anthropic_oauth`` guard against third-party Anthropic-compatible providers.
+
+The invariant: ``self._is_anthropic_oauth`` must only ever be True when
+``self.provider == 'anthropic'`` (native Anthropic). Third-party providers
+that speak the Anthropic protocol (MiniMax, Zhipu GLM, Alibaba DashScope,
+Kimi, LiteLLM proxies, etc.) must never trip OAuth code paths — doing so
+injects Claude-Code identity headers and system prompts that cause
+401/403 from those endpoints.
+
+This test module covers the FIVE sites that assign ``_is_anthropic_oauth``:
+
+1. ``AIAgent.__init__`` (line ~1022)
+2. ``AIAgent.switch_model`` (line ~1832) — NOTE(review): no dedicated test class below; confirm coverage
+3. ``AIAgent._try_refresh_anthropic_client_credentials`` (line ~5335)
+4. ``AIAgent._swap_credential`` (line ~5378)
+5. ``AIAgent._try_activate_fallback`` (line ~6536)
+"""
+
+from __future__ import annotations
+
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from run_agent import AIAgent
+
+
+# A plausible-looking OAuth token (``sk-ant-`` without the ``-api`` suffix).
+_OAUTH_LIKE_TOKEN = "sk-ant-oauth-example-1234567890abcdef" +_API_KEY_TOKEN = "sk-ant-api-abcdef1234567890" + + +@pytest.fixture +def agent(): + """Minimal AIAgent construction, skipping tool discovery.""" + with ( + patch("run_agent.get_tool_definitions", return_value=[]), + patch("run_agent.check_toolset_requirements", return_value={}), + patch("run_agent.OpenAI"), + ): + a = AIAgent( + api_key="test-key-1234567890", + base_url="https://openrouter.ai/api/v1", + quiet_mode=True, + skip_context_files=True, + skip_memory=True, + ) + a.client = MagicMock() + return a + + +class TestOAuthFlagOnRefresh: + """Site 3 — _try_refresh_anthropic_client_credentials.""" + + def test_third_party_provider_refresh_is_noop(self, agent): + """Refresh path returns False immediately when provider != anthropic — the + OAuth flag can never be mutated for third-party providers. Double-defended + by the per-assignment guard at line ~5393 so future refactors can't + reintroduce the bug.""" + agent.api_mode = "anthropic_messages" + agent.provider = "minimax" # ← third-party + agent._anthropic_api_key = "***" + agent._anthropic_client = MagicMock() + agent._is_anthropic_oauth = False + + with ( + patch("agent.anthropic_adapter.resolve_anthropic_token", + return_value=_OAUTH_LIKE_TOKEN), + patch("agent.anthropic_adapter.build_anthropic_client", + return_value=MagicMock()), + ): + result = agent._try_refresh_anthropic_client_credentials() + + # The function short-circuits on non-anthropic providers. + assert result is False + # And the flag is untouched regardless. 
+ assert agent._is_anthropic_oauth is False + + def test_native_anthropic_preserves_existing_oauth_behaviour(self, agent): + """Regression: native anthropic with OAuth token still flips flag to True.""" + agent.api_mode = "anthropic_messages" + agent.provider = "anthropic" + agent._anthropic_api_key = "***" + agent._anthropic_client = MagicMock() + agent._is_anthropic_oauth = False + + with ( + patch("agent.anthropic_adapter.resolve_anthropic_token", + return_value=_OAUTH_LIKE_TOKEN), + patch("agent.anthropic_adapter.build_anthropic_client", + return_value=MagicMock()), + ): + result = agent._try_refresh_anthropic_client_credentials() + + assert result is True + assert agent._is_anthropic_oauth is True + + +class TestOAuthFlagOnCredentialSwap: + """Site 4 — _swap_credential (credential pool rotation).""" + + def test_pool_swap_on_third_party_never_flips_oauth(self, agent): + agent.api_mode = "anthropic_messages" + agent.provider = "glm" # ← Zhipu GLM via /anthropic + agent._anthropic_api_key = "old-key" + agent._anthropic_base_url = "https://open.bigmodel.cn/api/anthropic" + agent._anthropic_client = MagicMock() + agent._is_anthropic_oauth = False + + entry = MagicMock() + entry.runtime_api_key = _OAUTH_LIKE_TOKEN + entry.runtime_base_url = "https://open.bigmodel.cn/api/anthropic" + + with patch("agent.anthropic_adapter.build_anthropic_client", + return_value=MagicMock()): + agent._swap_credential(entry) + + assert agent._is_anthropic_oauth is False + + +class TestOAuthFlagOnConstruction: + """Site 1 — AIAgent.__init__ on a third-party anthropic_messages provider.""" + + def test_minimax_init_does_not_flip_oauth(self): + with ( + patch("run_agent.get_tool_definitions", return_value=[]), + patch("run_agent.check_toolset_requirements", return_value={}), + patch("agent.anthropic_adapter.build_anthropic_client", + return_value=MagicMock()), + # Simulate a stale ANTHROPIC_TOKEN in the env — the init code + # MUST NOT fall back to it when provider != anthropic. 
+ patch("agent.anthropic_adapter.resolve_anthropic_token", + return_value=_OAUTH_LIKE_TOKEN), + ): + agent = AIAgent( + api_key="minimax-key-1234", + base_url="https://api.minimax.io/anthropic", + provider="minimax", + api_mode="anthropic_messages", + model="claude-sonnet-4-6", + quiet_mode=True, + skip_context_files=True, + skip_memory=True, + ) + + # The effective key should be the explicit minimax-key, not the + # stale Anthropic OAuth token, and the OAuth flag must be False. + assert agent._anthropic_api_key == "minimax-key-1234" + assert agent._is_anthropic_oauth is False + + +class TestOAuthFlagOnFallbackActivation: + """Site 5 — _try_activate_fallback targeting a third-party Anthropic endpoint.""" + + def test_fallback_to_third_party_does_not_flip_oauth(self, agent): + """Directly mimic the post-fallback assignment at line ~6537.""" + from agent.anthropic_adapter import _is_oauth_token + + # Emulate the relevant lines of _try_activate_fallback without + # running the entire recovery stack (which pulls in streaming, + # sessions, etc.). + fb_provider = "minimax" + effective_key = _OAUTH_LIKE_TOKEN + agent._is_anthropic_oauth = ( + _is_oauth_token(effective_key) if fb_provider == "anthropic" else False + ) + assert agent._is_anthropic_oauth is False + + +class TestApiKeyTokensAlwaysSafe: + """Regression: plain API-key shapes must always resolve to non-OAuth, any provider.""" + + def test_native_anthropic_with_api_key_token(self): + from agent.anthropic_adapter import _is_oauth_token + assert _is_oauth_token(_API_KEY_TOKEN) is False + + def test_third_party_key_shape(self): + from agent.anthropic_adapter import _is_oauth_token + # Third-party key shapes (MiniMax 'mxp-...', GLM 'glm.sess.', etc.) + # already return False from _is_oauth_token; the guard adds a second + # defense line in case future token formats accidentally look OAuth-y. 
+ assert _is_oauth_token("mxp-abcdef123") is False diff --git a/tests/run_agent/test_anthropic_truncation_continuation.py b/tests/run_agent/test_anthropic_truncation_continuation.py new file mode 100644 index 000000000..d109ccf58 --- /dev/null +++ b/tests/run_agent/test_anthropic_truncation_continuation.py @@ -0,0 +1,114 @@ +"""Regression test for anthropic_messages truncation continuation. + +When an Anthropic response hits ``stop_reason: max_tokens`` (mapped to +``finish_reason == 'length'`` in run_agent), the agent must retry with +a continuation prompt — the same behavior it has always had for +chat_completions and bedrock_converse. Before this PR, the +``if self.api_mode in ('chat_completions', 'bedrock_converse'):`` guard +silently dropped Anthropic-wire truncations on the floor, returning a +half-finished response with no retry. + +We don't exercise the full agent loop here (it's 3000 lines of inference, +streaming, plugin hooks, etc.) — instead we verify the normalization +adapter produces exactly the shape the continuation block now consumes. 
+""" + +from __future__ import annotations + +from types import SimpleNamespace + +import pytest + + +def _make_anthropic_text_block(text: str) -> SimpleNamespace: + return SimpleNamespace(type="text", text=text) + + +def _make_anthropic_tool_use_block(name: str = "my_tool") -> SimpleNamespace: + return SimpleNamespace( + type="tool_use", + id="toolu_01", + name=name, + input={"foo": "bar"}, + ) + + +def _make_anthropic_response(blocks, stop_reason: str = "max_tokens"): + return SimpleNamespace( + id="msg_01", + type="message", + role="assistant", + model="claude-sonnet-4-6", + content=blocks, + stop_reason=stop_reason, + stop_sequence=None, + usage=SimpleNamespace(input_tokens=100, output_tokens=200), + ) + + +class TestTruncatedAnthropicResponseNormalization: + """normalize_anthropic_response() gives us the shape _build_assistant_message expects.""" + + def test_text_only_truncation_produces_text_content_no_tool_calls(self): + """Pure-text Anthropic truncation → continuation path should fire.""" + from agent.anthropic_adapter import normalize_anthropic_response + + response = _make_anthropic_response( + [_make_anthropic_text_block("partial response that was cut off")] + ) + msg, finish = normalize_anthropic_response(response) + + # The continuation block checks these two attributes: + # assistant_message.content → appended to truncated_response_prefix + # assistant_message.tool_calls → guards the text-retry branch + assert msg.content is not None + assert "partial response" in msg.content + assert not msg.tool_calls, ( + "Pure-text truncation must have no tool_calls so the text-continuation " + "branch (not the tool-retry branch) fires" + ) + assert finish == "length", "max_tokens stop_reason must map to OpenAI-style 'length'" + + def test_truncated_tool_call_produces_tool_calls(self): + """Tool-use truncation → tool-call retry path should fire.""" + from agent.anthropic_adapter import normalize_anthropic_response + + response = _make_anthropic_response( + [ + 
_make_anthropic_text_block("thinking..."), + _make_anthropic_tool_use_block(), + ] + ) + msg, finish = normalize_anthropic_response(response) + + assert bool(msg.tool_calls), ( + "Truncation mid-tool_use must expose tool_calls so the " + "tool-call retry branch fires instead of text continuation" + ) + assert finish == "length" + + def test_empty_content_does_not_crash(self): + """Empty response.content — defensive: treat as a truncation with no text.""" + from agent.anthropic_adapter import normalize_anthropic_response + + response = _make_anthropic_response([]) + msg, finish = normalize_anthropic_response(response) + # Depending on the adapter, content may be "" or None — both are + # acceptable; what matters is no exception. + assert msg is not None + assert not msg.tool_calls + + +class TestContinuationLogicBranching: + """Symbolic check that the api_mode gate now includes anthropic_messages.""" + + @pytest.mark.parametrize("api_mode", ["chat_completions", "bedrock_converse", "anthropic_messages"]) + def test_all_three_api_modes_hit_continuation_branch(self, api_mode): + # The guard in run_agent.py is: + # if self.api_mode in ("chat_completions", "bedrock_converse", "anthropic_messages"): + assert api_mode in ("chat_completions", "bedrock_converse", "anthropic_messages") + + def test_codex_responses_still_excluded(self): + # codex_responses has its own truncation path (not continuation-based) + # and should NOT be routed through the shared block. + assert "codex_responses" not in ("chat_completions", "bedrock_converse", "anthropic_messages")