diff --git a/run_agent.py b/run_agent.py
index ba8a2bf4e..fadf28b31 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -2355,6 +2355,13 @@ class AIAgent:
         cost reduction as direct Anthropic callers, provided their gateway
         implements the Anthropic cache_control contract (MiniMax, Zhipu GLM,
         LiteLLM's Anthropic proxy mode all do).
+
+        Qwen / Alibaba-family models on OpenCode, OpenCode Go, and direct
+        Alibaba (DashScope) also honour Anthropic-style ``cache_control``
+        markers on OpenAI-wire chat completions. Upstream pi-mono #3392 /
+        pi #3393 documented this for opencode-go Qwen. Without markers
+        these providers serve zero cache hits, re-billing the full prompt
+        on every turn.
         """
         eff_provider = (provider if provider is not None else self.provider) or ""
         eff_base_url = base_url if base_url is not None else (self.base_url or "")
@@ -2362,7 +2369,9 @@ class AIAgent:
         eff_model = (model if model is not None else self.model) or ""
 
         base_lower = eff_base_url.lower()
-        is_claude = "claude" in eff_model.lower()
+        model_lower = eff_model.lower()
+        provider_lower = eff_provider.lower()
+        is_claude = "claude" in model_lower
         is_openrouter = base_url_host_matches(eff_base_url, "openrouter.ai")
         is_anthropic_wire = eff_api_mode == "anthropic_messages"
         is_native_anthropic = (
@@ -2377,6 +2386,22 @@ class AIAgent:
         if is_anthropic_wire and is_claude:
             # Third-party Anthropic-compatible gateway.
             return True, True
+
+        # Qwen/Alibaba on OpenCode (Zen/Go) and native DashScope: OpenAI-wire
+        # transport that accepts Anthropic-style cache_control markers and
+        # rewards them with real cache hits. Without this branch
+        # qwen3.6-plus on opencode-go reports 0% cached tokens and burns
+        # through the subscription on every turn.
+        model_is_qwen = "qwen" in model_lower
+        provider_is_alibaba_family = provider_lower in {
+            "opencode", "opencode-zen", "opencode-go", "alibaba",
+        }
+        if provider_is_alibaba_family and model_is_qwen:
+            # Envelope layout (native_anthropic=False): markers on inner
+            # content parts, not top-level tool messages. Matches
+            # pi-mono's "alibaba" cacheControlFormat.
+            return True, False
+
         return False, False
 
     @staticmethod
diff --git a/tests/run_agent/test_anthropic_prompt_cache_policy.py b/tests/run_agent/test_anthropic_prompt_cache_policy.py
index 7d5a16654..7a85022a5 100644
--- a/tests/run_agent/test_anthropic_prompt_cache_policy.py
+++ b/tests/run_agent/test_anthropic_prompt_cache_policy.py
@@ -118,6 +118,86 @@ class TestOpenAIWireFormatOnCustomProvider:
         assert agent._anthropic_prompt_cache_policy() == (False, False)
 
 
+class TestQwenAlibabaFamily:
+    """Qwen on OpenCode/OpenCode-Go/Alibaba — needs cache_control even on OpenAI-wire.
+
+    Upstream pi-mono #3392 / #3393 documented that these providers serve
+    zero cache hits without Anthropic-style markers. Regression reported
+    by community user (Qwen3.6 on opencode-go burning through
+    subscription with no cache). Envelope layout, not native, because the
+    wire format is OpenAI chat.completions.
+    """
+
+    def test_qwen_on_opencode_go_caches_with_envelope_layout(self):
+        agent = _make_agent(
+            provider="opencode-go",
+            base_url="https://opencode.ai/v1",
+            api_mode="chat_completions",
+            model="qwen3.6-plus",
+        )
+        should, native = agent._anthropic_prompt_cache_policy()
+        assert should is True, "Qwen on opencode-go must cache"
+        assert native is False, "opencode-go is OpenAI-wire; envelope layout"
+
+    def test_qwen35_plus_on_opencode_go(self):
+        agent = _make_agent(
+            provider="opencode-go",
+            base_url="https://opencode.ai/v1",
+            api_mode="chat_completions",
+            model="qwen3.5-plus",
+        )
+        assert agent._anthropic_prompt_cache_policy() == (True, False)
+
+    def test_qwen_on_opencode_zen_caches(self):
+        agent = _make_agent(
+            provider="opencode",
+            base_url="https://opencode.ai/v1",
+            api_mode="chat_completions",
+            model="qwen3-coder-plus",
+        )
+        assert agent._anthropic_prompt_cache_policy() == (True, False)
+
+    def test_qwen_on_direct_alibaba_caches(self):
+        agent = _make_agent(
+            provider="alibaba",
+            base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
+            api_mode="chat_completions",
+            model="qwen3-coder",
+        )
+        assert agent._anthropic_prompt_cache_policy() == (True, False)
+
+    def test_non_qwen_on_opencode_go_does_not_cache(self):
+        # GLM / Kimi on opencode-go don't need markers (they have automatic
+        # server-side caching or none at all).
+        agent = _make_agent(
+            provider="opencode-go",
+            base_url="https://opencode.ai/v1",
+            api_mode="chat_completions",
+            model="glm-5",
+        )
+        assert agent._anthropic_prompt_cache_policy() == (False, False)
+
+    def test_kimi_on_opencode_go_does_not_cache(self):
+        agent = _make_agent(
+            provider="opencode-go",
+            base_url="https://opencode.ai/v1",
+            api_mode="chat_completions",
+            model="kimi-k2.5",
+        )
+        assert agent._anthropic_prompt_cache_policy() == (False, False)
+
+    def test_qwen_on_openrouter_not_affected(self):
+        # Qwen via OpenRouter falls through — OpenRouter has its own
+        # upstream caching arrangement for Qwen (provider-dependent).
+        agent = _make_agent(
+            provider="openrouter",
+            base_url="https://openrouter.ai/api/v1",
+            api_mode="chat_completions",
+            model="qwen/qwen3-coder",
+        )
+        assert agent._anthropic_prompt_cache_policy() == (False, False)
+
+
 class TestExplicitOverrides:
     """Policy accepts keyword overrides for switch_model / fallback activation."""