From 7993e03c06145baece40427801161918b4a9130e Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 11 May 2026 21:04:55 -0700 Subject: [PATCH] fix(cache): route Nous Portal Qwen through Portal-Claude cache pathway (#24151) Qwen models on Nous Portal (e.g. qwen3.6-plus) now get the same envelope-layout cache_control markers and long-lived (1h cross-session) cache treatment as Portal Claude. Portal proxies to OpenRouter with identical wire-format and cache_control semantics, but the prior policy left Portal Qwen falling through to the alibaba-family branch (which only matches provider=opencode/alibaba), serving 0% cache hits and re-billing the full prompt every turn. Scope is narrow: Portal Claude OR Portal Qwen. Other models on Portal keep their existing behavior. - _anthropic_prompt_cache_policy: add (is_nous_portal and qwen) -> (True, False) - _supports_long_lived_anthropic_cache: drop Claude-only gate for Portal so Qwen also gets the validated 1h cross-session layout - tests cover both functions, both bare and vendored qwen slug forms, and the rejection of non-Claude non-Qwen Portal traffic --- run_agent.py | 25 ++++++- .../test_anthropic_prompt_cache_policy.py | 65 +++++++++++++++++++ 2 files changed, 88 insertions(+), 2 deletions(-) diff --git a/run_agent.py b/run_agent.py index aa01c8ecdf5..973f0d95d72 100644 --- a/run_agent.py +++ b/run_agent.py @@ -3465,6 +3465,15 @@ class AIAgent: return True, True if (is_openrouter or is_nous_portal) and is_claude: return True, False + # Nous Portal Qwen (e.g. qwen3.6-plus) takes the same envelope-layout + # cache_control path as Portal Claude. Portal proxies to OpenRouter + # and the upstream Qwen route accepts cache_control markers; without + # this branch the alibaba-family check below only matches + # provider=opencode/alibaba and Portal traffic falls through to + # (False, False), serving 0% cache hits and re-billing the full + # prompt on every turn. 
+ if is_nous_portal and "qwen" in model_lower: + return True, False if is_anthropic_wire and is_claude: # Third-party Anthropic-compatible gateway. return True, True @@ -3540,7 +3549,19 @@ class AIAgent: eff_api_mode = api_mode if api_mode is not None else (self.api_mode or "") eff_model = (model if model is not None else self.model) or "" - if "claude" not in eff_model.lower(): + model_lower = eff_model.lower() + is_claude = "claude" in model_lower + is_nous_portal = "nousresearch" in eff_base_url.lower() + + # Nous Portal: Claude AND Qwen both get long-lived caching + # (the documented 1h cross-session TTL). Portal proxies to + # OpenRouter with identical cache_control semantics. Only the + # models _anthropic_prompt_cache_policy routes through the Portal + # envelope layout qualify (Claude or Qwen); keep both in sync. + if is_nous_portal and (is_claude or "qwen" in model_lower): + return True + + if not is_claude: return False # Native Anthropic + Anthropic OAuth subscription @@ -3554,7 +3575,7 @@ class AIAgent: # Nous Portal — front-ends OpenRouter behind the scenes; identical # wire format and cache_control semantics. - if "nousresearch" in eff_base_url.lower(): + if is_nous_portal: return True return False diff --git a/tests/run_agent/test_anthropic_prompt_cache_policy.py b/tests/run_agent/test_anthropic_prompt_cache_policy.py index 0c5b17a39f6..15d1cb4e87a 100644 --- a/tests/run_agent/test_anthropic_prompt_cache_policy.py +++ b/tests/run_agent/test_anthropic_prompt_cache_policy.py @@ -257,6 +257,40 @@ class TestQwenAlibabaFamily: ) assert agent._anthropic_prompt_cache_policy() == (False, False) + def test_qwen_on_nous_portal_caches_with_envelope_layout(self): + # Nous Portal Qwen takes the same envelope-layout cache_control + # path as Portal Claude. Without this, Portal-routed qwen3.6-plus + # falls through to the alibaba-family check (which only matches + # provider=opencode/alibaba) and serves 0% cache hits. 
+ agent = _make_agent( + provider="nous", + base_url="https://inference-api.nousresearch.com/v1", + api_mode="chat_completions", + model="qwen3.6-plus", + ) + assert agent._anthropic_prompt_cache_policy() == (True, False) + + def test_qwen_vendored_slug_on_nous_portal_caches(self): + # Same path but with the vendored slug form Portal sometimes uses. + agent = _make_agent( + provider="nous", + base_url="https://inference-api.nousresearch.com/v1", + api_mode="chat_completions", + model="qwen/qwen3.6-plus", + ) + assert agent._anthropic_prompt_cache_policy() == (True, False) + + def test_non_qwen_non_claude_on_nous_portal_does_not_cache(self): + # Portal scope is narrow: Claude OR Qwen only. Other models + # routed through Portal keep their existing fall-through behavior. + agent = _make_agent( + provider="nous", + base_url="https://inference-api.nousresearch.com/v1", + api_mode="chat_completions", + model="openai/gpt-5.4", + ) + assert agent._anthropic_prompt_cache_policy() == (False, False) + class TestExplicitOverrides: """Policy accepts keyword overrides for switch_model / fallback activation.""" @@ -338,6 +372,37 @@ class TestSupportsLongLivedAnthropicCache: ) assert agent._supports_long_lived_anthropic_cache() is True + def test_nous_portal_qwen_supported(self): + # Portal Qwen rides the same OpenRouter-equivalent transport as + # Portal Claude; long-lived (1h cross-session) cache_control + # markers apply identically. 
+ agent = _make_agent( + provider="nous", + base_url="https://inference-api.nousresearch.com/v1", + api_mode="chat_completions", + model="qwen3.6-plus", + ) + assert agent._supports_long_lived_anthropic_cache() is True + + def test_nous_portal_qwen_vendored_slug_supported(self): + agent = _make_agent( + provider="nous", + base_url="https://inference-api.nousresearch.com/v1", + api_mode="chat_completions", + model="qwen/qwen3.6-plus", + ) + assert agent._supports_long_lived_anthropic_cache() is True + + def test_nous_portal_non_claude_non_qwen_rejected(self): + # Portal long-lived cache scope mirrors policy: Claude or Qwen only. + agent = _make_agent( + provider="nous", + base_url="https://inference-api.nousresearch.com/v1", + api_mode="chat_completions", + model="openai/gpt-5.4", + ) + assert agent._supports_long_lived_anthropic_cache() is False + def test_openrouter_non_claude_rejected(self): agent = _make_agent( provider="openrouter",