fix(cache): route Nous Portal Qwen through Portal-Claude cache pathway (#24151)

Qwen models on Nous Portal (e.g. qwen3.6-plus) now get the same envelope-layout cache_control markers and long-lived (1h cross-session) cache treatment as Portal Claude. Portal proxies to OpenRouter with identical wire format and cache_control semantics, but the prior policy let Portal Qwen fall through to the alibaba-family branch (which only matches provider=opencode/alibaba), so it got 0% cache hits and was re-billed for the full prompt every turn.

Scope is narrow: Portal Claude OR Portal Qwen. Other models on Portal keep their existing behavior.

- _anthropic_prompt_cache_policy: add (is_nous_portal and qwen) -> (True, False)
- _supports_long_lived_anthropic_cache: widen the Claude-only gate for Portal to Claude-or-Qwen so Qwen also gets the validated 1h cross-session layout
- tests cover both functions, both the bare and vendored qwen slug forms, and the rejection of non-Claude, non-Qwen Portal traffic
2026-05-18 04:41:56 +00:00 · 2026-05-11 21:04:55 -07:00 · 2026-05-11 21:04:55 -07:00 · 7993e03c06
commit 7993e03c06
parent 3c23b15f81
2 changed files with 88 additions and 2 deletions
--- a/tests/run_agent/test_anthropic_prompt_cache_policy.py
+++ b/tests/run_agent/test_anthropic_prompt_cache_policy.py
@@ -257,6 +257,40 @@ class TestQwenAlibabaFamily:
        )
        assert agent._anthropic_prompt_cache_policy() == (False, False)

+    def test_qwen_on_nous_portal_caches_with_envelope_layout(self):
+        # Nous Portal Qwen takes the same envelope-layout cache_control
+        # path as Portal Claude. Without this, Portal-routed qwen3.6-plus
+        # falls through to the alibaba-family check (which only matches
+        # provider=opencode/alibaba) and serves 0% cache hits.
+        agent = _make_agent(
+            provider="nous",
+            base_url="https://inference-api.nousresearch.com/v1",
+            api_mode="chat_completions",
+            model="qwen3.6-plus",
+        )
+        assert agent._anthropic_prompt_cache_policy() == (True, False)
+
+    def test_qwen_vendored_slug_on_nous_portal_caches(self):
+        # Same path but with the vendored slug form Portal sometimes uses.
+        agent = _make_agent(
+            provider="nous",
+            base_url="https://inference-api.nousresearch.com/v1",
+            api_mode="chat_completions",
+            model="qwen/qwen3.6-plus",
+        )
+        assert agent._anthropic_prompt_cache_policy() == (True, False)
+
+    def test_non_qwen_non_claude_on_nous_portal_does_not_cache(self):
+        # Portal scope is narrow: Claude OR Qwen only. Other models
+        # routed through Portal keep their existing fall-through behavior.
+        agent = _make_agent(
+            provider="nous",
+            base_url="https://inference-api.nousresearch.com/v1",
+            api_mode="chat_completions",
+            model="openai/gpt-5.4",
+        )
+        assert agent._anthropic_prompt_cache_policy() == (False, False)
+

 class TestExplicitOverrides:
    """Policy accepts keyword overrides for switch_model / fallback activation."""
@@ -338,6 +372,37 @@ class TestSupportsLongLivedAnthropicCache:
        )
        assert agent._supports_long_lived_anthropic_cache() is True

+    def test_nous_portal_qwen_supported(self):
+        # Portal Qwen rides the same OpenRouter-equivalent transport as
+        # Portal Claude; long-lived (1h cross-session) cache_control
+        # markers apply identically.
+        agent = _make_agent(
+            provider="nous",
+            base_url="https://inference-api.nousresearch.com/v1",
+            api_mode="chat_completions",
+            model="qwen3.6-plus",
+        )
+        assert agent._supports_long_lived_anthropic_cache() is True
+
+    def test_nous_portal_qwen_vendored_slug_supported(self):
+        agent = _make_agent(
+            provider="nous",
+            base_url="https://inference-api.nousresearch.com/v1",
+            api_mode="chat_completions",
+            model="qwen/qwen3.6-plus",
+        )
+        assert agent._supports_long_lived_anthropic_cache() is True
+
+    def test_nous_portal_non_claude_non_qwen_rejected(self):
+        # Portal long-lived cache scope mirrors policy: Claude or Qwen only.
+        agent = _make_agent(
+            provider="nous",
+            base_url="https://inference-api.nousresearch.com/v1",
+            api_mode="chat_completions",
+            model="openai/gpt-5.4",
+        )
+        assert agent._supports_long_lived_anthropic_cache() is False
+
    def test_openrouter_non_claude_rejected(self):
        agent = _make_agent(
            provider="openrouter",