diff --git a/run_agent.py b/run_agent.py index a8b071c8724..6b5c199a415 100644 --- a/run_agent.py +++ b/run_agent.py @@ -3619,12 +3619,19 @@ class AIAgent: is_claude = "claude" in model_lower is_nous_portal = "nousresearch" in eff_base_url.lower() - # Nous Portal: Claude AND Qwen both get long-lived caching. - # Portal proxies to OpenRouter with identical cache_control - # semantics; any model on Portal that accepts envelope-layout - # markers via _anthropic_prompt_cache_policy also benefits from - # the documented 1h cross-session TTL. - if is_nous_portal and (is_claude or "qwen" in model_lower): + # Nous Portal Claude rides the 1h prefix_and_2 layout (Portal + # proxies to OpenRouter, which honours ttl=1h on Anthropic + # routes). Qwen does NOT — Alibaba DashScope (the upstream for + # all Qwen routes, including Portal -> OpenRouter -> Alibaba) + # documents a single ``ephemeral`` TTL of 5 minutes; ttl="1h" + # on Qwen markers is silently ignored upstream, so the + # high-value tools[-1] + system-prefix breakpoints never land + # and only the 5m rolling-window markers on the last 2 messages + # get cached. Portal Qwen still gets cache_control via + # _anthropic_prompt_cache_policy returning (True, False) — it + # just rides the standard system_and_3 5m layout instead of the + # mismatched prefix_and_2 1h layout. + if is_nous_portal and is_claude: return True if not is_claude: diff --git a/tests/run_agent/test_anthropic_prompt_cache_policy.py b/tests/run_agent/test_anthropic_prompt_cache_policy.py index 15d1cb4e87a..3d7358e6704 100644 --- a/tests/run_agent/test_anthropic_prompt_cache_policy.py +++ b/tests/run_agent/test_anthropic_prompt_cache_policy.py @@ -372,29 +372,36 @@ class TestSupportsLongLivedAnthropicCache: ) assert agent._supports_long_lived_anthropic_cache() is True - def test_nous_portal_qwen_supported(self): - # Portal Qwen rides the same OpenRouter-equivalent transport as - # Portal Claude; long-lived (1h cross-session) cache_control - # markers apply identically. + def test_nous_portal_qwen_NOT_long_lived(self): + # Portal Qwen still gets cache_control markers via the standard + # system_and_3 5m layout (see _anthropic_prompt_cache_policy + # tests above), but it must NOT ride the prefix_and_2 1h layout. + # Alibaba DashScope (the upstream for every Qwen route, incl. + # Portal -> OpenRouter -> Alibaba) only supports a single + # ``ephemeral`` TTL of 5 minutes; ttl="1h" markers are silently + # ignored, so the high-value tools[-1] + system-prefix + # breakpoints don't land. Stay on system_and_3 instead. agent = _make_agent( provider="nous", base_url="https://inference-api.nousresearch.com/v1", api_mode="chat_completions", model="qwen3.6-plus", ) - assert agent._supports_long_lived_anthropic_cache() is True + assert agent._supports_long_lived_anthropic_cache() is False - def test_nous_portal_qwen_vendored_slug_supported(self): + def test_nous_portal_qwen_vendored_slug_NOT_long_lived(self): agent = _make_agent( provider="nous", base_url="https://inference-api.nousresearch.com/v1", api_mode="chat_completions", model="qwen/qwen3.6-plus", ) - assert agent._supports_long_lived_anthropic_cache() is True + assert agent._supports_long_lived_anthropic_cache() is False - def test_nous_portal_non_claude_non_qwen_rejected(self): - # Portal long-lived cache scope mirrors policy: Claude or Qwen only. + def test_nous_portal_non_claude_rejected(self): + # Portal long-lived cache scope is now Claude-only. Qwen + # rejection is covered by the dedicated tests above; this + # covers everything else (gpt, etc.). agent = _make_agent( provider="nous", base_url="https://inference-api.nousresearch.com/v1",