mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-18 04:41:56 +00:00
fix(cache): route Nous Portal Qwen through Portal-Claude cache pathway (#24151)
Qwen models on Nous Portal (e.g. qwen3.6-plus) now get the same envelope-layout cache_control markers and long-lived (1h cross-session) cache treatment as Portal Claude. Portal proxies to OpenRouter with identical wire-format and cache_control semantics, but the prior policy left Portal Qwen falling through to the alibaba-family branch (which only matches provider=opencode/alibaba), serving 0% cache hits and re-billing the full prompt every turn. Scope is narrow: Portal Claude OR Portal Qwen. Other models on Portal keep their existing behavior. - _anthropic_prompt_cache_policy: add (is_nous_portal and qwen) -> (True, False) - _supports_long_lived_anthropic_cache: drop Claude-only gate for Portal so Qwen also gets the validated 1h cross-session layout - tests cover both functions, both bare and vendored qwen slug forms, and the rejection of non-Claude non-Qwen Portal traffic
This commit is contained in:
parent
3c23b15f81
commit
7993e03c06
2 changed files with 88 additions and 2 deletions
25
run_agent.py
25
run_agent.py
|
|
@ -3465,6 +3465,15 @@ class AIAgent:
|
|||
return True, True
|
||||
if (is_openrouter or is_nous_portal) and is_claude:
|
||||
return True, False
|
||||
# Nous Portal Qwen (e.g. qwen3.6-plus) takes the same envelope-layout
|
||||
# cache_control path as Portal Claude. Portal proxies to OpenRouter
|
||||
# and the upstream Qwen route accepts cache_control markers; without
|
||||
# this branch the alibaba-family check below only matches
|
||||
# provider=opencode/alibaba and Portal traffic falls through to
|
||||
# (False, False), serving 0% cache hits and re-billing the full
|
||||
# prompt on every turn.
|
||||
if is_nous_portal and "qwen" in model_lower:
|
||||
return True, False
|
||||
if is_anthropic_wire and is_claude:
|
||||
# Third-party Anthropic-compatible gateway.
|
||||
return True, True
|
||||
|
|
@ -3540,7 +3549,19 @@ class AIAgent:
|
|||
eff_api_mode = api_mode if api_mode is not None else (self.api_mode or "")
|
||||
eff_model = (model if model is not None else self.model) or ""
|
||||
|
||||
if "claude" not in eff_model.lower():
|
||||
model_lower = eff_model.lower()
|
||||
is_claude = "claude" in model_lower
|
||||
is_nous_portal = "nousresearch" in eff_base_url.lower()
|
||||
|
||||
# Nous Portal: Claude AND Qwen both get long-lived caching.
|
||||
# Portal proxies to OpenRouter with identical cache_control
|
||||
# semantics; any model on Portal that accepts envelope-layout
|
||||
# markers via _anthropic_prompt_cache_policy also benefits from
|
||||
# the documented 1h cross-session TTL.
|
||||
if is_nous_portal and (is_claude or "qwen" in model_lower):
|
||||
return True
|
||||
|
||||
if not is_claude:
|
||||
return False
|
||||
|
||||
# Native Anthropic + Anthropic OAuth subscription
|
||||
|
|
@ -3554,7 +3575,7 @@ class AIAgent:
|
|||
|
||||
# Nous Portal — front-ends OpenRouter behind the scenes; identical
|
||||
# wire format and cache_control semantics.
|
||||
if "nousresearch" in eff_base_url.lower():
|
||||
if is_nous_portal:
|
||||
return True
|
||||
|
||||
return False
|
||||
|
|
|
|||
|
|
@ -257,6 +257,40 @@ class TestQwenAlibabaFamily:
|
|||
)
|
||||
assert agent._anthropic_prompt_cache_policy() == (False, False)
|
||||
|
||||
def test_qwen_on_nous_portal_caches_with_envelope_layout(self):
|
||||
# Nous Portal Qwen takes the same envelope-layout cache_control
|
||||
# path as Portal Claude. Without this, Portal-routed qwen3.6-plus
|
||||
# falls through to the alibaba-family check (which only matches
|
||||
# provider=opencode/alibaba) and serves 0% cache hits.
|
||||
agent = _make_agent(
|
||||
provider="nous",
|
||||
base_url="https://inference-api.nousresearch.com/v1",
|
||||
api_mode="chat_completions",
|
||||
model="qwen3.6-plus",
|
||||
)
|
||||
assert agent._anthropic_prompt_cache_policy() == (True, False)
|
||||
|
||||
def test_qwen_vendored_slug_on_nous_portal_caches(self):
|
||||
# Same path but with the vendored slug form Portal sometimes uses.
|
||||
agent = _make_agent(
|
||||
provider="nous",
|
||||
base_url="https://inference-api.nousresearch.com/v1",
|
||||
api_mode="chat_completions",
|
||||
model="qwen/qwen3.6-plus",
|
||||
)
|
||||
assert agent._anthropic_prompt_cache_policy() == (True, False)
|
||||
|
||||
def test_non_qwen_non_claude_on_nous_portal_does_not_cache(self):
|
||||
# Portal scope is narrow: Claude OR Qwen only. Other models
|
||||
# routed through Portal keep their existing fall-through behavior.
|
||||
agent = _make_agent(
|
||||
provider="nous",
|
||||
base_url="https://inference-api.nousresearch.com/v1",
|
||||
api_mode="chat_completions",
|
||||
model="openai/gpt-5.4",
|
||||
)
|
||||
assert agent._anthropic_prompt_cache_policy() == (False, False)
|
||||
|
||||
|
||||
class TestExplicitOverrides:
|
||||
"""Policy accepts keyword overrides for switch_model / fallback activation."""
|
||||
|
|
@ -338,6 +372,37 @@ class TestSupportsLongLivedAnthropicCache:
|
|||
)
|
||||
assert agent._supports_long_lived_anthropic_cache() is True
|
||||
|
||||
def test_nous_portal_qwen_supported(self):
|
||||
# Portal Qwen rides the same OpenRouter-equivalent transport as
|
||||
# Portal Claude; long-lived (1h cross-session) cache_control
|
||||
# markers apply identically.
|
||||
agent = _make_agent(
|
||||
provider="nous",
|
||||
base_url="https://inference-api.nousresearch.com/v1",
|
||||
api_mode="chat_completions",
|
||||
model="qwen3.6-plus",
|
||||
)
|
||||
assert agent._supports_long_lived_anthropic_cache() is True
|
||||
|
||||
def test_nous_portal_qwen_vendored_slug_supported(self):
|
||||
agent = _make_agent(
|
||||
provider="nous",
|
||||
base_url="https://inference-api.nousresearch.com/v1",
|
||||
api_mode="chat_completions",
|
||||
model="qwen/qwen3.6-plus",
|
||||
)
|
||||
assert agent._supports_long_lived_anthropic_cache() is True
|
||||
|
||||
def test_nous_portal_non_claude_non_qwen_rejected(self):
|
||||
# Portal long-lived cache scope mirrors policy: Claude or Qwen only.
|
||||
agent = _make_agent(
|
||||
provider="nous",
|
||||
base_url="https://inference-api.nousresearch.com/v1",
|
||||
api_mode="chat_completions",
|
||||
model="openai/gpt-5.4",
|
||||
)
|
||||
assert agent._supports_long_lived_anthropic_cache() is False
|
||||
|
||||
def test_openrouter_non_claude_rejected(self):
|
||||
agent = _make_agent(
|
||||
provider="openrouter",
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue