mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-24 05:41:40 +00:00
fix(openrouter): add x-grok-conv-id header for Grok models to improve prompt cache hit rates (carve-out of #22708)
Pass session_id through to the provider profile's build_api_kwargs_extras so that the OpenRouter profile can attach an xAI cache-affinity header (x-grok-conv-id: <session-id>) for x-ai/grok-* (and xai/grok-*) models. xAI's prompt cache requires server affinity via this header — without it, requests in a session are not pinned to the same backend server, and Grok prompt-cache hit rates drop dramatically on multi-turn sessions. Carve-out of #22708 by Ninso112. The original PR bundled a /diff slash command, a zsh completion fix (already on main via #22802), and holographic memory null-guards. This salvage keeps just the Grok header work — small, targeted, and well-tested. The other contributors' changes are left for separate review. Closes #22705.
This commit is contained in:
parent
5e2eba87e6
commit
883e11f0a0
3 changed files with 61 additions and 2 deletions
|
|
@ -448,6 +448,7 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||||
qwen_session_metadata=params.get("qwen_session_metadata"),
|
qwen_session_metadata=params.get("qwen_session_metadata"),
|
||||||
model=model,
|
model=model,
|
||||||
ollama_num_ctx=params.get("ollama_num_ctx"),
|
ollama_num_ctx=params.get("ollama_num_ctx"),
|
||||||
|
session_id=params.get("session_id"),
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
api_kwargs.update(top_level_from_profile)
|
api_kwargs.update(top_level_from_profile)
|
||||||
|
|
|
||||||
|
|
@ -53,16 +53,28 @@ class OpenRouterProfile(ProviderProfile):
|
||||||
*,
|
*,
|
||||||
reasoning_config: dict | None = None,
|
reasoning_config: dict | None = None,
|
||||||
supports_reasoning: bool = False,
|
supports_reasoning: bool = False,
|
||||||
|
model: str | None = None,
|
||||||
|
session_id: str | None = None,
|
||||||
**context: Any,
|
**context: Any,
|
||||||
) -> tuple[dict[str, Any], dict[str, Any]]:
|
) -> tuple[dict[str, Any], dict[str, Any]]:
|
||||||
"""OpenRouter passes the full reasoning_config dict as extra_body.reasoning."""
|
"""OpenRouter passes the full reasoning_config dict as extra_body.reasoning.
|
||||||
|
|
||||||
|
For xAI Grok models routed through OpenRouter, attach the
|
||||||
|
``x-grok-conv-id`` header so that xAI's prompt cache stays pinned to
|
||||||
|
the same backend server across turns.
|
||||||
|
"""
|
||||||
extra_body: dict[str, Any] = {}
|
extra_body: dict[str, Any] = {}
|
||||||
if supports_reasoning:
|
if supports_reasoning:
|
||||||
if reasoning_config is not None:
|
if reasoning_config is not None:
|
||||||
extra_body["reasoning"] = dict(reasoning_config)
|
extra_body["reasoning"] = dict(reasoning_config)
|
||||||
else:
|
else:
|
||||||
extra_body["reasoning"] = {"enabled": True, "effort": "medium"}
|
extra_body["reasoning"] = {"enabled": True, "effort": "medium"}
|
||||||
return extra_body, {}
|
|
||||||
|
extra_headers: dict[str, Any] = {}
|
||||||
|
if session_id and model and model.startswith(("x-ai/grok-", "xai/grok-")):
|
||||||
|
extra_headers["x-grok-conv-id"] = session_id
|
||||||
|
|
||||||
|
return extra_body, {"extra_headers": extra_headers} if extra_headers else {}
|
||||||
|
|
||||||
|
|
||||||
openrouter = OpenRouterProfile(
|
openrouter = OpenRouterProfile(
|
||||||
|
|
|
||||||
|
|
@ -121,6 +121,52 @@ class TestOpenRouterProfile:
|
||||||
eb, _ = p.build_api_kwargs_extras(supports_reasoning=True)
|
eb, _ = p.build_api_kwargs_extras(supports_reasoning=True)
|
||||||
assert eb["reasoning"] == {"enabled": True, "effort": "medium"}
|
assert eb["reasoning"] == {"enabled": True, "effort": "medium"}
|
||||||
|
|
||||||
|
def test_grok_session_id_sets_cache_affinity_header(self):
|
||||||
|
"""OpenRouter + Grok model + session_id => x-grok-conv-id header."""
|
||||||
|
p = get_provider_profile("openrouter")
|
||||||
|
_, tl = p.build_api_kwargs_extras(
|
||||||
|
model="x-ai/grok-4",
|
||||||
|
session_id="sess-abc123",
|
||||||
|
)
|
||||||
|
assert tl["extra_headers"]["x-grok-conv-id"] == "sess-abc123"
|
||||||
|
|
||||||
|
def test_grok_xai_prefix_also_supported(self):
|
||||||
|
"""xai/ prefix (without dash) should also get the header."""
|
||||||
|
p = get_provider_profile("openrouter")
|
||||||
|
_, tl = p.build_api_kwargs_extras(
|
||||||
|
model="xai/grok-3",
|
||||||
|
session_id="sess-xyz",
|
||||||
|
)
|
||||||
|
assert tl["extra_headers"]["x-grok-conv-id"] == "sess-xyz"
|
||||||
|
|
||||||
|
def test_non_grok_model_no_affinity_header(self):
|
||||||
|
"""OpenRouter + non-Grok model => no x-grok-conv-id header."""
|
||||||
|
p = get_provider_profile("openrouter")
|
||||||
|
_, tl = p.build_api_kwargs_extras(
|
||||||
|
model="anthropic/claude-sonnet-4.6",
|
||||||
|
session_id="sess-abc123",
|
||||||
|
)
|
||||||
|
assert "extra_headers" not in tl
|
||||||
|
assert "x-grok-conv-id" not in tl
|
||||||
|
|
||||||
|
def test_grok_without_session_id_no_header(self):
|
||||||
|
"""Grok model but no session_id => no header (nothing to pin)."""
|
||||||
|
p = get_provider_profile("openrouter")
|
||||||
|
_, tl = p.build_api_kwargs_extras(model="x-ai/grok-4")
|
||||||
|
assert "extra_headers" not in tl
|
||||||
|
|
||||||
|
def test_grok_reasoning_and_header_together(self):
|
||||||
|
"""Reasoning extra_body and Grok header should coexist."""
|
||||||
|
p = get_provider_profile("openrouter")
|
||||||
|
eb, tl = p.build_api_kwargs_extras(
|
||||||
|
model="x-ai/grok-4",
|
||||||
|
session_id="sess-123",
|
||||||
|
supports_reasoning=True,
|
||||||
|
reasoning_config={"enabled": True, "effort": "high"},
|
||||||
|
)
|
||||||
|
assert eb["reasoning"] == {"enabled": True, "effort": "high"}
|
||||||
|
assert tl["extra_headers"]["x-grok-conv-id"] == "sess-123"
|
||||||
|
|
||||||
|
|
||||||
class TestNousProfile:
|
class TestNousProfile:
|
||||||
def test_tags(self):
|
def test_tags(self):
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue