fix(openrouter): add x-grok-conv-id header for Grok models to improve prompt cache hit rates (carve-out of #22708)

Pass session_id through to provider profile build_api_kwargs_extras so
the OpenRouter profile can attach an xAI cache-affinity header
(x-grok-conv-id: <session-id>) for x-ai/grok-* models. xAI's prompt
cache relies on server affinity via this header — without it,
successive turns are not pinned to the same backend server and Grok
prompt-cache hit rates drop dramatically on multi-turn sessions.

Carve-out of #22708 by Ninso112. The original PR bundled a /diff
slash command, a zsh completion fix (already on main via #22802),
and holographic memory null-guards. This salvage keeps just the
Grok header work — small, targeted, and well-tested. Other
contributors and changes preserved for separate review.

Closes #22705.
This commit is contained in:
Ninso112 2026-05-09 13:23:39 -07:00 committed by Teknium
parent 5e2eba87e6
commit 883e11f0a0
3 changed files with 61 additions and 2 deletions

View file

@ -448,6 +448,7 @@ class ChatCompletionsTransport(ProviderTransport):
qwen_session_metadata=params.get("qwen_session_metadata"),
model=model,
ollama_num_ctx=params.get("ollama_num_ctx"),
session_id=params.get("session_id"),
)
)
api_kwargs.update(top_level_from_profile)

View file

@ -53,16 +53,28 @@ class OpenRouterProfile(ProviderProfile):
*,
reasoning_config: dict | None = None,
supports_reasoning: bool = False,
model: str | None = None,
session_id: str | None = None,
**context: Any,
) -> tuple[dict[str, Any], dict[str, Any]]:
"""OpenRouter passes the full reasoning_config dict as extra_body.reasoning."""
"""OpenRouter passes the full reasoning_config dict as extra_body.reasoning.
For xAI Grok models routed through OpenRouter, attach the
``x-grok-conv-id`` header so that xAI's prompt cache stays pinned to
the same backend server across turns.
"""
extra_body: dict[str, Any] = {}
if supports_reasoning:
if reasoning_config is not None:
extra_body["reasoning"] = dict(reasoning_config)
else:
extra_body["reasoning"] = {"enabled": True, "effort": "medium"}
return extra_body, {}
extra_headers: dict[str, Any] = {}
if session_id and model and model.startswith(("x-ai/grok-", "xai/grok-")):
extra_headers["x-grok-conv-id"] = session_id
return extra_body, {"extra_headers": extra_headers} if extra_headers else {}
openrouter = OpenRouterProfile(

View file

@ -121,6 +121,52 @@ class TestOpenRouterProfile:
eb, _ = p.build_api_kwargs_extras(supports_reasoning=True)
assert eb["reasoning"] == {"enabled": True, "effort": "medium"}
def test_grok_session_id_sets_cache_affinity_header(self):
    """A Grok model combined with a session id yields ``x-grok-conv-id``."""
    profile = get_provider_profile("openrouter")
    _extra_body, top_level = profile.build_api_kwargs_extras(
        session_id="sess-abc123",
        model="x-ai/grok-4",
    )
    # The header value must be the session id, passed through unchanged.
    headers = top_level["extra_headers"]
    assert headers["x-grok-conv-id"] == "sess-abc123"
def test_grok_xai_prefix_also_supported(self):
    """A model id using the dash-less ``xai/`` prefix also gets the header."""
    profile = get_provider_profile("openrouter")
    _extra_body, top_level = profile.build_api_kwargs_extras(
        session_id="sess-xyz",
        model="xai/grok-3",
    )
    headers = top_level["extra_headers"]
    assert headers["x-grok-conv-id"] == "sess-xyz"
def test_non_grok_model_no_affinity_header(self):
    """A non-Grok model produces no ``x-grok-conv-id`` header at all."""
    profile = get_provider_profile("openrouter")
    _extra_body, top_level = profile.build_api_kwargs_extras(
        session_id="sess-abc123",
        model="anthropic/claude-sonnet-4.6",
    )
    # Neither a headers mapping nor a stray header key may appear among the
    # top-level kwargs.
    for absent_key in ("extra_headers", "x-grok-conv-id"):
        assert absent_key not in top_level
def test_grok_without_session_id_no_header(self):
    """A Grok model without a session id gets no header (nothing to pin)."""
    profile = get_provider_profile("openrouter")
    extras = profile.build_api_kwargs_extras(model="x-ai/grok-4")
    _extra_body, top_level = extras
    assert "extra_headers" not in top_level
def test_grok_reasoning_and_header_together(self):
    """The reasoning ``extra_body`` and the Grok header are both returned."""
    profile = get_provider_profile("openrouter")
    extra_body, top_level = profile.build_api_kwargs_extras(
        reasoning_config={"enabled": True, "effort": "high"},
        supports_reasoning=True,
        session_id="sess-123",
        model="x-ai/grok-4",
    )
    # Check the header first, then the reasoning payload; neither may
    # displace the other.
    assert top_level["extra_headers"]["x-grok-conv-id"] == "sess-123"
    assert extra_body["reasoning"] == {"enabled": True, "effort": "high"}
class TestNousProfile:
def test_tags(self):