From 883e11f0a09a6683e35bb75758b686322e8634b0 Mon Sep 17 00:00:00 2001
From: Ninso112 <ninso112@proton.me>
Date: Sat, 9 May 2026 13:23:39 -0700
Subject: [PATCH] fix(openrouter): add x-grok-conv-id header for Grok models to
 improve prompt cache hit rates (carve-out of #22708)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Pass session_id through to the provider profile's
build_api_kwargs_extras so the OpenRouter profile can attach an xAI
cache-affinity header (x-grok-conv-id: <session-id>) for x-ai/grok-*
and xai/grok-* models. xAI's prompt cache requires server affinity via
this header — without it, successive turns are not pinned to the same
backend server and Grok prompt-cache hit rates drop dramatically on
multi-turn sessions.

Carve-out of #22708 by Ninso112. The original PR bundled a /diff
slash command, a zsh completion fix (already on main via #22802),
and holographic memory null-guards. This salvage keeps only the
Grok header work, which is small, targeted, and well-tested. The
other contributors' work and the remaining changes are left for
separate review.

Closes #22705.
---
 agent/transports/chat_completions.py          |  1 +
 .../model-providers/openrouter/__init__.py    | 16 ++++++-
 tests/providers/test_provider_profiles.py     | 46 +++++++++++++++++++
 3 files changed, 61 insertions(+), 2 deletions(-)
diff --git a/agent/transports/chat_completions.py b/agent/transports/chat_completions.py
index ca29b39ffe4..9076797c7e9 100644
--- a/agent/transports/chat_completions.py
+++ b/agent/transports/chat_completions.py
@@ -448,6 +448,7 @@ class ChatCompletionsTransport(ProviderTransport):
                 qwen_session_metadata=params.get("qwen_session_metadata"),
                 model=model,
                 ollama_num_ctx=params.get("ollama_num_ctx"),
+                session_id=params.get("session_id"),
             )
         )
         api_kwargs.update(top_level_from_profile)
diff --git a/plugins/model-providers/openrouter/__init__.py b/plugins/model-providers/openrouter/__init__.py
index 6aad8fc65df..d1bed8eec0d 100644
--- a/plugins/model-providers/openrouter/__init__.py
+++ b/plugins/model-providers/openrouter/__init__.py
@@ -53,16 +53,28 @@ class OpenRouterProfile(ProviderProfile):
         *,
         reasoning_config: dict | None = None,
         supports_reasoning: bool = False,
+        model: str | None = None,
+        session_id: str | None = None,
         **context: Any,
     ) -> tuple[dict[str, Any], dict[str, Any]]:
-        """OpenRouter passes the full reasoning_config dict as extra_body.reasoning."""
+        """OpenRouter passes the full reasoning_config dict as extra_body.reasoning.
+
+        For xAI Grok models routed through OpenRouter, attach the
+        ``x-grok-conv-id`` header so that xAI's prompt cache stays pinned to
+        the same backend server across turns.
+        """
         extra_body: dict[str, Any] = {}
         if supports_reasoning:
             if reasoning_config is not None:
                 extra_body["reasoning"] = dict(reasoning_config)
             else:
                 extra_body["reasoning"] = {"enabled": True, "effort": "medium"}
-        return extra_body, {}
+
+        extra_headers: dict[str, Any] = {}
+        if session_id and model and model.startswith(("x-ai/grok-", "xai/grok-")):
+            extra_headers["x-grok-conv-id"] = session_id
+
+        return extra_body, {"extra_headers": extra_headers} if extra_headers else {}
 
 
 openrouter = OpenRouterProfile(
diff --git a/tests/providers/test_provider_profiles.py b/tests/providers/test_provider_profiles.py
index 3e80b0d2f26..d56306cef35 100644
--- a/tests/providers/test_provider_profiles.py
+++ b/tests/providers/test_provider_profiles.py
@@ -121,6 +121,52 @@ class TestOpenRouterProfile:
         eb, _ = p.build_api_kwargs_extras(supports_reasoning=True)
         assert eb["reasoning"] == {"enabled": True, "effort": "medium"}
 
+    def test_grok_session_id_sets_cache_affinity_header(self):
+        """OpenRouter + Grok model + session_id => x-grok-conv-id header."""
+        p = get_provider_profile("openrouter")
+        _, tl = p.build_api_kwargs_extras(
+            model="x-ai/grok-4",
+            session_id="sess-abc123",
+        )
+        assert tl["extra_headers"]["x-grok-conv-id"] == "sess-abc123"
+
+    def test_grok_xai_prefix_also_supported(self):
+        """xai/ prefix (without dash) should also get the header."""
+        p = get_provider_profile("openrouter")
+        _, tl = p.build_api_kwargs_extras(
+            model="xai/grok-3",
+            session_id="sess-xyz",
+        )
+        assert tl["extra_headers"]["x-grok-conv-id"] == "sess-xyz"
+
+    def test_non_grok_model_no_affinity_header(self):
+        """OpenRouter + non-Grok model => no x-grok-conv-id header."""
+        p = get_provider_profile("openrouter")
+        _, tl = p.build_api_kwargs_extras(
+            model="anthropic/claude-sonnet-4.6",
+            session_id="sess-abc123",
+        )
+        assert "extra_headers" not in tl
+        assert "x-grok-conv-id" not in tl
+
+    def test_grok_without_session_id_no_header(self):
+        """Grok model but no session_id => no header (nothing to pin)."""
+        p = get_provider_profile("openrouter")
+        _, tl = p.build_api_kwargs_extras(model="x-ai/grok-4")
+        assert "extra_headers" not in tl
+
+    def test_grok_reasoning_and_header_together(self):
+        """Reasoning extra_body and Grok header should coexist."""
+        p = get_provider_profile("openrouter")
+        eb, tl = p.build_api_kwargs_extras(
+            model="x-ai/grok-4",
+            session_id="sess-123",
+            supports_reasoning=True,
+            reasoning_config={"enabled": True, "effort": "high"},
+        )
+        assert eb["reasoning"] == {"enabled": True, "effort": "high"}
+        assert tl["extra_headers"]["x-grok-conv-id"] == "sess-123"
+
 
 class TestNousProfile:
     def test_tags(self):