From 883e11f0a09a6683e35bb75758b686322e8634b0 Mon Sep 17 00:00:00 2001 From: Ninso112 Date: Sat, 9 May 2026 13:23:39 -0700 Subject: [PATCH] fix(openrouter): add x-grok-conv-id header for Grok models to improve prompt cache hit rates (carve-out of #22708) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pass session_id through to provider profile build_api_kwargs_extras so the OpenRouter profile can attach an xAI cache-affinity header (x-grok-conv-id: <session_id>) for x-ai/grok-* models. xAI's prompt cache requires server affinity via this header — without it the cache is poisoned and Grok prompt-cache hit rates drop dramatically on multi-turn sessions. Carve-out of #22708 by Ninso112. The original PR bundled a /diff slash command, a zsh completion fix (already on main via #22802), and holographic memory null-guards. This salvage keeps just the Grok header work — small, targeted, and well-tested. Other contributors and changes are preserved for separate review. Closes #22705. 
--- agent/transports/chat_completions.py | 1 + .../model-providers/openrouter/__init__.py | 16 ++++++- tests/providers/test_provider_profiles.py | 46 +++++++++++++++++++ 3 files changed, 61 insertions(+), 2 deletions(-) diff --git a/agent/transports/chat_completions.py b/agent/transports/chat_completions.py index ca29b39ffe4..9076797c7e9 100644 --- a/agent/transports/chat_completions.py +++ b/agent/transports/chat_completions.py @@ -448,6 +448,7 @@ class ChatCompletionsTransport(ProviderTransport): qwen_session_metadata=params.get("qwen_session_metadata"), model=model, ollama_num_ctx=params.get("ollama_num_ctx"), + session_id=params.get("session_id"), ) ) api_kwargs.update(top_level_from_profile) diff --git a/plugins/model-providers/openrouter/__init__.py b/plugins/model-providers/openrouter/__init__.py index 6aad8fc65df..d1bed8eec0d 100644 --- a/plugins/model-providers/openrouter/__init__.py +++ b/plugins/model-providers/openrouter/__init__.py @@ -53,16 +53,28 @@ class OpenRouterProfile(ProviderProfile): *, reasoning_config: dict | None = None, supports_reasoning: bool = False, + model: str | None = None, + session_id: str | None = None, **context: Any, ) -> tuple[dict[str, Any], dict[str, Any]]: - """OpenRouter passes the full reasoning_config dict as extra_body.reasoning.""" + """OpenRouter passes the full reasoning_config dict as extra_body.reasoning. + + For xAI Grok models routed through OpenRouter, attach the + ``x-grok-conv-id`` header so that xAI's prompt cache stays pinned to + the same backend server across turns. 
+ """ extra_body: dict[str, Any] = {} if supports_reasoning: if reasoning_config is not None: extra_body["reasoning"] = dict(reasoning_config) else: extra_body["reasoning"] = {"enabled": True, "effort": "medium"} - return extra_body, {} + + extra_headers: dict[str, Any] = {} + if session_id and model and model.startswith(("x-ai/grok-", "xai/grok-")): + extra_headers["x-grok-conv-id"] = session_id + + return extra_body, {"extra_headers": extra_headers} if extra_headers else {} openrouter = OpenRouterProfile( diff --git a/tests/providers/test_provider_profiles.py b/tests/providers/test_provider_profiles.py index 3e80b0d2f26..d56306cef35 100644 --- a/tests/providers/test_provider_profiles.py +++ b/tests/providers/test_provider_profiles.py @@ -121,6 +121,52 @@ class TestOpenRouterProfile: eb, _ = p.build_api_kwargs_extras(supports_reasoning=True) assert eb["reasoning"] == {"enabled": True, "effort": "medium"} + def test_grok_session_id_sets_cache_affinity_header(self): + """OpenRouter + Grok model + session_id => x-grok-conv-id header.""" + p = get_provider_profile("openrouter") + _, tl = p.build_api_kwargs_extras( + model="x-ai/grok-4", + session_id="sess-abc123", + ) + assert tl["extra_headers"]["x-grok-conv-id"] == "sess-abc123" + + def test_grok_xai_prefix_also_supported(self): + """xai/ prefix (without dash) should also get the header.""" + p = get_provider_profile("openrouter") + _, tl = p.build_api_kwargs_extras( + model="xai/grok-3", + session_id="sess-xyz", + ) + assert tl["extra_headers"]["x-grok-conv-id"] == "sess-xyz" + + def test_non_grok_model_no_affinity_header(self): + """OpenRouter + non-Grok model => no x-grok-conv-id header.""" + p = get_provider_profile("openrouter") + _, tl = p.build_api_kwargs_extras( + model="anthropic/claude-sonnet-4.6", + session_id="sess-abc123", + ) + assert "extra_headers" not in tl + assert "x-grok-conv-id" not in tl + + def test_grok_without_session_id_no_header(self): + """Grok model but no session_id => no header 
(nothing to pin).""" + p = get_provider_profile("openrouter") + _, tl = p.build_api_kwargs_extras(model="x-ai/grok-4") + assert "extra_headers" not in tl + + def test_grok_reasoning_and_header_together(self): + """Reasoning extra_body and Grok header should coexist.""" + p = get_provider_profile("openrouter") + eb, tl = p.build_api_kwargs_extras( + model="x-ai/grok-4", + session_id="sess-123", + supports_reasoning=True, + reasoning_config={"enabled": True, "effort": "high"}, + ) + assert eb["reasoning"] == {"enabled": True, "effort": "high"} + assert tl["extra_headers"]["x-grok-conv-id"] == "sess-123" + class TestNousProfile: def test_tags(self):