diff --git a/agent/transports/chat_completions.py b/agent/transports/chat_completions.py
index ca29b39ffe4..9076797c7e9 100644
--- a/agent/transports/chat_completions.py
+++ b/agent/transports/chat_completions.py
@@ -448,6 +448,7 @@ class ChatCompletionsTransport(ProviderTransport):
                     qwen_session_metadata=params.get("qwen_session_metadata"),
                     model=model,
                     ollama_num_ctx=params.get("ollama_num_ctx"),
+                    session_id=params.get("session_id"),
                 )
             )
             api_kwargs.update(top_level_from_profile)
diff --git a/plugins/model-providers/openrouter/__init__.py b/plugins/model-providers/openrouter/__init__.py
index 6aad8fc65df..d1bed8eec0d 100644
--- a/plugins/model-providers/openrouter/__init__.py
+++ b/plugins/model-providers/openrouter/__init__.py
@@ -53,16 +53,51 @@ class OpenRouterProfile(ProviderProfile):
         *,
         reasoning_config: dict | None = None,
         supports_reasoning: bool = False,
+        model: str | None = None,
+        session_id: str | None = None,
         **context: Any,
     ) -> tuple[dict[str, Any], dict[str, Any]]:
-        """OpenRouter passes the full reasoning_config dict as extra_body.reasoning."""
+        """Build OpenRouter-specific request extras.
+
+        OpenRouter passes the full ``reasoning_config`` dict as
+        ``extra_body.reasoning``.
+
+        For xAI Grok models routed through OpenRouter, attach the
+        ``x-grok-conv-id`` header so that xAI's prompt cache stays pinned to
+        the same backend server across turns.
+
+        Args:
+            reasoning_config: Reasoning options copied into
+                ``extra_body["reasoning"]``; a medium-effort default is used
+                when omitted.
+            supports_reasoning: Whether to emit a ``reasoning`` entry at all.
+            model: Model slug; only ``x-ai/grok-*`` and ``xai/grok-*`` slugs
+                receive the conversation header.
+            session_id: Conversation identifier sent as ``x-grok-conv-id``.
+            **context: Extra keyword arguments, accepted and ignored.
+
+        Returns:
+            ``(extra_body, top_level)``; ``top_level`` is empty unless a Grok
+            header applies, in which case it is
+            ``{"extra_headers": {"x-grok-conv-id": session_id}}``.
+        """
         extra_body: dict[str, Any] = {}
         if supports_reasoning:
             if reasoning_config is not None:
                 extra_body["reasoning"] = dict(reasoning_config)
             else:
                 extra_body["reasoning"] = {"enabled": True, "effort": "medium"}
-        return extra_body, {}
+
+        # Header values are strings; non-Grok models get no header at all.
+        extra_headers: dict[str, str] = {}
+        if session_id and model and model.startswith(("x-ai/grok-", "xai/grok-")):
+            extra_headers["x-grok-conv-id"] = session_id
+
+        # Omit the ``extra_headers`` key entirely when there is nothing to send.
+        top_level: dict[str, Any] = (
+            {"extra_headers": extra_headers} if extra_headers else {}
+        )
+        return extra_body, top_level
 
 
 openrouter = OpenRouterProfile(
diff --git a/tests/providers/test_provider_profiles.py b/tests/providers/test_provider_profiles.py
index 3e80b0d2f26..d56306cef35 100644
--- a/tests/providers/test_provider_profiles.py
+++ b/tests/providers/test_provider_profiles.py
@@ -121,6 +121,58 @@ class TestOpenRouterProfile:
         eb, _ = p.build_api_kwargs_extras(supports_reasoning=True)
         assert eb["reasoning"] == {"enabled": True, "effort": "medium"}
 
+    def test_grok_session_id_sets_cache_affinity_header(self):
+        """OpenRouter + Grok model + session_id => x-grok-conv-id header."""
+        p = get_provider_profile("openrouter")
+        _, tl = p.build_api_kwargs_extras(
+            model="x-ai/grok-4",
+            session_id="sess-abc123",
+        )
+        assert tl["extra_headers"]["x-grok-conv-id"] == "sess-abc123"
+
+    def test_grok_xai_prefix_also_supported(self):
+        """xai/ prefix (without dash) should also get the header."""
+        p = get_provider_profile("openrouter")
+        _, tl = p.build_api_kwargs_extras(
+            model="xai/grok-3",
+            session_id="sess-xyz",
+        )
+        assert tl["extra_headers"]["x-grok-conv-id"] == "sess-xyz"
+
+    def test_non_grok_model_no_affinity_header(self):
+        """OpenRouter + non-Grok model => no x-grok-conv-id header."""
+        p = get_provider_profile("openrouter")
+        _, tl = p.build_api_kwargs_extras(
+            model="anthropic/claude-sonnet-4.6",
+            session_id="sess-abc123",
+        )
+        # Headers live under "extra_headers"; the whole key must be absent.
+        assert tl == {}
+
+    def test_grok_without_session_id_no_header(self):
+        """Grok model but no session_id => no header (nothing to pin)."""
+        p = get_provider_profile("openrouter")
+        _, tl = p.build_api_kwargs_extras(model="x-ai/grok-4")
+        assert "extra_headers" not in tl
+
+    def test_grok_empty_session_id_no_header(self):
+        """Grok model with an empty session_id => no header (falsy id)."""
+        p = get_provider_profile("openrouter")
+        _, tl = p.build_api_kwargs_extras(model="x-ai/grok-4", session_id="")
+        assert "extra_headers" not in tl
+
+    def test_grok_reasoning_and_header_together(self):
+        """Reasoning extra_body and Grok header should coexist."""
+        p = get_provider_profile("openrouter")
+        eb, tl = p.build_api_kwargs_extras(
+            model="x-ai/grok-4",
+            session_id="sess-123",
+            supports_reasoning=True,
+            reasoning_config={"enabled": True, "effort": "high"},
+        )
+        assert eb["reasoning"] == {"enabled": True, "effort": "high"}
+        assert tl["extra_headers"]["x-grok-conv-id"] == "sess-123"
+
 
 class TestNousProfile:
     def test_tags(self):