fix: preserve session_id across previous_response_id chains in /v1/responses (#10059)

The /v1/responses endpoint generated a new UUID session_id for every request, even when previous_response_id was provided. This caused each turn of a multi-turn conversation to appear as a separate session on the web dashboard, despite the conversation history being correctly chained. Fix: store session_id alongside the response in the ResponseStore, and reuse it when a subsequent request chains via previous_response_id. Applies to both the non-streaming /v1/responses path and the streaming SSE path. The /v1/runs endpoint also gains session continuity from stored responses (explicit body.session_id still takes priority). Adds test verifying session_id is preserved across chained requests.
2026-04-25 00:51:20 +00:00 · 2026-04-14 21:06:32 -07:00 · 2026-04-14 21:06:32 -07:00 · 5cbb45d93e
commit 5cbb45d93e
parent ca0ae56ccb
2 changed files with 51 additions and 3 deletions
--- a/tests/gateway/test_api_server.py
+++ b/tests/gateway/test_api_server.py
@@ -1016,6 +1016,47 @@ class TestResponsesEndpoint:
            assert len(call_kwargs["conversation_history"]) > 0
            assert call_kwargs["user_message"] == "Now add 1 more"

+    @pytest.mark.asyncio
+    async def test_previous_response_id_preserves_session(self, adapter):
+        """Chained /v1/responses requests pass the first request's session_id to _run_agent."""
+        mock_result = {
+            "final_response": "ok",
+            "messages": [{"role": "assistant", "content": "ok"}],
+            "api_calls": 1,
+        }
+        usage = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
+
+        app = _create_app(adapter)
+        async with TestClient(TestServer(app)) as cli:
+            # First request — sent without previous_response_id; its session_id is the baseline
+            with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run:
+                mock_run.return_value = (mock_result, usage)
+                resp1 = await cli.post(
+                    "/v1/responses",
+                    json={"model": "hermes-agent", "input": "Hello"},
+                )
+            assert resp1.status == 200
+            first_session_id = mock_run.call_args.kwargs["session_id"]
+            data1 = await resp1.json()
+            response_id = data1["id"]
+
+            # Second request — chains from the first by sending its response id
+            with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run:
+                mock_run.return_value = (mock_result, usage)
+                resp2 = await cli.post(
+                    "/v1/responses",
+                    json={
+                        "model": "hermes-agent",
+                        "input": "Follow up",
+                        "previous_response_id": response_id,
+                    },
+                )
+            assert resp2.status == 200
+            second_session_id = mock_run.call_args.kwargs["session_id"]
+
+            # Same session across the chain. NOTE(review): two None ids would also pass.
+            assert first_session_id == second_session_id
+
    @pytest.mark.asyncio
    async def test_invalid_previous_response_id_returns_404(self, adapter):
        app = _create_app(adapter)