fix: preserve session_id across previous_response_id chains in /v1/responses (#10059)

The /v1/responses endpoint generated a new UUID session_id for every
request, even when previous_response_id was provided. This caused each
turn of a multi-turn conversation to appear as a separate session on the
web dashboard, despite the conversation history being correctly chained.

Fix: store session_id alongside the response in the ResponseStore, and
reuse it when a subsequent request chains via previous_response_id.
Applies to both the non-streaming /v1/responses path and the streaming
SSE path. The /v1/runs endpoint also gains session continuity from
stored responses (explicit body.session_id still takes priority).

Adds a test verifying that session_id is preserved across chained requests.
This commit is contained in:
Teknium 2026-04-14 21:06:32 -07:00 committed by GitHub
parent ca0ae56ccb
commit 5cbb45d93e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 51 additions and 3 deletions

View file

@ -1366,6 +1366,7 @@ class APIServerAdapter(BasePlatformAdapter):
"response": completed_env,
"conversation_history": full_history,
"instructions": instructions,
"session_id": session_id,
})
if conversation:
self._response_store.set_conversation(conversation, response_id)
@ -1459,11 +1460,13 @@ class APIServerAdapter(BasePlatformAdapter):
if previous_response_id:
logger.debug("Both conversation_history and previous_response_id provided; using conversation_history")
stored_session_id = None
if not conversation_history and previous_response_id:
stored = self._response_store.get(previous_response_id)
if stored is None:
return web.json_response(_openai_error(f"Previous response not found: {previous_response_id}"), status=404)
conversation_history = list(stored.get("conversation_history", []))
stored_session_id = stored.get("session_id")
# If no instructions provided, carry forward from previous
if instructions is None:
instructions = stored.get("instructions")
@ -1481,8 +1484,9 @@ class APIServerAdapter(BasePlatformAdapter):
if body.get("truncation") == "auto" and len(conversation_history) > 100:
conversation_history = conversation_history[-100:]
# Run the agent (with Idempotency-Key support)
session_id = str(uuid.uuid4())
# Reuse session from previous_response_id chain so the dashboard
# groups the entire conversation under one session entry.
session_id = stored_session_id or str(uuid.uuid4())
stream = bool(body.get("stream", False))
if stream:
@ -1631,6 +1635,7 @@ class APIServerAdapter(BasePlatformAdapter):
"response": response_data,
"conversation_history": full_history,
"instructions": instructions,
"session_id": session_id,
})
# Update conversation mapping so the next request with the same
# conversation name automatically chains to this response
@ -2145,10 +2150,12 @@ class APIServerAdapter(BasePlatformAdapter):
if previous_response_id:
logger.debug("Both conversation_history and previous_response_id provided; using conversation_history")
stored_session_id = None
if not conversation_history and previous_response_id:
stored = self._response_store.get(previous_response_id)
if stored:
conversation_history = list(stored.get("conversation_history", []))
stored_session_id = stored.get("session_id")
if instructions is None:
instructions = stored.get("instructions")
@ -2167,7 +2174,7 @@ class APIServerAdapter(BasePlatformAdapter):
)
conversation_history.append({"role": msg["role"], "content": str(content)})
session_id = body.get("session_id") or run_id
session_id = body.get("session_id") or stored_session_id or run_id
ephemeral_system_prompt = instructions
async def _run_and_close():

View file

@ -1016,6 +1016,47 @@ class TestResponsesEndpoint:
assert len(call_kwargs["conversation_history"]) > 0
assert call_kwargs["user_message"] == "Now add 1 more"
@pytest.mark.asyncio
async def test_previous_response_id_preserves_session(self, adapter):
    """A request chained via previous_response_id runs under the earlier session_id."""
    agent_result = {
        "final_response": "ok",
        "messages": [{"role": "assistant", "content": "ok"}],
        "api_calls": 1,
    }
    token_usage = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}

    async def post_and_capture(client, payload):
        """POST *payload* to /v1/responses with the agent mocked out.

        Returns the HTTP response together with the ``session_id`` keyword
        argument that the handler passed to ``_run_agent``.
        """
        with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as run_mock:
            run_mock.return_value = (agent_result, token_usage)
            reply = await client.post("/v1/responses", json=payload)
            assert reply.status == 200
            return reply, run_mock.call_args.kwargs["session_id"]

    app = _create_app(adapter)
    async with TestClient(TestServer(app)) as cli:
        # Opening turn: no previous_response_id, so a session is established here.
        first_reply, first_session_id = await post_and_capture(
            cli,
            {"model": "hermes-agent", "input": "Hello"},
        )
        first_body = await first_reply.json()
        response_id = first_body["id"]

        # Follow-up turn: chained onto the response returned by the opening turn.
        _, second_session_id = await post_and_capture(
            cli,
            {
                "model": "hermes-agent",
                "input": "Follow up",
                "previous_response_id": response_id,
            },
        )

        # Both turns must have been run under one and the same session.
        assert first_session_id == second_session_id
@pytest.mark.asyncio
async def test_invalid_previous_response_id_returns_404(self, adapter):
app = _create_app(adapter)