fix(api-server): persist incomplete snapshot on asyncio.CancelledError too

Extends PR #15171 to also cover the server-side cancellation path (aiohttp shutdown, request-level timeout). Previously only the client-disconnect handler (ConnectionResetError, ConnectionAbortedError, BrokenPipeError, OSError) triggered the incomplete-snapshot write, so cancellations left the store stuck at the in_progress snapshot written on response.created. Factors the incomplete-snapshot build into a _persist_incomplete_if_needed() helper called from both the client-disconnect and CancelledError branches; the CancelledError handler also interrupts the agent and cancels the agent task, then re-raises so cooperative cancellation semantics are preserved. Adds two regression tests that drive _write_sse_responses directly, because the TestClient disconnect path races the server handler, which makes the end-to-end assertion flaky.
2026-04-25 00:51:20 +00:00 · 2026-04-24 15:21:39 -07:00 · 2026-04-24 15:21:39 -07:00 · 36d68bcb82
commit 36d68bcb82
parent a29bad2a3c
2 changed files with 184 additions and 24 deletions
--- a/gateway/platforms/api_server.py
+++ b/gateway/platforms/api_server.py
@ -1292,6 +1292,40 @@ class APIServerAdapter(BasePlatformAdapter):
            if conversation:
                self._response_store.set_conversation(conversation, response_id)

+        def _persist_incomplete_if_needed() -> None:
+            """Persist an ``incomplete`` snapshot if no terminal one was written.
+
+            Called from both the client-disconnect (``ConnectionResetError``)
+            and server-cancellation (``asyncio.CancelledError``) paths so
+            GET /v1/responses/{id} and ``previous_response_id`` chaining keep
+            working after abrupt stream termination.
+            """
+            if not store or terminal_snapshot_persisted:
+                return
+            incomplete_text = "".join(final_text_parts) or final_response_text
+            incomplete_items: List[Dict[str, Any]] = list(emitted_items)
+            if incomplete_text:
+                incomplete_items.append({
+                    "type": "message",
+                    "role": "assistant",
+                    "content": [{"type": "output_text", "text": incomplete_text}],
+                })
+            incomplete_env = _envelope("incomplete")
+            incomplete_env["output"] = incomplete_items
+            incomplete_env["usage"] = {
+                "input_tokens": usage.get("input_tokens", 0),
+                "output_tokens": usage.get("output_tokens", 0),
+                "total_tokens": usage.get("total_tokens", 0),
+            }
+            incomplete_history = list(conversation_history)
+            incomplete_history.append({"role": "user", "content": user_message})
+            if incomplete_text:
+                incomplete_history.append({"role": "assistant", "content": incomplete_text})
+            _persist_response_snapshot(
+                incomplete_env,
+                conversation_history_snapshot=incomplete_history,
+            )
+
        try:
            # response.created — initial envelope, status=in_progress
            created_env = _envelope("in_progress")
@ -1598,30 +1632,7 @@ class APIServerAdapter(BasePlatformAdapter):
                })

        except (ConnectionResetError, ConnectionAbortedError, BrokenPipeError, OSError):
-            if store and not terminal_snapshot_persisted:
-                incomplete_text = "".join(final_text_parts) or final_response_text
-                incomplete_items: List[Dict[str, Any]] = list(emitted_items)
-                if incomplete_text:
-                    incomplete_items.append({
-                        "type": "message",
-                        "role": "assistant",
-                        "content": [{"type": "output_text", "text": incomplete_text}],
-                    })
-                incomplete_env = _envelope("incomplete")
-                incomplete_env["output"] = incomplete_items
-                incomplete_env["usage"] = {
-                    "input_tokens": usage.get("input_tokens", 0),
-                    "output_tokens": usage.get("output_tokens", 0),
-                    "total_tokens": usage.get("total_tokens", 0),
-                }
-                incomplete_history = list(conversation_history)
-                incomplete_history.append({"role": "user", "content": user_message})
-                if incomplete_text:
-                    incomplete_history.append({"role": "assistant", "content": incomplete_text})
-                _persist_response_snapshot(
-                    incomplete_env,
-                    conversation_history_snapshot=incomplete_history,
-                )
+            _persist_incomplete_if_needed()
            # Client disconnected — interrupt the agent so it stops
            # making upstream LLM calls, then cancel the task.
            agent = agent_ref[0] if agent_ref else None
@ -1637,6 +1648,22 @@ class APIServerAdapter(BasePlatformAdapter):
                except (asyncio.CancelledError, Exception):
                    pass
            logger.info("SSE client disconnected; interrupted agent task %s", response_id)
+        except asyncio.CancelledError:
+            # Server-side cancellation (e.g. shutdown, request timeout) —
+            # persist an incomplete snapshot so GET /v1/responses/{id} and
+            # previous_response_id chaining still work, then re-raise so the
+            # runtime's cancellation semantics are respected.
+            _persist_incomplete_if_needed()
+            agent = agent_ref[0] if agent_ref else None
+            if agent is not None:
+                try:
+                    agent.interrupt("SSE task cancelled")
+                except Exception:
+                    pass
+            if not agent_task.done():
+                agent_task.cancel()
+            logger.info("SSE task cancelled; persisted incomplete snapshot for %s", response_id)
+            raise

        return response