From 5a55d54ee22ccc10e4ca9ca4e843b91ec5f0d8cd Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Sat, 11 Apr 2026 01:55:36 -0700 Subject: [PATCH] fix(gateway): don't suppress error messages when streaming already_sent (#7652) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the stream consumer has sent at least one message (already_sent=True), the gateway skips sending the final response to avoid duplicates. But this also suppressed error messages when the agent failed mid-loop — rate limit exhaustion, context overflow, compression failure, etc. The user would see the last streamed content and then nothing: no error message, no explanation. The agent appeared to 'stop responding.' Fix: check the 'failed' flag at both the producer (_run_agent marks already_sent) and consumer (_handle_message_with_agent checks it) sites. Error messages are always delivered regardless of streaming state. --- gateway/run.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/gateway/run.py b/gateway/run.py index bf5103d126..2f15361c6a 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -3485,7 +3485,12 @@ class GatewayRunner: # post-processing in _process_message_background is skipped # when already_sent is True, so media files would never be # delivered without this. - if agent_result.get("already_sent"): + # + # Never skip when the agent failed — the error message is new + # content the user hasn't seen (streaming only sent earlier + # partial output before the failure). Without this guard, + # users see the agent "stop responding without explanation." + if agent_result.get("already_sent") and not agent_result.get("failed"): if response: _media_adapter = self.adapters.get(source.platform) if _media_adapter: @@ -8012,9 +8017,13 @@ class GatewayRunner: # If streaming already delivered the response, mark it so the # caller's send() is skipped (avoiding duplicate messages). + # BUT: never suppress delivery when the agent failed — the error + # message is new content the user hasn't seen, and it must reach + # them even if streaming had sent earlier partial output. _sc = stream_consumer_holder[0] if _sc and _sc.already_sent and isinstance(response, dict): - response["already_sent"] = True + if not response.get("failed"): + response["already_sent"] = True return response