From 9140be7c228dcff02dc12d52a499eb06d6721496 Mon Sep 17 00:00:00 2001 From: xxxigm Date: Sat, 23 May 2026 22:05:14 +0700 Subject: [PATCH] fix(streaming): emit finish_reason=length on text-only partial-stream stub MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the API connection drops mid-stream after text deltas have already been delivered, chat_completion_helpers returned a stub response with finish_reason=stop. The conversation loop then classified the stub as a clean text completion (text_response(finish_reason=stop)) and exited with iteration budget remaining — even when the goal-judge verdict came back as "continue" milliseconds later (issue #30963). Switch the text-only partial-stream stub to finish_reason=length. The existing length-continuation path (length_continue_retries up to 3, "continue exactly where you left off" prompt, partial parts merged into final_response) then fires automatically: the partial assistant content is persisted, the model is asked to continue from the cut point, and the loop keeps making progress against the goal. The mid-tool-call branch keeps finish_reason=stop on purpose — its user-facing warning ("Ask me to retry if you want to continue") asks the user to drive the retry rather than auto-replaying a tool call with possible side effects. #5544's "no duplicate message" contract is preserved verbatim: the partial content is reused, never re-emitted as a fresh API call, so the user never sees two copies of the same delta. Refs: NousResearch/hermes-agent#30963 --- agent/chat_completion_helpers.py | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/agent/chat_completion_helpers.py b/agent/chat_completion_helpers.py index 602b923a894..b3261b60d0b 100644 --- a/agent/chat_completion_helpers.py +++ b/agent/chat_completion_helpers.py @@ -2077,8 +2077,21 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta= # Streaming failed AFTER some tokens were already delivered to # the platform. Re-raising would let the outer retry loop make # a new API call, creating a duplicate message. Return a - # partial "stop" response instead so the outer loop treats this - # turn as complete (no retry, no fallback). + # partial response stub instead and let the outer loop decide: + # + # - text-only partials → finish_reason="length" so the + # conversation loop persists the partial assistant content + # and asks the model to continue from where the stream + # died (issue #30963: partial stop misclassified as a + # clean completion was exiting the loop with budget + # remaining and an unfinished goal). + # + # - partial mid-tool-call → finish_reason="stop" stays. + # The user-visible warning we append says "Ask me to + # retry if you want to continue", so the agent should + # hand control back rather than auto-retry a tool call + # that may have side-effects. + # # Recover whatever content was already streamed to the user. # _current_streamed_assistant_text accumulates text fired # through _fire_stream_delta, so it has exactly what the @@ -2116,14 +2129,17 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta= "of text; surfaced warning to user: %s", _partial_names, len(_partial_text or ""), result["error"], ) + _stub_finish_reason = "stop" else: logger.warning( - "Partial stream delivered before error; returning stub " - "response with %s chars of recovered content to prevent " - "duplicate messages: %s", + "Partial stream delivered before error; returning " + "length-truncated stub with %s chars of recovered " + "content so the loop can continue from where the " + "stream died: %s", len(_partial_text or ""), result["error"], ) + _stub_finish_reason = "length" _stub_msg = SimpleNamespace( role="assistant", content=_partial_text, tool_calls=None, reasoning_content=None, @@ -2132,7 +2148,7 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta= id="partial-stream-stub", model=getattr(agent, "model", "unknown"), choices=[SimpleNamespace( - index=0, message=_stub_msg, finish_reason="stop", + index=0, message=_stub_msg, finish_reason=_stub_finish_reason, )], usage=None, )