From 8233598e64309f585953b26cc595855b78ca215c Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Thu, 25 Jun 2026 23:54:20 -0500 Subject: [PATCH] fix(interrupt): keep partial streamed reply when stopped mid-response Stopping a turn while the model is streaming (stop/esc to redirect) raised InterruptedError, set final_response to the throwaway "waiting for model response" sentinel, and persisted messages WITHOUT the assistant text that was already streamed to the screen. The next turn then had no record of the half-finished reply, so the model appeared to "forget" what it just said. Recover the on-screen text from _current_streamed_assistant_text in the InterruptedError branch and append it as the assistant turn (and surface it as final_response). The metadata sentinel is kept only when nothing was streamed yet, preserving the ACP/client suppression behavior. Completes the partial-stream recovery from 397eae5d9 (which wired the same _current_streamed_assistant_text salvage into the connection-failure twin but missed the user-interrupt path). The lossy handler dates to c98ee9852. --- agent/conversation_loop.py | 16 +++++++++-- tests/run_agent/test_run_agent.py | 48 +++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+), 2 deletions(-) diff --git a/agent/conversation_loop.py b/agent/conversation_loop.py index 0d854537d9e..d68c652d2b8 100644 --- a/agent/conversation_loop.py +++ b/agent/conversation_loop.py @@ -2011,9 +2011,21 @@ def run_conversation( agent.thinking_callback("") api_elapsed = time.time() - api_start_time agent._vprint(f"{agent.log_prefix}⚡ Interrupted during API call.", force=True) - agent._persist_session(messages, conversation_history) interrupted = True - final_response = f"{INTERRUPT_WAITING_FOR_MODEL_PREFIX}{api_elapsed:.1f}s elapsed)." + # Preserve any assistant text already streamed to the user + # before the stop landed. Dropping it leaves history with no + # record of the half-finished reply on screen, so the next turn + # the model "forgets" what it just said — exactly what users hit + # when they stop to redirect mid-response. + _partial = agent._strip_think_blocks( + getattr(agent, "_current_streamed_assistant_text", "") or "" + ).strip() + if _partial: + messages.append({"role": "assistant", "content": _partial}) + final_response = _partial + else: + final_response = f"{INTERRUPT_WAITING_FOR_MODEL_PREFIX}{api_elapsed:.1f}s elapsed)." + agent._persist_session(messages, conversation_history) break except Exception as api_error: diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py index 381f9f554c8..f13ece98a23 100644 --- a/tests/run_agent/test_run_agent.py +++ b/tests/run_agent/test_run_agent.py @@ -4077,6 +4077,54 @@ class TestRunConversation: assert result["final_response"] == "Fresh partial content from this turn" assert result["api_calls"] == 1 + def test_interrupt_during_stream_preserves_partial_assistant_text(self, agent): + """Stopping mid-response keeps the streamed reply in history (not 'forgotten').""" + self._setup_agent(agent) + + def _fake_api_call(api_kwargs): + # Model streamed some visible text, then the user hit stop. + agent._current_streamed_assistant_text = "Sure, here's how to do it: first" + raise InterruptedError("Agent interrupted during streaming API call") + + with ( + patch.object(agent, "_interruptible_api_call", side_effect=_fake_api_call), + patch.object(agent, "_persist_session"), + patch.object(agent, "_save_trajectory"), + patch.object(agent, "_cleanup_task_resources"), + ): + result = agent.run_conversation("how do I do X") + + assert result["interrupted"] is True + # Partial reply is surfaced and persisted as an assistant turn so the + # next turn remembers what the model said. + assert result["final_response"] == "Sure, here's how to do it: first" + assert result["messages"][-1] == { + "role": "assistant", + "content": "Sure, here's how to do it: first", + } + + def test_interrupt_before_any_stream_keeps_sentinel(self, agent): + """An interrupt with no streamed text falls back to the metadata sentinel.""" + from agent.conversation_loop import INTERRUPT_WAITING_FOR_MODEL_PREFIX + + self._setup_agent(agent) + + def _fake_api_call(api_kwargs): + agent._current_streamed_assistant_text = "" + raise InterruptedError("Agent interrupted during streaming API call") + + with ( + patch.object(agent, "_interruptible_api_call", side_effect=_fake_api_call), + patch.object(agent, "_persist_session"), + patch.object(agent, "_save_trajectory"), + patch.object(agent, "_cleanup_task_resources"), + ): + result = agent.run_conversation("hi") + + assert result["interrupted"] is True + assert result["final_response"].startswith(INTERRUPT_WAITING_FOR_MODEL_PREFIX) + assert result["messages"][-1]["role"] == "user" + def test_nous_401_refreshes_after_remint_and_retries(self, agent): self._setup_agent(agent) agent.provider = "nous"