NOTE(review): line breaks and indentation in this patch were reconstructed from a
whitespace-collapsed copy. Hunk line counts match the @@ headers, but the leading
indentation of the agent/conversation_loop.py hunk is inferred -- confirm against
the target file before applying.

Summary: when an interrupt lands during the streaming API call, keep whatever
assistant text was already streamed (think blocks stripped) as an assistant
message and as final_response, persist the session after that append, and fall
back to the "waiting for model" sentinel only when nothing was streamed.

diff --git a/agent/conversation_loop.py b/agent/conversation_loop.py
index 0d854537d9e..d68c652d2b8 100644
--- a/agent/conversation_loop.py
+++ b/agent/conversation_loop.py
@@ -2011,9 +2011,21 @@ def run_conversation(
                         agent.thinking_callback("")
                     api_elapsed = time.time() - api_start_time
                     agent._vprint(f"{agent.log_prefix}⚡ Interrupted during API call.", force=True)
-                    agent._persist_session(messages, conversation_history)
                     interrupted = True
-                    final_response = f"{INTERRUPT_WAITING_FOR_MODEL_PREFIX}{api_elapsed:.1f}s elapsed)."
+                    # Preserve any assistant text already streamed to the user
+                    # before the stop landed. Dropping it leaves history with no
+                    # record of the half-finished reply on screen, so the next turn
+                    # the model "forgets" what it just said — exactly what users hit
+                    # when they stop to redirect mid-response.
+                    _partial = agent._strip_think_blocks(
+                        getattr(agent, "_current_streamed_assistant_text", "") or ""
+                    ).strip()
+                    if _partial:
+                        messages.append({"role": "assistant", "content": _partial})
+                        final_response = _partial
+                    else:
+                        final_response = f"{INTERRUPT_WAITING_FOR_MODEL_PREFIX}{api_elapsed:.1f}s elapsed)."
+                    agent._persist_session(messages, conversation_history)
                     break
 
                 except Exception as api_error:
diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py
index 10570c72616..1fce5ba164b 100644
--- a/tests/run_agent/test_run_agent.py
+++ b/tests/run_agent/test_run_agent.py
@@ -4125,6 +4125,54 @@ class TestRunConversation:
         assert result["final_response"] == "Fresh partial content from this turn"
         assert result["api_calls"] == 1
 
+    def test_interrupt_during_stream_preserves_partial_assistant_text(self, agent):
+        """Stopping mid-response keeps the streamed reply in history (not 'forgotten')."""
+        self._setup_agent(agent)
+
+        def _fake_api_call(api_kwargs):
+            # Model streamed some visible text, then the user hit stop.
+            agent._current_streamed_assistant_text = "Sure, here's how to do it: first"
+            raise InterruptedError("Agent interrupted during streaming API call")
+
+        with (
+            patch.object(agent, "_interruptible_api_call", side_effect=_fake_api_call),
+            patch.object(agent, "_persist_session"),
+            patch.object(agent, "_save_trajectory"),
+            patch.object(agent, "_cleanup_task_resources"),
+        ):
+            result = agent.run_conversation("how do I do X")
+
+        assert result["interrupted"] is True
+        # Partial reply is surfaced and persisted as an assistant turn so the
+        # next turn remembers what the model said.
+        assert result["final_response"] == "Sure, here's how to do it: first"
+        assert result["messages"][-1] == {
+            "role": "assistant",
+            "content": "Sure, here's how to do it: first",
+        }
+
+    def test_interrupt_before_any_stream_keeps_sentinel(self, agent):
+        """An interrupt with no streamed text falls back to the metadata sentinel."""
+        from agent.conversation_loop import INTERRUPT_WAITING_FOR_MODEL_PREFIX
+
+        self._setup_agent(agent)
+
+        def _fake_api_call(api_kwargs):
+            agent._current_streamed_assistant_text = ""
+            raise InterruptedError("Agent interrupted during streaming API call")
+
+        with (
+            patch.object(agent, "_interruptible_api_call", side_effect=_fake_api_call),
+            patch.object(agent, "_persist_session"),
+            patch.object(agent, "_save_trajectory"),
+            patch.object(agent, "_cleanup_task_resources"),
+        ):
+            result = agent.run_conversation("hi")
+
+        assert result["interrupted"] is True
+        assert result["final_response"].startswith(INTERRUPT_WAITING_FOR_MODEL_PREFIX)
+        assert result["messages"][-1]["role"] == "user"
+
     def test_nous_401_refreshes_after_remint_and_retries(self, agent):
         self._setup_agent(agent)
         agent.provider = "nous"