Merge pull request #52854 from NousResearch/bb/fix-interrupt-partial-reply

fix(interrupt): keep partial streamed reply when stopped mid-response
This commit is contained in:
brooklyn! 2026-06-26 00:04:37 -05:00 committed by GitHub
commit f4c656b0a0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 62 additions and 2 deletions

View file

@@ -2011,9 +2011,21 @@ def run_conversation(
agent.thinking_callback("")
api_elapsed = time.time() - api_start_time
agent._vprint(f"{agent.log_prefix}⚡ Interrupted during API call.", force=True)
agent._persist_session(messages, conversation_history)
interrupted = True
final_response = f"{INTERRUPT_WAITING_FOR_MODEL_PREFIX}{api_elapsed:.1f}s elapsed)."
# Preserve any assistant text already streamed to the user
# before the stop landed. Dropping it leaves history with no
# record of the half-finished reply on screen, so the next turn
# the model "forgets" what it just said — exactly what users hit
# when they stop to redirect mid-response.
_partial = agent._strip_think_blocks(
getattr(agent, "_current_streamed_assistant_text", "") or ""
).strip()
if _partial:
messages.append({"role": "assistant", "content": _partial})
final_response = _partial
else:
final_response = f"{INTERRUPT_WAITING_FOR_MODEL_PREFIX}{api_elapsed:.1f}s elapsed)."
agent._persist_session(messages, conversation_history)
break
except Exception as api_error:

View file

@@ -4125,6 +4125,54 @@ class TestRunConversation:
assert result["final_response"] == "Fresh partial content from this turn"
assert result["api_calls"] == 1
def test_interrupt_during_stream_preserves_partial_assistant_text(self, agent):
    """An interrupt that lands mid-stream must not discard the text already streamed.

    The fake API call records some streamed assistant text on the agent and
    then raises ``InterruptedError``. The run is expected to report that text
    as the final response and to leave it as the last assistant message.
    """
    self._setup_agent(agent)
    streamed_so_far = "Sure, here's how to do it: first"

    def _stream_then_interrupt(api_kwargs):
        # Visible text reached the user, then the stop request arrived.
        agent._current_streamed_assistant_text = streamed_so_far
        raise InterruptedError("Agent interrupted during streaming API call")

    with (
        patch.object(agent, "_interruptible_api_call", side_effect=_stream_then_interrupt),
        patch.object(agent, "_persist_session"),
        patch.object(agent, "_save_trajectory"),
        patch.object(agent, "_cleanup_task_resources"),
    ):
        result = agent.run_conversation("how do I do X")

    assert result["interrupted"] is True
    # The partial reply doubles as the final response and is recorded as an
    # assistant turn, so a follow-up turn can see what was already said.
    assert result["final_response"] == streamed_so_far
    expected_tail = {"role": "assistant", "content": streamed_so_far}
    assert result["messages"][-1] == expected_tail
def test_interrupt_before_any_stream_keeps_sentinel(self, agent):
    """With no streamed text, an interrupt reports the metadata sentinel instead.

    The fake API call leaves the streamed-text buffer empty before raising
    ``InterruptedError``. The final response is expected to start with the
    sentinel prefix, and the last message is expected to still be a user turn.
    """
    from agent.conversation_loop import INTERRUPT_WAITING_FOR_MODEL_PREFIX

    self._setup_agent(agent)

    def _interrupt_without_output(api_kwargs):
        # Nothing was streamed before the stop request arrived.
        agent._current_streamed_assistant_text = ""
        raise InterruptedError("Agent interrupted during streaming API call")

    with (
        patch.object(agent, "_interruptible_api_call", side_effect=_interrupt_without_output),
        patch.object(agent, "_persist_session"),
        patch.object(agent, "_save_trajectory"),
        patch.object(agent, "_cleanup_task_resources"),
    ):
        result = agent.run_conversation("hi")

    assert result["interrupted"] is True
    assert result["final_response"].startswith(INTERRUPT_WAITING_FOR_MODEL_PREFIX)
    # No assistant message is expected here: the conversation still ends on a user turn.
    last_message = result["messages"][-1]
    assert last_message["role"] == "user"
def test_nous_401_refreshes_after_remint_and_retries(self, agent):
self._setup_agent(agent)
agent.provider = "nous"