mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-27 11:22:03 +00:00
Merge pull request #52854 from NousResearch/bb/fix-interrupt-partial-reply
fix(interrupt): keep partial streamed reply when stopped mid-response
This commit is contained in:
commit
f4c656b0a0
2 changed files with 62 additions and 2 deletions
|
|
@ -2011,9 +2011,21 @@ def run_conversation(
|
|||
agent.thinking_callback("")
|
||||
api_elapsed = time.time() - api_start_time
|
||||
agent._vprint(f"{agent.log_prefix}⚡ Interrupted during API call.", force=True)
|
||||
agent._persist_session(messages, conversation_history)
|
||||
interrupted = True
|
||||
final_response = f"{INTERRUPT_WAITING_FOR_MODEL_PREFIX}{api_elapsed:.1f}s elapsed)."
|
||||
# Preserve any assistant text already streamed to the user
|
||||
# before the stop landed. Dropping it leaves history with no
|
||||
# record of the half-finished reply on screen, so the next turn
|
||||
# the model "forgets" what it just said — exactly what users hit
|
||||
# when they stop to redirect mid-response.
|
||||
_partial = agent._strip_think_blocks(
|
||||
getattr(agent, "_current_streamed_assistant_text", "") or ""
|
||||
).strip()
|
||||
if _partial:
|
||||
messages.append({"role": "assistant", "content": _partial})
|
||||
final_response = _partial
|
||||
else:
|
||||
final_response = f"{INTERRUPT_WAITING_FOR_MODEL_PREFIX}{api_elapsed:.1f}s elapsed)."
|
||||
agent._persist_session(messages, conversation_history)
|
||||
break
|
||||
|
||||
except Exception as api_error:
|
||||
|
|
|
|||
|
|
@ -4125,6 +4125,54 @@ class TestRunConversation:
|
|||
assert result["final_response"] == "Fresh partial content from this turn"
|
||||
assert result["api_calls"] == 1
|
||||
|
||||
def test_interrupt_during_stream_preserves_partial_assistant_text(self, agent):
    """A stop that lands mid-stream must leave the visible partial reply in history."""
    self._setup_agent(agent)

    def _stream_then_stop(api_kwargs):
        # Simulate the model emitting some visible text before the user's
        # stop request aborts the streaming call.
        agent._current_streamed_assistant_text = "Sure, here's how to do it: first"
        raise InterruptedError("Agent interrupted during streaming API call")

    # Session persistence, trajectory saving and resource cleanup are
    # patched out so only the conversation loop's return value is examined.
    with (
        patch.object(agent, "_interruptible_api_call", side_effect=_stream_then_stop),
        patch.object(agent, "_persist_session"),
        patch.object(agent, "_save_trajectory"),
        patch.object(agent, "_cleanup_task_resources"),
    ):
        outcome = agent.run_conversation("how do I do X")

    assert outcome["interrupted"] is True

    # The half-finished reply is returned to the caller ...
    assert outcome["final_response"] == "Sure, here's how to do it: first"

    # ... and recorded as the trailing assistant turn, so the following
    # turn still has what the model already said.
    last_message = outcome["messages"][-1]
    assert last_message == {
        "role": "assistant",
        "content": "Sure, here's how to do it: first",
    }
|
||||
|
||||
def test_interrupt_before_any_stream_keeps_sentinel(self, agent):
    """With nothing streamed yet, an interrupt reports the metadata sentinel instead."""
    from agent.conversation_loop import INTERRUPT_WAITING_FOR_MODEL_PREFIX

    self._setup_agent(agent)

    def _stop_before_output(api_kwargs):
        # The stop arrives before any assistant text has been streamed.
        agent._current_streamed_assistant_text = ""
        raise InterruptedError("Agent interrupted during streaming API call")

    # Session persistence, trajectory saving and resource cleanup are
    # patched out so only the conversation loop's return value is examined.
    with (
        patch.object(agent, "_interruptible_api_call", side_effect=_stop_before_output),
        patch.object(agent, "_persist_session"),
        patch.object(agent, "_save_trajectory"),
        patch.object(agent, "_cleanup_task_resources"),
    ):
        outcome = agent.run_conversation("hi")

    assert outcome["interrupted"] is True

    # No partial text exists, so the reply is the sentinel message ...
    reply = outcome["final_response"]
    assert reply.startswith(INTERRUPT_WAITING_FOR_MODEL_PREFIX)

    # ... and no assistant turn was appended: the user message is still last.
    final_turn = outcome["messages"][-1]
    assert final_turn["role"] == "user"
|
||||
|
||||
def test_nous_401_refreshes_after_remint_and_retries(self, agent):
|
||||
self._setup_agent(agent)
|
||||
agent.provider = "nous"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue