diff --git a/agent/conversation_loop.py b/agent/conversation_loop.py index 3526029ddab..a7f653932dc 100644 --- a/agent/conversation_loop.py +++ b/agent/conversation_loop.py @@ -35,6 +35,7 @@ from agent.turn_context import build_turn_context from agent.turn_retry_state import TurnRetryState from agent.memory_manager import build_memory_context_block from agent.message_sanitization import ( + close_interrupted_tool_sequence, _repair_tool_call_arguments, _sanitize_messages_non_ascii, _sanitize_messages_surrogates, @@ -1396,10 +1397,12 @@ def run_conversation( while time.time() < sleep_end: if agent._interrupt_requested: agent._vprint(f"{agent.log_prefix}⚡ Interrupt detected during retry wait, aborting.", force=True) + _interrupt_text = f"Operation interrupted during retry ({_failure_hint}, attempt {retry_count}/{max_retries})." + close_interrupted_tool_sequence(messages, _interrupt_text) agent._persist_session(messages, conversation_history) agent.clear_interrupt() return { - "final_response": f"Operation interrupted during retry ({_failure_hint}, attempt {retry_count}/{max_retries}).", + "final_response": _interrupt_text, "messages": messages, "api_calls": api_call_count, "completed": False, @@ -2663,10 +2666,12 @@ def run_conversation( # Check for interrupt before deciding to retry if agent._interrupt_requested: agent._vprint(f"{agent.log_prefix}⚡ Interrupt detected during error handling, aborting retries.", force=True) + _interrupt_text = f"Operation interrupted: handling API error ({error_type}: {agent._clean_error_message(str(api_error))})." + close_interrupted_tool_sequence(messages, _interrupt_text) agent._persist_session(messages, conversation_history) agent.clear_interrupt() return { - "final_response": f"Operation interrupted: handling API error ({error_type}: {agent._clean_error_message(str(api_error))}).", + "final_response": _interrupt_text, "messages": messages, "api_calls": api_call_count, "completed": False, @@ -3578,10 +3583,12 @@ def run_conversation( while time.time() < sleep_end: if agent._interrupt_requested: agent._vprint(f"{agent.log_prefix}⚡ Interrupt detected during retry wait, aborting.", force=True) + _interrupt_text = f"Operation interrupted: retrying API call after error (retry {retry_count}/{max_retries})." + close_interrupted_tool_sequence(messages, _interrupt_text) agent._persist_session(messages, conversation_history) agent.clear_interrupt() return { - "final_response": f"Operation interrupted: retrying API call after error (retry {retry_count}/{max_retries}).", + "final_response": _interrupt_text, "messages": messages, "api_calls": api_call_count, "completed": False, diff --git a/agent/message_sanitization.py b/agent/message_sanitization.py index ff53d247a84..29a4b8691ae 100644 --- a/agent/message_sanitization.py +++ b/agent/message_sanitization.py @@ -279,6 +279,38 @@ def _repair_tool_call_arguments(raw_args: str, tool_name: str = "?") -> str: return "{}" +def close_interrupted_tool_sequence(messages: list, final_response: Any = None) -> bool: + """Append a synthetic assistant turn when an interrupted tail is a tool result. + + A turn cut short by ``/stop`` can leave the transcript ending on a raw + ``tool`` message (a tool finished, or its execution was cancelled, but the + model never streamed a closing assistant turn). Persisting that tail means + the next user message lands as ``… tool → user`` — a role-alternation + violation that strict providers (Gemini, Claude) react to by hallucinating + a continuation of the user's message and ignoring prior context, which + reads to the user as "lost context" (#48879). + + ``finalize_turn`` closes this on the happy interrupt path, but the + retry/backoff/error interrupt aborts in ``conversation_loop`` ``return`` + early and never reach it — this shared helper closes the sequence on all of + them. ``final_response`` is usually empty on an interrupt, so an explicit + placeholder is used rather than an empty-content assistant turn. + + Mutates ``messages`` in place. Returns True if a closing turn was appended. + """ + if not messages: + return False + last = messages[-1] + if not isinstance(last, dict) or last.get("role") != "tool": + return False + text = final_response if isinstance(final_response, str) else "" + messages.append({ + "role": "assistant", + "content": text.strip() or "Operation interrupted.", + }) + return True + + def _strip_non_ascii(text: str) -> str: """Remove non-ASCII characters, replacing with closest ASCII equivalent or removing. @@ -431,6 +463,7 @@ def _sanitize_structure_non_ascii(payload: Any) -> bool: __all__ = [ "_SURROGATE_RE", + "close_interrupted_tool_sequence", "_sanitize_surrogates", "_sanitize_structure_surrogates", "_sanitize_messages_surrogates", diff --git a/agent/turn_finalizer.py b/agent/turn_finalizer.py index 60d6a88b103..c51c18f93cc 100644 --- a/agent/turn_finalizer.py +++ b/agent/turn_finalizer.py @@ -181,13 +181,9 @@ def finalize_turn( # here instead. On an interrupt ``final_response`` is typically # empty, so fall back to an explicit placeholder rather than # persisting an empty-content assistant turn. - if interrupted and messages and messages[-1].get("role") == "tool": - messages.append( - { - "role": "assistant", - "content": (final_response or "").strip() or "Operation interrupted.", - } - ) + if interrupted: + from agent.message_sanitization import close_interrupted_tool_sequence + close_interrupted_tool_sequence(messages, final_response) agent._persist_session(messages, conversation_history) except Exception as _persist_err: diff --git a/tests/agent/test_close_interrupted_tool_sequence.py b/tests/agent/test_close_interrupted_tool_sequence.py new file mode 100644 index 00000000000..a8f5ea31900 --- /dev/null +++ b/tests/agent/test_close_interrupted_tool_sequence.py @@ -0,0 +1,86 @@ +"""Regression tests for ``close_interrupted_tool_sequence`` (#48879 follow-up). + +#48879 closed the tool-call sequence on interrupt inside ``finalize_turn``, +but the retry/backoff/error interrupt aborts in ``conversation_loop`` ``return`` +early and never reach it — so they persisted a raw ``tool`` tail. The next user +message then lands as ``... tool → user``, the role-alternation violation that +makes strict providers (Gemini, Claude) hallucinate a continuation and ignore +prior context (what the user perceives as "lost context"). + +The fix routes every interrupt abort through this one shared helper. These tests +pin the helper's contract and prove the post-interrupt + next-user-message +transcript is alternation-safe. +""" + +from agent.message_sanitization import close_interrupted_tool_sequence + + +def _tool_tail(): + return [ + {"role": "user", "content": "edit the file"}, + { + "role": "assistant", + "content": "", + "tool_calls": [{"id": "c1", "function": {"name": "patch", "arguments": "{}"}}], + }, + {"role": "tool", "tool_call_id": "c1", "content": "ok edited"}, + ] + + +def _assert_no_tool_then_user(messages): + for i in range(len(messages) - 1): + if messages[i].get("role") == "tool": + assert messages[i + 1].get("role") != "user", ( + f"role-alternation violation: tool → user at index {i}" + ) + + +def test_tool_tail_is_closed_with_placeholder(): + messages = _tool_tail() + assert close_interrupted_tool_sequence(messages, None) is True + assert messages[-1]["role"] == "assistant" + assert messages[-1]["content"] == "Operation interrupted." + + +def test_tool_tail_keeps_interrupt_text_when_present(): + messages = _tool_tail() + close_interrupted_tool_sequence(messages, "Operation interrupted during retry (attempt 2/3).") + assert messages[-1]["role"] == "assistant" + assert messages[-1]["content"] == "Operation interrupted during retry (attempt 2/3)." + + +def test_blank_interrupt_text_falls_back_to_placeholder(): + messages = _tool_tail() + close_interrupted_tool_sequence(messages, " ") + assert messages[-1]["content"] == "Operation interrupted." + + +def test_closing_makes_next_user_message_alternation_safe(): + """The whole point: appending a user turn after the close must not + produce the ``tool → user`` shape strict providers choke on.""" + messages = _tool_tail() + close_interrupted_tool_sequence(messages, None) + follow_on = messages + [{"role": "user", "content": "they do! increase the timing"}] + _assert_no_tool_then_user(follow_on) + + +def test_assistant_tail_is_left_untouched(): + messages = [ + {"role": "user", "content": "hi"}, + {"role": "assistant", "content": "partial reply"}, + ] + before = [dict(m) for m in messages] + assert close_interrupted_tool_sequence(messages, "interrupted") is False + assert messages == before + + +def test_user_tail_is_left_untouched(): + messages = [{"role": "user", "content": "hi"}] + assert close_interrupted_tool_sequence(messages, None) is False + assert len(messages) == 1 + + +def test_empty_messages_is_noop(): + messages = [] + assert close_interrupted_tool_sequence(messages, "x") is False + assert messages == []