diff --git a/run_agent.py b/run_agent.py index 1dc9d058e0..b3a7003e77 100644 --- a/run_agent.py +++ b/run_agent.py @@ -3792,18 +3792,21 @@ class AIAgent: Ensures conversations are never lost, even on errors or early returns. """ - self._drop_trailing_empty_recovery_synthetic(messages) + self._drop_trailing_empty_response_scaffolding(messages) self._apply_persist_user_message_override(messages) self._session_messages = messages self._save_session_log(messages) self._flush_messages_to_session_db(messages, conversation_history) - def _drop_trailing_empty_recovery_synthetic(self, messages: List[Dict]) -> None: - """Remove private empty-response retry scaffolding from transcript tails.""" + def _drop_trailing_empty_response_scaffolding(self, messages: List[Dict]) -> None: + """Remove private empty-response retry/failure scaffolding from transcript tails.""" while ( messages and isinstance(messages[-1], dict) - and messages[-1].get("_empty_recovery_synthetic") + and ( + messages[-1].get("_empty_recovery_synthetic") + or messages[-1].get("_empty_terminal_sentinel") + ) ): messages.pop() @@ -13828,9 +13831,15 @@ class AIAgent: # "(empty)" terminal. _turn_exit_reason = "empty_response_exhausted" reasoning_text = self._extract_reasoning(assistant_message) - self._drop_trailing_empty_recovery_synthetic(messages) + self._drop_trailing_empty_response_scaffolding(messages) assistant_msg = self._build_assistant_message(assistant_message, finish_reason) assistant_msg["content"] = "(empty)" + # This is a user-facing failure sentinel for the gateway, + # not real assistant content. Persisting it makes later + # "continue" turns replay assistant("(empty)") as if it + # were a meaningful model response, which can keep long + # tool-heavy sessions stuck in empty-response loops. + assistant_msg["_empty_terminal_sentinel"] = True messages.append(assistant_msg) if reasoning_text: @@ -13913,6 +13922,7 @@ class AIAgent: and ( messages[-1].get("_thinking_prefill") or messages[-1].get("_empty_recovery_synthetic") + or messages[-1].get("_empty_terminal_sentinel") ) ): messages.pop() @@ -14004,7 +14014,11 @@ class AIAgent: # Clean up VM and browser for this task after conversation completes self._cleanup_task_resources(effective_task_id) - # Persist session to both JSON log and SQLite + # Persist session to both JSON log and SQLite only after private retry + # scaffolding has been removed. Otherwise a later user "continue" turn + # can replay assistant("(empty)") / recovery nudges and fall into the + # same empty-response loop again. + self._drop_trailing_empty_response_scaffolding(messages) self._persist_session(messages, conversation_history) # ── Turn-exit diagnostic log ───────────────────────────────────── diff --git a/tests/run_agent/test_empty_response_recovery_persistence.py b/tests/run_agent/test_empty_response_recovery_persistence.py index 59c606dadc..d31a1ff8d2 100644 --- a/tests/run_agent/test_empty_response_recovery_persistence.py +++ b/tests/run_agent/test_empty_response_recovery_persistence.py @@ -50,7 +50,7 @@ def test_persist_session_strips_trailing_empty_recovery_scaffolding(): assert all(not msg.get("_empty_recovery_synthetic") for msg in messages) -def test_persist_session_keeps_real_terminal_empty_response(): +def test_persist_session_keeps_unmarked_terminal_empty_response(): agent = _agent_with_stubbed_persistence() messages = [ {"role": "user", "content": "run the task"}, @@ -64,3 +64,21 @@ def test_persist_session_keeps_real_terminal_empty_response(): {"role": "assistant", "content": "(empty)"}, ] assert agent.saved_session_logs[-1] == messages + + +def test_persist_session_strips_marked_terminal_empty_sentinel(): + agent = _agent_with_stubbed_persistence() + messages = [ + {"role": "user", "content": "continue"}, + { + "role": "assistant", + "content": "(empty)", + "_empty_terminal_sentinel": True, + }, + ] + + AIAgent._persist_session(agent, messages, conversation_history=[]) + + assert messages == [{"role": "user", "content": "continue"}] + assert agent.saved_session_logs[-1] == messages + assert all(not msg.get("_empty_terminal_sentinel") for msg in messages)