diff --git a/run_agent.py b/run_agent.py
index 06485aca3..0b12ce53f 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -2777,8 +2777,10 @@ class AIAgent:
         self._turns_since_memory = 0
         self._iters_since_skill = 0
 
-        # Initialize conversation
-        messages = conversation_history or []
+        # Initialize conversation. Copy so that changes to `messages` do not
+        # mutate the caller's list. The copy is shallow: the message dicts
+        # themselves are still shared with the caller.
+        messages = list(conversation_history) if conversation_history else []
 
         # Hydrate todo store from conversation history (gateway creates a fresh
         # AIAgent per message, so the in-memory store is empty -- we need to
diff --git a/tests/test_run_agent.py b/tests/test_run_agent.py
index b56a9e954..f5cb19334 100644
--- a/tests/test_run_agent.py
+++ b/tests/test_run_agent.py
@@ -862,3 +862,42 @@ class TestFlushSentinelNotLeaked:
             assert "_flush_sentinel" not in msg, (
                 f"_flush_sentinel leaked to API in message: {msg}"
             )
+
+
+# ---------------------------------------------------------------------------
+# Conversation history mutation
+# ---------------------------------------------------------------------------
+
+class TestConversationHistoryNotMutated:
+    """run_conversation must not mutate the caller's conversation_history list."""
+
+    def test_caller_list_unchanged_after_run(self, agent):
+        """Passing conversation_history should not modify the original list."""
+        history = [
+            {"role": "user", "content": "previous question"},
+            {"role": "assistant", "content": "previous answer"},
+        ]
+        original_len = len(history)
+        original_items = list(history)
+
+        resp = _mock_response(content="new answer", finish_reason="stop")
+        agent.client.chat.completions.create.return_value = resp
+
+        with (
+            patch.object(agent, "_persist_session"),
+            patch.object(agent, "_save_trajectory"),
+            patch.object(agent, "_cleanup_task_resources"),
+        ):
+            result = agent.run_conversation("new question", conversation_history=history)
+
+        # Caller's list must be untouched: same length ...
+        assert len(history) == original_len, (
+            f"conversation_history was mutated: expected {original_len} items, got {len(history)}"
+        )
+        # ... and still holding the same message objects in the same order
+        # (a length-only check would miss in-place replacement or reordering).
+        assert all(a is b for a, b in zip(history, original_items)), (
+            "conversation_history items were replaced or reordered"
+        )
+        # Result should have more messages than the original history
+        assert len(result["messages"]) > original_len