diff --git a/run_agent.py b/run_agent.py
index 06485aca3..0b12ce53f 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -2777,8 +2777,8 @@ class AIAgent:
         self._turns_since_memory = 0
         self._iters_since_skill = 0
         
-        # Initialize conversation
-        messages = conversation_history or []
+        # Initialize conversation (shallow copy: protects the caller's list, not the message dicts)
+        messages = list(conversation_history) if conversation_history else []
         
         # Hydrate todo store from conversation history (gateway creates a fresh
         # AIAgent per message, so the in-memory store is empty -- we need to
diff --git a/tests/test_run_agent.py b/tests/test_run_agent.py
index b56a9e954..f5cb19334 100644
--- a/tests/test_run_agent.py
+++ b/tests/test_run_agent.py
@@ -862,3 +862,36 @@ class TestFlushSentinelNotLeaked:
             assert "_flush_sentinel" not in msg, (
                 f"_flush_sentinel leaked to API in message: {msg}"
             )
+
+
+# ---------------------------------------------------------------------------
+# Conversation history mutation
+# ---------------------------------------------------------------------------
+
+class TestConversationHistoryNotMutated:
+    """Guard against run_conversation mutating the list passed as conversation_history."""
+
+    def test_caller_list_unchanged_after_run(self, agent):
+        """The caller's history list keeps its length while the returned messages grow."""
+        history = [
+            {"role": "user", "content": "previous question"},
+            {"role": "assistant", "content": "previous answer"},
+        ]
+        original_len = len(history)  # baseline for the length check below
+        # Stub the chat-completions call so it returns one canned "stop" reply.
+        resp = _mock_response(content="new answer", finish_reason="stop")
+        agent.client.chat.completions.create.return_value = resp
+        # Patch out session persistence, trajectory saving, and task cleanup for this call.
+        with (
+            patch.object(agent, "_persist_session"),
+            patch.object(agent, "_save_trajectory"),
+            patch.object(agent, "_cleanup_task_resources"),
+        ):
+            result = agent.run_conversation("new question", conversation_history=history)
+
+        # Caller's list must keep its original length (only the length is checked, not the dicts)
+        assert len(history) == original_len, (
+            f"conversation_history was mutated: expected {original_len} items, got {len(history)}"
+        )
+        # The returned "messages" list must be longer than the history that was passed in
+        assert len(result["messages"]) > original_len