From 3d21666b2f7fbeb739edfcc8a85b1f5a91f4d482 Mon Sep 17 00:00:00 2001 From: Reiji Kisaragi Date: Thu, 11 Jun 2026 18:56:32 +0700 Subject: [PATCH] fix: preserve multimodal user content during persistence Avoid applying text-only persist_user_message overrides to multimodal current-turn user messages. Early crash-resilience persistence mutates the same messages list later used for the API call, so clobbering list content drops ACP image blocks before model dispatch. Add regression coverage for both text override behavior and multimodal preservation. Closes #44242 --- run_agent.py | 10 +++++++++- tests/run_agent/test_run_agent.py | 27 +++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/run_agent.py b/run_agent.py index bca3dd1e718..331ff2c66ab 100644 --- a/run_agent.py +++ b/run_agent.py @@ -1483,7 +1483,15 @@ class AIAgent: if 0 <= idx < len(messages): msg = messages[idx] if isinstance(msg, dict) and msg.get("role") == "user": - if override is not None: + # Text-only call paths may pass a synthetic API-facing prompt + # and a cleaner transcript string separately. Multimodal + # turns, however, keep image/audio blocks in the live + # messages list that is still used for the API request after + # early crash-resilience persistence. Do not replace those + # blocks with the text-only persistence override before the + # model call is built. The paired timestamp override still + # applies — it is metadata, not content. 
+ if override is not None and not isinstance(msg.get("content"), list): msg["content"] = override if timestamp is not None: msg["timestamp"] = timestamp diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py index 827bc0ef690..f2787628d4d 100644 --- a/tests/run_agent/test_run_agent.py +++ b/tests/run_agent/test_run_agent.py @@ -75,6 +75,33 @@ def agent(): return a +def test_persist_user_message_override_rewrites_text_turns(agent): + messages = [{"role": "user", "content": "API-only synthetic prefix\nhello"}] + agent._persist_user_message_idx = 0 + agent._persist_user_message_override = "hello" + + agent._apply_persist_user_message_override(messages) + + assert messages == [{"role": "user", "content": "hello"}] + + +def test_persist_user_message_override_preserves_multimodal_turns(agent): + multimodal_content = [ + {"type": "text", "text": "What color is this?"}, + { + "type": "image_url", + "image_url": {"url": "data:image/png;base64,AAAA"}, + }, + ] + messages = [{"role": "user", "content": multimodal_content}] + agent._persist_user_message_idx = 0 + agent._persist_user_message_override = "What color is this? [Image attachment]" + + agent._apply_persist_user_message_override(messages) + + assert messages == [{"role": "user", "content": multimodal_content}] + + @pytest.fixture() def agent_with_memory_tool(): """Agent whose valid_tool_names includes 'memory'."""