From 15439bee4700dc378777846481afe9218cd0e624 Mon Sep 17 00:00:00 2001
From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com>
Date: Fri, 12 Jun 2026 12:49:18 +0530
Subject: [PATCH] refactor(memory): reuse _summarize_user_message_for_log
 instead of forking it
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The original fix added agent/memory_manager.py:flatten_message_content, but
that helper was a near-exact duplicate of
agent/codex_responses_adapter.py:_summarize_user_message_for_log — same
None/str/list dispatch, same {text,input_text,output_text}/{image_url,input_image}
part sets, the identical [N image(s)] marker, and the same str() fallback. The
only difference was the join separator (newline for memory vs space for the
log/trajectory previews the existing helper already serves), and that helper is
already imported into agent/turn_finalizer.py — the same file whose call site the
memory fix touches.

Parameterize the existing helper with sep=' ' (default preserves every current
logging/trajectory caller byte-for-byte) and call it with sep='\n' at the memory
boundary; drop the forked flatten_message_content. Repoints the unit tests to the
consolidated helper and adds a case locking the default space-join.

Single source of truth for multimodal-content flattening; no behavior change for
the fix or for existing callers.
---
 agent/codex_responses_adapter.py    | 21 +++++++----
 agent/memory_manager.py             | 39 ---------------------
 run_agent.py                        | 11 +++---
 tests/agent/test_memory_provider.py | 54 ++++++++++++++++++-----------
 4 files changed, 54 insertions(+), 71 deletions(-)

diff --git a/agent/codex_responses_adapter.py b/agent/codex_responses_adapter.py
index 943131f5592..a2678a6da36 100644
--- a/agent/codex_responses_adapter.py
+++ b/agent/codex_responses_adapter.py
@@ -127,14 +127,21 @@ def _chat_content_to_responses_parts(content: Any, *, role: str = "user") -> Lis
     return converted
 
 
-def _summarize_user_message_for_log(content: Any) -> str:
-    """Return a short text summary of a user message for logging/trajectory.
+def _summarize_user_message_for_log(content: Any, *, sep: str = " ") -> str:
+    """Flatten message content to a plain-text summary.
 
     Multimodal messages arrive as a list of ``{type:"text"|"image_url", ...}``
-    parts from the API server.  Logging, spinner previews, and trajectory
-    files all want a plain string — this helper extracts the first chunk of
-    text and notes any attached images.  Returns an empty string for empty
-    lists and ``str(content)`` for unexpected scalar types.
+    parts from the API server.  Several consumers want a plain string:
+
+    - Logging, spinner previews, and trajectory files (the default ``sep=" "``).
+    - External memory providers, which feed the text to regexes
+      (``sanitize_context``) and text APIs — a raw list crashes the sync with
+      ``expected string or bytes-like object, got 'list'`` (use ``sep="\\n"``).
+
+    Text parts are joined with ``sep``; images become a ``[N image(s)]`` marker
+    so the turn isn't recorded as if the attachment never existed.  Returns an
+    empty string for empty lists and ``str(content)`` for unexpected scalar
+    types.
     """
     if content is None:
         return ""
@@ -157,7 +164,7 @@ def _summarize_user_message_for_log(content: Any) -> str:
                     text_bits.append(text)
             elif ptype in {"image_url", "input_image"}:
                 image_count += 1
-        summary = " ".join(text_bits).strip()
+        summary = sep.join(text_bits).strip()
         if image_count:
             note = f"[{image_count} image{'s' if image_count != 1 else ''}]"
             summary = f"{note} {summary}" if summary else note
diff --git a/agent/memory_manager.py b/agent/memory_manager.py
index 34aef5d3a45..3cb3a734a8f 100644
--- a/agent/memory_manager.py
+++ b/agent/memory_manager.py
@@ -67,45 +67,6 @@ def sanitize_context(text: str) -> str:
     return text
 
 
-def flatten_message_content(content: Any) -> str:
-    """Flatten message content to plain text for memory providers.
-
-    Multimodal turns carry content as a list of ``{type: "text"|"image_url",
-    ...}`` parts; providers expect a string and feed it to regexes
-    (``sanitize_context``) and text APIs, so a list crashes the sync
-    (``expected string or bytes-like object, got 'list'``). Text parts are
-    joined, images become a ``[N image(s)]`` marker so the turn isn't
-    recorded as if the attachment never existed.
-    """
-    if content is None:
-        return ""
-    if isinstance(content, str):
-        return content
-    if isinstance(content, list):
-        text_bits: List[str] = []
-        image_count = 0
-        for part in content:
-            if isinstance(part, str):
-                if part:
-                    text_bits.append(part)
-                continue
-            if not isinstance(part, dict):
-                continue
-            ptype = str(part.get("type") or "").strip().lower()
-            if ptype in {"text", "input_text", "output_text"}:
-                text = part.get("text")
-                if isinstance(text, str) and text:
-                    text_bits.append(text)
-            elif ptype in {"image_url", "input_image"}:
-                image_count += 1
-        flattened = "\n".join(text_bits).strip()
-        if image_count:
-            note = f"[{image_count} image{'s' if image_count != 1 else ''}]"
-            flattened = f"{note} {flattened}" if flattened else note
-        return flattened
-    return str(content)
-
-
 class StreamingContextScrubber:
     """Stateful scrubber for streaming text that may contain split memory-context spans.
 
diff --git a/run_agent.py b/run_agent.py
index 57e7abddaf6..0390e01fd13 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -132,7 +132,7 @@ from tools.browser_tool import cleanup_browser
 
 
 # Agent internals extracted to agent/ package for modularity
-from agent.memory_manager import flatten_message_content, sanitize_context
+from agent.memory_manager import sanitize_context
 from agent.error_classifier import FailoverReason
 from agent.redact import redact_sensitive_text
 from agent.model_metadata import (
@@ -169,7 +169,7 @@ from agent.codex_responses_adapter import (
     _derive_responses_function_call_id as _codex_derive_responses_function_call_id,
     _deterministic_call_id as _codex_deterministic_call_id,
     _split_responses_tool_id as _codex_split_responses_tool_id,
-    _summarize_user_message_for_log,  # noqa: F401  # re-exported for tests
+    _summarize_user_message_for_log,  # also used by _sync_external_memory_for_turn (memory boundary)
 )
 from agent.tool_guardrails import (
     ToolGuardrailDecision,
@@ -2991,9 +2991,10 @@ class AIAgent:
         if not (self._memory_manager and final_response and original_user_message):
             return
         # Multimodal turns carry content as a list of typed parts; providers
-        # expect plain strings (see flatten_message_content).
-        user_text = flatten_message_content(original_user_message)
-        response_text = flatten_message_content(final_response)
+        # expect plain strings, so flatten to text first (newline-joined for
+        # memory, vs the default space-join used for log/trajectory previews).
+        user_text = _summarize_user_message_for_log(original_user_message, sep="\n")
+        response_text = _summarize_user_message_for_log(final_response, sep="\n")
         if not (user_text and response_text):
             return
         try:
diff --git a/tests/agent/test_memory_provider.py b/tests/agent/test_memory_provider.py
index 369073bfbed..30c477440a4 100644
--- a/tests/agent/test_memory_provider.py
+++ b/tests/agent/test_memory_provider.py
@@ -982,62 +982,76 @@ class TestMemoryContextFencing:
 class TestFlattenMessageContent:
     """Multimodal message content (list of typed parts) must flatten to a
     plain string before reaching providers — a raw list crashes their regex
-    sanitization with ``expected string or bytes-like object, got 'list'``."""
+    sanitization with ``expected string or bytes-like object, got 'list'``.
+
+    The memory boundary reuses ``_summarize_user_message_for_log`` (the same
+    helper logging/trajectory use) with ``sep="\\n"`` instead of a forked copy.
+    """
 
     def test_string_passthrough(self):
-        from agent.memory_manager import flatten_message_content
-        assert flatten_message_content("hello") == "hello"
+        from agent.codex_responses_adapter import _summarize_user_message_for_log
+        assert _summarize_user_message_for_log("hello", sep="\n") == "hello"
 
     def test_none_is_empty(self):
-        from agent.memory_manager import flatten_message_content
-        assert flatten_message_content(None) == ""
+        from agent.codex_responses_adapter import _summarize_user_message_for_log
+        assert _summarize_user_message_for_log(None, sep="\n") == ""
 
-    def test_text_parts_joined(self):
-        from agent.memory_manager import flatten_message_content
+    def test_text_parts_joined_with_sep(self):
+        from agent.codex_responses_adapter import _summarize_user_message_for_log
         content = [
             {"type": "text", "text": "first"},
             {"type": "text", "text": "second"},
         ]
-        assert flatten_message_content(content) == "first\nsecond"
+        assert _summarize_user_message_for_log(content, sep="\n") == "first\nsecond"
+
+    def test_default_sep_is_space(self):
+        """Logging/trajectory callers (the default) keep the space-join."""
+        from agent.codex_responses_adapter import _summarize_user_message_for_log
+        content = [
+            {"type": "text", "text": "first"},
+            {"type": "text", "text": "second"},
+        ]
+        assert _summarize_user_message_for_log(content) == "first second"
 
     def test_image_part_becomes_marker(self):
-        from agent.memory_manager import flatten_message_content
+        from agent.codex_responses_adapter import _summarize_user_message_for_log
         content = [
             {"type": "text", "text": "look at this"},
             {"type": "image_url", "image_url": {"url": "data:image/png;base64,xyz"}},
         ]
-        assert flatten_message_content(content) == "[1 image] look at this"
+        assert _summarize_user_message_for_log(content, sep="\n") == "[1 image] look at this"
 
     def test_image_only_message(self):
-        from agent.memory_manager import flatten_message_content
+        from agent.codex_responses_adapter import _summarize_user_message_for_log
         content = [
             {"type": "image_url", "image_url": {"url": "data:..."}},
             {"type": "image_url", "image_url": {"url": "data:..."}},
         ]
-        assert flatten_message_content(content) == "[2 images]"
+        assert _summarize_user_message_for_log(content, sep="\n") == "[2 images]"
 
     def test_unknown_parts_skipped(self):
-        from agent.memory_manager import flatten_message_content
+        from agent.codex_responses_adapter import _summarize_user_message_for_log
         content = [{"type": "audio", "data": "..."}, {"type": "text", "text": "ok"}, 42]
-        assert flatten_message_content(content) == "ok"
+        assert _summarize_user_message_for_log(content, sep="\n") == "ok"
 
     def test_bare_strings_in_list(self):
-        from agent.memory_manager import flatten_message_content
-        assert flatten_message_content(["plain", "strings"]) == "plain\nstrings"
+        from agent.codex_responses_adapter import _summarize_user_message_for_log
+        assert _summarize_user_message_for_log(["plain", "strings"], sep="\n") == "plain\nstrings"
 
     def test_scalar_fallback(self):
-        from agent.memory_manager import flatten_message_content
-        assert flatten_message_content(42) == "42"
+        from agent.codex_responses_adapter import _summarize_user_message_for_log
+        assert _summarize_user_message_for_log(42, sep="\n") == "42"
 
     def test_flattened_output_is_regex_safe(self):
         """The original failure: sanitize_context(list) raised TypeError."""
-        from agent.memory_manager import flatten_message_content, sanitize_context
+        from agent.codex_responses_adapter import _summarize_user_message_for_log
+        from agent.memory_manager import sanitize_context
         content = [
             {"type": "text", "text": "fix this bug"},
             {"type": "image_url", "image_url": {"url": "data:..."}},
         ]
         # Must not raise.
-        assert sanitize_context(flatten_message_content(content))
+        assert sanitize_context(_summarize_user_message_for_log(content, sep="\n"))
 
 
 # ---------------------------------------------------------------------------