From b1a46b30477527ecc3e174ebd4d49e011774dc55 Mon Sep 17 00:00:00 2001
From: Krishna <3540493+kpadilha@users.noreply.github.com>
Date: Wed, 13 May 2026 16:03:26 +0200
Subject: [PATCH] fix(codex): drop transient rs_tmp reasoning replay state

---
 agent/codex_responses_adapter.py            | 22 ++++---
 tests/agent/test_codex_responses_adapter.py | 63 +++++++++++++++++++++
 2 files changed, 78 insertions(+), 7 deletions(-)
 create mode 100644 tests/agent/test_codex_responses_adapter.py

diff --git a/agent/codex_responses_adapter.py b/agent/codex_responses_adapter.py
index c3affd185dc..13a81ddafdc 100644
--- a/agent/codex_responses_adapter.py
+++ b/agent/codex_responses_adapter.py
@@ -932,6 +932,7 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
     has_incomplete_items = response_status in {"queued", "in_progress", "incomplete"}
     saw_commentary_phase = False
     saw_final_answer_phase = False
+    saw_reasoning_item = False
 
     for item in output:
         item_type = getattr(item, "type", None)
@@ -969,6 +970,7 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
                     raw_message_item["phase"] = normalized_phase
                 message_items_raw.append(raw_message_item)
         elif item_type == "reasoning":
+            saw_reasoning_item = True
             reasoning_text = _extract_responses_reasoning_text(item)
             if reasoning_text:
                 reasoning_parts.append(reasoning_text)
@@ -979,6 +981,12 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
             if isinstance(encrypted, str) and encrypted:
                 raw_item = {"type": "reasoning", "encrypted_content": encrypted}
                 item_id = getattr(item, "id", None)
+                if isinstance(item_id, str) and item_id.startswith("rs_tmp_"):
+                    logger.debug(
+                        "Skipping transient Codex reasoning item during normalization: %s",
+                        item_id,
+                    )
+                    continue
                 if isinstance(item_id, str) and item_id:
                     raw_item["id"] = item_id
                 # Capture summary — required by the API when replaying reasoning items
@@ -1089,13 +1097,13 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
         finish_reason = "incomplete"
     elif has_incomplete_items or (saw_commentary_phase and not saw_final_answer_phase):
         finish_reason = "incomplete"
-    elif reasoning_items_raw and not final_text:
-        # Response contains only reasoning (encrypted thinking state) with
-        # no visible content or tool calls.  The model is still thinking and
-        # needs another turn to produce the actual answer.  Marking this as
-        # "stop" would send it into the empty-content retry loop which burns
-        # 3 retries then fails — treat it as incomplete instead so the Codex
-        # continuation path handles it correctly.
+    elif (reasoning_items_raw or reasoning_parts or saw_reasoning_item) and not final_text:
+        # Response contains only reasoning (encrypted thinking state and/or
+        # human-readable summary) with no visible content or tool calls. The
+        # model is still thinking and needs another turn to produce the actual
+        # answer. Marking this as "stop" would send it into the empty-content
+        # retry loop which burns retries then fails — treat it as incomplete so
+        # the Codex continuation path handles it correctly.
         finish_reason = "incomplete"
     else:
         finish_reason = "stop"
diff --git a/tests/agent/test_codex_responses_adapter.py b/tests/agent/test_codex_responses_adapter.py
new file mode 100644
index 00000000000..751348bc6da
--- /dev/null
+++ b/tests/agent/test_codex_responses_adapter.py
@@ -0,0 +1,63 @@
+from types import SimpleNamespace
+
+from agent.codex_responses_adapter import _normalize_codex_response
+
+
+def test_normalize_codex_response_drops_transient_rs_tmp_reasoning_items():
+    response = SimpleNamespace(  # lightweight attribute-only stand-in for a Codex response
+        status="completed",
+        output=[
+            SimpleNamespace(
+                type="reasoning",
+                id="rs_tmp_123",  # transient "rs_tmp_" id: expected to be left out of replay state
+                encrypted_content="opaque-transient",
+                summary=[],
+            ),
+            SimpleNamespace(
+                type="reasoning",
+                id="rs_456",  # non-transient id: expected to be kept
+                encrypted_content="opaque-stable",
+                summary=[SimpleNamespace(text="stable summary")],
+            ),
+            SimpleNamespace(
+                type="message",
+                role="assistant",
+                status="completed",
+                content=[SimpleNamespace(type="output_text", text="done")],
+            ),
+        ],
+    )
+
+    assistant_message, finish_reason = _normalize_codex_response(response)
+
+    assert finish_reason == "stop"  # visible message text is present, so the turn is finished
+    assert assistant_message.content == "done"
+    assert assistant_message.codex_reasoning_items == [  # only the rs_456 item survives
+        {
+            "type": "reasoning",
+            "encrypted_content": "opaque-stable",
+            "id": "rs_456",
+            "summary": [{"type": "summary_text", "text": "stable summary"}],  # typed dicts, not raw objects
+        }
+    ]
+
+
+def test_normalize_codex_response_treats_summary_only_reasoning_as_incomplete():
+    response = SimpleNamespace(  # a single reasoning item and no message item at all
+        status="completed",
+        output=[
+            SimpleNamespace(
+                type="reasoning",
+                id="rs_tmp_789",  # transient id, so no replayable reasoning item is expected
+                encrypted_content="opaque-transient",
+                summary=[SimpleNamespace(text="still thinking")],  # readable summary text only
+            )
+        ],
+    )
+
+    assistant_message, finish_reason = _normalize_codex_response(response)
+
+    assert finish_reason == "incomplete"  # reasoning was seen but there is no final text
+    assert assistant_message.content == ""
+    assert assistant_message.reasoning == "still thinking"  # summary text is still surfaced
+    assert assistant_message.codex_reasoning_items is None  # None, not an empty list