From b1a46b30477527ecc3e174ebd4d49e011774dc55 Mon Sep 17 00:00:00 2001
From: Krishna <3540493+kpadilha@users.noreply.github.com>
Date: Wed, 13 May 2026 16:03:26 +0200
Subject: [PATCH] fix(codex): drop transient rs_tmp reasoning replay state

---
 agent/codex_responses_adapter.py            | 22 ++++---
 tests/agent/test_codex_responses_adapter.py | 63 +++++++++++++++++++++
 2 files changed, 78 insertions(+), 7 deletions(-)
 create mode 100644 tests/agent/test_codex_responses_adapter.py

diff --git a/agent/codex_responses_adapter.py b/agent/codex_responses_adapter.py
index c3affd185dc..13a81ddafdc 100644
--- a/agent/codex_responses_adapter.py
+++ b/agent/codex_responses_adapter.py
@@ -932,6 +932,7 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
     has_incomplete_items = response_status in {"queued", "in_progress", "incomplete"}
     saw_commentary_phase = False
     saw_final_answer_phase = False
+    saw_reasoning_item = False
 
     for item in output:
         item_type = getattr(item, "type", None)
@@ -969,6 +970,7 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
                 raw_message_item["phase"] = normalized_phase
             message_items_raw.append(raw_message_item)
         elif item_type == "reasoning":
+            saw_reasoning_item = True
             reasoning_text = _extract_responses_reasoning_text(item)
             if reasoning_text:
                 reasoning_parts.append(reasoning_text)
@@ -979,6 +981,12 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
             if isinstance(encrypted, str) and encrypted:
                 raw_item = {"type": "reasoning", "encrypted_content": encrypted}
                 item_id = getattr(item, "id", None)
+                if isinstance(item_id, str) and item_id.startswith("rs_tmp_"):
+                    logger.debug(
+                        "Skipping transient Codex reasoning item during normalization: %s",
+                        item_id,
+                    )
+                    continue
                 if isinstance(item_id, str) and item_id:
                     raw_item["id"] = item_id
                 # Capture summary — required by the API when replaying reasoning items
@@ -1089,13 +1097,13 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
         finish_reason = "incomplete"
     elif has_incomplete_items or (saw_commentary_phase and not saw_final_answer_phase):
         finish_reason = "incomplete"
-    elif reasoning_items_raw and not final_text:
-        # Response contains only reasoning (encrypted thinking state) with
-        # no visible content or tool calls. The model is still thinking and
-        # needs another turn to produce the actual answer. Marking this as
-        # "stop" would send it into the empty-content retry loop which burns
-        # 3 retries then fails — treat it as incomplete instead so the Codex
-        # continuation path handles it correctly.
+    elif (reasoning_items_raw or reasoning_parts or saw_reasoning_item) and not final_text:
+        # Response contains only reasoning (encrypted thinking state and/or
+        # human-readable summary) with no visible content or tool calls. The
+        # model is still thinking and needs another turn to produce the actual
+        # answer. Marking this as "stop" would send it into the empty-content
+        # retry loop which burns retries then fails — treat it as incomplete so
+        # the Codex continuation path handles it correctly.
         finish_reason = "incomplete"
     else:
         finish_reason = "stop"
diff --git a/tests/agent/test_codex_responses_adapter.py b/tests/agent/test_codex_responses_adapter.py
new file mode 100644
index 00000000000..751348bc6da
--- /dev/null
+++ b/tests/agent/test_codex_responses_adapter.py
@@ -0,0 +1,63 @@
+from types import SimpleNamespace
+
+from agent.codex_responses_adapter import _normalize_codex_response
+
+
+def test_normalize_codex_response_drops_transient_rs_tmp_reasoning_items():
+    response = SimpleNamespace(
+        status="completed",
+        output=[
+            SimpleNamespace(
+                type="reasoning",
+                id="rs_tmp_123",
+                encrypted_content="opaque-transient",
+                summary=[],
+            ),
+            SimpleNamespace(
+                type="reasoning",
+                id="rs_456",
+                encrypted_content="opaque-stable",
+                summary=[SimpleNamespace(text="stable summary")],
+            ),
+            SimpleNamespace(
+                type="message",
+                role="assistant",
+                status="completed",
+                content=[SimpleNamespace(type="output_text", text="done")],
+            ),
+        ],
+    )
+
+    assistant_message, finish_reason = _normalize_codex_response(response)
+
+    assert finish_reason == "stop"
+    assert assistant_message.content == "done"
+    assert assistant_message.codex_reasoning_items == [
+        {
+            "type": "reasoning",
+            "encrypted_content": "opaque-stable",
+            "id": "rs_456",
+            "summary": [{"type": "summary_text", "text": "stable summary"}],
+        }
+    ]
+
+
+def test_normalize_codex_response_treats_summary_only_reasoning_as_incomplete():
+    response = SimpleNamespace(
+        status="completed",
+        output=[
+            SimpleNamespace(
+                type="reasoning",
+                id="rs_tmp_789",
+                encrypted_content="opaque-transient",
+                summary=[SimpleNamespace(text="still thinking")],
+            )
+        ],
+    )
+
+    assistant_message, finish_reason = _normalize_codex_response(response)
+
+    assert finish_reason == "incomplete"
+    assert assistant_message.content == ""
+    assert assistant_message.reasoning == "still thinking"
+    assert assistant_message.codex_reasoning_items is None