From 7d11fa4e9ef8bfd45c5e60a2615d2af03fbb7305 Mon Sep 17 00:00:00 2001
From: briandevans <252620095+briandevans@users.noreply.github.com>
Date: Sat, 13 Jun 2026 13:31:26 -0700
Subject: [PATCH] fix(codex-responses): let final_answer complete top-level
 incomplete responses

---
 agent/codex_responses_adapter.py              |   6 +-
 .../test_run_agent_codex_responses.py         | 102 ++++++++++++++++++
 2 files changed, 107 insertions(+), 1 deletion(-)

diff --git a/agent/codex_responses_adapter.py b/agent/codex_responses_adapter.py
index a2678a6da36..b8479141db1 100644
--- a/agent/codex_responses_adapter.py
+++ b/agent/codex_responses_adapter.py
@@ -1081,6 +1081,7 @@ def _normalize_codex_response(
     message_items_raw: List[Dict[str, Any]] = []
     tool_calls: List[Any] = []
     has_incomplete_items = response_status in {"queued", "in_progress", "incomplete"}
+    saw_streaming_or_item_incomplete = response_status in {"queued", "in_progress"}
     saw_commentary_phase = False
     saw_final_answer_phase = False
     saw_reasoning_item = False
@@ -1095,6 +1096,7 @@ def _normalize_codex_response(
 
         if item_status in {"queued", "in_progress", "incomplete"}:
             has_incomplete_items = True
+            saw_streaming_or_item_incomplete = True
 
         if item_type == "message":
             item_phase = getattr(item, "phase", None)
@@ -1252,7 +1254,9 @@ def _normalize_codex_response(
         finish_reason = "tool_calls"
     elif leaked_tool_call_text:
         finish_reason = "incomplete"
-    elif has_incomplete_items or (saw_commentary_phase and not saw_final_answer_phase):
+    elif saw_streaming_or_item_incomplete:
+        finish_reason = "incomplete"
+    elif (has_incomplete_items or saw_commentary_phase) and not saw_final_answer_phase:
         finish_reason = "incomplete"
     elif (reasoning_items_raw or reasoning_parts or saw_reasoning_item) and not final_text:
         # Response contains only reasoning (encrypted thinking state and/or
diff --git a/tests/run_agent/test_run_agent_codex_responses.py b/tests/run_agent/test_run_agent_codex_responses.py
index 7f899c601d1..a031907611c 100644
--- a/tests/run_agent/test_run_agent_codex_responses.py
+++ b/tests/run_agent/test_run_agent_codex_responses.py
@@ -154,6 +154,22 @@ def _codex_ack_message_response(text: str):
     )
 
 
+def _codex_final_answer_with_top_level_incomplete_response(text: str):
+    return SimpleNamespace(
+        output=[
+            SimpleNamespace(
+                type="message",
+                phase="final_answer",
+                status="completed",
+                content=[SimpleNamespace(type="output_text", text=text)],
+            )
+        ],
+        usage=SimpleNamespace(input_tokens=4, output_tokens=2, total_tokens=6),
+        status="incomplete",
+        model="gpt-5.4",
+    )
+
+
 class _FakeCreateStream:
     """Iterable-only fake for ``responses.create(stream=True)`` outputs.
 
@@ -1351,6 +1367,92 @@ def test_normalize_codex_response_marks_commentary_only_message_as_incomplete(mo
     assert "inspect the repository" in (assistant_message.content or "")
 
 
+def test_normalize_codex_response_final_answer_overrides_top_level_incomplete(monkeypatch):
+    from agent.codex_responses_adapter import _normalize_codex_response
+
+    assistant_message, finish_reason = _normalize_codex_response(
+        _codex_final_answer_with_top_level_incomplete_response(
+            "Briefly:\n\n- I'm Ramsay, your assistant."
+        )
+    )
+
+    assert finish_reason == "stop"
+    assert "Ramsay" in (assistant_message.content or "")
+
+
+def test_normalize_codex_response_top_level_incomplete_without_final_answer_stays_incomplete(monkeypatch):
+    from agent.codex_responses_adapter import _normalize_codex_response
+
+    response = SimpleNamespace(
+        output=[
+            SimpleNamespace(
+                type="message",
+                status="completed",
+                content=[SimpleNamespace(type="output_text", text="Partial...")],
+            )
+        ],
+        usage=SimpleNamespace(input_tokens=4, output_tokens=2, total_tokens=6),
+        status="incomplete",
+        model="gpt-5.4",
+    )
+
+    _, finish_reason = _normalize_codex_response(response)
+
+    assert finish_reason == "incomplete"
+
+
+@pytest.mark.parametrize("top_level_status", ["queued", "in_progress"])
+def test_normalize_codex_response_final_answer_does_not_override_streaming_status(
+    monkeypatch, top_level_status
+):
+    from agent.codex_responses_adapter import _normalize_codex_response
+
+    response = SimpleNamespace(
+        output=[
+            SimpleNamespace(
+                type="message",
+                phase="final_answer",
+                status="completed",
+                content=[SimpleNamespace(type="output_text", text="Interim answer.")],
+            )
+        ],
+        usage=SimpleNamespace(input_tokens=4, output_tokens=2, total_tokens=6),
+        status=top_level_status,
+        model="gpt-5.4",
+    )
+
+    _, finish_reason = _normalize_codex_response(response)
+
+    assert finish_reason == "incomplete"
+
+
+def test_normalize_codex_response_final_answer_does_not_override_per_item_in_progress(monkeypatch):
+    from agent.codex_responses_adapter import _normalize_codex_response
+
+    response = SimpleNamespace(
+        output=[
+            SimpleNamespace(
+                type="message",
+                phase="final_answer",
+                status="completed",
+                content=[SimpleNamespace(type="output_text", text="Partial final.")],
+            ),
+            SimpleNamespace(
+                type="message",
+                status="in_progress",
+                content=[SimpleNamespace(type="output_text", text="")],
+            ),
+        ],
+        usage=SimpleNamespace(input_tokens=4, output_tokens=2, total_tokens=6),
+        status="completed",
+        model="gpt-5.4",
+    )
+
+    _, finish_reason = _normalize_codex_response(response)
+
+    assert finish_reason == "incomplete"
+
+
 def test_normalize_codex_response_preserves_message_status_for_replay(monkeypatch):
     """Incomplete Codex output messages must not be replayed as completed."""
     agent = _build_agent(monkeypatch)