From 7d11fa4e9ef8bfd45c5e60a2615d2af03fbb7305 Mon Sep 17 00:00:00 2001 From: briandevans <252620095+briandevans@users.noreply.github.com> Date: Sat, 13 Jun 2026 13:31:26 -0700 Subject: [PATCH] fix(codex-responses): let final_answer complete top-level incomplete responses --- agent/codex_responses_adapter.py | 6 +- .../test_run_agent_codex_responses.py | 102 ++++++++++++++++++ 2 files changed, 107 insertions(+), 1 deletion(-) diff --git a/agent/codex_responses_adapter.py b/agent/codex_responses_adapter.py index a2678a6da36..b8479141db1 100644 --- a/agent/codex_responses_adapter.py +++ b/agent/codex_responses_adapter.py @@ -1081,6 +1081,7 @@ def _normalize_codex_response( message_items_raw: List[Dict[str, Any]] = [] tool_calls: List[Any] = [] has_incomplete_items = response_status in {"queued", "in_progress", "incomplete"} + saw_streaming_or_item_incomplete = response_status in {"queued", "in_progress"} saw_commentary_phase = False saw_final_answer_phase = False saw_reasoning_item = False @@ -1095,6 +1096,7 @@ def _normalize_codex_response( if item_status in {"queued", "in_progress", "incomplete"}: has_incomplete_items = True + saw_streaming_or_item_incomplete = True if item_type == "message": item_phase = getattr(item, "phase", None) @@ -1252,7 +1254,9 @@ def _normalize_codex_response( finish_reason = "tool_calls" elif leaked_tool_call_text: finish_reason = "incomplete" - elif has_incomplete_items or (saw_commentary_phase and not saw_final_answer_phase): + elif saw_streaming_or_item_incomplete: + finish_reason = "incomplete" + elif (has_incomplete_items or saw_commentary_phase) and not saw_final_answer_phase: finish_reason = "incomplete" elif (reasoning_items_raw or reasoning_parts or saw_reasoning_item) and not final_text: # Response contains only reasoning (encrypted thinking state and/or diff --git a/tests/run_agent/test_run_agent_codex_responses.py b/tests/run_agent/test_run_agent_codex_responses.py index 7f899c601d1..a031907611c 100644 --- a/tests/run_agent/test_run_agent_codex_responses.py +++ b/tests/run_agent/test_run_agent_codex_responses.py @@ -154,6 +154,22 @@ def _codex_ack_message_response(text: str): ) +def _codex_final_answer_with_top_level_incomplete_response(text: str): + return SimpleNamespace( + output=[ + SimpleNamespace( + type="message", + phase="final_answer", + status="completed", + content=[SimpleNamespace(type="output_text", text=text)], + ) + ], + usage=SimpleNamespace(input_tokens=4, output_tokens=2, total_tokens=6), + status="incomplete", + model="gpt-5.4", + ) + + class _FakeCreateStream: """Iterable-only fake for ``responses.create(stream=True)`` outputs. @@ -1351,6 +1367,92 @@ def test_normalize_codex_response_marks_commentary_only_message_as_incomplete(mo assert "inspect the repository" in (assistant_message.content or "") +def test_normalize_codex_response_final_answer_overrides_top_level_incomplete(monkeypatch): + from agent.codex_responses_adapter import _normalize_codex_response + + assistant_message, finish_reason = _normalize_codex_response( + _codex_final_answer_with_top_level_incomplete_response( + "Briefly:\n\n- I'm Ramsay, your assistant." + ) + ) + + assert finish_reason == "stop" + assert "Ramsay" in (assistant_message.content or "") + + +def test_normalize_codex_response_top_level_incomplete_without_final_answer_stays_incomplete(monkeypatch): + from agent.codex_responses_adapter import _normalize_codex_response + + response = SimpleNamespace( + output=[ + SimpleNamespace( + type="message", + status="completed", + content=[SimpleNamespace(type="output_text", text="Partial...")], + ) + ], + usage=SimpleNamespace(input_tokens=4, output_tokens=2, total_tokens=6), + status="incomplete", + model="gpt-5.4", + ) + + _, finish_reason = _normalize_codex_response(response) + + assert finish_reason == "incomplete" + + +@pytest.mark.parametrize("top_level_status", ["queued", "in_progress"]) +def test_normalize_codex_response_final_answer_does_not_override_streaming_status( + monkeypatch, top_level_status +): + from agent.codex_responses_adapter import _normalize_codex_response + + response = SimpleNamespace( + output=[ + SimpleNamespace( + type="message", + phase="final_answer", + status="completed", + content=[SimpleNamespace(type="output_text", text="Interim answer.")], + ) + ], + usage=SimpleNamespace(input_tokens=4, output_tokens=2, total_tokens=6), + status=top_level_status, + model="gpt-5.4", + ) + + _, finish_reason = _normalize_codex_response(response) + + assert finish_reason == "incomplete" + + +def test_normalize_codex_response_final_answer_does_not_override_per_item_in_progress(monkeypatch): + from agent.codex_responses_adapter import _normalize_codex_response + + response = SimpleNamespace( + output=[ + SimpleNamespace( + type="message", + phase="final_answer", + status="completed", + content=[SimpleNamespace(type="output_text", text="Partial final.")], + ), + SimpleNamespace( + type="message", + status="in_progress", + content=[SimpleNamespace(type="output_text", text="")], + ), + ], + usage=SimpleNamespace(input_tokens=4, output_tokens=2, total_tokens=6), + status="completed", + model="gpt-5.4", + ) + + _, finish_reason = _normalize_codex_response(response) + + assert finish_reason == "incomplete" + + def test_normalize_codex_response_preserves_message_status_for_replay(monkeypatch): """Incomplete Codex output messages must not be replayed as completed.""" agent = _build_agent(monkeypatch)