From c6a8bcb542e564fc61fa0d8f13a251bf4c659091 Mon Sep 17 00:00:00 2001
From: Evan <evan@EvandeMac-mini.local>
Date: Thu, 9 Apr 2026 01:27:36 +0800
Subject: [PATCH] test(codex): cover empty final tool stream regression

---
 .../test_run_agent_codex_responses.py         | 113 +++++++++++++++++-
 1 file changed, 111 insertions(+), 2 deletions(-)

diff --git a/tests/run_agent/test_run_agent_codex_responses.py b/tests/run_agent/test_run_agent_codex_responses.py
index ea703ffbb..66bf40723 100644
--- a/tests/run_agent/test_run_agent_codex_responses.py
+++ b/tests/run_agent/test_run_agent_codex_responses.py
@@ -148,7 +148,8 @@ def _codex_ack_message_response(text: str):
 
 
 class _FakeResponsesStream:
-    def __init__(self, *, final_response=None, final_error=None):
+    def __init__(self, *, events=None, final_response=None, final_error=None):
+        self._events = list(events or [])
         self._final_response = final_response
         self._final_error = final_error
 
@@ -159,7 +160,7 @@ class _FakeResponsesStream:
         return False
 
     def __iter__(self):
-        return iter(())
+        return iter(self._events)
 
     def get_final_response(self):
         if self._final_error is not None:
@@ -374,6 +375,53 @@ def test_run_codex_stream_fallback_parses_create_stream_events(monkeypatch):
     assert response.output[0].content[0].text == "streamed create ok"
 
 
+def test_run_codex_stream_backfills_tool_items_from_output_item_done(monkeypatch):
+    """Backfill a tool call that appears only in ``response.output_item.done``.
+
+    The final response's ``output`` is empty, so only the events carry the call.
+    """
+    agent = _build_agent(monkeypatch)
+    streamed_call = SimpleNamespace(
+        type="function_call",
+        status="completed",
+        call_id="call_1",
+        id="fc_1",
+        name="terminal",
+        arguments='{"command":"pwd"}',
+    )
+    item_done = SimpleNamespace(
+        type="response.output_item.done", output_index=0, item=streamed_call
+    )
+    empty_final = SimpleNamespace(
+        output=[], output_text="", status="completed", model="gpt-5.4"
+    )
+    stream = _FakeResponsesStream(
+        events=[
+            SimpleNamespace(type="response.created"),
+            SimpleNamespace(type="response.in_progress"),
+            item_done,
+            SimpleNamespace(type="response.completed"),
+        ],
+        final_response=empty_final,
+    )
+    agent.client = SimpleNamespace(
+        responses=SimpleNamespace(
+            stream=lambda **kwargs: stream,
+            create=lambda **kwargs: _codex_message_response("fallback"),
+        )
+    )
+
+    response = agent._run_codex_stream(_codex_request_kwargs())
+    message, finish_reason = agent._normalize_codex_response(response)
+
+    assert finish_reason == "tool_calls"
+    assert len(message.tool_calls) == 1
+    only_call = message.tool_calls[0]
+    assert only_call.id == "call_1"
+    assert only_call.function.name == "terminal"
+    assert only_call.function.arguments == '{"command":"pwd"}'
+
+
 def test_run_conversation_codex_plain_text(monkeypatch):
     agent = _build_agent(monkeypatch)
     monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: _codex_message_response("OK"))
@@ -386,6 +434,67 @@ def test_run_conversation_codex_plain_text(monkeypatch):
     assert result["messages"][-1]["content"] == "OK"
 
 
+def test_run_conversation_codex_tool_stream_empty_final_output(monkeypatch):
+    """Conversation completes when the tool call arrives only via stream events.
+
+    The first stream ends with an empty ``output``; the second yields "done".
+    """
+    agent = _build_agent(monkeypatch)
+    streamed_call = SimpleNamespace(
+        type="function_call",
+        status="completed",
+        call_id="call_1",
+        id="fc_1",
+        name="terminal",
+        arguments="{}",
+    )
+    tool_stream = _FakeResponsesStream(
+        events=[
+            SimpleNamespace(type="response.created"),
+            SimpleNamespace(type="response.in_progress"),
+            SimpleNamespace(
+                type="response.output_item.done", output_index=0, item=streamed_call
+            ),
+            SimpleNamespace(type="response.completed"),
+        ],
+        final_response=SimpleNamespace(
+            output=[], output_text="", status="completed", model="gpt-5.4"
+        ),
+    )
+    text_stream = _FakeResponsesStream(final_response=_codex_message_response("done"))
+
+    # Streams are handed out in order, one per ``responses.stream`` call.
+    pending = [tool_stream, text_stream]
+    fake_client = SimpleNamespace(
+        responses=SimpleNamespace(
+            stream=lambda **kwargs: pending.pop(0),
+            create=lambda **kwargs: _codex_message_response("fallback"),
+        )
+    )
+
+    def _stream_api_call(api_kwargs):
+        return agent._run_codex_stream(api_kwargs, client=fake_client)
+
+    monkeypatch.setattr(agent, "_interruptible_api_call", _stream_api_call)
+
+    def _fake_execute_tool_calls(assistant_message, messages, effective_task_id, api_call_count=0):
+        for call in assistant_message.tool_calls:
+            messages.append(
+                {"role": "tool", "tool_call_id": call.id, "content": '{"ok":true}'}
+            )
+
+    monkeypatch.setattr(agent, "_execute_tool_calls", _fake_execute_tool_calls)
+
+    result = agent.run_conversation("run the terminal tool then finish")
+
+    assert result["completed"] is True
+    assert result["final_response"] == "done"
+    assert any(
+        msg.get("role") == "tool" and msg.get("tool_call_id") == "call_1"
+        for msg in result["messages"]
+    )
+
+
 def test_run_conversation_codex_empty_output_with_output_text(monkeypatch):
     """Regression: empty response.output + valid output_text should succeed,
     not trigger retry/fallback. The validation stage must defer to