fix(codex): align validation with normalization for empty stream output

The response validation stage unconditionally marked Codex Responses API replies as invalid when response.output was empty, triggering unnecessary retries and fallback chains. However, _normalize_codex_response can recover from this state by synthesizing output from response.output_text.

Now the validation stage checks for output_text before marking the response invalid, matching the normalization logic.

Also fixes logging.warning → logger.warning for consistency with the rest of the file.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-29 06:31:32 +00:00 · 2026-04-07 18:30:33 +08:00 · 2026-04-07 18:30:33 +08:00 · 37bf19a29d
commit 37bf19a29d
parent 469cd16fe0
2 changed files with 74 additions and 14 deletions
--- a/run_agent.py
+++ b/run_agent.py
@ -7391,12 +7391,22 @@ class AIAgent:
                            response_invalid = True
                            error_details.append("response.output is not a list")
                        elif not output_items:
-                            # If we reach here, _run_codex_stream's backfill
-                            # from output_item.done events and text-delta
-                            # synthesis both failed to populate output.
+                            # Stream backfill may have failed, but
+                            # _normalize_codex_response can still recover
+                            # from response.output_text. Only mark invalid
+                            # when that fallback is also absent.
+                            _out_text = getattr(response, "output_text", None)
+                            _out_text_stripped = _out_text.strip() if isinstance(_out_text, str) else ""
+                            if _out_text_stripped:
+                                logger.debug(
+                                    "Codex response.output is empty but output_text is present "
+                                    "(%d chars); deferring to normalization.",
+                                    len(_out_text_stripped),
+                                )
+                            else:
                                _resp_status = getattr(response, "status", None)
                                _resp_incomplete = getattr(response, "incomplete_details", None)
-                            logging.warning(
+                                logger.warning(
                                    "Codex response.output is empty after stream backfill "
                                    "(status=%s, incomplete_details=%s, model=%s). %s",
                                    _resp_status, _resp_incomplete,
--- a/tests/run_agent/test_run_agent_codex_responses.py
+++ b/tests/run_agent/test_run_agent_codex_responses.py
@ -386,6 +386,56 @@ def test_run_conversation_codex_plain_text(monkeypatch):
    assert result["messages"][-1]["content"] == "OK"


+def test_run_conversation_codex_empty_output_with_output_text(monkeypatch):
+    """Regression: empty response.output + valid output_text should succeed,
+    not trigger retry/fallback. The validation stage must defer to
+    _normalize_codex_response which synthesizes output from output_text."""
+    agent = _build_agent(monkeypatch)
+
+    def _empty_output_response(api_kwargs):
+        return SimpleNamespace(
+            output=[],
+            output_text="Hello from Codex",
+            usage=SimpleNamespace(input_tokens=5, output_tokens=3, total_tokens=8),
+            status="completed",
+            model="gpt-5-codex",
+        )
+
+    monkeypatch.setattr(agent, "_interruptible_api_call", _empty_output_response)
+
+    result = agent.run_conversation("Say hello")
+
+    assert result["completed"] is True
+    assert result["final_response"] == "Hello from Codex"
+
+
+def test_run_conversation_codex_empty_output_no_output_text_retries(monkeypatch):
+    """When both output and output_text are empty, validation should
+    correctly mark the response as invalid and trigger retry."""
+    agent = _build_agent(monkeypatch)
+    calls = {"api": 0}
+
+    def _fake_api_call(api_kwargs):
+        calls["api"] += 1
+        if calls["api"] == 1:
+            return SimpleNamespace(
+                output=[],
+                output_text=None,
+                usage=SimpleNamespace(input_tokens=5, output_tokens=3, total_tokens=8),
+                status="completed",
+                model="gpt-5-codex",
+            )
+        return _codex_message_response("Recovered")
+
+    monkeypatch.setattr(agent, "_interruptible_api_call", _fake_api_call)
+
+    result = agent.run_conversation("Say hello")
+
+    assert calls["api"] >= 2
+    assert result["completed"] is True
+    assert result["final_response"] == "Recovered"
+
+
 def test_run_conversation_codex_refreshes_after_401_and_retries(monkeypatch):
    agent = _build_agent(monkeypatch)
    calls = {"api": 0, "refresh": 0}