diff --git a/run_agent.py b/run_agent.py index 76d4ffcf4..878188f95 100644 --- a/run_agent.py +++ b/run_agent.py @@ -2356,13 +2356,22 @@ class AIAgent: # Replay encrypted reasoning items from previous turns # so the API can maintain coherent reasoning chains. codex_reasoning = msg.get("codex_reasoning_items") + has_codex_reasoning = False if isinstance(codex_reasoning, list): for ri in codex_reasoning: if isinstance(ri, dict) and ri.get("encrypted_content"): items.append(ri) + has_codex_reasoning = True if content_text.strip(): items.append({"role": "assistant", "content": content_text}) + elif has_codex_reasoning: + # The Responses API requires a following item after each + # reasoning item (otherwise: missing_following_item error). + # When the assistant produced only reasoning with no visible + # content, emit an empty assistant message as the required + # following item. + items.append({"role": "assistant", "content": ""}) tool_calls = msg.get("tool_calls") if isinstance(tool_calls, list): @@ -2804,6 +2813,14 @@ class AIAgent: finish_reason = "tool_calls" elif has_incomplete_items or (saw_commentary_phase and not saw_final_answer_phase): finish_reason = "incomplete" + elif reasoning_items_raw and not final_text: + # Response contains only reasoning (encrypted thinking state) with + # no visible content or tool calls. The model is still thinking and + # needs another turn to produce the actual answer. Marking this as + # "stop" would send it into the empty-content retry loop which burns + # 3 retries then fails — treat it as incomplete instead so the Codex + # continuation path handles it correctly. 
+ finish_reason = "incomplete" else: finish_reason = "stop" return assistant_message, finish_reason @@ -6214,15 +6231,24 @@ class AIAgent: interim_msg = self._build_assistant_message(assistant_message, finish_reason) interim_has_content = bool((interim_msg.get("content") or "").strip()) interim_has_reasoning = bool(interim_msg.get("reasoning", "").strip()) if isinstance(interim_msg.get("reasoning"), str) else False + interim_has_codex_reasoning = bool(interim_msg.get("codex_reasoning_items")) - if interim_has_content or interim_has_reasoning: + if interim_has_content or interim_has_reasoning or interim_has_codex_reasoning: last_msg = messages[-1] if messages else None + # Duplicate detection: two consecutive incomplete assistant + # messages with identical content AND reasoning are collapsed. + # For reasoning-only messages (codex_reasoning_items differ but + # visible content/reasoning are both empty), we also compare + # the encrypted items to avoid silently dropping new state. + last_codex_items = last_msg.get("codex_reasoning_items") if isinstance(last_msg, dict) else None + interim_codex_items = interim_msg.get("codex_reasoning_items") duplicate_interim = ( isinstance(last_msg, dict) and last_msg.get("role") == "assistant" and last_msg.get("finish_reason") == "incomplete" and (last_msg.get("content") or "") == (interim_msg.get("content") or "") and (last_msg.get("reasoning") or "") == (interim_msg.get("reasoning") or "") + and last_codex_items == interim_codex_items ) if not duplicate_interim: messages.append(interim_msg) diff --git a/tests/test_run_agent_codex_responses.py b/tests/test_run_agent_codex_responses.py index 42e41ec7b..4b24fbb12 100644 --- a/tests/test_run_agent_codex_responses.py +++ b/tests/test_run_agent_codex_responses.py @@ -830,3 +830,212 @@ def test_dump_api_request_debug_uses_chat_completions_url(monkeypatch, tmp_path) payload = json.loads(dump_file.read_text()) assert payload["request"]["url"] == 
"http://127.0.0.1:9208/v1/chat/completions"
+
+
+# --- Reasoning-only response tests (fix for the empty-content retry loop) ---
+
+
+def _codex_reasoning_only_response(*, encrypted_content="enc_abc123", summary_text="Thinking..."):
+    """Build a fake Codex response whose only output is one reasoning item: no message text, no tool calls."""
+    return SimpleNamespace(
+        output=[
+            SimpleNamespace(
+                type="reasoning",
+                id="rs_001",
+                encrypted_content=encrypted_content,
+                summary=[SimpleNamespace(type="summary_text", text=summary_text)],
+                status="completed",
+            )
+        ],
+        usage=SimpleNamespace(input_tokens=50, output_tokens=100, total_tokens=150),
+        status="completed",
+        model="gpt-5-codex",
+    )
+
+
+def test_normalize_codex_response_marks_reasoning_only_as_incomplete(monkeypatch):
+    """A response with only reasoning items and no content must normalize to 'incomplete', not 'stop'.
+
+    Without this fix, reasoning-only responses get finish_reason='stop' which
+    sends them into the empty-content retry loop (3 retries then failure).
+    """
+    agent = _build_agent(monkeypatch)
+    assistant_message, finish_reason = agent._normalize_codex_response(
+        _codex_reasoning_only_response()
+    )
+
+    assert finish_reason == "incomplete"
+    assert assistant_message.content == ""
+    assert assistant_message.codex_reasoning_items is not None
+    assert len(assistant_message.codex_reasoning_items) == 1
+    assert assistant_message.codex_reasoning_items[0]["encrypted_content"] == "enc_abc123"
+
+
+def test_normalize_codex_response_reasoning_with_content_is_stop(monkeypatch):
+    """A response carrying both a reasoning item and message text must still normalize to 'stop'."""
+    agent = _build_agent(monkeypatch)
+    response = SimpleNamespace(
+        output=[
+            SimpleNamespace(
+                type="reasoning",
+                id="rs_001",
+                encrypted_content="enc_xyz",
+                summary=[SimpleNamespace(type="summary_text", text="Thinking...")],
+                status="completed",
+            ),
+            SimpleNamespace(
+                type="message",
+                content=[SimpleNamespace(type="output_text", text="Here is the answer.")],
+                status="completed",
+            ),
+        ],
+        usage=SimpleNamespace(input_tokens=50, output_tokens=100, total_tokens=150),
+        status="completed",
+        model="gpt-5-codex",
+    )
+    assistant_message, finish_reason = agent._normalize_codex_response(response)
+
+    assert finish_reason == "stop"
+    assert "Here is the answer" in assistant_message.content
+
+
+def test_run_conversation_codex_continues_after_reasoning_only_response(monkeypatch):
+    """End-to-end: a reasoning-only turn followed by a final message should succeed, not hit the retry loop."""
+    agent = _build_agent(monkeypatch)
+    responses = [
+        _codex_reasoning_only_response(),
+        _codex_message_response("The final answer is 42."),
+    ]
+    monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: responses.pop(0))
+
+    result = agent.run_conversation("what is the answer?")
+
+    assert result["completed"] is True
+    assert result["final_response"] == "The final answer is 42."
+    # The reasoning-only turn must appear in the returned messages as an incomplete interim entry
+    assert any(
+        msg.get("role") == "assistant"
+        and msg.get("finish_reason") == "incomplete"
+        and msg.get("codex_reasoning_items") is not None
+        for msg in result["messages"]
+    )
+
+
+def test_run_conversation_codex_preserves_encrypted_reasoning_in_interim(monkeypatch):
+    """Encrypted codex_reasoning_items must be preserved in interim messages
+    even when the turn has no visible reasoning text or content."""
+    agent = _build_agent(monkeypatch)
+    # Reasoning item with an encrypted payload but an empty (no human-readable) summary
+    reasoning_response = SimpleNamespace(
+        output=[
+            SimpleNamespace(
+                type="reasoning",
+                id="rs_002",
+                encrypted_content="enc_opaque_blob",
+                summary=[],
+                status="completed",
+            )
+        ],
+        usage=SimpleNamespace(input_tokens=50, output_tokens=100, total_tokens=150),
+        status="completed",
+        model="gpt-5-codex",
+    )
+    responses = [
+        reasoning_response,
+        _codex_message_response("Done thinking."),
+    ]
+    monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: responses.pop(0))
+
+    result = agent.run_conversation("think hard")
+
+    assert result["completed"] is True
+    assert result["final_response"] == "Done thinking."
+    # The first incomplete assistant message must still carry the encrypted reasoning item
+    interim_msgs = [
+        msg for msg in result["messages"]
+        if msg.get("role") == "assistant"
+        and msg.get("finish_reason") == "incomplete"
+    ]
+    assert len(interim_msgs) >= 1
+    assert interim_msgs[0].get("codex_reasoning_items") is not None
+    assert interim_msgs[0]["codex_reasoning_items"][0]["encrypted_content"] == "enc_opaque_blob"
+
+
+def test_chat_messages_to_responses_input_reasoning_only_has_following_item(monkeypatch):
+    """When a reasoning-only interim message is converted to Responses API input,
+    the reasoning items must be followed by an assistant message (even if empty)
+    to satisfy the API's 'required following item' constraint."""
+    agent = _build_agent(monkeypatch)
+    messages = [
+        {"role": "user", "content": "think hard"},
+        {
+            "role": "assistant",
+            "content": "",
+            "reasoning": None,
+            "finish_reason": "incomplete",
+            "codex_reasoning_items": [
+                {"type": "reasoning", "id": "rs_001", "encrypted_content": "enc_abc", "summary": []},
+            ],
+        },
+    ]
+    items = agent._chat_messages_to_responses_input(messages)
+
+    # Locate the reasoning item; exactly one is expected in the converted input
+    reasoning_indices = [i for i, it in enumerate(items) if it.get("type") == "reasoning"]
+    assert len(reasoning_indices) == 1
+    ri_idx = reasoning_indices[0]
+
+    # The item right after the reasoning item must exist and be an assistant item
+    assert ri_idx < len(items) - 1, "Reasoning item must not be the last item (missing_following_item)"
+    following = items[ri_idx + 1]
+    assert following.get("role") == "assistant"
+
+
+def test_duplicate_detection_distinguishes_different_codex_reasoning(monkeypatch):
+    """Two consecutive reasoning-only responses whose encrypted content differs
+    must NOT be collapsed as duplicate interim messages."""
+    agent = _build_agent(monkeypatch)
+    responses = [
+        # First reasoning-only response
+        SimpleNamespace(
+            output=[
+                SimpleNamespace(
+                    type="reasoning", id="rs_001",
+                    encrypted_content="enc_first", summary=[], status="completed",
+                )
+            ],
+            usage=SimpleNamespace(input_tokens=50, output_tokens=100, total_tokens=150),
+            status="completed", model="gpt-5-codex",
+        ),
+        # Second reasoning-only response (different encrypted content)
+        SimpleNamespace(
+            output=[
+                SimpleNamespace(
+                    type="reasoning", id="rs_002",
+                    encrypted_content="enc_second", summary=[], status="completed",
+                )
+            ],
+            usage=SimpleNamespace(input_tokens=50, output_tokens=100, total_tokens=150),
+            status="completed", model="gpt-5-codex",
+        ),
+        _codex_message_response("Final answer after thinking."),
+    ]
+    monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: responses.pop(0))
+
+    result = agent.run_conversation("think very hard")
+
+    assert result["completed"] is True
+    assert result["final_response"] == "Final answer after thinking."
+    # Both reasoning-only interim messages must remain in history (not collapsed)
+    interim_msgs = [
+        msg for msg in result["messages"]
+        if msg.get("role") == "assistant"
+        and msg.get("finish_reason") == "incomplete"
+    ]
+    assert len(interim_msgs) == 2
+    encrypted_contents = [
+        msg["codex_reasoning_items"][0]["encrypted_content"]
+        for msg in interim_msgs
+    ]
+    assert "enc_first" in encrypted_contents
+    assert "enc_second" in encrypted_contents