feat: add ResponsesApiTransport + wire all Codex transport paths

Add ResponsesApiTransport wrapping codex_responses_adapter.py behind the
ProviderTransport ABC. Auto-registered via _discover_transports().

Wire ALL Codex transport methods to production paths in run_agent.py:
- build_kwargs: main _build_api_kwargs codex branch (50 lines extracted)
- normalize_response: main loop + flush + summary + retry (4 sites)
- convert_tools: memory flush tool override
- convert_messages: called internally via build_kwargs
- validate_response: response validation gate
- preflight_kwargs: request sanitization (2 sites)

Remove 7 dead legacy wrappers from AIAgent (_responses_tools,
_chat_messages_to_responses_input, _normalize_codex_response,
_preflight_codex_api_kwargs, _preflight_codex_input_items,
_extract_responses_message_text, _extract_responses_reasoning_text).
Keep 3 ID manipulation methods still used by _build_assistant_message.

Update 18 test call sites across 3 test files to call adapter functions
directly instead of through deleted AIAgent wrappers.

24 new tests. 343 codex/responses/transport tests pass (0 failures).

PR 4 of the provider transport refactor.
This commit is contained in:
kshitijk4poor 2026-04-21 14:24:41 +05:30 committed by Teknium
parent 09dd5eb6a5
commit c832ebd67c
7 changed files with 589 additions and 169 deletions

View file

@@ -12,6 +12,7 @@ from types import SimpleNamespace
from unittest.mock import patch, MagicMock
import pytest
from agent.codex_responses_adapter import _chat_messages_to_responses_input, _normalize_codex_response, _preflight_codex_input_items
sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None))
sys.modules.setdefault("firecrawl", types.SimpleNamespace(Firecrawl=object))
@@ -446,7 +447,7 @@ class TestChatMessagesToResponsesInput:
agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
base_url="https://chatgpt.com/backend-api/codex")
messages = [{"role": "user", "content": "hello"}]
items = agent._chat_messages_to_responses_input(messages)
items = _chat_messages_to_responses_input(messages)
assert items == [{"role": "user", "content": "hello"}]
def test_system_messages_filtered(self, monkeypatch):
@@ -456,7 +457,7 @@ class TestChatMessagesToResponsesInput:
{"role": "system", "content": "be helpful"},
{"role": "user", "content": "hello"},
]
items = agent._chat_messages_to_responses_input(messages)
items = _chat_messages_to_responses_input(messages)
assert len(items) == 1
assert items[0]["role"] == "user"
@@ -472,7 +473,7 @@ class TestChatMessagesToResponsesInput:
"function": {"name": "web_search", "arguments": '{"query": "test"}'},
}],
}]
items = agent._chat_messages_to_responses_input(messages)
items = _chat_messages_to_responses_input(messages)
fc_items = [i for i in items if i.get("type") == "function_call"]
assert len(fc_items) == 1
assert fc_items[0]["name"] == "web_search"
@@ -482,7 +483,7 @@ class TestChatMessagesToResponsesInput:
agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
base_url="https://chatgpt.com/backend-api/codex")
messages = [{"role": "tool", "tool_call_id": "call_abc", "content": "result here"}]
items = agent._chat_messages_to_responses_input(messages)
items = _chat_messages_to_responses_input(messages)
assert items[0]["type"] == "function_call_output"
assert items[0]["call_id"] == "call_abc"
assert items[0]["output"] == "result here"
@@ -502,7 +503,7 @@ class TestChatMessagesToResponsesInput:
},
{"role": "user", "content": "continue"},
]
items = agent._chat_messages_to_responses_input(messages)
items = _chat_messages_to_responses_input(messages)
reasoning_items = [i for i in items if i.get("type") == "reasoning"]
assert len(reasoning_items) == 1
assert reasoning_items[0]["encrypted_content"] == "gAAAA_test_blob"
@@ -515,7 +516,7 @@ class TestChatMessagesToResponsesInput:
{"role": "assistant", "content": "hi"},
{"role": "user", "content": "hello"},
]
items = agent._chat_messages_to_responses_input(messages)
items = _chat_messages_to_responses_input(messages)
reasoning_items = [i for i in items if i.get("type") == "reasoning"]
assert len(reasoning_items) == 0
@@ -539,7 +540,7 @@ class TestNormalizeCodexResponse:
],
status="completed",
)
msg, reason = agent._normalize_codex_response(response)
msg, reason = _normalize_codex_response(response)
assert msg.content == "Hello!"
assert reason == "stop"
@@ -557,7 +558,7 @@ class TestNormalizeCodexResponse:
],
status="completed",
)
msg, reason = agent._normalize_codex_response(response)
msg, reason = _normalize_codex_response(response)
assert msg.content == "42"
assert "math" in msg.reasoning
assert reason == "stop"
@@ -576,7 +577,7 @@ class TestNormalizeCodexResponse:
],
status="completed",
)
msg, reason = agent._normalize_codex_response(response)
msg, reason = _normalize_codex_response(response)
assert msg.codex_reasoning_items is not None
assert len(msg.codex_reasoning_items) == 1
assert msg.codex_reasoning_items[0]["encrypted_content"] == "gAAAA_secret_blob_123"
@@ -592,7 +593,7 @@ class TestNormalizeCodexResponse:
],
status="completed",
)
msg, reason = agent._normalize_codex_response(response)
msg, reason = _normalize_codex_response(response)
assert msg.codex_reasoning_items is None
def test_tool_calls_extracted(self, monkeypatch):
@@ -605,7 +606,7 @@ class TestNormalizeCodexResponse:
],
status="completed",
)
msg, reason = agent._normalize_codex_response(response)
msg, reason = _normalize_codex_response(response)
assert reason == "tool_calls"
assert len(msg.tool_calls) == 1
assert msg.tool_calls[0].function.name == "web_search"
@@ -821,7 +822,7 @@ class TestCodexReasoningPreflight:
"summary": [{"type": "summary_text", "text": "Thinking about it"}]},
{"role": "assistant", "content": "hi there"},
]
normalized = agent._preflight_codex_input_items(raw_input)
normalized = _preflight_codex_input_items(raw_input)
reasoning_items = [i for i in normalized if i.get("type") == "reasoning"]
assert len(reasoning_items) == 1
assert reasoning_items[0]["encrypted_content"] == "abc123encrypted"
@@ -837,7 +838,7 @@ class TestCodexReasoningPreflight:
raw_input = [
{"type": "reasoning", "encrypted_content": "abc123"},
]
normalized = agent._preflight_codex_input_items(raw_input)
normalized = _preflight_codex_input_items(raw_input)
assert len(normalized) == 1
assert "id" not in normalized[0]
assert normalized[0]["summary"] == [] # default empty summary
@@ -849,7 +850,7 @@ class TestCodexReasoningPreflight:
{"type": "reasoning", "encrypted_content": ""},
{"role": "user", "content": "hello"},
]
normalized = agent._preflight_codex_input_items(raw_input)
normalized = _preflight_codex_input_items(raw_input)
reasoning_items = [i for i in normalized if i.get("type") == "reasoning"]
assert len(reasoning_items) == 0
@@ -868,7 +869,7 @@ class TestCodexReasoningPreflight:
},
{"role": "user", "content": "follow up"},
]
items = agent._chat_messages_to_responses_input(messages)
items = _chat_messages_to_responses_input(messages)
reasoning_items = [i for i in items if isinstance(i, dict) and i.get("type") == "reasoning"]
assert len(reasoning_items) == 1
assert reasoning_items[0]["encrypted_content"] == "enc123"

View file

@@ -16,6 +16,7 @@ from types import SimpleNamespace
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from agent.codex_responses_adapter import _chat_messages_to_responses_input, _normalize_codex_response, _preflight_codex_input_items
import run_agent
from run_agent import AIAgent
@@ -4248,7 +4249,7 @@ class TestNormalizeCodexDictArguments:
json.dumps, not str(), so downstream json.loads() succeeds."""
args_dict = {"query": "weather in NYC", "units": "celsius"}
response = self._make_codex_response("function_call", args_dict)
msg, _ = agent._normalize_codex_response(response)
msg, _ = _normalize_codex_response(response)
tc = msg.tool_calls[0]
parsed = json.loads(tc.function.arguments)
assert parsed == args_dict
@@ -4257,7 +4258,7 @@ class TestNormalizeCodexDictArguments:
"""dict arguments from custom_tool_call must also use json.dumps."""
args_dict = {"path": "/tmp/test.txt", "content": "hello"}
response = self._make_codex_response("custom_tool_call", args_dict)
msg, _ = agent._normalize_codex_response(response)
msg, _ = _normalize_codex_response(response)
tc = msg.tool_calls[0]
parsed = json.loads(tc.function.arguments)
assert parsed == args_dict
@@ -4266,7 +4267,7 @@ class TestNormalizeCodexDictArguments:
"""String arguments must pass through without modification."""
args_str = '{"query": "test"}'
response = self._make_codex_response("function_call", args_str)
msg, _ = agent._normalize_codex_response(response)
msg, _ = _normalize_codex_response(response)
tc = msg.tool_calls[0]
assert tc.function.arguments == args_str

View file

@@ -640,7 +640,8 @@ def test_run_conversation_codex_tool_round_trip(monkeypatch):
def test_chat_messages_to_responses_input_uses_call_id_for_function_call(monkeypatch):
agent = _build_agent(monkeypatch)
items = agent._chat_messages_to_responses_input(
from agent.codex_responses_adapter import _chat_messages_to_responses_input
items = _chat_messages_to_responses_input(
[
{"role": "user", "content": "Run terminal"},
{
@@ -668,7 +669,8 @@ def test_chat_messages_to_responses_input_uses_call_id_for_function_call(monkeyp
def test_chat_messages_to_responses_input_accepts_call_pipe_fc_ids(monkeypatch):
agent = _build_agent(monkeypatch)
items = agent._chat_messages_to_responses_input(
from agent.codex_responses_adapter import _chat_messages_to_responses_input
items = _chat_messages_to_responses_input(
[
{"role": "user", "content": "Run terminal"},
{
@@ -696,7 +698,8 @@ def test_chat_messages_to_responses_input_accepts_call_pipe_fc_ids(monkeypatch):
def test_preflight_codex_api_kwargs_strips_optional_function_call_id(monkeypatch):
agent = _build_agent(monkeypatch)
preflight = agent._preflight_codex_api_kwargs(
from agent.codex_responses_adapter import _preflight_codex_api_kwargs
preflight = _preflight_codex_api_kwargs(
{
"model": "gpt-5-codex",
"instructions": "You are Hermes.",
@@ -724,7 +727,8 @@ def test_preflight_codex_api_kwargs_rejects_function_call_output_without_call_id
agent = _build_agent(monkeypatch)
with pytest.raises(ValueError, match="function_call_output is missing call_id"):
agent._preflight_codex_api_kwargs(
from agent.codex_responses_adapter import _preflight_codex_api_kwargs
_preflight_codex_api_kwargs(
{
"model": "gpt-5-codex",
"instructions": "You are Hermes.",
@@ -741,7 +745,8 @@ def test_preflight_codex_api_kwargs_rejects_unsupported_request_fields(monkeypat
kwargs["some_unknown_field"] = "value"
with pytest.raises(ValueError, match="unsupported field"):
agent._preflight_codex_api_kwargs(kwargs)
from agent.codex_responses_adapter import _preflight_codex_api_kwargs
_preflight_codex_api_kwargs(kwargs)
def test_preflight_codex_api_kwargs_allows_reasoning_and_temperature(monkeypatch):
@@ -752,7 +757,8 @@ def test_preflight_codex_api_kwargs_allows_reasoning_and_temperature(monkeypatch
kwargs["temperature"] = 0.7
kwargs["max_output_tokens"] = 4096
result = agent._preflight_codex_api_kwargs(kwargs)
from agent.codex_responses_adapter import _preflight_codex_api_kwargs
result = _preflight_codex_api_kwargs(kwargs)
assert result["reasoning"] == {"effort": "high", "summary": "auto"}
assert result["include"] == ["reasoning.encrypted_content"]
assert result["temperature"] == 0.7
@@ -764,7 +770,8 @@ def test_preflight_codex_api_kwargs_allows_service_tier(monkeypatch):
kwargs = _codex_request_kwargs()
kwargs["service_tier"] = "priority"
result = agent._preflight_codex_api_kwargs(kwargs)
from agent.codex_responses_adapter import _preflight_codex_api_kwargs
result = _preflight_codex_api_kwargs(kwargs)
assert result["service_tier"] == "priority"
@@ -841,7 +848,8 @@ def test_run_conversation_codex_continues_after_incomplete_interim_message(monke
def test_normalize_codex_response_marks_commentary_only_message_as_incomplete(monkeypatch):
agent = _build_agent(monkeypatch)
assistant_message, finish_reason = agent._normalize_codex_response(
from agent.codex_responses_adapter import _normalize_codex_response
assistant_message, finish_reason = _normalize_codex_response(
_codex_commentary_message_response("I'll inspect the repository first.")
)
@@ -1068,7 +1076,8 @@ def test_normalize_codex_response_marks_reasoning_only_as_incomplete(monkeypatch
sends them into the empty-content retry loop (3 retries then failure).
"""
agent = _build_agent(monkeypatch)
assistant_message, finish_reason = agent._normalize_codex_response(
from agent.codex_responses_adapter import _normalize_codex_response
assistant_message, finish_reason = _normalize_codex_response(
_codex_reasoning_only_response()
)
@@ -1101,7 +1110,8 @@ def test_normalize_codex_response_reasoning_with_content_is_stop(monkeypatch):
status="completed",
model="gpt-5-codex",
)
assistant_message, finish_reason = agent._normalize_codex_response(response)
from agent.codex_responses_adapter import _normalize_codex_response
assistant_message, finish_reason = _normalize_codex_response(response)
assert finish_reason == "stop"
assert "Here is the answer" in assistant_message.content
@@ -1186,7 +1196,8 @@ def test_chat_messages_to_responses_input_reasoning_only_has_following_item(monk
],
},
]
items = agent._chat_messages_to_responses_input(messages)
from agent.codex_responses_adapter import _chat_messages_to_responses_input
items = _chat_messages_to_responses_input(messages)
# Find the reasoning item
reasoning_indices = [i for i, it in enumerate(items) if it.get("type") == "reasoning"]
@@ -1273,7 +1284,8 @@ def test_chat_messages_to_responses_input_deduplicates_reasoning_ids(monkeypatch
],
},
]
items = agent._chat_messages_to_responses_input(messages)
from agent.codex_responses_adapter import _chat_messages_to_responses_input
items = _chat_messages_to_responses_input(messages)
reasoning_items = [it for it in items if it.get("type") == "reasoning"]
# Dedup: rs_aaa appears in both turns but should only be emitted once.
@@ -1299,7 +1311,8 @@ def test_preflight_codex_input_deduplicates_reasoning_ids(monkeypatch):
{"type": "reasoning", "id": "rs_zzz", "encrypted_content": "enc_b"},
{"role": "assistant", "content": "done"},
]
normalized = agent._preflight_codex_input_items(raw_input)
from agent.codex_responses_adapter import _preflight_codex_input_items
normalized = _preflight_codex_input_items(raw_input)
reasoning_items = [it for it in normalized if it.get("type") == "reasoning"]
# rs_xyz duplicate should be collapsed to one item; rs_zzz kept.