feat: add ResponsesApiTransport + wire all Codex transport paths

Add ResponsesApiTransport wrapping codex_responses_adapter.py behind the
ProviderTransport ABC. Auto-registered via _discover_transports().

Wire ALL Codex transport methods to production paths in run_agent.py:
- build_kwargs: main _build_api_kwargs codex branch (50 lines extracted)
- normalize_response: main loop + flush + summary + retry (4 sites)
- convert_tools: memory flush tool override
- convert_messages: called internally via build_kwargs
- validate_response: response validation gate
- preflight_kwargs: request sanitization (2 sites)

Remove 7 dead legacy wrappers from AIAgent (_responses_tools,
_chat_messages_to_responses_input, _normalize_codex_response,
_preflight_codex_api_kwargs, _preflight_codex_input_items,
_extract_responses_message_text, _extract_responses_reasoning_text).
Keep 3 ID manipulation methods still used by _build_assistant_message.

Update 18 test call sites across 3 test files to call adapter functions
directly instead of through deleted AIAgent wrappers.

24 new tests. 343 codex/responses/transport tests pass (0 failures).

PR 4 of the provider transport refactor.
This commit is contained in:
kshitijk4poor 2026-04-21 14:24:41 +05:30 committed by Teknium
parent 09dd5eb6a5
commit c832ebd67c
7 changed files with 589 additions and 169 deletions

View file

@@ -12,6 +12,7 @@ from types import SimpleNamespace
from unittest.mock import patch, MagicMock
import pytest
from agent.codex_responses_adapter import _chat_messages_to_responses_input, _normalize_codex_response, _preflight_codex_input_items
sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None))
sys.modules.setdefault("firecrawl", types.SimpleNamespace(Firecrawl=object))
@@ -446,7 +447,7 @@ class TestChatMessagesToResponsesInput:
agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
base_url="https://chatgpt.com/backend-api/codex")
messages = [{"role": "user", "content": "hello"}]
items = agent._chat_messages_to_responses_input(messages)
items = _chat_messages_to_responses_input(messages)
assert items == [{"role": "user", "content": "hello"}]
def test_system_messages_filtered(self, monkeypatch):
@@ -456,7 +457,7 @@ class TestChatMessagesToResponsesInput:
{"role": "system", "content": "be helpful"},
{"role": "user", "content": "hello"},
]
items = agent._chat_messages_to_responses_input(messages)
items = _chat_messages_to_responses_input(messages)
assert len(items) == 1
assert items[0]["role"] == "user"
@@ -472,7 +473,7 @@ class TestChatMessagesToResponsesInput:
"function": {"name": "web_search", "arguments": '{"query": "test"}'},
}],
}]
items = agent._chat_messages_to_responses_input(messages)
items = _chat_messages_to_responses_input(messages)
fc_items = [i for i in items if i.get("type") == "function_call"]
assert len(fc_items) == 1
assert fc_items[0]["name"] == "web_search"
@@ -482,7 +483,7 @@ class TestChatMessagesToResponsesInput:
agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
base_url="https://chatgpt.com/backend-api/codex")
messages = [{"role": "tool", "tool_call_id": "call_abc", "content": "result here"}]
items = agent._chat_messages_to_responses_input(messages)
items = _chat_messages_to_responses_input(messages)
assert items[0]["type"] == "function_call_output"
assert items[0]["call_id"] == "call_abc"
assert items[0]["output"] == "result here"
@@ -502,7 +503,7 @@ class TestChatMessagesToResponsesInput:
},
{"role": "user", "content": "continue"},
]
items = agent._chat_messages_to_responses_input(messages)
items = _chat_messages_to_responses_input(messages)
reasoning_items = [i for i in items if i.get("type") == "reasoning"]
assert len(reasoning_items) == 1
assert reasoning_items[0]["encrypted_content"] == "gAAAA_test_blob"
@@ -515,7 +516,7 @@ class TestChatMessagesToResponsesInput:
{"role": "assistant", "content": "hi"},
{"role": "user", "content": "hello"},
]
items = agent._chat_messages_to_responses_input(messages)
items = _chat_messages_to_responses_input(messages)
reasoning_items = [i for i in items if i.get("type") == "reasoning"]
assert len(reasoning_items) == 0
@@ -539,7 +540,7 @@ class TestNormalizeCodexResponse:
],
status="completed",
)
msg, reason = agent._normalize_codex_response(response)
msg, reason = _normalize_codex_response(response)
assert msg.content == "Hello!"
assert reason == "stop"
@@ -557,7 +558,7 @@ class TestNormalizeCodexResponse:
],
status="completed",
)
msg, reason = agent._normalize_codex_response(response)
msg, reason = _normalize_codex_response(response)
assert msg.content == "42"
assert "math" in msg.reasoning
assert reason == "stop"
@@ -576,7 +577,7 @@ class TestNormalizeCodexResponse:
],
status="completed",
)
msg, reason = agent._normalize_codex_response(response)
msg, reason = _normalize_codex_response(response)
assert msg.codex_reasoning_items is not None
assert len(msg.codex_reasoning_items) == 1
assert msg.codex_reasoning_items[0]["encrypted_content"] == "gAAAA_secret_blob_123"
@@ -592,7 +593,7 @@ class TestNormalizeCodexResponse:
],
status="completed",
)
msg, reason = agent._normalize_codex_response(response)
msg, reason = _normalize_codex_response(response)
assert msg.codex_reasoning_items is None
def test_tool_calls_extracted(self, monkeypatch):
@@ -605,7 +606,7 @@ class TestNormalizeCodexResponse:
],
status="completed",
)
msg, reason = agent._normalize_codex_response(response)
msg, reason = _normalize_codex_response(response)
assert reason == "tool_calls"
assert len(msg.tool_calls) == 1
assert msg.tool_calls[0].function.name == "web_search"
@@ -821,7 +822,7 @@ class TestCodexReasoningPreflight:
"summary": [{"type": "summary_text", "text": "Thinking about it"}]},
{"role": "assistant", "content": "hi there"},
]
normalized = agent._preflight_codex_input_items(raw_input)
normalized = _preflight_codex_input_items(raw_input)
reasoning_items = [i for i in normalized if i.get("type") == "reasoning"]
assert len(reasoning_items) == 1
assert reasoning_items[0]["encrypted_content"] == "abc123encrypted"
@@ -837,7 +838,7 @@ class TestCodexReasoningPreflight:
raw_input = [
{"type": "reasoning", "encrypted_content": "abc123"},
]
normalized = agent._preflight_codex_input_items(raw_input)
normalized = _preflight_codex_input_items(raw_input)
assert len(normalized) == 1
assert "id" not in normalized[0]
assert normalized[0]["summary"] == [] # default empty summary
@@ -849,7 +850,7 @@ class TestCodexReasoningPreflight:
{"type": "reasoning", "encrypted_content": ""},
{"role": "user", "content": "hello"},
]
normalized = agent._preflight_codex_input_items(raw_input)
normalized = _preflight_codex_input_items(raw_input)
reasoning_items = [i for i in normalized if i.get("type") == "reasoning"]
assert len(reasoning_items) == 0
@@ -868,7 +869,7 @@ class TestCodexReasoningPreflight:
},
{"role": "user", "content": "follow up"},
]
items = agent._chat_messages_to_responses_input(messages)
items = _chat_messages_to_responses_input(messages)
reasoning_items = [i for i in items if isinstance(i, dict) and i.get("type") == "reasoning"]
assert len(reasoning_items) == 1
assert reasoning_items[0]["encrypted_content"] == "enc123"

View file

@@ -16,6 +16,7 @@ from types import SimpleNamespace
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from agent.codex_responses_adapter import _chat_messages_to_responses_input, _normalize_codex_response, _preflight_codex_input_items
import run_agent
from run_agent import AIAgent
@@ -4248,7 +4249,7 @@ class TestNormalizeCodexDictArguments:
json.dumps, not str(), so downstream json.loads() succeeds."""
args_dict = {"query": "weather in NYC", "units": "celsius"}
response = self._make_codex_response("function_call", args_dict)
msg, _ = agent._normalize_codex_response(response)
msg, _ = _normalize_codex_response(response)
tc = msg.tool_calls[0]
parsed = json.loads(tc.function.arguments)
assert parsed == args_dict
@@ -4257,7 +4258,7 @@ class TestNormalizeCodexDictArguments:
"""dict arguments from custom_tool_call must also use json.dumps."""
args_dict = {"path": "/tmp/test.txt", "content": "hello"}
response = self._make_codex_response("custom_tool_call", args_dict)
msg, _ = agent._normalize_codex_response(response)
msg, _ = _normalize_codex_response(response)
tc = msg.tool_calls[0]
parsed = json.loads(tc.function.arguments)
assert parsed == args_dict
@@ -4266,7 +4267,7 @@ class TestNormalizeCodexDictArguments:
"""String arguments must pass through without modification."""
args_str = '{"query": "test"}'
response = self._make_codex_response("function_call", args_str)
msg, _ = agent._normalize_codex_response(response)
msg, _ = _normalize_codex_response(response)
tc = msg.tool_calls[0]
assert tc.function.arguments == args_str

View file

@@ -640,7 +640,8 @@ def test_run_conversation_codex_tool_round_trip(monkeypatch):
def test_chat_messages_to_responses_input_uses_call_id_for_function_call(monkeypatch):
agent = _build_agent(monkeypatch)
items = agent._chat_messages_to_responses_input(
from agent.codex_responses_adapter import _chat_messages_to_responses_input
items = _chat_messages_to_responses_input(
[
{"role": "user", "content": "Run terminal"},
{
@@ -668,7 +669,8 @@ def test_chat_messages_to_responses_input_uses_call_id_for_function_call(monkeyp
def test_chat_messages_to_responses_input_accepts_call_pipe_fc_ids(monkeypatch):
agent = _build_agent(monkeypatch)
items = agent._chat_messages_to_responses_input(
from agent.codex_responses_adapter import _chat_messages_to_responses_input
items = _chat_messages_to_responses_input(
[
{"role": "user", "content": "Run terminal"},
{
@@ -696,7 +698,8 @@ def test_chat_messages_to_responses_input_accepts_call_pipe_fc_ids(monkeypatch):
def test_preflight_codex_api_kwargs_strips_optional_function_call_id(monkeypatch):
agent = _build_agent(monkeypatch)
preflight = agent._preflight_codex_api_kwargs(
from agent.codex_responses_adapter import _preflight_codex_api_kwargs
preflight = _preflight_codex_api_kwargs(
{
"model": "gpt-5-codex",
"instructions": "You are Hermes.",
@@ -724,7 +727,8 @@ def test_preflight_codex_api_kwargs_rejects_function_call_output_without_call_id
agent = _build_agent(monkeypatch)
with pytest.raises(ValueError, match="function_call_output is missing call_id"):
agent._preflight_codex_api_kwargs(
from agent.codex_responses_adapter import _preflight_codex_api_kwargs
_preflight_codex_api_kwargs(
{
"model": "gpt-5-codex",
"instructions": "You are Hermes.",
@@ -741,7 +745,8 @@ def test_preflight_codex_api_kwargs_rejects_unsupported_request_fields(monkeypat
kwargs["some_unknown_field"] = "value"
with pytest.raises(ValueError, match="unsupported field"):
agent._preflight_codex_api_kwargs(kwargs)
from agent.codex_responses_adapter import _preflight_codex_api_kwargs
_preflight_codex_api_kwargs(kwargs)
def test_preflight_codex_api_kwargs_allows_reasoning_and_temperature(monkeypatch):
@@ -752,7 +757,8 @@ def test_preflight_codex_api_kwargs_allows_reasoning_and_temperature(monkeypatch
kwargs["temperature"] = 0.7
kwargs["max_output_tokens"] = 4096
result = agent._preflight_codex_api_kwargs(kwargs)
from agent.codex_responses_adapter import _preflight_codex_api_kwargs
result = _preflight_codex_api_kwargs(kwargs)
assert result["reasoning"] == {"effort": "high", "summary": "auto"}
assert result["include"] == ["reasoning.encrypted_content"]
assert result["temperature"] == 0.7
@@ -764,7 +770,8 @@ def test_preflight_codex_api_kwargs_allows_service_tier(monkeypatch):
kwargs = _codex_request_kwargs()
kwargs["service_tier"] = "priority"
result = agent._preflight_codex_api_kwargs(kwargs)
from agent.codex_responses_adapter import _preflight_codex_api_kwargs
result = _preflight_codex_api_kwargs(kwargs)
assert result["service_tier"] == "priority"
@@ -841,7 +848,8 @@ def test_run_conversation_codex_continues_after_incomplete_interim_message(monke
def test_normalize_codex_response_marks_commentary_only_message_as_incomplete(monkeypatch):
agent = _build_agent(monkeypatch)
assistant_message, finish_reason = agent._normalize_codex_response(
from agent.codex_responses_adapter import _normalize_codex_response
assistant_message, finish_reason = _normalize_codex_response(
_codex_commentary_message_response("I'll inspect the repository first.")
)
@@ -1068,7 +1076,8 @@ def test_normalize_codex_response_marks_reasoning_only_as_incomplete(monkeypatch
sends them into the empty-content retry loop (3 retries then failure).
"""
agent = _build_agent(monkeypatch)
assistant_message, finish_reason = agent._normalize_codex_response(
from agent.codex_responses_adapter import _normalize_codex_response
assistant_message, finish_reason = _normalize_codex_response(
_codex_reasoning_only_response()
)
@@ -1101,7 +1110,8 @@ def test_normalize_codex_response_reasoning_with_content_is_stop(monkeypatch):
status="completed",
model="gpt-5-codex",
)
assistant_message, finish_reason = agent._normalize_codex_response(response)
from agent.codex_responses_adapter import _normalize_codex_response
assistant_message, finish_reason = _normalize_codex_response(response)
assert finish_reason == "stop"
assert "Here is the answer" in assistant_message.content
@@ -1186,7 +1196,8 @@ def test_chat_messages_to_responses_input_reasoning_only_has_following_item(monk
],
},
]
items = agent._chat_messages_to_responses_input(messages)
from agent.codex_responses_adapter import _chat_messages_to_responses_input
items = _chat_messages_to_responses_input(messages)
# Find the reasoning item
reasoning_indices = [i for i, it in enumerate(items) if it.get("type") == "reasoning"]
@@ -1273,7 +1284,8 @@ def test_chat_messages_to_responses_input_deduplicates_reasoning_ids(monkeypatch
],
},
]
items = agent._chat_messages_to_responses_input(messages)
from agent.codex_responses_adapter import _chat_messages_to_responses_input
items = _chat_messages_to_responses_input(messages)
reasoning_items = [it for it in items if it.get("type") == "reasoning"]
# Dedup: rs_aaa appears in both turns but should only be emitted once.
@@ -1299,7 +1311,8 @@ def test_preflight_codex_input_deduplicates_reasoning_ids(monkeypatch):
{"type": "reasoning", "id": "rs_zzz", "encrypted_content": "enc_b"},
{"role": "assistant", "content": "done"},
]
normalized = agent._preflight_codex_input_items(raw_input)
from agent.codex_responses_adapter import _preflight_codex_input_items
normalized = _preflight_codex_input_items(raw_input)
reasoning_items = [it for it in normalized if it.get("type") == "reasoning"]
# rs_xyz duplicate should be collapsed to one item; rs_zzz kept.