fix(auxiliary): route Codex Responses path through shared converter (#5709)

The auxiliary Codex adapter maintained its own chat->Responses conversion
loop that forwarded every non-system message's role verbatim into
Responses input[]. When flush_memories()/compression replayed session
history containing assistant tool_calls + role=tool results, those tool
messages leaked into the request and the Responses API rejected them with
HTTP 400: Invalid value: 'tool'.

Route _CodexCompletionsAdapter.create() through the same shared converter
the main agent transport uses (_chat_messages_to_responses_input), so tool
calls become function_call items and tool results become function_call_output
items with a valid call_id. Single conversion path means no future drift.

Also remove the now-dead _convert_content_for_responses() helper — its only
caller was the private conversion loop this change deletes.

Co-authored-by: ProgramCaiCai <techxacm@gmail.com>
This commit is contained in:
Teknium 2026-06-07 19:48:28 -07:00
parent 568e127612
commit 2789bf4e25
3 changed files with 124 additions and 57 deletions

View file

@ -637,54 +637,6 @@ def _pool_runtime_base_url(entry: Any, fallback: str = "") -> str:
# calls to the Codex Responses API so callers don't need any changes.
def _convert_content_for_responses(content: Any) -> Any:
"""Convert chat.completions content to Responses API format.
chat.completions uses:
{"type": "text", "text": "..."}
{"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}}
Responses API uses:
{"type": "input_text", "text": "..."}
{"type": "input_image", "image_url": "data:image/png;base64,..."}
If content is a plain string, it's returned as-is (the Responses API
accepts strings directly for text-only messages).
"""
if isinstance(content, str):
return content
if not isinstance(content, list):
return str(content) if content else ""
converted: List[Dict[str, Any]] = []
for part in content:
if not isinstance(part, dict):
continue
ptype = part.get("type", "")
if ptype == "text":
converted.append({"type": "input_text", "text": part.get("text", "")})
elif ptype == "image_url":
# chat.completions nests the URL: {"image_url": {"url": "..."}}
image_data = part.get("image_url", {})
url = image_data.get("url", "") if isinstance(image_data, dict) else str(image_data)
entry: Dict[str, Any] = {"type": "input_image", "image_url": url}
# Preserve detail if specified
detail = image_data.get("detail") if isinstance(image_data, dict) else None
if detail:
entry["detail"] = detail
converted.append(entry)
elif ptype in {"input_text", "input_image"}:
# Already in Responses format — pass through
converted.append(part)
else:
# Unknown content type — try to preserve as text
text = part.get("text", "")
if text:
converted.append({"type": "input_text", "text": text})
return converted or ""
class _CodexCompletionsAdapter:
"""Drop-in shim that accepts chat.completions.create() kwargs and
routes them through the Codex Responses streaming API."""
@ -697,26 +649,37 @@ class _CodexCompletionsAdapter:
messages = kwargs.get("messages", [])
model = kwargs.get("model", self._model)
# Separate system/instructions from conversation messages.
# Convert chat.completions multimodal content blocks to Responses
# API format (input_text / input_image instead of text / image_url).
# Separate system/instructions from replayable conversation messages,
# then route the rest through the SINGLE shared chat->Responses
# converter used by the main agent transport
# (agent/transports/codex.py). Maintaining a private conversion loop
# here let chat-style messages with role="tool" leak straight into
# Responses input[] — which the Responses API rejects with
# "Invalid value: 'tool'. Supported values are: 'assistant', 'system',
# 'developer', and 'user'." (issue #5709, hit hard by flush_memories()
# / compression replaying real session history that includes assistant
# tool_calls + role="tool" results). The shared converter encodes
# assistant tool calls as `function_call` items and tool results as
# `function_call_output` items with a valid call_id, so every
# Responses path normalizes tool history identically and cannot drift.
from agent.codex_responses_adapter import _chat_messages_to_responses_input
instructions = "You are a helpful assistant."
input_msgs: List[Dict[str, Any]] = []
replay_messages: List[Dict[str, Any]] = []
for msg in messages:
role = msg.get("role", "user")
content = msg.get("content") or ""
if role == "system":
instructions = content if isinstance(content, str) else str(content)
else:
input_msgs.append({
"role": role,
"content": _convert_content_for_responses(content),
})
replay_messages.append(msg)
input_items = _chat_messages_to_responses_input(replay_messages)
resp_kwargs: Dict[str, Any] = {
"model": model,
"instructions": instructions,
"input": input_msgs or [{"role": "user", "content": ""}],
"input": input_items or [{"role": "user", "content": ""}],
"store": False,
}

View file

@ -52,6 +52,7 @@ AUTHOR_MAP = {
"maxmitcham@mac.home": "maxtrigify",
"ccook@nvms.com": "ccook1963",
"thomas.paquette@gmail.com": "RyTsYdUp",
"techxacm@gmail.com": "ProgramCaiCai",
"266365592+bmoore210@users.noreply.github.com": "bmoore210",
"manishbyatroy@gmail.com": "manishbyatroy",
"chilltulpa@gmail.com": "TheGardenGallery",

View file

@ -2872,6 +2872,109 @@ class TestCodexAuxiliaryAdapterTimeout:
assert time.monotonic() - started < 0.14
class TestCodexAuxiliaryToolMessageConversion:
"""Regression for issue #5709.
The auxiliary Codex adapter used to maintain its own chat->Responses
conversion loop that forwarded every non-system message's ``role``
verbatim into Responses ``input[]``. When ``flush_memories()`` /
compression replayed real session history containing assistant
``tool_calls`` and ``role="tool"`` results, the tool messages leaked
into the request and the Responses API rejected them with
``HTTP 400: Invalid value: 'tool'. Supported values are: 'assistant',
'system', 'developer', and 'user'.``
The fix routes the auxiliary path through the SAME shared converter the
main agent transport uses (``_chat_messages_to_responses_input``), so
no Responses request ever includes a raw ``role="tool"`` input item.
"""
def _capture_input(self, messages):
from agent.auxiliary_client import _CodexCompletionsAdapter
class _FakeCreateStream:
def __iter__(self):
return iter([
SimpleNamespace(type="response.created"),
SimpleNamespace(
type="response.output_item.done",
item=SimpleNamespace(
type="message",
content=[SimpleNamespace(type="output_text", text="ok")],
),
),
SimpleNamespace(type="response.completed", response=SimpleNamespace(
status="completed", id="r1", usage=None,
)),
])
def close(self):
pass
class FakeResponses:
def __init__(self):
self.kwargs = None
def create(self, **kwargs):
self.kwargs = kwargs
return _FakeCreateStream()
fake_client = SimpleNamespace(responses=FakeResponses())
adapter = _CodexCompletionsAdapter(fake_client, "gpt-5.5")
adapter.create(messages=messages, model="gpt-5.5")
return fake_client.responses.kwargs
def test_tool_history_never_leaks_role_tool(self):
messages = [
{"role": "system", "content": "You are a memory summarizer."},
{"role": "user", "content": "What files did I touch?"},
{
"role": "assistant",
"content": "",
"tool_calls": [{
"id": "call_abc123",
"type": "function",
"function": {"name": "search_files", "arguments": '{"pattern":"foo"}'},
}],
},
{"role": "tool", "tool_call_id": "call_abc123", "content": "Found 3 matches"},
{"role": "assistant", "content": "You touched bar.py."},
]
kwargs = self._capture_input(messages)
input_items = kwargs["input"]
# No raw role="tool" item reaches the Responses API (the 400 trigger).
assert not any(it.get("role") == "tool" for it in input_items)
# Assistant tool call -> function_call item with a call_id.
function_calls = [it for it in input_items if it.get("type") == "function_call"]
assert function_calls, "assistant tool_call must become a function_call item"
assert function_calls[0]["call_id"] == "call_abc123"
assert function_calls[0]["name"] == "search_files"
# Tool result -> function_call_output with the matching call_id.
outputs = [it for it in input_items if it.get("type") == "function_call_output"]
assert outputs, "tool result must become a function_call_output item"
assert outputs[0]["call_id"] == "call_abc123"
# System message is hoisted to instructions, not left in input[].
assert kwargs["instructions"] == "You are a memory summarizer."
assert not any(it.get("role") == "system" for it in input_items)
def test_plain_text_history_still_works(self):
messages = [
{"role": "system", "content": "sys"},
{"role": "user", "content": "hello"},
{"role": "assistant", "content": "hi there"},
]
kwargs = self._capture_input(messages)
input_items = kwargs["input"]
roles = [it.get("role") for it in input_items]
assert "user" in roles and "assistant" in roles
assert not any(it.get("role") == "tool" for it in input_items)
assert kwargs["instructions"] == "sys"
class TestCodexAuxiliaryAdapterNullOutputRecovery:
def test_recovers_output_item_when_terminal_event_has_null_output(self):
"""Regression for #11179 in auxiliary calls.