From 2789bf4e2591e4f8bc773f4f0ae3c4ac062b9631 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Sun, 7 Jun 2026 19:48:28 -0700 Subject: [PATCH] fix(auxiliary): route Codex Responses path through shared converter (#5709) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The auxiliary Codex adapter maintained its own chat->Responses conversion loop that forwarded every non-system message's role verbatim into Responses input[]. When flush_memories()/compression replayed session history containing assistant tool_calls + role=tool results, those tool messages leaked into the request and the Responses API rejected them with HTTP 400: Invalid value: 'tool'. Route _CodexCompletionsAdapter.create() through the same shared converter the main agent transport uses (_chat_messages_to_responses_input), so tool calls become function_call items and tool results become function_call_output items with a valid call_id. Single conversion path means no future drift. Also remove the now-dead _convert_content_for_responses() helper — its only caller was the private conversion loop this change deletes. Co-authored-by: ProgramCaiCai --- agent/auxiliary_client.py | 77 ++++++-------------- scripts/release.py | 1 + tests/agent/test_auxiliary_client.py | 103 +++++++++++++++++++++++++++ 3 files changed, 124 insertions(+), 57 deletions(-) diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 79352e2fe3a..252a0b88232 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -637,54 +637,6 @@ def _pool_runtime_base_url(entry: Any, fallback: str = "") -> str: # calls to the Codex Responses API so callers don't need any changes. -def _convert_content_for_responses(content: Any) -> Any: - """Convert chat.completions content to Responses API format. - - chat.completions uses: - {"type": "text", "text": "..."} - {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}} - - Responses API uses: - {"type": "input_text", "text": "..."} - {"type": "input_image", "image_url": "data:image/png;base64,..."} - - If content is a plain string, it's returned as-is (the Responses API - accepts strings directly for text-only messages). - """ - if isinstance(content, str): - return content - if not isinstance(content, list): - return str(content) if content else "" - - converted: List[Dict[str, Any]] = [] - for part in content: - if not isinstance(part, dict): - continue - ptype = part.get("type", "") - if ptype == "text": - converted.append({"type": "input_text", "text": part.get("text", "")}) - elif ptype == "image_url": - # chat.completions nests the URL: {"image_url": {"url": "..."}} - image_data = part.get("image_url", {}) - url = image_data.get("url", "") if isinstance(image_data, dict) else str(image_data) - entry: Dict[str, Any] = {"type": "input_image", "image_url": url} - # Preserve detail if specified - detail = image_data.get("detail") if isinstance(image_data, dict) else None - if detail: - entry["detail"] = detail - converted.append(entry) - elif ptype in {"input_text", "input_image"}: - # Already in Responses format — pass through - converted.append(part) - else: - # Unknown content type — try to preserve as text - text = part.get("text", "") - if text: - converted.append({"type": "input_text", "text": text}) - - return converted or "" - - class _CodexCompletionsAdapter: """Drop-in shim that accepts chat.completions.create() kwargs and routes them through the Codex Responses streaming API.""" @@ -697,26 +649,37 @@ class _CodexCompletionsAdapter: messages = kwargs.get("messages", []) model = kwargs.get("model", self._model) - # Separate system/instructions from conversation messages. - # Convert chat.completions multimodal content blocks to Responses - # API format (input_text / input_image instead of text / image_url). + # Separate system/instructions from replayable conversation messages, + # then route the rest through the SINGLE shared chat->Responses + # converter used by the main agent transport + # (agent/transports/codex.py). Maintaining a private conversion loop + # here let chat-style messages with role="tool" leak straight into + # Responses input[] — which the Responses API rejects with + # "Invalid value: 'tool'. Supported values are: 'assistant', 'system', + # 'developer', and 'user'." (issue #5709, hit hard by flush_memories() + # / compression replaying real session history that includes assistant + # tool_calls + role="tool" results). The shared converter encodes + # assistant tool calls as `function_call` items and tool results as + # `function_call_output` items with a valid call_id, so every + # Responses path normalizes tool history identically and cannot drift. + from agent.codex_responses_adapter import _chat_messages_to_responses_input + instructions = "You are a helpful assistant." - input_msgs: List[Dict[str, Any]] = [] + replay_messages: List[Dict[str, Any]] = [] for msg in messages: role = msg.get("role", "user") content = msg.get("content") or "" if role == "system": instructions = content if isinstance(content, str) else str(content) else: - input_msgs.append({ - "role": role, - "content": _convert_content_for_responses(content), - }) + replay_messages.append(msg) + + input_items = _chat_messages_to_responses_input(replay_messages) resp_kwargs: Dict[str, Any] = { "model": model, "instructions": instructions, - "input": input_msgs or [{"role": "user", "content": ""}], + "input": input_items or [{"role": "user", "content": ""}], "store": False, } diff --git a/scripts/release.py b/scripts/release.py index c40cfd63dad..312e53f8c22 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -52,6 +52,7 @@ AUTHOR_MAP = { "maxmitcham@mac.home": "maxtrigify", "ccook@nvms.com": "ccook1963", "thomas.paquette@gmail.com": "RyTsYdUp", + "techxacm@gmail.com": "ProgramCaiCai", "266365592+bmoore210@users.noreply.github.com": "bmoore210", "manishbyatroy@gmail.com": "manishbyatroy", "chilltulpa@gmail.com": "TheGardenGallery", diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py index 074372d1c6d..c446d874e57 100644 --- a/tests/agent/test_auxiliary_client.py +++ b/tests/agent/test_auxiliary_client.py @@ -2872,6 +2872,109 @@ class TestCodexAuxiliaryAdapterTimeout: assert time.monotonic() - started < 0.14 +class TestCodexAuxiliaryToolMessageConversion: + """Regression for issue #5709. + + The auxiliary Codex adapter used to maintain its own chat->Responses + conversion loop that forwarded every non-system message's ``role`` + verbatim into Responses ``input[]``. When ``flush_memories()`` / + compression replayed real session history containing assistant + ``tool_calls`` and ``role="tool"`` results, the tool messages leaked + into the request and the Responses API rejected them with + ``HTTP 400: Invalid value: 'tool'. Supported values are: 'assistant', + 'system', 'developer', and 'user'.`` + + The fix routes the auxiliary path through the SAME shared converter the + main agent transport uses (``_chat_messages_to_responses_input``), so + no Responses request ever includes a raw ``role="tool"`` input item. + """ + + def _capture_input(self, messages): + from agent.auxiliary_client import _CodexCompletionsAdapter + + class _FakeCreateStream: + def __iter__(self): + return iter([ + SimpleNamespace(type="response.created"), + SimpleNamespace( + type="response.output_item.done", + item=SimpleNamespace( + type="message", + content=[SimpleNamespace(type="output_text", text="ok")], + ), + ), + SimpleNamespace(type="response.completed", response=SimpleNamespace( + status="completed", id="r1", usage=None, + )), + ]) + + def close(self): + pass + + class FakeResponses: + def __init__(self): + self.kwargs = None + + def create(self, **kwargs): + self.kwargs = kwargs + return _FakeCreateStream() + + fake_client = SimpleNamespace(responses=FakeResponses()) + adapter = _CodexCompletionsAdapter(fake_client, "gpt-5.5") + adapter.create(messages=messages, model="gpt-5.5") + return fake_client.responses.kwargs + + def test_tool_history_never_leaks_role_tool(self): + messages = [ + {"role": "system", "content": "You are a memory summarizer."}, + {"role": "user", "content": "What files did I touch?"}, + { + "role": "assistant", + "content": "", + "tool_calls": [{ + "id": "call_abc123", + "type": "function", + "function": {"name": "search_files", "arguments": '{"pattern":"foo"}'}, + }], + }, + {"role": "tool", "tool_call_id": "call_abc123", "content": "Found 3 matches"}, + {"role": "assistant", "content": "You touched bar.py."}, + ] + kwargs = self._capture_input(messages) + input_items = kwargs["input"] + + # No raw role="tool" item reaches the Responses API (the 400 trigger). + assert not any(it.get("role") == "tool" for it in input_items) + + # Assistant tool call -> function_call item with a call_id. + function_calls = [it for it in input_items if it.get("type") == "function_call"] + assert function_calls, "assistant tool_call must become a function_call item" + assert function_calls[0]["call_id"] == "call_abc123" + assert function_calls[0]["name"] == "search_files" + + # Tool result -> function_call_output with the matching call_id. + outputs = [it for it in input_items if it.get("type") == "function_call_output"] + assert outputs, "tool result must become a function_call_output item" + assert outputs[0]["call_id"] == "call_abc123" + + # System message is hoisted to instructions, not left in input[]. + assert kwargs["instructions"] == "You are a memory summarizer." + assert not any(it.get("role") == "system" for it in input_items) + + def test_plain_text_history_still_works(self): + messages = [ + {"role": "system", "content": "sys"}, + {"role": "user", "content": "hello"}, + {"role": "assistant", "content": "hi there"}, + ] + kwargs = self._capture_input(messages) + input_items = kwargs["input"] + roles = [it.get("role") for it in input_items] + assert "user" in roles and "assistant" in roles + assert not any(it.get("role") == "tool" for it in input_items) + assert kwargs["instructions"] == "sys" + + class TestCodexAuxiliaryAdapterNullOutputRecovery: def test_recovers_output_item_when_terminal_event_has_null_output(self): """Regression for #11179 in auxiliary calls.