mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
Extract 12 Codex Responses API format-conversion and normalization functions from run_agent.py into agent/codex_responses_adapter.py, following the existing pattern of anthropic_adapter.py and bedrock_adapter.py. run_agent.py: 12,550 → 11,865 lines (-685 lines) Functions moved: - _chat_content_to_responses_parts (multimodal content conversion) - _summarize_user_message_for_log (multimodal message logging) - _deterministic_call_id (cache-safe fallback IDs) - _split_responses_tool_id (composite ID splitting) - _derive_responses_function_call_id (fc_ prefix conversion) - _responses_tools (schema format conversion) - _chat_messages_to_responses_input (message format conversion) - _preflight_codex_input_items (input validation) - _preflight_codex_api_kwargs (API kwargs validation) - _extract_responses_message_text (response text extraction) - _extract_responses_reasoning_text (reasoning extraction) - _normalize_codex_response (full response normalization) All functions are stateless module-level functions. AIAgent methods remain as thin one-line wrappers. Both module-level helpers are re-exported from run_agent.py for backward compatibility with existing test imports. Includes multimodal inline image support (PR #12969) that the original PR was missing. Based on PR #12975 by @kshitijk4poor.
813 lines
34 KiB
Python
813 lines
34 KiB
Python
"""Codex Responses API adapter.
|
|
|
|
Pure format-conversion and normalization logic for the OpenAI Responses API
|
|
(used by OpenAI Codex, xAI, GitHub Models, and other Responses-compatible endpoints).
|
|
|
|
Extracted from run_agent.py to isolate Responses API-specific logic from the
|
|
core agent loop. All functions are stateless — they operate on the data passed
|
|
in and return transformed results.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import hashlib
|
|
import json
|
|
import logging
|
|
import re
|
|
import uuid
|
|
from types import SimpleNamespace
|
|
from typing import Any, Dict, List, Optional
|
|
|
|
from agent.prompt_builder import DEFAULT_AGENT_IDENTITY
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Multimodal content helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _chat_content_to_responses_parts(content: Any) -> List[Dict[str, Any]]:
|
|
"""Convert chat-style multimodal content to Responses API input parts.
|
|
|
|
Input: ``[{"type":"text"|"image_url", ...}]`` (native OpenAI Chat format)
|
|
Output: ``[{"type":"input_text"|"input_image", ...}]`` (Responses format)
|
|
|
|
Returns an empty list when ``content`` is not a list or contains no
|
|
recognized parts — callers fall back to the string path.
|
|
"""
|
|
if not isinstance(content, list):
|
|
return []
|
|
converted: List[Dict[str, Any]] = []
|
|
for part in content:
|
|
if isinstance(part, str):
|
|
if part:
|
|
converted.append({"type": "input_text", "text": part})
|
|
continue
|
|
if not isinstance(part, dict):
|
|
continue
|
|
ptype = str(part.get("type") or "").strip().lower()
|
|
if ptype in {"text", "input_text", "output_text"}:
|
|
text = part.get("text")
|
|
if isinstance(text, str) and text:
|
|
converted.append({"type": "input_text", "text": text})
|
|
continue
|
|
if ptype in {"image_url", "input_image"}:
|
|
image_ref = part.get("image_url")
|
|
detail = part.get("detail")
|
|
if isinstance(image_ref, dict):
|
|
url = image_ref.get("url")
|
|
detail = image_ref.get("detail", detail)
|
|
else:
|
|
url = image_ref
|
|
if not isinstance(url, str) or not url:
|
|
continue
|
|
image_part: Dict[str, Any] = {"type": "input_image", "image_url": url}
|
|
if isinstance(detail, str) and detail.strip():
|
|
image_part["detail"] = detail.strip()
|
|
converted.append(image_part)
|
|
return converted
|
|
|
|
|
|
def _summarize_user_message_for_log(content: Any) -> str:
|
|
"""Return a short text summary of a user message for logging/trajectory.
|
|
|
|
Multimodal messages arrive as a list of ``{type:"text"|"image_url", ...}``
|
|
parts from the API server. Logging, spinner previews, and trajectory
|
|
files all want a plain string — this helper extracts the first chunk of
|
|
text and notes any attached images. Returns an empty string for empty
|
|
lists and ``str(content)`` for unexpected scalar types.
|
|
"""
|
|
if content is None:
|
|
return ""
|
|
if isinstance(content, str):
|
|
return content
|
|
if isinstance(content, list):
|
|
text_bits: List[str] = []
|
|
image_count = 0
|
|
for part in content:
|
|
if isinstance(part, str):
|
|
if part:
|
|
text_bits.append(part)
|
|
continue
|
|
if not isinstance(part, dict):
|
|
continue
|
|
ptype = str(part.get("type") or "").strip().lower()
|
|
if ptype in {"text", "input_text", "output_text"}:
|
|
text = part.get("text")
|
|
if isinstance(text, str) and text:
|
|
text_bits.append(text)
|
|
elif ptype in {"image_url", "input_image"}:
|
|
image_count += 1
|
|
summary = " ".join(text_bits).strip()
|
|
if image_count:
|
|
note = f"[{image_count} image{'s' if image_count != 1 else ''}]"
|
|
summary = f"{note} {summary}" if summary else note
|
|
return summary
|
|
try:
|
|
return str(content)
|
|
except Exception:
|
|
return ""
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# ID helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _deterministic_call_id(fn_name: str, arguments: str, index: int = 0) -> str:
|
|
"""Generate a deterministic call_id from tool call content.
|
|
|
|
Used as a fallback when the API doesn't provide a call_id.
|
|
Deterministic IDs prevent cache invalidation — random UUIDs would
|
|
make every API call's prefix unique, breaking OpenAI's prompt cache.
|
|
"""
|
|
seed = f"{fn_name}:{arguments}:{index}"
|
|
digest = hashlib.sha256(seed.encode("utf-8", errors="replace")).hexdigest()[:12]
|
|
return f"call_{digest}"
|
|
|
|
|
|
def _split_responses_tool_id(raw_id: Any) -> tuple[Optional[str], Optional[str]]:
|
|
"""Split a stored tool id into (call_id, response_item_id)."""
|
|
if not isinstance(raw_id, str):
|
|
return None, None
|
|
value = raw_id.strip()
|
|
if not value:
|
|
return None, None
|
|
if "|" in value:
|
|
call_id, response_item_id = value.split("|", 1)
|
|
call_id = call_id.strip() or None
|
|
response_item_id = response_item_id.strip() or None
|
|
return call_id, response_item_id
|
|
if value.startswith("fc_"):
|
|
return None, value
|
|
return value, None
|
|
|
|
|
|
def _derive_responses_function_call_id(
|
|
call_id: str,
|
|
response_item_id: Optional[str] = None,
|
|
) -> str:
|
|
"""Build a valid Responses `function_call.id` (must start with `fc_`)."""
|
|
if isinstance(response_item_id, str):
|
|
candidate = response_item_id.strip()
|
|
if candidate.startswith("fc_"):
|
|
return candidate
|
|
|
|
source = (call_id or "").strip()
|
|
if source.startswith("fc_"):
|
|
return source
|
|
if source.startswith("call_") and len(source) > len("call_"):
|
|
return f"fc_{source[len('call_'):]}"
|
|
|
|
sanitized = re.sub(r"[^A-Za-z0-9_-]", "", source)
|
|
if sanitized.startswith("fc_"):
|
|
return sanitized
|
|
if sanitized.startswith("call_") and len(sanitized) > len("call_"):
|
|
return f"fc_{sanitized[len('call_'):]}"
|
|
if sanitized:
|
|
return f"fc_{sanitized[:48]}"
|
|
|
|
seed = source or str(response_item_id or "") or uuid.uuid4().hex
|
|
digest = hashlib.sha1(seed.encode("utf-8")).hexdigest()[:24]
|
|
return f"fc_{digest}"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Schema conversion
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _responses_tools(tools: Optional[List[Dict[str, Any]]] = None) -> Optional[List[Dict[str, Any]]]:
|
|
"""Convert chat-completions tool schemas to Responses function-tool schemas."""
|
|
if not tools:
|
|
return None
|
|
|
|
converted: List[Dict[str, Any]] = []
|
|
for item in tools:
|
|
fn = item.get("function", {}) if isinstance(item, dict) else {}
|
|
name = fn.get("name")
|
|
if not isinstance(name, str) or not name.strip():
|
|
continue
|
|
converted.append({
|
|
"type": "function",
|
|
"name": name,
|
|
"description": fn.get("description", ""),
|
|
"strict": False,
|
|
"parameters": fn.get("parameters", {"type": "object", "properties": {}}),
|
|
})
|
|
return converted or None
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Message format conversion
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
|
"""Convert internal chat-style messages to Responses input items."""
|
|
items: List[Dict[str, Any]] = []
|
|
seen_item_ids: set = set()
|
|
|
|
for msg in messages:
|
|
if not isinstance(msg, dict):
|
|
continue
|
|
role = msg.get("role")
|
|
if role == "system":
|
|
continue
|
|
|
|
if role in {"user", "assistant"}:
|
|
content = msg.get("content", "")
|
|
if isinstance(content, list):
|
|
content_parts = _chat_content_to_responses_parts(content)
|
|
content_text = "".join(
|
|
p.get("text", "") for p in content_parts if p.get("type") == "input_text"
|
|
)
|
|
else:
|
|
content_parts = []
|
|
content_text = str(content) if content is not None else ""
|
|
|
|
if role == "assistant":
|
|
# Replay encrypted reasoning items from previous turns
|
|
# so the API can maintain coherent reasoning chains.
|
|
codex_reasoning = msg.get("codex_reasoning_items")
|
|
has_codex_reasoning = False
|
|
if isinstance(codex_reasoning, list):
|
|
for ri in codex_reasoning:
|
|
if isinstance(ri, dict) and ri.get("encrypted_content"):
|
|
item_id = ri.get("id")
|
|
if item_id and item_id in seen_item_ids:
|
|
continue
|
|
# Strip the "id" field — with store=False the
|
|
# Responses API cannot look up items by ID and
|
|
# returns 404. The encrypted_content blob is
|
|
# self-contained for reasoning chain continuity.
|
|
replay_item = {k: v for k, v in ri.items() if k != "id"}
|
|
items.append(replay_item)
|
|
if item_id:
|
|
seen_item_ids.add(item_id)
|
|
has_codex_reasoning = True
|
|
|
|
if content_parts:
|
|
items.append({"role": "assistant", "content": content_parts})
|
|
elif content_text.strip():
|
|
items.append({"role": "assistant", "content": content_text})
|
|
elif has_codex_reasoning:
|
|
# The Responses API requires a following item after each
|
|
# reasoning item (otherwise: missing_following_item error).
|
|
# When the assistant produced only reasoning with no visible
|
|
# content, emit an empty assistant message as the required
|
|
# following item.
|
|
items.append({"role": "assistant", "content": ""})
|
|
|
|
tool_calls = msg.get("tool_calls")
|
|
if isinstance(tool_calls, list):
|
|
for tc in tool_calls:
|
|
if not isinstance(tc, dict):
|
|
continue
|
|
fn = tc.get("function", {})
|
|
fn_name = fn.get("name")
|
|
if not isinstance(fn_name, str) or not fn_name.strip():
|
|
continue
|
|
|
|
embedded_call_id, embedded_response_item_id = _split_responses_tool_id(
|
|
tc.get("id")
|
|
)
|
|
call_id = tc.get("call_id")
|
|
if not isinstance(call_id, str) or not call_id.strip():
|
|
call_id = embedded_call_id
|
|
if not isinstance(call_id, str) or not call_id.strip():
|
|
if (
|
|
isinstance(embedded_response_item_id, str)
|
|
and embedded_response_item_id.startswith("fc_")
|
|
and len(embedded_response_item_id) > len("fc_")
|
|
):
|
|
call_id = f"call_{embedded_response_item_id[len('fc_'):]}"
|
|
else:
|
|
_raw_args = str(fn.get("arguments", "{}"))
|
|
call_id = _deterministic_call_id(fn_name, _raw_args, len(items))
|
|
call_id = call_id.strip()
|
|
|
|
arguments = fn.get("arguments", "{}")
|
|
if isinstance(arguments, dict):
|
|
arguments = json.dumps(arguments, ensure_ascii=False)
|
|
elif not isinstance(arguments, str):
|
|
arguments = str(arguments)
|
|
arguments = arguments.strip() or "{}"
|
|
|
|
items.append({
|
|
"type": "function_call",
|
|
"call_id": call_id,
|
|
"name": fn_name,
|
|
"arguments": arguments,
|
|
})
|
|
continue
|
|
|
|
# Non-assistant (user) role: emit multimodal parts when present,
|
|
# otherwise fall back to the text payload.
|
|
if content_parts:
|
|
items.append({"role": role, "content": content_parts})
|
|
else:
|
|
items.append({"role": role, "content": content_text})
|
|
continue
|
|
|
|
if role == "tool":
|
|
raw_tool_call_id = msg.get("tool_call_id")
|
|
call_id, _ = _split_responses_tool_id(raw_tool_call_id)
|
|
if not isinstance(call_id, str) or not call_id.strip():
|
|
if isinstance(raw_tool_call_id, str) and raw_tool_call_id.strip():
|
|
call_id = raw_tool_call_id.strip()
|
|
if not isinstance(call_id, str) or not call_id.strip():
|
|
continue
|
|
items.append({
|
|
"type": "function_call_output",
|
|
"call_id": call_id,
|
|
"output": str(msg.get("content", "") or ""),
|
|
})
|
|
|
|
return items
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Input preflight / validation
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]:
|
|
if not isinstance(raw_items, list):
|
|
raise ValueError("Codex Responses input must be a list of input items.")
|
|
|
|
normalized: List[Dict[str, Any]] = []
|
|
seen_ids: set = set()
|
|
for idx, item in enumerate(raw_items):
|
|
if not isinstance(item, dict):
|
|
raise ValueError(f"Codex Responses input[{idx}] must be an object.")
|
|
|
|
item_type = item.get("type")
|
|
if item_type == "function_call":
|
|
call_id = item.get("call_id")
|
|
name = item.get("name")
|
|
if not isinstance(call_id, str) or not call_id.strip():
|
|
raise ValueError(f"Codex Responses input[{idx}] function_call is missing call_id.")
|
|
if not isinstance(name, str) or not name.strip():
|
|
raise ValueError(f"Codex Responses input[{idx}] function_call is missing name.")
|
|
|
|
arguments = item.get("arguments", "{}")
|
|
if isinstance(arguments, dict):
|
|
arguments = json.dumps(arguments, ensure_ascii=False)
|
|
elif not isinstance(arguments, str):
|
|
arguments = str(arguments)
|
|
arguments = arguments.strip() or "{}"
|
|
|
|
normalized.append(
|
|
{
|
|
"type": "function_call",
|
|
"call_id": call_id.strip(),
|
|
"name": name.strip(),
|
|
"arguments": arguments,
|
|
}
|
|
)
|
|
continue
|
|
|
|
if item_type == "function_call_output":
|
|
call_id = item.get("call_id")
|
|
if not isinstance(call_id, str) or not call_id.strip():
|
|
raise ValueError(f"Codex Responses input[{idx}] function_call_output is missing call_id.")
|
|
output = item.get("output", "")
|
|
if output is None:
|
|
output = ""
|
|
if not isinstance(output, str):
|
|
output = str(output)
|
|
|
|
normalized.append(
|
|
{
|
|
"type": "function_call_output",
|
|
"call_id": call_id.strip(),
|
|
"output": output,
|
|
}
|
|
)
|
|
continue
|
|
|
|
if item_type == "reasoning":
|
|
encrypted = item.get("encrypted_content")
|
|
if isinstance(encrypted, str) and encrypted:
|
|
item_id = item.get("id")
|
|
if isinstance(item_id, str) and item_id:
|
|
if item_id in seen_ids:
|
|
continue
|
|
seen_ids.add(item_id)
|
|
reasoning_item = {"type": "reasoning", "encrypted_content": encrypted}
|
|
# Do NOT include the "id" in the outgoing item — with
|
|
# store=False (our default) the API tries to resolve the
|
|
# id server-side and returns 404. The id is still used
|
|
# above for local deduplication via seen_ids.
|
|
summary = item.get("summary")
|
|
if isinstance(summary, list):
|
|
reasoning_item["summary"] = summary
|
|
else:
|
|
reasoning_item["summary"] = []
|
|
normalized.append(reasoning_item)
|
|
continue
|
|
|
|
role = item.get("role")
|
|
if role in {"user", "assistant"}:
|
|
content = item.get("content", "")
|
|
if content is None:
|
|
content = ""
|
|
if isinstance(content, list):
|
|
# Multimodal content from ``_chat_messages_to_responses_input``
|
|
# is already in Responses format (``input_text`` / ``input_image``).
|
|
# Validate each part and pass through.
|
|
validated: List[Dict[str, Any]] = []
|
|
for part_idx, part in enumerate(content):
|
|
if isinstance(part, str):
|
|
if part:
|
|
validated.append({"type": "input_text", "text": part})
|
|
continue
|
|
if not isinstance(part, dict):
|
|
raise ValueError(
|
|
f"Codex Responses input[{idx}].content[{part_idx}] must be an object or string."
|
|
)
|
|
ptype = str(part.get("type") or "").strip().lower()
|
|
if ptype in {"input_text", "text", "output_text"}:
|
|
text = part.get("text", "")
|
|
if not isinstance(text, str):
|
|
text = str(text or "")
|
|
validated.append({"type": "input_text", "text": text})
|
|
elif ptype in {"input_image", "image_url"}:
|
|
image_ref = part.get("image_url", "")
|
|
detail = part.get("detail")
|
|
if isinstance(image_ref, dict):
|
|
url = image_ref.get("url", "")
|
|
detail = image_ref.get("detail", detail)
|
|
else:
|
|
url = image_ref
|
|
if not isinstance(url, str):
|
|
url = str(url or "")
|
|
image_part: Dict[str, Any] = {"type": "input_image", "image_url": url}
|
|
if isinstance(detail, str) and detail.strip():
|
|
image_part["detail"] = detail.strip()
|
|
validated.append(image_part)
|
|
else:
|
|
raise ValueError(
|
|
f"Codex Responses input[{idx}].content[{part_idx}] has unsupported type {part.get('type')!r}."
|
|
)
|
|
normalized.append({"role": role, "content": validated})
|
|
continue
|
|
if not isinstance(content, str):
|
|
content = str(content)
|
|
|
|
normalized.append({"role": role, "content": content})
|
|
continue
|
|
|
|
raise ValueError(
|
|
f"Codex Responses input[{idx}] has unsupported item shape (type={item_type!r}, role={role!r})."
|
|
)
|
|
|
|
return normalized
|
|
|
|
|
|
def _preflight_codex_api_kwargs(
|
|
api_kwargs: Any,
|
|
*,
|
|
allow_stream: bool = False,
|
|
) -> Dict[str, Any]:
|
|
if not isinstance(api_kwargs, dict):
|
|
raise ValueError("Codex Responses request must be a dict.")
|
|
|
|
required = {"model", "instructions", "input"}
|
|
missing = [key for key in required if key not in api_kwargs]
|
|
if missing:
|
|
raise ValueError(f"Codex Responses request missing required field(s): {', '.join(sorted(missing))}.")
|
|
|
|
model = api_kwargs.get("model")
|
|
if not isinstance(model, str) or not model.strip():
|
|
raise ValueError("Codex Responses request 'model' must be a non-empty string.")
|
|
model = model.strip()
|
|
|
|
instructions = api_kwargs.get("instructions")
|
|
if instructions is None:
|
|
instructions = ""
|
|
if not isinstance(instructions, str):
|
|
instructions = str(instructions)
|
|
instructions = instructions.strip() or DEFAULT_AGENT_IDENTITY
|
|
|
|
normalized_input = _preflight_codex_input_items(api_kwargs.get("input"))
|
|
|
|
tools = api_kwargs.get("tools")
|
|
normalized_tools = None
|
|
if tools is not None:
|
|
if not isinstance(tools, list):
|
|
raise ValueError("Codex Responses request 'tools' must be a list when provided.")
|
|
normalized_tools = []
|
|
for idx, tool in enumerate(tools):
|
|
if not isinstance(tool, dict):
|
|
raise ValueError(f"Codex Responses tools[{idx}] must be an object.")
|
|
if tool.get("type") != "function":
|
|
raise ValueError(f"Codex Responses tools[{idx}] has unsupported type {tool.get('type')!r}.")
|
|
|
|
name = tool.get("name")
|
|
parameters = tool.get("parameters")
|
|
if not isinstance(name, str) or not name.strip():
|
|
raise ValueError(f"Codex Responses tools[{idx}] is missing a valid name.")
|
|
if not isinstance(parameters, dict):
|
|
raise ValueError(f"Codex Responses tools[{idx}] is missing valid parameters.")
|
|
|
|
description = tool.get("description", "")
|
|
if description is None:
|
|
description = ""
|
|
if not isinstance(description, str):
|
|
description = str(description)
|
|
|
|
strict = tool.get("strict", False)
|
|
if not isinstance(strict, bool):
|
|
strict = bool(strict)
|
|
|
|
normalized_tools.append(
|
|
{
|
|
"type": "function",
|
|
"name": name.strip(),
|
|
"description": description,
|
|
"strict": strict,
|
|
"parameters": parameters,
|
|
}
|
|
)
|
|
|
|
store = api_kwargs.get("store", False)
|
|
if store is not False:
|
|
raise ValueError("Codex Responses contract requires 'store' to be false.")
|
|
|
|
allowed_keys = {
|
|
"model", "instructions", "input", "tools", "store",
|
|
"reasoning", "include", "max_output_tokens", "temperature",
|
|
"tool_choice", "parallel_tool_calls", "prompt_cache_key", "service_tier",
|
|
"extra_headers",
|
|
}
|
|
normalized: Dict[str, Any] = {
|
|
"model": model,
|
|
"instructions": instructions,
|
|
"input": normalized_input,
|
|
"store": False,
|
|
}
|
|
if normalized_tools is not None:
|
|
normalized["tools"] = normalized_tools
|
|
|
|
# Pass through reasoning config
|
|
reasoning = api_kwargs.get("reasoning")
|
|
if isinstance(reasoning, dict):
|
|
normalized["reasoning"] = reasoning
|
|
include = api_kwargs.get("include")
|
|
if isinstance(include, list):
|
|
normalized["include"] = include
|
|
service_tier = api_kwargs.get("service_tier")
|
|
if isinstance(service_tier, str) and service_tier.strip():
|
|
normalized["service_tier"] = service_tier.strip()
|
|
|
|
# Pass through max_output_tokens and temperature
|
|
max_output_tokens = api_kwargs.get("max_output_tokens")
|
|
if isinstance(max_output_tokens, (int, float)) and max_output_tokens > 0:
|
|
normalized["max_output_tokens"] = int(max_output_tokens)
|
|
temperature = api_kwargs.get("temperature")
|
|
if isinstance(temperature, (int, float)):
|
|
normalized["temperature"] = float(temperature)
|
|
|
|
# Pass through tool_choice, parallel_tool_calls, prompt_cache_key
|
|
for passthrough_key in ("tool_choice", "parallel_tool_calls", "prompt_cache_key"):
|
|
val = api_kwargs.get(passthrough_key)
|
|
if val is not None:
|
|
normalized[passthrough_key] = val
|
|
|
|
extra_headers = api_kwargs.get("extra_headers")
|
|
if extra_headers is not None:
|
|
if not isinstance(extra_headers, dict):
|
|
raise ValueError("Codex Responses request 'extra_headers' must be an object.")
|
|
normalized_headers: Dict[str, str] = {}
|
|
for key, value in extra_headers.items():
|
|
if not isinstance(key, str) or not key.strip():
|
|
raise ValueError("Codex Responses request 'extra_headers' keys must be non-empty strings.")
|
|
if value is None:
|
|
continue
|
|
normalized_headers[key.strip()] = str(value)
|
|
if normalized_headers:
|
|
normalized["extra_headers"] = normalized_headers
|
|
|
|
if allow_stream:
|
|
stream = api_kwargs.get("stream")
|
|
if stream is not None and stream is not True:
|
|
raise ValueError("Codex Responses 'stream' must be true when set.")
|
|
if stream is True:
|
|
normalized["stream"] = True
|
|
allowed_keys.add("stream")
|
|
elif "stream" in api_kwargs:
|
|
raise ValueError("Codex Responses stream flag is only allowed in fallback streaming requests.")
|
|
|
|
unexpected = sorted(key for key in api_kwargs if key not in allowed_keys)
|
|
if unexpected:
|
|
raise ValueError(
|
|
f"Codex Responses request has unsupported field(s): {', '.join(unexpected)}."
|
|
)
|
|
|
|
return normalized
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Response extraction helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _extract_responses_message_text(item: Any) -> str:
|
|
"""Extract assistant text from a Responses message output item."""
|
|
content = getattr(item, "content", None)
|
|
if not isinstance(content, list):
|
|
return ""
|
|
|
|
chunks: List[str] = []
|
|
for part in content:
|
|
ptype = getattr(part, "type", None)
|
|
if ptype not in {"output_text", "text"}:
|
|
continue
|
|
text = getattr(part, "text", None)
|
|
if isinstance(text, str) and text:
|
|
chunks.append(text)
|
|
return "".join(chunks).strip()
|
|
|
|
|
|
def _extract_responses_reasoning_text(item: Any) -> str:
|
|
"""Extract a compact reasoning text from a Responses reasoning item."""
|
|
summary = getattr(item, "summary", None)
|
|
if isinstance(summary, list):
|
|
chunks: List[str] = []
|
|
for part in summary:
|
|
text = getattr(part, "text", None)
|
|
if isinstance(text, str) and text:
|
|
chunks.append(text)
|
|
if chunks:
|
|
return "\n".join(chunks).strip()
|
|
text = getattr(item, "text", None)
|
|
if isinstance(text, str) and text:
|
|
return text.strip()
|
|
return ""
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Full response normalization
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _normalize_codex_response(response: Any) -> tuple[Any, str]:
|
|
"""Normalize a Responses API object to an assistant_message-like object."""
|
|
output = getattr(response, "output", None)
|
|
if not isinstance(output, list) or not output:
|
|
# The Codex backend can return empty output when the answer was
|
|
# delivered entirely via stream events. Check output_text as a
|
|
# last-resort fallback before raising.
|
|
out_text = getattr(response, "output_text", None)
|
|
if isinstance(out_text, str) and out_text.strip():
|
|
logger.debug(
|
|
"Codex response has empty output but output_text is present (%d chars); "
|
|
"synthesizing output item.", len(out_text.strip()),
|
|
)
|
|
output = [SimpleNamespace(
|
|
type="message", role="assistant", status="completed",
|
|
content=[SimpleNamespace(type="output_text", text=out_text.strip())],
|
|
)]
|
|
response.output = output
|
|
else:
|
|
raise RuntimeError("Responses API returned no output items")
|
|
|
|
response_status = getattr(response, "status", None)
|
|
if isinstance(response_status, str):
|
|
response_status = response_status.strip().lower()
|
|
else:
|
|
response_status = None
|
|
|
|
if response_status in {"failed", "cancelled"}:
|
|
error_obj = getattr(response, "error", None)
|
|
if isinstance(error_obj, dict):
|
|
error_msg = error_obj.get("message") or str(error_obj)
|
|
else:
|
|
error_msg = str(error_obj) if error_obj else f"Responses API returned status '{response_status}'"
|
|
raise RuntimeError(error_msg)
|
|
|
|
content_parts: List[str] = []
|
|
reasoning_parts: List[str] = []
|
|
reasoning_items_raw: List[Dict[str, Any]] = []
|
|
tool_calls: List[Any] = []
|
|
has_incomplete_items = response_status in {"queued", "in_progress", "incomplete"}
|
|
saw_commentary_phase = False
|
|
saw_final_answer_phase = False
|
|
|
|
for item in output:
|
|
item_type = getattr(item, "type", None)
|
|
item_status = getattr(item, "status", None)
|
|
if isinstance(item_status, str):
|
|
item_status = item_status.strip().lower()
|
|
else:
|
|
item_status = None
|
|
|
|
if item_status in {"queued", "in_progress", "incomplete"}:
|
|
has_incomplete_items = True
|
|
|
|
if item_type == "message":
|
|
item_phase = getattr(item, "phase", None)
|
|
if isinstance(item_phase, str):
|
|
normalized_phase = item_phase.strip().lower()
|
|
if normalized_phase in {"commentary", "analysis"}:
|
|
saw_commentary_phase = True
|
|
elif normalized_phase in {"final_answer", "final"}:
|
|
saw_final_answer_phase = True
|
|
message_text = _extract_responses_message_text(item)
|
|
if message_text:
|
|
content_parts.append(message_text)
|
|
elif item_type == "reasoning":
|
|
reasoning_text = _extract_responses_reasoning_text(item)
|
|
if reasoning_text:
|
|
reasoning_parts.append(reasoning_text)
|
|
# Capture the full reasoning item for multi-turn continuity.
|
|
# encrypted_content is an opaque blob the API needs back on
|
|
# subsequent turns to maintain coherent reasoning chains.
|
|
encrypted = getattr(item, "encrypted_content", None)
|
|
if isinstance(encrypted, str) and encrypted:
|
|
raw_item = {"type": "reasoning", "encrypted_content": encrypted}
|
|
item_id = getattr(item, "id", None)
|
|
if isinstance(item_id, str) and item_id:
|
|
raw_item["id"] = item_id
|
|
# Capture summary — required by the API when replaying reasoning items
|
|
summary = getattr(item, "summary", None)
|
|
if isinstance(summary, list):
|
|
raw_summary = []
|
|
for part in summary:
|
|
text = getattr(part, "text", None)
|
|
if isinstance(text, str):
|
|
raw_summary.append({"type": "summary_text", "text": text})
|
|
raw_item["summary"] = raw_summary
|
|
reasoning_items_raw.append(raw_item)
|
|
elif item_type == "function_call":
|
|
if item_status in {"queued", "in_progress", "incomplete"}:
|
|
continue
|
|
fn_name = getattr(item, "name", "") or ""
|
|
arguments = getattr(item, "arguments", "{}")
|
|
if not isinstance(arguments, str):
|
|
arguments = json.dumps(arguments, ensure_ascii=False)
|
|
raw_call_id = getattr(item, "call_id", None)
|
|
raw_item_id = getattr(item, "id", None)
|
|
embedded_call_id, _ = _split_responses_tool_id(raw_item_id)
|
|
call_id = raw_call_id if isinstance(raw_call_id, str) and raw_call_id.strip() else embedded_call_id
|
|
if not isinstance(call_id, str) or not call_id.strip():
|
|
call_id = _deterministic_call_id(fn_name, arguments, len(tool_calls))
|
|
call_id = call_id.strip()
|
|
response_item_id = raw_item_id if isinstance(raw_item_id, str) else None
|
|
response_item_id = _derive_responses_function_call_id(call_id, response_item_id)
|
|
tool_calls.append(SimpleNamespace(
|
|
id=call_id,
|
|
call_id=call_id,
|
|
response_item_id=response_item_id,
|
|
type="function",
|
|
function=SimpleNamespace(name=fn_name, arguments=arguments),
|
|
))
|
|
elif item_type == "custom_tool_call":
|
|
fn_name = getattr(item, "name", "") or ""
|
|
arguments = getattr(item, "input", "{}")
|
|
if not isinstance(arguments, str):
|
|
arguments = json.dumps(arguments, ensure_ascii=False)
|
|
raw_call_id = getattr(item, "call_id", None)
|
|
raw_item_id = getattr(item, "id", None)
|
|
embedded_call_id, _ = _split_responses_tool_id(raw_item_id)
|
|
call_id = raw_call_id if isinstance(raw_call_id, str) and raw_call_id.strip() else embedded_call_id
|
|
if not isinstance(call_id, str) or not call_id.strip():
|
|
call_id = _deterministic_call_id(fn_name, arguments, len(tool_calls))
|
|
call_id = call_id.strip()
|
|
response_item_id = raw_item_id if isinstance(raw_item_id, str) else None
|
|
response_item_id = _derive_responses_function_call_id(call_id, response_item_id)
|
|
tool_calls.append(SimpleNamespace(
|
|
id=call_id,
|
|
call_id=call_id,
|
|
response_item_id=response_item_id,
|
|
type="function",
|
|
function=SimpleNamespace(name=fn_name, arguments=arguments),
|
|
))
|
|
|
|
final_text = "\n".join([p for p in content_parts if p]).strip()
|
|
if not final_text and hasattr(response, "output_text"):
|
|
out_text = getattr(response, "output_text", "")
|
|
if isinstance(out_text, str):
|
|
final_text = out_text.strip()
|
|
|
|
assistant_message = SimpleNamespace(
|
|
content=final_text,
|
|
tool_calls=tool_calls,
|
|
reasoning="\n\n".join(reasoning_parts).strip() if reasoning_parts else None,
|
|
reasoning_content=None,
|
|
reasoning_details=None,
|
|
codex_reasoning_items=reasoning_items_raw or None,
|
|
)
|
|
|
|
if tool_calls:
|
|
finish_reason = "tool_calls"
|
|
elif has_incomplete_items or (saw_commentary_phase and not saw_final_answer_phase):
|
|
finish_reason = "incomplete"
|
|
elif reasoning_items_raw and not final_text:
|
|
# Response contains only reasoning (encrypted thinking state) with
|
|
# no visible content or tool calls. The model is still thinking and
|
|
# needs another turn to produce the actual answer. Marking this as
|
|
# "stop" would send it into the empty-content retry loop which burns
|
|
# 3 retries then fails — treat it as incomplete instead so the Codex
|
|
# continuation path handles it correctly.
|
|
finish_reason = "incomplete"
|
|
else:
|
|
finish_reason = "stop"
|
|
return assistant_message, finish_reason
|