feat: add ResponsesApiTransport + wire all Codex transport paths

Add ResponsesApiTransport, which wraps codex_responses_adapter.py behind the ProviderTransport ABC. It is auto-registered via _discover_transports().

Wire ALL Codex transport methods to production paths in run_agent.py:
- build_kwargs: main _build_api_kwargs codex branch (50 lines extracted)
- normalize_response: main loop + flush + summary + retry (4 sites)
- convert_tools: memory flush tool override
- convert_messages: called internally via build_kwargs
- validate_response: response validation gate
- preflight_kwargs: request sanitization (2 sites)

Remove 7 dead legacy wrappers from AIAgent (_responses_tools, _chat_messages_to_responses_input, _normalize_codex_response, _preflight_codex_api_kwargs, _preflight_codex_input_items, _extract_responses_message_text, _extract_responses_reasoning_text). Keep 3 ID manipulation methods still used by _build_assistant_message.

Update 18 test call sites across 3 test files to call adapter functions directly instead of through the deleted AIAgent wrappers. 24 new tests. 343 codex/responses/transport tests pass (0 failures).

PR 4 of the provider transport refactor.
2026-04-25 00:51:20 +00:00 · 2026-04-21 14:24:41 +05:30 · 2026-04-21 14:24:41 +05:30 · c832ebd67c
commit c832ebd67c
parent 09dd5eb6a5
7 changed files with 589 additions and 169 deletions
--- a/agent/transports/__init__.py
+++ b/agent/transports/__init__.py
@ -37,3 +37,7 @@ def _discover_transports() -> None:
        import agent.transports.anthropic  # noqa: F401
    except ImportError:
        pass
    try:
        import agent.transports.codex  # noqa: F401
    except ImportError:
        pass
--- a/agent/transports/codex.py
+++ b/agent/transports/codex.py
@ -0,0 +1,217 @@
 """OpenAI Responses API (Codex) transport.
 Delegates to the existing adapter functions in agent/codex_responses_adapter.py.
 This transport owns format conversion and normalization — NOT client lifecycle,
 streaming, or the _run_codex_stream() call path.
 """
 from typing import Any, Dict, List, Optional
 from agent.transports.base import ProviderTransport
 from agent.transports.types import NormalizedResponse, ToolCall, Usage
 class ResponsesApiTransport(ProviderTransport):
    """Transport for api_mode='codex_responses'.
    Wraps the functions extracted into codex_responses_adapter.py (PR 1).
    """
    @property
    def api_mode(self) -> str:
        return "codex_responses"
    def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any:
        """Translate chat-style messages into Responses API input items.

        Delegates to ``_chat_messages_to_responses_input`` from
        ``agent.codex_responses_adapter``. Extra keyword arguments are
        accepted but not used.
        """
        from agent.codex_responses_adapter import (
            _chat_messages_to_responses_input as to_input_items,
        )

        input_items = to_input_items(messages)
        return input_items
    def convert_tools(self, tools: List[Dict[str, Any]]) -> Any:
        """Translate chat-completions tool schemas into Responses API tools.

        Delegates to ``_responses_tools`` from
        ``agent.codex_responses_adapter``.
        """
        from agent.codex_responses_adapter import (
            _responses_tools as to_function_tools,
        )

        function_tools = to_function_tools(tools)
        return function_tools
    def build_kwargs(
        self,
        model: str,
        messages: List[Dict[str, Any]],
        tools: Optional[List[Dict[str, Any]]] = None,
        **params,
    ) -> Dict[str, Any]:
        """Build the keyword arguments for a Responses API request.

        Message and tool conversion go through ``self.convert_messages`` and
        ``self.convert_tools``, so overrides of those methods are honoured.

        Args:
            model: Model name, placed in the payload unchanged.
            messages: Chat-style messages. When no ``instructions`` param is
                given and the first message has role ``system``, its content
                becomes the instructions and it is dropped from the input.
            tools: Chat-completions tool schemas, or None.
            **params: Optional settings read by this method:
                instructions (str): system prompt. Falls back to the leading
                    system message, then to ``DEFAULT_AGENT_IDENTITY``.
                reasoning_config (dict | None): ``{effort, enabled}``.
                    Effort defaults to "medium"; "minimal" is clamped to
                    "low". ``enabled`` set to False disables reasoning.
                session_id (str | None): used as ``prompt_cache_key`` on
                    non-GitHub backends and as the ``x-grok-conv-id`` header
                    on xAI.
                max_tokens (int | None): sent as ``max_output_tokens``
                    unless ``is_codex_backend`` is set.
                request_overrides (dict | None): merged into the payload
                    with ``dict.update``.
                is_github_responses (bool): Copilot/GitHub models backend.
                is_codex_backend (bool): chatgpt.com/backend-api/codex.
                is_xai_responses (bool): xAI/Grok backend.
                github_reasoning_extra (dict | None): reasoning block used
                    verbatim on the GitHub backend.

                ``provider``, ``base_url`` and ``base_url_hostname`` may be
                passed but are not read here; backend selection relies only
                on the ``is_*`` flags above.

        Returns:
            The request kwargs dict (``model``, ``instructions``, ``input``,
            ``tools``, ``tool_choice``, ``parallel_tool_calls``, ``store``
            plus the backend-dependent keys described above).
        """
        instructions = params.get("instructions", "")
        payload_messages = messages
        if not instructions and messages and messages[0].get("role") == "system":
            instructions = str(messages[0].get("content") or "").strip()
            payload_messages = messages[1:]
        if not instructions:
            # Imported lazily: run_agent is only needed for the fallback
            # identity, and importing it at module load would be circular.
            from run_agent import DEFAULT_AGENT_IDENTITY

            instructions = DEFAULT_AGENT_IDENTITY

        is_github_responses = params.get("is_github_responses", False)
        is_codex_backend = params.get("is_codex_backend", False)
        is_xai_responses = params.get("is_xai_responses", False)

        # Resolve reasoning effort: explicit config wins, default is medium.
        reasoning_effort = "medium"
        reasoning_enabled = True
        reasoning_config = params.get("reasoning_config")
        if reasoning_config and isinstance(reasoning_config, dict):
            if reasoning_config.get("enabled") is False:
                reasoning_enabled = False
            elif reasoning_config.get("effort"):
                reasoning_effort = reasoning_config["effort"]
        # "minimal" is sent as "low".
        _effort_clamp = {"minimal": "low"}
        reasoning_effort = _effort_clamp.get(reasoning_effort, reasoning_effort)

        kwargs = {
            "model": model,
            "instructions": instructions,
            "input": self.convert_messages(payload_messages),
            "tools": self.convert_tools(tools),
            "tool_choice": "auto",
            "parallel_tool_calls": True,
            "store": False,
        }

        session_id = params.get("session_id")
        if not is_github_responses and session_id:
            kwargs["prompt_cache_key"] = session_id

        if reasoning_enabled and is_xai_responses:
            # xAI: no effort parameter is sent, only the include list.
            kwargs["include"] = ["reasoning.encrypted_content"]
        elif reasoning_enabled:
            if is_github_responses:
                # GitHub: only the caller-supplied reasoning block, no include.
                github_reasoning = params.get("github_reasoning_extra")
                if github_reasoning is not None:
                    kwargs["reasoning"] = github_reasoning
            else:
                kwargs["reasoning"] = {"effort": reasoning_effort, "summary": "auto"}
                kwargs["include"] = ["reasoning.encrypted_content"]
        elif not is_github_responses and not is_xai_responses:
            kwargs["include"] = []

        # Overrides are merged before max_output_tokens / extra_headers are
        # set, so those two keys take precedence over an override.
        request_overrides = params.get("request_overrides")
        if request_overrides:
            kwargs.update(request_overrides)

        max_tokens = params.get("max_tokens")
        if max_tokens is not None and not is_codex_backend:
            kwargs["max_output_tokens"] = max_tokens

        if is_xai_responses and session_id:
            kwargs["extra_headers"] = {"x-grok-conv-id": session_id}

        return kwargs
    def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
        """Normalize a Codex Responses API response to a NormalizedResponse.

        Args:
            response: Raw response object, passed to the adapter's
                ``_normalize_codex_response``.
            **kwargs: Accepted but not used.

        Returns:
            A NormalizedResponse. Each tool call keeps ``call_id`` and
            ``response_item_id`` in its ``provider_data`` when present;
            ``codex_reasoning_items`` and ``reasoning_details`` are carried in
            the response-level ``provider_data``. ``finish_reason`` defaults
            to "stop" and ``usage`` is always None.
        """
        from agent.codex_responses_adapter import _normalize_codex_response

        # The adapter returns (assistant message namespace, finish_reason str).
        msg, finish_reason = _normalize_codex_response(response)

        tool_calls = None
        raw_tool_calls = getattr(msg, "tool_calls", None) if msg else None
        if raw_tool_calls:
            tool_calls = []
            for tc in raw_tool_calls:
                # Codex-specific identifiers ride along as provider data.
                tc_extra = {}
                for key in ("call_id", "response_item_id"):
                    value = getattr(tc, key, None)
                    if value:
                        tc_extra[key] = value

                if hasattr(tc, "function"):
                    name = tc.function.name
                    arguments = tc.function.arguments
                    fallback_id = name
                else:
                    name = getattr(tc, "name", "")
                    arguments = getattr(tc, "arguments", "{}")
                    fallback_id = None

                tool_calls.append(ToolCall(
                    id=tc.id if hasattr(tc, "id") else fallback_id,
                    name=name,
                    arguments=arguments,
                    provider_data=tc_extra or None,
                ))

        # Reasoning artefacts are exposed through response-level provider data.
        response_extra = {}
        if msg:
            for key in ("codex_reasoning_items", "reasoning_details"):
                value = getattr(msg, key, None)
                if value:
                    response_extra[key] = value

        return NormalizedResponse(
            content=msg.content if msg else None,
            tool_calls=tool_calls,
            finish_reason=finish_reason or "stop",
            reasoning=getattr(msg, "reasoning", None) if msg else None,
            usage=None,  # Codex usage is extracted separately in normalize_usage()
            provider_data=response_extra or None,
        )
    def validate_response(self, response: Any) -> bool:
        """Report whether a Responses API response carries usable output.

        The response is valid only when ``response.output`` is a list with at
        least one item. A missing response, a missing or non-list ``output``,
        and an empty list are all rejected.

        The ``output_text`` fallback is deliberately not consulted here; the
        caller handles it, with diagnostic logging for stream backfill
        recovery.
        """
        output_items = None if response is None else getattr(response, "output", None)
        return isinstance(output_items, list) and bool(output_items)
    def preflight_kwargs(self, api_kwargs: Any, *, allow_stream: bool = False) -> dict:
        """Run the Codex pre-call check over the request kwargs.

        Delegates to ``_preflight_codex_api_kwargs`` from
        ``agent.codex_responses_adapter`` and returns its result.
        ``allow_stream`` is forwarded unchanged.
        """
        from agent.codex_responses_adapter import (
            _preflight_codex_api_kwargs as run_preflight,
        )

        checked_kwargs = run_preflight(api_kwargs, allow_stream=allow_stream)
        return checked_kwargs
    def map_finish_reason(self, raw_reason: str) -> str:
        """Translate a Codex ``response.status`` into an OpenAI finish_reason.

        Only the plain status string is handled: "incomplete" becomes
        "length"; "completed", "failed", "cancelled" and any unrecognised
        value become "stop". Callers that need the finer-grained cause
        (e.g. 'max_output_tokens') should inspect
        ``response.incomplete_details`` themselves.
        """
        stop_statuses = ("completed", "failed", "cancelled")
        finish_by_status = {status: "stop" for status in stop_statuses}
        finish_by_status["incomplete"] = "length"
        try:
            return finish_by_status[raw_reason]
        except KeyError:
            # Unknown statuses are treated as a normal stop.
            return "stop"
 # Auto-register on import
 from agent.transports import register_transport  # noqa: E402
 register_transport("codex_responses", ResponsesApiTransport)
--- a/run_agent.py
+++ b/run_agent.py
@ -4308,10 +4308,6 @@ class AIAgent:
        if self._memory_store:
            self._memory_store.load_from_disk()
    def _responses_tools(self, tools: Optional[List[Dict[str, Any]]] = None) -> Optional[List[Dict[str, Any]]]:
        """Convert chat-completions tool schemas to Responses function-tool schemas."""
        return _codex_responses_tools(tools if tools is not None else self.tools)
    @staticmethod
    def _deterministic_call_id(fn_name: str, arguments: str, index: int = 0) -> str:
        """Generate a deterministic call_id from tool call content.
@ -4335,33 +4331,6 @@ class AIAgent:
        """Build a valid Responses `function_call.id` (must start with `fc_`)."""
        return _codex_derive_responses_function_call_id(call_id, response_item_id)
    def _chat_messages_to_responses_input(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Convert internal chat-style messages to Responses input items."""
        return _codex_chat_messages_to_responses_input(messages)
    def _preflight_codex_input_items(self, raw_items: Any) -> List[Dict[str, Any]]:
        return _codex_preflight_codex_input_items(raw_items)
    def _preflight_codex_api_kwargs(
        self,
        api_kwargs: Any,
        *,
        allow_stream: bool = False,
    ) -> Dict[str, Any]:
        return _codex_preflight_codex_api_kwargs(api_kwargs, allow_stream=allow_stream)
    def _extract_responses_message_text(self, item: Any) -> str:
        """Extract assistant text from a Responses message output item."""
        return _codex_extract_responses_message_text(item)
    def _extract_responses_reasoning_text(self, item: Any) -> str:
        """Extract a compact reasoning text from a Responses reasoning item."""
        return _codex_extract_responses_reasoning_text(item)
    def _normalize_codex_response(self, response: Any) -> tuple[Any, str]:
        """Normalize a Responses API object to an assistant_message-like object."""
        return _codex_normalize_codex_response(response)
    def _thread_identity(self) -> str:
        thread = threading.current_thread()
        return f"{thread.name}:{thread.ident}"
@ -4854,7 +4823,7 @@ class AIAgent:
        active_client = client or self._ensure_primary_openai_client(reason="codex_create_stream_fallback")
        fallback_kwargs = dict(api_kwargs)
        fallback_kwargs["stream"] = True
-        fallback_kwargs = self._preflight_codex_api_kwargs(fallback_kwargs, allow_stream=True)
+        fallback_kwargs = self._get_codex_transport().preflight_kwargs(fallback_kwargs, allow_stream=True)
        stream_or_response = active_client.responses.create(**fallback_kwargs)
        # Compatibility shim for mocks or providers that still return a concrete response.
@ -6596,6 +6565,15 @@ class AIAgent:
            self._anthropic_transport = t
        return t
    def _get_codex_transport(self):
        """Return the cached ResponsesApiTransport instance (lazy singleton)."""
        t = getattr(self, "_codex_transport", None)
        if t is None:
            from agent.transports import get_transport
            t = get_transport("codex_responses")
            self._codex_transport = t
        return t
    def _prepare_anthropic_messages_for_api(self, api_messages: list) -> list:
        if not any(
            isinstance(msg, dict) and self._content_has_image_parts(msg.get("content"))
@ -6752,14 +6730,7 @@ class AIAgent:
            }
        if self.api_mode == "codex_responses":
-            instructions = ""
+            _ct = self._get_codex_transport()
            payload_messages = api_messages
            if api_messages and api_messages[0].get("role") == "system":
                instructions = str(api_messages[0].get("content") or "").strip()
                payload_messages = api_messages[1:]
            if not instructions:
                instructions = DEFAULT_AGENT_IDENTITY
            is_github_responses = (
                base_url_host_matches(self.base_url, "models.github.ai")
                or base_url_host_matches(self.base_url, "api.githubcopilot.com")
@ -6771,64 +6742,20 @@ class AIAgent:
                    and "/backend-api/codex" in self._base_url_lower
                )
            )
            # Resolve reasoning effort: config > default (medium)
            reasoning_effort = "medium"
            reasoning_enabled = True
            if self.reasoning_config and isinstance(self.reasoning_config, dict):
                if self.reasoning_config.get("enabled") is False:
                    reasoning_enabled = False
                elif self.reasoning_config.get("effort"):
                    reasoning_effort = self.reasoning_config["effort"]
            # Clamp effort levels not supported by the Responses API model.
            # GPT-5.4 supports none/low/medium/high/xhigh but not "minimal".
            # "minimal" is valid on OpenRouter and GPT-5 but fails on 5.2/5.4.
            _effort_clamp = {"minimal": "low"}
            reasoning_effort = _effort_clamp.get(reasoning_effort, reasoning_effort)
            kwargs = {
                "model": self.model,
                "instructions": instructions,
                "input": self._chat_messages_to_responses_input(payload_messages),
                "tools": self._responses_tools(),
                "tool_choice": "auto",
                "parallel_tool_calls": True,
                "store": False,
            }
            if not is_github_responses:
                kwargs["prompt_cache_key"] = self.session_id
            is_xai_responses = self.provider == "xai" or self._base_url_hostname == "api.x.ai"
-
+            return _ct.build_kwargs(
-            if reasoning_enabled and is_xai_responses:
+                model=self.model,
-                # xAI reasons automatically — no effort param, just include encrypted content
+                messages=api_messages,
-                kwargs["include"] = ["reasoning.encrypted_content"]
+                tools=self.tools,
-            elif reasoning_enabled:
+                reasoning_config=self.reasoning_config,
-                if is_github_responses:
+                session_id=getattr(self, "session_id", None),
-                    # Copilot's Responses route advertises reasoning-effort support,
+                max_tokens=self.max_tokens,
-                    # but not OpenAI-specific prompt cache or encrypted reasoning
+                request_overrides=self.request_overrides,
-                    # fields. Keep the payload to the documented subset.
+                is_github_responses=is_github_responses,
-                    github_reasoning = self._github_models_reasoning_extra_body()
+                is_codex_backend=is_codex_backend,
-                    if github_reasoning is not None:
+                is_xai_responses=is_xai_responses,
-                        kwargs["reasoning"] = github_reasoning
+                github_reasoning_extra=self._github_models_reasoning_extra_body() if is_github_responses else None,
-                else:
+            )
                    kwargs["reasoning"] = {"effort": reasoning_effort, "summary": "auto"}
                    kwargs["include"] = ["reasoning.encrypted_content"]
            elif not is_github_responses and not is_xai_responses:
                kwargs["include"] = []
            if self.request_overrides:
                kwargs.update(self.request_overrides)
            if self.max_tokens is not None and not is_codex_backend:
                kwargs["max_output_tokens"] = self.max_tokens
            if is_xai_responses and getattr(self, "session_id", None):
                kwargs["extra_headers"] = {"x-grok-conv-id": self.session_id}
            return kwargs
        sanitized_messages = api_messages
        needs_sanitization = False
@ -7438,7 +7365,7 @@ class AIAgent:
            if not _aux_available and self.api_mode == "codex_responses":
                # No auxiliary client -- use the Codex Responses path directly
                codex_kwargs = self._build_api_kwargs(api_messages)
-                codex_kwargs["tools"] = self._responses_tools([memory_tool_def])
+                codex_kwargs["tools"] = self._get_codex_transport().convert_tools([memory_tool_def])
                if _flush_temperature is not None:
                    codex_kwargs["temperature"] = _flush_temperature
                else:
@ -7473,9 +7400,15 @@ class AIAgent:
            # Extract tool calls from the response, handling all API formats
            tool_calls = []
            if self.api_mode == "codex_responses" and not _aux_available:
-                assistant_msg, _ = self._normalize_codex_response(response)
+                _ct_flush = self._get_codex_transport()
-                if assistant_msg and assistant_msg.tool_calls:
+                _cnr_flush = _ct_flush.normalize_response(response)
-                    tool_calls = assistant_msg.tool_calls
+                if _cnr_flush and _cnr_flush.tool_calls:
                    tool_calls = [
                        SimpleNamespace(
                            id=tc.id, type="function",
                            function=SimpleNamespace(name=tc.name, arguments=tc.arguments),
                        ) for tc in _cnr_flush.tool_calls
                    ]
            elif self.api_mode == "anthropic_messages" and not _aux_available:
                _tfn = self._get_anthropic_transport()
                _flush_nr = _tfn.normalize_response(response, strip_tool_prefix=self._is_anthropic_oauth)
@ -8519,8 +8452,9 @@ class AIAgent:
                codex_kwargs = self._build_api_kwargs(api_messages)
                codex_kwargs.pop("tools", None)
                summary_response = self._run_codex_stream(codex_kwargs)
-                assistant_message, _ = self._normalize_codex_response(summary_response)
+                _ct_sum = self._get_codex_transport()
-                final_response = (assistant_message.content or "").strip() if assistant_message else ""
+                _cnr_sum = _ct_sum.normalize_response(summary_response)
                final_response = (_cnr_sum.content or "").strip()
            else:
                summary_kwargs = {
                    "model": self.model,
@ -8577,8 +8511,9 @@ class AIAgent:
                    codex_kwargs = self._build_api_kwargs(api_messages)
                    codex_kwargs.pop("tools", None)
                    retry_response = self._run_codex_stream(codex_kwargs)
-                    retry_msg, _ = self._normalize_codex_response(retry_response)
+                    _ct_retry = self._get_codex_transport()
-                    final_response = (retry_msg.content or "").strip() if retry_msg else ""
+                    _cnr_retry = _ct_retry.normalize_response(retry_response)
                    final_response = (_cnr_retry.content or "").strip()
                elif self.api_mode == "anthropic_messages":
                    _tretry = self._get_anthropic_transport()
                    _ant_kw2 = _tretry.build_kwargs(model=self.model, messages=api_messages, tools=None,
@ -9340,7 +9275,7 @@ class AIAgent:
                    if self._force_ascii_payload:
                        _sanitize_structure_non_ascii(api_kwargs)
                    if self.api_mode == "codex_responses":
-                        api_kwargs = self._preflight_codex_api_kwargs(api_kwargs, allow_stream=False)
+                        api_kwargs = self._get_codex_transport().preflight_kwargs(api_kwargs, allow_stream=False)
                    try:
                        from hermes_cli.plugins import invoke_hook as _invoke_hook
@ -9428,38 +9363,34 @@ class AIAgent:
                    response_invalid = False
                    error_details = []
                    if self.api_mode == "codex_responses":
-                        output_items = getattr(response, "output", None) if response is not None else None
+                        _ct_v = self._get_codex_transport()
-                        if response is None:
+                        if not _ct_v.validate_response(response):
-                            response_invalid = True
+                            if response is None:
                            error_details.append("response is None")
                        elif not isinstance(output_items, list):
                            response_invalid = True
                            error_details.append("response.output is not a list")
                        elif not output_items:
                            # Stream backfill may have failed, but
                            # _normalize_codex_response can still recover
                            # from response.output_text. Only mark invalid
                            # when that fallback is also absent.
                            _out_text = getattr(response, "output_text", None)
                            _out_text_stripped = _out_text.strip() if isinstance(_out_text, str) else ""
                            if _out_text_stripped:
                                logger.debug(
                                    "Codex response.output is empty but output_text is present "
                                    "(%d chars); deferring to normalization.",
                                    len(_out_text_stripped),
                                )
                            else:
                                _resp_status = getattr(response, "status", None)
                                _resp_incomplete = getattr(response, "incomplete_details", None)
                                logger.warning(
                                    "Codex response.output is empty after stream backfill "
                                    "(status=%s, incomplete_details=%s, model=%s). %s",
                                    _resp_status, _resp_incomplete,
                                    getattr(response, "model", None),
                                    f"api_mode={self.api_mode} provider={self.provider}",
                                )
                                response_invalid = True
-                                error_details.append("response.output is empty")
+                                error_details.append("response is None")
                            else:
                                # output_text fallback: stream backfill may have failed
                                # but normalize can still recover from output_text
                                _out_text = getattr(response, "output_text", None)
                                _out_text_stripped = _out_text.strip() if isinstance(_out_text, str) else ""
                                if _out_text_stripped:
                                    logger.debug(
                                        "Codex response.output is empty but output_text is present "
                                        "(%d chars); deferring to normalization.",
                                        len(_out_text_stripped),
                                    )
                                else:
                                    _resp_status = getattr(response, "status", None)
                                    _resp_incomplete = getattr(response, "incomplete_details", None)
                                    logger.warning(
                                        "Codex response.output is empty after stream backfill "
                                        "(status=%s, incomplete_details=%s, model=%s). %s",
                                        _resp_status, _resp_incomplete,
                                        getattr(response, "model", None),
                                        f"api_mode={self.api_mode} provider={self.provider}",
                                    )
                                    response_invalid = True
                                    error_details.append("response.output is empty")
                    elif self.api_mode == "anthropic_messages":
                        _tv = self._get_anthropic_transport()
                        if not _tv.validate_response(response):
@ -10885,7 +10816,40 @@ class AIAgent:
            try:
                if self.api_mode == "codex_responses":
-                    assistant_message, finish_reason = self._normalize_codex_response(response)
+                    _ct = self._get_codex_transport()
                    _cnr = _ct.normalize_response(response)
                    # Back-compat shim: downstream expects SimpleNamespace with
                    # codex-specific fields (.codex_reasoning_items, .reasoning_details,
                    # and .call_id/.response_item_id on tool calls).
                    _tc_list = None
                    if _cnr.tool_calls:
                        _tc_list = []
                        for tc in _cnr.tool_calls:
                            _tc_ns = SimpleNamespace(
                                id=tc.id, type="function",
                                function=SimpleNamespace(name=tc.name, arguments=tc.arguments),
                            )
                            if tc.provider_data:
                                if tc.provider_data.get("call_id"):
                                    _tc_ns.call_id = tc.provider_data["call_id"]
                                if tc.provider_data.get("response_item_id"):
                                    _tc_ns.response_item_id = tc.provider_data["response_item_id"]
                            _tc_list.append(_tc_ns)
                    assistant_message = SimpleNamespace(
                        content=_cnr.content,
                        tool_calls=_tc_list or None,
                        reasoning=_cnr.reasoning,
                        reasoning_content=None,
                        codex_reasoning_items=(
                            _cnr.provider_data.get("codex_reasoning_items")
                            if _cnr.provider_data else None
                        ),
                        reasoning_details=(
                            _cnr.provider_data.get("reasoning_details")
                            if _cnr.provider_data else None
                        ),
                    )
                    finish_reason = _cnr.finish_reason
                elif self.api_mode == "anthropic_messages":
                    _transport = self._get_anthropic_transport()
                    _nr = _transport.normalize_response(
--- a/tests/agent/transports/test_codex_transport.py
+++ b/tests/agent/transports/test_codex_transport.py
@ -0,0 +1,220 @@
 """Tests for the ResponsesApiTransport (Codex)."""
 import json
 import pytest
 from types import SimpleNamespace
 from agent.transports import get_transport
 from agent.transports.types import NormalizedResponse, ToolCall
@pytest.fixture
 def transport():
    """Provide the transport looked up under the ``codex_responses`` api mode."""
    # Imported purely for its import-time side effect (hence the F401 waiver);
    # presumably this is what registers the transport — confirm in
    # agent/transports/codex.py.
    import agent.transports.codex  # noqa: F401
    return get_transport("codex_responses")
 class TestCodexTransportBasic:
    """Smoke checks: reported api mode, registry lookup, tool conversion."""

    def test_api_mode(self, transport):
        """The transport reports the api mode it was looked up under."""
        expected_mode = "codex_responses"
        assert transport.api_mode == expected_mode

    def test_registered_on_import(self, transport):
        """The registry lookup performed by the fixture yields an object."""
        assert transport is not None

    def test_convert_tools(self, transport):
        """One function tool converts to one entry exposing ``type`` and ``name``."""
        parameters = {"type": "object", "properties": {"command": {"type": "string"}}}
        function_spec = {
            "name": "terminal",
            "description": "Run a command",
            "parameters": parameters,
        }
        converted = transport.convert_tools(
            [{"type": "function", "function": function_spec}]
        )
        assert len(converted) == 1
        only_tool = converted[0]
        assert only_tool["type"] == "function"
        assert only_tool["name"] == "terminal"
 class TestCodexBuildKwargs:
    """Checks on the request kwargs produced by ``build_kwargs``."""

    @staticmethod
    def _greeting():
        """Return a fresh conversation holding a single user turn."""
        return [{"role": "user", "content": "Hi"}]

    def test_basic_kwargs(self, transport):
        """Model, instructions, input and store are all populated."""
        conversation = [
            {"role": "system", "content": "You are helpful."},
            {"role": "user", "content": "Hello"},
        ]
        request = transport.build_kwargs(
            model="gpt-5.4", messages=conversation, tools=[],
        )
        assert request["model"] == "gpt-5.4"
        assert request["instructions"] == "You are helpful."
        assert "input" in request
        assert request["store"] is False

    def test_system_extracted_from_messages(self, transport):
        """The system message text is surfaced as ``instructions``."""
        conversation = [
            {"role": "system", "content": "Custom system prompt"},
            {"role": "user", "content": "Hi"},
        ]
        request = transport.build_kwargs(
            model="gpt-5.4", messages=conversation, tools=[],
        )
        assert request["instructions"] == "Custom system prompt"

    def test_no_system_uses_default(self, transport):
        """Without a system message, ``instructions`` is still truthy."""
        request = transport.build_kwargs(
            model="gpt-5.4", messages=self._greeting(), tools=[],
        )
        instructions = request["instructions"]
        assert instructions  # should be non-empty default

    def test_reasoning_config(self, transport):
        """A requested effort level is forwarded under ``reasoning``."""
        request = transport.build_kwargs(
            model="gpt-5.4", messages=self._greeting(), tools=[],
            reasoning_config={"effort": "high"},
        )
        reasoning = request.get("reasoning", {})
        assert reasoning.get("effort") == "high"

    def test_reasoning_disabled(self, transport):
        """Disabled reasoning either omits ``reasoning`` or empties ``include``."""
        request = transport.build_kwargs(
            model="gpt-5.4", messages=self._greeting(), tools=[],
            reasoning_config={"enabled": False},
        )
        reasoning_absent = "reasoning" not in request
        assert reasoning_absent or request.get("include") == []

    def test_session_id_sets_cache_key(self, transport):
        """The session id is reused as ``prompt_cache_key``."""
        request = transport.build_kwargs(
            model="gpt-5.4", messages=self._greeting(), tools=[],
            session_id="test-session-123",
        )
        cache_key = request.get("prompt_cache_key")
        assert cache_key == "test-session-123"

    def test_github_responses_no_cache_key(self, transport):
        """With ``is_github_responses`` set, no ``prompt_cache_key`` is emitted."""
        request = transport.build_kwargs(
            model="gpt-5.4", messages=self._greeting(), tools=[],
            session_id="test-session",
            is_github_responses=True,
        )
        assert "prompt_cache_key" not in request

    def test_max_tokens(self, transport):
        """``max_tokens`` is emitted as ``max_output_tokens``."""
        request = transport.build_kwargs(
            model="gpt-5.4", messages=self._greeting(), tools=[],
            max_tokens=4096,
        )
        output_cap = request.get("max_output_tokens")
        assert output_cap == 4096

    def test_codex_backend_no_max_output_tokens(self, transport):
        """With ``is_codex_backend`` set, ``max_output_tokens`` is left out."""
        request = transport.build_kwargs(
            model="gpt-5.4", messages=self._greeting(), tools=[],
            max_tokens=4096,
            is_codex_backend=True,
        )
        assert "max_output_tokens" not in request

    def test_xai_headers(self, transport):
        """With ``is_xai_responses`` set, the session id goes into ``x-grok-conv-id``."""
        request = transport.build_kwargs(
            model="grok-3", messages=self._greeting(), tools=[],
            session_id="conv-123",
            is_xai_responses=True,
        )
        headers = request.get("extra_headers", {})
        assert headers.get("x-grok-conv-id") == "conv-123"

    def test_minimal_effort_clamped(self, transport):
        """An effort of "minimal" comes back as "low"."""
        request = transport.build_kwargs(
            model="gpt-5.4", messages=self._greeting(), tools=[],
            reasoning_config={"effort": "minimal"},
        )
        # "minimal" should be clamped to "low"
        reasoning = request.get("reasoning", {})
        assert reasoning.get("effort") == "low"
 class TestCodexValidateResponse:
    """Cases pinning which raw responses ``validate_response`` accepts."""

    def test_none_response(self, transport):
        """A missing response is rejected."""
        verdict = transport.validate_response(None)
        assert verdict is False

    def test_empty_output(self, transport):
        """An empty ``output`` list is rejected."""
        empty = SimpleNamespace(output=[], output_text=None)
        assert transport.validate_response(empty) is False

    def test_valid_output(self, transport):
        """A response carrying one output item is accepted."""
        message_item = {"type": "message", "content": []}
        populated = SimpleNamespace(output=[message_item])
        assert transport.validate_response(populated) is True

    def test_output_text_fallback_not_valid(self, transport):
        """Validation is strict: ``output_text`` alone does not make a response valid.

        Per the original test's note, the output_text fallback (with diagnostic
        logging) is handled by the caller rather than by validate_response.
        """
        text_only = SimpleNamespace(output=None, output_text="Some text")
        assert transport.validate_response(text_only) is False
 class TestCodexMapFinishReason:
    """Status-to-finish-reason mapping exercised through ``map_finish_reason``."""

    def test_completed(self, transport):
        """"completed" maps to "stop"."""
        mapped = transport.map_finish_reason("completed")
        assert mapped == "stop"

    def test_incomplete(self, transport):
        """"incomplete" maps to "length"."""
        mapped = transport.map_finish_reason("incomplete")
        assert mapped == "length"

    def test_failed(self, transport):
        """"failed" maps to "stop"."""
        mapped = transport.map_finish_reason("failed")
        assert mapped == "stop"

    def test_unknown(self, transport):
        """An unrecognised status maps to "stop"."""
        mapped = transport.map_finish_reason("unknown_status")
        assert mapped == "stop"
 class TestCodexNormalizeResponse:
    """Checks of ``normalize_response`` against stand-in Responses API payloads.

    The raw responses are ``SimpleNamespace`` stand-ins carrying only the
    attributes set in each test; no real SDK objects are involved.
    """

    def test_text_response(self, transport):
        """Normalize a simple text Codex response."""
        r = SimpleNamespace(
            output=[
                SimpleNamespace(
                    type="message",
                    role="assistant",
                    content=[SimpleNamespace(type="output_text", text="Hello world")],
                    status="completed",
                ),
            ],
            status="completed",
            incomplete_details=None,
            usage=SimpleNamespace(input_tokens=10, output_tokens=5,
                                  input_tokens_details=None, output_tokens_details=None),
        )
        nr = transport.normalize_response(r)
        assert isinstance(nr, NormalizedResponse)
        assert nr.content == "Hello world"
        assert nr.finish_reason == "stop"

    def test_tool_call_response(self, transport):
        """Normalize a Codex response with tool calls."""
        expected_args = {"command": "ls"}
        r = SimpleNamespace(
            output=[
                SimpleNamespace(
                    type="function_call",
                    call_id="call_abc123",
                    name="terminal",
                    arguments=json.dumps(expected_args),
                    id="fc_abc123",
                    status="completed",
                ),
            ],
            status="completed",
            incomplete_details=None,
            usage=SimpleNamespace(input_tokens=10, output_tokens=20,
                                  input_tokens_details=None, output_tokens_details=None),
        )
        nr = transport.normalize_response(r)
        assert nr.finish_reason == "tool_calls"
        assert len(nr.tool_calls) == 1
        tc = nr.tool_calls[0]
        assert tc.name == "terminal"
        # Parse the payload instead of substring-matching it: a substring check
        # would still pass for truncated/malformed JSON or a wrong value, while
        # callers need arguments that json.loads() back to what was sent.
        assert json.loads(tc.arguments) == expected_args
--- a/tests/run_agent/test_provider_parity.py
+++ b/tests/run_agent/test_provider_parity.py
@ -12,6 +12,7 @@ from types import SimpleNamespace
 from unittest.mock import patch, MagicMock
 import pytest
 from agent.codex_responses_adapter import _chat_messages_to_responses_input, _normalize_codex_response, _preflight_codex_input_items
 sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None))
 sys.modules.setdefault("firecrawl", types.SimpleNamespace(Firecrawl=object))
@ -446,7 +447,7 @@ class TestChatMessagesToResponsesInput:
        agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
                            base_url="https://chatgpt.com/backend-api/codex")
        messages = [{"role": "user", "content": "hello"}]
-        items = agent._chat_messages_to_responses_input(messages)
+        items = _chat_messages_to_responses_input(messages)
        assert items == [{"role": "user", "content": "hello"}]
    def test_system_messages_filtered(self, monkeypatch):
@ -456,7 +457,7 @@ class TestChatMessagesToResponsesInput:
            {"role": "system", "content": "be helpful"},
            {"role": "user", "content": "hello"},
        ]
-        items = agent._chat_messages_to_responses_input(messages)
+        items = _chat_messages_to_responses_input(messages)
        assert len(items) == 1
        assert items[0]["role"] == "user"
@ -472,7 +473,7 @@ class TestChatMessagesToResponsesInput:
                "function": {"name": "web_search", "arguments": '{"query": "test"}'},
            }],
        }]
-        items = agent._chat_messages_to_responses_input(messages)
+        items = _chat_messages_to_responses_input(messages)
        fc_items = [i for i in items if i.get("type") == "function_call"]
        assert len(fc_items) == 1
        assert fc_items[0]["name"] == "web_search"
@ -482,7 +483,7 @@ class TestChatMessagesToResponsesInput:
        agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
                            base_url="https://chatgpt.com/backend-api/codex")
        messages = [{"role": "tool", "tool_call_id": "call_abc", "content": "result here"}]
-        items = agent._chat_messages_to_responses_input(messages)
+        items = _chat_messages_to_responses_input(messages)
        assert items[0]["type"] == "function_call_output"
        assert items[0]["call_id"] == "call_abc"
        assert items[0]["output"] == "result here"
@ -502,7 +503,7 @@ class TestChatMessagesToResponsesInput:
            },
            {"role": "user", "content": "continue"},
        ]
-        items = agent._chat_messages_to_responses_input(messages)
+        items = _chat_messages_to_responses_input(messages)
        reasoning_items = [i for i in items if i.get("type") == "reasoning"]
        assert len(reasoning_items) == 1
        assert reasoning_items[0]["encrypted_content"] == "gAAAA_test_blob"
@ -515,7 +516,7 @@ class TestChatMessagesToResponsesInput:
            {"role": "assistant", "content": "hi"},
            {"role": "user", "content": "hello"},
        ]
-        items = agent._chat_messages_to_responses_input(messages)
+        items = _chat_messages_to_responses_input(messages)
        reasoning_items = [i for i in items if i.get("type") == "reasoning"]
        assert len(reasoning_items) == 0
@ -539,7 +540,7 @@ class TestNormalizeCodexResponse:
            ],
            status="completed",
        )
-        msg, reason = agent._normalize_codex_response(response)
+        msg, reason = _normalize_codex_response(response)
        assert msg.content == "Hello!"
        assert reason == "stop"
@ -557,7 +558,7 @@ class TestNormalizeCodexResponse:
            ],
            status="completed",
        )
-        msg, reason = agent._normalize_codex_response(response)
+        msg, reason = _normalize_codex_response(response)
        assert msg.content == "42"
        assert "math" in msg.reasoning
        assert reason == "stop"
@ -576,7 +577,7 @@ class TestNormalizeCodexResponse:
            ],
            status="completed",
        )
-        msg, reason = agent._normalize_codex_response(response)
+        msg, reason = _normalize_codex_response(response)
        assert msg.codex_reasoning_items is not None
        assert len(msg.codex_reasoning_items) == 1
        assert msg.codex_reasoning_items[0]["encrypted_content"] == "gAAAA_secret_blob_123"
@ -592,7 +593,7 @@ class TestNormalizeCodexResponse:
            ],
            status="completed",
        )
-        msg, reason = agent._normalize_codex_response(response)
+        msg, reason = _normalize_codex_response(response)
        assert msg.codex_reasoning_items is None
    def test_tool_calls_extracted(self, monkeypatch):
@ -605,7 +606,7 @@ class TestNormalizeCodexResponse:
            ],
            status="completed",
        )
-        msg, reason = agent._normalize_codex_response(response)
+        msg, reason = _normalize_codex_response(response)
        assert reason == "tool_calls"
        assert len(msg.tool_calls) == 1
        assert msg.tool_calls[0].function.name == "web_search"
@ -821,7 +822,7 @@ class TestCodexReasoningPreflight:
             "summary": [{"type": "summary_text", "text": "Thinking about it"}]},
            {"role": "assistant", "content": "hi there"},
        ]
-        normalized = agent._preflight_codex_input_items(raw_input)
+        normalized = _preflight_codex_input_items(raw_input)
        reasoning_items = [i for i in normalized if i.get("type") == "reasoning"]
        assert len(reasoning_items) == 1
        assert reasoning_items[0]["encrypted_content"] == "abc123encrypted"
@ -837,7 +838,7 @@ class TestCodexReasoningPreflight:
        raw_input = [
            {"type": "reasoning", "encrypted_content": "abc123"},
        ]
-        normalized = agent._preflight_codex_input_items(raw_input)
+        normalized = _preflight_codex_input_items(raw_input)
        assert len(normalized) == 1
        assert "id" not in normalized[0]
        assert normalized[0]["summary"] == []  # default empty summary
@ -849,7 +850,7 @@ class TestCodexReasoningPreflight:
            {"type": "reasoning", "encrypted_content": ""},
            {"role": "user", "content": "hello"},
        ]
-        normalized = agent._preflight_codex_input_items(raw_input)
+        normalized = _preflight_codex_input_items(raw_input)
        reasoning_items = [i for i in normalized if i.get("type") == "reasoning"]
        assert len(reasoning_items) == 0
@ -868,7 +869,7 @@ class TestCodexReasoningPreflight:
            },
            {"role": "user", "content": "follow up"},
        ]
-        items = agent._chat_messages_to_responses_input(messages)
+        items = _chat_messages_to_responses_input(messages)
        reasoning_items = [i for i in items if isinstance(i, dict) and i.get("type") == "reasoning"]
        assert len(reasoning_items) == 1
        assert reasoning_items[0]["encrypted_content"] == "enc123"
--- a/tests/run_agent/test_run_agent.py
+++ b/tests/run_agent/test_run_agent.py
@ -16,6 +16,7 @@ from types import SimpleNamespace
 from unittest.mock import AsyncMock, MagicMock, patch
 import pytest
 from agent.codex_responses_adapter import _chat_messages_to_responses_input, _normalize_codex_response, _preflight_codex_input_items
 import run_agent
 from run_agent import AIAgent
@ -4248,7 +4249,7 @@ class TestNormalizeCodexDictArguments:
        json.dumps, not str(), so downstream json.loads() succeeds."""
        args_dict = {"query": "weather in NYC", "units": "celsius"}
        response = self._make_codex_response("function_call", args_dict)
-        msg, _ = agent._normalize_codex_response(response)
+        msg, _ = _normalize_codex_response(response)
        tc = msg.tool_calls[0]
        parsed = json.loads(tc.function.arguments)
        assert parsed == args_dict
@ -4257,7 +4258,7 @@ class TestNormalizeCodexDictArguments:
        """dict arguments from custom_tool_call must also use json.dumps."""
        args_dict = {"path": "/tmp/test.txt", "content": "hello"}
        response = self._make_codex_response("custom_tool_call", args_dict)
-        msg, _ = agent._normalize_codex_response(response)
+        msg, _ = _normalize_codex_response(response)
        tc = msg.tool_calls[0]
        parsed = json.loads(tc.function.arguments)
        assert parsed == args_dict
@ -4266,7 +4267,7 @@ class TestNormalizeCodexDictArguments:
        """String arguments must pass through without modification."""
        args_str = '{"query": "test"}'
        response = self._make_codex_response("function_call", args_str)
-        msg, _ = agent._normalize_codex_response(response)
+        msg, _ = _normalize_codex_response(response)
        tc = msg.tool_calls[0]
        assert tc.function.arguments == args_str
--- a/tests/run_agent/test_run_agent_codex_responses.py
+++ b/tests/run_agent/test_run_agent_codex_responses.py
@ -640,7 +640,8 @@ def test_run_conversation_codex_tool_round_trip(monkeypatch):
 def test_chat_messages_to_responses_input_uses_call_id_for_function_call(monkeypatch):
    agent = _build_agent(monkeypatch)
-    items = agent._chat_messages_to_responses_input(
+    from agent.codex_responses_adapter import _chat_messages_to_responses_input
    items = _chat_messages_to_responses_input(
        [
            {"role": "user", "content": "Run terminal"},
            {
@ -668,7 +669,8 @@ def test_chat_messages_to_responses_input_uses_call_id_for_function_call(monkeyp
 def test_chat_messages_to_responses_input_accepts_call_pipe_fc_ids(monkeypatch):
    agent = _build_agent(monkeypatch)
-    items = agent._chat_messages_to_responses_input(
+    from agent.codex_responses_adapter import _chat_messages_to_responses_input
    items = _chat_messages_to_responses_input(
        [
            {"role": "user", "content": "Run terminal"},
            {
@ -696,7 +698,8 @@ def test_chat_messages_to_responses_input_accepts_call_pipe_fc_ids(monkeypatch):
 def test_preflight_codex_api_kwargs_strips_optional_function_call_id(monkeypatch):
    agent = _build_agent(monkeypatch)
-    preflight = agent._preflight_codex_api_kwargs(
+    from agent.codex_responses_adapter import _preflight_codex_api_kwargs
    preflight = _preflight_codex_api_kwargs(
        {
            "model": "gpt-5-codex",
            "instructions": "You are Hermes.",
@ -724,7 +727,8 @@ def test_preflight_codex_api_kwargs_rejects_function_call_output_without_call_id
    agent = _build_agent(monkeypatch)
    with pytest.raises(ValueError, match="function_call_output is missing call_id"):
-        agent._preflight_codex_api_kwargs(
+        from agent.codex_responses_adapter import _preflight_codex_api_kwargs
        _preflight_codex_api_kwargs(
            {
                "model": "gpt-5-codex",
                "instructions": "You are Hermes.",
@ -741,7 +745,8 @@ def test_preflight_codex_api_kwargs_rejects_unsupported_request_fields(monkeypat
    kwargs["some_unknown_field"] = "value"
    with pytest.raises(ValueError, match="unsupported field"):
-        agent._preflight_codex_api_kwargs(kwargs)
+        from agent.codex_responses_adapter import _preflight_codex_api_kwargs
        _preflight_codex_api_kwargs(kwargs)
 def test_preflight_codex_api_kwargs_allows_reasoning_and_temperature(monkeypatch):
@ -752,7 +757,8 @@ def test_preflight_codex_api_kwargs_allows_reasoning_and_temperature(monkeypatch
    kwargs["temperature"] = 0.7
    kwargs["max_output_tokens"] = 4096
-    result = agent._preflight_codex_api_kwargs(kwargs)
+    from agent.codex_responses_adapter import _preflight_codex_api_kwargs
    result = _preflight_codex_api_kwargs(kwargs)
    assert result["reasoning"] == {"effort": "high", "summary": "auto"}
    assert result["include"] == ["reasoning.encrypted_content"]
    assert result["temperature"] == 0.7
@ -764,7 +770,8 @@ def test_preflight_codex_api_kwargs_allows_service_tier(monkeypatch):
    kwargs = _codex_request_kwargs()
    kwargs["service_tier"] = "priority"
-    result = agent._preflight_codex_api_kwargs(kwargs)
+    from agent.codex_responses_adapter import _preflight_codex_api_kwargs
    result = _preflight_codex_api_kwargs(kwargs)
    assert result["service_tier"] == "priority"
@ -841,7 +848,8 @@ def test_run_conversation_codex_continues_after_incomplete_interim_message(monke
 def test_normalize_codex_response_marks_commentary_only_message_as_incomplete(monkeypatch):
    agent = _build_agent(monkeypatch)
-    assistant_message, finish_reason = agent._normalize_codex_response(
+    from agent.codex_responses_adapter import _normalize_codex_response
    assistant_message, finish_reason = _normalize_codex_response(
        _codex_commentary_message_response("I'll inspect the repository first.")
    )
@ -1068,7 +1076,8 @@ def test_normalize_codex_response_marks_reasoning_only_as_incomplete(monkeypatch
    sends them into the empty-content retry loop (3 retries then failure).
    """
    agent = _build_agent(monkeypatch)
-    assistant_message, finish_reason = agent._normalize_codex_response(
+    from agent.codex_responses_adapter import _normalize_codex_response
    assistant_message, finish_reason = _normalize_codex_response(
        _codex_reasoning_only_response()
    )
@ -1101,7 +1110,8 @@ def test_normalize_codex_response_reasoning_with_content_is_stop(monkeypatch):
        status="completed",
        model="gpt-5-codex",
    )
-    assistant_message, finish_reason = agent._normalize_codex_response(response)
+    from agent.codex_responses_adapter import _normalize_codex_response
    assistant_message, finish_reason = _normalize_codex_response(response)
    assert finish_reason == "stop"
    assert "Here is the answer" in assistant_message.content
@ -1186,7 +1196,8 @@ def test_chat_messages_to_responses_input_reasoning_only_has_following_item(monk
            ],
        },
    ]
-    items = agent._chat_messages_to_responses_input(messages)
+    from agent.codex_responses_adapter import _chat_messages_to_responses_input
    items = _chat_messages_to_responses_input(messages)
    # Find the reasoning item
    reasoning_indices = [i for i, it in enumerate(items) if it.get("type") == "reasoning"]
@ -1273,7 +1284,8 @@ def test_chat_messages_to_responses_input_deduplicates_reasoning_ids(monkeypatch
            ],
        },
    ]
-    items = agent._chat_messages_to_responses_input(messages)
+    from agent.codex_responses_adapter import _chat_messages_to_responses_input
    items = _chat_messages_to_responses_input(messages)
    reasoning_items = [it for it in items if it.get("type") == "reasoning"]
    # Dedup: rs_aaa appears in both turns but should only be emitted once.
@ -1299,7 +1311,8 @@ def test_preflight_codex_input_deduplicates_reasoning_ids(monkeypatch):
        {"type": "reasoning", "id": "rs_zzz", "encrypted_content": "enc_b"},
        {"role": "assistant", "content": "done"},
    ]
-    normalized = agent._preflight_codex_input_items(raw_input)
+    from agent.codex_responses_adapter import _preflight_codex_input_items
    normalized = _preflight_codex_input_items(raw_input)
    reasoning_items = [it for it in normalized if it.get("type") == "reasoning"]
    # rs_xyz duplicate should be collapsed to one item; rs_zzz kept.