mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-30 06:41:51 +00:00
xAI's /v1/responses endpoint rejects service_tier with HTTP 400 "Argument not supported: service_tier" when users activate /fast mode. Also add a safety-net strip_slash_enum call in _preflight_codex_api_kwargs to catch any tool schemas that might slip through the caller-level sanitization. xAI's Responses API grammar compiler rejects enum values containing forward slashes (e.g. HuggingFace model IDs like "Qwen/Qwen3.5-0.8B") with the opaque "Invalid arguments passed to the model" error. Fixes the root cause of "Invalid arguments passed to the model" errors reported by xAI OAuth (SuperGrok) users.
351 lines
16 KiB
Python
351 lines
16 KiB
Python
"""OpenAI Responses API (Codex) transport.
|
|
|
|
Delegates to the existing adapter functions in agent/codex_responses_adapter.py.
|
|
This transport owns format conversion and normalization — NOT client lifecycle,
|
|
streaming, or the _run_codex_stream() call path.
|
|
"""
|
|
|
|
from typing import Any, Dict, List, Optional
|
|
|
|
from agent.transports.base import ProviderTransport
|
|
from agent.transports.types import NormalizedResponse, ToolCall
|
|
|
|
|
|
class ResponsesApiTransport(ProviderTransport):
|
|
"""Transport for api_mode='codex_responses'.
|
|
|
|
Wraps the functions extracted into codex_responses_adapter.py (PR 1).
|
|
"""
|
|
|
|
# Issuer kind of the most recent build_kwargs / convert_messages call.
|
|
# Used as a fallback when normalize_response is invoked without an
|
|
# explicit ``issuer_kind`` kwarg, so reasoning items captured from a
|
|
# response are stamped with the endpoint that minted them. Plain class
|
|
# attribute default; mutated on the instance, not the class.
|
|
_last_issuer_kind: Optional[str] = None
|
|
|
|
@property
|
|
def api_mode(self) -> str:
|
|
return "codex_responses"
|
|
|
|
def _resolve_issuer_kind(self, params: Dict[str, Any]) -> str:
|
|
"""Classify the current Responses endpoint from transport params."""
|
|
from agent.codex_responses_adapter import _classify_responses_issuer
|
|
return _classify_responses_issuer(
|
|
is_xai_responses=bool(params.get("is_xai_responses")),
|
|
is_github_responses=bool(params.get("is_github_responses")),
|
|
is_codex_backend=bool(params.get("is_codex_backend")),
|
|
base_url=params.get("base_url"),
|
|
)
|
|
|
|
def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any:
|
|
"""Convert OpenAI chat messages to Responses API input items."""
|
|
from agent.codex_responses_adapter import _chat_messages_to_responses_input
|
|
issuer = self._resolve_issuer_kind(kwargs)
|
|
self._last_issuer_kind = issuer
|
|
return _chat_messages_to_responses_input(
|
|
messages,
|
|
is_xai_responses=bool(kwargs.get("is_xai_responses")),
|
|
replay_encrypted_reasoning=bool(
|
|
kwargs.get("replay_encrypted_reasoning", True)
|
|
),
|
|
current_issuer_kind=issuer,
|
|
)
|
|
|
|
def convert_tools(self, tools: List[Dict[str, Any]]) -> Any:
|
|
"""Convert OpenAI tool schemas to Responses API function definitions."""
|
|
from agent.codex_responses_adapter import _responses_tools
|
|
return _responses_tools(tools)
|
|
|
|
def build_kwargs(
|
|
self,
|
|
model: str,
|
|
messages: List[Dict[str, Any]],
|
|
tools: Optional[List[Dict[str, Any]]] = None,
|
|
**params,
|
|
) -> Dict[str, Any]:
|
|
"""Build Responses API kwargs.
|
|
|
|
Calls convert_messages and convert_tools internally.
|
|
|
|
params:
|
|
instructions: str — system prompt (extracted from messages[0] if not given)
|
|
reasoning_config: dict | None — {effort, enabled}
|
|
session_id: str | None — used for prompt_cache_key + xAI conv header
|
|
max_tokens: int | None — max_output_tokens
|
|
timeout: float | None — per-request timeout forwarded to the SDK
|
|
request_overrides: dict | None — extra kwargs merged in
|
|
provider: str | None — provider name for backend-specific logic
|
|
base_url: str | None — endpoint URL
|
|
base_url_hostname: str | None — hostname for backend detection
|
|
is_github_responses: bool — Copilot/GitHub models backend
|
|
is_codex_backend: bool — chatgpt.com/backend-api/codex
|
|
is_xai_responses: bool — xAI/Grok backend
|
|
github_reasoning_extra: dict | None — Copilot reasoning params
|
|
"""
|
|
from agent.codex_responses_adapter import (
|
|
_chat_messages_to_responses_input,
|
|
_responses_tools,
|
|
)
|
|
|
|
from run_agent import DEFAULT_AGENT_IDENTITY
|
|
|
|
instructions = params.get("instructions", "")
|
|
payload_messages = messages
|
|
if not instructions:
|
|
if messages and messages[0].get("role") == "system":
|
|
instructions = str(messages[0].get("content") or "").strip()
|
|
payload_messages = messages[1:]
|
|
if not instructions:
|
|
instructions = DEFAULT_AGENT_IDENTITY
|
|
|
|
is_github_responses = params.get("is_github_responses", False)
|
|
is_codex_backend = params.get("is_codex_backend", False)
|
|
is_xai_responses = params.get("is_xai_responses", False)
|
|
replay_encrypted_reasoning = bool(
|
|
params.get("replay_encrypted_reasoning", True)
|
|
)
|
|
|
|
# Resolve the issuing endpoint for this call. Stashed on the
|
|
# transport so normalize_response can stamp it onto reasoning
|
|
# items captured from the response, and passed to the input
|
|
# converter so foreign-issuer reasoning blocks in history are
|
|
# dropped before the API rejects them.
|
|
issuer_kind = self._resolve_issuer_kind(params)
|
|
self._last_issuer_kind = issuer_kind
|
|
|
|
# Resolve reasoning effort
|
|
reasoning_effort = "medium"
|
|
reasoning_enabled = True
|
|
reasoning_config = params.get("reasoning_config")
|
|
if reasoning_config and isinstance(reasoning_config, dict):
|
|
if reasoning_config.get("enabled") is False:
|
|
reasoning_enabled = False
|
|
elif reasoning_config.get("effort"):
|
|
reasoning_effort = reasoning_config["effort"]
|
|
|
|
_effort_clamp = {"minimal": "low"}
|
|
reasoning_effort = _effort_clamp.get(reasoning_effort, reasoning_effort)
|
|
|
|
response_tools = _responses_tools(tools)
|
|
kwargs = {
|
|
"model": model,
|
|
"instructions": instructions,
|
|
"input": _chat_messages_to_responses_input(
|
|
payload_messages,
|
|
is_xai_responses=is_xai_responses,
|
|
replay_encrypted_reasoning=replay_encrypted_reasoning,
|
|
current_issuer_kind=issuer_kind,
|
|
),
|
|
"tools": response_tools,
|
|
"store": False,
|
|
}
|
|
if response_tools:
|
|
kwargs["tool_choice"] = "auto"
|
|
kwargs["parallel_tool_calls"] = True
|
|
|
|
session_id = params.get("session_id")
|
|
# xAI Responses takes prompt_cache_key in extra_body (set further
|
|
# down); GitHub Models opts out of cache-key routing entirely.
|
|
if not is_github_responses and not is_xai_responses and session_id:
|
|
kwargs["prompt_cache_key"] = session_id
|
|
|
|
if reasoning_enabled and is_xai_responses:
|
|
from agent.model_metadata import grok_supports_reasoning_effort
|
|
|
|
# Ask xAI to echo back encrypted reasoning items so we can
|
|
# replay them on subsequent turns for cross-turn coherence.
|
|
# See agent/codex_responses_adapter._chat_messages_to_responses_input
|
|
# for the May 2026 reversal of the earlier suppression gate.
|
|
kwargs["include"] = (
|
|
["reasoning.encrypted_content"] if replay_encrypted_reasoning else []
|
|
)
|
|
# xAI rejects `reasoning.effort` on grok-4 / grok-4-fast / grok-3
|
|
# / grok-code-fast / grok-4.20-0309-* with HTTP 400 even though
|
|
# those models reason natively. Only send the effort dial when
|
|
# the target model is on the allowlist; otherwise send no
|
|
# `reasoning` key at all and let the model reason on its own.
|
|
if grok_supports_reasoning_effort(model):
|
|
kwargs["reasoning"] = {"effort": reasoning_effort}
|
|
elif reasoning_enabled:
|
|
if is_github_responses:
|
|
github_reasoning = params.get("github_reasoning_extra")
|
|
if github_reasoning is not None:
|
|
kwargs["reasoning"] = github_reasoning
|
|
else:
|
|
kwargs["reasoning"] = {"effort": reasoning_effort, "summary": "auto"}
|
|
kwargs["include"] = (
|
|
["reasoning.encrypted_content"] if replay_encrypted_reasoning else []
|
|
)
|
|
elif not is_github_responses and not is_xai_responses:
|
|
kwargs["include"] = []
|
|
|
|
request_overrides = params.get("request_overrides")
|
|
if request_overrides:
|
|
kwargs.update(request_overrides)
|
|
|
|
# xAI Responses API rejects ``service_tier`` (HTTP 400 "Argument not
|
|
# supported: service_tier") — hit when ``/fast`` priority-processing
|
|
# mode lingers from a prior model in the same session, or when a
|
|
# user explicitly sets ``agent.service_tier`` in config.yaml. The
|
|
# main-loop guard (``resolve_fast_mode_overrides`` only returns
|
|
# ``service_tier`` for OpenAI fast-eligible models) doesn't cover
|
|
# those leak paths, so strip defensively when targeting xAI. See
|
|
# #28490 for the original report.
|
|
if is_xai_responses:
|
|
kwargs.pop("service_tier", None)
|
|
|
|
# Forward per-request timeout to the SDK so OpenAI/Anthropic clients
|
|
# honor it. Without this, ``providers.<id>.request_timeout_seconds``
|
|
# is silently dropped on the main agent Codex path while the
|
|
# chat_completions path and auxiliary Codex adapter both forward it.
|
|
timeout = kwargs.get("timeout", params.get("timeout"))
|
|
if (
|
|
isinstance(timeout, (int, float))
|
|
and not isinstance(timeout, bool)
|
|
and 0 < float(timeout) < float("inf")
|
|
):
|
|
kwargs["timeout"] = float(timeout)
|
|
else:
|
|
kwargs.pop("timeout", None)
|
|
|
|
if is_codex_backend:
|
|
prompt_cache_key = kwargs.get("prompt_cache_key")
|
|
cache_scope_id = str(prompt_cache_key or session_id or "").strip()
|
|
if cache_scope_id:
|
|
existing_extra_headers = kwargs.get("extra_headers")
|
|
merged_extra_headers: Dict[str, str] = {}
|
|
if isinstance(existing_extra_headers, dict):
|
|
merged_extra_headers.update(
|
|
{
|
|
str(key): str(value)
|
|
for key, value in existing_extra_headers.items()
|
|
if key and value is not None
|
|
}
|
|
)
|
|
merged_extra_headers["session_id"] = cache_scope_id
|
|
merged_extra_headers["x-client-request-id"] = cache_scope_id
|
|
kwargs["extra_headers"] = merged_extra_headers
|
|
|
|
max_tokens = params.get("max_tokens")
|
|
if max_tokens is not None and not is_codex_backend:
|
|
kwargs["max_output_tokens"] = max_tokens
|
|
|
|
if is_xai_responses and session_id:
|
|
existing_extra_headers = kwargs.get("extra_headers")
|
|
merged_extra_headers: Dict[str, str] = {}
|
|
if isinstance(existing_extra_headers, dict):
|
|
merged_extra_headers.update(
|
|
{
|
|
str(key): str(value)
|
|
for key, value in existing_extra_headers.items()
|
|
if key and value is not None
|
|
}
|
|
)
|
|
merged_extra_headers["x-grok-conv-id"] = session_id
|
|
kwargs["extra_headers"] = merged_extra_headers
|
|
|
|
# xAI Responses cache-routing — body-level field per
|
|
# https://docs.x.ai/developers/advanced-api-usage/prompt-caching/maximizing-cache-hits.
|
|
# Sent via extra_body (not the typed kwarg) so it survives openai
|
|
# SDK builds whose Responses.stream() signature has dropped the field.
|
|
existing_extra_body = kwargs.get("extra_body")
|
|
merged_extra_body: Dict[str, Any] = {}
|
|
if isinstance(existing_extra_body, dict):
|
|
merged_extra_body.update(existing_extra_body)
|
|
merged_extra_body.setdefault("prompt_cache_key", session_id)
|
|
kwargs["extra_body"] = merged_extra_body
|
|
|
|
return kwargs
|
|
|
|
def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
|
|
"""Normalize Codex Responses API response to NormalizedResponse."""
|
|
from agent.codex_responses_adapter import (
|
|
_normalize_codex_response,
|
|
)
|
|
|
|
# Issuer for this response = explicit kwarg if the caller knows it,
|
|
# otherwise the stash from the matching build_kwargs/convert_messages
|
|
# call. Either way it gets stamped onto reasoning items so future
|
|
# turns can detect a model swap and drop foreign-issuer blobs.
|
|
issuer_kind = kwargs.get("issuer_kind") or self._last_issuer_kind
|
|
# _normalize_codex_response returns (SimpleNamespace, finish_reason_str)
|
|
msg, finish_reason = _normalize_codex_response(response, issuer_kind=issuer_kind)
|
|
|
|
tool_calls = None
|
|
if msg and msg.tool_calls:
|
|
tool_calls = []
|
|
for tc in msg.tool_calls:
|
|
provider_data = {}
|
|
if hasattr(tc, "call_id") and tc.call_id:
|
|
provider_data["call_id"] = tc.call_id
|
|
if hasattr(tc, "response_item_id") and tc.response_item_id:
|
|
provider_data["response_item_id"] = tc.response_item_id
|
|
tool_calls.append(ToolCall(
|
|
id=tc.id if hasattr(tc, "id") else (tc.function.name if hasattr(tc, "function") else None),
|
|
name=tc.function.name if hasattr(tc, "function") else getattr(tc, "name", ""),
|
|
arguments=tc.function.arguments if hasattr(tc, "function") else getattr(tc, "arguments", "{}"),
|
|
provider_data=provider_data or None,
|
|
))
|
|
|
|
# Extract reasoning items for provider_data
|
|
provider_data = {}
|
|
if msg and hasattr(msg, "codex_reasoning_items") and msg.codex_reasoning_items:
|
|
provider_data["codex_reasoning_items"] = msg.codex_reasoning_items
|
|
if msg and hasattr(msg, "codex_message_items") and msg.codex_message_items:
|
|
provider_data["codex_message_items"] = msg.codex_message_items
|
|
if msg and hasattr(msg, "reasoning_details") and msg.reasoning_details:
|
|
provider_data["reasoning_details"] = msg.reasoning_details
|
|
|
|
return NormalizedResponse(
|
|
content=msg.content if msg else None,
|
|
tool_calls=tool_calls,
|
|
finish_reason=finish_reason or "stop",
|
|
reasoning=msg.reasoning if msg and hasattr(msg, "reasoning") else None,
|
|
usage=None, # Codex usage is extracted separately in normalize_usage()
|
|
provider_data=provider_data or None,
|
|
)
|
|
|
|
def validate_response(self, response: Any) -> bool:
|
|
"""Check Codex Responses API response has valid output structure.
|
|
|
|
Returns True only if response.output is a non-empty list.
|
|
Does NOT check output_text fallback — the caller handles that
|
|
with diagnostic logging for stream backfill recovery.
|
|
"""
|
|
if response is None:
|
|
return False
|
|
output = getattr(response, "output", None)
|
|
if not isinstance(output, list) or not output:
|
|
return False
|
|
return True
|
|
|
|
def preflight_kwargs(self, api_kwargs: Any, *, allow_stream: bool = False) -> dict:
|
|
"""Validate and sanitize Codex API kwargs before the call.
|
|
|
|
Normalizes input items, strips unsupported fields, validates structure.
|
|
"""
|
|
from agent.codex_responses_adapter import _preflight_codex_api_kwargs
|
|
return _preflight_codex_api_kwargs(api_kwargs, allow_stream=allow_stream)
|
|
|
|
def map_finish_reason(self, raw_reason: str) -> str:
|
|
"""Map Codex response.status to OpenAI finish_reason.
|
|
|
|
Codex uses response.status ('completed', 'incomplete') +
|
|
response.incomplete_details.reason for granular mapping.
|
|
This method handles the simple status string; the caller
|
|
should check incomplete_details separately for 'max_output_tokens'.
|
|
"""
|
|
_MAP = {
|
|
"completed": "stop",
|
|
"incomplete": "length",
|
|
"failed": "stop",
|
|
"cancelled": "stop",
|
|
}
|
|
return _MAP.get(raw_reason, "stop")
|
|
|
|
|
|
# Auto-register on import
|
|
from agent.transports import register_transport # noqa: E402
|
|
|
|
register_transport("codex_responses", ResponsesApiTransport)
|