diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 807ed076875..56b2d5c1bfb 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -711,8 +711,12 @@ class _CodexCompletionsAdapter: # keywords (HTTP 400). Strip them here to match the parity guarantee that # chat_completion_helpers.py provides for the main-agent xAI path. try: - from tools.schema_sanitizer import strip_pattern_and_format + from tools.schema_sanitizer import ( + strip_pattern_and_format, + strip_slash_enum, + ) tools, _ = strip_pattern_and_format(list(tools)) + tools, _ = strip_slash_enum(tools) except Exception as exc: logger.warning( "Auxiliary client: failed to sanitize tool schemas for " diff --git a/agent/chat_completion_helpers.py b/agent/chat_completion_helpers.py index 350a54e4066..2e0caebcbe3 100644 --- a/agent/chat_completion_helpers.py +++ b/agent/chat_completion_helpers.py @@ -291,10 +291,17 @@ def build_api_kwargs(agent, api_messages: list) -> dict: # in tool schemas (HTTP 400 "Invalid arguments passed to the model"). # Most commonly hit when MCP-derived tools carry JSON Schema validation # keywords through. Strip them before building kwargs. See #27197. + # It also rejects ``enum`` values containing ``/`` (HuggingFace IDs + # like ``Qwen/Qwen3.5-0.8B`` shipped by MCP servers) — same 400 with + # the same opaque message; strip those enums too. if is_xai_responses: try: - from tools.schema_sanitizer import strip_pattern_and_format + from tools.schema_sanitizer import ( + strip_pattern_and_format, + strip_slash_enum, + ) tools_for_api, _ = strip_pattern_and_format(tools_for_api) + tools_for_api, _ = strip_slash_enum(tools_for_api) except Exception as exc: logger.warning( "%s⚠️ Failed to sanitize tool schemas for xAI: %s", diff --git a/tests/tools/test_schema_sanitizer.py b/tests/tools/test_schema_sanitizer.py index 8c865e87b8d..b856440ef40 100644 --- a/tests/tools/test_schema_sanitizer.py +++ b/tests/tools/test_schema_sanitizer.py @@ -9,7 +9,11 @@ from __future__ import annotations import copy -from tools.schema_sanitizer import sanitize_tool_schemas, strip_pattern_and_format +from tools.schema_sanitizer import ( + sanitize_tool_schemas, + strip_pattern_and_format, + strip_slash_enum, +) def _tool(name: str, parameters: dict) -> dict: @@ -491,3 +495,142 @@ def test_strip_responses_mixed_formats(): # Verify structure preserved assert result[0]["function"]["parameters"]["type"] == "object" assert result[1]["parameters"]["type"] == "object" + + +# ───────────────────────────────────────────────────────────────────────── +# strip_slash_enum — reactive recovery when xAI's /v1/responses (and +# /v1/chat/completions) grammar-compiler rejects enum values containing +# a forward slash. Symptom: HTTP 400 "Invalid arguments passed to the +# model" before any token is emitted. Most commonly hit by MCP-derived +# tools whose enum lists HuggingFace IDs like "Qwen/Qwen3.5-0.8B". +# ───────────────────────────────────────────────────────────────────────── + + +def test_strip_slash_enum_removes_huggingface_id_enum(): + """enum containing HF-style 'owner/name' IDs → stripped.""" + tools = [_tool("train", { + "type": "object", + "properties": { + "model": { + "type": "string", + "enum": ["Qwen/Qwen3.5-0.8B", "openai/gpt-oss-20b"], + }, + }, + })] + _, stripped = strip_slash_enum(tools) + assert stripped == 1 + prop = tools[0]["function"]["parameters"]["properties"]["model"] + assert "enum" not in prop + # Type + description survive so the model still gets the prompting hint. + assert prop["type"] == "string" + + +def test_strip_slash_enum_preserves_slashless_enum(): + """enum without any '/' → preserved.""" + tools = [_tool("pick", { + "type": "object", + "properties": { + "mode": {"type": "string", "enum": ["fast", "slow"]}, + }, + })] + _, stripped = strip_slash_enum(tools) + assert stripped == 0 + assert tools[0]["function"]["parameters"]["properties"]["mode"]["enum"] == ["fast", "slow"] + + +def test_strip_slash_enum_partial_match_strips_whole_enum(): + """Any single value containing '/' triggers removal of the entire enum. + + Rationale: if we kept the slashless values, the model could still pick + them, but xAI's grammar-compile failure is all-or-nothing on the enum + keyword — keeping a mixed-content enum would still 400. Drop it whole. + """ + tools = [_tool("pick", { + "type": "object", + "properties": { + "target": {"type": "string", "enum": ["local", "hf://Qwen/Qwen3"]}, + }, + })] + _, stripped = strip_slash_enum(tools) + assert stripped == 1 + assert "enum" not in tools[0]["function"]["parameters"]["properties"]["target"] + + +def test_strip_slash_enum_responses_format(): + """Responses-format tools (no `function` wrapper) are also handled.""" + tools = [{ + "type": "function", + "name": "mcp_prime_lab_train_model", + "parameters": { + "type": "object", + "properties": { + "model": { + "type": "string", + "enum": ["Qwen/Qwen3.5-0.8B", "meta-llama/Llama-3.2-1B-Instruct"], + }, + }, + }, + }] + _, stripped = strip_slash_enum(tools) + assert stripped == 1 + assert "enum" not in tools[0]["parameters"]["properties"]["model"] + + +def test_strip_slash_enum_recurses_into_anyof(): + """enum-with-slash inside an anyOf variant is also stripped.""" + tools = [_tool("t", { + "type": "object", + "properties": { + "value": { + "anyOf": [ + {"type": "string", "enum": ["owner/repo"]}, + {"type": "null"}, + ], + }, + }, + })] + _, stripped = strip_slash_enum(tools) + assert stripped == 1 + variants = tools[0]["function"]["parameters"]["properties"]["value"]["anyOf"] + assert "enum" not in variants[0] + assert variants[0]["type"] == "string" + + +def test_strip_slash_enum_is_idempotent(): + """Second call on already-stripped tools is a no-op.""" + tools = [_tool("t", { + "type": "object", + "properties": {"m": {"type": "string", "enum": ["a/b"]}}, + })] + _, first = strip_slash_enum(tools) + _, second = strip_slash_enum(tools) + assert first == 1 + assert second == 0 + + +def test_strip_slash_enum_empty_returns_zero(): + tools, stripped = strip_slash_enum([]) + assert tools == [] + assert stripped == 0 + + +def test_strip_slash_enum_none_returns_zero(): + tools, stripped = strip_slash_enum(None) + assert tools is None + assert stripped == 0 + + +def test_strip_slash_enum_ignores_non_string_enum_values(): + """Integer/boolean enum values can't contain '/' — leave them alone.""" + tools = [_tool("t", { + "type": "object", + "properties": { + "level": {"type": "integer", "enum": [1, 2, 3]}, + "flag": {"type": "boolean", "enum": [True, False]}, + }, + })] + _, stripped = strip_slash_enum(tools) + assert stripped == 0 + props = tools[0]["function"]["parameters"]["properties"] + assert props["level"]["enum"] == [1, 2, 3] + assert props["flag"]["enum"] == [True, False] diff --git a/tools/schema_sanitizer.py b/tools/schema_sanitizer.py index 0d03998d366..e9677ac4a1b 100644 --- a/tools/schema_sanitizer.py +++ b/tools/schema_sanitizer.py @@ -380,3 +380,66 @@ def strip_pattern_and_format(tools: list[dict]) -> tuple[list[dict], int]: stripped, ) return tools, stripped + + +def strip_slash_enum(tools: list[dict]) -> tuple[list[dict], int]: + """Strip ``enum`` keywords whose string values contain a forward slash. + + xAI's ``/v1/responses`` and ``/v1/chat/completions`` endpoints compile + tool schemas to a grammar that rejects ``enum`` values containing ``/`` + (the request fails with HTTP 400 "Invalid arguments passed to the + model" before any token is emitted). Most commonly hit by MCP-derived + tools whose enum lists HuggingFace model IDs (``Qwen/Qwen3.5-0.8B``, + ``openai/gpt-oss-20b``) or owner/name environment IDs. The constraint + is purely a prompting hint; dropping it lets the model still see the + field description and pick a value, without xAI tripping on the slash. + + Args: + tools: OpenAI-format or Responses-format tool list, mutated in + place. Callers that need to preserve the original should + deep-copy first. + + Returns: + ``(tools, stripped_count)`` — same list reference plus a count of + how many ``enum`` keywords were removed. + """ + if not tools: + return tools, 0 + + stripped = 0 + + def _walk(node: Any) -> None: + nonlocal stripped + if isinstance(node, dict): + enum_val = node.get("enum") + if isinstance(enum_val, list) and any( + isinstance(v, str) and "/" in v for v in enum_val + ): + node.pop("enum", None) + stripped += 1 + for v in node.values(): + _walk(v) + elif isinstance(node, list): + for item in node: + _walk(item) + + for tool in tools: + if not isinstance(tool, dict): + continue + fn = tool.get("function") + if isinstance(fn, dict): + params = fn.get("parameters") + if isinstance(params, dict): + _walk(params) + continue + params = tool.get("parameters") + if isinstance(params, dict): + _walk(params) + + if stripped: + logger.info( + "schema_sanitizer: stripped %d enum keyword(s) containing '/' " + "from tool schemas (xAI Responses grammar-compile recovery)", + stripped, + ) + return tools, stripped