fix(xai-responses): strip enum values containing '/' from tool schemas

xAI's /v1/responses and /v1/chat/completions endpoints reject tool schemas
whose enum values contain a forward slash with a generic HTTP 400 'Invalid
arguments passed to the model.' before any token is emitted — the schema
compiler trips on the '/' character regardless of where it appears.

Most commonly hit by MCP-derived tools whose enum lists HuggingFace model
IDs ('Qwen/Qwen3.5-0.8B', 'openai/gpt-oss-20b') or owner/name environment
identifiers.

Mirrors the existing strip_pattern_and_format sanitizer (PR for #27197).
The new strip_slash_enum walks tool parameters and drops the entire enum
keyword when any value contains '/' — keeping it partial would still 400
since xAI's failure is all-or-nothing on the enum. The field description
still reaches the model so the prompting hint is preserved.

Wired in at both code paths for parity:
  - agent/chat_completion_helpers.py (main agent xAI Responses path)
  - agent/auxiliary_client.py (aux client xAI Responses path, matching
    the same parity guarantee 2fae8fba9 established for pattern/format)

Salvaged from #28021 by @Slimydog21 — contributor's branch was severely
stale (would have reverted ~5000 LOC across azure/kanban/i18n); fix
re-applied surgically on current main with their sanitizer + 9 tests
preserved verbatim. Author noreply email used (original was a Mac
hostname leak).
This commit is contained in:
Slimydog21 2026-05-18 10:36:51 -07:00 committed by Teknium
parent d9331eecee
commit aae1615977
4 changed files with 220 additions and 3 deletions

View file

@ -9,7 +9,11 @@ from __future__ import annotations
import copy
from tools.schema_sanitizer import sanitize_tool_schemas, strip_pattern_and_format
from tools.schema_sanitizer import (
sanitize_tool_schemas,
strip_pattern_and_format,
strip_slash_enum,
)
def _tool(name: str, parameters: dict) -> dict:
@ -491,3 +495,142 @@ def test_strip_responses_mixed_formats():
# Verify structure preserved
assert result[0]["function"]["parameters"]["type"] == "object"
assert result[1]["parameters"]["type"] == "object"
# ─────────────────────────────────────────────────────────────────────────
# strip_slash_enum — reactive recovery when xAI's /v1/responses (and
# /v1/chat/completions) grammar-compiler rejects enum values containing
# a forward slash. Symptom: HTTP 400 "Invalid arguments passed to the
# model" before any token is emitted. Most commonly hit by MCP-derived
# tools whose enum lists HuggingFace IDs like "Qwen/Qwen3.5-0.8B".
# ─────────────────────────────────────────────────────────────────────────
def test_strip_slash_enum_removes_huggingface_id_enum():
"""enum containing HF-style 'owner/name' IDs → stripped."""
tools = [_tool("train", {
"type": "object",
"properties": {
"model": {
"type": "string",
"enum": ["Qwen/Qwen3.5-0.8B", "openai/gpt-oss-20b"],
},
},
})]
_, stripped = strip_slash_enum(tools)
assert stripped == 1
prop = tools[0]["function"]["parameters"]["properties"]["model"]
assert "enum" not in prop
# Type + description survive so the model still gets the prompting hint.
assert prop["type"] == "string"
def test_strip_slash_enum_preserves_slashless_enum():
"""enum without any '/' → preserved."""
tools = [_tool("pick", {
"type": "object",
"properties": {
"mode": {"type": "string", "enum": ["fast", "slow"]},
},
})]
_, stripped = strip_slash_enum(tools)
assert stripped == 0
assert tools[0]["function"]["parameters"]["properties"]["mode"]["enum"] == ["fast", "slow"]
def test_strip_slash_enum_partial_match_strips_whole_enum():
"""Any single value containing '/' triggers removal of the entire enum.
Rationale: if we kept the slashless values, the model could still pick
them, but xAI's grammar-compile failure is all-or-nothing on the enum
keyword keeping a mixed-content enum would still 400. Drop it whole.
"""
tools = [_tool("pick", {
"type": "object",
"properties": {
"target": {"type": "string", "enum": ["local", "hf://Qwen/Qwen3"]},
},
})]
_, stripped = strip_slash_enum(tools)
assert stripped == 1
assert "enum" not in tools[0]["function"]["parameters"]["properties"]["target"]
def test_strip_slash_enum_responses_format():
"""Responses-format tools (no `function` wrapper) are also handled."""
tools = [{
"type": "function",
"name": "mcp_prime_lab_train_model",
"parameters": {
"type": "object",
"properties": {
"model": {
"type": "string",
"enum": ["Qwen/Qwen3.5-0.8B", "meta-llama/Llama-3.2-1B-Instruct"],
},
},
},
}]
_, stripped = strip_slash_enum(tools)
assert stripped == 1
assert "enum" not in tools[0]["parameters"]["properties"]["model"]
def test_strip_slash_enum_recurses_into_anyof():
"""enum-with-slash inside an anyOf variant is also stripped."""
tools = [_tool("t", {
"type": "object",
"properties": {
"value": {
"anyOf": [
{"type": "string", "enum": ["owner/repo"]},
{"type": "null"},
],
},
},
})]
_, stripped = strip_slash_enum(tools)
assert stripped == 1
variants = tools[0]["function"]["parameters"]["properties"]["value"]["anyOf"]
assert "enum" not in variants[0]
assert variants[0]["type"] == "string"
def test_strip_slash_enum_is_idempotent():
"""Second call on already-stripped tools is a no-op."""
tools = [_tool("t", {
"type": "object",
"properties": {"m": {"type": "string", "enum": ["a/b"]}},
})]
_, first = strip_slash_enum(tools)
_, second = strip_slash_enum(tools)
assert first == 1
assert second == 0
def test_strip_slash_enum_empty_returns_zero():
tools, stripped = strip_slash_enum([])
assert tools == []
assert stripped == 0
def test_strip_slash_enum_none_returns_zero():
tools, stripped = strip_slash_enum(None)
assert tools is None
assert stripped == 0
def test_strip_slash_enum_ignores_non_string_enum_values():
"""Integer/boolean enum values can't contain '/' — leave them alone."""
tools = [_tool("t", {
"type": "object",
"properties": {
"level": {"type": "integer", "enum": [1, 2, 3]},
"flag": {"type": "boolean", "enum": [True, False]},
},
})]
_, stripped = strip_slash_enum(tools)
assert stripped == 0
props = tools[0]["function"]["parameters"]["properties"]
assert props["level"]["enum"] == [1, 2, 3]
assert props["flag"]["enum"] == [True, False]