"""Tests for tools/schema_sanitizer.py. Targets the known llama.cpp ``json-schema-to-grammar`` failure modes that cause ``HTTP 400: Unable to generate parser for this template. ... Unrecognized schema: "object"`` errors on local inference backends. """ from __future__ import annotations import copy from tools.schema_sanitizer import ( sanitize_tool_schemas, strip_pattern_and_format, strip_slash_enum, ) def _tool(name: str, parameters: dict) -> dict: return {"type": "function", "function": {"name": name, "parameters": parameters}} def test_object_without_properties_gets_empty_properties(): tools = [_tool("t", {"type": "object"})] out = sanitize_tool_schemas(tools) assert out[0]["function"]["parameters"] == {"type": "object", "properties": {}} def test_nested_object_without_properties_gets_empty_properties(): tools = [_tool("t", { "type": "object", "properties": { "name": {"type": "string"}, "arguments": {"type": "object", "description": "free-form"}, }, "required": ["name"], })] out = sanitize_tool_schemas(tools) args = out[0]["function"]["parameters"]["properties"]["arguments"] assert args["type"] == "object" assert args["properties"] == {} assert args["description"] == "free-form" def test_bare_string_object_value_replaced_with_schema_dict(): # Malformed: a property's schema value is the bare string "object". # This is the exact shape llama.cpp reports as `Unrecognized schema: "object"`. tools = [_tool("t", { "type": "object", "properties": { "payload": "object", # <-- invalid, should be {"type": "object"} }, })] out = sanitize_tool_schemas(tools) payload = out[0]["function"]["parameters"]["properties"]["payload"] assert isinstance(payload, dict) assert payload["type"] == "object" assert payload["properties"] == {} def test_bare_string_primitive_value_replaced_with_schema_dict(): tools = [_tool("t", { "type": "object", "properties": {"name": "string"}, })] out = sanitize_tool_schemas(tools) assert out[0]["function"]["parameters"]["properties"]["name"] == {"type": "string"} def test_nullable_type_array_collapsed_to_single_string(): tools = [_tool("t", { "type": "object", "properties": { "maybe_name": {"type": ["string", "null"]}, }, })] out = sanitize_tool_schemas(tools) prop = out[0]["function"]["parameters"]["properties"]["maybe_name"] assert prop["type"] == "string" assert prop.get("nullable") is True def test_anyof_nested_objects_sanitized(): tools = [_tool("t", { "type": "object", "properties": { "opt": { "anyOf": [ {"type": "object"}, # bare object {"type": "string"}, ], }, }, })] out = sanitize_tool_schemas(tools) variants = out[0]["function"]["parameters"]["properties"]["opt"]["anyOf"] assert variants[0] == {"type": "object", "properties": {}} assert variants[1] == {"type": "string"} def test_missing_parameters_gets_default_object_schema(): tools = [{"type": "function", "function": {"name": "t"}}] out = sanitize_tool_schemas(tools) assert out[0]["function"]["parameters"] == {"type": "object", "properties": {}} def test_non_dict_parameters_gets_default_object_schema(): tools = [_tool("t", "object")] # pathological out = sanitize_tool_schemas(tools) assert out[0]["function"]["parameters"] == {"type": "object", "properties": {}} def test_required_pruned_to_existing_properties(): tools = [_tool("t", { "type": "object", "properties": {"name": {"type": "string"}}, "required": ["name", "missing_field"], })] out = sanitize_tool_schemas(tools) assert out[0]["function"]["parameters"]["required"] == ["name"] def test_required_all_missing_is_dropped(): tools = [_tool("t", { "type": "object", "properties": {}, "required": ["x", "y"], })] out = sanitize_tool_schemas(tools) assert "required" not in out[0]["function"]["parameters"] def test_well_formed_schema_unchanged(): schema = { "type": "object", "properties": { "path": {"type": "string", "description": "File path"}, "offset": {"type": "integer", "minimum": 1}, }, "required": ["path"], } tools = [_tool("read_file", copy.deepcopy(schema))] out = sanitize_tool_schemas(tools) assert out[0]["function"]["parameters"] == schema def test_additional_properties_bool_preserved(): tools = [_tool("t", { "type": "object", "properties": { "payload": { "type": "object", "properties": {}, "additionalProperties": True, }, }, })] out = sanitize_tool_schemas(tools) payload = out[0]["function"]["parameters"]["properties"]["payload"] assert payload["additionalProperties"] is True def test_additional_properties_schema_sanitized(): tools = [_tool("t", { "type": "object", "properties": { "dict_field": { "type": "object", "additionalProperties": {"type": "object"}, # bare object schema }, }, })] out = sanitize_tool_schemas(tools) field = out[0]["function"]["parameters"]["properties"]["dict_field"] assert field["additionalProperties"] == {"type": "object", "properties": {}} def test_deepcopy_does_not_mutate_input(): original = { "type": "object", "properties": {"x": {"type": "object"}}, } tools = [_tool("t", original)] _ = sanitize_tool_schemas(tools) # Original should still lack properties on the nested object assert "properties" not in original["properties"]["x"] def test_items_sanitized_in_array_schema(): tools = [_tool("t", { "type": "object", "properties": { "bag": { "type": "array", "items": {"type": "object"}, # bare object items }, }, })] out = sanitize_tool_schemas(tools) items = out[0]["function"]["parameters"]["properties"]["bag"]["items"] assert items == {"type": "object", "properties": {}} def test_empty_tools_list_returns_empty(): assert sanitize_tool_schemas([]) == [] def test_none_tools_returns_none(): assert sanitize_tool_schemas(None) is None # ───────────────────────────────────────────────────────────────────────── # strip_pattern_and_format — reactive recovery when llama.cpp rejects a # schema with an HTTP 400 grammar-parse error. Must be opt-in (only # invoked on recovery) and must not damage property names. # ───────────────────────────────────────────────────────────────────────── def test_strip_pattern_removes_schema_pattern_keyword(): """`pattern` as a sibling of `type` → stripped.""" tools = [_tool("t", { "type": "object", "properties": { "date": {"type": "string", "pattern": "\\d{4,4}-\\d{2,2}-\\d{2,2}"}, }, })] _, stripped = strip_pattern_and_format(tools) assert stripped == 1 prop = tools[0]["function"]["parameters"]["properties"]["date"] assert "pattern" not in prop assert prop["type"] == "string" def test_strip_format_removes_schema_format_keyword(): """`format` as a sibling of `type` → stripped.""" tools = [_tool("t", { "type": "object", "properties": { "ts": {"type": "string", "format": "date-time"}, }, })] _, stripped = strip_pattern_and_format(tools) assert stripped == 1 assert "format" not in tools[0]["function"]["parameters"]["properties"]["ts"] def test_strip_preserves_property_named_pattern(): """Property literally *named* 'pattern' (search_files) must survive.""" tools = [_tool("search_files", { "type": "object", "properties": { "pattern": {"type": "string", "description": "Regex pattern..."}, "limit": {"type": "integer"}, }, "required": ["pattern"], })] _, stripped = strip_pattern_and_format(tools) assert stripped == 0 params = tools[0]["function"]["parameters"] # Property named "pattern" still exists with its schema intact assert "pattern" in params["properties"] assert params["properties"]["pattern"]["type"] == "string" assert params["required"] == ["pattern"] def test_strip_recurses_into_anyof_variants(): """Pattern/format inside anyOf variant schemas are also stripped.""" tools = [_tool("t", { "type": "object", "properties": { "value": { "anyOf": [ {"type": "string", "pattern": "[A-Z]+", "format": "uuid"}, {"type": "integer"}, ], }, }, })] _, stripped = strip_pattern_and_format(tools) assert stripped == 2 variants = tools[0]["function"]["parameters"]["properties"]["value"]["anyOf"] assert "pattern" not in variants[0] assert "format" not in variants[0] assert variants[0]["type"] == "string" def test_strip_is_idempotent(): """Second call on already-stripped tools is a no-op.""" tools = [_tool("t", { "type": "object", "properties": {"d": {"type": "string", "pattern": "\\d+"}}, })] _, first = strip_pattern_and_format(tools) _, second = strip_pattern_and_format(tools) assert first == 1 assert second == 0 def test_strip_empty_tools_returns_zero(): tools, stripped = strip_pattern_and_format([]) assert tools == [] assert stripped == 0 def test_strip_none_returns_zero(): tools, stripped = strip_pattern_and_format(None) assert tools is None assert stripped == 0 def test_strip_responses_format_strips_format_keyword(): """Responses-format: keyword should be stripped.""" from tools.schema_sanitizer import strip_pattern_and_format tools = [ { "name": "get_event", "parameters": { "type": "object", "properties": { "ts": {"type": "string", "format": "date-time"}, } }, "type": "function" } ] result, stripped = strip_pattern_and_format(tools) assert stripped == 1, f"Expected 1 format stripped, got {stripped}" assert "format" not in result[0]["parameters"]["properties"]["ts"], "format should be stripped" assert result[0]["parameters"]["properties"]["ts"]["type"] == "string", "type should be preserved" def test_top_level_allof_stripped_for_codex_backend_compat(): """OpenAI Codex backend rejects top-level allOf/oneOf/anyOf/enum/not.""" tools = [_tool("memory", { "type": "object", "properties": { "action": {"type": "string", "enum": ["add", "replace"]}, "content": {"type": "string"}, }, "required": ["action"], "allOf": [ { "if": {"properties": {"action": {"const": "add"}}, "required": ["action"]}, "then": {"required": ["content"]}, }, ], })] out = sanitize_tool_schemas(tools) params = out[0]["function"]["parameters"] assert "allOf" not in params # Properties and required survive. assert params["required"] == ["action"] assert "content" in params["properties"] def test_top_level_oneof_anyof_enum_not_stripped(): """All five forbidden top-level combinators are dropped.""" tools = [_tool("t", { "type": "object", "properties": {"x": {"type": "string"}}, "oneOf": [{"required": ["x"]}], "anyOf": [{"required": ["x"]}], "enum": ["bogus-top-level"], "not": {"required": ["y"]}, })] out = sanitize_tool_schemas(tools) params = out[0]["function"]["parameters"] for key in ("oneOf", "anyOf", "enum", "not"): assert key not in params, f"{key} should be stripped from top level" def test_nested_allof_preserved(): """Combinators inside a property's schema are preserved (only top is strict).""" tools = [_tool("t", { "type": "object", "properties": { "config": { "type": "object", "properties": {"mode": {"type": "string"}}, "allOf": [{"required": ["mode"]}], }, }, })] out = sanitize_tool_schemas(tools) nested = out[0]["function"]["parameters"]["properties"]["config"] assert "allOf" in nested assert nested["allOf"] == [{"required": ["mode"]}] def test_strip_responses_format_tools(): """strip_pattern_and_format should handle Responses-format tools (no function wrapper).""" from tools.schema_sanitizer import strip_pattern_and_format # Responses-format: {"name": "...", "parameters": {...}, "type": "function"} tools = [ { "name": "mcp_firecrawl_search", "parameters": { "type": "object", "properties": { "query": {"type": "string"}, "includeDomains": { "type": "array", "items": { "type": "string", "pattern": "^(?=.{1,253}$)(?:[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?\\.)+[a-z0-9][a-z0-9-]{0,61}[a-z0-9]$" } } } }, "type": "function" } ] result, stripped = strip_pattern_and_format(tools) assert stripped == 1, f"Expected 1 pattern stripped, got {stripped}" # Verify pattern keyword was removed from includeDomains domains = result[0]["parameters"]["properties"]["includeDomains"]["items"] assert "pattern" not in domains, f"pattern should be stripped: {domains}" assert domains["type"] == "string", "type should be preserved" def test_strip_responses_idempotent(): """Second call on already-stripped Responses-format tools should return 0.""" from tools.schema_sanitizer import strip_pattern_and_format tools = [ { "name": "search_files", "parameters": { "type": "object", "properties": { "pattern": {"type": "string"} # This is a property named pattern, NOT schema keyword } } } ] # Pass 1 - property named 'pattern' should NOT be stripped result, first = strip_pattern_and_format(tools) assert first == 0, f"Expected 0 stripped (property pattern preserved), got {first}" assert "pattern" in result[0]["parameters"]["properties"], "property named pattern should survive" # Pass 2 - idempotent _, second = strip_pattern_and_format(tools) assert second == 0, f"Expected 0 on second pass, got {second}" def test_strip_responses_mixed_formats(): """Mixed list of OpenAI-format and Responses-format tools should both be sanitized.""" from tools.schema_sanitizer import strip_pattern_and_format tools = [ # OpenAI-format: {"function": {"parameters": {...}}} { "type": "function", "function": { "name": "search", "parameters": { "type": "object", "properties": { "query": {"type": "string", "pattern": "^[a-z]+$"} } } } }, # Responses-format: {"name": "...", "parameters": {...}} { "name": "get_time", "parameters": { "type": "object", "properties": { "tz": {"type": "string", "format": "date-time"} } }, "type": "function" } ] result, stripped = strip_pattern_and_format(tools) assert stripped == 2, f"Expected 2 stripped (1 pattern + 1 format), got {stripped}" # OpenAI-format tool: pattern stripped from parameters openai_params = result[0]["function"]["parameters"]["properties"]["query"] assert "pattern" not in openai_params, f"pattern should be stripped: {openai_params}" # Responses-format tool: format stripped resp_params = result[1]["parameters"]["properties"]["tz"] assert "format" not in resp_params, f"format should be stripped: {resp_params}" # Verify structure preserved assert result[0]["function"]["parameters"]["type"] == "object" assert result[1]["parameters"]["type"] == "object" # ───────────────────────────────────────────────────────────────────────── # strip_slash_enum — reactive recovery when xAI's /v1/responses (and # /v1/chat/completions) grammar-compiler rejects enum values containing # a forward slash. Symptom: HTTP 400 "Invalid arguments passed to the # model" before any token is emitted. Most commonly hit by MCP-derived # tools whose enum lists HuggingFace IDs like "Qwen/Qwen3.5-0.8B". # ───────────────────────────────────────────────────────────────────────── def test_strip_slash_enum_removes_huggingface_id_enum(): """enum containing HF-style 'owner/name' IDs → stripped.""" tools = [_tool("train", { "type": "object", "properties": { "model": { "type": "string", "enum": ["Qwen/Qwen3.5-0.8B", "openai/gpt-oss-20b"], }, }, })] _, stripped = strip_slash_enum(tools) assert stripped == 1 prop = tools[0]["function"]["parameters"]["properties"]["model"] assert "enum" not in prop # Type + description survive so the model still gets the prompting hint. assert prop["type"] == "string" def test_strip_slash_enum_preserves_slashless_enum(): """enum without any '/' → preserved.""" tools = [_tool("pick", { "type": "object", "properties": { "mode": {"type": "string", "enum": ["fast", "slow"]}, }, })] _, stripped = strip_slash_enum(tools) assert stripped == 0 assert tools[0]["function"]["parameters"]["properties"]["mode"]["enum"] == ["fast", "slow"] def test_strip_slash_enum_partial_match_strips_whole_enum(): """Any single value containing '/' triggers removal of the entire enum. Rationale: if we kept the slashless values, the model could still pick them, but xAI's grammar-compile failure is all-or-nothing on the enum keyword — keeping a mixed-content enum would still 400. Drop it whole. """ tools = [_tool("pick", { "type": "object", "properties": { "target": {"type": "string", "enum": ["local", "hf://Qwen/Qwen3"]}, }, })] _, stripped = strip_slash_enum(tools) assert stripped == 1 assert "enum" not in tools[0]["function"]["parameters"]["properties"]["target"] def test_strip_slash_enum_responses_format(): """Responses-format tools (no `function` wrapper) are also handled.""" tools = [{ "type": "function", "name": "mcp_prime_lab_train_model", "parameters": { "type": "object", "properties": { "model": { "type": "string", "enum": ["Qwen/Qwen3.5-0.8B", "meta-llama/Llama-3.2-1B-Instruct"], }, }, }, }] _, stripped = strip_slash_enum(tools) assert stripped == 1 assert "enum" not in tools[0]["parameters"]["properties"]["model"] def test_strip_slash_enum_recurses_into_anyof(): """enum-with-slash inside an anyOf variant is also stripped.""" tools = [_tool("t", { "type": "object", "properties": { "value": { "anyOf": [ {"type": "string", "enum": ["owner/repo"]}, {"type": "null"}, ], }, }, })] _, stripped = strip_slash_enum(tools) assert stripped == 1 variants = tools[0]["function"]["parameters"]["properties"]["value"]["anyOf"] assert "enum" not in variants[0] assert variants[0]["type"] == "string" def test_strip_slash_enum_is_idempotent(): """Second call on already-stripped tools is a no-op.""" tools = [_tool("t", { "type": "object", "properties": {"m": {"type": "string", "enum": ["a/b"]}}, })] _, first = strip_slash_enum(tools) _, second = strip_slash_enum(tools) assert first == 1 assert second == 0 def test_strip_slash_enum_empty_returns_zero(): tools, stripped = strip_slash_enum([]) assert tools == [] assert stripped == 0 def test_strip_slash_enum_none_returns_zero(): tools, stripped = strip_slash_enum(None) assert tools is None assert stripped == 0 def test_strip_slash_enum_ignores_non_string_enum_values(): """Integer/boolean enum values can't contain '/' — leave them alone.""" tools = [_tool("t", { "type": "object", "properties": { "level": {"type": "integer", "enum": [1, 2, 3]}, "flag": {"type": "boolean", "enum": [True, False]}, }, })] _, stripped = strip_slash_enum(tools) assert stripped == 0 props = tools[0]["function"]["parameters"]["properties"] assert props["level"]["enum"] == [1, 2, 3] assert props["flag"]["enum"] == [True, False]