From 93e109a1d552b03c847b96077428048cceb012cd Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Sat, 16 May 2026 13:02:19 -0700 Subject: [PATCH] fix(moonshot): strip $ref siblings and collapse tuple items in tool schemas (#27104) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Port from anomalyco/opencode#24730: Moonshot's JSON Schema validator rejects two shapes that the rest of the JSON Schema ecosystem accepts: 1. $ref nodes with sibling keywords. Moonshot expands the reference before validation and then rejects the node if keys like `description`, `type`, or `default` appear alongside $ref. MCP-sourced tool schemas commonly put a `description` on $ref-typed properties so the model sees the field hint — which worked on every provider except Moonshot. 2. Tuple-style `items` arrays (positional element schemas). Moonshot's engine requires ONE schema applied to every array element. Common in tool schemas generated from Go/Protobuf that model fixed-length arrays as `[{type:number}, {type:number}]`. Repairs applied in `agent/moonshot_schema.py`: - Rule 3: when a node has `$ref`, return `{"$ref": }` only (strip every sibling). The referenced definition still carries its own description on the target node, which Moonshot accepts. - Rule 4: when `items` is a list, collapse to the first element schema (falling back to `{}` which is then filled by the generic missing-type rule). Preserves `minItems` / `maxItems` / other siblings. Tests: 10 new cases across TestRefSiblingStripping + TestTupleItems, plus the existing TestMissingTypeFilled::test_ref_node_is_not_given_synthetic_type still passes (it asserted plain $ref passes through; now it passes through as exactly `{"$ref": "..."}` which is strictly compatible). All 35 tests in test_moonshot_schema.py pass. --- agent/moonshot_schema.py | 31 ++++++ tests/agent/test_moonshot_schema.py | 163 ++++++++++++++++++++++++++++ 2 files changed, 194 insertions(+) diff --git a/agent/moonshot_schema.py b/agent/moonshot_schema.py index f22176f936e..6f785af5469 100644 --- a/agent/moonshot_schema.py +++ b/agent/moonshot_schema.py @@ -15,6 +15,18 @@ and MoonshotAI/kimi-cli#1595: 2. When ``anyOf`` is used, ``type`` must be on the ``anyOf`` children, not the parent. Presence of both causes "type should be defined in anyOf items instead of the parent schema". +3. ``enum`` arrays on scalar-typed nodes may not contain ``null`` or empty + strings. Strip those entries (drop the enum entirely if it becomes empty). +4. ``$ref`` nodes may not carry sibling keywords. Moonshot expands the + reference before validation and then rejects the node if sibling keys + like ``description`` remain on the same node as ``$ref``. Strip every + sibling from ``$ref`` nodes so only ``{"$ref": "..."}`` survives. + (Ported from anomalyco/opencode#24730.) +5. ``items`` may not be a tuple-style array (``items: [schemaA, schemaB]`` + for positional element schemas). Moonshot's schema engine requires a + single object schema applied to every array element. Collapse tuple + ``items`` to the first element schema (or ``{}`` if the tuple is empty). + (Ported from anomalyco/opencode#24730.) The ``#/definitions/...`` → ``#/$defs/...`` rewrite for draft-07 refs is handled separately in ``tools/mcp_tool._normalize_mcp_input_schema`` so it @@ -66,6 +78,16 @@ def _repair_schema(node: Any, is_schema: bool = True) -> Any: } elif key in _SCHEMA_LIST_KEYS and isinstance(value, list): repaired[key] = [_repair_schema(v, is_schema=True) for v in value] + elif key == "items" and isinstance(value, list): + # Rule 5: tuple-style ``items`` arrays (positional element + # schemas) are not accepted by Moonshot. Collapse to the + # first element schema if present, else to ``{}``. This + # matches opencode's behaviour for moonshotai / kimi models. + first = value[0] if value else {} + if isinstance(first, dict): + repaired[key] = _repair_schema(first, is_schema=True) + else: + repaired[key] = first elif key in _SCHEMA_NODE_KEYS: # items / not / additionalProperties: single nested schema. # additionalProperties can also be a bool — leave those alone. @@ -130,6 +152,15 @@ def _repair_schema(node: Any, is_schema: bool = True) -> Any: else: repaired.pop("enum") + # Rule 4: $ref nodes must not have sibling keywords. Moonshot expands + # the reference before validation and then rejects the node if siblings + # like ``description`` / ``type`` / ``default`` appear alongside $ref. + # The referenced definition still carries its own description on the + # target node, which Moonshot accepts. + # (Ported from anomalyco/opencode#24730.) + if "$ref" in repaired: + return {"$ref": repaired["$ref"]} + return repaired diff --git a/tests/agent/test_moonshot_schema.py b/tests/agent/test_moonshot_schema.py index 2ce2daa096a..8ba508c5dbd 100644 --- a/tests/agent/test_moonshot_schema.py +++ b/tests/agent/test_moonshot_schema.py @@ -6,6 +6,11 @@ the JSON Schema ecosystem accepts: 1. Properties without ``type`` — Moonshot requires ``type`` on every node. 2. ``type`` at the parent of ``anyOf`` — Moonshot requires it only inside ``anyOf`` children. +3. ``$ref`` with sibling keywords — Moonshot expands the ref first and then + rejects ``description``/``type`` siblings on the same node. + (Ported from anomalyco/opencode#24730.) +4. Tuple-style ``items`` arrays — Moonshot requires a single item schema, + not positional ones. (Ported from anomalyco/opencode#24730.) These tests cover the repairs applied by ``agent/moonshot_schema.py``. """ @@ -180,6 +185,164 @@ class TestAnyOfParentType: assert db_type["enum"] == ["mysql", "postgresql"] # "" stripped by enum cleanup +class TestRefSiblingStripping: + """Rule 4: ``$ref`` nodes may not carry sibling keywords on Moonshot. + + Ported from anomalyco/opencode#24730. The real-world failure was MCP tools + whose generated schemas put a ``description`` on a ``$ref`` property so the + model would see the field's human-readable hint. The reference stays — the + referenced definition still owns the description (on the target node itself) + and still serves the model's context. + """ + + def test_description_sibling_stripped_from_ref(self): + params = { + "type": "object", + "properties": { + "variantOptions": { + "$ref": "#/$defs/VariantOptions", + "description": "Required. The variant options for generation.", + }, + }, + "$defs": { + "VariantOptions": { + "type": "object", + "properties": {}, + "description": "Configuration options.", + }, + }, + } + out = sanitize_moonshot_tool_parameters(params) + # Sibling stripped. + assert out["properties"]["variantOptions"] == {"$ref": "#/$defs/VariantOptions"} + # The target definition's own description is preserved — we only strip + # siblings ON the $ref node, not on the thing it points at. + assert out["$defs"]["VariantOptions"]["description"] == "Configuration options." + + def test_multiple_siblings_all_stripped(self): + params = { + "type": "object", + "properties": { + "p": { + "$ref": "#/$defs/T", + "type": "object", + "description": "x", + "default": {}, + "title": "P", + }, + }, + "$defs": {"T": {"type": "object"}}, + } + out = sanitize_moonshot_tool_parameters(params) + assert out["properties"]["p"] == {"$ref": "#/$defs/T"} + + def test_ref_without_siblings_unchanged(self): + params = { + "type": "object", + "properties": {"p": {"$ref": "#/$defs/T"}}, + "$defs": {"T": {"type": "object"}}, + } + out = sanitize_moonshot_tool_parameters(params) + assert out["properties"]["p"] == {"$ref": "#/$defs/T"} + + def test_ref_inside_anyof_children(self): + params = { + "type": "object", + "properties": { + "v": { + "anyOf": [ + {"$ref": "#/$defs/A", "description": "variant A"}, + {"type": "null"}, + ], + }, + }, + "$defs": {"A": {"type": "object"}}, + } + out = sanitize_moonshot_tool_parameters(params) + # Main's existing Rule 2 collapses anyOf-with-null down to the + # single non-null branch (Moonshot rejects null branches in anyOf + # outright). That branch was originally `{"$ref": ..., "description": ...}`; + # Rule 4 then strips the sibling, leaving exactly `{"$ref": "..."}`. + # The test name still applies — Rule 4 ran on the $ref branch — it + # just happens after the anyOf collapse on this input. + assert out["properties"]["v"] == {"$ref": "#/$defs/A"} + + +class TestTupleItems: + """Rule 5: tuple-style ``items`` arrays collapse to a single schema. + + Ported from anomalyco/opencode#24730. Moonshot's schema engine requires + ``items`` to be ONE schema object applied to every array element; tuple- + style positional item schemas are rejected. We collapse to the first + element's schema (which is the "closest" interpretation of positional → + single) and drop the rest. + """ + + def test_tuple_items_collapsed_to_first(self): + params = { + "type": "object", + "properties": { + "renderedSize": { + "type": "array", + "items": [{"type": "number"}, {"type": "number"}], + "minItems": 2, + "maxItems": 2, + }, + }, + } + out = sanitize_moonshot_tool_parameters(params) + assert out["properties"]["renderedSize"]["items"] == {"type": "number"} + # Sibling constraints are preserved — only the tuple shape is repaired. + assert out["properties"]["renderedSize"]["minItems"] == 2 + + def test_empty_tuple_items_becomes_empty_schema(self): + # Empty tuple collapses to ``{}``; the generic repair then fills a + # synthetic ``type`` because Moonshot requires ``type`` on every + # schema node. Either ``{}`` or ``{"type": "string"}`` is a valid + # final shape for Moonshot — both accept any string element — but we + # always go through ``_fill_missing_type`` so the result is fully + # well-formed without needing the consumer to patch it later. + params = { + "type": "object", + "properties": { + "things": {"type": "array", "items": []}, + }, + } + out = sanitize_moonshot_tool_parameters(params) + items = out["properties"]["things"]["items"] + # Must be a dict and must carry a ``type`` (the whole point of Rule 1). + assert isinstance(items, dict) + assert items.get("type") + + def test_tuple_items_first_element_is_repaired(self): + # The first element itself has a missing type — it should be filled. + params = { + "type": "object", + "properties": { + "pair": { + "type": "array", + "items": [{"description": "first"}, {"description": "second"}], + }, + }, + } + out = sanitize_moonshot_tool_parameters(params) + # Repaired to a single schema with a synthetic type. + assert out["properties"]["pair"]["items"] == { + "description": "first", + "type": "string", + } + + def test_single_schema_items_unchanged(self): + params = { + "type": "object", + "properties": { + "tags": {"type": "array", "items": {"type": "string"}}, + }, + } + out = sanitize_moonshot_tool_parameters(params) + assert out["properties"]["tags"]["items"] == {"type": "string"} + + class TestTopLevelGuarantees: """The returned top-level schema is always a well-formed object."""