fix(tools): wrap bare scalars in single-element list for array-typed args

Open-weight models (DeepSeek, Qwen, GLM) sometimes emit tool calls like
`{"urls": "https://a.com"}` when the tool schema declares
`type: array`.  The call was JSON-valid but semantically wrong, and
`coerce_tool_args` would pass the bare string through — the tool then
failed with a confusing type error.

`coerce_tool_args` now wraps non-list, non-null values in a
single-element list when the schema declares `array`.  Strings still go
through `_coerce_value` first so JSON-encoded arrays
(`'["a","b"]'`) parse correctly and nullable `"null"` still
becomes `None`.  `None` itself is preserved — tools with sensible
defaults already handle it, and we don't want to silently mask a
deliberate null.

Salvaged from #19652 (NikolayGusev-astra) — the broader validate-then-
repair layer had several issues (duplicated existing coercion,
mis-classified `old_string` as a path field, prepended non-JSON
prefixes to tool results that break downstream JSON parsing, hardcoded
offset/limit defaults unsuitable for non-read_file tools).  The one
genuinely new capability is wrapping bare scalars, which is implemented
here directly inside the existing coercion path.

Co-authored-by: Nikolay Gusev <ngusev@astralinux.ru>
This commit is contained in:
Nikolay Gusev 2026-05-04 04:58:35 -07:00 committed by Teknium
parent 6f864f8f94
commit fdf9343c51
3 changed files with 98 additions and 6 deletions

View file

@ -511,6 +511,12 @@ def coerce_tool_args(tool_name: str, args: Dict[str, Any]) -> Dict[str, Any]:
Handles ``"type": "integer"``, ``"type": "number"``, ``"type": "boolean"``, Handles ``"type": "integer"``, ``"type": "number"``, ``"type": "boolean"``,
and union types (``"type": ["integer", "string"]``). and union types (``"type": ["integer", "string"]``).
Also wraps bare scalar values in a single-element list when the schema
declares ``"type": "array"``. Open-weight models (DeepSeek, Qwen, GLM)
sometimes emit ``{"urls": "https://a.com"}`` when the tool expects
``{"urls": ["https://a.com"]}``; wrapping here avoids a confusing tool
failure on what is otherwise a well-formed call.
""" """
if not args or not isinstance(args, dict): if not args or not isinstance(args, dict):
return args return args
@ -523,13 +529,42 @@ def coerce_tool_args(tool_name: str, args: Dict[str, Any]) -> Dict[str, Any]:
if not properties: if not properties:
return args return args
for key, value in args.items(): for key, value in list(args.items()):
if not isinstance(value, str):
continue
prop_schema = properties.get(key) prop_schema = properties.get(key)
if not prop_schema: if not prop_schema:
continue continue
expected = prop_schema.get("type") expected = prop_schema.get("type")
# Wrap bare non-list values when the schema declares ``array``.
# Strings still go through _coerce_value first so JSON-encoded
# arrays (``'["a","b"]'``) get parsed and nullable ``"null"``
# becomes ``None`` rather than ``["null"]``.
# ``None`` itself is preserved — we don't know whether the model
# meant "omit" or "empty list", and tools with sensible defaults
# (e.g. read_file's normalize_read_pagination) already handle it.
if expected == "array" and value is not None and not isinstance(value, (list, tuple)):
if isinstance(value, str):
coerced = _coerce_value(value, expected, schema=prop_schema)
if coerced is not value:
# _coerce_value handled it (JSON-parsed list or
# nullable "null" → None).
args[key] = coerced
continue
args[key] = [value]
logger.info(
"coerce_tool_args: wrapped bare string in list for %s.%s",
tool_name, key,
)
continue
args[key] = [value]
logger.info(
"coerce_tool_args: wrapped bare %s in list for %s.%s",
type(value).__name__, tool_name, key,
)
continue
if not isinstance(value, str):
continue
if not expected and not _schema_allows_null(prop_schema): if not expected and not _schema_allows_null(prop_schema):
continue continue
coerced = _coerce_value(value, expected, schema=prop_schema) coerced = _coerce_value(value, expected, schema=prop_schema)

View file

@ -48,6 +48,7 @@ AUTHOR_MAP = {
"127238744+teknium1@users.noreply.github.com": "teknium1", "127238744+teknium1@users.noreply.github.com": "teknium1",
"159539633+MottledShadow@users.noreply.github.com": "MottledShadow", "159539633+MottledShadow@users.noreply.github.com": "MottledShadow",
"aludwin+gh@gmail.com": "adamludwin", "aludwin+gh@gmail.com": "adamludwin",
"ngusev@astralinux.ru": "NikolayGusev-astra",
"2093036+exiao@users.noreply.github.com": "exiao", "2093036+exiao@users.noreply.github.com": "exiao",
"rylen.anil@gmail.com": "rylena", "rylen.anil@gmail.com": "rylena",
"godnanijatin@gmail.com": "jatingodnani", "godnanijatin@gmail.com": "jatingodnani",

View file

@ -297,13 +297,69 @@ class TestCoerceToolArgs:
result = coerce_tool_args("test_tool", args) result = coerce_tool_args("test_tool", args)
assert result["stages"] is None assert result["stages"] is None
def test_invalid_json_array_preserved_as_string(self): def test_invalid_json_array_wrapped_in_single_element_list(self):
"""If the string isn't valid JSON, pass it through — let the tool decide.""" """A bare string gets wrapped into ``[value]`` when the schema says array.
Open-weight models (DeepSeek, Qwen, GLM) sometimes emit
``{"urls": "https://a.com"}`` when the tool expects a list.
Wrapping produces a valid dispatch rather than a confusing tool
failure. This supersedes the earlier "pass the string through"
behavior no real tool handles a bare string as an array
gracefully.
"""
schema = self._mock_schema({"items": {"type": "array"}}) schema = self._mock_schema({"items": {"type": "array"}})
with patch("model_tools.registry.get_schema", return_value=schema): with patch("model_tools.registry.get_schema", return_value=schema):
args = {"items": "not-json"} args = {"items": "not-json"}
result = coerce_tool_args("test_tool", args) result = coerce_tool_args("test_tool", args)
assert result["items"] == "not-json" assert result["items"] == ["not-json"]
def test_bare_string_wrapped_as_array(self):
"""Bare string on array field → single-element list."""
schema = self._mock_schema({"urls": {"type": "array", "items": {"type": "string"}}})
with patch("model_tools.registry.get_schema", return_value=schema):
args = {"urls": "https://a.com"}
result = coerce_tool_args("test_tool", args)
assert result["urls"] == ["https://a.com"]
def test_bare_int_wrapped_as_array(self):
"""Bare non-string scalars (int, bool, float) also get wrapped."""
schema = self._mock_schema({"ids": {"type": "array", "items": {"type": "integer"}}})
with patch("model_tools.registry.get_schema", return_value=schema):
args = {"ids": 5}
result = coerce_tool_args("test_tool", args)
assert result["ids"] == [5]
def test_bare_dict_wrapped_as_array(self):
"""Bare dict on array field → single-element list."""
schema = self._mock_schema({"items": {"type": "array"}})
with patch("model_tools.registry.get_schema", return_value=schema):
args = {"items": {"a": 1}}
result = coerce_tool_args("test_tool", args)
assert result["items"] == [{"a": 1}]
def test_none_on_array_field_preserved(self):
"""``None`` is never wrapped — tools with defaults handle it."""
schema = self._mock_schema({"items": {"type": "array"}})
with patch("model_tools.registry.get_schema", return_value=schema):
args = {"items": None}
result = coerce_tool_args("test_tool", args)
assert result["items"] is None
def test_existing_list_passthrough(self):
"""An already-valid list is not touched."""
schema = self._mock_schema({"items": {"type": "array"}})
with patch("model_tools.registry.get_schema", return_value=schema):
args = {"items": ["a", "b"]}
result = coerce_tool_args("test_tool", args)
assert result["items"] == ["a", "b"]
def test_json_encoded_array_still_parses(self):
"""JSON-encoded strings still parse (not double-wrapped)."""
schema = self._mock_schema({"items": {"type": "array"}})
with patch("model_tools.registry.get_schema", return_value=schema):
args = {"items": '["a","b"]'}
result = coerce_tool_args("test_tool", args)
assert result["items"] == ["a", "b"]
def test_extra_args_without_schema_left_alone(self): def test_extra_args_without_schema_left_alone(self):
"""Args not in the schema properties are not touched.""" """Args not in the schema properties are not touched."""