Merge pull request #52618 from NousResearch/salvage/14185-todo-coercion

fix(tools): defensive type coercion in todo_tool for malformed LLM input (#14185)
This commit is contained in:
kshitij 2026-06-26 02:02:18 +05:30 committed by GitHub
commit 42bea9e298
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 150 additions and 0 deletions

View file

@ -0,0 +1,133 @@
"""Tests for defensive type coercion in todo_tool (issue #14185).
Covers three crash patterns:
1. todos is a JSON string instead of a list
2. todos list contains non-dict items (e.g., bare strings)
3. Well-formed input continues to work unchanged
"""
import json
from tools.todo_tool import TodoStore, todo_tool
class TestJsonStringCoercion:
"""Guard 1: todo_tool() recovers when LLM sends todos as a JSON string."""
def test_json_string_is_parsed_into_list(self):
store = TodoStore()
todos_str = json.dumps([
{"id": "t1", "content": "Do A", "status": "pending"},
{"id": "t2", "content": "Do B", "status": "in_progress"},
])
result = json.loads(todo_tool(todos=todos_str, store=store))
assert "error" not in result
assert result["summary"]["total"] == 2
assert result["todos"][0]["id"] == "t1"
assert result["todos"][1]["status"] == "in_progress"
def test_unparseable_string_returns_error(self):
store = TodoStore()
result = json.loads(todo_tool(todos="not valid json [", store=store))
assert "error" in result
def test_json_string_that_parses_to_non_list_returns_error(self):
store = TodoStore()
# Valid JSON, but a dict instead of a list
result = json.loads(todo_tool(todos='{"id": "1"}', store=store))
assert "error" in result
def test_non_list_non_string_returns_error(self):
store = TodoStore()
result = json.loads(todo_tool(todos=42, store=store))
assert "error" in result
class TestNonDictListItems:
"""Guards 2 & 3: _validate and _dedupe_by_id handle non-dict items."""
def test_string_item_in_list_does_not_crash(self):
store = TodoStore()
result = store.write(["not-a-dict"])
assert len(result) == 1
assert result[0]["id"] == "?"
assert result[0]["content"] == "(invalid item)"
assert result[0]["status"] == "pending"
def test_mixed_valid_and_invalid_items(self):
store = TodoStore()
result = store.write([
{"id": "1", "content": "Real task", "status": "pending"},
"garbage",
42,
{"id": "2", "content": "Another task", "status": "completed"},
])
assert len(result) == 4
# Valid items are preserved
assert result[0]["id"] == "1"
assert result[0]["content"] == "Real task"
assert result[3]["id"] == "2"
# Invalid items get placeholder values
assert result[1]["content"] == "(invalid item)"
assert result[2]["content"] == "(invalid item)"
def test_none_item_in_list(self):
store = TodoStore()
result = store.write([None])
assert len(result) == 1
assert result[0]["id"] == "?"
def test_integer_item_in_list(self):
store = TodoStore()
result = store.write([123])
assert len(result) == 1
assert result[0]["content"] == "(invalid item)"
def test_non_dict_items_via_todo_tool(self):
"""End-to-end: non-dict list items produce valid output, not a crash."""
store = TodoStore()
result = json.loads(todo_tool(todos=["bad", "also bad"], store=store))
assert "error" not in result
assert result["summary"]["total"] == 2
assert result["summary"]["pending"] == 2
class TestWellFormedInputUnchanged:
"""Regression: normal usage must not be affected by the guards."""
def test_normal_write_and_read(self):
store = TodoStore()
items = [
{"id": "a", "content": "First", "status": "pending"},
{"id": "b", "content": "Second", "status": "in_progress"},
]
result = json.loads(todo_tool(todos=items, store=store))
assert result["summary"]["total"] == 2
assert result["summary"]["pending"] == 1
assert result["summary"]["in_progress"] == 1
def test_merge_mode_still_works(self):
store = TodoStore()
store.write([{"id": "1", "content": "Original", "status": "pending"}])
result = json.loads(todo_tool(
todos=[{"id": "1", "status": "completed"}],
merge=True,
store=store,
))
assert result["summary"]["completed"] == 1
assert result["todos"][0]["content"] == "Original"
def test_read_mode_still_works(self):
store = TodoStore()
store.write([{"id": "x", "content": "Task", "status": "pending"}])
result = json.loads(todo_tool(store=store))
assert result["summary"]["total"] == 1
def test_dedup_still_works(self):
store = TodoStore()
result = store.write([
{"id": "1", "content": "v1", "status": "pending"},
{"id": "1", "content": "v2", "status": "in_progress"},
])
assert len(result) == 1
assert result[0]["content"] == "v2"

View file

@ -158,6 +158,9 @@ class TodoStore:
Ensures required fields exist and status is valid.
Returns a clean dict with only {id, content, status}.
"""
if not isinstance(item, dict):
return {"id": "?", "content": "(invalid item)", "status": "pending"}
item_id = str(item.get("id", "")).strip()
if not item_id:
item_id = "?"
@ -179,6 +182,10 @@ class TodoStore:
"""Collapse duplicate ids, keeping the last occurrence in its position."""
last_index: Dict[str, int] = {}
for i, item in enumerate(todos):
if not isinstance(item, dict):
# Non-dict items get a synthetic key so _validate can handle them
last_index[f"__invalid_{i}"] = i
continue
item_id = str(item.get("id", "")).strip() or "?"
last_index[item_id] = i
return [todos[i] for i in sorted(last_index.values())]
@ -204,6 +211,16 @@ def todo_tool(
return tool_error("TodoStore not initialized")
if todos is not None:
# Guard: LLM sometimes sends todos as a JSON string instead of a list
if isinstance(todos, str):
try:
todos = json.loads(todos)
except (json.JSONDecodeError, TypeError):
return tool_error("todos must be a list of objects, got unparseable string")
if not isinstance(todos, list):
return tool_error(
f"todos must be a list, got {type(todos).__name__}"
)
items = store.write(todos, merge)
else:
items = store.read()