From 0be10607d937a8c0acf63839370fd64fb06a39de Mon Sep 17 00:00:00 2001 From: Tranquil-Flow Date: Thu, 25 Jun 2026 23:42:42 +0530 Subject: [PATCH] fix(tools): defensive type coercion in todo_tool for malformed LLM input (#14185) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit todo_tool crashed with `AttributeError: 'str' object has no attribute 'get'` when the LLM emitted the `todos` param as a JSON-encoded string instead of an array, or as a list containing non-dict items (observed intermittently on Claude 4.5/4.6/4.7, and after a prior tool-call rejection where the model "self-corrects" by wrapping the list in json.dumps). Three additive guards, no behavior change for well-formed input: - todo_tool(): if `todos` is a str, json.loads it; reject unparseable strings and non-list values with a clear tool_error instead of crashing downstream. - _validate(): non-dict items return a {id:"?", content:"(invalid item)"} placeholder rather than calling .get() on a str/int/None. - _dedupe_by_id(): non-dict items get a synthetic key so _validate handles them. Salvaged from #14785 by @Tranquil-Flow (authorship preserved via cherry-pick). Comprehensive tests: JSON-string coercion (parse / unparseable / non-list / non-string), non-dict list items (str/None/int/mixed), and a well-formed- unchanged regression class — both guards mutation-verified to fail without them. Closes #14185. Supersedes #14187, #22505, #14350 (same fix, less/no test coverage) and #16952 (bundled unrelated scope-creep). --- tests/tools/test_todo_tool_type_coercion.py | 133 ++++++++++++++++++++ tools/todo_tool.py | 17 +++ 2 files changed, 150 insertions(+) create mode 100644 tests/tools/test_todo_tool_type_coercion.py diff --git a/tests/tools/test_todo_tool_type_coercion.py b/tests/tools/test_todo_tool_type_coercion.py new file mode 100644 index 00000000000..12d4afaf008 --- /dev/null +++ b/tests/tools/test_todo_tool_type_coercion.py @@ -0,0 +1,133 @@ +"""Tests for defensive type coercion in todo_tool (issue #14185). + +Covers three crash patterns: +1. todos is a JSON string instead of a list +2. todos list contains non-dict items (e.g., bare strings) +3. Well-formed input continues to work unchanged +""" + +import json + +from tools.todo_tool import TodoStore, todo_tool + + +class TestJsonStringCoercion: + """Guard 1: todo_tool() recovers when LLM sends todos as a JSON string.""" + + def test_json_string_is_parsed_into_list(self): + store = TodoStore() + todos_str = json.dumps([ + {"id": "t1", "content": "Do A", "status": "pending"}, + {"id": "t2", "content": "Do B", "status": "in_progress"}, + ]) + result = json.loads(todo_tool(todos=todos_str, store=store)) + assert "error" not in result + assert result["summary"]["total"] == 2 + assert result["todos"][0]["id"] == "t1" + assert result["todos"][1]["status"] == "in_progress" + + def test_unparseable_string_returns_error(self): + store = TodoStore() + result = json.loads(todo_tool(todos="not valid json [", store=store)) + assert "error" in result + + def test_json_string_that_parses_to_non_list_returns_error(self): + store = TodoStore() + # Valid JSON, but a dict instead of a list + result = json.loads(todo_tool(todos='{"id": "1"}', store=store)) + assert "error" in result + + def test_non_list_non_string_returns_error(self): + store = TodoStore() + result = json.loads(todo_tool(todos=42, store=store)) + assert "error" in result + + +class TestNonDictListItems: + """Guards 2 & 3: _validate and _dedupe_by_id handle non-dict items.""" + + def test_string_item_in_list_does_not_crash(self): + store = TodoStore() + result = store.write(["not-a-dict"]) + assert len(result) == 1 + assert result[0]["id"] == "?" + assert result[0]["content"] == "(invalid item)" + assert result[0]["status"] == "pending" + + def test_mixed_valid_and_invalid_items(self): + store = TodoStore() + result = store.write([ + {"id": "1", "content": "Real task", "status": "pending"}, + "garbage", + 42, + {"id": "2", "content": "Another task", "status": "completed"}, + ]) + assert len(result) == 4 + # Valid items are preserved + assert result[0]["id"] == "1" + assert result[0]["content"] == "Real task" + assert result[3]["id"] == "2" + # Invalid items get placeholder values + assert result[1]["content"] == "(invalid item)" + assert result[2]["content"] == "(invalid item)" + + def test_none_item_in_list(self): + store = TodoStore() + result = store.write([None]) + assert len(result) == 1 + assert result[0]["id"] == "?" + + def test_integer_item_in_list(self): + store = TodoStore() + result = store.write([123]) + assert len(result) == 1 + assert result[0]["content"] == "(invalid item)" + + def test_non_dict_items_via_todo_tool(self): + """End-to-end: non-dict list items produce valid output, not a crash.""" + store = TodoStore() + result = json.loads(todo_tool(todos=["bad", "also bad"], store=store)) + assert "error" not in result + assert result["summary"]["total"] == 2 + assert result["summary"]["pending"] == 2 + + +class TestWellFormedInputUnchanged: + """Regression: normal usage must not be affected by the guards.""" + + def test_normal_write_and_read(self): + store = TodoStore() + items = [ + {"id": "a", "content": "First", "status": "pending"}, + {"id": "b", "content": "Second", "status": "in_progress"}, + ] + result = json.loads(todo_tool(todos=items, store=store)) + assert result["summary"]["total"] == 2 + assert result["summary"]["pending"] == 1 + assert result["summary"]["in_progress"] == 1 + + def test_merge_mode_still_works(self): + store = TodoStore() + store.write([{"id": "1", "content": "Original", "status": "pending"}]) + result = json.loads(todo_tool( + todos=[{"id": "1", "status": "completed"}], + merge=True, + store=store, + )) + assert result["summary"]["completed"] == 1 + assert result["todos"][0]["content"] == "Original" + + def test_read_mode_still_works(self): + store = TodoStore() + store.write([{"id": "x", "content": "Task", "status": "pending"}]) + result = json.loads(todo_tool(store=store)) + assert result["summary"]["total"] == 1 + + def test_dedup_still_works(self): + store = TodoStore() + result = store.write([ + {"id": "1", "content": "v1", "status": "pending"}, + {"id": "1", "content": "v2", "status": "in_progress"}, + ]) + assert len(result) == 1 + assert result[0]["content"] == "v2" diff --git a/tools/todo_tool.py b/tools/todo_tool.py index 960dab66603..fca24e86807 100644 --- a/tools/todo_tool.py +++ b/tools/todo_tool.py @@ -158,6 +158,9 @@ class TodoStore: Ensures required fields exist and status is valid. Returns a clean dict with only {id, content, status}. """ + if not isinstance(item, dict): + return {"id": "?", "content": "(invalid item)", "status": "pending"} + item_id = str(item.get("id", "")).strip() if not item_id: item_id = "?" @@ -179,6 +182,10 @@ class TodoStore: """Collapse duplicate ids, keeping the last occurrence in its position.""" last_index: Dict[str, int] = {} for i, item in enumerate(todos): + if not isinstance(item, dict): + # Non-dict items get a synthetic key so _validate can handle them + last_index[f"__invalid_{i}"] = i + continue item_id = str(item.get("id", "")).strip() or "?" last_index[item_id] = i return [todos[i] for i in sorted(last_index.values())] @@ -204,6 +211,16 @@ def todo_tool( return tool_error("TodoStore not initialized") if todos is not None: + # Guard: LLM sometimes sends todos as a JSON string instead of a list + if isinstance(todos, str): + try: + todos = json.loads(todos) + except (json.JSONDecodeError, TypeError): + return tool_error("todos must be a list of objects, got unparseable string") + if not isinstance(todos, list): + return tool_error( + f"todos must be a list, got {type(todos).__name__}" + ) items = store.write(todos, merge) else: items = store.read()