From 0be10607d937a8c0acf63839370fd64fb06a39de Mon Sep 17 00:00:00 2001
From: Tranquil-Flow <tranquil_flow@protonmail.com>
Date: Thu, 25 Jun 2026 23:42:42 +0530
Subject: [PATCH] fix(tools): defensive type coercion in todo_tool for
 malformed LLM input (#14185)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

todo_tool crashed with `AttributeError: 'str' object has no attribute 'get'`
when the LLM emitted the `todos` param as a JSON-encoded string instead of an
array, or as a list containing non-dict items (observed intermittently on
Claude 4.5/4.6/4.7, and after a prior tool-call rejection where the model
"self-corrects" by wrapping the list in json.dumps).

Three additive guards, no behavior change for well-formed input:
- todo_tool(): if `todos` is a str, json.loads it; reject unparseable strings
  and non-list values with a clear tool_error instead of crashing downstream.
- _validate(): non-dict items return a {id:"?", content:"(invalid item)"}
  placeholder rather than calling .get() on a str/int/None.
- _dedupe_by_id(): non-dict items get a synthetic key so _validate handles them.

Salvaged from #14785 by @Tranquil-Flow (authorship preserved via cherry-pick).
Comprehensive tests: JSON-string coercion (parse / unparseable / non-list /
non-string), non-dict list items (str/None/int/mixed), and a well-formed-
unchanged regression class — both guards mutation-verified to fail without them.

Closes #14185. Supersedes #14187, #22505, #14350 (same fix, less/no test
coverage) and #16952 (bundled unrelated scope-creep).
---
 tests/tools/test_todo_tool_type_coercion.py | 133 ++++++++++++++++++++
 tools/todo_tool.py                          |  17 +++
 2 files changed, 150 insertions(+)
 create mode 100644 tests/tools/test_todo_tool_type_coercion.py

diff --git a/tests/tools/test_todo_tool_type_coercion.py b/tests/tools/test_todo_tool_type_coercion.py
new file mode 100644
index 00000000000..12d4afaf008
--- /dev/null
+++ b/tests/tools/test_todo_tool_type_coercion.py
@@ -0,0 +1,133 @@
+"""Tests for defensive type coercion in todo_tool (issue #14185).
+
+Covers three crash patterns:
+1. todos is a JSON string instead of a list
+2. todos list contains non-dict items (e.g., bare strings)
+3. Well-formed input continues to work unchanged
+"""
+
+import json
+
+from tools.todo_tool import TodoStore, todo_tool
+
+
+class TestJsonStringCoercion:
+    """Guard 1: todo_tool() recovers when LLM sends todos as a JSON string."""
+
+    def test_json_string_is_parsed_into_list(self):
+        store = TodoStore()
+        todos_str = json.dumps([
+            {"id": "t1", "content": "Do A", "status": "pending"},
+            {"id": "t2", "content": "Do B", "status": "in_progress"},
+        ])
+        result = json.loads(todo_tool(todos=todos_str, store=store))
+        assert "error" not in result
+        assert result["summary"]["total"] == 2
+        assert result["todos"][0]["id"] == "t1"
+        assert result["todos"][1]["status"] == "in_progress"
+
+    def test_unparseable_string_returns_error(self):
+        store = TodoStore()
+        result = json.loads(todo_tool(todos="not valid json [", store=store))
+        assert "error" in result
+
+    def test_json_string_that_parses_to_non_list_returns_error(self):
+        store = TodoStore()
+        # Valid JSON, but a dict instead of a list
+        result = json.loads(todo_tool(todos='{"id": "1"}', store=store))
+        assert "error" in result
+
+    def test_non_list_non_string_returns_error(self):
+        store = TodoStore()
+        result = json.loads(todo_tool(todos=42, store=store))
+        assert "error" in result
+
+
+class TestNonDictListItems:
+    """Guards 2 & 3: _validate and _dedupe_by_id handle non-dict items."""
+
+    def test_string_item_in_list_does_not_crash(self):
+        store = TodoStore()
+        result = store.write(["not-a-dict"])
+        assert len(result) == 1
+        assert result[0]["id"] == "?"
+        assert result[0]["content"] == "(invalid item)"
+        assert result[0]["status"] == "pending"
+
+    def test_mixed_valid_and_invalid_items(self):
+        store = TodoStore()
+        result = store.write([
+            {"id": "1", "content": "Real task", "status": "pending"},
+            "garbage",
+            42,
+            {"id": "2", "content": "Another task", "status": "completed"},
+        ])
+        assert len(result) == 4
+        # Valid items are preserved
+        assert result[0]["id"] == "1"
+        assert result[0]["content"] == "Real task"
+        assert result[3]["id"] == "2"
+        # Invalid items get placeholder values
+        assert result[1]["content"] == "(invalid item)"
+        assert result[2]["content"] == "(invalid item)"
+
+    def test_none_item_in_list(self):
+        store = TodoStore()
+        result = store.write([None])
+        assert len(result) == 1
+        assert result[0]["id"] == "?"
+
+    def test_integer_item_in_list(self):
+        store = TodoStore()
+        result = store.write([123])
+        assert len(result) == 1
+        assert result[0]["content"] == "(invalid item)"
+
+    def test_non_dict_items_via_todo_tool(self):
+        """End-to-end: non-dict list items produce valid output, not a crash."""
+        store = TodoStore()
+        result = json.loads(todo_tool(todos=["bad", "also bad"], store=store))
+        assert "error" not in result
+        assert result["summary"]["total"] == 2
+        assert result["summary"]["pending"] == 2
+
+
+class TestWellFormedInputUnchanged:
+    """Regression: normal usage must not be affected by the guards."""
+
+    def test_normal_write_and_read(self):
+        store = TodoStore()
+        items = [
+            {"id": "a", "content": "First", "status": "pending"},
+            {"id": "b", "content": "Second", "status": "in_progress"},
+        ]
+        result = json.loads(todo_tool(todos=items, store=store))
+        assert result["summary"]["total"] == 2
+        assert result["summary"]["pending"] == 1
+        assert result["summary"]["in_progress"] == 1
+
+    def test_merge_mode_still_works(self):
+        store = TodoStore()
+        store.write([{"id": "1", "content": "Original", "status": "pending"}])
+        result = json.loads(todo_tool(
+            todos=[{"id": "1", "status": "completed"}],
+            merge=True,
+            store=store,
+        ))
+        assert result["summary"]["completed"] == 1
+        assert result["todos"][0]["content"] == "Original"
+
+    def test_read_mode_still_works(self):
+        store = TodoStore()
+        store.write([{"id": "x", "content": "Task", "status": "pending"}])
+        result = json.loads(todo_tool(store=store))
+        assert result["summary"]["total"] == 1
+
+    def test_dedup_still_works(self):
+        store = TodoStore()
+        result = store.write([
+            {"id": "1", "content": "v1", "status": "pending"},
+            {"id": "1", "content": "v2", "status": "in_progress"},
+        ])
+        assert len(result) == 1
+        assert result[0]["content"] == "v2"
diff --git a/tools/todo_tool.py b/tools/todo_tool.py
index 960dab66603..fca24e86807 100644
--- a/tools/todo_tool.py
+++ b/tools/todo_tool.py
@@ -158,6 +158,9 @@ class TodoStore:
         Ensures required fields exist and status is valid.
         Returns a clean dict with only {id, content, status}.
         """
+        if not isinstance(item, dict):
+            return {"id": "?", "content": "(invalid item)", "status": "pending"}
+
         item_id = str(item.get("id", "")).strip()
         if not item_id:
             item_id = "?"
@@ -179,6 +182,10 @@ class TodoStore:
         """Collapse duplicate ids, keeping the last occurrence in its position."""
         last_index: Dict[str, int] = {}
         for i, item in enumerate(todos):
+            if not isinstance(item, dict):
+                # Non-dict items get a synthetic key so _validate can handle them
+                last_index[f"__invalid_{i}"] = i
+                continue
             item_id = str(item.get("id", "")).strip() or "?"
             last_index[item_id] = i
         return [todos[i] for i in sorted(last_index.values())]
@@ -204,6 +211,16 @@ def todo_tool(
         return tool_error("TodoStore not initialized")
 
     if todos is not None:
+        # Guard: LLM sometimes sends todos as a JSON string instead of a list
+        if isinstance(todos, str):
+            try:
+                todos = json.loads(todos)
+            except (json.JSONDecodeError, TypeError):
+                return tool_error("todos must be a list of objects, got unparseable string")
+        if not isinstance(todos, list):
+            return tool_error(
+                f"todos must be a list, got {type(todos).__name__}"
+            )
         items = store.write(todos, merge)
     else:
         items = store.read()