mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-30 11:52:04 +00:00
Merge pull request #52618 from NousResearch/salvage/14185-todo-coercion
fix(tools): defensive type coercion in todo_tool for malformed LLM input (#14185)
This commit is contained in:
commit
42bea9e298
2 changed files with 150 additions and 0 deletions
133
tests/tools/test_todo_tool_type_coercion.py
Normal file
133
tests/tools/test_todo_tool_type_coercion.py
Normal file
|
|
@ -0,0 +1,133 @@
|
|||
"""Tests for defensive type coercion in todo_tool (issue #14185).
|
||||
|
||||
Covers three crash patterns:
|
||||
1. todos is a JSON string instead of a list
|
||||
2. todos list contains non-dict items (e.g., bare strings)
|
||||
3. Well-formed input continues to work unchanged
|
||||
"""
|
||||
|
||||
import json
|
||||
|
||||
from tools.todo_tool import TodoStore, todo_tool
|
||||
|
||||
|
||||
class TestJsonStringCoercion:
|
||||
"""Guard 1: todo_tool() recovers when LLM sends todos as a JSON string."""
|
||||
|
||||
def test_json_string_is_parsed_into_list(self):
|
||||
store = TodoStore()
|
||||
todos_str = json.dumps([
|
||||
{"id": "t1", "content": "Do A", "status": "pending"},
|
||||
{"id": "t2", "content": "Do B", "status": "in_progress"},
|
||||
])
|
||||
result = json.loads(todo_tool(todos=todos_str, store=store))
|
||||
assert "error" not in result
|
||||
assert result["summary"]["total"] == 2
|
||||
assert result["todos"][0]["id"] == "t1"
|
||||
assert result["todos"][1]["status"] == "in_progress"
|
||||
|
||||
def test_unparseable_string_returns_error(self):
|
||||
store = TodoStore()
|
||||
result = json.loads(todo_tool(todos="not valid json [", store=store))
|
||||
assert "error" in result
|
||||
|
||||
def test_json_string_that_parses_to_non_list_returns_error(self):
|
||||
store = TodoStore()
|
||||
# Valid JSON, but a dict instead of a list
|
||||
result = json.loads(todo_tool(todos='{"id": "1"}', store=store))
|
||||
assert "error" in result
|
||||
|
||||
def test_non_list_non_string_returns_error(self):
|
||||
store = TodoStore()
|
||||
result = json.loads(todo_tool(todos=42, store=store))
|
||||
assert "error" in result
|
||||
|
||||
|
||||
class TestNonDictListItems:
|
||||
"""Guards 2 & 3: _validate and _dedupe_by_id handle non-dict items."""
|
||||
|
||||
def test_string_item_in_list_does_not_crash(self):
|
||||
store = TodoStore()
|
||||
result = store.write(["not-a-dict"])
|
||||
assert len(result) == 1
|
||||
assert result[0]["id"] == "?"
|
||||
assert result[0]["content"] == "(invalid item)"
|
||||
assert result[0]["status"] == "pending"
|
||||
|
||||
def test_mixed_valid_and_invalid_items(self):
|
||||
store = TodoStore()
|
||||
result = store.write([
|
||||
{"id": "1", "content": "Real task", "status": "pending"},
|
||||
"garbage",
|
||||
42,
|
||||
{"id": "2", "content": "Another task", "status": "completed"},
|
||||
])
|
||||
assert len(result) == 4
|
||||
# Valid items are preserved
|
||||
assert result[0]["id"] == "1"
|
||||
assert result[0]["content"] == "Real task"
|
||||
assert result[3]["id"] == "2"
|
||||
# Invalid items get placeholder values
|
||||
assert result[1]["content"] == "(invalid item)"
|
||||
assert result[2]["content"] == "(invalid item)"
|
||||
|
||||
def test_none_item_in_list(self):
|
||||
store = TodoStore()
|
||||
result = store.write([None])
|
||||
assert len(result) == 1
|
||||
assert result[0]["id"] == "?"
|
||||
|
||||
def test_integer_item_in_list(self):
|
||||
store = TodoStore()
|
||||
result = store.write([123])
|
||||
assert len(result) == 1
|
||||
assert result[0]["content"] == "(invalid item)"
|
||||
|
||||
def test_non_dict_items_via_todo_tool(self):
|
||||
"""End-to-end: non-dict list items produce valid output, not a crash."""
|
||||
store = TodoStore()
|
||||
result = json.loads(todo_tool(todos=["bad", "also bad"], store=store))
|
||||
assert "error" not in result
|
||||
assert result["summary"]["total"] == 2
|
||||
assert result["summary"]["pending"] == 2
|
||||
|
||||
|
||||
class TestWellFormedInputUnchanged:
|
||||
"""Regression: normal usage must not be affected by the guards."""
|
||||
|
||||
def test_normal_write_and_read(self):
|
||||
store = TodoStore()
|
||||
items = [
|
||||
{"id": "a", "content": "First", "status": "pending"},
|
||||
{"id": "b", "content": "Second", "status": "in_progress"},
|
||||
]
|
||||
result = json.loads(todo_tool(todos=items, store=store))
|
||||
assert result["summary"]["total"] == 2
|
||||
assert result["summary"]["pending"] == 1
|
||||
assert result["summary"]["in_progress"] == 1
|
||||
|
||||
def test_merge_mode_still_works(self):
|
||||
store = TodoStore()
|
||||
store.write([{"id": "1", "content": "Original", "status": "pending"}])
|
||||
result = json.loads(todo_tool(
|
||||
todos=[{"id": "1", "status": "completed"}],
|
||||
merge=True,
|
||||
store=store,
|
||||
))
|
||||
assert result["summary"]["completed"] == 1
|
||||
assert result["todos"][0]["content"] == "Original"
|
||||
|
||||
def test_read_mode_still_works(self):
|
||||
store = TodoStore()
|
||||
store.write([{"id": "x", "content": "Task", "status": "pending"}])
|
||||
result = json.loads(todo_tool(store=store))
|
||||
assert result["summary"]["total"] == 1
|
||||
|
||||
def test_dedup_still_works(self):
|
||||
store = TodoStore()
|
||||
result = store.write([
|
||||
{"id": "1", "content": "v1", "status": "pending"},
|
||||
{"id": "1", "content": "v2", "status": "in_progress"},
|
||||
])
|
||||
assert len(result) == 1
|
||||
assert result[0]["content"] == "v2"
|
||||
|
|
@ -158,6 +158,9 @@ class TodoStore:
|
|||
Ensures required fields exist and status is valid.
|
||||
Returns a clean dict with only {id, content, status}.
|
||||
"""
|
||||
if not isinstance(item, dict):
|
||||
return {"id": "?", "content": "(invalid item)", "status": "pending"}
|
||||
|
||||
item_id = str(item.get("id", "")).strip()
|
||||
if not item_id:
|
||||
item_id = "?"
|
||||
|
|
@ -179,6 +182,10 @@ class TodoStore:
|
|||
"""Collapse duplicate ids, keeping the last occurrence in its position."""
|
||||
last_index: Dict[str, int] = {}
|
||||
for i, item in enumerate(todos):
|
||||
if not isinstance(item, dict):
|
||||
# Non-dict items get a synthetic key so _validate can handle them
|
||||
last_index[f"__invalid_{i}"] = i
|
||||
continue
|
||||
item_id = str(item.get("id", "")).strip() or "?"
|
||||
last_index[item_id] = i
|
||||
return [todos[i] for i in sorted(last_index.values())]
|
||||
|
|
@ -204,6 +211,16 @@ def todo_tool(
|
|||
return tool_error("TodoStore not initialized")
|
||||
|
||||
if todos is not None:
|
||||
# Guard: LLM sometimes sends todos as a JSON string instead of a list
|
||||
if isinstance(todos, str):
|
||||
try:
|
||||
todos = json.loads(todos)
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
return tool_error("todos must be a list of objects, got unparseable string")
|
||||
if not isinstance(todos, list):
|
||||
return tool_error(
|
||||
f"todos must be a list, got {type(todos).__name__}"
|
||||
)
|
||||
items = store.write(todos, merge)
|
||||
else:
|
||||
items = store.read()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue