mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-09 08:21:50 +00:00
The todo list is re-injected into the model's context after every context-compression event (TodoStore.format_for_injection), so an oversized todo item or an unbounded number of items defeats the compression it is meant to ride through. TodoStore.write/_validate previously enforced no size or count bounds, so a single 50KB item produced a ~50KB re-injection block on every subsequent turn. Add two caps: - MAX_TODO_CONTENT_CHARS (4000): per-item content is truncated with a marker. Routed through a shared _cap_content() so the merge-update path (which writes content directly, bypassing _validate) is capped too. - MAX_TODO_ITEMS (256): total list length is bounded, keeping the highest-priority head (list order is priority). Both caps are generous relative to real plans — a todo item is a short task description and active lists are a handful of items. NOT a security fix. Raised externally via GHSA-5g4g-6jrg-mw3g, which framed a caller-supplied conversation_history on the authenticated API server replaying into _hydrate_todo_store as a DoS. That path is authenticated (the API server refuses to start without API_SERVER_KEY) and self-scoped (the caller supplies their own entire history and can only inflate their own response chain — forged role=tool entries are never persisted to the session DB), so it is out of scope as a vulnerability under SECURITY.md 3.2. These bounds are footgun containment that also applies to the trusted agent path, where the model itself authors the todos. Credit to the reporter for the observation. Co-authored-by: YLChen-007 <30854794+YLChen-007@users.noreply.github.com>
177 lines
6.4 KiB
Python
177 lines
6.4 KiB
Python
"""Tests for the todo tool module."""
|
|
|
|
import json
|
|
|
|
from tools.todo_tool import TodoStore, todo_tool
|
|
|
|
|
|
class TestWriteAndRead:
|
|
def test_write_replaces_list(self):
|
|
store = TodoStore()
|
|
items = [
|
|
{"id": "1", "content": "First task", "status": "pending"},
|
|
{"id": "2", "content": "Second task", "status": "in_progress"},
|
|
]
|
|
result = store.write(items)
|
|
assert len(result) == 2
|
|
assert result[0]["id"] == "1"
|
|
assert result[1]["status"] == "in_progress"
|
|
|
|
def test_read_returns_copy(self):
|
|
store = TodoStore()
|
|
store.write([{"id": "1", "content": "Task", "status": "pending"}])
|
|
items = store.read()
|
|
items[0]["content"] = "MUTATED"
|
|
assert store.read()[0]["content"] == "Task"
|
|
|
|
def test_write_deduplicates_duplicate_ids(self):
|
|
store = TodoStore()
|
|
result = store.write([
|
|
{"id": "1", "content": "First version", "status": "pending"},
|
|
{"id": "2", "content": "Other task", "status": "pending"},
|
|
{"id": "1", "content": "Latest version", "status": "in_progress"},
|
|
])
|
|
assert result == [
|
|
{"id": "2", "content": "Other task", "status": "pending"},
|
|
{"id": "1", "content": "Latest version", "status": "in_progress"},
|
|
]
|
|
|
|
|
|
class TestHasItems:
|
|
def test_empty_store(self):
|
|
store = TodoStore()
|
|
assert store.has_items() is False
|
|
|
|
def test_non_empty_store(self):
|
|
store = TodoStore()
|
|
store.write([{"id": "1", "content": "x", "status": "pending"}])
|
|
assert store.has_items() is True
|
|
|
|
|
|
class TestFormatForInjection:
|
|
def test_empty_returns_none(self):
|
|
store = TodoStore()
|
|
assert store.format_for_injection() is None
|
|
|
|
def test_non_empty_has_markers(self):
|
|
store = TodoStore()
|
|
store.write([
|
|
{"id": "1", "content": "Do thing", "status": "completed"},
|
|
{"id": "2", "content": "Next", "status": "pending"},
|
|
{"id": "3", "content": "Working", "status": "in_progress"},
|
|
])
|
|
text = store.format_for_injection()
|
|
# Completed items are filtered out of injection
|
|
assert "[x]" not in text
|
|
assert "Do thing" not in text
|
|
# Active items are included
|
|
assert "[ ]" in text
|
|
assert "[>]" in text
|
|
assert "Next" in text
|
|
assert "Working" in text
|
|
assert "context compression" in text.lower()
|
|
|
|
|
|
class TestMergeMode:
|
|
def test_update_existing_by_id(self):
|
|
store = TodoStore()
|
|
store.write([
|
|
{"id": "1", "content": "Original", "status": "pending"},
|
|
])
|
|
store.write(
|
|
[{"id": "1", "status": "completed"}],
|
|
merge=True,
|
|
)
|
|
items = store.read()
|
|
assert len(items) == 1
|
|
assert items[0]["status"] == "completed"
|
|
assert items[0]["content"] == "Original"
|
|
|
|
def test_merge_appends_new(self):
|
|
store = TodoStore()
|
|
store.write([{"id": "1", "content": "First", "status": "pending"}])
|
|
store.write(
|
|
[{"id": "2", "content": "Second", "status": "pending"}],
|
|
merge=True,
|
|
)
|
|
items = store.read()
|
|
assert len(items) == 2
|
|
|
|
|
|
class TestTodoToolFunction:
|
|
def test_read_mode(self):
|
|
store = TodoStore()
|
|
store.write([{"id": "1", "content": "Task", "status": "pending"}])
|
|
result = json.loads(todo_tool(store=store))
|
|
assert result["summary"]["total"] == 1
|
|
assert result["summary"]["pending"] == 1
|
|
|
|
def test_write_mode(self):
|
|
store = TodoStore()
|
|
result = json.loads(todo_tool(
|
|
todos=[{"id": "1", "content": "New", "status": "in_progress"}],
|
|
store=store,
|
|
))
|
|
assert result["summary"]["in_progress"] == 1
|
|
|
|
def test_no_store_returns_error(self):
|
|
result = json.loads(todo_tool())
|
|
assert "error" in result
|
|
|
|
|
|
class TestTodoStoreBounds:
|
|
"""Bounds on persisted todo state (GHSA-5g4g-6jrg-mw3g hardening).
|
|
|
|
The todo list is re-injected into context after every compression event,
|
|
so an unbounded item — whether authored by the model or replayed from
|
|
caller-supplied history on the API server's _hydrate_todo_store path —
|
|
would defeat the compression it rides through. These pin the caps.
|
|
Not a security boundary (the API surface is authenticated and the caller
|
|
supplies their own history); this is footgun containment / parity.
|
|
"""
|
|
|
|
def test_oversized_content_is_truncated(self):
|
|
from tools.todo_tool import MAX_TODO_CONTENT_CHARS
|
|
store = TodoStore()
|
|
store.write([{"id": "1", "content": "A" * 50001, "status": "pending"}])
|
|
item = store.read()[0]
|
|
assert len(item["content"]) <= MAX_TODO_CONTENT_CHARS
|
|
assert item["content"].endswith("… [truncated]")
|
|
|
|
def test_injection_block_is_bounded(self):
|
|
from tools.todo_tool import MAX_TODO_CONTENT_CHARS
|
|
store = TodoStore()
|
|
store.write([{"id": "1", "content": "A" * 50001, "status": "pending"}])
|
|
inj = store.format_for_injection()
|
|
# Before the fix this was ~50085 chars; now it tracks the cap.
|
|
assert len(inj) < MAX_TODO_CONTENT_CHARS + 200
|
|
|
|
def test_merge_update_content_is_capped(self):
|
|
"""The merge path updates content directly, bypassing _validate —
|
|
verify it is capped too."""
|
|
from tools.todo_tool import MAX_TODO_CONTENT_CHARS
|
|
store = TodoStore()
|
|
store.write([{"id": "1", "content": "short", "status": "pending"}])
|
|
store.write([{"id": "1", "content": "B" * 50001}], merge=True)
|
|
assert len(store.read()[0]["content"]) <= MAX_TODO_CONTENT_CHARS
|
|
|
|
def test_item_count_is_bounded(self):
|
|
from tools.todo_tool import MAX_TODO_ITEMS
|
|
store = TodoStore()
|
|
store.write([
|
|
{"id": str(i), "content": f"task {i}", "status": "pending"}
|
|
for i in range(5000)
|
|
])
|
|
assert len(store.read()) == MAX_TODO_ITEMS
|
|
|
|
def test_normal_list_is_unchanged(self):
|
|
"""No regression: ordinary plans pass through untouched (no marker,
|
|
same content, same order)."""
|
|
store = TodoStore()
|
|
store.write([
|
|
{"id": "1", "content": "write the report", "status": "in_progress"},
|
|
{"id": "2", "content": "review PR", "status": "pending"},
|
|
])
|
|
items = store.read()
|
|
assert [i["content"] for i in items] == ["write the report", "review PR"]
|
|
assert "[truncated]" not in items[0]["content"]
|