hardening(todo): bound TodoStore item content length and count

The todo list is re-injected into the model's context after every
context-compression event (TodoStore.format_for_injection), so an oversized
todo item or an unbounded number of items defeats the compression it is meant
to ride through. TodoStore.write/_validate previously enforced no size or count
bounds, so a single 50KB item produced a ~50KB re-injection block on every
subsequent turn.

Add two caps:
- MAX_TODO_CONTENT_CHARS (4000): per-item content is truncated with a marker.
  Routed through a shared _cap_content() so the merge-update path (which writes
  content directly, bypassing _validate) is capped too.
- MAX_TODO_ITEMS (256): total list length is bounded, keeping the
  highest-priority head (list order is priority).

Both caps are generous relative to real plans — a todo item is a short task
description and active lists are a handful of items.

NOT a security fix. Raised externally via GHSA-5g4g-6jrg-mw3g, which framed a
caller-supplied conversation_history on the authenticated API server replaying
into _hydrate_todo_store as a DoS. That path is authenticated (the API server
refuses to start without API_SERVER_KEY) and self-scoped (the caller supplies
their own entire history and can only inflate their own response chain — forged
role=tool entries are never persisted to the session DB), so it is out of scope
as a vulnerability under SECURITY.md 3.2. These bounds are footgun containment
that also applies to the trusted agent path, where the model itself authors the
todos. Credit to the reporter for the observation.

Co-authored-by: YLChen-007 <30854794+YLChen-007@users.noreply.github.com>
This commit is contained in:
Teknium 2026-06-07 17:52:45 -07:00
parent 9c5d1afbe9
commit 69a293b419
2 changed files with 90 additions and 1 deletions

View file

@ -117,3 +117,61 @@ class TestTodoToolFunction:
def test_no_store_returns_error(self):
result = json.loads(todo_tool())
assert "error" in result
class TestTodoStoreBounds:
"""Bounds on persisted todo state (GHSA-5g4g-6jrg-mw3g hardening).
The todo list is re-injected into context after every compression event,
so an unbounded item whether authored by the model or replayed from
caller-supplied history on the API server's _hydrate_todo_store path —
would defeat the compression it rides through. These pin the caps.
Not a security boundary (the API surface is authenticated and the caller
supplies their own history); this is footgun containment / parity.
"""
def test_oversized_content_is_truncated(self):
from tools.todo_tool import MAX_TODO_CONTENT_CHARS
store = TodoStore()
store.write([{"id": "1", "content": "A" * 50001, "status": "pending"}])
item = store.read()[0]
assert len(item["content"]) <= MAX_TODO_CONTENT_CHARS
assert item["content"].endswith("… [truncated]")
def test_injection_block_is_bounded(self):
from tools.todo_tool import MAX_TODO_CONTENT_CHARS
store = TodoStore()
store.write([{"id": "1", "content": "A" * 50001, "status": "pending"}])
inj = store.format_for_injection()
# Before the fix this was ~50085 chars; now it tracks the cap.
assert len(inj) < MAX_TODO_CONTENT_CHARS + 200
def test_merge_update_content_is_capped(self):
"""The merge path updates content directly, bypassing _validate —
verify it is capped too."""
from tools.todo_tool import MAX_TODO_CONTENT_CHARS
store = TodoStore()
store.write([{"id": "1", "content": "short", "status": "pending"}])
store.write([{"id": "1", "content": "B" * 50001}], merge=True)
assert len(store.read()[0]["content"]) <= MAX_TODO_CONTENT_CHARS
def test_item_count_is_bounded(self):
from tools.todo_tool import MAX_TODO_ITEMS
store = TodoStore()
store.write([
{"id": str(i), "content": f"task {i}", "status": "pending"}
for i in range(5000)
])
assert len(store.read()) == MAX_TODO_ITEMS
def test_normal_list_is_unchanged(self):
"""No regression: ordinary plans pass through untouched (no marker,
same content, same order)."""
store = TodoStore()
store.write([
{"id": "1", "content": "write the report", "status": "in_progress"},
{"id": "2", "content": "review PR", "status": "pending"},
])
items = store.read()
assert [i["content"] for i in items] == ["write the report", "review PR"]
assert "[truncated]" not in items[0]["content"]

View file

@ -21,6 +21,17 @@ from typing import Dict, Any, List, Optional
# Valid status values for todo items
VALID_STATUSES = {"pending", "in_progress", "completed", "cancelled"}
# Bounds on persisted todo state. The todo list is a planning aid the model
# re-reads after every context-compression event (see format_for_injection),
# so unbounded item content or count defeats the compression it rides through.
# These caps keep a single oversized item (whether authored by the model or
# replayed from caller-supplied history on the API server) from inflating the
# re-injection block. Generous relative to real plans — a todo item is a short
# task description, and active lists are a handful of items, not hundreds.
MAX_TODO_CONTENT_CHARS = 4000
MAX_TODO_ITEMS = 256
_TRUNCATION_MARKER = "… [truncated]"
class TodoStore:
"""
@ -58,7 +69,7 @@ class TodoStore:
if item_id in existing:
# Update only the fields the LLM actually provided
if "content" in t and t["content"]:
existing[item_id]["content"] = str(t["content"]).strip()
existing[item_id]["content"] = self._cap_content(str(t["content"]).strip())
if "status" in t and t["status"]:
status = str(t["status"]).strip().lower()
if status in VALID_STATUSES:
@ -77,6 +88,11 @@ class TodoStore:
rebuilt.append(current)
seen.add(current["id"])
self._items = rebuilt
# Bound total item count so a replayed/oversized list can't grow the
# re-injection block without limit. Keep the highest-priority head
# (list order is priority).
if len(self._items) > MAX_TODO_ITEMS:
self._items = self._items[:MAX_TODO_ITEMS]
return self.read()
def read(self) -> List[Dict[str, str]]:
@ -121,6 +137,19 @@ class TodoStore:
return "\n".join(lines)
@staticmethod
def _cap_content(content: str) -> str:
"""Truncate oversized todo content to MAX_TODO_CONTENT_CHARS.
A single huge item would otherwise inflate the post-compression
re-injection block (format_for_injection) without bound. Keep the
head the actionable part of a task description plus a marker.
"""
if len(content) > MAX_TODO_CONTENT_CHARS:
keep = MAX_TODO_CONTENT_CHARS - len(_TRUNCATION_MARKER)
return content[:keep] + _TRUNCATION_MARKER
return content
@staticmethod
def _validate(item: Dict[str, Any]) -> Dict[str, str]:
"""
@ -136,6 +165,8 @@ class TodoStore:
content = str(item.get("content", "")).strip()
if not content:
content = "(no description)"
else:
content = TodoStore._cap_content(content)
status = str(item.get("status", "pending")).strip().lower()
if status not in VALID_STATUSES: