hermes-agent/tools/todo_tool.py
Teknium 69a293b419 hardening(todo): bound TodoStore item content length and count
The todo list is re-injected into the model's context after every
context-compression event (TodoStore.format_for_injection), so an oversized
todo item or an unbounded number of items defeats the compression it is meant
to ride through. TodoStore.write/_validate previously enforced no size or count
bounds, so a single 50KB item produced a ~50KB re-injection block on every
subsequent turn.

Add two caps:
- MAX_TODO_CONTENT_CHARS (4000): per-item content is truncated with a marker.
  Routed through a shared _cap_content() so the merge-update path (which writes
  content directly, bypassing _validate) is capped too.
- MAX_TODO_ITEMS (256): total list length is bounded, keeping the
  highest-priority head (list order is priority).

Both caps are generous relative to real plans — a todo item is a short task
description and active lists are a handful of items.

NOT a security fix. Raised externally via GHSA-5g4g-6jrg-mw3g, which framed a
caller-supplied conversation_history on the authenticated API server replaying
into _hydrate_todo_store as a DoS. That path is authenticated (the API server
refuses to start without API_SERVER_KEY) and self-scoped (the caller supplies
their own entire history and can only inflate their own response chain — forged
role=tool entries are never persisted to the session DB), so it is out of scope
as a vulnerability under SECURITY.md 3.2. These bounds are footgun containment
that also applies to the trusted agent path, where the model itself authors the
todos. Credit to the reporter for the observation.

Co-authored-by: YLChen-007 <30854794+YLChen-007@users.noreply.github.com>
2026-06-07 18:06:27 -07:00

308 lines
11 KiB
Python

#!/usr/bin/env python3
"""
Todo Tool Module - Planning & Task Management
Provides an in-memory task list the agent uses to decompose complex tasks,
track progress, and maintain focus across long conversations. The state
lives on the AIAgent instance (one per session) and is re-injected into
the conversation after context compression events.
Design:
- Single `todo` tool: provide `todos` param to write, omit to read
- Every call returns the full current list
- No system prompt mutation, no tool response modification
- Behavioral guidance lives entirely in the tool schema description
"""
import json
from typing import Dict, Any, List, Optional
# Valid status values for todo items
VALID_STATUSES = {"pending", "in_progress", "completed", "cancelled"}

# Bounds on persisted todo state. The todo list is a planning aid the model
# re-reads after every context-compression event (see format_for_injection),
# so unbounded item content or count defeats the compression it rides through.
# These caps keep a single oversized item (whether authored by the model or
# replayed from caller-supplied history on the API server) from inflating the
# re-injection block. Generous relative to real plans — a todo item is a short
# task description, and active lists are a handful of items, not hundreds.
MAX_TODO_CONTENT_CHARS = 4000
MAX_TODO_ITEMS = 256
# Appended to truncated content. Its length counts toward
# MAX_TODO_CONTENT_CHARS, so capped content is exactly that long.
_TRUNCATION_MARKER = "… [truncated]"


class TodoStore:
    """
    In-memory todo list. One instance per AIAgent (one per session).

    Items are ordered -- list position is priority. Each item has:
    - id: unique string identifier (agent-chosen)
    - content: task description (capped at MAX_TODO_CONTENT_CHARS)
    - status: pending | in_progress | completed | cancelled

    The list as a whole is capped at MAX_TODO_ITEMS entries; when the cap
    is exceeded the head (highest priority) is kept.
    """

    def __init__(self):
        self._items: List[Dict[str, str]] = []

    def write(self, todos: List[Dict[str, Any]], merge: bool = False) -> List[Dict[str, str]]:
        """
        Write todos. Returns the full current list after writing.

        Args:
            todos: list of {id, content, status} dicts. Duplicate ids are
                collapsed first (last occurrence wins, see _dedupe_by_id).
            merge: if False, replace the entire list. If True, update
                existing items by id and append new ones. In merge mode,
                entries without an id are skipped, and only fields that
                carry a usable value are applied to an existing item.

        Returns:
            A copy of the stored list (same as read()).
        """
        if not merge:
            # Replace mode: new list entirely. Validation is one-to-one, so
            # slicing to the cap *before* validating gives the same result
            # without normalizing entries that would be dropped anyway.
            head = self._dedupe_by_id(todos)[:MAX_TODO_ITEMS]
            self._items = [self._validate(t) for t in head]
        else:
            # Merge mode: update existing items by id, append new ones
            existing = {item["id"]: item for item in self._items}
            for t in self._dedupe_by_id(todos):
                item_id = str(t.get("id", "")).strip()
                if not item_id:
                    continue  # Can't merge without an id

                current = existing.get(item_id)
                if current is None:
                    # New item -- validate fully and append to end
                    validated = self._validate(t)
                    existing[validated["id"]] = validated
                    self._items.append(validated)
                    continue

                # Update only the fields the LLM actually provided.
                if t.get("content"):
                    content = str(t["content"]).strip()
                    # Whitespace-only content is treated as "not provided":
                    # it must not blank out the existing description
                    # (_validate never stores empty content either).
                    if content:
                        current["content"] = self._cap_content(content)
                if t.get("status"):
                    status = str(t["status"]).strip().lower()
                    # An unrecognized status leaves the current one untouched.
                    if status in VALID_STATUSES:
                        current["status"] = status

            # Rebuild _items preserving order for existing items, dropping
            # any repeated id so each id appears once.
            seen = set()
            rebuilt = []
            for item in self._items:
                current = existing.get(item["id"], item)
                if current["id"] not in seen:
                    rebuilt.append(current)
                    seen.add(current["id"])
            self._items = rebuilt

        # Bound total item count so a replayed/oversized list can't grow the
        # re-injection block without limit. Keep the highest-priority head
        # (list order is priority). In merge mode this drops the newest
        # appended items first.
        if len(self._items) > MAX_TODO_ITEMS:
            self._items = self._items[:MAX_TODO_ITEMS]
        return self.read()

    def read(self) -> List[Dict[str, str]]:
        """Return a copy of the current list (each item dict is copied too)."""
        return [item.copy() for item in self._items]

    def has_items(self) -> bool:
        """Check if there are any items in the list."""
        return bool(self._items)

    def format_for_injection(self) -> Optional[str]:
        """
        Render the todo list for post-compression injection.

        Returns a human-readable string to append to the compressed
        message history, or None if the list is empty or contains no
        pending/in_progress items.
        """
        if not self._items:
            return None

        # Status markers for compact display
        markers = {
            "completed": "[x]",
            "in_progress": "[>]",
            "pending": "[ ]",
            "cancelled": "[~]",
        }

        # Only inject pending/in_progress items — completed/cancelled ones
        # cause the model to re-do finished work after compression.
        active_items = [
            item for item in self._items
            if item["status"] in {"pending", "in_progress"}
        ]
        if not active_items:
            return None

        # NOTE(review): content and item count are capped, but item ids are
        # not length-bounded and are rendered verbatim below — consider a cap.
        lines = ["[Your active task list was preserved across context compression]"]
        for item in active_items:
            marker = markers.get(item["status"], "[?]")
            lines.append(f"- {marker} {item['id']}. {item['content']} ({item['status']})")
        return "\n".join(lines)

    @staticmethod
    def _cap_content(content: str) -> str:
        """Truncate oversized todo content to MAX_TODO_CONTENT_CHARS.

        A single huge item would otherwise inflate the post-compression
        re-injection block (format_for_injection) without bound. Keep the
        head — the actionable part of a task description — plus a marker.
        Content at or under the cap is returned unchanged.
        """
        if len(content) <= MAX_TODO_CONTENT_CHARS:
            return content
        # Reserve room for the marker so the result is exactly at the cap.
        keep = MAX_TODO_CONTENT_CHARS - len(_TRUNCATION_MARKER)
        return content[:keep] + _TRUNCATION_MARKER

    @staticmethod
    def _validate(item: Dict[str, Any]) -> Dict[str, str]:
        """
        Validate and normalize a todo item.

        Ensures required fields exist and status is valid: a missing/blank
        id becomes "?", missing/blank content becomes "(no description)",
        and an unknown status becomes "pending". Content is length-capped.
        Returns a clean dict with only {id, content, status}.
        """
        item_id = str(item.get("id", "")).strip()
        if not item_id:
            item_id = "?"

        content = str(item.get("content", "")).strip()
        if not content:
            content = "(no description)"
        else:
            content = TodoStore._cap_content(content)

        status = str(item.get("status", "pending")).strip().lower()
        if status not in VALID_STATUSES:
            status = "pending"

        return {"id": item_id, "content": content, "status": status}

    @staticmethod
    def _dedupe_by_id(todos: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Collapse duplicate ids, keeping the last occurrence in its position.

        Ids are compared after str()/strip(); blank ids all collapse onto
        the same "?" key that _validate assigns.
        """
        last_index: Dict[str, int] = {}
        for i, item in enumerate(todos):
            item_id = str(item.get("id", "")).strip() or "?"
            last_index[item_id] = i
        return [todos[i] for i in sorted(last_index.values())]
def todo_tool(
    todos: Optional[List[Dict[str, Any]]] = None,
    merge: bool = False,
    store: Optional[TodoStore] = None,
) -> str:
    """
    Entry point behind the `todo` tool: write when `todos` is given, else read.

    Args:
        todos: items to write; None means "just return the current list".
        merge: forwarded to the store's write() -- True updates by id,
            False (default) replaces the whole list.
        store: the TodoStore instance from the AIAgent.

    Returns:
        JSON string holding the full current list under "todos" and
        per-status counts (plus the total) under "summary". If no store
        was supplied, the result of tool_error(...) is returned instead.
    """
    if store is None:
        return tool_error("TodoStore not initialized")

    items = store.read() if todos is None else store.write(todos, merge)

    # Tally each status; key order here fixes the order in the JSON summary.
    per_status = {
        name: sum(1 for entry in items if entry["status"] == name)
        for name in ("pending", "in_progress", "completed", "cancelled")
    }
    payload = {
        "todos": items,
        "summary": {"total": len(items), **per_status},
    }
    return json.dumps(payload, ensure_ascii=False)
def check_todo_requirements() -> bool:
    """Report availability of the todo tool.

    The tool depends on nothing external, so this is unconditionally True.
    """
    return True
# =============================================================================
# OpenAI Function-Calling Schema
# =============================================================================
# Behavioral guidance is baked into the description so it's part of the
# static tool schema (cached, never changes mid-conversation).
TODO_SCHEMA = {
    "name": "todo",
    # Model-facing usage guidance. The rules stated here (e.g. "Only ONE item
    # in_progress at a time") are instructions to the model only; TodoStore
    # does not enforce them.
    "description": (
        "Manage your task list for the current session. Use for complex tasks "
        "with 3+ steps or when the user provides multiple tasks. "
        "Call with no parameters to read the current list.\n\n"
        "Writing:\n"
        "- Provide 'todos' array to create/update items\n"
        "- merge=false (default): replace the entire list with a fresh plan\n"
        "- merge=true: update existing items by id, add any new ones\n\n"
        "Each item: {id: string, content: string, "
        "status: pending|in_progress|completed|cancelled}\n"
        "List order is priority. Only ONE item in_progress at a time.\n"
        "Mark items completed immediately when done. If something fails, "
        "cancel it and add a revised item.\n\n"
        "Always returns the full current list."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            # Omitted -> todo_tool reads; provided -> todo_tool writes.
            "todos": {
                "type": "array",
                "description": "Task items to write. Omit to read current list.",
                "items": {
                    "type": "object",
                    "properties": {
                        "id": {
                            "type": "string",
                            "description": "Unique item identifier"
                        },
                        "content": {
                            "type": "string",
                            "description": "Task description"
                        },
                        # Same four values as VALID_STATUSES.
                        "status": {
                            "type": "string",
                            "enum": ["pending", "in_progress", "completed", "cancelled"],
                            "description": "Current status"
                        }
                    },
                    # The schema asks for all three fields; TodoStore itself
                    # still fills defaults when any of them is missing.
                    "required": ["id", "content", "status"]
                }
            },
            "merge": {
                "type": "boolean",
                "description": (
                    "true: update existing items by id, add new ones. "
                    "false (default): replace the entire list."
                ),
                "default": False
            }
        },
        # Both top-level parameters are optional: a bare call is a read.
        "required": []
    }
}
# --- Registry ---
# `tool_error` is what todo_tool() returns through when no store is supplied;
# `registry` is used only for the registration call below.
# NOTE(review): this import sits at the bottom of the module rather than with
# the other imports — presumably to avoid a circular import; confirm.
from tools.registry import registry, tool_error
# Register the tool at import time under the "todo" name and toolset.
registry.register(
    name="todo",
    toolset="todo",
    schema=TODO_SCHEMA,
    # Adapter from the registry's calling convention to todo_tool():
    # "todos" and "merge" come from the tool-call arguments, while the
    # TodoStore arrives as the `store` keyword argument (None if absent,
    # which todo_tool reports as an error).
    handler=lambda args, **kw: todo_tool(
        todos=args.get("todos"), merge=args.get("merge", False), store=kw.get("store")),
    check_fn=check_todo_requirements,
    emoji="📋",
)