mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-09 08:21:50 +00:00
The todo list is re-injected into the model's context after every
context-compression event (TodoStore.format_for_injection), so an oversized
todo item or an unbounded number of items defeats the compression it is meant
to ride through. TodoStore.write/_validate previously enforced no size or
count bounds, so a single 50KB item produced a ~50KB re-injection block on
every subsequent turn.

Add two caps:

- MAX_TODO_CONTENT_CHARS (4000): per-item content is truncated with a marker.
  Routed through a shared _cap_content() so the merge-update path (which
  writes content directly, bypassing _validate) is capped too.
- MAX_TODO_ITEMS (256): total list length is bounded, keeping the
  highest-priority head (list order is priority).

Both caps are generous relative to real plans — a todo item is a short task
description and active lists are a handful of items.

NOT a security fix. Raised externally via GHSA-5g4g-6jrg-mw3g, which framed a
caller-supplied conversation_history on the authenticated API server replaying
into _hydrate_todo_store as a DoS. That path is authenticated (the API server
refuses to start without API_SERVER_KEY) and self-scoped (the caller supplies
their own entire history and can only inflate their own response chain —
forged role=tool entries are never persisted to the session DB), so it is out
of scope as a vulnerability under SECURITY.md 3.2. These bounds are footgun
containment that also applies to the trusted agent path, where the model
itself authors the todos. Credit to the reporter for the observation.

Co-authored-by: YLChen-007 <30854794+YLChen-007@users.noreply.github.com>
308 lines
11 KiB
Python
308 lines
11 KiB
Python
#!/usr/bin/env python3
"""
Todo Tool Module - Planning & Task Management

Provides an in-memory task list the agent uses to decompose complex tasks,
track progress, and maintain focus across long conversations. The state
lives on the AIAgent instance (one per session) and is re-injected into
the conversation after context compression events.

Design:
- Single `todo` tool: provide `todos` param to write, omit to read
- Every call returns the full current list
- No system prompt mutation, no tool response modification
- Behavioral guidance lives entirely in the tool schema description
"""

import json
from typing import Dict, Any, List, Optional


# Valid status values for todo items
VALID_STATUSES = {"pending", "in_progress", "completed", "cancelled"}

# Bounds on persisted todo state. The todo list is a planning aid the model
# re-reads after every context-compression event (see format_for_injection),
# so unbounded item content or count defeats the compression it rides through.
# These caps keep a single oversized item (whether authored by the model or
# replayed from caller-supplied history on the API server) from inflating the
# re-injection block. Generous relative to real plans — a todo item is a short
# task description, and active lists are a handful of items, not hundreds.
MAX_TODO_CONTENT_CHARS = 4000
MAX_TODO_ITEMS = 256
_TRUNCATION_MARKER = "… [truncated]"


class TodoStore:
    """
    In-memory todo list. One instance per AIAgent (one per session).

    Items are ordered -- list position is priority. Each item has:
    - id: unique string identifier (agent-chosen)
    - content: task description (truncated to MAX_TODO_CONTENT_CHARS)
    - status: pending | in_progress | completed | cancelled

    The list as a whole is bounded to MAX_TODO_ITEMS entries; when a write
    would exceed that, only the head of the list is kept.
    """

    def __init__(self):
        # Ordered list of normalized {id, content, status} dicts.
        self._items: List[Dict[str, str]] = []

    def write(self, todos: List[Dict[str, Any]], merge: bool = False) -> List[Dict[str, str]]:
        """
        Write todos. Returns the full current list after writing.

        Args:
            todos: list of {id, content, status} dicts
            merge: if False, replace the entire list. If True, update
                existing items by id and append new ones.

        Returns:
            A copy of the stored list after the write (see read()).

        In merge mode, entries without an id are skipped, and for an
        existing id only the fields that carry a usable value are updated:
        empty or whitespace-only content and unknown statuses leave the
        stored value untouched.
        """
        if not merge:
            # Replace mode: new list entirely
            self._items = [self._validate(t) for t in self._dedupe_by_id(todos)]
        else:
            # Merge mode: update existing items by id, append new ones
            existing = {item["id"]: item for item in self._items}
            for t in self._dedupe_by_id(todos):
                item_id = str(t.get("id", "")).strip()
                if not item_id:
                    continue  # Can't merge without an id

                if item_id in existing:
                    # Update only the fields the LLM actually provided
                    if t.get("content"):
                        # Strip BEFORE deciding to update: whitespace-only
                        # content is truthy but would otherwise overwrite the
                        # stored description with an empty string, something
                        # _validate never allows for new items.
                        new_content = str(t["content"]).strip()
                        if new_content:
                            existing[item_id]["content"] = self._cap_content(new_content)
                    if t.get("status"):
                        status = str(t["status"]).strip().lower()
                        if status in VALID_STATUSES:
                            existing[item_id]["status"] = status
                else:
                    # New item -- validate fully and append to end
                    validated = self._validate(t)
                    existing[validated["id"]] = validated
                    self._items.append(validated)

            # Rebuild _items preserving order for existing items
            seen = set()
            rebuilt = []
            for item in self._items:
                current = existing.get(item["id"], item)
                if current["id"] not in seen:
                    rebuilt.append(current)
                    seen.add(current["id"])
            self._items = rebuilt

        # Bound total item count so a replayed/oversized list can't grow the
        # re-injection block without limit. Keep the highest-priority head
        # (list order is priority).
        if len(self._items) > MAX_TODO_ITEMS:
            self._items = self._items[:MAX_TODO_ITEMS]
        return self.read()

    def read(self) -> List[Dict[str, str]]:
        """Return a copy of the current list (each item dict is copied too)."""
        return [item.copy() for item in self._items]

    def has_items(self) -> bool:
        """Check if there are any items in the list."""
        return bool(self._items)

    def format_for_injection(self) -> Optional[str]:
        """
        Render the todo list for post-compression injection.

        Returns a human-readable string to append to the compressed
        message history, or None if the list is empty or contains no
        pending/in_progress items.
        """
        if not self._items:
            return None

        # Status markers for compact display
        markers = {
            "completed": "[x]",
            "in_progress": "[>]",
            "pending": "[ ]",
            "cancelled": "[~]",
        }

        # Only inject pending/in_progress items — completed/cancelled ones
        # cause the model to re-do finished work after compression.
        active_items = [
            item for item in self._items
            if item["status"] in {"pending", "in_progress"}
        ]
        if not active_items:
            return None

        lines = ["[Your active task list was preserved across context compression]"]
        for item in active_items:
            marker = markers.get(item["status"], "[?]")
            lines.append(f"- {marker} {item['id']}. {item['content']} ({item['status']})")

        return "\n".join(lines)

    @staticmethod
    def _cap_content(content: str) -> str:
        """Truncate oversized todo content to MAX_TODO_CONTENT_CHARS.

        A single huge item would otherwise inflate the post-compression
        re-injection block (format_for_injection) without bound. Keep the
        head — the actionable part of a task description — plus a marker.
        The returned string, marker included, is at most
        MAX_TODO_CONTENT_CHARS characters long.
        """
        if len(content) > MAX_TODO_CONTENT_CHARS:
            keep = MAX_TODO_CONTENT_CHARS - len(_TRUNCATION_MARKER)
            return content[:keep] + _TRUNCATION_MARKER
        return content

    @staticmethod
    def _validate(item: Dict[str, Any]) -> Dict[str, str]:
        """
        Validate and normalize a todo item.

        Ensures required fields exist and status is valid:
        a missing/blank id becomes "?", missing/blank content becomes
        "(no description)", and an unknown status becomes "pending".
        Returns a clean dict with only {id, content, status}.
        """
        # NOTE(review): the id is rendered by format_for_injection but is not
        # length-capped here, unlike content — confirm whether it should be.
        item_id = str(item.get("id", "")).strip()
        if not item_id:
            item_id = "?"

        content = str(item.get("content", "")).strip()
        if not content:
            content = "(no description)"
        else:
            content = TodoStore._cap_content(content)

        status = str(item.get("status", "pending")).strip().lower()
        if status not in VALID_STATUSES:
            status = "pending"

        return {"id": item_id, "content": content, "status": status}

    @staticmethod
    def _dedupe_by_id(todos: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Collapse duplicate ids, keeping the last occurrence in its position.

        Blank or missing ids are all treated as the same id ("?"), so at
        most one such entry survives.
        """
        last_index: Dict[str, int] = {}
        for i, item in enumerate(todos):
            item_id = str(item.get("id", "")).strip() or "?"
            last_index[item_id] = i
        return [todos[i] for i in sorted(last_index.values())]


def todo_tool(
    todos: Optional[List[Dict[str, Any]]] = None,
    merge: bool = False,
    store: Optional["TodoStore"] = None,
) -> str:
    """
    Single entry point for the todo tool. Reads or writes depending on params.

    Args:
        todos: if provided, write these items. If None, read current list.
        merge: if True, update by id. If False (default), replace entire list.
        store: the TodoStore instance from the AIAgent.

    Returns:
        JSON string with the full current list under "todos" and per-status
        counts under "summary". When no store is supplied, returns whatever
        tool_error() produces for the message "TodoStore not initialized".
    """
    if store is None:
        # tool_error is imported at the bottom of this module, so it is
        # resolved at call time rather than at definition time.
        return tool_error("TodoStore not initialized")

    items = store.read() if todos is None else store.write(todos, merge)

    # Build summary counts; tuple order fixes the key order of the JSON output.
    counts = {
        status: sum(1 for entry in items if entry["status"] == status)
        for status in ("pending", "in_progress", "completed", "cancelled")
    }

    return json.dumps(
        {
            "todos": items,
            "summary": {"total": len(items), **counts},
        },
        ensure_ascii=False,
    )


def check_todo_requirements() -> bool:
    """Report whether the todo tool can be used.

    The todo tool has no external requirements, so it is always available.

    Returns:
        Always True.
    """
    return True


# =============================================================================
# OpenAI Function-Calling Schema
# =============================================================================
# Behavioral guidance is baked into the description so it's part of the
# static tool schema (cached, never changes mid-conversation).

TODO_SCHEMA = {
    "name": "todo",
    "description": (
        "Manage your task list for the current session. Use for complex tasks "
        "with 3+ steps or when the user provides multiple tasks. "
        "Call with no parameters to read the current list.\n\n"
        "Writing:\n"
        "- Provide 'todos' array to create/update items\n"
        "- merge=false (default): replace the entire list with a fresh plan\n"
        "- merge=true: update existing items by id, add any new ones\n\n"
        "Each item: {id: string, content: string, "
        "status: pending|in_progress|completed|cancelled}\n"
        "List order is priority. Only ONE item in_progress at a time.\n"
        "Mark items completed immediately when done. If something fails, "
        "cancel it and add a revised item.\n\n"
        "Always returns the full current list."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            # Omitting `todos` turns the call into a read.
            "todos": {
                "type": "array",
                "description": "Task items to write. Omit to read current list.",
                "items": {
                    "type": "object",
                    "properties": {
                        "id": {
                            "type": "string",
                            "description": "Unique item identifier"
                        },
                        "content": {
                            "type": "string",
                            "description": "Task description"
                        },
                        "status": {
                            "type": "string",
                            "enum": ["pending", "in_progress", "completed", "cancelled"],
                            "description": "Current status"
                        }
                    },
                    "required": ["id", "content", "status"]
                }
            },
            "merge": {
                "type": "boolean",
                "description": (
                    "true: update existing items by id, add new ones. "
                    "false (default): replace the entire list."
                ),
                "default": False
            }
        },
        # Both parameters are optional: a call with no arguments is a read.
        "required": []
    }
}


# --- Registry ---
# NOTE(review): this import sits at the bottom of the module instead of with
# the other imports — presumably deliberate (e.g. to avoid an import cycle
# with tools.registry); confirm before moving it. todo_tool() only looks up
# tool_error when it is called, so the late import is sufficient.
from tools.registry import registry, tool_error

# Register the tool: the handler pulls `todos`/`merge` from the call arguments
# and the TodoStore from the `store` keyword supplied by the caller.
registry.register(
    name="todo",
    toolset="todo",
    schema=TODO_SCHEMA,
    handler=lambda args, **kw: todo_tool(
        todos=args.get("todos"), merge=args.get("merge", False), store=kw.get("store")),
    check_fn=check_todo_requirements,
    emoji="📋",
)