diff --git a/tests/tools/test_todo_tool.py b/tests/tools/test_todo_tool.py
index 6215078525c..dbb64e80ee6 100644
--- a/tests/tools/test_todo_tool.py
+++ b/tests/tools/test_todo_tool.py
@@ -117,3 +117,61 @@ class TestTodoToolFunction:
     def test_no_store_returns_error(self):
         result = json.loads(todo_tool())
         assert "error" in result
+
+
+class TestTodoStoreBounds:
+    """Bounds on persisted todo state (GHSA-5g4g-6jrg-mw3g hardening).
+
+    The todo list is re-injected into context after every compression event,
+    so an unbounded item — whether authored by the model or replayed from
+    caller-supplied history on the API server's _hydrate_todo_store path —
+    would defeat the compression it rides through. These pin the caps.
+    Not a security boundary (the API surface is authenticated and the caller
+    supplies their own history); this is footgun containment / parity.
+    """
+
+    def test_oversized_content_is_truncated(self):
+        from tools.todo_tool import MAX_TODO_CONTENT_CHARS
+        store = TodoStore()
+        store.write([{"id": "1", "content": "A" * 50001, "status": "pending"}])
+        item = store.read()[0]
+        assert len(item["content"]) <= MAX_TODO_CONTENT_CHARS
+        assert item["content"].endswith("… [truncated]")
+
+    def test_injection_block_is_bounded(self):
+        from tools.todo_tool import MAX_TODO_CONTENT_CHARS
+        store = TodoStore()
+        store.write([{"id": "1", "content": "A" * 50001, "status": "pending"}])
+        inj = store.format_for_injection()
+        # Before the fix this was ~50085 chars; now it tracks the cap.
+        assert len(inj) < MAX_TODO_CONTENT_CHARS + 200
+
+    def test_merge_update_content_is_capped(self):
+        """The merge path updates content directly, bypassing _validate —
+        verify it is capped too."""
+        from tools.todo_tool import MAX_TODO_CONTENT_CHARS
+        store = TodoStore()
+        store.write([{"id": "1", "content": "short", "status": "pending"}])
+        store.write([{"id": "1", "content": "B" * 50001}], merge=True)
+        assert len(store.read()[0]["content"]) <= MAX_TODO_CONTENT_CHARS
+
+    def test_item_count_is_bounded(self):
+        from tools.todo_tool import MAX_TODO_ITEMS
+        store = TodoStore()
+        store.write([
+            {"id": str(i), "content": f"task {i}", "status": "pending"}
+            for i in range(5000)
+        ])
+        assert len(store.read()) == MAX_TODO_ITEMS
+
+    def test_normal_list_is_unchanged(self):
+        """No regression: ordinary plans pass through untouched (no marker,
+        same content, same order)."""
+        store = TodoStore()
+        store.write([
+            {"id": "1", "content": "write the report", "status": "in_progress"},
+            {"id": "2", "content": "review PR", "status": "pending"},
+        ])
+        items = store.read()
+        assert [i["content"] for i in items] == ["write the report", "review PR"]
+        assert "[truncated]" not in items[0]["content"]
diff --git a/tools/todo_tool.py b/tools/todo_tool.py
index 99d9ffe8515..960dab66603 100644
--- a/tools/todo_tool.py
+++ b/tools/todo_tool.py
@@ -21,6 +21,17 @@ from typing import Dict, Any, List, Optional
 # Valid status values for todo items
 VALID_STATUSES = {"pending", "in_progress", "completed", "cancelled"}
 
+# Bounds on persisted todo state. The todo list is a planning aid the model
+# re-reads after every context-compression event (see format_for_injection),
+# so unbounded item content or count defeats the compression it rides through.
+# These caps keep a single oversized item (whether authored by the model or
+# replayed from caller-supplied history on the API server) from inflating the
+# re-injection block. Generous relative to real plans — a todo item is a short
+# task description, and active lists are a handful of items, not hundreds.
+MAX_TODO_CONTENT_CHARS = 4000
+MAX_TODO_ITEMS = 256
+_TRUNCATION_MARKER = "… [truncated]"
+
 
 class TodoStore:
     """
@@ -58,7 +69,7 @@ class TodoStore:
                 if item_id in existing:
                     # Update only the fields the LLM actually provided
                     if "content" in t and t["content"]:
-                        existing[item_id]["content"] = str(t["content"]).strip()
+                        existing[item_id]["content"] = self._cap_content(str(t["content"]).strip())
                     if "status" in t and t["status"]:
                         status = str(t["status"]).strip().lower()
                         if status in VALID_STATUSES:
@@ -77,6 +88,11 @@ class TodoStore:
                     rebuilt.append(current)
                     seen.add(current["id"])
             self._items = rebuilt
+        # Bound total item count so a replayed/oversized list can't grow the
+        # re-injection block without limit. Keep the highest-priority head
+        # (list order is priority).
+        if len(self._items) > MAX_TODO_ITEMS:
+            self._items = self._items[:MAX_TODO_ITEMS]
         return self.read()
 
     def read(self) -> List[Dict[str, str]]:
@@ -121,6 +137,19 @@ class TodoStore:
 
         return "\n".join(lines)
 
+    @staticmethod
+    def _cap_content(content: str) -> str:
+        """Truncate oversized todo content to MAX_TODO_CONTENT_CHARS.
+
+        A single huge item would otherwise inflate the post-compression
+        re-injection block (format_for_injection) without bound. Keep the
+        head — the actionable part of a task description — plus a marker.
+        """
+        if len(content) > MAX_TODO_CONTENT_CHARS:
+            keep = MAX_TODO_CONTENT_CHARS - len(_TRUNCATION_MARKER)
+            return content[:keep] + _TRUNCATION_MARKER
+        return content
+
     @staticmethod
     def _validate(item: Dict[str, Any]) -> Dict[str, str]:
         """
@@ -136,6 +165,8 @@ class TodoStore:
         content = str(item.get("content", "")).strip()
         if not content:
             content = "(no description)"
+        else:
+            content = TodoStore._cap_content(content)
 
         status = str(item.get("status", "pending")).strip().lower()
         if status not in VALID_STATUSES: