fix: red-team QA -- 5 bugs found and fixed

Bug #1 (Critical): get_parent_session_id() does not exist on SessionDB. Replaced it with get_session(sid)['parent_session_id']. Also fixed the lineage-walk loop, which only ever iterated once because of an unconditional break. Bug #2 (Medium): Path traversal via session_id. CheckpointStore._path_for now validates session_id against the regex ^[A-Za-z0-9_-]+$, which rejects '/', '\' and '..'. Bug #3 (Critical): The 'checkpoint' tool was never offered to the model. Added 'checkpoint' to the tools list of the 'todo' toolset in the TOOLSETS dict. Bug #4 (Low): The flush_checkpoint test did not actually call flush_checkpoint. The test now calls AIAgent.flush_checkpoint() directly with a mock agent. Bug #5 (Medium): The injection fallback grabbed ANY in_progress checkpoint, including ones from unrelated sessions. Removed the broad fallback; the caller now walks the parent lineage chain and passes the specific session_id that has a checkpoint file. Added 13 regression tests in test_checkpoint_regression.py covering all five bug fixes.
2026-05-17 04:31:55 +00:00 · 2026-04-23 00:21:25 -04:00 · 2026-04-23 00:21:25 -04:00 · 748f1a0417
commit 748f1a0417
parent c528185e0e
5 changed files with 185 additions and 28 deletions
--- a/tests/test_checkpoint_flush.py
+++ b/tests/test_checkpoint_flush.py
@ -1,7 +1,7 @@
-"""Test that flush_checkpoint is called during context compression."""
+"""Test that flush_checkpoint writes a checkpoint via checkpoint_tool."""
 import json
 import pytest
-from unittest.mock import MagicMock
+from unittest.mock import MagicMock, patch


 def test_flush_checkpoint_method_exists():
@ -13,27 +13,41 @@ def test_flush_checkpoint_method_exists():
 def test_flush_checkpoint_writes_to_store(tmp_path):
    """flush_checkpoint should write a checkpoint with current session state."""
    from agent.checkpoint_store import CheckpointStore
-    from tools.checkpoint_tool import checkpoint_tool
+
    store = CheckpointStore(checkpoints_dir=tmp_path / "checkpoints")

+    mock_todo = MagicMock()
+    mock_todo._items = [
+        {"content": "Step 1", "status": "completed"},
+        {"content": "Step 2", "status": "in_progress"},
+    ]
+    mock_todo.format_for_injection.return_value = "- [x] Step 1\n- [~] Step 2"
+
+    mock_session_db = MagicMock()
+    mock_session_db.get_session_title.return_value = "Test session title"
+
    mock_agent = MagicMock()
    mock_agent.session_id = "flush_test_session"
    mock_agent._checkpoint_store = store
-    mock_agent._todo_store = MagicMock()
-    mock_agent._todo_store.format_for_injection.return_value = "- [x] Step 1"
+    mock_agent._todo_store = mock_todo
+    mock_agent._session_db = mock_session_db

-    # Verify the checkpoint tool writes successfully (same path flush_checkpoint uses)
-    result = checkpoint_tool(
-        action="write",
-        task="Auto-checkpoint before compression",
-        progress=[],
-        state={},
-        decisions=[],
-        store=store,
-        agent=mock_agent,
-    )
-    data = json.loads(result)
-    assert data["success"] is True
+    # Call the real flush_checkpoint method
+    from run_agent import AIAgent
+    AIAgent.flush_checkpoint(mock_agent)
+
+    # Verify a checkpoint was written for this session
    saved = store.read("flush_test_session")
    assert saved is not None
-    assert saved["task"] == "Auto-checkpoint before compression"
+    assert saved["task"] == "Test session title"
+    assert saved["status"] == "in_progress"
+
+
+def test_flush_checkpoint_noops_without_store(tmp_path):
+    """flush_checkpoint should silently return if _checkpoint_store is None."""
+    mock_agent = MagicMock()
+    mock_agent._checkpoint_store = None
+
+    from run_agent import AIAgent
+    # Should not raise
+    AIAgent.flush_checkpoint(mock_agent)