hermes-agent/tests/test_checkpoint_flush.py
AJ 748f1a0417 fix: red-team QA -- 5 bugs found and fixed
Bug #1 (Critical): get_parent_session_id() doesn't exist on SessionDB.
Replaced with get_session(sid)['parent_session_id']. Also fixed lineage
walk loop that only ever iterated once due to unconditional break.

Bug #2 (Medium): Path traversal via session_id. CheckpointStore._path_for
now validates session_id with regex ^[A-Za-z0-9_-]+$ to reject /, \, ..

Bug #3 (Critical): 'checkpoint' tool never offered to model. Added
'checkpoint' to the 'todo' toolset tools list in TOOLSETS dict.

Bug #4 (Low): flush_checkpoint test didn't call flush_checkpoint. Test now
calls AIAgent.flush_checkpoint() directly via mock agent.

Bug #5 (Medium): Injection fallback grabbed ANY in_progress checkpoint
from unrelated sessions. Removed the broad fallback; caller now walks the
parent lineage chain and passes the specific session_id that has a
checkpoint file.

Added 13 regression tests in test_checkpoint_regression.py covering all
five bug fixes.
2026-04-24 19:33:14 -04:00

53 lines
No EOL
1.8 KiB
Python

"""Test that flush_checkpoint writes a checkpoint via checkpoint_tool."""
import json
import pytest
from unittest.mock import MagicMock, patch
def test_flush_checkpoint_method_exists():
    """AIAgent must expose ``flush_checkpoint`` as a callable attribute.

    A bare ``hasattr`` check would also pass for a non-callable attribute
    that merely shares the name (for example ``flush_checkpoint = None``),
    so the attribute is fetched and its callability is verified as well.
    """
    from run_agent import AIAgent

    flush = getattr(AIAgent, "flush_checkpoint", None)
    assert flush is not None, "AIAgent is missing flush_checkpoint"
    assert callable(flush), "AIAgent.flush_checkpoint is not callable"
def test_flush_checkpoint_writes_to_store(tmp_path):
    """Invoke the real ``flush_checkpoint`` on a stubbed agent and read back the result.

    The agent is a ``MagicMock`` wired with a real ``CheckpointStore`` rooted
    under ``tmp_path``, a stubbed todo store holding one completed and one
    in-progress item, and a stubbed session DB that reports a fixed title.
    After the call, the checkpoint stored for the session must exist, carry
    the stubbed title as its ``task``, and have status ``in_progress``.
    """
    from agent.checkpoint_store import CheckpointStore
    from run_agent import AIAgent

    session_id = "flush_test_session"
    checkpoint_store = CheckpointStore(checkpoints_dir=tmp_path / "checkpoints")

    # Todo store stub: raw items plus the pre-rendered injection text.
    todo_stub = MagicMock()
    todo_stub._items = [
        {"content": "Step 1", "status": "completed"},
        {"content": "Step 2", "status": "in_progress"},
    ]
    todo_stub.format_for_injection.return_value = "- [x] Step 1\n- [~] Step 2"

    # Session DB stub: only the title lookup is given a concrete answer.
    session_db_stub = MagicMock()
    session_db_stub.get_session_title.return_value = "Test session title"

    agent_stub = MagicMock()
    agent_stub.session_id = session_id
    agent_stub._checkpoint_store = checkpoint_store
    agent_stub._todo_store = todo_stub
    agent_stub._session_db = session_db_stub

    # Call the unbound method so the real implementation runs against the stub.
    AIAgent.flush_checkpoint(agent_stub)

    # The checkpoint must be retrievable under the same session id.
    checkpoint = checkpoint_store.read(session_id)
    assert checkpoint is not None
    assert checkpoint["task"] == "Test session title"
    assert checkpoint["status"] == "in_progress"
def test_flush_checkpoint_noops_without_store(tmp_path):
    """With ``_checkpoint_store`` set to ``None``, flush_checkpoint must not raise.

    The test passes as long as the call completes; it makes no assertions
    about return value or side effects.
    """
    from run_agent import AIAgent

    agent_stub = MagicMock()
    agent_stub._checkpoint_store = None

    # Completing without an exception is the whole check.
    AIAgent.flush_checkpoint(agent_stub)