hermes-agent/tests/agent/test_compression_concurrent_fork.py
Teknium a30480bd2b
fix(compression): prevent session-id fork from concurrent compressions (#34351)
* fix(compression): prevent session-id fork from concurrent compressions

When two AIAgent instances share the same session_id (most commonly the
parent-turn agent and its background-review fork, which inherits
session_id verbatim via background_review.py L451), both can call
compress_context() on overlapping snapshots of the same conversation.
Each ends the parent and creates its own NEW child session in state.db,
both parented to the same old id. The gateway SessionEntry only catches
one rotation; the other becomes an orphan that silently accumulates
writes — Damien's incident shape (parent 20260527_234659_e65f0e → two
children, only one visible).

Adds a state.db-backed per-session compression lock. Acquired before
the rotation in conversation_compression.compress_context(); on
failure, the caller returns messages unchanged so the auto-compress
retry loop stops cleanly. TTL (5min default) reclaims locks abandoned
by crashed compressors. Lock holder identity (pid:tid:agent:nonce) is
preserved for diagnostics via get_compression_lock_holder().

Schema bumped 13 -> 14 to track the new compression_locks table.
Reconciled additively via the existing declarative-column pattern;
no data migration needed for existing DBs.

Regression test reproduces Damien's shape: two threads racing
_compress_context on a shared parent_sid. Without the lock the test
deterministically produces 2 child sessions; with the lock, exactly 1.

Covers all six compression entry points (preflight in conversation_loop,
mid-turn fallback, hygiene compression in gateway, /compact, CLI
/compress, TUI /compress). ACP /compress was already protected by
nulling out _session_db before its compress call.

* ci: trigger rerun (transient GitHub API rate limit on CodeQL workflow)
2026-05-28 21:40:39 -07:00

173 lines
7 KiB
Python

"""Regression: prevent transcript fork when two paths compress the same session_id.
Damien's incident (Discord, 2026-05-28): a long Hermes session in a Discord
gateway hit the compression threshold at the end of a turn. The parent agent
finished delivering the response and ``conversation_loop.py`` fired
``_spawn_background_review(...)`` — which builds a forked ``AIAgent`` that
inherits ``agent.session_id`` (see ``agent/background_review.py``::
``review_agent.session_id = agent.session_id``). Roughly two seconds later
a synthetic ``Background process proc_… completed`` event arrived and
started a fresh turn on the same parent ``session_id`` (still cached in the
gateway's ``SessionEntry``). Both paths hit preflight compression on the
same parent transcript and called ``_compress_context`` concurrently. Each
ended the parent and created its own CHILD session in ``state.db``, both
parented to the same old id. The gateway's ``SessionEntry`` only caught one
rotation; the other child became an orphan that silently accumulated writes.
Repro shape on Damien's machine:
parent 20260527_234659_e65f0e ended_at=set end_reason='compression'
child 20260528_113619_fc80e1 parent=20260527_234659_e65f0e (in SessionEntry)
child <orphan> parent=20260527_234659_e65f0e (silent writes)
This regression simulates the two concurrent ``compress_context`` calls
against a shared ``state.db`` and asserts that the per-session compression
lock added in this PR prevents the orphan child. Without the lock the
fixture deterministically produces 2 children; with the lock, exactly 1.
"""
from __future__ import annotations
import os
import threading
import time
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
from hermes_state import SessionDB
def _build_agent_with_db(db: SessionDB, session_id: str):
"""Build an AIAgent that's wired to ``db`` and pinned to ``session_id``."""
with patch.dict(os.environ, {"OPENROUTER_API_KEY": "test-key"}):
from run_agent import AIAgent
agent = AIAgent(
api_key="test-key",
base_url="https://openrouter.ai/api/v1",
model="test/model",
quiet_mode=True,
session_db=db,
session_id=session_id,
skip_context_files=True,
skip_memory=True,
)
# Stub the compressor so it returns deterministic output and DOESN'T make
# an LLM call. Sleep inside compress() so the two threads' rotations
# actually overlap — without that the OS could happen to serialize them
# and hide the bug.
compressor = MagicMock()
def _compress_with_overlap(*_a, **_kw):
time.sleep(0.25)
return [
{"role": "user", "content": "[CONTEXT COMPACTION] summary"},
{"role": "user", "content": "tail"},
]
compressor.compress.side_effect = _compress_with_overlap
compressor.compression_count = 1
compressor.last_prompt_tokens = 0
compressor.last_completion_tokens = 0
compressor._last_summary_error = None
compressor._last_compress_aborted = False
compressor._last_aux_model_failure_model = None
compressor._last_aux_model_failure_error = None
agent.context_compressor = compressor
return agent
def _count_children(db: SessionDB, parent_sid: str) -> int:
"""Count rows in state.db whose parent_session_id == parent_sid."""
rows = db._conn.execute(
"SELECT id FROM sessions WHERE parent_session_id = ?",
(parent_sid,),
).fetchall()
return len(rows)
def test_concurrent_compression_does_not_fork_session(tmp_path: Path) -> None:
"""Two AIAgents that share a session_id MUST NOT both rotate it.
Without the per-session compression lock this fixture deterministically
produces 2 child sessions (transcript fork). With the lock the second
path aborts cleanly, leaving exactly 1 canonical child.
"""
db = SessionDB(db_path=tmp_path / "state.db")
parent_sid = "PARENT_TEST_SESSION"
db.create_session(parent_sid, source="discord")
# Two agents on the same session_id, both wired to the same db —
# mirrors the parent-turn agent + the background-review fork right
# after a turn ends.
agent_a = _build_agent_with_db(db, parent_sid)
agent_b = _build_agent_with_db(db, parent_sid)
messages = [{"role": "user", "content": f"m{i}"} for i in range(20)]
def run(agent):
try:
agent._compress_context(messages, "sys", approx_tokens=120_000)
except Exception:
# Surface to the test if either raises — should not happen.
raise
t_a = threading.Thread(target=run, args=(agent_a,), name="main_turn")
t_b = threading.Thread(target=run, args=(agent_b,), name="review_fork")
t_a.start()
t_b.start()
t_a.join(timeout=10)
t_b.join(timeout=10)
# Exactly one canonical child — not two orphans.
assert _count_children(db, parent_sid) == 1, (
"Compression lock failed: parent session has multiple children in state.db "
"(transcript fork). This is Damien's incident shape — see the test docstring."
)
# And exactly one of the two agents actually rotated its session_id; the
# other should still hold the parent_sid (its compression was skipped).
rotated = sum(
1 for a in (agent_a, agent_b) if a.session_id != parent_sid
)
assert rotated == 1, (
f"Expected exactly one agent to rotate session_id, got {rotated}. "
"Both agents rotating means the lock didn't serialize them."
)
# The lock must be released after the winner finished.
assert db.get_compression_lock_holder(parent_sid) is None, (
"Compression lock leaked: still held after both rotations completed."
)
def test_skipped_compression_returns_messages_unchanged(tmp_path: Path) -> None:
"""The loser of the lock race must return its input messages verbatim.
Callers (preflight compression in ``conversation_loop.py``) detect the
no-op via ``len(returned) == len(input)`` and stop the auto-compress
retry loop. If the skipped path returned the compressed view, that
detection would break and the caller would mutate the conversation
without going through state.db rotation.
"""
db = SessionDB(db_path=tmp_path / "state.db")
parent_sid = "LOSER_TEST"
db.create_session(parent_sid, source="discord")
# Pre-acquire the lock so the agent's compress_context sees it held.
held = db.try_acquire_compression_lock(parent_sid, "external_holder")
assert held is True
agent = _build_agent_with_db(db, parent_sid)
messages = [{"role": "user", "content": "m1"}, {"role": "user", "content": "m2"}]
compressed, _sp = agent._compress_context(messages, "sys", approx_tokens=120_000)
# Skipped: messages returned verbatim, no rotation
assert compressed is messages or compressed == messages
assert agent.session_id == parent_sid
# Compressor was never called (the skip happens before .compress())
agent.context_compressor.compress.assert_not_called()