mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-30 06:41:51 +00:00
* fix(compression): prevent session-id fork from concurrent compressions When two AIAgent instances share the same session_id (most commonly the parent-turn agent and its background-review fork, which inherits session_id verbatim via background_review.py L451), both can call compress_context() on overlapping snapshots of the same conversation. Each ends the parent and creates its own NEW child session in state.db, both parented to the same old id. The gateway SessionEntry only catches one rotation; the other becomes an orphan that silently accumulates writes — Damien's incident shape (parent 20260527_234659_e65f0e → two children, only one visible). Adds a state.db-backed per-session compression lock. Acquired before the rotation in conversation_compression.compress_context(); on failure, the caller returns messages unchanged so the auto-compress retry loop stops cleanly. TTL (5min default) reclaims locks abandoned by crashed compressors. Lock holder identity (pid:tid:agent:nonce) is preserved for diagnostics via get_compression_lock_holder(). Schema bumped 13 -> 14 to track the new compression_locks table. Reconciled additively via the existing declarative-column pattern; no data migration needed for existing DBs. Regression test reproduces Damien's shape: two threads racing _compress_context on a shared parent_sid. Without the lock the test deterministically produces 2 child sessions; with the lock, exactly 1. Covers all six compression entry points (preflight in conversation_loop, mid-turn fallback, hygiene compression in gateway, /compact, CLI /compress, TUI /compress). ACP /compress was already protected by nulling out _session_db before its compress call. * ci: trigger rerun (transient GitHub API rate limit on CodeQL workflow)
149 lines
5.4 KiB
Python
149 lines
5.4 KiB
Python
"""Tests for ``SessionDB`` compression-lock primitives.

These cover the atomic per-session lock that prevents two compression
paths from racing on the same ``session_id`` and producing orphan child
sessions (Damien's "parent → two orphan children" repro shape, see
``tests/agent/test_compression_concurrent_fork.py`` for the
behavioural regression test).

Focus here: the lock primitives themselves (acquire, release, TTL,
diagnostic accessor) — not the wiring into compression.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import threading
|
|
import time
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
from hermes_state import SessionDB
|
|
|
|
|
|
@pytest.fixture
def db(tmp_path: Path) -> SessionDB:
    """Provide a ``SessionDB`` backed by a ``state.db`` file under ``tmp_path``."""
    db_file = tmp_path / "state.db"
    return SessionDB(db_file)
|
|
|
|
|
|
# ----------------------------------------------------------------------
|
|
# Single-holder semantics
|
|
# ----------------------------------------------------------------------
|
|
|
|
|
|
def test_acquire_succeeds_when_unlocked(db: SessionDB) -> None:
    """An unheld session lock is granted and reported under the holder's id."""
    acquired = db.try_acquire_compression_lock("sess1", "holder1")
    assert acquired is True

    current_holder = db.get_compression_lock_holder("sess1")
    assert current_holder == "holder1"
|
|
|
|
|
|
def test_acquire_blocks_second_holder(db: SessionDB) -> None:
    """A second holder is refused while the first holder's lock is held."""
    first_attempt = db.try_acquire_compression_lock("sess1", "holder1")
    assert first_attempt is True

    second_attempt = db.try_acquire_compression_lock("sess1", "holder2")
    assert second_attempt is False

    # The rejected attempt must not have displaced the original owner.
    owner = db.get_compression_lock_holder("sess1")
    assert owner == "holder1"
|
|
|
|
|
|
def test_release_allows_reacquire(db: SessionDB) -> None:
    """Releasing a held lock clears the holder and lets another holder claim it."""
    # Verify the setup really took the lock; otherwise the release below is
    # a no-op and the remaining assertions would pass vacuously.
    assert db.try_acquire_compression_lock("sess1", "holder1") is True
    assert db.get_compression_lock_holder("sess1") == "holder1"

    db.release_compression_lock("sess1", "holder1")

    # After release nobody owns the lock, so a different holder may take it.
    assert db.get_compression_lock_holder("sess1") is None
    assert db.try_acquire_compression_lock("sess1", "holder2") is True
|
|
|
|
|
|
def test_release_with_wrong_holder_is_noop(db: SessionDB) -> None:
    """A release issued under a different holder id leaves the lock in place."""
    session_id, owner = "sess1", "holder1"
    db.try_acquire_compression_lock(session_id, owner)

    # A late-returning compressor must not be able to drop a lock that
    # belongs to someone else.
    db.release_compression_lock(session_id, "holder_other")

    assert db.get_compression_lock_holder(session_id) == owner
|
|
|
|
|
|
def test_release_when_unlocked_is_noop(db: SessionDB) -> None:
    """Releasing a session that was never locked neither raises nor creates a holder."""
    session_id = "never_locked"

    # Must complete without raising and without changing state.
    db.release_compression_lock(session_id, "holder1")

    holder_after = db.get_compression_lock_holder(session_id)
    assert holder_after is None
|
|
|
|
|
|
# ----------------------------------------------------------------------
|
|
# Per-session isolation
|
|
# ----------------------------------------------------------------------
|
|
|
|
|
|
def test_locks_are_per_session(db: SessionDB) -> None:
    """Locks on different session ids are independent of one another."""
    owners = {"sess1": "holder1", "sess2": "holder2"}

    # Each session grants its own lock even though the other is already held.
    for session_id, holder in owners.items():
        assert db.try_acquire_compression_lock(session_id, holder) is True

    # Each session reports only its own holder.
    for session_id, holder in owners.items():
        assert db.get_compression_lock_holder(session_id) == holder
|
|
|
|
|
|
# ----------------------------------------------------------------------
|
|
# TTL / expiry recovery
|
|
# ----------------------------------------------------------------------
|
|
|
|
|
|
def test_expired_lock_is_reclaimable(db: SessionDB) -> None:
    """A crashed compressor must not permanently block the session."""
    # Acquire with a very short TTL. Assert the grant so the test cannot
    # pass vacuously when no lock was ever taken.
    assert (
        db.try_acquire_compression_lock("sess1", "crashed_holder", ttl_seconds=0.05)
        is True
    )

    # Wait comfortably past the TTL so the lock has expired.
    time.sleep(0.1)

    # Holder check honours expiry
    assert db.get_compression_lock_holder("sess1") is None

    # New holder can claim it
    assert db.try_acquire_compression_lock("sess1", "fresh_holder") is True
    assert db.get_compression_lock_holder("sess1") == "fresh_holder"
|
|
|
|
|
|
def test_non_expired_lock_is_held(db: SessionDB) -> None:
    """A lock taken with a 60-second TTL still blocks an immediate second acquire."""
    db.try_acquire_compression_lock("sess1", "holder1", ttl_seconds=60)

    # Checked right away, well inside the TTL window.
    contender_got_lock = db.try_acquire_compression_lock("sess1", "holder2")
    assert contender_got_lock is False
|
|
|
|
|
|
# ----------------------------------------------------------------------
|
|
# Empty / invalid input
|
|
# ----------------------------------------------------------------------
|
|
|
|
|
|
def test_acquire_empty_session_id_returns_false(db: SessionDB) -> None:
    """Acquiring with an empty session id is rejected."""
    outcome = db.try_acquire_compression_lock("", "holder1")
    assert outcome is False
|
|
|
|
|
|
def test_release_empty_session_id_is_noop(db: SessionDB) -> None:
    """Releasing with an empty session id must not raise."""
    empty_session_id = ""
    # The test passes as long as this call returns without an exception.
    db.release_compression_lock(empty_session_id, "holder1")
|
|
|
|
|
|
def test_holder_empty_session_id_returns_none(db: SessionDB) -> None:
    """Looking up the holder of an empty session id yields ``None``."""
    reported_holder = db.get_compression_lock_holder("")
    assert reported_holder is None
|
|
|
|
|
|
# ----------------------------------------------------------------------
|
|
# Concurrency: real threads racing on the same session_id
|
|
# ----------------------------------------------------------------------
|
|
|
|
|
|
def test_concurrent_acquire_only_one_winner(db: SessionDB) -> None:
    """Damien's race shape: N threads call acquire on the same session_id;
    exactly one must win, the rest must be cleanly rejected.

    The stored holder is also checked against the winning thread's id, so a
    lock row attributed to a "losing" thread is caught.
    """
    thread_count = 8
    session_id = "contended_session"

    # holder id -> result of that thread's acquire attempt
    outcomes: dict[str, bool] = {}
    outcomes_guard = threading.Lock()
    barrier = threading.Barrier(thread_count)

    def try_acquire(idx: int) -> None:
        holder = f"thread_{idx}"
        barrier.wait()  # synchronize start so the attempts truly overlap
        got = db.try_acquire_compression_lock(session_id, holder)
        with outcomes_guard:
            outcomes[holder] = got

    threads = [
        threading.Thread(target=try_acquire, args=(i,)) for i in range(thread_count)
    ]
    for t in threads:
        t.start()
    for t in threads:
        t.join()

    # A worker that died with an exception records nothing; surface that
    # explicitly instead of through a confusing winner/loser count.
    assert len(outcomes) == thread_count

    winners = [holder for holder, got in outcomes.items() if got is True]
    losers = [holder for holder, got in outcomes.items() if got is False]

    # Exactly one thread acquired
    assert len(winners) == 1
    assert len(losers) == thread_count - 1

    # The single winner still owns it
    assert db.get_compression_lock_holder(session_id) == winners[0]
|