hermes-agent/tests/run_agent/test_in_place_compaction.py
kshitijk4poor 854d75723f fix(compression): keep compaction-archived turns discoverable in session_search
Follow-up to the soft-archive durability fix. Reusing the rewind/undo active=0
flag for compaction-archived turns inherited the wrong search semantics: undo
rows are intentionally HIDDEN from session_search (the user took them back), but
compaction-archived turns must stay DISCOVERABLE — that is the whole point of
Teknium's "searchable / recoverable" requirement. As built, search_messages
defaulted to WHERE active=1, so after in-place compaction the pre-compaction
turns were in the FTS index but filtered out of the default search. (The earlier
"searchable" claim only held for a raw FTS query / include_inactive=True, not
the actual session_search tool.)

Empirically confirmed the gap: search 'HMAC' returned 2 hits before compaction,
1 after (only the summary's mention) — the originals were hidden.

Fix — a `compacted` flag distinct from `active`, giving a 3-way state:
- active=1, compacted=0  → live context (normal)
- active=0, compacted=1  → compaction-archived: OUT of live context, IN search
- active=0, compacted=0  → rewind/undo: OUT of live context, OUT of search

Changes:
- messages.compacted INTEGER NOT NULL DEFAULT 0 added to SCHEMA_SQL. Declarative
  _reconcile_columns adds it on existing DBs — no version bump (plain column add).
- archive_and_compact: UPDATE … SET active=0, compacted=1 (was active=0 only).
- search_messages: default WHERE active=1 → (active=1 OR compacted=1), on BOTH
  the main FTS5 path and the trigram CJK path. include_inactive=True still
  returns everything. The short-CJK LIKE fallback already returns all rows
  (no active filter) — unchanged.
- Docstrings on archive_and_compact + search_messages document the 3-way state.

Verified: after compaction, session_search default finds the archived originals
(ids 1 & 4); rewind/undo rows stay hidden by default (recoverable via
include_inactive); live context still excludes both. 322 in-place + hermes_state
tests and 46 session_search tests green; ruff clean. Mutation check: reverting
the search WHERE to active-only fails the new searchable test.

(Surfaced by the question "is search semantic or only FTS?" — answer: session
search is FTS5 keyword/BM25 only, no embeddings over the transcript; semantic
retrieval lives in the optional memory-provider layer. Tracing that confirmed
the active-only filter gap above.)
2026-06-20 10:57:07 -07:00

316 lines
14 KiB
Python

"""Tests for in-place context compaction (config: compression.in_place, #38763).
When ``compression.in_place`` is True, ``compress_context()`` rewrites the
message list and rebuilds the system prompt but keeps the SAME ``session_id``:
no ``end_session``, no ``parent_session_id`` child row, no ``name #N`` title
renumber, no flush-cursor reset. This eliminates the session-rotation bug
cluster (#33618 /goal loss, #14238 lost response, #33907 orphans, #45117 search
gaps, #42228 null cwd). When the flag is False (default), rotation behaves
exactly as before.
"""
import os
import tempfile
from pathlib import Path
from unittest.mock import patch
import pytest
def _make_agent(session_db, session_id, *, in_place):
    """Build an ``AIAgent`` bound to ``session_db`` with a stubbed compressor.

    Args:
        session_db: Session store passed to the agent as ``session_db``.
        session_id: Id of the session the agent starts on.
        in_place: Value assigned to ``agent.compression_in_place``.

    Returns:
        The agent, whose ``context_compressor.compress`` always returns the
        same two-message transcript.
    """
    # A dummy API key is present in the environment only while the agent
    # module is imported and the agent is constructed.
    with patch.dict(os.environ, {"OPENROUTER_API_KEY": "test-key"}):
        from run_agent import AIAgent

        agent = AIAgent(
            api_key="test-key",
            base_url="https://openrouter.ai/api/v1",
            model="test/model",
            quiet_mode=True,
            session_db=session_db,
            session_id=session_id,
            skip_context_files=True,
            skip_memory=True,
        )
    agent.compression_in_place = in_place

    # Mock the compressor to return a deterministic shrunk transcript so the
    # test exercises the DB-mutation path, not summarization quality.
    def _fake_compress(messages, current_tokens=None, focus_topic=None, force=False):
        return [
            {"role": "user", "content": "[CONTEXT COMPACTION] summary of prior turns"},
            {"role": "assistant", "content": "recent reply"},
        ]

    agent.context_compressor.compress = _fake_compress
    agent.context_compressor._last_compress_aborted = False
    agent.context_compressor._last_summary_error = None
    agent.context_compressor.compression_count = 1
    return agent
def _seed(db, sid, title, n=8):
    """Create session ``sid`` titled ``title`` and append ``n`` messages.

    Messages alternate roles starting with ``user`` (even index) and carry
    the content ``"msg <index>"``.

    Args:
        db: Object exposing ``create_session``, ``set_session_title`` and
            ``append_message``.
        sid: Session id to create.
        title: Title to set on the new session.
        n: Number of messages to append (default 8).
    """
    db.create_session(sid, "cli", model="test/model")
    db.set_session_title(sid, title)
    for i in range(n):
        db.append_message(
            session_id=sid,
            role="user" if i % 2 == 0 else "assistant",
            content=f"msg {i}",
        )
class TestInPlaceCompaction:
    """Behaviour of ``compress_context`` when ``compression_in_place`` is on,
    plus the pre-flush contrast with rotation mode."""

    def test_in_place_keeps_same_session_id(self):
        """In-place mode: id unchanged, no child row, no rename, history kept."""
        from hermes_state import SessionDB
        from agent.conversation_compression import compress_context

        with tempfile.TemporaryDirectory() as tmp:
            db = SessionDB(db_path=Path(tmp) / "t.db")
            sid = "20260619_120000_aaaaaa"
            _seed(db, sid, "my-research")
            agent = _make_agent(db, sid, in_place=True)
            # Non-zero cursor so the reset asserted below is observable.
            agent._last_flushed_db_idx = 5

            messages = [{"role": "user", "content": f"m{i}"} for i in range(8)]
            compressed, _sp = compress_context(
                agent, messages, approx_tokens=100_000, system_message="sys"
            )

            # Identity never moved.
            assert agent.session_id == sid

            # No continuation row forked.
            child = db._conn.execute(
                "SELECT id FROM sessions WHERE parent_session_id = ?", (sid,)
            ).fetchall()
            assert child == []

            # Session not ended; title untouched (no "#2").
            row = db.get_session(sid)
            assert row["end_reason"] is None
            assert row["title"] == "my-research"

            # DURABLE, NON-DESTRUCTIVE compaction (the core invariant, per
            # Teknium's review): the LIVE context is the compacted set, but the
            # pre-compaction turns are PRESERVED on disk (active=0), not deleted
            # — searchable + recoverable under the SAME id. A resume reloads the
            # compacted set so compaction actually shrinks the live session and
            # doesn't immediately re-compact (#38763).
            reloaded = db.get_messages_as_conversation(sid)
            assert len(reloaded) == 2
            assert [m.get("content") for m in reloaded] == [
                "[CONTEXT COMPACTION] summary of prior turns",
                "recent reply",
            ]
            assert row["message_count"] == 2  # live (active) count

            # NON-DESTRUCTIVE: the 8 seeded originals survive at active=0
            # alongside the 2 compacted rows — nothing was DELETEd.
            all_rows = db.get_messages(sid, include_inactive=True)
            assert len(all_rows) == 10
            archived = [m for m in all_rows if not m.get("active", 1)]
            assert len(archived) == 8

            # The originals remain FTS-searchable (active=0 is a content-
            # preserving UPDATE; the fts triggers don't key on active).
            hit = db._conn.execute(
                "SELECT 1 FROM messages_fts f JOIN messages m ON m.id = f.rowid "
                "WHERE m.session_id = ? AND messages_fts MATCH 'msg' AND m.active = 0 "
                "LIMIT 1",
                (sid,),
            ).fetchone()
            assert hit is not None

            # Flush identity/cursor reset so next-turn appends diff against the
            # compacted transcript (rebuilds the identity set on next flush).
            assert agent._last_flushed_db_idx == 0
            assert agent._flushed_db_message_ids == set()

            # Rotation-independent in-place signal set for the gateway.
            assert agent._last_compaction_in_place is True

            # Live transcript actually shrank.
            assert len(compressed) == 2

    def test_in_place_alternation_preserved(self):
        """The compacted list must not introduce consecutive same-role messages."""
        from hermes_state import SessionDB
        from agent.conversation_compression import compress_context

        with tempfile.TemporaryDirectory() as tmp:
            db = SessionDB(db_path=Path(tmp) / "t.db")
            sid = "20260619_120500_cccccc"
            _seed(db, sid, "alt")
            agent = _make_agent(db, sid, in_place=True)

            messages = [{"role": "user", "content": f"m{i}"} for i in range(8)]
            compressed, _ = compress_context(
                agent, messages, approx_tokens=100_000, system_message="sys"
            )

            # System messages are ignored; every adjacent pair must differ.
            roles = [m["role"] for m in compressed if m.get("role") != "system"]
            assert all(roles[i] != roles[i + 1] for i in range(len(roles) - 1))

    def test_in_place_skips_redundant_preflush(self):
        """In-place must NOT pre-flush current-turn messages.

        The in-place path rewrites the session's live rows wholesale, so rows
        INSERTed by a pre-flush would be superseded immediately (wasted
        writes). The current-turn tail survives via the compressor's
        ``compressed`` output, not the flush.

        NOTE(review): earlier wording said the flushed rows are "deleted" by
        ``replace_messages``; the durability assertions in this class show
        pre-compaction rows being archived (active=0) rather than deleted —
        confirm which helper ``compress_context`` calls.
        """
        from hermes_state import SessionDB
        from agent.conversation_compression import compress_context

        with tempfile.TemporaryDirectory() as tmp:
            db = SessionDB(db_path=Path(tmp) / "t.db")
            _seed(db, "ip_flush", "f")
            agent = _make_agent(db, "ip_flush", in_place=True)

            # Replace the flush with a recorder so invocations can be counted.
            flush_calls = []

            def _record_flush(*args, **kwargs):
                flush_calls.append((args, kwargs))

            agent._flush_messages_to_session_db = _record_flush

            compress_context(
                agent, [{"role": "user", "content": "x"}] * 8,
                approx_tokens=100_000, system_message="sys",
            )
            assert len(flush_calls) == 0

    def test_rotation_still_preflushes(self):
        """Rotation MUST pre-flush so current-turn messages survive in the
        preserved old (parent) session before it is ended (#47202)."""
        from hermes_state import SessionDB
        from agent.conversation_compression import compress_context

        with tempfile.TemporaryDirectory() as tmp:
            db = SessionDB(db_path=Path(tmp) / "t.db")
            _seed(db, "rot_flush", "f")
            agent = _make_agent(db, "rot_flush", in_place=False)

            # Replace the flush with a recorder so invocations can be counted.
            flush_calls = []

            def _record_flush(*args, **kwargs):
                flush_calls.append((args, kwargs))

            agent._flush_messages_to_session_db = _record_flush

            compress_context(
                agent, [{"role": "user", "content": "x"}] * 8,
                approx_tokens=100_000, system_message="sys",
            )
            assert len(flush_calls) == 1
class TestRotationStillDefault:
    """With ``compression_in_place`` off, compaction still rotates sessions."""

    def test_rotation_when_flag_off(self):
        """Regression guard: flag off => legacy rotation is unchanged."""
        from hermes_state import SessionDB
        from agent.conversation_compression import compress_context

        with tempfile.TemporaryDirectory() as tmp:
            db = SessionDB(db_path=Path(tmp) / "t.db")
            sid = "20260619_130000_bbbbbb"
            _seed(db, sid, "my-research")
            agent = _make_agent(db, sid, in_place=False)
            # Non-zero cursor so the reset asserted below is observable.
            agent._last_flushed_db_idx = 5

            messages = [{"role": "user", "content": f"m{i}"} for i in range(8)]
            compress_context(
                agent, messages, approx_tokens=100_000, system_message="sys"
            )

            # Identity rotated to a fresh id.
            assert agent.session_id != sid

            # Old session ended via compression; continuation forked + renamed.
            assert db.get_session(sid)["end_reason"] == "compression"
            child = db._conn.execute(
                "SELECT id, title FROM sessions WHERE parent_session_id = ?", (sid,)
            ).fetchall()
            assert len(child) == 1
            assert child[0]["title"] == "my-research #2"

            # Flush cursor reset for the new row.
            assert agent._last_flushed_db_idx == 0

            # Rotation mode does NOT set the in-place signal.
            assert getattr(agent, "_last_compaction_in_place", False) is False
class TestInPlaceSignalForGateway:
    """compress_context must expose a rotation-independent flag the gateway can
    read (instead of an id-change diff) to re-baseline transcript handling."""

    def test_signal_set_on_in_place_unset_on_rotation(self):
        """``_last_compaction_in_place`` is True after in-place compaction and
        False after rotation; both agents share one database."""
        from hermes_state import SessionDB
        from agent.conversation_compression import compress_context

        with tempfile.TemporaryDirectory() as tmp:
            db = SessionDB(db_path=Path(tmp) / "t.db")

            # in-place → flag True
            _seed(db, "s_ip", "ip")
            a_ip = _make_agent(db, "s_ip", in_place=True)
            compress_context(
                a_ip, [{"role": "user", "content": "x"}] * 8,
                approx_tokens=100_000, system_message="sys",
            )
            assert a_ip._last_compaction_in_place is True

            # rotation → flag False
            _seed(db, "s_rot", "rot")
            a_rot = _make_agent(db, "s_rot", in_place=False)
            compress_context(
                a_rot, [{"role": "user", "content": "x"}] * 8,
                approx_tokens=100_000, system_message="sys",
            )
            assert a_rot._last_compaction_in_place is False
class TestInPlaceConfigDefault:
    """The shipped default configuration keeps in-place compaction disabled."""

    def test_flag_defaults_off(self):
        """``compression.in_place`` must be present and exactly ``False``."""
        from hermes_cli.config import DEFAULT_CONFIG

        # ``is False`` (not falsy) so a missing key (None) fails the test too.
        assert DEFAULT_CONFIG["compression"].get("in_place") is False
class TestCompactedTurnsStaySearchable:
    """Teknium's review hinges on the pre-compaction transcript staying
    DISCOVERABLE after in-place compaction. Compaction-archived rows
    (active=0, compacted=1) must surface in session_search by default, while
    rewind/undo rows (active=0, compacted=0) must stay hidden. The two share
    the active flag but are distinguished by the compacted flag."""

    def test_compacted_turns_found_by_default_search(self):
        """Default search returns the same rows before and after compaction."""
        from hermes_state import SessionDB

        with tempfile.TemporaryDirectory() as tmp:
            db = SessionDB(db_path=Path(tmp) / "t.db")
            sid = "20260619_search"
            db.create_session(sid, "cli", model="test/model")
            for r, c in [
                ("user", "configure the HMAC secret"),
                ("assistant", "set it in config.yaml"),
                ("user", "deploy returns 403"),
                ("assistant", "rotate the HMAC"),
                ("user", "works now"),
                ("assistant", "great"),
            ]:
                db.append_message(session_id=sid, role=r, content=c)

            before = db.search_messages("HMAC", role_filter=["user", "assistant"])
            assert len(before) == 2
            # Remember which rows matched instead of hard-coding rowids, so the
            # test does not depend on where the id sequence starts.
            original_ids = {m["id"] for m in before}
            assert len(original_ids) == 2

            db.archive_and_compact(
                sid,
                [
                    {"role": "user", "content": "[SUMMARY] earlier setup"},
                    {"role": "assistant", "content": "ok"},
                ],
            )

            # The archived originals (active=0, compacted=1) are still found by
            # the DEFAULT search — this is the durability requirement. The
            # replacement rows do not mention the search term, so the hit set
            # must be exactly the original two rows.
            after = db.search_messages("HMAC", role_filter=["user", "assistant"])
            assert {m["id"] for m in after} == original_ids

            # Live context still excludes them.
            assert len(db.get_messages_as_conversation(sid)) == 2

    def test_rewound_turns_stay_hidden(self):
        """Rewind/undo (active=0, compacted=0) must NOT leak into default
        search — the distinction the compacted flag preserves."""
        from hermes_state import SessionDB

        with tempfile.TemporaryDirectory() as tmp:
            db = SessionDB(db_path=Path(tmp) / "t.db")
            sid = "20260619_undo"
            db.create_session(sid, "cli", model="test/model")
            db.append_message(session_id=sid, role="user", content="ZEBRAWORD remember this")
            db.append_message(session_id=sid, role="assistant", content="noted")

            # Rewind to the first message of the session.
            db.rewind_to_message(sid, db.get_messages(sid)[0]["id"])

            # Hidden from the default search ...
            assert db.search_messages("ZEBRAWORD", role_filter=["user", "assistant"]) == []

            # ... but still recoverable when inactive rows are requested.
            recovered = db.search_messages(
                "ZEBRAWORD", role_filter=["user", "assistant"], include_inactive=True
            )
            assert len(recovered) == 1