mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-29 11:42:04 +00:00
These 7 test sites assert rotation behavior (fork, child sessions, lock contention, logging session-context follows id rotation, boundary hooks fire on rotation). Pin each builder to in_place=False explicitly so they keep exercising the retained rotation fallback regardless of the global default (flipped to True in #38763). Rotation stays a working opt-out fallback and deserves continued coverage — these are NOT deleted. Pinned sites: - test_compression_concurrent_fork._build_agent_with_db - test_compression_logging_session_context._build_agent_with_db - test_compression_rotation_state._build_agent_with_db - test_compression_boundary_hook._make_agent (2 helpers: CompressionBoundaryHook + SessionCompressEvent) - test_compression_concurrent_sessions._build_agent_with_db
320 lines
14 KiB
Python
320 lines
14 KiB
Python
"""Tests for in-place context compaction (config: compression.in_place, #38763).
|
|
|
|
When ``compression.in_place`` is True, ``compress_context()`` rewrites the
|
|
message list and rebuilds the system prompt but keeps the SAME ``session_id``:
|
|
no ``end_session``, no ``parent_session_id`` child row, no ``name #N`` title
|
|
renumber, no flush-cursor reset. This eliminates the session-rotation bug
|
|
cluster (#33618 /goal loss, #14238 lost response, #33907 orphans, #45117 search
|
|
gaps, #42228 null cwd). When the flag is False (default), rotation behaves
|
|
exactly as before.
|
|
"""
|
|
|
|
import os
|
|
import tempfile
|
|
from pathlib import Path
|
|
from unittest.mock import patch
|
|
|
|
import pytest
|
|
|
|
|
|
def _make_agent(session_db, session_id, *, in_place):
|
|
with patch.dict(os.environ, {"OPENROUTER_API_KEY": "test-key"}):
|
|
from run_agent import AIAgent
|
|
|
|
agent = AIAgent(
|
|
api_key="test-key",
|
|
base_url="https://openrouter.ai/api/v1",
|
|
model="test/model",
|
|
quiet_mode=True,
|
|
session_db=session_db,
|
|
session_id=session_id,
|
|
skip_context_files=True,
|
|
skip_memory=True,
|
|
)
|
|
agent.compression_in_place = in_place
|
|
# Mock the compressor to return a deterministic shrunk transcript so the
|
|
# test exercises the DB-mutation path, not summarization quality.
|
|
def _fake_compress(messages, current_tokens=None, focus_topic=None, force=False):
|
|
return [
|
|
{"role": "user", "content": "[CONTEXT COMPACTION] summary of prior turns"},
|
|
{"role": "assistant", "content": "recent reply"},
|
|
]
|
|
|
|
agent.context_compressor.compress = _fake_compress
|
|
agent.context_compressor._last_compress_aborted = False
|
|
agent.context_compressor._last_summary_error = None
|
|
agent.context_compressor.compression_count = 1
|
|
return agent
|
|
|
|
|
|
def _seed(db, sid, title, n=8):
|
|
db.create_session(sid, "cli", model="test/model")
|
|
db.set_session_title(sid, title)
|
|
for i in range(n):
|
|
db.append_message(
|
|
session_id=sid,
|
|
role="user" if i % 2 == 0 else "assistant",
|
|
content=f"msg {i}",
|
|
)
|
|
|
|
|
|
class TestInPlaceCompaction:
|
|
def test_in_place_keeps_same_session_id(self):
|
|
"""In-place mode: id unchanged, no child row, no rename, history kept."""
|
|
from hermes_state import SessionDB
|
|
from agent.conversation_compression import compress_context
|
|
|
|
with tempfile.TemporaryDirectory() as tmp:
|
|
db = SessionDB(db_path=Path(tmp) / "t.db")
|
|
sid = "20260619_120000_aaaaaa"
|
|
_seed(db, sid, "my-research")
|
|
agent = _make_agent(db, sid, in_place=True)
|
|
agent._last_flushed_db_idx = 5
|
|
|
|
messages = [{"role": "user", "content": f"m{i}"} for i in range(8)]
|
|
compressed, _sp = compress_context(
|
|
agent, messages, approx_tokens=100_000, system_message="sys"
|
|
)
|
|
|
|
# Identity never moved.
|
|
assert agent.session_id == sid
|
|
# No continuation row forked.
|
|
child = db._conn.execute(
|
|
"SELECT id FROM sessions WHERE parent_session_id = ?", (sid,)
|
|
).fetchall()
|
|
assert child == []
|
|
# Session not ended; title untouched (no "#2").
|
|
row = db.get_session(sid)
|
|
assert row["end_reason"] is None
|
|
assert row["title"] == "my-research"
|
|
# DURABLE, NON-DESTRUCTIVE compaction (the core invariant, per
|
|
# Teknium's review): the LIVE context is the compacted set, but the
|
|
# pre-compaction turns are PRESERVED on disk (active=0), not deleted
|
|
# — searchable + recoverable under the SAME id. A resume reloads the
|
|
# compacted set so compaction actually shrinks the live session and
|
|
# doesn't immediately re-compact (#38763).
|
|
reloaded = db.get_messages_as_conversation(sid)
|
|
assert len(reloaded) == 2
|
|
assert [m.get("content") for m in reloaded] == [
|
|
"[CONTEXT COMPACTION] summary of prior turns",
|
|
"recent reply",
|
|
]
|
|
assert row["message_count"] == 2 # live (active) count
|
|
# NON-DESTRUCTIVE: the 8 seeded originals survive at active=0
|
|
# alongside the 2 compacted rows — nothing was DELETEd.
|
|
all_rows = db.get_messages(sid, include_inactive=True)
|
|
assert len(all_rows) == 10
|
|
archived = [m for m in all_rows if not m.get("active", 1)]
|
|
assert len(archived) == 8
|
|
# The originals remain FTS-searchable (active=0 is a content-
|
|
# preserving UPDATE; the fts triggers don't key on active).
|
|
hit = db._conn.execute(
|
|
"SELECT 1 FROM messages_fts f JOIN messages m ON m.id = f.rowid "
|
|
"WHERE m.session_id = ? AND messages_fts MATCH 'msg' AND m.active = 0 "
|
|
"LIMIT 1",
|
|
(sid,),
|
|
).fetchone()
|
|
assert hit is not None
|
|
# Flush identity/cursor reset so next-turn appends diff against the
|
|
# compacted transcript (rebuilds the identity set on next flush).
|
|
assert agent._last_flushed_db_idx == 0
|
|
assert agent._flushed_db_message_ids == set()
|
|
# Rotation-independent in-place signal set for the gateway.
|
|
assert agent._last_compaction_in_place is True
|
|
# Live transcript actually shrank.
|
|
assert len(compressed) == 2
|
|
|
|
def test_in_place_alternation_preserved(self):
|
|
"""The compacted list must not introduce consecutive same-role messages."""
|
|
from hermes_state import SessionDB
|
|
from agent.conversation_compression import compress_context
|
|
|
|
with tempfile.TemporaryDirectory() as tmp:
|
|
db = SessionDB(db_path=Path(tmp) / "t.db")
|
|
sid = "20260619_120500_cccccc"
|
|
_seed(db, sid, "alt")
|
|
agent = _make_agent(db, sid, in_place=True)
|
|
messages = [{"role": "user", "content": f"m{i}"} for i in range(8)]
|
|
compressed, _ = compress_context(
|
|
agent, messages, approx_tokens=100_000, system_message="sys"
|
|
)
|
|
roles = [m["role"] for m in compressed if m.get("role") != "system"]
|
|
assert all(roles[i] != roles[i + 1] for i in range(len(roles) - 1))
|
|
|
|
def test_in_place_skips_redundant_preflush(self):
|
|
"""In-place must NOT pre-flush current-turn messages: replace_messages
|
|
rewrites the whole row, so a flush would INSERT rows it immediately
|
|
deletes (wasted writes). The current-turn tail survives via the
|
|
compressor's `compressed` output, not the flush."""
|
|
from hermes_state import SessionDB
|
|
from agent.conversation_compression import compress_context
|
|
|
|
with tempfile.TemporaryDirectory() as tmp:
|
|
db = SessionDB(db_path=Path(tmp) / "t.db")
|
|
_seed(db, "ip_flush", "f")
|
|
agent = _make_agent(db, "ip_flush", in_place=True)
|
|
calls = {"n": 0}
|
|
agent._flush_messages_to_session_db = lambda *a, **k: calls.__setitem__(
|
|
"n", calls["n"] + 1
|
|
)
|
|
compress_context(
|
|
agent, [{"role": "user", "content": "x"}] * 8,
|
|
approx_tokens=100_000, system_message="sys",
|
|
)
|
|
assert calls["n"] == 0
|
|
|
|
def test_rotation_still_preflushes(self):
|
|
"""Rotation MUST pre-flush so current-turn messages survive in the
|
|
preserved old (parent) session before it is ended (#47202)."""
|
|
from hermes_state import SessionDB
|
|
from agent.conversation_compression import compress_context
|
|
|
|
with tempfile.TemporaryDirectory() as tmp:
|
|
db = SessionDB(db_path=Path(tmp) / "t.db")
|
|
_seed(db, "rot_flush", "f")
|
|
agent = _make_agent(db, "rot_flush", in_place=False)
|
|
calls = {"n": 0}
|
|
agent._flush_messages_to_session_db = lambda *a, **k: calls.__setitem__(
|
|
"n", calls["n"] + 1
|
|
)
|
|
compress_context(
|
|
agent, [{"role": "user", "content": "x"}] * 8,
|
|
approx_tokens=100_000, system_message="sys",
|
|
)
|
|
assert calls["n"] == 1
|
|
|
|
|
|
class TestRotationFallbackWhenFlagOff:
|
|
def test_rotation_when_flag_off(self):
|
|
"""Rotation is now the OPT-OUT fallback (default flipped to in-place in
|
|
#38763). With in_place=False explicitly set, legacy rotation is
|
|
unchanged — forks a renamed continuation session."""
|
|
from hermes_state import SessionDB
|
|
from agent.conversation_compression import compress_context
|
|
|
|
with tempfile.TemporaryDirectory() as tmp:
|
|
db = SessionDB(db_path=Path(tmp) / "t.db")
|
|
sid = "20260619_130000_bbbbbb"
|
|
_seed(db, sid, "my-research")
|
|
agent = _make_agent(db, sid, in_place=False)
|
|
agent._last_flushed_db_idx = 5
|
|
|
|
messages = [{"role": "user", "content": f"m{i}"} for i in range(8)]
|
|
compress_context(
|
|
agent, messages, approx_tokens=100_000, system_message="sys"
|
|
)
|
|
|
|
# Identity rotated to a fresh id.
|
|
assert agent.session_id != sid
|
|
# Old session ended via compression; continuation forked + renamed.
|
|
assert db.get_session(sid)["end_reason"] == "compression"
|
|
child = db._conn.execute(
|
|
"SELECT id, title FROM sessions WHERE parent_session_id = ?", (sid,)
|
|
).fetchall()
|
|
assert len(child) == 1
|
|
assert child[0]["title"] == "my-research #2"
|
|
# Flush cursor reset for the new row.
|
|
assert agent._last_flushed_db_idx == 0
|
|
# Rotation mode does NOT set the in-place signal.
|
|
assert getattr(agent, "_last_compaction_in_place", False) is False
|
|
|
|
|
|
class TestInPlaceSignalForGateway:
|
|
"""compress_context must expose a rotation-independent flag the gateway can
|
|
read (instead of an id-change diff) to re-baseline transcript handling."""
|
|
|
|
def test_signal_set_on_in_place_unset_on_rotation(self):
|
|
from hermes_state import SessionDB
|
|
from agent.conversation_compression import compress_context
|
|
|
|
with tempfile.TemporaryDirectory() as tmp:
|
|
db = SessionDB(db_path=Path(tmp) / "t.db")
|
|
# in-place → flag True
|
|
_seed(db, "s_ip", "ip")
|
|
a_ip = _make_agent(db, "s_ip", in_place=True)
|
|
compress_context(
|
|
a_ip, [{"role": "user", "content": "x"}] * 8,
|
|
approx_tokens=100_000, system_message="sys",
|
|
)
|
|
assert a_ip._last_compaction_in_place is True
|
|
|
|
# rotation → flag False
|
|
_seed(db, "s_rot", "rot")
|
|
a_rot = _make_agent(db, "s_rot", in_place=False)
|
|
compress_context(
|
|
a_rot, [{"role": "user", "content": "x"}] * 8,
|
|
approx_tokens=100_000, system_message="sys",
|
|
)
|
|
assert a_rot._last_compaction_in_place is False
|
|
|
|
|
|
class TestInPlaceConfigDefault:
|
|
def test_flag_defaults_on(self):
|
|
"""In-place is the default as of #38763 (rotation is now opt-out via
|
|
compression.in_place: false)."""
|
|
from hermes_cli.config import DEFAULT_CONFIG
|
|
|
|
assert DEFAULT_CONFIG["compression"].get("in_place") is True
|
|
|
|
|
|
class TestCompactedTurnsStaySearchable:
|
|
"""Teknium's review hinges on the pre-compaction transcript staying
|
|
DISCOVERABLE after in-place compaction. Compaction-archived rows
|
|
(active=0, compacted=1) must surface in session_search by default, while
|
|
rewind/undo rows (active=0, compacted=0) must stay hidden. The two share
|
|
the active flag but are distinguished by the compacted flag."""
|
|
|
|
def test_compacted_turns_found_by_default_search(self):
|
|
from hermes_state import SessionDB
|
|
|
|
with tempfile.TemporaryDirectory() as tmp:
|
|
db = SessionDB(db_path=Path(tmp) / "t.db")
|
|
sid = "20260619_search"
|
|
db.create_session(sid, "cli", model="test/model")
|
|
for r, c in [
|
|
("user", "configure the HMAC secret"),
|
|
("assistant", "set it in config.yaml"),
|
|
("user", "deploy returns 403"),
|
|
("assistant", "rotate the HMAC"),
|
|
("user", "works now"),
|
|
("assistant", "great"),
|
|
]:
|
|
db.append_message(session_id=sid, role=r, content=c)
|
|
|
|
before = db.search_messages("HMAC", role_filter=["user", "assistant"])
|
|
assert len(before) == 2
|
|
|
|
db.archive_and_compact(
|
|
sid,
|
|
[
|
|
{"role": "user", "content": "[SUMMARY] earlier setup"},
|
|
{"role": "assistant", "content": "ok"},
|
|
],
|
|
)
|
|
|
|
# The archived originals (active=0, compacted=1) are still found by
|
|
# the DEFAULT search — this is the durability requirement.
|
|
after = db.search_messages("HMAC", role_filter=["user", "assistant"])
|
|
assert {m["id"] for m in after} == {1, 4}
|
|
# Live context still excludes them.
|
|
assert len(db.get_messages_as_conversation(sid)) == 2
|
|
|
|
def test_rewound_turns_stay_hidden(self):
|
|
"""Rewind/undo (active=0, compacted=0) must NOT leak into default
|
|
search — the distinction the compacted flag preserves."""
|
|
from hermes_state import SessionDB
|
|
|
|
with tempfile.TemporaryDirectory() as tmp:
|
|
db = SessionDB(db_path=Path(tmp) / "t.db")
|
|
sid = "20260619_undo"
|
|
db.create_session(sid, "cli", model="test/model")
|
|
db.append_message(session_id=sid, role="user", content="ZEBRAWORD remember this")
|
|
db.append_message(session_id=sid, role="assistant", content="noted")
|
|
db.rewind_to_message(sid, db.get_messages(sid)[0]["id"])
|
|
|
|
assert db.search_messages("ZEBRAWORD", role_filter=["user", "assistant"]) == []
|
|
recovered = db.search_messages(
|
|
"ZEBRAWORD", role_filter=["user", "assistant"], include_inactive=True
|
|
)
|
|
assert len(recovered) == 1
|
|
|