fix(compression): disable compression on background-review fork to prevent cross-turn stale-parent fork (#41708)

The per-session compression lock prevents same-window concurrent forks but
not cross-turn ones: the background-review fork shares the parent's
session_id, so if it won a compression race its new child session was never
adopted by the gateway (the fork is single-lifecycle). The next foreground
turn then started from the stale parent and compressed it again, leaving the
same parent with two sibling children.

Set review_agent.compression_enabled = False so the fork never triggers
compression. Both trigger sites in conversation_loop.py gate on
compression_enabled before calling _compress_context, so the fork can never
rotate the shared parent. Review needs full context anyway — compressing
would degrade the memory/skill summary.

The per-session lock is kept as defense-in-depth for any future shared-session
path. Adds a regression test that fails without the flag and passes with it.

Closes #38727
This commit is contained in:
Teknium 2026-06-07 22:06:48 -07:00 committed by GitHub
parent 48ae8029aa
commit a77bc2c08d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 83 additions and 0 deletions

View file

@ -449,6 +449,17 @@ def _run_review_in_thread(
# if a future code path bypasses the cache.
review_agent.session_start = agent.session_start
review_agent.session_id = agent.session_id
# Never let the review fork compress. It shares the parent's
# session_id, so if it won a compression race it would rotate the
# parent into a NEW child that the gateway never adopts (the fork
# is single-lifecycle and dies right after this run_conversation).
# The foreground turn would then start from the stale parent and
# compress it again, leaving the same parent with two sibling
# children (issue #38727). Review also needs full context to
# produce a good memory/skill summary — compressing would strip
# detail. Both compression triggers in conversation_loop.py gate on
# agent.compression_enabled, so this short-circuits both paths.
review_agent.compression_enabled = False
from model_tools import get_tool_definitions
from hermes_cli.plugins import (

View file

@ -238,3 +238,75 @@ def test_missing_lock_subsystem_fails_open_not_infinite_loop(tmp_path: Path) ->
)
# Session rotated (compression succeeded end-to-end).
assert agent.session_id != parent_sid
def test_review_fork_disables_compression_to_prevent_stale_parent_fork() -> None:
"""The background-review fork must set ``compression_enabled = False``
so it can never compress the parent it shares a session_id with
(issue #38727).
The per-session compression lock only serialises a SAME-WINDOW concurrent
race. It does NOT stop a stale parent from being compressed again in a
LATER turn: if ``review_agent`` had won the race, its new child session is
never adopted by the gateway (the fork is single-lifecycle and dies right
after one ``run_conversation``), so the foreground path would start the
next turn from the stale parent and compress it AGAIN leaving the same
parent with two sibling children.
The fix makes the review fork never trigger compression at all. Both
compression trigger sites in ``agent/conversation_loop.py`` gate on
``agent.compression_enabled`` BEFORE calling ``_compress_context``:
preflight (``if agent.compression_enabled and len(messages) > ...``)
mid-loop (``if agent.compression_enabled and _compressor.should_compress(...)``)
so a fork with the flag cleared never reaches the rotation path.
This test pins the contract at the source: ``_run_review_in_thread``
must set ``review_agent.compression_enabled = False`` on the fork it
builds. It calls the real worker synchronously with
``AIAgent.run_conversation`` patched (so no LLM call happens) and
captures the constructed review agent to assert the flag.
"""
import tempfile
import agent.background_review as br
captured = {}
def _fake_run_conversation(self, *_a, **_k):
captured["compression_enabled"] = self.compression_enabled
captured["session_id"] = self.session_id
return {"final_response": "", "messages": []}
parent_sid = "REVIEW_FORK_FLAG_TEST"
with tempfile.TemporaryDirectory() as td:
db = SessionDB(db_path=Path(td) / "state.db")
db.create_session(parent_sid, source="discord")
parent = _build_agent_with_db(db, parent_sid)
# The worker does a local ``from run_agent import AIAgent``; patching
# the class method covers that import path.
from run_agent import AIAgent
with patch.object(AIAgent, "run_conversation", _fake_run_conversation):
br._run_review_in_thread(
parent,
[{"role": "user", "content": "hi"}],
"review this conversation",
)
assert captured, (
"_run_review_in_thread never reached run_conversation — the spawn path "
"changed; update this test to capture the review AIAgent."
)
assert captured["session_id"] == parent_sid, (
"Review fork should inherit the parent's session_id (shared id is the "
"whole reason compression must be disabled)."
)
assert captured["compression_enabled"] is False, (
"FIX REGRESSION: background-review fork did NOT disable compression. "
"It shares the parent's session_id, so an enabled fork can rotate the "
"parent into an orphan child (issue #38727). The trigger gates in "
"conversation_loop.py only short-circuit when compression_enabled is "
"False — this flag MUST be cleared on the review fork."
)