From 393929831e0214dfe3d19ccb7d73a12d1eb9d728 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Sat, 28 Mar 2026 12:23:43 -0700 Subject: [PATCH] fix(gateway): preserve transcript on /compress and hygiene compression (salvage #3516) (#3556) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(gateway): preserve full transcript on /compress instead of overwriting The /compress command calls _compress_context() which correctly ends the old session (preserving its full transcript in SQLite) and creates a new session_id for the continuation. However, it then immediately called rewrite_transcript() on the OLD session_id, overwriting the preserved transcript with the compressed version — destroying searchable history. Auto-compression (triggered by context pressure) does not have this bug because the gateway already handles the session_id swap via the agent.session_id != session_id check after _run_agent_sync. Fix: after _compress_context creates the new session, write the compressed messages into the NEW session_id and update the session store pointer. The old session's full transcript stays intact and searchable via session_search. Before: /compress destroys original messages, session_search can't find details from compressed portions. After: /compress behaves like /new for history — full transcript preserved, compressed context for the live session. * fix(gateway): preserve transcript on /compress and hygiene compression Apply session_id swap after _compress_context in both /compress handler and hygiene pre-compression. _compress_context creates a new session (ending the old one), but both paths were calling rewrite_transcript on the OLD session_id — overwriting the preserved transcript and destroying searchable history. Now follows the same pattern as the auto-compression handler (lines 5415-5423): detect the new session_id, update the session store entry, and write compressed messages to the new session. Also fix FakeCompressAgent test mock to include session_id attribute and simulate the session_id change that real _compress_context performs. Co-authored-by: MacroAnarchy --------- Co-authored-by: MacroAnarchy --- gateway/run.py | 24 +++++++++++++++++++++--- tests/gateway/test_session_hygiene.py | 4 ++++ 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/gateway/run.py b/gateway/run.py index e2a2211dd..847db36c9 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -2204,6 +2204,15 @@ class GatewayRunner: ), ) + # _compress_context ends the old session and creates + # a new session_id. Write compressed messages into + # the NEW session so the old transcript stays intact + # and searchable via session_search. + _hyg_new_sid = _hyg_agent.session_id + if _hyg_new_sid != session_entry.session_id: + session_entry.session_id = _hyg_new_sid + self.session_store._save() + self.session_store.rewrite_transcript( session_entry.session_id, _compressed ) @@ -3998,13 +4007,22 @@ class GatewayRunner: loop = asyncio.get_event_loop() compressed, _ = await loop.run_in_executor( None, - lambda: tmp_agent._compress_context(msgs, "", approx_tokens=approx_tokens), + lambda: tmp_agent._compress_context(msgs, "", approx_tokens=approx_tokens) ) - self.session_store.rewrite_transcript(session_entry.session_id, compressed) + # _compress_context already calls end_session() on the old session + # (preserving its full transcript in SQLite) and creates a new + # session_id for the continuation. Write the compressed messages + # into the NEW session so the original history stays searchable. + new_session_id = tmp_agent.session_id + if new_session_id != session_entry.session_id: + session_entry.session_id = new_session_id + self.session_store._save() + + self.session_store.rewrite_transcript(new_session_id, compressed) # Reset stored token count — transcript changed, old value is stale self.session_store.update_session( - session_entry.session_key, last_prompt_tokens=0, + session_entry.session_key, last_prompt_tokens=0 ) new_count = len(compressed) new_tokens = estimate_messages_tokens_rough(compressed) diff --git a/tests/gateway/test_session_hygiene.py b/tests/gateway/test_session_hygiene.py index 80d249347..b8ff8f8a8 100644 --- a/tests/gateway/test_session_hygiene.py +++ b/tests/gateway/test_session_hygiene.py @@ -304,8 +304,12 @@ async def test_session_hygiene_messages_stay_in_originating_topic(monkeypatch, t class FakeCompressAgent: def __init__(self, **kwargs): self.model = kwargs.get("model") + self.session_id = kwargs.get("session_id", "fake-session") + self._print_fn = None def _compress_context(self, messages, *_args, **_kwargs): + # Simulate real _compress_context: create a new session_id + self.session_id = f"{self.session_id}_compressed" return ([{"role": "assistant", "content": "compressed"}], None) fake_run_agent = types.ModuleType("run_agent")