diff --git a/gateway/run.py b/gateway/run.py index e2a2211dd..847db36c9 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -2204,6 +2204,15 @@ class GatewayRunner: ), ) + # _compress_context ends the old session and creates + # a new session_id. Write compressed messages into + # the NEW session so the old transcript stays intact + # and searchable via session_search. + _hyg_new_sid = _hyg_agent.session_id + if _hyg_new_sid != session_entry.session_id: + session_entry.session_id = _hyg_new_sid + self.session_store._save() + self.session_store.rewrite_transcript( session_entry.session_id, _compressed ) @@ -3998,13 +4007,22 @@ class GatewayRunner: loop = asyncio.get_event_loop() compressed, _ = await loop.run_in_executor( None, - lambda: tmp_agent._compress_context(msgs, "", approx_tokens=approx_tokens), + lambda: tmp_agent._compress_context(msgs, "", approx_tokens=approx_tokens) ) - self.session_store.rewrite_transcript(session_entry.session_id, compressed) + # _compress_context already calls end_session() on the old session + # (preserving its full transcript in SQLite) and creates a new + # session_id for the continuation. Write the compressed messages + # into the NEW session so the original history stays searchable. + new_session_id = tmp_agent.session_id + if new_session_id != session_entry.session_id: + session_entry.session_id = new_session_id + self.session_store._save() + + self.session_store.rewrite_transcript(new_session_id, compressed) # Reset stored token count — transcript changed, old value is stale self.session_store.update_session( - session_entry.session_key, last_prompt_tokens=0, + session_entry.session_key, last_prompt_tokens=0 ) new_count = len(compressed) new_tokens = estimate_messages_tokens_rough(compressed) diff --git a/tests/gateway/test_session_hygiene.py b/tests/gateway/test_session_hygiene.py index 80d249347..b8ff8f8a8 100644 --- a/tests/gateway/test_session_hygiene.py +++ b/tests/gateway/test_session_hygiene.py @@ -304,8 +304,12 @@ async def test_session_hygiene_messages_stay_in_originating_topic(monkeypatch, t class FakeCompressAgent: def __init__(self, **kwargs): self.model = kwargs.get("model") + self.session_id = kwargs.get("session_id", "fake-session") + self._print_fn = None def _compress_context(self, messages, *_args, **_kwargs): + # Simulate real _compress_context: create a new session_id + self.session_id = f"{self.session_id}_compressed" return ([{"role": "assistant", "content": "compressed"}], None) fake_run_agent = types.ModuleType("run_agent")