From 6b408e131c48d90d44b5f1a33c9da26b70cdc7a1 Mon Sep 17 00:00:00 2001 From: haileymarshall Date: Sat, 18 Apr 2026 13:18:36 +0100 Subject: [PATCH] fix(gateway): pass session_key (not session_id) to active-process check during prune MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SessionStore.prune_old_entries was calling self._has_active_processes_fn(entry.session_id) but the callback wired up in gateway/run.py is process_registry.has_active_for_session, which compares against session_key, not session_id. Every other caller in session.py (_is_session_expired, _should_reset) already passes session_key, so prune was the only outlier — and because session_id and session_key live in different namespaces, the guard never fired. Result in production: sessions with live background processes (queued cron output, detached agents, long-running Bash) were pruned out of _entries despite the docstring promising they'd be preserved. When the process finished and tried to deliver output, the session_key to session_id mapping was gone and the work was effectively orphaned. Also update the existing test_prune_skips_entries_with_active_processes, which was checking the wrong interface (its mock callback took session_id so it agreed with the buggy implementation). The test now uses a session_key-based mock, matching the production callback's real contract, and a new regression guard test pins the behaviour. Swallowed exceptions inside the prune loop now log at debug level instead of silently disappearing. --- gateway/session.py | 12 ++++++-- tests/gateway/test_session_store_prune.py | 37 ++++++++++++++++++++--- 2 files changed, 42 insertions(+), 7 deletions(-) diff --git a/gateway/session.py b/gateway/session.py index 8b31c2b0a..81278e852 100644 --- a/gateway/session.py +++ b/gateway/session.py @@ -926,12 +926,18 @@ class SessionStore: continue # Never prune sessions with an active background process # attached — the user may still be waiting on output. + # The callback is keyed by session_key (see process_registry. + # has_active_for_session); passing session_id here used to + # never match, so active sessions got pruned anyway. if self._has_active_processes_fn is not None: try: - if self._has_active_processes_fn(entry.session_id): + if self._has_active_processes_fn(entry.session_key): continue - except Exception: - pass + except Exception as exc: + logger.debug( + "has_active_processes_fn raised during prune for %s: %s", + entry.session_key, exc, + ) if entry.updated_at < cutoff: removed_keys.append(key) for key in removed_keys: diff --git a/tests/gateway/test_session_store_prune.py b/tests/gateway/test_session_store_prune.py index 9b1dca297..34fa21e25 100644 --- a/tests/gateway/test_session_store_prune.py +++ b/tests/gateway/test_session_store_prune.py @@ -117,11 +117,20 @@ class TestPruneBasics: assert "idle" not in store._entries def test_prune_skips_entries_with_active_processes(self, tmp_path): - """Sessions with active bg processes aren't pruned even if old.""" - active_session_ids = {"sid_active"} + """Sessions with active bg processes aren't pruned even if old. - def _has_active(session_id: str) -> bool: - return session_id in active_session_ids + The callback is keyed by session_key — matching what + process_registry.has_active_for_session() actually consumes in + gateway/run.py. Prior to the fix this test passed the callback a + session_id, which silently matched an implementation bug where + prune_old_entries was also passing session_id; real-world usage + (via process_registry) takes a session_key and never matched, so + active sessions were still being pruned. + """ + active_session_keys = {"active"} + + def _has_active(session_key: str) -> bool: + return session_key in active_session_keys store = _make_store(tmp_path, has_active_processes_fn=_has_active) store._entries["active"] = _entry( @@ -137,6 +146,26 @@ class TestPruneBasics: assert "active" in store._entries assert "idle" not in store._entries + def test_prune_active_check_uses_session_key_not_session_id(self, tmp_path): + """Regression guard: a callback that only recognises session_ids must + NOT protect entries during prune. This pins the fix so a future + refactor can't silently revert to passing session_id again. + """ + def _recognises_only_ids(identifier: str) -> bool: + return identifier.startswith("sid_") + + store = _make_store(tmp_path, has_active_processes_fn=_recognises_only_ids) + store._entries["active"] = _entry( + "active", age_days=1000, session_id="sid_active" + ) + + removed = store.prune_old_entries(max_age_days=90) + + # Entry is pruned because the callback receives "active" (session_key), + # not "sid_active" (session_id), so _recognises_only_ids returns False. + assert removed == 1 + assert "active" not in store._entries + def test_prune_does_not_write_disk_when_no_removals(self, tmp_path): """If nothing is evictable, _save() should NOT be called.""" store = _make_store(tmp_path)