fix(session_search): demote cron below interactive sessions in discover ranking (#53597)

Cron jobs accumulate large volumes of repetitive vocabulary (recurring project names, dates, summaries) and out-number a user's interactive sessions. Under bare BM25 they dominate the top FTS rows, so discover's early-exit-at-N dedup collects only cron sessions and the user's own conversations never surface — "recall blindness" (#19434). - _order_for_recall() stable-sorts FTS rows so interactive sources rank above cron before lineage dedup; within each class BM25/recency order is preserved. Cron is demoted, not excluded, so it still surfaces when it is the only match. - raise discover scan limit 50 -> 300 so buried interactive matches are in hand for the demotion pass. Fixes the cron-flooding sub-bug of #19434. The split-brain sub-bug is covered by #52798; the child-session sub-bug is superseded by in-place compaction.
2026-06-30 11:52:04 +00:00 · 2026-06-27 04:41:22 -07:00 · 2026-06-27 04:41:22 -07:00 · fe1c1c1121
commit fe1c1c1121
parent cd592c105c
2 changed files with 110 additions and 1 deletions
--- a/tests/tools/test_session_search.py
+++ b/tests/tools/test_session_search.py
@ -570,3 +570,71 @@ class TestCrossProfileRead:
            assert result["success"] is True, kwargs
            assert result["mode"] == "read"
            assert result["session_id"] == "s_other"
+
+
+# =========================================================================
+# Cron demotion in discover ranking (#19434)
+# =========================================================================
+
+class TestCronDemotion:
+    def _seed_cron_and_interactive(self, db):
+        """One interactive (telegram) session and several cron sessions, all
+        matching the same query. Cron rows accumulate repetitive vocabulary
+        and out-number the user's single interactive session — the live-data
+        symptom in #19434.
+        """
+        now = int(time.time())
+        # Interactive user session — older, so it loses on bare recency too.
+        db.create_session("s_user", source="telegram")
+        db._conn.execute("UPDATE sessions SET started_at = ? WHERE id = ?",
+                         (now - 90000, "s_user"))
+        db.append_message("s_user", role="user", content="how is the venom project going")
+        db.append_message("s_user", role="assistant", content="The venom project shipped its first milestone.")
+        # Several cron sessions, all newer and all stuffed with the same terms.
+        for i in range(8):
+            sid = f"cron_{i}"
+            db.create_session(sid, source="cron")
+            db._conn.execute("UPDATE sessions SET started_at = ? WHERE id = ?",
+                             (now - 1000 - i, sid))
+            db.append_message(sid, role="user", content="venom project daily status")
+            db.append_message(sid, role="assistant", content="venom project venom project venom summary")
+        db._conn.commit()
+
+    def test_interactive_session_surfaces_above_cron(self, db):
+        self._seed_cron_and_interactive(db)
+        result = json.loads(session_search(query="venom project", limit=1, db=db))
+        assert result["success"] is True
+        assert result["count"] == 1
+        # With cron drowning FTS, bare BM25/recency would return a cron_* hit.
+        # Demotion must put the user's interactive session first.
+        assert result["results"][0]["source"] == "telegram"
+        assert result["results"][0]["session_id"] == "s_user"
+
+    def test_cron_still_reachable_when_only_match(self, db):
+        """Demotion must not exclude cron — when only cron matches, it still
+        comes back."""
+        now = int(time.time())
+        db.create_session("cron_only", source="cron")
+        db._conn.execute("UPDATE sessions SET started_at = ? WHERE id = ?",
+                         (now - 500, "cron_only"))
+        db.append_message("cron_only", role="user", content="quarterly archive sweep")
+        db.append_message("cron_only", role="assistant", content="Archive sweep complete.")
+        db._conn.commit()
+        result = json.loads(session_search(query="archive sweep", db=db))
+        assert result["success"] is True
+        assert result["count"] == 1
+        assert result["results"][0]["source"] == "cron"
+
+    def test_order_for_recall_is_stable_within_class(self):
+        from tools.session_search_tool import _order_for_recall
+        rows = [
+            {"id": 1, "source": "cron"},
+            {"id": 2, "source": "telegram"},
+            {"id": 3, "source": "cron"},
+            {"id": 4, "source": "cli"},
+            {"id": 5, "source": None},
+        ]
+        ordered = _order_for_recall(rows)
+        # Interactive rows first, in original relative order; cron last, in
+        # original relative order.
+        assert [r["id"] for r in ordered] == [2, 4, 5, 1, 3]
--- a/tools/session_search_tool.py
+++ b/tools/session_search_tool.py
@ -39,6 +39,22 @@ from typing import Any, Dict, List, Optional, Union
 # user's session history.
 _HIDDEN_SESSION_SOURCES = ("subagent", "tool")

+# Automation sources that are kept searchable but DEMOTED below interactive
+# sessions in discover ranking. Cron jobs run on a schedule and accumulate
+# large volumes of repetitive vocabulary (recurring project names, dates,
+# "session", summaries); under bare BM25 they dominate the top-N FTS rows and
+# starve out the user's own interactive sessions, producing "recall blindness"
+# where only cron sessions surface (#19434). Demoting — not excluding — keeps
+# cron content reachable when it's the only match, while interactive sessions
+# always win when both match.
+_DEMOTED_SESSION_SOURCES = ("cron",)
+
+# How many FTS rows discover scans before dedup-by-lineage. The interactive
+# vs automation split below only helps if enough rows are in hand to find
+# interactive matches buried under a wall of cron hits, so this is well above
+# the handful of distinct sessions a typical query returns.
+_DISCOVER_SCAN_LIMIT = 300
+

 def _format_timestamp(ts: Union[int, float, str, None]) -> str:
    """Convert a Unix timestamp (float/int) or ISO string to a human-readable date.
@ -87,6 +103,23 @@ def _resolve_to_parent(db, session_id: str) -> str:
    return cur


+def _order_for_recall(raw_results: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    """Stable-sort FTS rows so interactive sessions rank above automation.
+
+    Within each class (interactive vs demoted) the original BM25 ``rank``
+    order is preserved — Python's sort is stable, and rows arrive already
+    ranked by relevance. This only changes cross-class ordering: a cron hit
+    never displaces an interactive hit during lineage dedup, so the user's
+    own conversations surface first even when cron rows out-rank them under
+    bare BM25 (#19434). Demoted rows still appear when they're the only
+    matches.
+    """
+    return sorted(
+        raw_results,
+        key=lambda r: 1 if (r.get("source") or "") in _DEMOTED_SESSION_SOURCES else 0,
+    )
+
+
 def _shape_message(m: Dict[str, Any], anchor_id: Optional[int] = None) -> Dict[str, Any]:
    """Slim a message row for the tool response. Keeps content even if empty."""
    entry = {
@ -481,7 +514,9 @@ def _discover(
            query=query,
            role_filter=role_list,
            exclude_sources=list(_HIDDEN_SESSION_SOURCES),
-            limit=50,  # widen so dedup-by-lineage can find distinct sessions
+            limit=_DISCOVER_SCAN_LIMIT,  # widen so dedup-by-lineage can find
+            # distinct sessions AND so interactive matches buried under a wall
+            # of cron rows are still in hand for the demotion pass below.
            offset=0,
            sort=sort,
        )
@ -489,6 +524,12 @@ def _discover(
        logging.error("FTS5 search failed: %s", e, exc_info=True)
        return tool_error(f"Search failed: {e}", success=False)

+    # Demote automation (cron) rows below interactive ones before dedup, so a
+    # high-volume cron corpus can't starve the user's own sessions out of the
+    # top `limit` results (#19434). Stable — preserves BM25/recency order
+    # within each class.
+    raw_results = _order_for_recall(raw_results)
+
    if not raw_results and not title_result:
        return json.dumps({
            "success": True,