diff --git a/tests/tools/test_session_search.py b/tests/tools/test_session_search.py index 024070e6761..ec879f18f09 100644 --- a/tests/tools/test_session_search.py +++ b/tests/tools/test_session_search.py @@ -570,3 +570,71 @@ class TestCrossProfileRead: assert result["success"] is True, kwargs assert result["mode"] == "read" assert result["session_id"] == "s_other" + + +# ========================================================================= +# Cron demotion in discover ranking (#19434) +# ========================================================================= + +class TestCronDemotion: + def _seed_cron_and_interactive(self, db): + """One interactive (telegram) session and several cron sessions, all + matching the same query. Cron rows accumulate repetitive vocabulary + and out-number the user's single interactive session — the live-data + symptom in #19434. + """ + now = int(time.time()) + # Interactive user session — older, so it loses on bare recency too. + db.create_session("s_user", source="telegram") + db._conn.execute("UPDATE sessions SET started_at = ? WHERE id = ?", + (now - 90000, "s_user")) + db.append_message("s_user", role="user", content="how is the venom project going") + db.append_message("s_user", role="assistant", content="The venom project shipped its first milestone.") + # Several cron sessions, all newer and all stuffed with the same terms. + for i in range(8): + sid = f"cron_{i}" + db.create_session(sid, source="cron") + db._conn.execute("UPDATE sessions SET started_at = ? WHERE id = ?", + (now - 1000 - i, sid)) + db.append_message(sid, role="user", content="venom project daily status") + db.append_message(sid, role="assistant", content="venom project venom project venom summary") + db._conn.commit() + + def test_interactive_session_surfaces_above_cron(self, db): + self._seed_cron_and_interactive(db) + result = json.loads(session_search(query="venom project", limit=1, db=db)) + assert result["success"] is True + assert result["count"] == 1 + # With cron drowning FTS, bare BM25/recency would return a cron_* hit. + # Demotion must put the user's interactive session first. + assert result["results"][0]["source"] == "telegram" + assert result["results"][0]["session_id"] == "s_user" + + def test_cron_still_reachable_when_only_match(self, db): + """Demotion must not exclude cron — when only cron matches, it still + comes back.""" + now = int(time.time()) + db.create_session("cron_only", source="cron") + db._conn.execute("UPDATE sessions SET started_at = ? WHERE id = ?", + (now - 500, "cron_only")) + db.append_message("cron_only", role="user", content="quarterly archive sweep") + db.append_message("cron_only", role="assistant", content="Archive sweep complete.") + db._conn.commit() + result = json.loads(session_search(query="archive sweep", db=db)) + assert result["success"] is True + assert result["count"] == 1 + assert result["results"][0]["source"] == "cron" + + def test_order_for_recall_is_stable_within_class(self): + from tools.session_search_tool import _order_for_recall + rows = [ + {"id": 1, "source": "cron"}, + {"id": 2, "source": "telegram"}, + {"id": 3, "source": "cron"}, + {"id": 4, "source": "cli"}, + {"id": 5, "source": None}, + ] + ordered = _order_for_recall(rows) + # Interactive rows first, in original relative order; cron last, in + # original relative order. + assert [r["id"] for r in ordered] == [2, 4, 5, 1, 3] diff --git a/tools/session_search_tool.py b/tools/session_search_tool.py index 3a1040e5273..d4d168ec3fa 100644 --- a/tools/session_search_tool.py +++ b/tools/session_search_tool.py @@ -39,6 +39,22 @@ from typing import Any, Dict, List, Optional, Union # user's session history. _HIDDEN_SESSION_SOURCES = ("subagent", "tool") +# Automation sources that are kept searchable but DEMOTED below interactive +# sessions in discover ranking. Cron jobs run on a schedule and accumulate +# large volumes of repetitive vocabulary (recurring project names, dates, +# "session", summaries); under bare BM25 they dominate the top-N FTS rows and +# starve out the user's own interactive sessions, producing "recall blindness" +# where only cron sessions surface (#19434). Demoting — not excluding — keeps +# cron content reachable when it's the only match, while interactive sessions +# always win when both match. +_DEMOTED_SESSION_SOURCES = ("cron",) + +# How many FTS rows discover scans before dedup-by-lineage. The interactive +# vs automation split below only helps if enough rows are in hand to find +# interactive matches buried under a wall of cron hits, so this is well above +# the handful of distinct sessions a typical query returns. +_DISCOVER_SCAN_LIMIT = 300 + def _format_timestamp(ts: Union[int, float, str, None]) -> str: """Convert a Unix timestamp (float/int) or ISO string to a human-readable date. @@ -87,6 +103,23 @@ def _resolve_to_parent(db, session_id: str) -> str: return cur +def _order_for_recall(raw_results: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + """Stable-sort FTS rows so interactive sessions rank above automation. + + Within each class (interactive vs demoted) the original BM25 ``rank`` + order is preserved — Python's sort is stable, and rows arrive already + ranked by relevance. This only changes cross-class ordering: a cron hit + never displaces an interactive hit during lineage dedup, so the user's + own conversations surface first even when cron rows out-rank them under + bare BM25 (#19434). Demoted rows still appear when they're the only + matches. + """ + return sorted( + raw_results, + key=lambda r: 1 if (r.get("source") or "") in _DEMOTED_SESSION_SOURCES else 0, + ) + + def _shape_message(m: Dict[str, Any], anchor_id: Optional[int] = None) -> Dict[str, Any]: """Slim a message row for the tool response. Keeps content even if empty.""" entry = { @@ -481,7 +514,9 @@ def _discover( query=query, role_filter=role_list, exclude_sources=list(_HIDDEN_SESSION_SOURCES), - limit=50, # widen so dedup-by-lineage can find distinct sessions + limit=_DISCOVER_SCAN_LIMIT, # widen so dedup-by-lineage can find + # distinct sessions AND so interactive matches buried under a wall + # of cron rows are still in hand for the demotion pass below. offset=0, sort=sort, ) @@ -489,6 +524,12 @@ def _discover( logging.error("FTS5 search failed: %s", e, exc_info=True) return tool_error(f"Search failed: {e}", success=False) + # Demote automation (cron) rows below interactive ones before dedup, so a + # high-volume cron corpus can't starve the user's own sessions out of the + # top `limit` results (#19434). Stable — preserves BM25/recency order + # within each class. + raw_results = _order_for_recall(raw_results) + if not raw_results and not title_result: return json.dumps({ "success": True,