fix(session_search): demote cron below interactive sessions in discover ranking (#53597)

Cron jobs accumulate large volumes of repetitive vocabulary (recurring
project names, dates, summaries) and out-number a user's interactive
sessions. Under bare BM25 they dominate the top FTS rows, so discover's
early-exit-at-N dedup collects only cron sessions and the user's own
conversations never surface — "recall blindness" (#19434).

- _order_for_recall() stable-sorts FTS rows so interactive sources rank
  above cron before lineage dedup; within each class BM25/recency order
  is preserved. Cron is demoted, not excluded, so it still surfaces when
  it is the only match.
- raise discover scan limit 50 -> 300 so buried interactive matches are
  in hand for the demotion pass.

Fixes the cron-flooding sub-bug of #19434. The split-brain sub-bug is
covered by #52798; the child-session sub-bug is superseded by in-place
compaction.
This commit is contained in:
Teknium 2026-06-27 04:41:22 -07:00 committed by GitHub
parent cd592c105c
commit fe1c1c1121
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 110 additions and 1 deletions

View file

@ -570,3 +570,71 @@ class TestCrossProfileRead:
assert result["success"] is True, kwargs
assert result["mode"] == "read"
assert result["session_id"] == "s_other"
# =========================================================================
# Cron demotion in discover ranking (#19434)
# =========================================================================
class TestCronDemotion:
def _seed_cron_and_interactive(self, db):
"""One interactive (telegram) session and several cron sessions, all
matching the same query. Cron rows accumulate repetitive vocabulary
and out-number the user's single interactive session — the live-data
symptom in #19434.
"""
now = int(time.time())
# Interactive user session — older, so it loses on bare recency too.
db.create_session("s_user", source="telegram")
db._conn.execute("UPDATE sessions SET started_at = ? WHERE id = ?",
(now - 90000, "s_user"))
db.append_message("s_user", role="user", content="how is the venom project going")
db.append_message("s_user", role="assistant", content="The venom project shipped its first milestone.")
# Several cron sessions, all newer and all stuffed with the same terms.
for i in range(8):
sid = f"cron_{i}"
db.create_session(sid, source="cron")
db._conn.execute("UPDATE sessions SET started_at = ? WHERE id = ?",
(now - 1000 - i, sid))
db.append_message(sid, role="user", content="venom project daily status")
db.append_message(sid, role="assistant", content="venom project venom project venom summary")
db._conn.commit()
def test_interactive_session_surfaces_above_cron(self, db):
self._seed_cron_and_interactive(db)
result = json.loads(session_search(query="venom project", limit=1, db=db))
assert result["success"] is True
assert result["count"] == 1
# With cron drowning FTS, bare BM25/recency would return a cron_* hit.
# Demotion must put the user's interactive session first.
assert result["results"][0]["source"] == "telegram"
assert result["results"][0]["session_id"] == "s_user"
def test_cron_still_reachable_when_only_match(self, db):
"""Demotion must not exclude cron — when only cron matches, it still
comes back."""
now = int(time.time())
db.create_session("cron_only", source="cron")
db._conn.execute("UPDATE sessions SET started_at = ? WHERE id = ?",
(now - 500, "cron_only"))
db.append_message("cron_only", role="user", content="quarterly archive sweep")
db.append_message("cron_only", role="assistant", content="Archive sweep complete.")
db._conn.commit()
result = json.loads(session_search(query="archive sweep", db=db))
assert result["success"] is True
assert result["count"] == 1
assert result["results"][0]["source"] == "cron"
def test_order_for_recall_is_stable_within_class(self):
from tools.session_search_tool import _order_for_recall
rows = [
{"id": 1, "source": "cron"},
{"id": 2, "source": "telegram"},
{"id": 3, "source": "cron"},
{"id": 4, "source": "cli"},
{"id": 5, "source": None},
]
ordered = _order_for_recall(rows)
# Interactive rows first, in original relative order; cron last, in
# original relative order.
assert [r["id"] for r in ordered] == [2, 4, 5, 1, 3]

View file

@ -39,6 +39,22 @@ from typing import Any, Dict, List, Optional, Union
# user's session history.
_HIDDEN_SESSION_SOURCES = ("subagent", "tool")
# Automation sources that are kept searchable but DEMOTED below interactive
# sessions in discover ranking. Cron jobs run on a schedule and accumulate
# large volumes of repetitive vocabulary (recurring project names, dates,
# "session", summaries); under bare BM25 they dominate the top-N FTS rows and
# starve out the user's own interactive sessions, producing "recall blindness"
# where only cron sessions surface (#19434). Demoting — not excluding — keeps
# cron content reachable when it's the only match, while interactive sessions
# always win when both match.
_DEMOTED_SESSION_SOURCES = ("cron",)
# How many FTS rows discover scans before dedup-by-lineage. The interactive
# vs automation split below only helps if enough rows are in hand to find
# interactive matches buried under a wall of cron hits, so this is well above
# the handful of distinct sessions a typical query returns.
_DISCOVER_SCAN_LIMIT = 300
def _format_timestamp(ts: Union[int, float, str, None]) -> str:
"""Convert a Unix timestamp (float/int) or ISO string to a human-readable date.
@ -87,6 +103,23 @@ def _resolve_to_parent(db, session_id: str) -> str:
return cur
def _order_for_recall(raw_results: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""Stable-sort FTS rows so interactive sessions rank above automation.
Within each class (interactive vs demoted) the original BM25 ``rank``
order is preserved Python's sort is stable, and rows arrive already
ranked by relevance. This only changes cross-class ordering: a cron hit
never displaces an interactive hit during lineage dedup, so the user's
own conversations surface first even when cron rows out-rank them under
bare BM25 (#19434). Demoted rows still appear when they're the only
matches.
"""
return sorted(
raw_results,
key=lambda r: 1 if (r.get("source") or "") in _DEMOTED_SESSION_SOURCES else 0,
)
def _shape_message(m: Dict[str, Any], anchor_id: Optional[int] = None) -> Dict[str, Any]:
"""Slim a message row for the tool response. Keeps content even if empty."""
entry = {
@ -481,7 +514,9 @@ def _discover(
query=query,
role_filter=role_list,
exclude_sources=list(_HIDDEN_SESSION_SOURCES),
limit=50, # widen so dedup-by-lineage can find distinct sessions
limit=_DISCOVER_SCAN_LIMIT, # widen so dedup-by-lineage can find
# distinct sessions AND so interactive matches buried under a wall
# of cron rows are still in hand for the demotion pass below.
offset=0,
sort=sort,
)
@ -489,6 +524,12 @@ def _discover(
logging.error("FTS5 search failed: %s", e, exc_info=True)
return tool_error(f"Search failed: {e}", success=False)
# Demote automation (cron) rows below interactive ones before dedup, so a
# high-volume cron corpus can't starve the user's own sessions out of the
# top `limit` results (#19434). Stable — preserves BM25/recency order
# within each class.
raw_results = _order_for_recall(raw_results)
if not raw_results and not title_result:
return json.dumps({
"success": True,