mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-30 11:52:04 +00:00
fix(session_search): demote cron below interactive sessions in discover ranking (#53597)
Cron jobs accumulate large volumes of repetitive vocabulary (recurring project names, dates, summaries) and out-number a user's interactive sessions. Under bare BM25 they dominate the top FTS rows, so discover's early-exit-at-N dedup collects only cron sessions and the user's own conversations never surface — "recall blindness" (#19434). - _order_for_recall() stable-sorts FTS rows so interactive sources rank above cron before lineage dedup; within each class BM25/recency order is preserved. Cron is demoted, not excluded, so it still surfaces when it is the only match. - raise discover scan limit 50 -> 300 so buried interactive matches are in hand for the demotion pass. Fixes the cron-flooding sub-bug of #19434. The split-brain sub-bug is covered by #52798; the child-session sub-bug is superseded by in-place compaction.
This commit is contained in:
parent
cd592c105c
commit
fe1c1c1121
2 changed files with 110 additions and 1 deletions
|
|
@ -570,3 +570,71 @@ class TestCrossProfileRead:
|
|||
assert result["success"] is True, kwargs
|
||||
assert result["mode"] == "read"
|
||||
assert result["session_id"] == "s_other"
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Cron demotion in discover ranking (#19434)
|
||||
# =========================================================================
|
||||
|
||||
class TestCronDemotion:
|
||||
def _seed_cron_and_interactive(self, db):
|
||||
"""One interactive (telegram) session and several cron sessions, all
|
||||
matching the same query. Cron rows accumulate repetitive vocabulary
|
||||
and out-number the user's single interactive session — the live-data
|
||||
symptom in #19434.
|
||||
"""
|
||||
now = int(time.time())
|
||||
# Interactive user session — older, so it loses on bare recency too.
|
||||
db.create_session("s_user", source="telegram")
|
||||
db._conn.execute("UPDATE sessions SET started_at = ? WHERE id = ?",
|
||||
(now - 90000, "s_user"))
|
||||
db.append_message("s_user", role="user", content="how is the venom project going")
|
||||
db.append_message("s_user", role="assistant", content="The venom project shipped its first milestone.")
|
||||
# Several cron sessions, all newer and all stuffed with the same terms.
|
||||
for i in range(8):
|
||||
sid = f"cron_{i}"
|
||||
db.create_session(sid, source="cron")
|
||||
db._conn.execute("UPDATE sessions SET started_at = ? WHERE id = ?",
|
||||
(now - 1000 - i, sid))
|
||||
db.append_message(sid, role="user", content="venom project daily status")
|
||||
db.append_message(sid, role="assistant", content="venom project venom project venom summary")
|
||||
db._conn.commit()
|
||||
|
||||
def test_interactive_session_surfaces_above_cron(self, db):
|
||||
self._seed_cron_and_interactive(db)
|
||||
result = json.loads(session_search(query="venom project", limit=1, db=db))
|
||||
assert result["success"] is True
|
||||
assert result["count"] == 1
|
||||
# With cron drowning FTS, bare BM25/recency would return a cron_* hit.
|
||||
# Demotion must put the user's interactive session first.
|
||||
assert result["results"][0]["source"] == "telegram"
|
||||
assert result["results"][0]["session_id"] == "s_user"
|
||||
|
||||
def test_cron_still_reachable_when_only_match(self, db):
|
||||
"""Demotion must not exclude cron — when only cron matches, it still
|
||||
comes back."""
|
||||
now = int(time.time())
|
||||
db.create_session("cron_only", source="cron")
|
||||
db._conn.execute("UPDATE sessions SET started_at = ? WHERE id = ?",
|
||||
(now - 500, "cron_only"))
|
||||
db.append_message("cron_only", role="user", content="quarterly archive sweep")
|
||||
db.append_message("cron_only", role="assistant", content="Archive sweep complete.")
|
||||
db._conn.commit()
|
||||
result = json.loads(session_search(query="archive sweep", db=db))
|
||||
assert result["success"] is True
|
||||
assert result["count"] == 1
|
||||
assert result["results"][0]["source"] == "cron"
|
||||
|
||||
def test_order_for_recall_is_stable_within_class(self):
|
||||
from tools.session_search_tool import _order_for_recall
|
||||
rows = [
|
||||
{"id": 1, "source": "cron"},
|
||||
{"id": 2, "source": "telegram"},
|
||||
{"id": 3, "source": "cron"},
|
||||
{"id": 4, "source": "cli"},
|
||||
{"id": 5, "source": None},
|
||||
]
|
||||
ordered = _order_for_recall(rows)
|
||||
# Interactive rows first, in original relative order; cron last, in
|
||||
# original relative order.
|
||||
assert [r["id"] for r in ordered] == [2, 4, 5, 1, 3]
|
||||
|
|
|
|||
|
|
@ -39,6 +39,22 @@ from typing import Any, Dict, List, Optional, Union
|
|||
# user's session history.
|
||||
_HIDDEN_SESSION_SOURCES = ("subagent", "tool")
|
||||
|
||||
# Automation sources that are kept searchable but DEMOTED below interactive
|
||||
# sessions in discover ranking. Cron jobs run on a schedule and accumulate
|
||||
# large volumes of repetitive vocabulary (recurring project names, dates,
|
||||
# "session", summaries); under bare BM25 they dominate the top-N FTS rows and
|
||||
# starve out the user's own interactive sessions, producing "recall blindness"
|
||||
# where only cron sessions surface (#19434). Demoting — not excluding — keeps
|
||||
# cron content reachable when it's the only match, while interactive sessions
|
||||
# always win when both match.
|
||||
_DEMOTED_SESSION_SOURCES = ("cron",)
|
||||
|
||||
# How many FTS rows discover scans before dedup-by-lineage. The interactive
|
||||
# vs automation split below only helps if enough rows are in hand to find
|
||||
# interactive matches buried under a wall of cron hits, so this is well above
|
||||
# the handful of distinct sessions a typical query returns.
|
||||
_DISCOVER_SCAN_LIMIT = 300
|
||||
|
||||
|
||||
def _format_timestamp(ts: Union[int, float, str, None]) -> str:
|
||||
"""Convert a Unix timestamp (float/int) or ISO string to a human-readable date.
|
||||
|
|
@ -87,6 +103,23 @@ def _resolve_to_parent(db, session_id: str) -> str:
|
|||
return cur
|
||||
|
||||
|
||||
def _order_for_recall(raw_results: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
||||
"""Stable-sort FTS rows so interactive sessions rank above automation.
|
||||
|
||||
Within each class (interactive vs demoted) the original BM25 ``rank``
|
||||
order is preserved — Python's sort is stable, and rows arrive already
|
||||
ranked by relevance. This only changes cross-class ordering: a cron hit
|
||||
never displaces an interactive hit during lineage dedup, so the user's
|
||||
own conversations surface first even when cron rows out-rank them under
|
||||
bare BM25 (#19434). Demoted rows still appear when they're the only
|
||||
matches.
|
||||
"""
|
||||
return sorted(
|
||||
raw_results,
|
||||
key=lambda r: 1 if (r.get("source") or "") in _DEMOTED_SESSION_SOURCES else 0,
|
||||
)
|
||||
|
||||
|
||||
def _shape_message(m: Dict[str, Any], anchor_id: Optional[int] = None) -> Dict[str, Any]:
|
||||
"""Slim a message row for the tool response. Keeps content even if empty."""
|
||||
entry = {
|
||||
|
|
@ -481,7 +514,9 @@ def _discover(
|
|||
query=query,
|
||||
role_filter=role_list,
|
||||
exclude_sources=list(_HIDDEN_SESSION_SOURCES),
|
||||
limit=50, # widen so dedup-by-lineage can find distinct sessions
|
||||
limit=_DISCOVER_SCAN_LIMIT, # widen so dedup-by-lineage can find
|
||||
# distinct sessions AND so interactive matches buried under a wall
|
||||
# of cron rows are still in hand for the demotion pass below.
|
||||
offset=0,
|
||||
sort=sort,
|
||||
)
|
||||
|
|
@ -489,6 +524,12 @@ def _discover(
|
|||
logging.error("FTS5 search failed: %s", e, exc_info=True)
|
||||
return tool_error(f"Search failed: {e}", success=False)
|
||||
|
||||
# Demote automation (cron) rows below interactive ones before dedup, so a
|
||||
# high-volume cron corpus can't starve the user's own sessions out of the
|
||||
# top `limit` results (#19434). Stable — preserves BM25/recency order
|
||||
# within each class.
|
||||
raw_results = _order_for_recall(raw_results)
|
||||
|
||||
if not raw_results and not title_result:
|
||||
return json.dumps({
|
||||
"success": True,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue