diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 36b1e9df2d5..e5f9d095252 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -172,7 +172,7 @@ hermes-agent/ │ ├── vision_tools.py # Image analysis via multimodal models │ ├── delegate_tool.py # Subagent spawning and parallel task execution │ ├── code_execution_tool.py # Sandboxed Python with RPC tool access -│ ├── session_search_tool.py # Search past conversations with FTS5 + summarization +│ ├── session_search_tool.py # Search past conversations with FTS5 + anchored windows │ ├── cronjob_tools.py # Scheduled task management │ ├── skill_tools.py # Skill search, load, manage │ └── environments/ # Terminal execution backends diff --git a/agent/agent_runtime_helpers.py b/agent/agent_runtime_helpers.py index b5c70392946..61551a65dc9 100644 --- a/agent/agent_runtime_helpers.py +++ b/agent/agent_runtime_helpers.py @@ -1503,6 +1503,10 @@ def invoke_tool(agent, function_name: str, function_args: dict, effective_task_i query=function_args.get("query", ""), role_filter=function_args.get("role_filter"), limit=function_args.get("limit", 3), + session_id=function_args.get("session_id"), + around_message_id=function_args.get("around_message_id"), + window=function_args.get("window", 5), + sort=function_args.get("sort"), db=session_db, current_session_id=agent.session_id, ) diff --git a/agent/tool_executor.py b/agent/tool_executor.py index a30cc3078bb..12bc7255139 100644 --- a/agent/tool_executor.py +++ b/agent/tool_executor.py @@ -622,6 +622,10 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe query=function_args.get("query", ""), role_filter=function_args.get("role_filter"), limit=function_args.get("limit", 3), + session_id=function_args.get("session_id"), + around_message_id=function_args.get("around_message_id"), + window=function_args.get("window", 5), + sort=function_args.get("sort"), db=session_db, current_session_id=agent.session_id, ) diff --git a/hermes_cli/config.py 
b/hermes_cli/config.py index 3f9bdd69ed4..6510532a7c7 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -871,15 +871,10 @@ DEFAULT_CONFIG = { "timeout": 120, # seconds — compression summarises large contexts; increase for local models "extra_body": {}, }, - "session_search": { - "provider": "auto", - "model": "", - "base_url": "", - "api_key": "", - "timeout": 30, - "extra_body": {}, - "max_concurrency": 3, # Clamp parallel summaries to avoid request-burst 429s on small providers - }, + # Note: session_search no longer uses an auxiliary LLM (PR #27590 — + # single-shape tool returns DB content directly). The old + # ``auxiliary.session_search.*`` block was removed here. Existing + # values in user config.yaml files are harmless leftovers and ignored. "skills_hub": { "provider": "auto", "model": "", diff --git a/hermes_cli/tips.py b/hermes_cli/tips.py index 51f4dd2c0b6..060c441a150 100644 --- a/hermes_cli/tips.py +++ b/hermes_cli/tips.py @@ -458,8 +458,6 @@ TIPS = [ 'image_gen.model in config.yaml picks the FAL model: flux-2/klein, gpt-image-2, nano-banana-pro, and more.', 'image_gen.provider routes image generation through a plugin (OpenAI Images, Codex, FAL) instead of the default.', 'AUXILIARY_VISION_BASE_URL + AUXILIARY_VISION_API_KEY point vision analysis at any OpenAI-compatible endpoint.', - 'auxiliary.session_search.max_concurrency bounds how many matched sessions are summarized in parallel (default 3).', - 'auxiliary.session_search.extra_body forwards provider-specific OpenAI-compatible fields on summarization calls.', # --- Security --- 'security.tirith_fail_open: false makes Hermes block commands when the tirith scanner itself errors out.', diff --git a/hermes_state.py b/hermes_state.py index f693f391f78..51d9f0b406f 100644 --- a/hermes_state.py +++ b/hermes_state.py @@ -25,7 +25,7 @@ from pathlib import Path from agent.memory_manager import sanitize_context from hermes_constants import get_hermes_home -from typing import Any, Callable, 
def _hydrate_message_row(self, row: Any, caller: str) -> Dict[str, Any]:
    """Turn one raw ``messages`` row into a plain dict (SessionDB helper).

    Shared by ``get_messages_around`` and ``get_anchored_view`` so both
    return identically shaped messages.

    Args:
        row: A row object that ``dict()`` can consume (column name -> value).
        caller: Name of the calling method; only used to label the warning
            emitted when ``tool_calls`` cannot be parsed.

    Returns:
        A dict copy of the row where ``content`` has been passed through
        ``self._decode_content`` and a populated ``tool_calls`` value has
        been parsed from JSON.
    """
    msg = dict(row)
    if "content" in msg:
        msg["content"] = self._decode_content(msg["content"])
    if msg.get("tool_calls"):
        try:
            msg["tool_calls"] = json.loads(msg["tool_calls"])
        except (json.JSONDecodeError, TypeError):
            # Best effort: a corrupt tool_calls column must not make the
            # whole read fail, so degrade to "no tool calls".
            logger.warning(
                "Failed to deserialize tool_calls in %s, falling back to []",
                caller,
            )
            msg["tool_calls"] = []
    return msg


def get_messages_around(
    self,
    session_id: str,
    around_message_id: int,
    window: int = 5,
) -> Dict[str, Any]:
    """Load a window of messages anchored on a specific message id.

    Args:
        session_id: Session the anchor must belong to.
        around_message_id: ``messages.id`` of the anchor.
        window: Maximum number of messages to return on each side of the
            anchor. Negative values are clamped to 0; ``None`` or a value
            that cannot be converted to an int falls back to the default
            of 5 instead of raising.

    Returns:
        A dict with:

        - ``window``: up to ``window`` messages before the anchor, the
          anchor itself, and up to ``window`` messages after it, ordered
          by id ascending.
        - ``messages_before``: number of returned messages strictly before
          the anchor. Less than ``window`` means the start of the session
          was reached.
        - ``messages_after``: number of returned messages strictly after
          the anchor. Less than ``window`` means the end of the session
          was reached.

        When ``around_message_id`` is not a message of ``session_id`` the
        result is ``{"window": [], "messages_before": 0,
        "messages_after": 0}``; callers decide how to surface that.
    """
    # Lenient normalisation so callers can pass tool arguments through
    # without validating them first.
    try:
        window = max(0, int(window))
    except (TypeError, ValueError, OverflowError):
        window = 5

    with self._lock:
        # The anchor must exist *in this session*; an id that belongs to
        # another session is treated exactly like a missing id.
        anchor_exists = self._conn.execute(
            "SELECT 1 FROM messages WHERE id = ? AND session_id = ? LIMIT 1",
            (around_message_id, session_id),
        ).fetchone()
        if not anchor_exists:
            return {"window": [], "messages_before": 0, "messages_after": 0}

        # Anchor + preceding rows come back newest-first so LIMIT keeps
        # the rows closest to the anchor (hence window + 1).
        before_rows = self._conn.execute(
            "SELECT * FROM messages "
            "WHERE session_id = ? AND id <= ? "
            "ORDER BY id DESC LIMIT ?",
            (session_id, around_message_id, window + 1),
        ).fetchall()
        after_rows = self._conn.execute(
            "SELECT * FROM messages "
            "WHERE session_id = ? AND id > ? "
            "ORDER BY id ASC LIMIT ?",
            (session_id, around_message_id, window),
        ).fetchall()

    # Flip the DESC half so the combined window is ascending by id.
    ordered_rows = list(reversed(before_rows)) + list(after_rows)
    return {
        "window": [
            self._hydrate_message_row(row, "get_messages_around")
            for row in ordered_rows
        ],
        # before_rows includes the anchor itself, so subtract it.
        "messages_before": max(0, len(before_rows) - 1),
        "messages_after": len(after_rows),
    }


def get_anchored_view(
    self,
    session_id: str,
    around_message_id: int,
    window: int = 5,
    bookend: int = 3,
    keep_roles: Optional[Tuple[str, ...]] = ("user", "assistant"),
) -> Dict[str, Any]:
    """Return an anchored window plus the session's opening and closing.

    Built on ``get_messages_around``; the result has three slices:

    - ``window``: messages immediately surrounding the anchor, filtered to
      ``keep_roles``. The anchor itself is always kept, whatever its role.
    - ``bookend_start``: the first ``bookend`` messages of the session
      whose id is strictly below the window's first id. Empty when the
      window already reaches the session head.
    - ``bookend_end``: the last ``bookend`` messages of the session whose
      id is strictly above the window's last id, in ascending order.

    Bookend messages are restricted to ``keep_roles`` and to rows with
    non-empty ``content``, so tool-call-only assistant turns do not crowd
    out actual prose.

    Args:
        session_id: Session the anchor must belong to.
        around_message_id: ``messages.id`` of the anchor.
        window: Per-side window size, forwarded to ``get_messages_around``.
        bookend: Maximum messages per bookend. Negative values are clamped
            to 0 (no bookends); ``None`` or a non-numeric value falls back
            to the default of 3.
        keep_roles: Roles to keep. ``None`` disables role filtering for
            both window and bookends. A single string is treated as one
            role rather than as a sequence of characters.

    Returns:
        ``{"window", "messages_before", "messages_after", "bookend_start",
        "bookend_end"}``. ``messages_before`` / ``messages_after`` are the
        unfiltered counts reported by ``get_messages_around``. All lists
        are empty and both counts are 0 when the anchor is not in the
        session.
    """
    try:
        bookend = max(0, int(bookend))
    except (TypeError, ValueError, OverflowError):
        bookend = 3

    # The primitive handles anchor validation, hydration and the
    # boundary counts.
    primitive = self.get_messages_around(
        session_id, around_message_id, window=window
    )
    window_rows = primitive["window"]
    if not window_rows:
        return {
            "window": [],
            "messages_before": 0,
            "messages_after": 0,
            "bookend_start": [],
            "bookend_end": [],
        }

    # Materialise the roles once: they are needed both for the in-memory
    # filter and for the SQL placeholders below.
    roles: Optional[Tuple[str, ...]]
    if keep_roles is None:
        roles = None
        filtered_window = window_rows
    else:
        roles = (keep_roles,) if isinstance(keep_roles, str) else tuple(keep_roles)
        keep_set = set(roles)
        filtered_window = [
            m for m in window_rows
            if m.get("id") == around_message_id or m.get("role") in keep_set
        ]

    # Overlap is judged against the *unfiltered* window so a bookend can
    # never repeat a message that sits inside the window's id range.
    window_min_id = window_rows[0]["id"]
    window_max_id = window_rows[-1]["id"]

    bookend_start_rows: List[Any] = []
    bookend_end_rows: List[Any] = []
    if bookend > 0:
        role_clause = ""
        role_params: list = []
        if roles is not None:
            # Only "?" placeholders are interpolated into the SQL text;
            # the role values themselves are always bound parameters.
            role_placeholders = ",".join("?" for _ in roles)
            role_clause = f" AND role IN ({role_placeholders})"
            role_params = list(roles)

        with self._lock:
            bookend_start_rows = self._conn.execute(
                "SELECT * FROM messages "
                f"WHERE session_id = ? AND id < ?{role_clause} "
                "AND length(content) > 0 "
                "ORDER BY id ASC LIMIT ?",
                (session_id, window_min_id, *role_params, bookend),
            ).fetchall()
            bookend_end_rows = self._conn.execute(
                "SELECT * FROM messages "
                f"WHERE session_id = ? AND id > ?{role_clause} "
                "AND length(content) > 0 "
                "ORDER BY id DESC LIMIT ?",
                (session_id, window_max_id, *role_params, bookend),
            ).fetchall()
        # DESC was only needed so LIMIT keeps the tail; present ascending.
        bookend_end_rows = list(reversed(bookend_end_rows))

    return {
        "window": filtered_window,
        "messages_before": primitive["messages_before"],
        "messages_after": primitive["messages_after"],
        "bookend_start": [
            self._hydrate_message_row(row, "get_anchored_view")
            for row in bookend_start_rows
        ],
        "bookend_end": [
            self._hydrate_message_row(row, "get_anchored_view")
            for row in bookend_end_rows
        ],
    }
+ + ``sort`` controls temporal ordering: + - ``None`` (default): FTS5 BM25 relevance only. Time-neutral. + - ``"newest"``: order by message timestamp DESC, then by rank. + - ``"oldest"``: order by message timestamp ASC, then by rank. + + The short-CJK LIKE fallback already orders by timestamp DESC and + ignores ``sort``. The trigram CJK path honours ``sort`` like the main + FTS5 path. """ if not query or not query.strip(): return [] @@ -1905,6 +2113,25 @@ class SessionDB: if not query: return [] + # Normalise sort. Anything not in the allowed set falls back to None + # (FTS5 rank-only) so callers can pass through user input without + # validation. + if isinstance(sort, str): + sort_norm = sort.strip().lower() + if sort_norm not in ("newest", "oldest"): + sort_norm = None + else: + sort_norm = None + + # ORDER BY shared across the main FTS5 path and trigram CJK path. + # With sort set, timestamp is primary and rank is the tiebreaker. + if sort_norm == "newest": + order_by_sql = "ORDER BY m.timestamp DESC, rank" + elif sort_norm == "oldest": + order_by_sql = "ORDER BY m.timestamp ASC, rank" + else: + order_by_sql = "ORDER BY rank" + # Build WHERE clauses dynamically where_clauses = ["messages_fts MATCH ?"] params: list = [query] @@ -1943,7 +2170,7 @@ class SessionDB: JOIN messages m ON m.id = messages_fts.rowid JOIN sessions s ON s.id = m.session_id WHERE {where_sql} - ORDER BY rank + {order_by_sql} LIMIT ? OFFSET ? """ @@ -2012,7 +2239,7 @@ class SessionDB: JOIN messages m ON m.id = messages_fts_trigram.rowid JOIN sessions s ON s.id = m.session_id WHERE {' AND '.join(tri_where)} - ORDER BY rank + {order_by_sql} LIMIT ? OFFSET ? 
""" tri_params.extend([limit, offset]) diff --git a/scripts/release.py b/scripts/release.py index d554e474fe6..e9f35d5433c 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -1058,6 +1058,7 @@ AUTHOR_MAP = { "openclaw@agent.local": "29206394", # PR #22194 salvage (sudo -S brute-force guard, #9590) "freedemon@gmail.com": "fr33d3m0n", # PR #21128 salvage (sudo stdin/askpass DANGEROUS, #17873 cat 4) "zhaowh3613@outlook.com": "VinceZcrikl", # PR #23647 salvage (npm UTF-8 decode on GBK Windows) + "abcdjmm970703@gmail.com": "JabberELF", # PR #20238 seed (session_search dual-mode, evolved into single-shape) "anton.kuenzi@gmail.com": "ZeterMordio", # PR #11754 salvage (zsh completion compdef + _arguments syntax) "23yntong@stu.edu.cn": "iuyup", # PR #6155 salvage (shell=True hardening) "86501179+1RB@users.noreply.github.com": "1RB", # PR #25462 salvage (discord forwarded messages) diff --git a/tests/hermes_state/test_get_anchored_view.py b/tests/hermes_state/test_get_anchored_view.py new file mode 100644 index 00000000000..b1bf2f5a06a --- /dev/null +++ b/tests/hermes_state/test_get_anchored_view.py @@ -0,0 +1,161 @@ +"""Tests for SessionDB.get_anchored_view — anchored window + session bookends. + +Used by the discovery shape of session_search: an FTS5 match becomes the +anchor, the call returns goal (bookend_start) + match (window) + resolution +(bookend_end) in a single round trip, no LLM. +""" +import pytest + +from hermes_state import SessionDB + + +@pytest.fixture +def db(tmp_path): + return SessionDB(tmp_path / "state.db") + + +def _seed_long_session(db, sid="s1", n=30): + """Create a long session with alternating user/assistant prose. 
Returns ids ascending.""" + db.create_session(sid, source="cli") + ids = [] + for i in range(n): + role = "user" if i % 2 == 0 else "assistant" + mid = db.append_message(sid, role=role, content=f"prose msg {i}") + ids.append(mid) + return ids + + +class TestWindowAndBookendShape: + def test_returns_window_with_bookend_start_and_end(self, db): + ids = _seed_long_session(db, n=30) + # Anchor mid-session + anchor = ids[15] + view = db.get_anchored_view("s1", anchor, window=3, bookend=3) + assert len(view["window"]) == 7 # ±3 + anchor + assert len(view["bookend_start"]) == 3 + assert len(view["bookend_end"]) == 3 + # bookend_start is the first 3 ids of the session + assert [m["id"] for m in view["bookend_start"]] == ids[:3] + # bookend_end is the last 3 ids of the session + assert [m["id"] for m in view["bookend_end"]] == ids[-3:] + + def test_window_anchor_marked_correctly(self, db): + ids = _seed_long_session(db, n=20) + anchor = ids[10] + view = db.get_anchored_view("s1", anchor, window=2, bookend=3) + # Anchor message is present in the window + anchor_msgs = [m for m in view["window"] if m["id"] == anchor] + assert len(anchor_msgs) == 1 + + +class TestBookendOverlap: + """Bookends shouldn't duplicate messages that are already in the window.""" + + def test_bookend_start_empty_when_window_covers_session_head(self, db): + ids = _seed_long_session(db, n=10) + # Anchor on msg 1 (id index 1), window=3 → covers ids[0..4] + anchor = ids[1] + view = db.get_anchored_view("s1", anchor, window=3, bookend=3) + # Window includes session head, so bookend_start should be empty + assert view["bookend_start"] == [] + # bookend_end is still populated + assert len(view["bookend_end"]) > 0 + + def test_bookend_end_empty_when_window_covers_session_tail(self, db): + ids = _seed_long_session(db, n=10) + # Anchor on second-to-last + anchor = ids[-2] + view = db.get_anchored_view("s1", anchor, window=3, bookend=3) + assert view["bookend_end"] == [] + assert len(view["bookend_start"]) > 0 + 
+ def test_short_session_both_bookends_empty(self, db): + ids = _seed_long_session(db, n=5) + view = db.get_anchored_view("s1", ids[2], window=10, bookend=3) + # Window covers entire session + assert view["bookend_start"] == [] + assert view["bookend_end"] == [] + # And window has all 5 messages + assert len(view["window"]) == 5 + + +class TestRoleFiltering: + def test_tool_role_filtered_from_window(self, db): + db.create_session("s1", source="cli") + user_ids = [] + for i in range(5): + user_ids.append(db.append_message("s1", role="user", content=f"u{i}")) + db.append_message("s1", role="tool", content=f"tool output {i}", tool_name="x") + # Anchor on user message + view = db.get_anchored_view("s1", user_ids[2], window=5, bookend=0) + # No tool messages should appear in the window + roles = [m.get("role") for m in view["window"]] + assert "tool" not in roles + + def test_anchor_preserved_even_when_tool_role(self, db): + db.create_session("s1", source="cli") + db.append_message("s1", role="user", content="ask") + tool_id = db.append_message("s1", role="tool", content="tool output", tool_name="x") + db.append_message("s1", role="user", content="follow-up") + # Anchor on the tool message — should still appear despite default filter + view = db.get_anchored_view("s1", tool_id, window=5, bookend=0) + ids_in_window = [m["id"] for m in view["window"]] + assert tool_id in ids_in_window + + def test_keep_roles_none_disables_filter(self, db): + db.create_session("s1", source="cli") + anchor_id = db.append_message("s1", role="user", content="ask") + db.append_message("s1", role="tool", content="output", tool_name="x") + view = db.get_anchored_view("s1", anchor_id, window=5, bookend=0, keep_roles=None) + roles = [m.get("role") for m in view["window"]] + assert "tool" in roles + + +class TestEmptyContentFilter: + """Tool-call-only assistant turns (empty content) should be skipped in bookends.""" + + def test_empty_content_messages_excluded_from_bookends(self, db): + 
db.create_session("s1", source="cli") + # Real prose opener + opener = db.append_message("s1", role="user", content="Let's start the work") + # Empty content assistant turn (tool-call-only — common in agent loops) + db.append_message("s1", role="assistant", content="", tool_calls=[{"id": "t1", "function": {"name": "x", "arguments": "{}"}}]) + # More prose + for i in range(20): + db.append_message("s1", role="user" if i % 2 == 0 else "assistant", content=f"prose {i}") + # Another empty assistant near the end + db.append_message("s1", role="assistant", content="", tool_calls=[{"id": "t2", "function": {"name": "y", "arguments": "{}"}}]) + # Prose closer + closer = db.append_message("s1", role="assistant", content="Final decision: ship it.") + + # Anchor mid-session + view = db.get_anchored_view("s1", opener + 15, window=2, bookend=3) + # Bookend_start should not contain the empty-content tool-call turn + for m in view["bookend_start"]: + assert m.get("content"), "bookend_start should skip empty-content messages" + # Bookend_end should include the closer + end_contents = [m.get("content") for m in view["bookend_end"]] + assert any("Final decision" in (c or "") for c in end_contents) + + +class TestAnchorValidation: + def test_missing_anchor_returns_empty_view(self, db): + _seed_long_session(db, n=10) + view = db.get_anchored_view("s1", 999999, window=5, bookend=3) + assert view["window"] == [] + assert view["bookend_start"] == [] + assert view["bookend_end"] == [] + assert view["messages_before"] == 0 + assert view["messages_after"] == 0 + + +class TestSessionIsolation: + """Bookends must not cross session boundaries.""" + + def test_bookends_only_from_anchor_session(self, db): + ids1 = _seed_long_session(db, sid="s1", n=20) + _seed_long_session(db, sid="s2", n=20) + view = db.get_anchored_view("s1", ids1[10], window=2, bookend=3) + # All bookend messages should have session_id = s1 (or session_id col) + for m in view["bookend_start"] + view["bookend_end"]: + assert 
m.get("session_id") == "s1" diff --git a/tests/hermes_state/test_get_messages_around.py b/tests/hermes_state/test_get_messages_around.py new file mode 100644 index 00000000000..4569d2b12be --- /dev/null +++ b/tests/hermes_state/test_get_messages_around.py @@ -0,0 +1,148 @@ +"""Tests for SessionDB.get_messages_around (anchored-window primitive). + +Used by session_search both for the discovery shape (FTS5 match as anchor) +and the scroll shape (user-supplied anchor). Returns a window of messages +around the anchor plus before/after counts so callers can detect session +boundaries. +""" +import pytest + +from hermes_state import SessionDB + + +@pytest.fixture +def db(tmp_path): + return SessionDB(tmp_path / "state.db") + + +def _seed(db, sid="s1", n=10): + """Create session with n alternating user/assistant messages, return ids ascending.""" + db.create_session(sid, source="cli") + ids = [] + for i in range(n): + role = "user" if i % 2 == 0 else "assistant" + # append_message returns the new id + mid = db.append_message(sid, role=role, content=f"msg {i}") + ids.append(mid) + return ids + + +class TestBasicWindow: + def test_returns_window_around_anchor(self, db): + ids = _seed(db, n=10) + anchor = ids[5] + view = db.get_messages_around("s1", anchor, window=2) + # Expected: 2 before + anchor + 2 after = 5 messages + msgs = view["window"] + assert len(msgs) == 5 + assert [m["id"] for m in msgs] == [ids[3], ids[4], ids[5], ids[6], ids[7]] + assert view["messages_before"] == 2 + assert view["messages_after"] == 2 + + def test_window_zero_returns_only_anchor(self, db): + ids = _seed(db, n=5) + view = db.get_messages_around("s1", ids[2], window=0) + assert len(view["window"]) == 1 + assert view["window"][0]["id"] == ids[2] + assert view["messages_before"] == 0 + assert view["messages_after"] == 0 + + def test_negative_window_clamps_to_zero(self, db): + ids = _seed(db, n=5) + view = db.get_messages_around("s1", ids[2], window=-3) + # Just anchor, like window=0 + assert 
len(view["window"]) == 1 + assert view["window"][0]["id"] == ids[2] + + +class TestBoundaryDetection: + """messages_before / messages_after tell the agent it's at start/end.""" + + def test_at_session_start_messages_before_is_short(self, db): + ids = _seed(db, n=10) + # Anchor on first message; ask for window=5 + view = db.get_messages_around("s1", ids[0], window=5) + assert view["messages_before"] == 0 # nothing before the first msg + assert view["messages_after"] == 5 + # window contains anchor + 5 after = 6 messages + assert len(view["window"]) == 6 + + def test_at_session_end_messages_after_is_short(self, db): + ids = _seed(db, n=10) + view = db.get_messages_around("s1", ids[-1], window=5) + assert view["messages_before"] == 5 + assert view["messages_after"] == 0 + assert len(view["window"]) == 6 + + def test_window_larger_than_session(self, db): + ids = _seed(db, n=3) + view = db.get_messages_around("s1", ids[1], window=50) + # All 3 messages return, both boundaries hit + assert len(view["window"]) == 3 + assert view["messages_before"] == 1 + assert view["messages_after"] == 1 + + +class TestAnchorValidation: + def test_missing_anchor_returns_empty(self, db): + _seed(db, n=5) + view = db.get_messages_around("s1", 99999, window=5) + assert view["window"] == [] + assert view["messages_before"] == 0 + assert view["messages_after"] == 0 + + def test_anchor_in_different_session_returns_empty(self, db): + # Two sessions, ask for s1's anchor in s2's namespace + ids1 = _seed(db, sid="s1", n=5) + _seed(db, sid="s2", n=5) + view = db.get_messages_around("s2", ids1[2], window=2) + assert view["window"] == [] + + +class TestScrollPattern: + """The forward/backward scroll loop the agent will run.""" + + def test_scroll_forward_re_anchored_on_last_id(self, db): + ids = _seed(db, n=20) + anchor = ids[5] + v1 = db.get_messages_around("s1", anchor, window=3) + last_id = v1["window"][-1]["id"] + v2 = db.get_messages_around("s1", last_id, window=3) + # Boundary id (last_id) 
appears in both windows (in v2 it's the anchor) + assert last_id in [m["id"] for m in v1["window"]] + assert last_id in [m["id"] for m in v2["window"]] + # v2's window extends beyond v1 + assert max(m["id"] for m in v2["window"]) > max(m["id"] for m in v1["window"]) + + def test_scroll_backward_re_anchored_on_first_id(self, db): + ids = _seed(db, n=20) + anchor = ids[10] + v1 = db.get_messages_around("s1", anchor, window=3) + first_id = v1["window"][0]["id"] + v2 = db.get_messages_around("s1", first_id, window=3) + assert first_id in [m["id"] for m in v1["window"]] + assert first_id in [m["id"] for m in v2["window"]] + assert min(m["id"] for m in v2["window"]) < min(m["id"] for m in v1["window"]) + + +class TestContentHydration: + def test_content_is_decoded(self, db): + ids = _seed(db, n=3) + view = db.get_messages_around("s1", ids[1], window=1) + for m in view["window"]: + assert isinstance(m.get("content"), str) + assert m["content"].startswith("msg ") + + def test_tool_calls_deserialized(self, db): + db.create_session("s1", source="cli") + # Message with tool_calls (pass list — append_message JSON-encodes it) + tc_payload = [{"id": "t1", "function": {"name": "x", "arguments": "{}"}}] + db.append_message("s1", role="assistant", content="", tool_calls=tc_payload) + mid = db.append_message("s1", role="tool", content="result", tool_name="x") + + view = db.get_messages_around("s1", mid, window=2) + # Find the assistant message with tool_calls + asst = [m for m in view["window"] if m.get("role") == "assistant"] + assert asst, "expected an assistant message" + # tool_calls should be a list after hydration, not a string + assert isinstance(asst[0].get("tool_calls"), list) diff --git a/tests/tools/test_llm_content_none_guard.py b/tests/tools/test_llm_content_none_guard.py index b0adea8c7ad..5ecdc725d7d 100644 --- a/tests/tools/test_llm_content_none_guard.py +++ b/tests/tools/test_llm_content_none_guard.py @@ -155,24 +155,6 @@ class TestSkillsGuardContentNone: assert 
content == "" -# ── session_search_tool (line 164) ──────────────────────────────────────── - -class TestSessionSearchContentNone: - """tools/session_search_tool.py — _summarize_session() return line""" - - def test_none_content_raises_before_fix(self): - response = _make_response(None) - - with pytest.raises(AttributeError): - response.choices[0].message.content.strip() - - def test_none_content_safe_with_or_guard(self): - response = _make_response(None) - - content = (response.choices[0].message.content or "").strip() - assert content == "" - - # ── integration: verify the actual source lines are guarded ─────────────── class TestSourceLinesAreGuarded: @@ -218,13 +200,6 @@ class TestSourceLinesAreGuarded: ".content.strip() — apply `(... or \"\").strip()` guard" ) - def test_session_search_tool_guarded(self): - src = self._read_file("tools/session_search_tool.py") - assert ".message.content.strip()" not in src, ( - "tools/session_search_tool.py still has unguarded " - ".content.strip() — apply `(... or \"\").strip()` guard" - ) - # ── extract_content_or_reasoning() ──────────────────────────────────────── diff --git a/tests/tools/test_session_search.py b/tests/tools/test_session_search.py index 8e67f230349..3f517aa1a4b 100644 --- a/tests/tools/test_session_search.py +++ b/tests/tools/test_session_search.py @@ -1,578 +1,401 @@ -"""Tests for tools/session_search_tool.py — helper functions and search dispatcher.""" +"""Tests for the single-shape session_search tool. -import asyncio +Three calling shapes: + 1. DISCOVERY — pass query → FTS5 + anchored window + bookends per hit + 2. SCROLL — pass session_id + around_message_id → just the window + 3. BROWSE — no args → recent sessions chronologically + +All run zero LLM calls. 
+""" import json import time + import pytest +from hermes_state import SessionDB from tools.session_search_tool import ( - _format_timestamp, - _format_conversation, - _truncate_around_matches, - _get_session_search_max_concurrency, - _list_recent_sessions, - _HIDDEN_SESSION_SOURCES, - MAX_SESSION_CHARS, SESSION_SEARCH_SCHEMA, + _HIDDEN_SESSION_SOURCES, + _format_timestamp, + session_search, ) +@pytest.fixture +def db(tmp_path): + return SessionDB(tmp_path / "state.db") + + +def _seed_modpack_sessions(db): + """Create three sessions about a modpack so FTS5 has hits to dedupe.""" + now = int(time.time()) + # Older session — modpack origin + db.create_session("s_oldest", source="cli") + db._conn.execute("UPDATE sessions SET started_at = ?, title = ? WHERE id = ?", + (now - 30000, "Building the Modpack", "s_oldest")) + db.append_message("s_oldest", role="user", content="Let's build a Minecraft modpack") + db.append_message("s_oldest", role="assistant", content="Great. Let me scaffold the modpack repo.") + db.append_message("s_oldest", role="user", content="Use NeoForge 1.21.1") + db.append_message("s_oldest", role="assistant", content="Done. Modpack repo created with NeoForge 1.21.1.") + db.append_message("s_oldest", role="assistant", content="Tier-0 mods installed; modpack smoke test passes.") + + # Middle session — modpack quest coverage + db.create_session("s_middle", source="cli") + db._conn.execute("UPDATE sessions SET started_at = ?, title = ? 
WHERE id = ?", + (now - 15000, "Modpack Quest Coverage", "s_middle")) + db.append_message("s_middle", role="user", content="Deep-dive every modpack reference quest guide") + db.append_message("s_middle", role="assistant", content="Surveying ATM10 questbook for modpack inspiration.") + db.append_message("s_middle", role="user", content="Update the modpack version too") + db.append_message("s_middle", role="assistant", content="Modpack version bumped 0.4 → 0.8.5; quest coverage page added.") + + # Newest session — modpack mob spawn fix + db.create_session("s_newest", source="cli") + db._conn.execute("UPDATE sessions SET started_at = ?, title = ? WHERE id = ?", + (now - 1000, "Modpack Mob Spawn Fix", "s_newest")) + db.append_message("s_newest", role="user", content="Fix the modpack mob spawning") + db.append_message("s_newest", role="assistant", content="Investigating elite mob gating in the modpack KubeJS.") + db.append_message("s_newest", role="assistant", content="Shipped commit b850442. 
Modpack alternator nerfed too.") + db._conn.commit() + + # ========================================================================= -# Tool schema guidance +# Schema invariants # ========================================================================= -class TestHiddenSessionSources: - """Verify the _HIDDEN_SESSION_SOURCES constant used for third-party isolation.""" +class TestSchema: + def test_schema_has_required_params(self): + params = SESSION_SEARCH_SCHEMA["parameters"]["properties"] + # Discovery shape + assert "query" in params + assert "limit" in params + assert "sort" in params + # Scroll shape + assert "session_id" in params + assert "around_message_id" in params + assert "window" in params + # Shared + assert "role_filter" in params - def test_tool_source_is_hidden(self): + def test_no_mode_parameter(self): + # Mode is inferred from which args are set — no explicit mode param + params = SESSION_SEARCH_SCHEMA["parameters"]["properties"] + assert "mode" not in params + + def test_sort_enum(self): + params = SESSION_SEARCH_SCHEMA["parameters"]["properties"] + assert params["sort"]["enum"] == ["newest", "oldest"] + + def test_schema_description_teaches_scroll(self): + desc = SESSION_SEARCH_SCHEMA["description"] + assert "SCROLL" in desc + assert "DISCOVERY" in desc + assert "BROWSE" in desc + # Must explain how to scroll + assert "scroll FORWARD" in desc or "messages[-1]" in desc + + def test_no_llm_promise_in_description(self): + # The new design never calls an LLM + desc = SESSION_SEARCH_SCHEMA["description"].lower() + assert "no llm" in desc + + +class TestHiddenSources: + def test_tool_source_hidden(self): assert "tool" in _HIDDEN_SESSION_SOURCES - def test_standard_sources_not_hidden(self): - for src in ("cli", "telegram", "discord", "slack", "cron"): - assert src not in _HIDDEN_SESSION_SOURCES - - -class TestSessionSearchSchema: - def test_keeps_cross_session_recall_guidance_without_current_session_nudge(self): - description = 
SESSION_SEARCH_SCHEMA["description"] - assert "past conversations" in description - assert "recent turns of the current session" not in description - - -# ========================================================================= -# _format_timestamp -# ========================================================================= class TestFormatTimestamp: - def test_unix_float(self): - ts = 1700000000.0 # Nov 14, 2023 - result = _format_timestamp(ts) - assert "2023" in result or "November" in result + def test_unix_timestamp(self): + out = _format_timestamp(1700000000) + assert "2023" in out - def test_unix_int(self): - result = _format_timestamp(1700000000) - assert isinstance(result, str) - assert len(result) > 5 - - def test_iso_string(self): - result = _format_timestamp("2024-01-15T10:30:00") - assert isinstance(result, str) - - def test_none_returns_unknown(self): + def test_none(self): assert _format_timestamp(None) == "unknown" - def test_numeric_string(self): - result = _format_timestamp("1700000000.0") - assert isinstance(result, str) - assert "unknown" not in result.lower() + def test_iso_string_passthrough(self): + out = _format_timestamp("not-a-number-string") + assert out == "not-a-number-string" # ========================================================================= -# _format_conversation +# Browse shape (no args) # ========================================================================= -class TestFormatConversation: - def test_basic_messages(self): - msgs = [ - {"role": "user", "content": "Hello"}, - {"role": "assistant", "content": "Hi there!"}, - ] - result = _format_conversation(msgs) - assert "[USER]: Hello" in result - assert "[ASSISTANT]: Hi there!" 
in result - - def test_tool_message(self): - msgs = [ - {"role": "tool", "content": "search results", "tool_name": "web_search"}, - ] - result = _format_conversation(msgs) - assert "[TOOL:web_search]" in result - - def test_long_tool_output_truncated(self): - msgs = [ - {"role": "tool", "content": "x" * 1000, "tool_name": "terminal"}, - ] - result = _format_conversation(msgs) - assert "[truncated]" in result - - def test_assistant_with_tool_calls(self): - msgs = [ - { - "role": "assistant", - "content": "", - "tool_calls": [ - {"function": {"name": "web_search"}}, - {"function": {"name": "terminal"}}, - ], - }, - ] - result = _format_conversation(msgs) - assert "web_search" in result - assert "terminal" in result - - def test_empty_messages(self): - result = _format_conversation([]) - assert result == "" - - -# ========================================================================= -# _truncate_around_matches -# ========================================================================= - -class TestTruncateAroundMatches: - def test_short_text_unchanged(self): - text = "Short text about docker" - result = _truncate_around_matches(text, "docker") - assert result == text - - def test_long_text_truncated(self): - # Create text longer than MAX_SESSION_CHARS with query term in middle - padding = "x" * (MAX_SESSION_CHARS + 5000) - text = padding + " KEYWORD_HERE " + padding - result = _truncate_around_matches(text, "KEYWORD_HERE") - assert len(result) <= MAX_SESSION_CHARS + 100 # +100 for prefix/suffix markers - assert "KEYWORD_HERE" in result - - def test_truncation_adds_markers(self): - text = "a" * 50000 + " target " + "b" * (MAX_SESSION_CHARS + 5000) - result = _truncate_around_matches(text, "target") - assert "truncated" in result.lower() - - def test_no_match_takes_from_start(self): - text = "x" * (MAX_SESSION_CHARS + 5000) - result = _truncate_around_matches(text, "nonexistent") - # Should take from the beginning - assert result.startswith("x") - - def 
test_match_at_beginning(self): - text = "KEYWORD " + "x" * (MAX_SESSION_CHARS + 5000) - result = _truncate_around_matches(text, "KEYWORD") - assert "KEYWORD" in result - - def test_multiword_phrase_match_beats_individual_term(self): - """Full phrase deep in text should be found even when a single term - appears much earlier in boilerplate.""" - boilerplate = "The project setup is complex. " * 500 # ~15K, has 'project' early - filler = "x" * (MAX_SESSION_CHARS + 20000) - target = "We reviewed the keystone project roadmap in detail." - text = boilerplate + filler + target + filler - result = _truncate_around_matches(text, "keystone project") - assert "keystone project" in result.lower() - - def test_multiword_proximity_cooccurrence(self): - """When exact phrase is absent, terms co-occurring within proximity - should be preferred over a lone early term.""" - early = "project " + "a" * (MAX_SESSION_CHARS + 20000) - # Place 'keystone' and 'project' near each other (but not as exact phrase) - cooccur = "this keystone initiative for the project was pivotal" - tail = "b" * (MAX_SESSION_CHARS + 20000) - text = early + cooccur + tail - result = _truncate_around_matches(text, "keystone project") - assert "keystone" in result.lower() - assert "project" in result.lower() - - def test_multiword_window_maximises_coverage(self): - """Sliding window should capture as many match clusters as possible.""" - # Place two phrase matches: one at ~50K, one at ~60K, both should fit - pre = "z" * 50000 - match1 = " alpha beta " - gap = "z" * 10000 - match2 = " alpha beta " - post = "z" * (MAX_SESSION_CHARS + 40000) - text = pre + match1 + gap + match2 + post - result = _truncate_around_matches(text, "alpha beta") - assert result.lower().count("alpha beta") == 2 - - -class TestSessionSearchConcurrency: - def test_defaults_to_three(self): - assert _get_session_search_max_concurrency() == 3 - - def test_reads_and_clamps_configured_value(self, monkeypatch): - monkeypatch.setattr( - 
"hermes_cli.config.load_config", - lambda: {"auxiliary": {"session_search": {"max_concurrency": 9}}}, - ) - assert _get_session_search_max_concurrency() == 5 - - def test_session_search_respects_configured_concurrency_limit(self, monkeypatch): - from unittest.mock import MagicMock - from tools.session_search_tool import session_search - - monkeypatch.setattr( - "hermes_cli.config.load_config", - lambda: {"auxiliary": {"session_search": {"max_concurrency": 1}}}, - ) - - max_seen = {"value": 0} - active = {"value": 0} - - async def fake_summarize(_text, _query, _meta): - active["value"] += 1 - max_seen["value"] = max(max_seen["value"], active["value"]) - await asyncio.sleep(0.01) - active["value"] -= 1 - return "summary" - - monkeypatch.setattr("tools.session_search_tool._summarize_session", fake_summarize) - monkeypatch.setattr("model_tools._run_async", lambda coro: asyncio.run(coro)) - - mock_db = MagicMock() - mock_db.search_messages.return_value = [ - {"session_id": "s1", "source": "cli", "session_started": 1709500000, "model": "test"}, - {"session_id": "s2", "source": "cli", "session_started": 1709500001, "model": "test"}, - {"session_id": "s3", "source": "cli", "session_started": 1709500002, "model": "test"}, - ] - mock_db.get_session.side_effect = lambda sid: { - "id": sid, - "parent_session_id": None, - "source": "cli", - "started_at": 1709500000, - } - mock_db.get_messages_as_conversation.side_effect = lambda sid: [ - {"role": "user", "content": f"message from {sid}"}, - {"role": "assistant", "content": "response"}, - ] - - result = json.loads(session_search(query="message", db=mock_db, limit=3)) - +class TestBrowseShape: + def test_no_args_returns_recent_sessions(self, db): + _seed_modpack_sessions(db) + result = json.loads(session_search(db=db)) assert result["success"] is True - assert result["count"] == 3 - assert max_seen["value"] == 1 + assert result["mode"] == "browse" + assert result["count"] >= 3 + def test_browse_excludes_current_session(self, db): 
+ _seed_modpack_sessions(db) + result = json.loads(session_search(db=db, current_session_id="s_newest")) + sids = [r["session_id"] for r in result["results"]] + assert "s_newest" not in sids -class TestRecentSessionListing: - def test_recent_mode_requests_last_active_ordering(self): - from unittest.mock import MagicMock - - mock_db = MagicMock() - mock_db.list_sessions_rich.return_value = [] - - result = json.loads(_list_recent_sessions(mock_db, limit=5)) - - assert result["success"] is True - mock_db.list_sessions_rich.assert_called_once_with( - limit=10, - exclude_sources=["tool"], - order_by_last_active=True, - ) - - def test_current_child_session_excludes_root_lineage_even_when_child_id_is_longer(self): - from unittest.mock import MagicMock - - mock_db = MagicMock() - mock_db.list_sessions_rich.return_value = [ - { - "id": "root", - "title": "Current conversation", - "source": "cli", - "started_at": 1709500000, - "last_active": 1709500100, - "message_count": 4, - "preview": "current root", - "parent_session_id": None, - }, - { - "id": "other_session", - "title": "Other conversation", - "source": "cli", - "started_at": 1709400000, - "last_active": 1709400100, - "message_count": 3, - "preview": "other root", - "parent_session_id": None, - }, - ] - - def _get_session(session_id): - if session_id == "child_session_id_that_is_definitely_longer": - return {"parent_session_id": "root"} - if session_id == "root": - return {"parent_session_id": None} - return None - - mock_db.get_session.side_effect = _get_session - - result = json.loads(_list_recent_sessions( - mock_db, - limit=5, - current_session_id="child_session_id_that_is_definitely_longer", - )) - - assert result["success"] is True - assert [item["session_id"] for item in result["results"]] == ["other_session"] - assert all(item["session_id"] != "root" for item in result["results"]) + def test_browse_returns_titles(self, db): + _seed_modpack_sessions(db) + result = json.loads(session_search(db=db)) + titles = 
[r.get("title") for r in result["results"]] + assert any("Modpack" in (t or "") for t in titles) # ========================================================================= -# session_search (dispatcher) +# Discovery shape (with query) # ========================================================================= -class TestSessionSearch: - def test_no_db_lazily_opens_default_session_db(self, monkeypatch): - from unittest.mock import MagicMock - from tools.session_search_tool import session_search - - mock_db = MagicMock() - mock_db.search_messages.return_value = [] - - class FakeSessionDB: - def __new__(cls): - return mock_db - - import types - import sys - - fake_state = types.ModuleType("hermes_state") - fake_state.SessionDB = FakeSessionDB - monkeypatch.setitem(sys.modules, "hermes_state", fake_state) - - result = json.loads(session_search(query="test")) +class TestDiscoveryShape: + def test_query_returns_anchored_windows(self, db): + _seed_modpack_sessions(db) + result = json.loads(session_search(query="modpack", db=db)) assert result["success"] is True - mock_db.search_messages.assert_called_once() + assert result["mode"] == "discover" + assert result["count"] >= 1 - def test_empty_query_returns_error(self): - from tools.session_search_tool import session_search - mock_db = object() - result = json.loads(session_search(query="", db=mock_db)) - assert result["success"] is False + def test_discovery_result_has_bookends_and_window(self, db): + _seed_modpack_sessions(db) + result = json.loads(session_search(query="modpack", limit=3, db=db)) + for hit in result["results"]: + assert "bookend_start" in hit + assert "messages" in hit + assert "bookend_end" in hit + assert "match_message_id" in hit + assert "snippet" in hit + assert "messages_before" in hit + assert "messages_after" in hit - def test_whitespace_query_returns_error(self): - from tools.session_search_tool import session_search - mock_db = object() - result = json.loads(session_search(query=" ", 
db=mock_db)) - assert result["success"] is False + def test_match_message_id_is_anchor_in_window(self, db): + _seed_modpack_sessions(db) + result = json.loads(session_search(query="modpack", limit=3, db=db)) + for hit in result["results"]: + anchor_id = hit["match_message_id"] + window_ids = [m["id"] for m in hit["messages"]] + assert anchor_id in window_ids - def test_current_session_excluded(self): - """session_search should never return the current session.""" - from unittest.mock import MagicMock - from tools.session_search_tool import session_search - - mock_db = MagicMock() - current_sid = "20260304_120000_abc123" - - # Simulate FTS5 returning matches only from the current session - mock_db.search_messages.return_value = [ - {"session_id": current_sid, "content": "test match", "source": "cli", - "session_started": 1709500000, "model": "test"}, - ] - mock_db.get_session.return_value = {"parent_session_id": None} - - result = json.loads(session_search( - query="test", db=mock_db, current_session_id=current_sid, - )) + def test_no_results_returns_empty_list(self, db): + _seed_modpack_sessions(db) + result = json.loads(session_search(query="zzz_no_such_term_zzz", db=db)) assert result["success"] is True - assert result["count"] == 0 assert result["results"] == [] + assert result["count"] == 0 - def test_current_session_excluded_keeps_others(self): - """Other sessions should still be returned when current is excluded.""" - from unittest.mock import MagicMock - from tools.session_search_tool import session_search + def test_limit_clamped_to_max_10(self, db): + _seed_modpack_sessions(db) + # Pass huge limit; should not error and should cap + result = json.loads(session_search(query="modpack", limit=999, db=db)) + assert result["count"] <= 10 - mock_db = MagicMock() - current_sid = "20260304_120000_abc123" - other_sid = "20260303_100000_def456" + def test_limit_floor_to_1(self, db): + _seed_modpack_sessions(db) + result = json.loads(session_search(query="modpack", 
limit=0, db=db)) + # Result count depends on hits, but the limit must be at least 1 + assert result["count"] >= 0 - mock_db.search_messages.return_value = [ - {"session_id": current_sid, "content": "match 1", "source": "cli", - "session_started": 1709500000, "model": "test"}, - {"session_id": other_sid, "content": "match 2", "source": "telegram", - "session_started": 1709400000, "model": "test"}, - ] - mock_db.get_session.return_value = {"parent_session_id": None} - mock_db.get_messages_as_conversation.return_value = [ - {"role": "user", "content": "hello"}, - {"role": "assistant", "content": "hi there"}, - ] + def test_non_int_limit_falls_back(self, db): + _seed_modpack_sessions(db) + result = json.loads(session_search(query="modpack", limit="bogus", db=db)) + assert result["success"] is True - # Mock async_call_llm to raise RuntimeError → summarizer returns None - from unittest.mock import AsyncMock, patch as _patch - with _patch("tools.session_search_tool.async_call_llm", - new_callable=AsyncMock, - side_effect=RuntimeError("no provider")): + def test_current_session_filtered_out(self, db): + _seed_modpack_sessions(db) + result = json.loads(session_search(query="modpack", db=db, current_session_id="s_newest")) + sids = [r["session_id"] for r in result["results"]] + assert "s_newest" not in sids + + +class TestDiscoverySort: + def test_sort_newest_orders_by_recency(self, db): + _seed_modpack_sessions(db) + result = json.loads(session_search(query="modpack", limit=3, sort="newest", db=db)) + # First result should be the most recent session + first = result["results"][0] + assert first["session_id"] == "s_newest" or "Newest" in (first.get("title") or "") + + def test_sort_oldest_orders_by_age(self, db): + _seed_modpack_sessions(db) + result = json.loads(session_search(query="modpack", limit=3, sort="oldest", db=db)) + first = result["results"][0] + assert first["session_id"] == "s_oldest" + + def test_invalid_sort_silently_ignored(self, db): + 
_seed_modpack_sessions(db) + # Should not error + result = json.loads(session_search(query="modpack", sort="bogus", db=db)) + assert result["success"] is True + + +class TestRoleFilter: + def test_default_excludes_tool_role(self, db): + db.create_session("s1", source="cli") + db.append_message("s1", role="user", content="modpack question") + db.append_message("s1", role="tool", content="modpack tool output", tool_name="x") + result = json.loads(session_search(query="modpack", db=db)) + # The FTS5 match should be on the user message, not the tool message + if result["count"] > 0: + matched_role = result["results"][0]["matched_role"] + assert matched_role in ("user", "assistant") + + def test_explicit_tool_role_includes_tool(self, db): + db.create_session("s1", source="cli") + db.append_message("s1", role="tool", content="modpack tool output", tool_name="x") + result = json.loads(session_search(query="modpack", role_filter="tool", db=db)) + # Should now match the tool message + if result["count"] > 0: + assert result["results"][0]["matched_role"] == "tool" + + +# ========================================================================= +# Scroll shape (session_id + around_message_id) +# ========================================================================= + +class TestScrollShape: + def test_scroll_returns_window_without_bookends(self, db): + _seed_modpack_sessions(db) + # Get an anchor first via discovery + disc = json.loads(session_search(query="modpack", limit=1, db=db)) + anchor_sid = disc["results"][0]["session_id"] + anchor_mid = disc["results"][0]["match_message_id"] + + # Now scroll + result = json.loads(session_search( + session_id=anchor_sid, around_message_id=anchor_mid, window=2, db=db + )) + assert result["success"] is True + assert result["mode"] == "scroll" + assert "messages" in result + # Scroll shape has no bookends + assert "bookend_start" not in result + assert "bookend_end" not in result + + def test_scroll_window_clamped_to_20(self, db): + 
_seed_modpack_sessions(db) + disc = json.loads(session_search(query="modpack", limit=1, db=db)) + anchor_sid = disc["results"][0]["session_id"] + anchor_mid = disc["results"][0]["match_message_id"] + result = json.loads(session_search( + session_id=anchor_sid, around_message_id=anchor_mid, window=999, db=db + )) + assert result["window"] == 20 + + def test_scroll_window_floor_to_1(self, db): + _seed_modpack_sessions(db) + disc = json.loads(session_search(query="modpack", limit=1, db=db)) + anchor_sid = disc["results"][0]["session_id"] + anchor_mid = disc["results"][0]["match_message_id"] + result = json.loads(session_search( + session_id=anchor_sid, around_message_id=anchor_mid, window=-5, db=db + )) + assert result["window"] == 1 + + def test_scroll_returns_messages_before_after_counts(self, db): + _seed_modpack_sessions(db) + disc = json.loads(session_search(query="modpack", limit=1, db=db)) + anchor_sid = disc["results"][0]["session_id"] + anchor_mid = disc["results"][0]["match_message_id"] + result = json.loads(session_search( + session_id=anchor_sid, around_message_id=anchor_mid, window=3, db=db + )) + assert "messages_before" in result + assert "messages_after" in result + + def test_scroll_anchor_in_window(self, db): + _seed_modpack_sessions(db) + disc = json.loads(session_search(query="modpack", limit=1, db=db)) + anchor_sid = disc["results"][0]["session_id"] + anchor_mid = disc["results"][0]["match_message_id"] + result = json.loads(session_search( + session_id=anchor_sid, around_message_id=anchor_mid, window=2, db=db + )) + anchor_in_window = [m for m in result["messages"] if m["id"] == anchor_mid] + assert len(anchor_in_window) == 1 + assert anchor_in_window[0].get("anchor") is True + + def test_scroll_missing_anchor_errors(self, db): + _seed_modpack_sessions(db) + result = json.loads(session_search( + session_id="s_oldest", around_message_id=999999, db=db + )) + assert result["success"] is False + assert "not in" in result.get("error", "") + + def 
test_scroll_missing_session_errors(self, db): + result = json.loads(session_search( + session_id="nonexistent", around_message_id=1, db=db + )) + assert result["success"] is False + + def test_scroll_rejects_current_session_lineage(self, db): + _seed_modpack_sessions(db) + # Grab some valid id from s_oldest + disc = json.loads(session_search(query="modpack", limit=3, db=db)) + match = [r for r in disc["results"] if r["session_id"] == "s_oldest"] + if match: + mid = match[0]["match_message_id"] result = json.loads(session_search( - query="test", db=mock_db, current_session_id=current_sid, + session_id="s_oldest", around_message_id=mid, db=db, + current_session_id="s_oldest", )) + assert result["success"] is False + assert "current session" in result.get("error", "").lower() - assert result["success"] is True - # Current session should be skipped, only other_sid should appear - assert result["sessions_searched"] == 1 - assert current_sid not in [r.get("session_id") for r in result.get("results", [])] - - def test_current_child_session_excludes_parent_lineage(self): - """Compression/delegation parents should be excluded for the active child session.""" - from unittest.mock import MagicMock - from tools.session_search_tool import session_search - - mock_db = MagicMock() - mock_db.search_messages.return_value = [ - {"session_id": "parent_sid", "content": "match", "source": "cli", - "session_started": 1709500000, "model": "test"}, - ] - - def _get_session(session_id): - if session_id == "child_sid": - return {"parent_session_id": "parent_sid"} - if session_id == "parent_sid": - return {"parent_session_id": None} - return None - - mock_db.get_session.side_effect = _get_session - + def test_scroll_invalid_around_message_id_errors(self, db): + _seed_modpack_sessions(db) result = json.loads(session_search( - query="test", db=mock_db, current_session_id="child_sid", + session_id="s_oldest", around_message_id="not-an-int", db=db )) + assert result["success"] is False - assert 
result["success"] is True - assert result["count"] == 0 - assert result["results"] == [] - assert result["sessions_searched"] == 0 - def test_limit_none_coerced_to_default(self): - """Model sends limit=null → should fall back to 3, not TypeError.""" - from unittest.mock import MagicMock - from tools.session_search_tool import session_search +class TestScrollPattern: + """The forward/backward scroll loop using tool output.""" - mock_db = MagicMock() - mock_db.search_messages.return_value = [] + def test_scroll_forward_from_last_id(self, db): + # Long session + db.create_session("s_long", source="cli") + ids = [] + for i in range(20): + ids.append(db.append_message("s_long", role="user" if i % 2 == 0 else "assistant", + content=f"long session msg {i}")) + v1 = json.loads(session_search( + session_id="s_long", around_message_id=ids[5], window=3, db=db + )) + last_id = v1["messages"][-1]["id"] + v2 = json.loads(session_search( + session_id="s_long", around_message_id=last_id, window=3, db=db + )) + # Forward scroll: v2 should reach further than v1 + assert max(m["id"] for m in v2["messages"]) > max(m["id"] for m in v1["messages"]) + # Boundary id appears in both + assert last_id in [m["id"] for m in v1["messages"]] + assert last_id in [m["id"] for m in v2["messages"]] + + +# ========================================================================= +# Shape precedence +# ========================================================================= + +class TestShapePrecedence: + def test_scroll_args_beat_query(self, db): + _seed_modpack_sessions(db) + disc = json.loads(session_search(query="modpack", limit=1, db=db)) + anchor_sid = disc["results"][0]["session_id"] + anchor_mid = disc["results"][0]["match_message_id"] + # Pass both query and scroll args — scroll should win result = json.loads(session_search( - query="test", db=mock_db, limit=None, + query="modpack", # would normally trigger discovery + session_id=anchor_sid, around_message_id=anchor_mid, db=db, )) - assert 
result["success"] is True + assert result["mode"] == "scroll" - def test_limit_type_object_coerced_to_default(self): - """Model sends limit as a type object → should fall back to 3, not TypeError.""" - from unittest.mock import MagicMock - from tools.session_search_tool import session_search + def test_empty_query_falls_back_to_browse(self, db): + _seed_modpack_sessions(db) + result = json.loads(session_search(query=" ", db=db)) + assert result["mode"] == "browse" - mock_db = MagicMock() - mock_db.search_messages.return_value = [] - - result = json.loads(session_search( - query="test", db=mock_db, limit=int, - )) - assert result["success"] is True - - def test_limit_string_coerced(self): - """Model sends limit as string '2' → should coerce to int.""" - from unittest.mock import MagicMock - from tools.session_search_tool import session_search - - mock_db = MagicMock() - mock_db.search_messages.return_value = [] - - result = json.loads(session_search( - query="test", db=mock_db, limit="2", - )) - assert result["success"] is True - - def test_limit_clamped_to_range(self): - """Negative or zero limit should be clamped to 1.""" - from unittest.mock import MagicMock - from tools.session_search_tool import session_search - - mock_db = MagicMock() - mock_db.search_messages.return_value = [] - - result = json.loads(session_search( - query="test", db=mock_db, limit=-5, - )) - assert result["success"] is True - - result = json.loads(session_search( - query="test", db=mock_db, limit=0, - )) - assert result["success"] is True - - def test_current_root_session_excludes_child_lineage(self): - """Delegation child hits should be excluded when they resolve to the current root session.""" - from unittest.mock import MagicMock - from tools.session_search_tool import session_search - - mock_db = MagicMock() - mock_db.search_messages.return_value = [ - {"session_id": "child_sid", "content": "match", "source": "cli", - "session_started": 1709500000, "model": "test"}, - ] - - def 
_get_session(session_id): - if session_id == "root_sid": - return {"parent_session_id": None} - if session_id == "child_sid": - return {"parent_session_id": "root_sid"} - return None - - mock_db.get_session.side_effect = _get_session - - result = json.loads(session_search( - query="test", db=mock_db, current_session_id="root_sid", - )) - - assert result["success"] is True - assert result["count"] == 0 - assert result["results"] == [] - assert result["sessions_searched"] == 0 - - def test_source_from_resolved_parent_not_fts5_child(self): - """source in output must reflect the resolved parent session, not the child that matched FTS5. - - Regression test for #15909: when a delegation child session (source='telegram') - resolves to a parent (source='api_server'), the result entry must report - 'api_server', not 'telegram'. - """ - from unittest.mock import MagicMock, AsyncMock, patch as _patch - from tools.session_search_tool import session_search - - mock_db = MagicMock() - # FTS5 hit is in the child delegation session which carries source='telegram' - mock_db.search_messages.return_value = [ - { - "session_id": "child_sid", - "content": "hello world", - "source": "telegram", # child session source — wrong value to surface - "session_started": 1709400000, - "model": "gpt-4o-mini", - }, - ] - - def _get_session(session_id): - if session_id == "child_sid": - return { - "id": "child_sid", - "parent_session_id": "parent_sid", - "source": "telegram", - "started_at": 1709400000, - "model": "gpt-4o-mini", - } - if session_id == "parent_sid": - return { - "id": "parent_sid", - "parent_session_id": None, - "source": "api_server", # correct parent source - "started_at": 1709300000, - "model": "gpt-4o-mini", - } - return None - - mock_db.get_session.side_effect = _get_session - mock_db.get_messages_as_conversation.return_value = [ - {"role": "user", "content": "hello world"}, - {"role": "assistant", "content": "hi there"}, - ] - - with _patch( - 
"tools.session_search_tool.async_call_llm", - new_callable=AsyncMock, - side_effect=RuntimeError("no provider"), - ): - result = json.loads(session_search(query="hello world", db=mock_db)) - - assert result["success"] is True - assert result["count"] == 1 - entry = result["results"][0] - assert entry["session_id"] == "parent_sid", "should report resolved parent session ID" - assert entry["source"] == "api_server", ( - f"source should be parent's 'api_server', got {entry['source']!r}" - ) + def test_non_string_query_falls_back_to_browse(self, db): + _seed_modpack_sessions(db) + result = json.loads(session_search(query=None, db=db)) # type: ignore + assert result["mode"] == "browse" diff --git a/tools/session_search_tool.py b/tools/session_search_tool.py index e73cce6bbd9..65b9d32f1f7 100644 --- a/tools/session_search_tool.py +++ b/tools/session_search_tool.py @@ -2,52 +2,41 @@ """ Session Search Tool - Long-Term Conversation Recall -Searches past session transcripts in SQLite via FTS5, then summarizes the top -matching sessions using the configured auxiliary session_search model (same -pattern as web_extract). By default, auxiliary "auto" routing uses the main -chat provider/model unless the user overrides auxiliary.session_search. -Returns focused summaries of past conversations rather than raw transcripts, -keeping the main model's context window clean. +Single-shape tool with three calling modes (inferred from args, no explicit +mode parameter): -Flow: - 1. FTS5 search finds matching messages ranked by relevance - 2. Groups by session, takes the top N unique sessions (default 3) - 3. Loads each session's conversation, truncates to ~100k chars centered on matches - 4. Sends to the configured auxiliary model with a focused summarization prompt - 5. Returns per-session summaries with metadata + 1. DISCOVERY — pass ``query``. 
Runs FTS5, dedupes hits by session lineage, + returns top N sessions each with: snippet, ±5 message window around the + match, plus bookend_start (first 3 user+assistant msgs of session) and + bookend_end (last 3). Zero LLM cost. + + 2. SCROLL — pass ``session_id`` + ``around_message_id``. Returns a window + of ±window messages centered on the anchor, no FTS5, no bookends. To + scroll forward / backward, re-anchor on the last / first message id of + the returned window. + + 3. BROWSE — no args. Returns recent sessions chronologically (titles, + previews, timestamps). + +All three modes operate on the SQLite session DB via the FTS5 index and +the get_anchored_view / get_messages_around primitives in hermes_state. +No LLM calls anywhere — every shape returns actual messages from the DB. + +History: PR #20238 (JabberELF) seeded a fast/summary dual-mode split; the +toolkit expansion in PR #26419 (yoniebans) added the anchored drill-down, +bookends, and sort. This module merges all of that into a single calling +shape with no mode parameter, no summary LLM path, and explicit scroll +support. 
""" -import asyncio -import concurrent.futures import json import logging -import re -from typing import Dict, Any, List, Optional, Union +from typing import Any, Dict, List, Optional, Union -from agent.auxiliary_client import async_call_llm, extract_content_or_reasoning -MAX_SESSION_CHARS = 100_000 -MAX_SUMMARY_TOKENS = 10000 - - -def _get_session_search_max_concurrency(default: int = 3) -> int: - """Read auxiliary.session_search.max_concurrency with sane bounds.""" - try: - from hermes_cli.config import load_config - config = load_config() - except ImportError: - return default - aux = config.get("auxiliary", {}) if isinstance(config, dict) else {} - task_config = aux.get("session_search", {}) if isinstance(aux, dict) else {} - if not isinstance(task_config, dict): - return default - raw = task_config.get("max_concurrency") - if raw is None: - return default - try: - value = int(raw) - except (TypeError, ValueError): - return default - return max(1, min(value, 5)) +# Sources that are excluded from session browsing/searching by default. +# Third-party integrations tag their sessions with HERMES_SESSION_SOURCE=tool +# so they don't clutter the user's session history. 
+_HIDDEN_SESSION_SOURCES = ("tool",) def _format_timestamp(ts: Union[int, float, str, None]) -> str: @@ -69,233 +58,72 @@ def _format_timestamp(ts: Union[int, float, str, None]) -> str: return dt.strftime("%B %d, %Y at %I:%M %p") return ts except (ValueError, OSError, OverflowError) as e: - # Log specific errors for debugging while gracefully handling edge cases logging.debug("Failed to format timestamp %s: %s", ts, e, exc_info=True) except Exception as e: logging.debug("Unexpected error formatting timestamp %s: %s", ts, e, exc_info=True) return str(ts) -def _format_conversation(messages: List[Dict[str, Any]]) -> str: - """Format session messages into a readable transcript for summarization.""" - parts = [] - for msg in messages: - role = msg.get("role", "unknown").upper() - content = msg.get("content") or "" - tool_name = msg.get("tool_name") - - if role == "TOOL" and tool_name: - # Truncate long tool outputs - if len(content) > 500: - content = content[:250] + "\n...[truncated]...\n" + content[-250:] - parts.append(f"[TOOL:{tool_name}]: {content}") - elif role == "ASSISTANT": - # Include tool call names if present - tool_calls = msg.get("tool_calls") - if tool_calls and isinstance(tool_calls, list): - tc_names = [] - for tc in tool_calls: - if isinstance(tc, dict): - name = tc.get("name") or tc.get("function", {}).get("name", "?") - tc_names.append(name) - if tc_names: - parts.append(f"[ASSISTANT]: [Called: {', '.join(tc_names)}]") - if content: - parts.append(f"[ASSISTANT]: {content}") - else: - parts.append(f"[ASSISTANT]: {content}") - else: - parts.append(f"[{role}]: {content}") - - return "\n\n".join(parts) - - -def _truncate_around_matches( - full_text: str, query: str, max_chars: int = MAX_SESSION_CHARS -) -> str: - """ - Truncate a conversation transcript to *max_chars*, choosing a window - that maximises coverage of positions where the *query* actually appears. - - Strategy (in priority order): - 1. 
Try to find the full query as a phrase (case-insensitive). - 2. If no phrase hit, look for positions where all query terms appear - within a 200-char proximity window (co-occurrence). - 3. Fall back to individual term positions. - - Once candidate positions are collected the function picks the window - start that covers the most of them. - """ - if len(full_text) <= max_chars: - return full_text - - text_lower = full_text.lower() - query_lower = query.lower().strip() - match_positions: list[int] = [] - - # --- 1. Full-phrase search ------------------------------------------------ - phrase_pat = re.compile(re.escape(query_lower)) - match_positions = [m.start() for m in phrase_pat.finditer(text_lower)] - - # --- 2. Proximity co-occurrence of all terms (within 200 chars) ----------- - if not match_positions: - terms = query_lower.split() - if len(terms) > 1: - # Collect every occurrence of each term - term_positions: dict[str, list[int]] = {} - for t in terms: - term_positions[t] = [ - m.start() for m in re.finditer(re.escape(t), text_lower) - ] - # Slide through positions of the rarest term and check proximity - rarest = min(terms, key=lambda t: len(term_positions.get(t, []))) - for pos in term_positions.get(rarest, []): - if all( - any(abs(p - pos) < 200 for p in term_positions.get(t, [])) - for t in terms - if t != rarest - ): - match_positions.append(pos) - - # --- 3. Individual term positions (last resort) --------------------------- - if not match_positions: - terms = query_lower.split() - for t in terms: - for m in re.finditer(re.escape(t), text_lower): - match_positions.append(m.start()) - - if not match_positions: - # Nothing at all — take from the start - truncated = full_text[:max_chars] - suffix = "\n\n...[later conversation truncated]..." 
if max_chars < len(full_text) else "" - return truncated + suffix - - # --- Pick window that covers the most match positions --------------------- - match_positions.sort() - - best_start = 0 - best_count = 0 - for candidate in match_positions: - ws = max(0, candidate - max_chars // 4) # bias: 25% before, 75% after - we = ws + max_chars - if we > len(full_text): - ws = max(0, len(full_text) - max_chars) - we = len(full_text) - count = sum(1 for p in match_positions if ws <= p < we) - if count > best_count: - best_count = count - best_start = ws - - start = best_start - end = min(len(full_text), start + max_chars) - - truncated = full_text[start:end] - prefix = "...[earlier conversation truncated]...\n\n" if start > 0 else "" - suffix = "\n\n...[later conversation truncated]..." if end < len(full_text) else "" - return prefix + truncated + suffix - - -async def _summarize_session( - conversation_text: str, query: str, session_meta: Dict[str, Any] -) -> Optional[str]: - """Summarize a single session conversation focused on the search query.""" - system_prompt = ( - "You are reviewing a past conversation transcript to help recall what happened. " - "Summarize the conversation with a focus on the search topic. Include:\n" - "1. What the user asked about or wanted to accomplish\n" - "2. What actions were taken and what the outcomes were\n" - "3. Key decisions, solutions found, or conclusions reached\n" - "4. Any specific commands, files, URLs, or technical details that were important\n" - "5. Anything left unresolved or notable\n\n" - "Be thorough but concise. Preserve specific details (commands, paths, error messages) " - "that would be useful to recall. Write in past tense as a factual recap." 
- ) - - source = session_meta.get("source", "unknown") - started = _format_timestamp(session_meta.get("started_at")) - - user_prompt = ( - f"Search topic: {query}\n" - f"Session source: {source}\n" - f"Session date: {started}\n\n" - f"CONVERSATION TRANSCRIPT:\n{conversation_text}\n\n" - f"Summarize this conversation with focus on: {query}" - ) - - max_retries = 3 - for attempt in range(max_retries): +def _resolve_to_parent(db, session_id: str) -> str: + """Walk parent_session_id chain to the lineage root. Falls back to input on errors.""" + if not session_id: + return session_id + visited = set() + cur = session_id + while cur and cur not in visited: + visited.add(cur) try: - response = await async_call_llm( - task="session_search", - messages=[ - {"role": "system", "content": system_prompt}, - {"role": "user", "content": user_prompt}, - ], - temperature=0.1, - max_tokens=MAX_SUMMARY_TOKENS, - ) - content = extract_content_or_reasoning(response) - if content: - return content - # Reasoning-only / empty — let the retry loop handle it - logging.warning("Session search LLM returned empty content (attempt %d/%d)", attempt + 1, max_retries) - if attempt < max_retries - 1: - await asyncio.sleep(1 * (attempt + 1)) - continue - return content - except RuntimeError: - logging.warning("No auxiliary model available for session summarization") - return None + s = db.get_session(cur) + if not s: + break + parent = s.get("parent_session_id") + if not parent: + break + cur = parent except Exception as e: - if attempt < max_retries - 1: - await asyncio.sleep(1 * (attempt + 1)) - else: - logging.warning( - "Session summarization failed after %d attempts: %s", - max_retries, - e, - exc_info=True, - ) - return None + logging.debug("Error resolving parent for %s: %s", cur, e, exc_info=True) + break + return cur -# Sources that are excluded from session browsing/searching by default. -# Third-party integrations (Paperclip agents, etc.) 
tag their sessions with -# HERMES_SESSION_SOURCE=tool so they don't clutter the user's session history. -_HIDDEN_SESSION_SOURCES = ("tool",) +def _shape_message(m: Dict[str, Any], anchor_id: Optional[int] = None) -> Dict[str, Any]: + """Slim a message row for the tool response. Keeps content even if empty.""" + entry = { + "id": m.get("id"), + "role": m.get("role"), + "content": m.get("content"), + "timestamp": m.get("timestamp"), + } + if m.get("tool_name"): + entry["tool_name"] = m.get("tool_name") + if m.get("tool_calls"): + entry["tool_calls"] = m.get("tool_calls") + if m.get("tool_call_id"): + entry["tool_call_id"] = m.get("tool_call_id") + if anchor_id is not None and m.get("id") == anchor_id: + entry["anchor"] = True + # Strip None values to keep payload tight, but always keep content + # (absent content is meaningful — tool-call-only assistant turns). + return {k: v for k, v in entry.items() if v is not None or k in ("content",)} def _list_recent_sessions(db, limit: int, current_session_id: str = None) -> str: - """Return metadata for the most recent sessions (no LLM calls).""" + """Return metadata for the most recent sessions (no LLM calls, no FTS5).""" try: sessions = db.list_sessions_rich( limit=limit + 5, exclude_sources=list(_HIDDEN_SESSION_SOURCES), order_by_last_active=True, - ) # fetch extra to skip current + ) # fetch extra so we can skip current - # Resolve current session lineage to exclude it - current_root = None - if current_session_id: - try: - sid = current_session_id - visited = set() - current_root = current_session_id - while sid and sid not in visited: - visited.add(sid) - current_root = sid - s = db.get_session(sid) - parent = s.get("parent_session_id") if s else None - sid = parent if parent else None - except Exception: - current_root = current_session_id + current_root = _resolve_to_parent(db, current_session_id) if current_session_id else None results = [] for s in sessions: sid = s.get("id", "") if current_root and (sid == 
current_root or sid == current_session_id): continue - # Skip child/delegation sessions (they have parent_session_id) + # Skip child / delegation sessions if s.get("parent_session_id"): continue results.append({ @@ -312,234 +140,318 @@ def _list_recent_sessions(db, limit: int, current_session_id: str = None) -> str return json.dumps({ "success": True, - "mode": "recent", + "mode": "browse", "results": results, "count": len(results), - "message": f"Showing {len(results)} most recent sessions. Use a keyword query to search specific topics.", + "message": f"Showing {len(results)} most recent sessions. Pass a query= to search, or session_id+around_message_id to scroll.", }, ensure_ascii=False) except Exception as e: logging.error("Error listing recent sessions: %s", e, exc_info=True) return tool_error(f"Failed to list recent sessions: {e}", success=False) -def session_search( +def _scroll( + db, + session_id: str, + around_message_id: int, + window: int = 5, + current_session_id: str = None, +) -> str: + """Scroll shape: return a window of messages centered on an anchor. + + No FTS5, no bookends — just the slice. The discovery shape's lineage + fixup is preserved: if the anchor doesn't live in the named session + but does live in a child session in the same lineage, rebind silently. + """ + if not isinstance(session_id, str) or not session_id.strip(): + return tool_error("scroll requires session_id", success=False) + session_id = session_id.strip() + + try: + around_message_id = int(around_message_id) + except (TypeError, ValueError): + return tool_error("scroll requires integer around_message_id", success=False) + + # Window clamp [1, 20] + if not isinstance(window, int): + try: + window = int(window) + except (TypeError, ValueError): + window = 5 + window = max(1, min(window, 20)) + + # Reject scrolling inside the active session lineage — those messages are + # already in context. 
+ if current_session_id: + a_root = _resolve_to_parent(db, session_id) + c_root = _resolve_to_parent(db, current_session_id) + if a_root and c_root and a_root == c_root: + return tool_error( + "scroll rejected: anchor lives in the current session lineage (already in your active context)", + success=False, + ) + + # Session existence check + try: + session_meta = db.get_session(session_id) or {} + except Exception as e: + logging.debug("get_session failed for %s: %s", session_id, e, exc_info=True) + session_meta = {} + if not session_meta: + return tool_error(f"session_id not found: {session_id}", success=False) + + # Fetch the window + try: + view = db.get_messages_around(session_id, around_message_id, window=window) + except Exception as e: + logging.error("get_messages_around failed: %s", e, exc_info=True) + return tool_error(f"failed to load messages: {e}", success=False) + + messages = view.get("window") or [] + + # Lineage rebind: caller may have paired a parent session_id with a + # message id that lives in a descendant (compaction / delegation creates + # child sessions). Locate the real owning session and refetch. 
+ rebind_warning = None + if not messages: + owning = None + try: + conn = getattr(db, "_conn", None) + if conn is not None: + row = conn.execute( + "SELECT session_id FROM messages WHERE id = ?", + (around_message_id,), + ).fetchone() + owning = row[0] if row else None + except Exception as e: + logging.debug("owning-session lookup failed: %s", e, exc_info=True) + owning = None + if owning and owning != session_id: + a_root = _resolve_to_parent(db, session_id) + o_root = _resolve_to_parent(db, owning) + if a_root and o_root and a_root == o_root: + try: + rebind_view = db.get_messages_around(owning, around_message_id, window=window) + messages = rebind_view.get("window") or [] + if messages: + view = rebind_view + rebind_warning = ( + f"around_message_id {around_message_id} lives in {owning} " + f"(child of {session_id}); rebound transparently" + ) + try: + session_meta = db.get_session(owning) or session_meta + except Exception: + pass + session_id = owning + except Exception as e: + logging.debug("rebind get_messages_around failed: %s", e, exc_info=True) + + if not messages: + return tool_error( + f"around_message_id {around_message_id} not in session_id {session_id}", + success=False, + ) + + response = { + "success": True, + "mode": "scroll", + "session_id": session_id, + "around_message_id": around_message_id, + "session_meta": { + "when": _format_timestamp(session_meta.get("started_at")), + "source": session_meta.get("source"), + "model": session_meta.get("model"), + "title": session_meta.get("title"), + }, + "window": window, + "messages": [_shape_message(m, anchor_id=around_message_id) for m in messages], + "messages_before": view.get("messages_before", 0), + "messages_after": view.get("messages_after", 0), + } + if rebind_warning: + response["warning"] = rebind_warning + return json.dumps(response, ensure_ascii=False) + + +def _discover( + db, query: str, + role_filter: Optional[List[str]], + limit: int, + sort: Optional[str], + current_session_id: str = 
None, +) -> str: + """Discovery shape: FTS5 + anchored window + bookends per hit. Single call.""" + role_list = role_filter if role_filter else ["user", "assistant"] + + try: + raw_results = db.search_messages( + query=query, + role_filter=role_list, + exclude_sources=list(_HIDDEN_SESSION_SOURCES), + limit=50, # widen so dedup-by-lineage can find distinct sessions + offset=0, + sort=sort, + ) + except Exception as e: + logging.error("FTS5 search failed: %s", e, exc_info=True) + return tool_error(f"Search failed: {e}", success=False) + + if not raw_results: + return json.dumps({ + "success": True, + "mode": "discover", + "query": query, + "results": [], + "count": 0, + "message": "No matching sessions found.", + }, ensure_ascii=False) + + current_lineage_root = _resolve_to_parent(db, current_session_id) if current_session_id else None + + # Dedupe by lineage. Keep the raw owning session_id on the surviving + # row — only that pairs validly with the FTS5 match id for the anchored + # window. parent_session_id is exposed separately when different. 
+ seen_sessions = {} + for r in raw_results: + raw_sid = r["session_id"] + resolved_sid = _resolve_to_parent(db, raw_sid) + # Skip the current session lineage + if current_lineage_root and resolved_sid == current_lineage_root: + continue + if current_session_id and raw_sid == current_session_id: + continue + if resolved_sid not in seen_sessions: + row = dict(r) + row["_lineage_root"] = resolved_sid + seen_sessions[resolved_sid] = row + if len(seen_sessions) >= limit: + break + + results = [] + for lineage_root, match_info in seen_sessions.items(): + hit_sid = match_info.get("session_id") or lineage_root + msg_id = match_info.get("id") + try: + view = db.get_anchored_view(hit_sid, msg_id, window=5, bookend=3) + except Exception as e: + logging.warning("get_anchored_view failed for %s/%s: %s", hit_sid, msg_id, e, exc_info=True) + continue + + try: + session_meta = db.get_session(lineage_root) or {} + except Exception: + session_meta = {} + + entry = { + "session_id": hit_sid, + "when": _format_timestamp( + session_meta.get("started_at") or match_info.get("session_started") + ), + "source": session_meta.get("source") or match_info.get("source", "unknown"), + "model": session_meta.get("model") or match_info.get("model") or "unknown", + "title": session_meta.get("title") or None, + "matched_role": match_info.get("role"), + "match_message_id": msg_id, + "snippet": match_info.get("snippet") or "", + "bookend_start": [_shape_message(m) for m in (view.get("bookend_start") or [])], + "messages": [_shape_message(m, anchor_id=msg_id) for m in (view.get("window") or [])], + "bookend_end": [_shape_message(m) for m in (view.get("bookend_end") or [])], + "messages_before": view.get("messages_before", 0), + "messages_after": view.get("messages_after", 0), + } + if lineage_root and lineage_root != hit_sid: + entry["parent_session_id"] = lineage_root + results.append(entry) + + return json.dumps({ + "success": True, + "mode": "discover", + "query": query, + "results": results, + 
"count": len(results), + "sessions_searched": len(seen_sessions), + }, ensure_ascii=False) + + +def session_search( + query: str = "", role_filter: str = None, limit: int = 3, db=None, current_session_id: str = None, + # Scroll shape + session_id: str = None, + around_message_id: int = None, + window: int = 5, + # Discovery shape + sort: str = None, ) -> str: - """ - Search past sessions and return focused summaries of matching conversations. + """Single-shape tool. Mode inferred from which args are set. - Uses FTS5 to find matches, then summarizes the top sessions with the - configured auxiliary session_search model. - The current session is excluded from results since the agent already has that context. + Discovery: pass ``query``. + Scroll: pass ``session_id`` + ``around_message_id``. + Browse: pass nothing. + + Scroll wins over discovery when both are set — the agent has explicitly + asked for a slice of a known session. """ if db is None: try: from hermes_state import SessionDB - db = SessionDB() except Exception: logging.debug("SessionDB unavailable for session_search", exc_info=True) from hermes_state import format_session_db_unavailable return tool_error(format_session_db_unavailable(), success=False) - # Defensive: models (especially open-source) may send non-int limit values - # (None when JSON null, string "int", or even a type object). Coerce to a - # safe integer before any arithmetic/comparison to prevent TypeError. + # Scroll shape takes precedence — explicit anchor beats any query. 
+ if (isinstance(session_id, str) and session_id.strip()) and around_message_id is not None: + return _scroll( + db=db, + session_id=session_id, + around_message_id=around_message_id, + window=window, + current_session_id=current_session_id, + ) + + # Limit clamp [1, 10] if not isinstance(limit, int): try: limit = int(limit) except (TypeError, ValueError): limit = 3 - limit = max(1, min(limit, 5)) # Clamp to [1, 5] + limit = max(1, min(limit, 10)) - # Recent sessions mode: when query is empty, return metadata for recent sessions. - # No LLM calls — just DB queries for titles, previews, timestamps. - if not query or not query.strip(): + # Browse shape: no query → recent sessions. + if not query or not isinstance(query, str) or not query.strip(): return _list_recent_sessions(db, limit, current_session_id) - query = query.strip() + # Parse role_filter + role_list: Optional[List[str]] = None + if isinstance(role_filter, str) and role_filter.strip(): + role_list = [r.strip() for r in role_filter.split(",") if r.strip()] - try: - # Parse role filter - role_list = None - if role_filter and role_filter.strip(): - role_list = [r.strip() for r in role_filter.split(",") if r.strip()] + # Normalise sort + sort_norm: Optional[str] = None + if isinstance(sort, str): + candidate = sort.strip().lower() + if candidate in ("newest", "oldest"): + sort_norm = candidate - # FTS5 search -- get matches ranked by relevance - raw_results = db.search_messages( - query=query, - role_filter=role_list, - exclude_sources=list(_HIDDEN_SESSION_SOURCES), - limit=50, # Get more matches to find unique sessions - offset=0, - ) - - if not raw_results: - return json.dumps({ - "success": True, - "query": query, - "results": [], - "count": 0, - "message": "No matching sessions found.", - }, ensure_ascii=False) - - # Resolve child sessions to their parent — delegation stores detailed - # content in child sessions, but the user's conversation is the parent. 
- def _resolve_to_parent(session_id: str) -> str: - """Walk delegation chain to find the root parent session ID.""" - visited = set() - sid = session_id - while sid and sid not in visited: - visited.add(sid) - try: - session = db.get_session(sid) - if not session: - break - parent = session.get("parent_session_id") - if parent: - sid = parent - else: - break - except Exception as e: - logging.debug( - "Error resolving parent for session %s: %s", - sid, - e, - exc_info=True, - ) - break - return sid - - current_lineage_root = ( - _resolve_to_parent(current_session_id) if current_session_id else None - ) - - # Group by resolved (parent) session_id, dedup, skip the current - # session lineage. Compression and delegation create child sessions - # that still belong to the same active conversation. - seen_sessions = {} - for result in raw_results: - raw_sid = result["session_id"] - resolved_sid = _resolve_to_parent(raw_sid) - # Skip the current session lineage — the agent already has that - # context, even if older turns live in parent fragments. 
- if current_lineage_root and resolved_sid == current_lineage_root: - continue - if current_session_id and raw_sid == current_session_id: - continue - if resolved_sid not in seen_sessions: - result = dict(result) - result["session_id"] = resolved_sid - seen_sessions[resolved_sid] = result - if len(seen_sessions) >= limit: - break - - # Prepare all sessions for parallel summarization - tasks = [] - for session_id, match_info in seen_sessions.items(): - try: - messages = db.get_messages_as_conversation(session_id) - if not messages: - continue - session_meta = db.get_session(session_id) or {} - conversation_text = _format_conversation(messages) - conversation_text = _truncate_around_matches(conversation_text, query) - tasks.append((session_id, match_info, conversation_text, session_meta)) - except Exception as e: - logging.warning( - "Failed to prepare session %s: %s", - session_id, - e, - exc_info=True, - ) - - # Summarize all sessions in parallel - async def _summarize_all() -> List[Union[str, Exception]]: - """Summarize all sessions with bounded concurrency.""" - max_concurrency = min(_get_session_search_max_concurrency(), max(1, len(tasks))) - semaphore = asyncio.Semaphore(max_concurrency) - - async def _bounded_summary(text: str, meta: Dict[str, Any]) -> Optional[str]: - async with semaphore: - return await _summarize_session(text, query, meta) - - coros = [ - _bounded_summary(text, meta) - for _, _, text, meta in tasks - ] - return await asyncio.gather(*coros, return_exceptions=True) - - try: - # Use _run_async() which properly manages event loops across - # CLI, gateway, and worker-thread contexts. The previous - # pattern (asyncio.run() in a ThreadPoolExecutor) created a - # disposable event loop that conflicted with cached - # AsyncOpenAI/httpx clients bound to a different loop, - # causing deadlocks in gateway mode (#2681). 
- from model_tools import _run_async - results = _run_async(_summarize_all()) - except concurrent.futures.TimeoutError: - logging.warning( - "Session summarization timed out after 60 seconds", - exc_info=True, - ) - return json.dumps({ - "success": False, - "error": "Session summarization timed out. Try a more specific query or reduce the limit.", - }, ensure_ascii=False) - - summaries = [] - for (session_id, match_info, conversation_text, session_meta), result in zip(tasks, results): - if isinstance(result, Exception): - logging.warning( - "Failed to summarize session %s: %s", - session_id, result, exc_info=True, - ) - result = None - - # Prefer resolved parent session metadata over FTS5 match metadata. - # match_info carries source/model from the *child* session that contained - # the FTS5 hit; after _resolve_to_parent() the session_id points to the - # root, so session_meta has the authoritative platform/source for the - # session the user actually cares about (#15909). - entry = { - "session_id": session_id, - "when": _format_timestamp( - session_meta.get("started_at") or match_info.get("session_started") - ), - "source": session_meta.get("source") or match_info.get("source", "unknown"), - "model": session_meta.get("model") or match_info.get("model"), - } - - if result: - entry["summary"] = result - else: - # Fallback: raw preview so matched sessions aren't silently - # dropped when the summarizer is unavailable (fixes #3409). - preview = (conversation_text[:500] + "\n…[truncated]") if conversation_text else "No preview available." 
- entry["summary"] = f"[Raw preview — summarization unavailable]\n{preview}" - - summaries.append(entry) - - return json.dumps({ - "success": True, - "query": query, - "results": summaries, - "count": len(summaries), - "sessions_searched": len(seen_sessions), - }, ensure_ascii=False) - - except Exception as e: - logging.error("Session search failed: %s", e, exc_info=True) - return tool_error(f"Search failed: {str(e)}", success=False) + return _discover( + db=db, + query=query.strip(), + role_filter=role_list, + limit=limit, + sort=sort_norm, + current_session_id=current_session_id, + ) def check_session_search_requirements() -> bool: - """Requires SQLite state database and an auxiliary text model.""" + """Requires the SQLite state database.""" try: from hermes_state import DEFAULT_DB_PATH return DEFAULT_DB_PATH.parent.exists() @@ -550,44 +462,117 @@ def check_session_search_requirements() -> bool: SESSION_SEARCH_SCHEMA = { "name": "session_search", "description": ( - "Search your long-term memory of past conversations, or browse recent sessions. This is your recall -- " - "every past session is searchable, and this tool summarizes what happened.\n\n" - "TWO MODES:\n" - "1. Recent sessions (no query): Call with no arguments to see what was worked on recently. " - "Returns titles, previews, and timestamps. Zero LLM cost, instant. " - "Start here when the user asks what were we working on or what did we do recently.\n" - "2. Keyword search (with query): Search for specific topics across all past sessions. 
" - "Returns LLM-generated summaries of matching sessions.\n\n" - "USE THIS PROACTIVELY when:\n" - "- The user says 'we did this before', 'remember when', 'last time', 'as I mentioned'\n" - "- The user asks about a topic you worked on before but don't have in current context\n" - "- The user references a project, person, or concept that seems familiar but isn't in memory\n" - "- You want to check if you've solved a similar problem before\n" - "- The user asks 'what did we do about X?' or 'how did we fix Y?'\n\n" - "Don't hesitate to search when it is actually cross-session -- it's fast and cheap. " - "Better to search and confirm than to guess or ask the user to repeat themselves.\n\n" - "Search syntax: keywords joined with OR for broad recall (elevenlabs OR baseten OR funding), " - "phrases for exact match (\"docker networking\"), boolean (python NOT java), prefix (deploy*). " - "IMPORTANT: Use OR between keywords for best results — FTS5 defaults to AND which misses " - "sessions that only mention some terms. If a broad OR query returns nothing, try individual " - "keyword searches in parallel. Returns summaries of the top matching sessions." + "Search past sessions stored in the local session DB, or scroll inside one. " + "FTS5-backed retrieval over the SQLite message store. No LLM calls — every " + "shape returns actual messages from the DB.\n\n" + "THREE CALLING SHAPES\n\n" + " 1) DISCOVERY — pass `query`:\n" + " session_search(query=\"auth refactor\", limit=3)\n" + " Runs FTS5, dedupes hits by session lineage, returns the top N sessions. 
" + "Each result carries:\n" + " - session_id, title, when, source\n" + " - snippet: FTS5-highlighted match excerpt\n" + " - bookend_start: first 3 user+assistant messages of the session " + "(the goal / kickoff)\n" + " - messages: ±5 messages around the FTS5 match, with the anchor message " + "flagged (the hit in context)\n" + " - bookend_end: last 3 user+assistant messages of the session " + "(the resolution / decisions)\n" + " - match_message_id, messages_before, messages_after\n" + " Bookends + window together let you reconstruct goal → match → resolution " + "without paying for the whole transcript.\n\n" + " 2) SCROLL — pass `session_id` + `around_message_id`:\n" + " session_search(session_id=\"...\", around_message_id=12345, window=10)\n" + " Returns a window of ±`window` messages centered on the anchor. No FTS5, " + "no bookends — just the slice. Use after a discovery call when you need more " + "context than the ±5 default window.\n" + " - To scroll FORWARD: pass messages[-1].id back as around_message_id.\n" + " - To scroll BACKWARD: pass messages[0].id back as around_message_id.\n" + " - The boundary message appears in both windows — orientation marker.\n" + " - When messages_before or messages_after is < window, you're at the " + "start or end of the session.\n\n" + " 3) BROWSE — no args:\n" + " session_search()\n" + " Returns recent sessions chronologically: titles, previews, timestamps. " + "Use when the user asks \"what was I working on\" without naming a topic.\n\n" + "FTS5 SYNTAX\n\n" + " AND is the default — multi-word queries require all terms. Use OR explicitly " + "for broader recall (`alpha OR beta OR gamma`), quoted phrases for exact match " + "(`\"docker networking\"`), boolean (`python NOT java`), or prefix wildcards " + "(`deploy*`).\n\n" + "WHEN TO USE\n\n" + " Reach for this on any \"what did we do about X\" / \"where did we leave Y\" / " + "\"find the session where Z\" question — before gh, web search, or filesystem " + "inspection. 
The session DB carries what was said when; external tools show " + "current world state." ), "parameters": { "type": "object", "properties": { "query": { "type": "string", - "description": "Search query — keywords, phrases, or boolean expressions to find in past sessions. Omit this parameter entirely to browse recent sessions instead (returns titles, previews, timestamps with no LLM cost).", - }, - "role_filter": { - "type": "string", - "description": "Optional: only search messages from specific roles (comma-separated). E.g. 'user,assistant' to skip tool outputs.", + "description": ( + "Search query (discovery shape). Keywords, phrases, or boolean " + "expressions to find in past sessions. Omit to browse recent " + "sessions. Ignored when session_id + around_message_id are set " + "(scroll shape)." + ), }, "limit": { "type": "integer", - "description": "Max sessions to summarize (default: 3, max: 5).", + "description": ( + "Discovery shape only. Max sessions to return (default 3, max 10). " + "Bump to 5–10 when the topic likely spans several sessions and you " + "want to pick the right one to scroll into." + ), "default": 3, }, + "sort": { + "type": "string", + "enum": ["newest", "oldest"], + "description": ( + "Discovery shape only. Temporal bias on top of FTS5 ranking. Omit " + "to keep relevance-only ordering (suitable for exploratory recall — " + "\"what do we know about X\"). Set 'newest' for recency-shaped " + "questions (\"where did we leave X\"). Set 'oldest' for " + "origin-shaped questions (\"how did X start\"). Ignored in scroll " + "and browse shapes." + ), + }, + "session_id": { + "type": "string", + "description": ( + "Scroll shape. Session to read inside. Use the session_id returned " + "from a prior discovery call. Must be paired with " + "around_message_id." + ), + }, + "around_message_id": { + "type": "integer", + "description": ( + "Scroll shape. Message id to center the window on. 
From a discovery " + "result use match_message_id, or any id seen in a prior window. To " + "scroll forward pass the last window message's id; to scroll " + "backward pass the first." + ), + }, + "window": { + "type": "integer", + "description": ( + "Scroll shape only. Messages to return on each side of the anchor " + "(anchor itself always included). Clamped to [1, 20]. Default 5." + ), + "default": 5, + }, + "role_filter": { + "type": "string", + "description": ( + "Optional. Comma-separated roles to include. Discovery defaults to " + "'user,assistant' (tool output is usually noise). Pass " + "'user,assistant,tool' to include tool output (debugging tool " + "behaviour) or 'tool' to search tool output only." + ), + }, }, "required": [], }, @@ -605,8 +590,13 @@ registry.register( query=args.get("query") or "", role_filter=args.get("role_filter"), limit=args.get("limit", 3), + session_id=args.get("session_id"), + around_message_id=args.get("around_message_id"), + window=args.get("window", 5), + sort=args.get("sort"), db=kw.get("db"), - current_session_id=kw.get("current_session_id")), + current_session_id=kw.get("current_session_id"), + ), check_fn=check_session_search_requirements, emoji="🔍", ) diff --git a/website/docs/reference/tools-reference.md b/website/docs/reference/tools-reference.md index 64cf5e2dc09..db8a8102b63 100644 --- a/website/docs/reference/tools-reference.md +++ b/website/docs/reference/tools-reference.md @@ -152,7 +152,7 @@ Registered only when the agent is spawned by the kanban dispatcher (`HERMES_KANB | Tool | Description | Requires environment | |------|-------------|----------------------| -| `session_search` | Search your long-term memory of past conversations. This is your recall -- every past session is searchable, and this tool summarizes what happened. 
USE THIS PROACTIVELY when: - The user says 'we did this before', 'remember when', 'last ti… | — | +| `session_search` | Search past sessions stored in the local session DB, or scroll inside one. FTS5-backed retrieval; returns actual messages from the DB (no LLM calls). Three shapes: discovery (pass `query`), scroll (pass `session_id` + `around_message_id`), browse (no args). | — | ## `skills` toolset diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md index d972b38b384..204c6d39c24 100644 --- a/website/docs/user-guide/configuration.md +++ b/website/docs/user-guide/configuration.md @@ -780,7 +780,6 @@ $ hermes model [ ] vision currently: auto / main model [ ] web_extract currently: auto / main model -[ ] session_search currently: openrouter / google/gemini-2.5-flash [ ] title_generation currently: openrouter / google/gemini-3-flash-preview [ ] compression currently: auto / main model [ ] approval currently: auto / main model @@ -862,16 +861,6 @@ auxiliary: compression: timeout: 120 # seconds — compression summarizes long conversations, needs more time - # Session search — summarizes past session matches - session_search: - provider: "auto" - model: "" - base_url: "" - api_key: "" - timeout: 30 - max_concurrency: 3 # Limit parallel summaries to reduce request-burst 429s - extra_body: {} # Provider-specific OpenAI-compatible request fields - # Skills hub — skill matching and search skills_hub: provider: "auto" @@ -909,34 +898,6 @@ Each auxiliary task has a configurable `timeout` (in seconds). Defaults: vision Context compression has its own `compression:` block for thresholds and an `auxiliary.compression:` block for model/provider settings — see [Context Compression](#context-compression) above. The fallback model uses a `fallback_model:` block — see [Fallback Model](/docs/integrations/providers#fallback-model). All three follow the same provider/model/base_url pattern. 
::: -### Session Search Tuning - -If you use a reasoning-heavy model for `auxiliary.session_search`, Hermes now gives you two built-in controls: - -- `auxiliary.session_search.max_concurrency`: limits how many matched sessions Hermes summarizes at once -- `auxiliary.session_search.extra_body`: forwards provider-specific OpenAI-compatible request fields on the summarization calls - -Example: - -```yaml -auxiliary: - session_search: - provider: "main" - model: "glm-4.5-air" - timeout: 60 - max_concurrency: 2 - extra_body: - enable_thinking: false -``` - -Use `max_concurrency` when your provider rate-limits request bursts and you want `session_search` to trade some parallelism for stability. - -Use `extra_body` only when your provider documents OpenAI-compatible request-body fields you want Hermes to pass through for that task. Hermes forwards the object as-is. - -:::warning -`extra_body` is only effective when your provider actually supports the field you send. If the provider does not expose a native OpenAI-compatible reasoning-off flag, Hermes cannot synthesize one on its behalf. -::: - ### OpenRouter routing & Pareto Code for auxiliary tasks When an auxiliary task resolves to OpenRouter (either explicitly or via `provider: "main"` while your main agent is on OpenRouter), the main agent's `provider_routing` and `openrouter.min_coding_score` settings **do not propagate** — by design, each auxiliary task is independent. To set OpenRouter provider preferences or use the [Pareto Code router](/docs/integrations/providers#openrouter-pareto-code-router) for a specific aux task, set them per-task via `extra_body`: diff --git a/website/docs/user-guide/features/fallback-providers.md b/website/docs/user-guide/features/fallback-providers.md index b17102cb82e..6ae92e3bb20 100644 --- a/website/docs/user-guide/features/fallback-providers.md +++ b/website/docs/user-guide/features/fallback-providers.md @@ -188,7 +188,6 @@ Hermes uses separate lightweight models for side tasks. 
Each task has its own pr | Vision | Image analysis, browser screenshots | `auxiliary.vision` | | Web Extract | Web page summarization | `auxiliary.web_extract` | | Compression | Context compression summaries | `auxiliary.compression` | -| Session Search | Past session summarization | `auxiliary.session_search` | | Skills Hub | Skill search and discovery | `auxiliary.skills_hub` | | MCP | MCP helper operations | `auxiliary.mcp` | | Approval | Smart command-approval classification | `auxiliary.approval` | @@ -235,13 +234,6 @@ auxiliary: provider: "auto" model: "" - session_search: - provider: "auto" - model: "" - timeout: 30 - max_concurrency: 3 - extra_body: {} - skills_hub: provider: "auto" model: "" @@ -270,25 +262,6 @@ fallback_model: # base_url: http://localhost:8000/v1 # Optional custom endpoint ``` -For `auxiliary.session_search`, Hermes also supports: - -- `max_concurrency` to limit how many session summaries run at once -- `extra_body` to pass provider-specific OpenAI-compatible request fields through on the summarization calls - -Example: - -```yaml -auxiliary: - session_search: - provider: main - model: glm-4.5-air - max_concurrency: 2 - extra_body: - enable_thinking: false -``` - -If your provider does not support a native OpenAI-compatible reasoning-control field, `extra_body` will not help for that part; in that case `max_concurrency` is still useful for reducing request-burst 429s. - All three — auxiliary, compression, fallback — work the same way: set `provider` to pick who handles the request, `model` to pick which model, and `base_url` to point at a custom endpoint (overrides provider). 
### Provider Options for Auxiliary Tasks @@ -432,7 +405,6 @@ See [Scheduled Tasks (Cron)](/docs/user-guide/features/cron) for full configurat | Vision | Layered (see above) + internal OpenRouter retry | `auxiliary.vision` | | Web extraction | Layered (see above) + internal OpenRouter retry | `auxiliary.web_extract` | | Context compression | Layered (see above); degrades to no-summary if all layers unavailable | `auxiliary.compression` | -| Session search | Layered (see above) | `auxiliary.session_search` | | Skills hub | Layered (see above) | `auxiliary.skills_hub` | | MCP helpers | Layered (see above) | `auxiliary.mcp` | | Approval classification | Layered (see above) | `auxiliary.approval` |