Merge branch 'main' of github.com:NousResearch/hermes-agent into bb/gui

2026-07-25 17:18:11 +00:00 · 2026-05-18 02:23:49 -05:00 · 2026-05-18 02:23:49 -05:00 · e98bec95ef
commit e98bec95ef
parent fd256b0a70 abf1af5401
20 changed files with 1745 additions and 1128 deletions
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@ -172,7 +172,7 @@ hermes-agent/
 │   ├── vision_tools.py           # Image analysis via multimodal models
 │   ├── delegate_tool.py          # Subagent spawning and parallel task execution
 │   ├── code_execution_tool.py    # Sandboxed Python with RPC tool access
-│   ├── session_search_tool.py    # Search past conversations with FTS5 + summarization
+│   ├── session_search_tool.py    # Search past conversations with FTS5 + anchored windows
 │   ├── cronjob_tools.py          # Scheduled task management
 │   ├── skill_tools.py            # Skill search, load, manage
 │   └── environments/             # Terminal execution backends
--- a/agent/agent_runtime_helpers.py
+++ b/agent/agent_runtime_helpers.py
@ -1503,6 +1503,10 @@ def invoke_tool(agent, function_name: str, function_args: dict, effective_task_i
            query=function_args.get("query", ""),
            role_filter=function_args.get("role_filter"),
            limit=function_args.get("limit", 3),
+            session_id=function_args.get("session_id"),
+            around_message_id=function_args.get("around_message_id"),
+            window=function_args.get("window", 5),
+            sort=function_args.get("sort"),
            db=session_db,
            current_session_id=agent.session_id,
        )
--- a/agent/conversation_loop.py
+++ b/agent/conversation_loop.py
@ -82,6 +82,108 @@ def _ra():
    return run_agent


+def _restore_or_build_system_prompt(agent, system_message, conversation_history):
+    """Restore the cached system prompt from the session DB or build it fresh.
+
+    Mutates ``agent._cached_system_prompt`` and persists a freshly-built
+    prompt back to the session DB on first build.  Extracted from
+    ``run_conversation`` so the prefix-cache restore path can be tested in
+    isolation.
+
+    Four-way state distinction for the stored row, surfaced via logs so
+    silent prefix-cache misses are visible in ``agent.log``:
+
+      * ``missing`` — no session row yet (legitimate first turn).
+      * ``null``   — row exists, ``system_prompt`` column is NULL.
+        Legacy session predating system-prompt persistence, or a migration
+        leftover.  Warns when ``conversation_history`` is non-empty.
+      * ``empty``  — row exists, ``system_prompt`` column is the empty
+        string.  Indicates a previous-turn write that ran but stored
+        nothing (silent persistence bug).  Always warns.
+      * ``present`` — row exists with a usable prompt → reused verbatim.
+
+    Read or write failures against the session DB log at WARNING (not
+    DEBUG) so persistent issues (disk full, schema drift, lock contention)
+    surface without needing verbose mode.  This used to be a debug-level
+    log that silently broke prefix-cache reuse on the gateway path
+    (which constructs a fresh ``AIAgent`` per turn and depends on this
+    DB roundtrip).
+    """
+    stored_prompt = None
+    stored_state = "missing"
+    if conversation_history and agent._session_db:
+        try:
+            session_row = agent._session_db.get_session(agent.session_id)
+            if session_row is not None:
+                raw_prompt = session_row.get("system_prompt")
+                if raw_prompt is None:
+                    stored_state = "null"
+                elif raw_prompt == "":
+                    stored_state = "empty"
+                else:
+                    stored_prompt = raw_prompt
+                    stored_state = "present"
+        except Exception as exc:
+            logger.warning(
+                "Session DB get_session failed for system-prompt restore "
+                "(session=%s): %s. Falling back to fresh build — prefix "
+                "cache will miss for this turn.",
+                agent.session_id, exc,
+            )
+
+    if stored_prompt:
+        # Continuing session — reuse the exact system prompt from the
+        # previous turn so the Anthropic cache prefix matches.
+        agent._cached_system_prompt = stored_prompt
+        return
+
+    if conversation_history and stored_state in ("null", "empty"):
+        # Continuing session whose stored prompt is unusable.  The
+        # previous turn's write either never happened or wrote an empty
+        # string — either way every turn now rebuilds and the prefix
+        # cache misses every time.
+        logger.warning(
+            "Stored system prompt for session %s is %s; rebuilding "
+            "from scratch this turn. Prefix cache will miss until "
+            "the rebuild persists. Investigate the previous turn's "
+            "update_system_prompt write path.",
+            agent.session_id, stored_state,
+        )
+
+    # First turn of a new session (or recovering from a broken stored
+    # prompt) — build from scratch.
+    agent._cached_system_prompt = agent._build_system_prompt(system_message)
+
+    # Plugin hook: on_session_start — fired on every fresh build: a brand-new
+    # session, or a continuation whose stored prompt could not be restored.
+    # Plugins can use this to initialise session-scoped state (e.g. warm a memory cache).
+    try:
+        from hermes_cli.plugins import invoke_hook as _invoke_hook
+        _invoke_hook(
+            "on_session_start",
+            session_id=agent.session_id,
+            model=agent.model,
+            platform=getattr(agent, "platform", None) or "",
+        )
+    except Exception as exc:
+        logger.warning("on_session_start hook failed: %s", exc)
+
+    # Persist the system prompt snapshot in SQLite.  Failure here used
+    # to log at DEBUG, which silently broke prefix-cache reuse on the
+    # gateway path (fresh AIAgent per turn → reads from this row every
+    # subsequent turn).
+    if agent._session_db:
+        try:
+            agent._session_db.update_system_prompt(agent.session_id, agent._cached_system_prompt)
+        except Exception as exc:
+            logger.warning(
+                "Session DB update_system_prompt failed for session %s: "
+                "%s. Subsequent turns will rebuild the system prompt and "
+                "miss the prefix cache.",
+                agent.session_id, exc,
+            )
+
+
 def run_conversation(
    agent,
    user_message: str,
@ -313,43 +415,7 @@ def run_conversation(
    # producing a different system prompt and breaking the Anthropic
    # prefix cache.
    if agent._cached_system_prompt is None:
-        stored_prompt = None
-        if conversation_history and agent._session_db:
-            try:
-                session_row = agent._session_db.get_session(agent.session_id)
-                if session_row:
-                    stored_prompt = session_row.get("system_prompt") or None
-            except Exception:
-                pass  # Fall through to build fresh
-
-        if stored_prompt:
-            # Continuing session — reuse the exact system prompt from
-            # the previous turn so the Anthropic cache prefix matches.
-            agent._cached_system_prompt = stored_prompt
-        else:
-            # First turn of a new session — build from scratch.
-            agent._cached_system_prompt = agent._build_system_prompt(system_message)
-            # Plugin hook: on_session_start
-            # Fired once when a brand-new session is created (not on
-            # continuation).  Plugins can use this to initialise
-            # session-scoped state (e.g. warm a memory cache).
-            try:
-                from hermes_cli.plugins import invoke_hook as _invoke_hook
-                _invoke_hook(
-                    "on_session_start",
-                    session_id=agent.session_id,
-                    model=agent.model,
-                    platform=getattr(agent, "platform", None) or "",
-                )
-            except Exception as exc:
-                logger.warning("on_session_start hook failed: %s", exc)
-
-            # Store the system prompt snapshot in SQLite
-            if agent._session_db:
-                try:
-                    agent._session_db.update_system_prompt(agent.session_id, agent._cached_system_prompt)
-                except Exception as e:
-                    logger.debug("Session DB update_system_prompt failed: %s", e)
+        _restore_or_build_system_prompt(agent, system_message, conversation_history)

    active_system_prompt = agent._cached_system_prompt

--- a/agent/prompt_builder.py
+++ b/agent/prompt_builder.py
@ -274,6 +274,10 @@ TOOL_USE_ENFORCEMENT_MODELS = ("gpt", "codex", "gemini", "gemma", "grok", "glm")
 # where GPT models abandon work on partial results, skip prerequisite lookups,
 # hallucinate instead of using tools, and declare "done" without verification.
 # Inspired by patterns from OpenAI's GPT-5.4 prompting guide & OpenClaw PR #38953.
+# Also applied to xAI Grok — same failure modes in practice (claims completion
+# without tool calls, suggests workarounds instead of using existing tools,
+# replies with plans/suggestions instead of executing). The body is
+# family-agnostic; the OPENAI_ prefix reflects origin, not exclusivity.
 OPENAI_MODEL_EXECUTION_GUIDANCE = (
    "# Execution discipline\n"
    "<tool_persistence>\n"
--- a/agent/system_prompt.py
+++ b/agent/system_prompt.py
@ -156,7 +156,10 @@ def build_system_prompt_parts(agent: Any, system_message: Optional[str] = None)
                stable_parts.append(GOOGLE_MODEL_OPERATIONAL_GUIDANCE)
            # OpenAI GPT/Codex execution discipline (tool persistence,
            # prerequisite checks, verification, anti-hallucination).
-            if "gpt" in _model_lower or "codex" in _model_lower:
+            # Also applied to xAI Grok — same failure modes (claims completion
+            # without tool calls, suggests workarounds instead of using
+            # existing tools, replies with plans instead of executing).
+            if "gpt" in _model_lower or "codex" in _model_lower or "grok" in _model_lower:
                stable_parts.append(OPENAI_MODEL_EXECUTION_GUIDANCE)

    has_skills_tools = any(name in agent.valid_tool_names for name in ['skills_list', 'skill_view', 'skill_manage'])
@ -255,7 +258,13 @@ def build_system_prompt_parts(agent: Any, system_message: Optional[str] = None)

    from hermes_time import now as _hermes_now
    now = _hermes_now()
-    timestamp_line = f"Conversation started: {now.strftime('%A, %B %d, %Y %I:%M %p')}"
+    # Date-only (not minute-precision) so the system prompt is byte-stable
+    # for the full day.  Minute-precision changes invalidate prefix-cache KV
+    # on every rebuild path (compression boundary, fresh-agent gateway turns,
+    # session resume without a stored prompt).  The model can still query the
+    # exact wall-clock time via tools when it actually needs it.
+    # Credit: @iamfoz (PR #20451).
+    timestamp_line = f"Conversation started: {now.strftime('%A, %B %d, %Y')}"
    if agent.pass_session_id and agent.session_id:
        timestamp_line += f"\nSession ID: {agent.session_id}"
    if agent.model:
--- a/agent/tool_executor.py
+++ b/agent/tool_executor.py
@ -622,6 +622,10 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
                    query=function_args.get("query", ""),
                    role_filter=function_args.get("role_filter"),
                    limit=function_args.get("limit", 3),
+                    session_id=function_args.get("session_id"),
+                    around_message_id=function_args.get("around_message_id"),
+                    window=function_args.get("window", 5),
+                    sort=function_args.get("sort"),
                    db=session_db,
                    current_session_id=agent.session_id,
                )
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@ -872,15 +872,10 @@ DEFAULT_CONFIG = {
            "timeout": 120,        # seconds — compression summarises large contexts; increase for local models
            "extra_body": {},
        },
-        "session_search": {
-            "provider": "auto",
-            "model": "",
-            "base_url": "",
-            "api_key": "",
-            "timeout": 30,
-            "extra_body": {},
-            "max_concurrency": 3,  # Clamp parallel summaries to avoid request-burst 429s on small providers
-        },
+        # Note: session_search no longer uses an auxiliary LLM (PR #27590 —
+        # single-shape tool returns DB content directly). The old
+        # ``auxiliary.session_search.*`` block was removed here. Existing
+        # values in user config.yaml files are harmless leftovers and ignored.
        "skills_hub": {
            "provider": "auto",
            "model": "",
--- a/hermes_cli/tips.py
+++ b/hermes_cli/tips.py
@ -458,8 +458,6 @@ TIPS = [
    'image_gen.model in config.yaml picks the FAL model: flux-2/klein, gpt-image-2, nano-banana-pro, and more.',
    'image_gen.provider routes image generation through a plugin (OpenAI Images, Codex, FAL) instead of the default.',
    'AUXILIARY_VISION_BASE_URL + AUXILIARY_VISION_API_KEY point vision analysis at any OpenAI-compatible endpoint.',
-    'auxiliary.session_search.max_concurrency bounds how many matched sessions are summarized in parallel (default 3).',
-    'auxiliary.session_search.extra_body forwards provider-specific OpenAI-compatible fields on summarization calls.',

    # --- Security ---
    'security.tirith_fail_open: false makes Hermes block commands when the tirith scanner itself errors out.',
--- a/hermes_state.py
+++ b/hermes_state.py
@ -25,7 +25,7 @@ from pathlib import Path

 from agent.memory_manager import sanitize_context
 from hermes_constants import get_hermes_home
-from typing import Any, Callable, Dict, List, Optional, TypeVar
+from typing import Any, Callable, Dict, List, Optional, Tuple, TypeVar

 logger = logging.getLogger(__name__)

@ -1635,6 +1635,204 @@ class SessionDB:
            result.append(msg)
        return result

+    def get_messages_around(
+        self,
+        session_id: str,
+        around_message_id: int,
+        window: int = 5,
+    ) -> Dict[str, Any]:
+        """Load a window of messages anchored on a specific message id.
+
+        Returns a dict with:
+          - ``window``: up to ``window`` messages before the anchor, the anchor
+            itself, and up to ``window`` messages after, ordered by id ascending.
+          - ``messages_before``: count of messages in the returned slice that
+            come strictly before the anchor (== window unless we hit the start).
+          - ``messages_after``: count of messages in the returned slice that
+            come strictly after the anchor (== window unless we hit the end).
+
+        Used by ``session_search`` for both the discovery shape (anchored on the
+        FTS5 match) and the scroll shape (anchored on any message id). The
+        ``messages_before`` / ``messages_after`` counts let the caller detect
+        session boundaries: when either is less than ``window``, the agent has
+        reached one end of the session.
+
+        Returns an empty window when ``around_message_id`` is not a real id in
+        ``session_id`` — callers decide how to surface that.
+        """
+        if window < 0:
+            window = 0
+        with self._lock:
+            # Confirm the anchor exists in this session.
+            anchor_exists = self._conn.execute(
+                "SELECT 1 FROM messages WHERE id = ? AND session_id = ? LIMIT 1",
+                (around_message_id, session_id),
+            ).fetchone()
+            if not anchor_exists:
+                return {"window": [], "messages_before": 0, "messages_after": 0}
+
+            # Two queries: anchor + before (DESC, take window+1), and after
+            # (ASC, take window). Final order is id ASC.
+            before_rows = self._conn.execute(
+                "SELECT * FROM messages "
+                "WHERE session_id = ? AND id <= ? "
+                "ORDER BY id DESC LIMIT ?",
+                (session_id, around_message_id, window + 1),
+            ).fetchall()
+            after_rows = self._conn.execute(
+                "SELECT * FROM messages "
+                "WHERE session_id = ? AND id > ? "
+                "ORDER BY id ASC LIMIT ?",
+                (session_id, around_message_id, window),
+            ).fetchall()
+
+        # before_rows is DESC; reverse so it's ASC, then concatenate after_rows.
+        rows = list(reversed(before_rows)) + list(after_rows)
+        result = []
+        for row in rows:
+            msg = dict(row)
+            if "content" in msg:
+                msg["content"] = self._decode_content(msg["content"])
+            if msg.get("tool_calls"):
+                try:
+                    msg["tool_calls"] = json.loads(msg["tool_calls"])
+                except (json.JSONDecodeError, TypeError):
+                    logger.warning(
+                        "Failed to deserialize tool_calls in get_messages_around, falling back to []"
+                    )
+                    msg["tool_calls"] = []
+            result.append(msg)
+
+        # before_rows includes the anchor itself; subtract 1 for the count of
+        # messages strictly before the anchor in the returned slice.
+        messages_before = max(0, len(before_rows) - 1)
+        messages_after = len(after_rows)
+        return {
+            "window": result,
+            "messages_before": messages_before,
+            "messages_after": messages_after,
+        }
+
+    def get_anchored_view(
+        self,
+        session_id: str,
+        around_message_id: int,
+        window: int = 5,
+        bookend: int = 3,
+        keep_roles: Optional[Tuple[str, ...]] = ("user", "assistant"),
+    ) -> Dict[str, Any]:
+        """Return an anchored window plus session bookends.
+
+        Built on top of ``get_messages_around``. Three slices:
+
+          - ``window``: messages immediately surrounding the anchor. Filtered
+            to ``keep_roles`` (tool-response noise dropped by default), EXCEPT
+            the anchor itself is always preserved regardless of role.
+          - ``bookend_start``: first ``bookend`` user/assistant messages of the
+            session — but only those whose id is strictly before the window's
+            first message id. Empty when the window already overlaps the
+            session head. Empty-content messages (tool-call-only assistant
+            turns) are skipped so they don't crowd out actual prose openings.
+          - ``bookend_end``: last ``bookend`` user/assistant messages of the
+            session, same non-overlap rule at the tail.
+
+        Bookends let an FTS5 hit anywhere in a long session yield the goal
+        (opening) and the resolution (closing) on a single call — without
+        loading the whole transcript.
+
+        Returns ``{"window": [], "messages_before": 0, "messages_after": 0,
+        "bookend_start": [], "bookend_end": []}`` when the anchor isn't in
+        the session.
+
+        ``keep_roles=None`` disables role filtering (raw window + raw
+        bookends).
+        """
+        if bookend < 0:
+            bookend = 0
+
+        # Reuse the primitive — handles anchor-existence, content decoding,
+        # tool_calls deserialisation, and boundary counts.
+        primitive = self.get_messages_around(
+            session_id, around_message_id, window=window
+        )
+        window_rows = primitive["window"]
+        if not window_rows:
+            return {
+                "window": [],
+                "messages_before": 0,
+                "messages_after": 0,
+                "bookend_start": [],
+                "bookend_end": [],
+            }
+
+        # Apply role filter to the window, but never drop the anchor itself.
+        if keep_roles is not None:
+            keep_set = set(keep_roles)
+            filtered_window = [
+                m for m in window_rows
+                if m.get("id") == around_message_id or m.get("role") in keep_set
+            ]
+        else:
+            filtered_window = window_rows
+
+        window_min_id = window_rows[0]["id"]
+        window_max_id = window_rows[-1]["id"]
+
+        # Fetch bookends only when there's room outside the window. SQL filters
+        # by id range, role, and non-empty content — tool-call-only assistant
+        # turns (content='' with tool_calls populated) are excluded so they
+        # don't crowd out actual prose openings/closings.
+        bookend_start_rows: List[Any] = []
+        bookend_end_rows: List[Any] = []
+        if bookend > 0:
+            with self._lock:
+                role_clause = ""
+                role_params: list = []
+                if keep_roles is not None:
+                    role_placeholders = ",".join("?" for _ in keep_roles)
+                    role_clause = f" AND role IN ({role_placeholders})"
+                    role_params = list(keep_roles)
+
+                bookend_start_rows = self._conn.execute(
+                    f"SELECT * FROM messages "
+                    f"WHERE session_id = ? AND id < ?{role_clause} "
+                    f"AND length(content) > 0 "
+                    f"ORDER BY id ASC LIMIT ?",
+                    (session_id, window_min_id, *role_params, bookend),
+                ).fetchall()
+
+                bookend_end_rows = self._conn.execute(
+                    f"SELECT * FROM messages "
+                    f"WHERE session_id = ? AND id > ?{role_clause} "
+                    f"AND length(content) > 0 "
+                    f"ORDER BY id DESC LIMIT ?",
+                    (session_id, window_max_id, *role_params, bookend),
+                ).fetchall()
+                # End rows came back DESC for the LIMIT cap; flip to ASC.
+                bookend_end_rows = list(reversed(bookend_end_rows))
+
+        def _hydrate(row) -> Dict[str, Any]:
+            msg = dict(row)
+            if "content" in msg:
+                msg["content"] = self._decode_content(msg["content"])
+            if msg.get("tool_calls"):
+                try:
+                    msg["tool_calls"] = json.loads(msg["tool_calls"])
+                except (json.JSONDecodeError, TypeError):
+                    logger.warning(
+                        "Failed to deserialize tool_calls in get_anchored_view, falling back to []"
+                    )
+                    msg["tool_calls"] = []
+            return msg
+
+        return {
+            "window": filtered_window,
+            "messages_before": primitive["messages_before"],
+            "messages_after": primitive["messages_after"],
+            "bookend_start": [_hydrate(r) for r in bookend_start_rows],
+            "bookend_end": [_hydrate(r) for r in bookend_end_rows],
+        }
+
    def resolve_resume_session_id(self, session_id: str) -> str:
        """Redirect a resume target to the descendant session that holds the messages.

@ -1902,6 +2100,7 @@ class SessionDB:
        role_filter: List[str] = None,
        limit: int = 20,
        offset: int = 0,
+        sort: str = None,
    ) -> List[Dict[str, Any]]:
        """
        Full-text search across session messages using FTS5.
@ -1914,6 +2113,15 @@ class SessionDB:

        Returns matching messages with session metadata, content snippet,
        and surrounding context (1 message before and after the match).
+
+        ``sort`` controls temporal ordering:
+          - ``None`` (default): FTS5 BM25 relevance only. Time-neutral.
+          - ``"newest"``: order by message timestamp DESC, then by rank.
+          - ``"oldest"``: order by message timestamp ASC, then by rank.
+
+        The short-CJK LIKE fallback already orders by timestamp DESC and
+        ignores ``sort``. The trigram CJK path honours ``sort`` like the main
+        FTS5 path.
        """
        if not query or not query.strip():
            return []
@ -1922,6 +2130,25 @@ class SessionDB:
        if not query:
            return []

+        # Normalise sort. Anything not in the allowed set falls back to None
+        # (FTS5 rank-only) so callers can pass through user input without
+        # validation.
+        if isinstance(sort, str):
+            sort_norm = sort.strip().lower()
+            if sort_norm not in ("newest", "oldest"):
+                sort_norm = None
+        else:
+            sort_norm = None
+
+        # ORDER BY shared across the main FTS5 path and trigram CJK path.
+        # With sort set, timestamp is primary and rank is the tiebreaker.
+        if sort_norm == "newest":
+            order_by_sql = "ORDER BY m.timestamp DESC, rank"
+        elif sort_norm == "oldest":
+            order_by_sql = "ORDER BY m.timestamp ASC, rank"
+        else:
+            order_by_sql = "ORDER BY rank"
+
        # Build WHERE clauses dynamically
        where_clauses = ["messages_fts MATCH ?"]
        params: list = [query]
@ -1960,7 +2187,7 @@ class SessionDB:
            JOIN messages m ON m.id = messages_fts.rowid
            JOIN sessions s ON s.id = m.session_id
            WHERE {where_sql}
-            ORDER BY rank
+            {order_by_sql}
            LIMIT ? OFFSET ?
        """

@ -2029,7 +2256,7 @@ class SessionDB:
                    JOIN messages m ON m.id = messages_fts_trigram.rowid
                    JOIN sessions s ON s.id = m.session_id
                    WHERE {' AND '.join(tri_where)}
-                    ORDER BY rank
+                    {order_by_sql}
                    LIMIT ? OFFSET ?
                """
                tri_params.extend([limit, offset])
--- a/scripts/release.py
+++ b/scripts/release.py
@ -1058,6 +1058,7 @@ AUTHOR_MAP = {
    "openclaw@agent.local": "29206394",  # PR #22194 salvage (sudo -S brute-force guard, #9590)
    "freedemon@gmail.com": "fr33d3m0n",  # PR #21128 salvage (sudo stdin/askpass DANGEROUS, #17873 cat 4)
    "zhaowh3613@outlook.com": "VinceZcrikl",  # PR #23647 salvage (npm UTF-8 decode on GBK Windows)
+    "abcdjmm970703@gmail.com": "JabberELF",  # PR #20238 seed (session_search dual-mode, evolved into single-shape)
    "anton.kuenzi@gmail.com": "ZeterMordio",  # PR #11754 salvage (zsh completion compdef + _arguments syntax)
    "23yntong@stu.edu.cn": "iuyup",  # PR #6155 salvage (shell=True hardening)
    "86501179+1RB@users.noreply.github.com": "1RB",  # PR #25462 salvage (discord forwarded messages)
--- a/tests/agent/test_system_prompt_restore.py
+++ b/tests/agent/test_system_prompt_restore.py
@ -0,0 +1,223 @@
+"""Tests for ``agent.conversation_loop._restore_or_build_system_prompt``.
+
+Validates the gateway DB-roundtrip path that keeps the system prompt
+byte-stable across turns (fresh AIAgent → must restore from session DB
+instead of rebuilding).  Covers:
+
+  * Successful restore from a stored prompt (present row).
+  * Legitimate first-turn build (no history).
+  * Silent-failure recovery paths:
+      - DB read raises → WARNING + fresh build
+      - Row has system_prompt=NULL → WARNING + fresh build
+      - Row has system_prompt="" → WARNING + fresh build
+      - DB write fails → WARNING (subsequent turns will miss cache)
+"""
+
+from __future__ import annotations
+
+import logging
+from unittest.mock import MagicMock
+
+import pytest
+
+from agent.conversation_loop import _restore_or_build_system_prompt
+
+
+def _make_agent(session_db=None, prebuilt_prompt: str = "BUILT_PROMPT"):
+    """Construct the minimal agent fake the helper needs."""
+    agent = MagicMock()
+    agent._cached_system_prompt = None
+    agent.session_id = "test-session-id"
+    agent.model = "test-model"
+    agent.platform = "cli"
+    agent._session_db = session_db
+    agent._build_system_prompt = MagicMock(return_value=prebuilt_prompt)
+    return agent
+
+
+# ---------------------------------------------------------------------------
+# Happy paths
+# ---------------------------------------------------------------------------
+
+
+class TestStoredPromptReuse:
+    def test_present_row_is_reused_verbatim(self, caplog):
+        """Continuing session with a stored prompt → reuse byte-for-byte."""
+        stored = "Stored prompt from turn 1 — byte-identical reuse"
+        db = MagicMock()
+        db.get_session.return_value = {"system_prompt": stored}
+        agent = _make_agent(session_db=db)
+
+        with caplog.at_level(logging.WARNING, logger="agent.conversation_loop"):
+            _restore_or_build_system_prompt(agent, None, [{"role": "user", "content": "hi"}])
+
+        assert agent._cached_system_prompt == stored
+        agent._build_system_prompt.assert_not_called()
+        db.update_system_prompt.assert_not_called()
+        # No warnings on the happy path
+        assert not [r for r in caplog.records if r.levelno >= logging.WARNING]
+
+    def test_present_row_with_unicode_preserved(self):
+        """Non-ASCII bytes in the stored prompt are not mangled."""
+        stored = "Stored prompt with unicode: ☤ ⚗ ◆ — and emoji 🦊"
+        db = MagicMock()
+        db.get_session.return_value = {"system_prompt": stored}
+        agent = _make_agent(session_db=db)
+
+        _restore_or_build_system_prompt(agent, None, [{"role": "user", "content": "hi"}])
+        assert agent._cached_system_prompt == stored
+
+
+# ---------------------------------------------------------------------------
+# Legitimate fresh-build paths (no history, no DB)
+# ---------------------------------------------------------------------------
+
+
+class TestLegitimateFreshBuild:
+    def test_no_history_skips_db_and_builds_fresh(self, caplog):
+        """First turn with empty history → build fresh, don't touch the DB."""
+        db = MagicMock()
+        agent = _make_agent(session_db=db)
+
+        with caplog.at_level(logging.WARNING, logger="agent.conversation_loop"):
+            _restore_or_build_system_prompt(agent, None, [])
+
+        # No history → DB read skipped entirely
+        db.get_session.assert_not_called()
+        agent._build_system_prompt.assert_called_once_with(None)
+        assert agent._cached_system_prompt == "BUILT_PROMPT"
+        # Persisted to DB
+        db.update_system_prompt.assert_called_once_with(agent.session_id, "BUILT_PROMPT")
+        assert not [r for r in caplog.records if r.levelno >= logging.WARNING]
+
+    def test_no_db_skips_persistence(self):
+        """When session DB is None, build and skip persistence silently."""
+        agent = _make_agent(session_db=None)
+        _restore_or_build_system_prompt(agent, None, [])
+        agent._build_system_prompt.assert_called_once()
+        assert agent._cached_system_prompt == "BUILT_PROMPT"
+
+
+# ---------------------------------------------------------------------------
+# Silent-failure recovery — these are the new A/B logging paths
+# ---------------------------------------------------------------------------
+
+
+class TestSilentFailureWarnings:
+    def test_db_read_exception_warns_and_rebuilds(self, caplog):
+        """DB read raising → WARNING + fall through to fresh build."""
+        db = MagicMock()
+        db.get_session.side_effect = RuntimeError("disk full")
+        agent = _make_agent(session_db=db)
+
+        with caplog.at_level(logging.WARNING, logger="agent.conversation_loop"):
+            _restore_or_build_system_prompt(agent, None, [{"role": "user", "content": "hi"}])
+
+        # Built fresh
+        agent._build_system_prompt.assert_called_once()
+        assert agent._cached_system_prompt == "BUILT_PROMPT"
+        # Loud warning about the read failure
+        warnings = [r for r in caplog.records if r.levelno >= logging.WARNING]
+        assert any("get_session failed" in r.getMessage() for r in warnings), \
+            f"Expected a get_session warning, got: {[r.getMessage() for r in warnings]}"
+        assert any("disk full" in r.getMessage() for r in warnings)
+
+    def test_null_system_prompt_warns_about_unusable_stored_state(self, caplog):
+        """Row exists but system_prompt is NULL → WARNING + fresh build."""
+        db = MagicMock()
+        db.get_session.return_value = {"system_prompt": None}
+        agent = _make_agent(session_db=db)
+
+        with caplog.at_level(logging.WARNING, logger="agent.conversation_loop"):
+            _restore_or_build_system_prompt(agent, None, [{"role": "user", "content": "hi"}])
+
+        agent._build_system_prompt.assert_called_once()
+        warnings = [r.getMessage() for r in caplog.records if r.levelno >= logging.WARNING]
+        assert any("is null" in m and "rebuilding" in m for m in warnings), \
+            f"Expected null-stored-prompt warning, got: {warnings}"
+
+    def test_empty_system_prompt_warns_about_silent_persistence_bug(self, caplog):
+        """Row exists but system_prompt is '' → WARNING about silent write bug."""
+        db = MagicMock()
+        db.get_session.return_value = {"system_prompt": ""}
+        agent = _make_agent(session_db=db)
+
+        with caplog.at_level(logging.WARNING, logger="agent.conversation_loop"):
+            _restore_or_build_system_prompt(agent, None, [{"role": "user", "content": "hi"}])
+
+        agent._build_system_prompt.assert_called_once()
+        warnings = [r.getMessage() for r in caplog.records if r.levelno >= logging.WARNING]
+        assert any("is empty" in m and "rebuilding" in m for m in warnings), \
+            f"Expected empty-stored-prompt warning, got: {warnings}"
+
+    def test_db_write_failure_warns_loudly(self, caplog):
+        """update_system_prompt raising → WARNING (was DEBUG before)."""
+        db = MagicMock()
+        # No prior row (first turn)
+        db.get_session.return_value = None
+        db.update_system_prompt.side_effect = RuntimeError("database is locked")
+        agent = _make_agent(session_db=db)
+
+        with caplog.at_level(logging.WARNING, logger="agent.conversation_loop"):
+            _restore_or_build_system_prompt(agent, None, [])
+
+        # Built and assigned the cache anyway
+        agent._build_system_prompt.assert_called_once()
+        assert agent._cached_system_prompt == "BUILT_PROMPT"
+        # Warning surfaced
+        warnings = [r.getMessage() for r in caplog.records if r.levelno >= logging.WARNING]
+        assert any(
+            "update_system_prompt failed" in m and "database is locked" in m
+            for m in warnings
+        ), f"Expected write-failure warning, got: {warnings}"
+
+    def test_no_history_with_null_row_does_not_warn(self, caplog):
+        """First turn (no history) hitting a null row is not surprising — no warn."""
+        db = MagicMock()
+        db.get_session.return_value = {"system_prompt": None}
+        agent = _make_agent(session_db=db)
+
+        with caplog.at_level(logging.WARNING, logger="agent.conversation_loop"):
+            # Empty history → DB read is skipped entirely
+            _restore_or_build_system_prompt(agent, None, [])
+
+        db.get_session.assert_not_called()
+        # No "rebuilding from scratch" warning because history is empty
+        warnings = [r.getMessage() for r in caplog.records if r.levelno >= logging.WARNING]
+        assert not any("rebuilding" in m for m in warnings)
+
+
+# ---------------------------------------------------------------------------
+# Byte-stability invariant
+# ---------------------------------------------------------------------------
+
+
+class TestPromptStabilityInvariant:
+    def test_restored_prompt_is_byte_identical_to_stored(self):
+        """The restored prompt must equal the stored bytes exactly — no
+        normalization, trimming, or concat that could shift the prefix.
+
+        This is the core invariant: any byte-level change at this point
+        invalidates KV cache on every prefix-cache backend.
+        """
+        stored = (
+            "You are Hermes Agent.\n"
+            "\n"
+            "Conversation started: Sunday, May 17, 2026\n"
+            "Session ID: 20260517_153500_abc123\n"
+        )
+        db = MagicMock()
+        db.get_session.return_value = {"system_prompt": stored}
+        agent = _make_agent(session_db=db)
+
+        _restore_or_build_system_prompt(agent, None, [{"role": "user", "content": "hi"}])
+
+        # Equality check — the restored prompt must match the stored string
+        # exactly, i.e. no slicing, trimming, or normalization along the way.
+        assert agent._cached_system_prompt == stored
+        # Byte-level check
+        assert agent._cached_system_prompt.encode("utf-8") == stored.encode("utf-8")
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
--- a/tests/hermes_state/test_get_anchored_view.py
+++ b/tests/hermes_state/test_get_anchored_view.py
@ -0,0 +1,161 @@
+"""Tests for SessionDB.get_anchored_view — anchored window + session bookends.
+
+Used by the discovery shape of session_search: an FTS5 match becomes the
+anchor, the call returns goal (bookend_start) + match (window) + resolution
+(bookend_end) in a single round trip, no LLM.
+"""
+import pytest
+
+from hermes_state import SessionDB
+
+
+@pytest.fixture
+def db(tmp_path):
+    return SessionDB(tmp_path / "state.db")
+
+
+def _seed_long_session(db, sid="s1", n=30):
+    """Seed ``sid`` with ``n`` alternating user/assistant prose turns; return ids in insert order."""
+    speakers = ("user", "assistant")  # even index → user, odd index → assistant
+    db.create_session(sid, source="cli")
+    ids = [
+        db.append_message(sid, role=speakers[idx % 2], content=f"prose msg {idx}")
+        for idx in range(n)
+    ]
+    return ids
+
+
+class TestWindowAndBookendShape:
+    def test_returns_window_with_bookend_start_and_end(self, db):
+        ids = _seed_long_session(db, n=30)
+        # Anchor mid-session
+        anchor = ids[15]
+        view = db.get_anchored_view("s1", anchor, window=3, bookend=3)
+        assert len(view["window"]) == 7  # ±3 + anchor
+        assert len(view["bookend_start"]) == 3
+        assert len(view["bookend_end"]) == 3
+        # bookend_start is the first 3 ids of the session
+        assert [m["id"] for m in view["bookend_start"]] == ids[:3]
+        # bookend_end is the last 3 ids of the session
+        assert [m["id"] for m in view["bookend_end"]] == ids[-3:]
+
+    def test_window_anchor_marked_correctly(self, db):
+        ids = _seed_long_session(db, n=20)
+        anchor = ids[10]
+        view = db.get_anchored_view("s1", anchor, window=2, bookend=3)
+        # Anchor message is present in the window
+        anchor_msgs = [m for m in view["window"] if m["id"] == anchor]
+        assert len(anchor_msgs) == 1
+
+
+class TestBookendOverlap:
+    """Bookends shouldn't duplicate messages that are already in the window."""
+
+    def test_bookend_start_empty_when_window_covers_session_head(self, db):
+        ids = _seed_long_session(db, n=10)
+        # Anchor on msg 1 (id index 1), window=3 → covers ids[0..4]
+        anchor = ids[1]
+        view = db.get_anchored_view("s1", anchor, window=3, bookend=3)
+        # Window includes session head, so bookend_start should be empty
+        assert view["bookend_start"] == []
+        # bookend_end is still populated
+        assert len(view["bookend_end"]) > 0
+
+    def test_bookend_end_empty_when_window_covers_session_tail(self, db):
+        ids = _seed_long_session(db, n=10)
+        # Anchor on second-to-last
+        anchor = ids[-2]
+        view = db.get_anchored_view("s1", anchor, window=3, bookend=3)
+        assert view["bookend_end"] == []
+        assert len(view["bookend_start"]) > 0
+
+    def test_short_session_both_bookends_empty(self, db):
+        ids = _seed_long_session(db, n=5)
+        view = db.get_anchored_view("s1", ids[2], window=10, bookend=3)
+        # Window covers entire session
+        assert view["bookend_start"] == []
+        assert view["bookend_end"] == []
+        # And window has all 5 messages
+        assert len(view["window"]) == 5
+
+
+class TestRoleFiltering:
+    def test_tool_role_filtered_from_window(self, db):
+        db.create_session("s1", source="cli")
+        user_ids = []
+        for i in range(5):
+            user_ids.append(db.append_message("s1", role="user", content=f"u{i}"))
+            db.append_message("s1", role="tool", content=f"tool output {i}", tool_name="x")
+        # Anchor on user message
+        view = db.get_anchored_view("s1", user_ids[2], window=5, bookend=0)
+        # No tool messages should appear in the window
+        roles = [m.get("role") for m in view["window"]]
+        assert "tool" not in roles
+
+    def test_anchor_preserved_even_when_tool_role(self, db):
+        db.create_session("s1", source="cli")
+        db.append_message("s1", role="user", content="ask")
+        tool_id = db.append_message("s1", role="tool", content="tool output", tool_name="x")
+        db.append_message("s1", role="user", content="follow-up")
+        # Anchor on the tool message — should still appear despite default filter
+        view = db.get_anchored_view("s1", tool_id, window=5, bookend=0)
+        ids_in_window = [m["id"] for m in view["window"]]
+        assert tool_id in ids_in_window
+
+    def test_keep_roles_none_disables_filter(self, db):
+        db.create_session("s1", source="cli")
+        anchor_id = db.append_message("s1", role="user", content="ask")
+        db.append_message("s1", role="tool", content="output", tool_name="x")
+        view = db.get_anchored_view("s1", anchor_id, window=5, bookend=0, keep_roles=None)
+        roles = [m.get("role") for m in view["window"]]
+        assert "tool" in roles
+
+
+class TestEmptyContentFilter:
+    """Tool-call-only assistant turns (empty content) should be skipped in bookends."""
+
+    def test_empty_content_messages_excluded_from_bookends(self, db):
+        db.create_session("s1", source="cli")
+        # Real prose opener
+        db.append_message("s1", role="user", content="Let's start the work")
+        # Empty content assistant turn (tool-call-only — common in agent loops)
+        db.append_message("s1", role="assistant", content="", tool_calls=[{"id": "t1", "function": {"name": "x", "arguments": "{}"}}])
+        # More prose — keep the returned ids so the anchor below is a real message id
+        prose_ids = [db.append_message("s1", role="user" if i % 2 == 0 else "assistant", content=f"prose {i}")
+                     for i in range(20)]
+        # Another empty assistant near the end
+        db.append_message("s1", role="assistant", content="", tool_calls=[{"id": "t2", "function": {"name": "y", "arguments": "{}"}}])
+        # Prose closer
+        db.append_message("s1", role="assistant", content="Final decision: ship it.")
+
+        # Anchor mid-session on the 14th prose message, without assuming contiguous row ids
+        view = db.get_anchored_view("s1", prose_ids[13], window=2, bookend=3)
+        # Bookend_start must be populated (otherwise the check below is vacuous) and prose-only
+        assert view["bookend_start"], "expected a non-empty bookend_start for a mid-session anchor"
+        assert all(m.get("content") for m in view["bookend_start"]), "bookend_start should skip empty-content messages"
+        # Bookend_end should include the closer
+        end_contents = [m.get("content") for m in view["bookend_end"]]
+        assert any("Final decision" in (c or "") for c in end_contents)
+
+
+class TestAnchorValidation:
+    def test_missing_anchor_returns_empty_view(self, db):
+        _seed_long_session(db, n=10)
+        view = db.get_anchored_view("s1", 999999, window=5, bookend=3)
+        assert view["window"] == []
+        assert view["bookend_start"] == []
+        assert view["bookend_end"] == []
+        assert view["messages_before"] == 0
+        assert view["messages_after"] == 0
+
+
+class TestSessionIsolation:
+    """Bookends must not cross session boundaries."""
+
+    def test_bookends_only_from_anchor_session(self, db):
+        ids1 = _seed_long_session(db, sid="s1", n=20)
+        _seed_long_session(db, sid="s2", n=20)
+        view = db.get_anchored_view("s1", ids1[10], window=2, bookend=3)
+        # Every bookend message must carry the anchor session's id (s1), never s2's
+        for m in view["bookend_start"] + view["bookend_end"]:
+            assert m.get("session_id") == "s1"
--- a/tests/hermes_state/test_get_messages_around.py
+++ b/tests/hermes_state/test_get_messages_around.py
@ -0,0 +1,148 @@
+"""Tests for SessionDB.get_messages_around (anchored-window primitive).
+
+Used by session_search both for the discovery shape (FTS5 match as anchor)
+and the scroll shape (user-supplied anchor). Returns a window of messages
+around the anchor plus before/after counts so callers can detect session
+boundaries.
+"""
+import pytest
+
+from hermes_state import SessionDB
+
+
+@pytest.fixture
+def db(tmp_path):
+    return SessionDB(tmp_path / "state.db")
+
+
+def _seed(db, sid="s1", n=10):
+    """Populate session ``sid`` with ``n`` messages (user on even turns, assistant on odd)
+    and return the ids handed back by ``append_message``, in insertion order."""
+    db.create_session(sid, source="cli")
+    turn_roles = ("user", "assistant")
+    # append_message returns the new id, so the comprehension collects ids directly
+    return [
+        db.append_message(sid, role=turn_roles[pos % 2], content=f"msg {pos}")
+        for pos in range(n)
+    ]
+
+
+class TestBasicWindow:
+    def test_returns_window_around_anchor(self, db):
+        ids = _seed(db, n=10)
+        anchor = ids[5]
+        view = db.get_messages_around("s1", anchor, window=2)
+        # Expected: 2 before + anchor + 2 after = 5 messages
+        msgs = view["window"]
+        assert len(msgs) == 5
+        assert [m["id"] for m in msgs] == [ids[3], ids[4], ids[5], ids[6], ids[7]]
+        assert view["messages_before"] == 2
+        assert view["messages_after"] == 2
+
+    def test_window_zero_returns_only_anchor(self, db):
+        ids = _seed(db, n=5)
+        view = db.get_messages_around("s1", ids[2], window=0)
+        assert len(view["window"]) == 1
+        assert view["window"][0]["id"] == ids[2]
+        assert view["messages_before"] == 0
+        assert view["messages_after"] == 0
+
+    def test_negative_window_clamps_to_zero(self, db):
+        ids = _seed(db, n=5)
+        view = db.get_messages_around("s1", ids[2], window=-3)
+        # Just anchor, like window=0
+        assert len(view["window"]) == 1
+        assert view["window"][0]["id"] == ids[2]
+
+
+class TestBoundaryDetection:
+    """messages_before / messages_after tell the agent it's at start/end."""
+
+    def test_at_session_start_messages_before_is_short(self, db):
+        ids = _seed(db, n=10)
+        # Anchor on first message; ask for window=5
+        view = db.get_messages_around("s1", ids[0], window=5)
+        assert view["messages_before"] == 0  # nothing before the first msg
+        assert view["messages_after"] == 5
+        # window contains anchor + 5 after = 6 messages
+        assert len(view["window"]) == 6
+
+    def test_at_session_end_messages_after_is_short(self, db):
+        ids = _seed(db, n=10)
+        view = db.get_messages_around("s1", ids[-1], window=5)
+        assert view["messages_before"] == 5
+        assert view["messages_after"] == 0
+        assert len(view["window"]) == 6
+
+    def test_window_larger_than_session(self, db):
+        ids = _seed(db, n=3)
+        view = db.get_messages_around("s1", ids[1], window=50)
+        # All 3 messages return, both boundaries hit
+        assert len(view["window"]) == 3
+        assert view["messages_before"] == 1
+        assert view["messages_after"] == 1
+
+
+class TestAnchorValidation:
+    def test_missing_anchor_returns_empty(self, db):
+        _seed(db, n=5)
+        view = db.get_messages_around("s1", 99999, window=5)
+        assert view["window"] == []
+        assert view["messages_before"] == 0
+        assert view["messages_after"] == 0
+
+    def test_anchor_in_different_session_returns_empty(self, db):
+        # Two sessions, ask for s1's anchor in s2's namespace
+        ids1 = _seed(db, sid="s1", n=5)
+        _seed(db, sid="s2", n=5)
+        view = db.get_messages_around("s2", ids1[2], window=2)
+        assert view["window"] == []
+
+
+class TestScrollPattern:
+    """The forward/backward scroll loop the agent will run."""
+
+    def test_scroll_forward_re_anchored_on_last_id(self, db):
+        ids = _seed(db, n=20)
+        anchor = ids[5]
+        v1 = db.get_messages_around("s1", anchor, window=3)
+        last_id = v1["window"][-1]["id"]
+        v2 = db.get_messages_around("s1", last_id, window=3)
+        # Boundary id (last_id) appears in both windows (in v2 it's the anchor)
+        assert last_id in [m["id"] for m in v1["window"]]
+        assert last_id in [m["id"] for m in v2["window"]]
+        # v2's window extends beyond v1
+        assert max(m["id"] for m in v2["window"]) > max(m["id"] for m in v1["window"])
+
+    def test_scroll_backward_re_anchored_on_first_id(self, db):
+        ids = _seed(db, n=20)
+        anchor = ids[10]
+        v1 = db.get_messages_around("s1", anchor, window=3)
+        first_id = v1["window"][0]["id"]
+        v2 = db.get_messages_around("s1", first_id, window=3)
+        assert first_id in [m["id"] for m in v1["window"]]
+        assert first_id in [m["id"] for m in v2["window"]]
+        assert min(m["id"] for m in v2["window"]) < min(m["id"] for m in v1["window"])
+
+
+class TestContentHydration:
+    def test_content_is_decoded(self, db):
+        ids = _seed(db, n=3)
+        view = db.get_messages_around("s1", ids[1], window=1)
+        for m in view["window"]:
+            assert isinstance(m.get("content"), str)
+            assert m["content"].startswith("msg ")
+
+    def test_tool_calls_deserialized(self, db):
+        db.create_session("s1", source="cli")
+        # Message with tool_calls (pass list — append_message JSON-encodes it)
+        tc_payload = [{"id": "t1", "function": {"name": "x", "arguments": "{}"}}]
+        db.append_message("s1", role="assistant", content="", tool_calls=tc_payload)
+        mid = db.append_message("s1", role="tool", content="result", tool_name="x")
+
+        view = db.get_messages_around("s1", mid, window=2)
+        # Find the assistant message with tool_calls
+        asst = [m for m in view["window"] if m.get("role") == "assistant"]
+        assert asst, "expected an assistant message"
+        # tool_calls should be a list after hydration, not a string
+        assert isinstance(asst[0].get("tool_calls"), list)
--- a/tests/run_agent/test_run_agent.py
+++ b/tests/run_agent/test_run_agent.py
@ -989,6 +989,28 @@ class TestBuildSystemPrompt:
        # Should contain current date info like "Conversation started:"
        assert "Conversation started:" in prompt

+    def test_datetime_is_date_only_not_minute_precision(self, agent):
+        """Timestamp must be date-only (no HH:MM) so the system prompt
+        stays byte-stable for the full day. Minute precision invalidates
+        prefix-cache KV on every rebuild path (compression, fresh-agent
+        gateway turns, session resume without a stored prompt)."""
+        import re as _re
+        prompt = agent._build_system_prompt()
+        # Locate the "Conversation started:" line and inspect it as-is
+        for line in prompt.splitlines():
+            if line.startswith("Conversation started:"):
+                # Must NOT contain AM/PM indicator (minute precision had %I:%M %p)
+                assert " AM" not in line and " PM" not in line, (
+                    f"Timestamp line has time-of-day, breaks daily cache stability: {line!r}"
+                )
+                # Must NOT contain a colon followed by two digits (HH:MM pattern)
+                assert not _re.search(r":\d{2}", line), (
+                    f"Timestamp line has HH:MM, breaks daily cache stability: {line!r}"
+                )
+                break
+        else:
+            raise AssertionError("Expected a 'Conversation started:' line in the system prompt")
+
    def test_includes_nous_subscription_prompt(self, agent, monkeypatch):
        monkeypatch.setattr(run_agent, "build_nous_subscription_prompt", lambda tool_names: "NOUS SUBSCRIPTION BLOCK")
        prompt = agent._build_system_prompt()
@ -1074,6 +1096,40 @@ class TestToolUseEnforcementConfig:
        prompt = agent._build_system_prompt()
        assert TOOL_USE_ENFORCEMENT_GUIDANCE not in prompt

+    def test_auto_injects_for_grok(self):
+        """xAI Grok / xai-oauth models hit the same enforcement path as GPT."""
+        from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE
+        agent = self._make_agent(model="x-ai/grok-4.3", tool_use_enforcement="auto")
+        prompt = agent._build_system_prompt()
+        assert TOOL_USE_ENFORCEMENT_GUIDANCE in prompt
+
+    def test_auto_injects_execution_guidance_for_grok(self):
+        """Grok also gets OPENAI_MODEL_EXECUTION_GUIDANCE (verification,
+        mandatory_tool_use, act_dont_ask). Same failure modes as GPT in
+        practice — claims completion without tool calls, suggests workarounds
+        instead of using existing tools.
+        """
+        from agent.prompt_builder import OPENAI_MODEL_EXECUTION_GUIDANCE
+        agent = self._make_agent(model="x-ai/grok-4.3", tool_use_enforcement="auto")
+        prompt = agent._build_system_prompt()
+        assert OPENAI_MODEL_EXECUTION_GUIDANCE in prompt
+
+    def test_auto_injects_execution_guidance_for_xai_oauth_model(self):
+        """xai-oauth bare model names (no slash) also match the grok pattern."""
+        from agent.prompt_builder import OPENAI_MODEL_EXECUTION_GUIDANCE
+        agent = self._make_agent(model="grok-4.3", tool_use_enforcement="auto")
+        prompt = agent._build_system_prompt()
+        assert OPENAI_MODEL_EXECUTION_GUIDANCE in prompt
+
+    def test_auto_does_not_inject_execution_guidance_for_claude(self):
+        """Sanity: execution guidance stays off for non-targeted families."""
+        from agent.prompt_builder import OPENAI_MODEL_EXECUTION_GUIDANCE
+        agent = self._make_agent(
+            model="anthropic/claude-sonnet-4", tool_use_enforcement="auto"
+        )
+        prompt = agent._build_system_prompt()
+        assert OPENAI_MODEL_EXECUTION_GUIDANCE not in prompt
+
    def test_true_forces_for_all_models(self):
        from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE
        agent = self._make_agent(model="anthropic/claude-sonnet-4", tool_use_enforcement=True)
--- a/tests/tools/test_llm_content_none_guard.py
+++ b/tests/tools/test_llm_content_none_guard.py
@ -155,24 +155,6 @@ class TestSkillsGuardContentNone:
        assert content == ""


-# ── session_search_tool (line 164) ────────────────────────────────────────
-
-class TestSessionSearchContentNone:
-    """tools/session_search_tool.py — _summarize_session() return line"""
-
-    def test_none_content_raises_before_fix(self):
-        response = _make_response(None)
-
-        with pytest.raises(AttributeError):
-            response.choices[0].message.content.strip()
-
-    def test_none_content_safe_with_or_guard(self):
-        response = _make_response(None)
-
-        content = (response.choices[0].message.content or "").strip()
-        assert content == ""
-
-
 # ── integration: verify the actual source lines are guarded ───────────────

 class TestSourceLinesAreGuarded:
@ -218,13 +200,6 @@ class TestSourceLinesAreGuarded:
            ".content.strip() — apply `(... or \"\").strip()` guard"
        )

-    def test_session_search_tool_guarded(self):
-        src = self._read_file("tools/session_search_tool.py")
-        assert ".message.content.strip()" not in src, (
-            "tools/session_search_tool.py still has unguarded "
-            ".content.strip() — apply `(... or \"\").strip()` guard"
-        )
-

 # ── extract_content_or_reasoning() ────────────────────────────────────────

--- a/tests/tools/test_session_search.py
+++ b/tests/tools/test_session_search.py
@ -1,578 +1,401 @@
-"""Tests for tools/session_search_tool.py — helper functions and search dispatcher."""
+"""Tests for the single-shape session_search tool.

-import asyncio
+Three calling shapes:
+  1. DISCOVERY — pass query → FTS5 + anchored window + bookends per hit
+  2. SCROLL    — pass session_id + around_message_id → just the window
+  3. BROWSE    — no args → recent sessions chronologically
+
+All run zero LLM calls.
+"""
 import json
 import time
+
 import pytest

+from hermes_state import SessionDB
 from tools.session_search_tool import (
-    _format_timestamp,
-    _format_conversation,
-    _truncate_around_matches,
-    _get_session_search_max_concurrency,
-    _list_recent_sessions,
-    _HIDDEN_SESSION_SOURCES,
-    MAX_SESSION_CHARS,
    SESSION_SEARCH_SCHEMA,
+    _HIDDEN_SESSION_SOURCES,
+    _format_timestamp,
+    session_search,
 )


+@pytest.fixture
+def db(tmp_path):
+    return SessionDB(tmp_path / "state.db")
+
+
+def _seed_modpack_sessions(db):
+    """Create three sessions about a modpack so FTS5 has hits to dedupe."""
+    now = int(time.time())
+    # Older session — modpack origin
+    db.create_session("s_oldest", source="cli")
+    db._conn.execute("UPDATE sessions SET started_at = ?, title = ? WHERE id = ?",
+                     (now - 30000, "Building the Modpack", "s_oldest"))
+    db.append_message("s_oldest", role="user", content="Let's build a Minecraft modpack")
+    db.append_message("s_oldest", role="assistant", content="Great. Let me scaffold the modpack repo.")
+    db.append_message("s_oldest", role="user", content="Use NeoForge 1.21.1")
+    db.append_message("s_oldest", role="assistant", content="Done. Modpack repo created with NeoForge 1.21.1.")
+    db.append_message("s_oldest", role="assistant", content="Tier-0 mods installed; modpack smoke test passes.")
+
+    # Middle session — modpack quest coverage
+    db.create_session("s_middle", source="cli")
+    db._conn.execute("UPDATE sessions SET started_at = ?, title = ? WHERE id = ?",
+                     (now - 15000, "Modpack Quest Coverage", "s_middle"))
+    db.append_message("s_middle", role="user", content="Deep-dive every modpack reference quest guide")
+    db.append_message("s_middle", role="assistant", content="Surveying ATM10 questbook for modpack inspiration.")
+    db.append_message("s_middle", role="user", content="Update the modpack version too")
+    db.append_message("s_middle", role="assistant", content="Modpack version bumped 0.4 → 0.8.5; quest coverage page added.")
+
+    # Newest session — modpack mob spawn fix
+    db.create_session("s_newest", source="cli")
+    db._conn.execute("UPDATE sessions SET started_at = ?, title = ? WHERE id = ?",
+                     (now - 1000, "Modpack Mob Spawn Fix", "s_newest"))
+    db.append_message("s_newest", role="user", content="Fix the modpack mob spawning")
+    db.append_message("s_newest", role="assistant", content="Investigating elite mob gating in the modpack KubeJS.")
+    db.append_message("s_newest", role="assistant", content="Shipped commit b850442. Modpack alternator nerfed too.")
+    db._conn.commit()
+
+
 # =========================================================================
-# Tool schema guidance
+# Schema invariants
 # =========================================================================

-class TestHiddenSessionSources:
-    """Verify the _HIDDEN_SESSION_SOURCES constant used for third-party isolation."""
+class TestSchema:
+    def test_schema_has_required_params(self):
+        params = SESSION_SEARCH_SCHEMA["parameters"]["properties"]
+        # Discovery shape
+        assert "query" in params
+        assert "limit" in params
+        assert "sort" in params
+        # Scroll shape
+        assert "session_id" in params
+        assert "around_message_id" in params
+        assert "window" in params
+        # Shared
+        assert "role_filter" in params

-    def test_tool_source_is_hidden(self):
+    def test_no_mode_parameter(self):
+        # Mode is inferred from which args are set — no explicit mode param
+        params = SESSION_SEARCH_SCHEMA["parameters"]["properties"]
+        assert "mode" not in params
+
+    def test_sort_enum(self):
+        params = SESSION_SEARCH_SCHEMA["parameters"]["properties"]
+        assert params["sort"]["enum"] == ["newest", "oldest"]
+
+    def test_schema_description_teaches_scroll(self):
+        desc = SESSION_SEARCH_SCHEMA["description"]
+        assert "SCROLL" in desc
+        assert "DISCOVERY" in desc
+        assert "BROWSE" in desc
+        # Must explain how to scroll
+        assert "scroll FORWARD" in desc or "messages[-1]" in desc
+
+    def test_no_llm_promise_in_description(self):
+        # The new design never calls an LLM
+        desc = SESSION_SEARCH_SCHEMA["description"].lower()
+        assert "no llm" in desc
+
+
+class TestHiddenSources:
+    def test_tool_source_hidden(self):
        assert "tool" in _HIDDEN_SESSION_SOURCES

-    def test_standard_sources_not_hidden(self):
-        for src in ("cli", "telegram", "discord", "slack", "cron"):
-            assert src not in _HIDDEN_SESSION_SOURCES
-
-
-class TestSessionSearchSchema:
-    def test_keeps_cross_session_recall_guidance_without_current_session_nudge(self):
-        description = SESSION_SEARCH_SCHEMA["description"]
-        assert "past conversations" in description
-        assert "recent turns of the current session" not in description
-
-
-# =========================================================================
-# _format_timestamp
-# =========================================================================

 class TestFormatTimestamp:
-    def test_unix_float(self):
-        ts = 1700000000.0  # Nov 14, 2023
-        result = _format_timestamp(ts)
-        assert "2023" in result or "November" in result
+    def test_unix_timestamp(self):
+        out = _format_timestamp(1700000000)
+        assert "2023" in out

-    def test_unix_int(self):
-        result = _format_timestamp(1700000000)
-        assert isinstance(result, str)
-        assert len(result) > 5
-
-    def test_iso_string(self):
-        result = _format_timestamp("2024-01-15T10:30:00")
-        assert isinstance(result, str)
-
-    def test_none_returns_unknown(self):
+    def test_none(self):
        assert _format_timestamp(None) == "unknown"

-    def test_numeric_string(self):
-        result = _format_timestamp("1700000000.0")
-        assert isinstance(result, str)
-        assert "unknown" not in result.lower()
+    def test_iso_string_passthrough(self):
+        out = _format_timestamp("not-a-number-string")
+        assert out == "not-a-number-string"


 # =========================================================================
-# _format_conversation
+# Browse shape (no args)
 # =========================================================================

-class TestFormatConversation:
-    def test_basic_messages(self):
-        msgs = [
-            {"role": "user", "content": "Hello"},
-            {"role": "assistant", "content": "Hi there!"},
-        ]
-        result = _format_conversation(msgs)
-        assert "[USER]: Hello" in result
-        assert "[ASSISTANT]: Hi there!" in result
-
-    def test_tool_message(self):
-        msgs = [
-            {"role": "tool", "content": "search results", "tool_name": "web_search"},
-        ]
-        result = _format_conversation(msgs)
-        assert "[TOOL:web_search]" in result
-
-    def test_long_tool_output_truncated(self):
-        msgs = [
-            {"role": "tool", "content": "x" * 1000, "tool_name": "terminal"},
-        ]
-        result = _format_conversation(msgs)
-        assert "[truncated]" in result
-
-    def test_assistant_with_tool_calls(self):
-        msgs = [
-            {
-                "role": "assistant",
-                "content": "",
-                "tool_calls": [
-                    {"function": {"name": "web_search"}},
-                    {"function": {"name": "terminal"}},
-                ],
-            },
-        ]
-        result = _format_conversation(msgs)
-        assert "web_search" in result
-        assert "terminal" in result
-
-    def test_empty_messages(self):
-        result = _format_conversation([])
-        assert result == ""
-
-
-# =========================================================================
-# _truncate_around_matches
-# =========================================================================
-
-class TestTruncateAroundMatches:
-    def test_short_text_unchanged(self):
-        text = "Short text about docker"
-        result = _truncate_around_matches(text, "docker")
-        assert result == text
-
-    def test_long_text_truncated(self):
-        # Create text longer than MAX_SESSION_CHARS with query term in middle
-        padding = "x" * (MAX_SESSION_CHARS + 5000)
-        text = padding + " KEYWORD_HERE " + padding
-        result = _truncate_around_matches(text, "KEYWORD_HERE")
-        assert len(result) <= MAX_SESSION_CHARS + 100  # +100 for prefix/suffix markers
-        assert "KEYWORD_HERE" in result
-
-    def test_truncation_adds_markers(self):
-        text = "a" * 50000 + " target " + "b" * (MAX_SESSION_CHARS + 5000)
-        result = _truncate_around_matches(text, "target")
-        assert "truncated" in result.lower()
-
-    def test_no_match_takes_from_start(self):
-        text = "x" * (MAX_SESSION_CHARS + 5000)
-        result = _truncate_around_matches(text, "nonexistent")
-        # Should take from the beginning
-        assert result.startswith("x")
-
-    def test_match_at_beginning(self):
-        text = "KEYWORD " + "x" * (MAX_SESSION_CHARS + 5000)
-        result = _truncate_around_matches(text, "KEYWORD")
-        assert "KEYWORD" in result
-
-    def test_multiword_phrase_match_beats_individual_term(self):
-        """Full phrase deep in text should be found even when a single term
-        appears much earlier in boilerplate."""
-        boilerplate = "The project setup is complex. " * 500  # ~15K, has 'project' early
-        filler = "x" * (MAX_SESSION_CHARS + 20000)
-        target = "We reviewed the keystone project roadmap in detail."
-        text = boilerplate + filler + target + filler
-        result = _truncate_around_matches(text, "keystone project")
-        assert "keystone project" in result.lower()
-
-    def test_multiword_proximity_cooccurrence(self):
-        """When exact phrase is absent, terms co-occurring within proximity
-        should be preferred over a lone early term."""
-        early = "project " + "a" * (MAX_SESSION_CHARS + 20000)
-        # Place 'keystone' and 'project' near each other (but not as exact phrase)
-        cooccur = "this keystone initiative for the project was pivotal"
-        tail = "b" * (MAX_SESSION_CHARS + 20000)
-        text = early + cooccur + tail
-        result = _truncate_around_matches(text, "keystone project")
-        assert "keystone" in result.lower()
-        assert "project" in result.lower()
-
-    def test_multiword_window_maximises_coverage(self):
-        """Sliding window should capture as many match clusters as possible."""
-        # Place two phrase matches: one at ~50K, one at ~60K, both should fit
-        pre = "z" * 50000
-        match1 = " alpha beta "
-        gap = "z" * 10000
-        match2 = " alpha beta "
-        post = "z" * (MAX_SESSION_CHARS + 40000)
-        text = pre + match1 + gap + match2 + post
-        result = _truncate_around_matches(text, "alpha beta")
-        assert result.lower().count("alpha beta") == 2
-
-
-class TestSessionSearchConcurrency:
-    def test_defaults_to_three(self):
-        assert _get_session_search_max_concurrency() == 3
-
-    def test_reads_and_clamps_configured_value(self, monkeypatch):
-        monkeypatch.setattr(
-            "hermes_cli.config.load_config",
-            lambda: {"auxiliary": {"session_search": {"max_concurrency": 9}}},
-        )
-        assert _get_session_search_max_concurrency() == 5
-
-    def test_session_search_respects_configured_concurrency_limit(self, monkeypatch):
-        from unittest.mock import MagicMock
-        from tools.session_search_tool import session_search
-
-        monkeypatch.setattr(
-            "hermes_cli.config.load_config",
-            lambda: {"auxiliary": {"session_search": {"max_concurrency": 1}}},
-        )
-
-        max_seen = {"value": 0}
-        active = {"value": 0}
-
-        async def fake_summarize(_text, _query, _meta):
-            active["value"] += 1
-            max_seen["value"] = max(max_seen["value"], active["value"])
-            await asyncio.sleep(0.01)
-            active["value"] -= 1
-            return "summary"
-
-        monkeypatch.setattr("tools.session_search_tool._summarize_session", fake_summarize)
-        monkeypatch.setattr("model_tools._run_async", lambda coro: asyncio.run(coro))
-
-        mock_db = MagicMock()
-        mock_db.search_messages.return_value = [
-            {"session_id": "s1", "source": "cli", "session_started": 1709500000, "model": "test"},
-            {"session_id": "s2", "source": "cli", "session_started": 1709500001, "model": "test"},
-            {"session_id": "s3", "source": "cli", "session_started": 1709500002, "model": "test"},
-        ]
-        mock_db.get_session.side_effect = lambda sid: {
-            "id": sid,
-            "parent_session_id": None,
-            "source": "cli",
-            "started_at": 1709500000,
-        }
-        mock_db.get_messages_as_conversation.side_effect = lambda sid: [
-            {"role": "user", "content": f"message from {sid}"},
-            {"role": "assistant", "content": "response"},
-        ]
-
-        result = json.loads(session_search(query="message", db=mock_db, limit=3))
-
+class TestBrowseShape:
+    def test_no_args_returns_recent_sessions(self, db):
+        _seed_modpack_sessions(db)
+        result = json.loads(session_search(db=db))
        assert result["success"] is True
-        assert result["count"] == 3
-        assert max_seen["value"] == 1
+        assert result["mode"] == "browse"
+        assert result["count"] >= 3

+    def test_browse_excludes_current_session(self, db):
+        _seed_modpack_sessions(db)
+        result = json.loads(session_search(db=db, current_session_id="s_newest"))
+        sids = [r["session_id"] for r in result["results"]]
+        assert "s_newest" not in sids

-class TestRecentSessionListing:
-    def test_recent_mode_requests_last_active_ordering(self):
-        from unittest.mock import MagicMock
-
-        mock_db = MagicMock()
-        mock_db.list_sessions_rich.return_value = []
-
-        result = json.loads(_list_recent_sessions(mock_db, limit=5))
-
-        assert result["success"] is True
-        mock_db.list_sessions_rich.assert_called_once_with(
-            limit=10,
-            exclude_sources=["tool"],
-            order_by_last_active=True,
-        )
-
-    def test_current_child_session_excludes_root_lineage_even_when_child_id_is_longer(self):
-        from unittest.mock import MagicMock
-
-        mock_db = MagicMock()
-        mock_db.list_sessions_rich.return_value = [
-            {
-                "id": "root",
-                "title": "Current conversation",
-                "source": "cli",
-                "started_at": 1709500000,
-                "last_active": 1709500100,
-                "message_count": 4,
-                "preview": "current root",
-                "parent_session_id": None,
-            },
-            {
-                "id": "other_session",
-                "title": "Other conversation",
-                "source": "cli",
-                "started_at": 1709400000,
-                "last_active": 1709400100,
-                "message_count": 3,
-                "preview": "other root",
-                "parent_session_id": None,
-            },
-        ]
-
-        def _get_session(session_id):
-            if session_id == "child_session_id_that_is_definitely_longer":
-                return {"parent_session_id": "root"}
-            if session_id == "root":
-                return {"parent_session_id": None}
-            return None
-
-        mock_db.get_session.side_effect = _get_session
-
-        result = json.loads(_list_recent_sessions(
-            mock_db,
-            limit=5,
-            current_session_id="child_session_id_that_is_definitely_longer",
-        ))
-
-        assert result["success"] is True
-        assert [item["session_id"] for item in result["results"]] == ["other_session"]
-        assert all(item["session_id"] != "root" for item in result["results"])
+    def test_browse_returns_titles(self, db):
+        _seed_modpack_sessions(db)
+        result = json.loads(session_search(db=db))
+        titles = [r.get("title") for r in result["results"]]
+        assert any("Modpack" in (t or "") for t in titles)


 # =========================================================================
-# session_search (dispatcher)
+# Discovery shape (with query)
 # =========================================================================

-class TestSessionSearch:
-    def test_no_db_lazily_opens_default_session_db(self, monkeypatch):
-        from unittest.mock import MagicMock
-        from tools.session_search_tool import session_search
-
-        mock_db = MagicMock()
-        mock_db.search_messages.return_value = []
-
-        class FakeSessionDB:
-            def __new__(cls):
-                return mock_db
-
-        import types
-        import sys
-
-        fake_state = types.ModuleType("hermes_state")
-        fake_state.SessionDB = FakeSessionDB
-        monkeypatch.setitem(sys.modules, "hermes_state", fake_state)
-
-        result = json.loads(session_search(query="test"))
+class TestDiscoveryShape:
+    def test_query_returns_anchored_windows(self, db):
+        _seed_modpack_sessions(db)
+        result = json.loads(session_search(query="modpack", db=db))
        assert result["success"] is True
-        mock_db.search_messages.assert_called_once()
+        assert result["mode"] == "discover"
+        assert result["count"] >= 1

-    def test_empty_query_returns_error(self):
-        from tools.session_search_tool import session_search
-        mock_db = object()
-        result = json.loads(session_search(query="", db=mock_db))
-        assert result["success"] is False
+    def test_discovery_result_has_bookends_and_window(self, db):
+        _seed_modpack_sessions(db)
+        result = json.loads(session_search(query="modpack", limit=3, db=db))
+        for hit in result["results"]:
+            assert "bookend_start" in hit
+            assert "messages" in hit
+            assert "bookend_end" in hit
+            assert "match_message_id" in hit
+            assert "snippet" in hit
+            assert "messages_before" in hit
+            assert "messages_after" in hit

-    def test_whitespace_query_returns_error(self):
-        from tools.session_search_tool import session_search
-        mock_db = object()
-        result = json.loads(session_search(query="   ", db=mock_db))
-        assert result["success"] is False
+    def test_match_message_id_is_anchor_in_window(self, db):
+        _seed_modpack_sessions(db)
+        result = json.loads(session_search(query="modpack", limit=3, db=db))
+        for hit in result["results"]:
+            anchor_id = hit["match_message_id"]
+            window_ids = [m["id"] for m in hit["messages"]]
+            assert anchor_id in window_ids

-    def test_current_session_excluded(self):
-        """session_search should never return the current session."""
-        from unittest.mock import MagicMock
-        from tools.session_search_tool import session_search
-
-        mock_db = MagicMock()
-        current_sid = "20260304_120000_abc123"
-
-        # Simulate FTS5 returning matches only from the current session
-        mock_db.search_messages.return_value = [
-            {"session_id": current_sid, "content": "test match", "source": "cli",
-             "session_started": 1709500000, "model": "test"},
-        ]
-        mock_db.get_session.return_value = {"parent_session_id": None}
-
-        result = json.loads(session_search(
-            query="test", db=mock_db, current_session_id=current_sid,
-        ))
+    def test_no_results_returns_empty_list(self, db):
+        _seed_modpack_sessions(db)
+        result = json.loads(session_search(query="zzz_no_such_term_zzz", db=db))
        assert result["success"] is True
-        assert result["count"] == 0
        assert result["results"] == []
+        assert result["count"] == 0

-    def test_current_session_excluded_keeps_others(self):
-        """Other sessions should still be returned when current is excluded."""
-        from unittest.mock import MagicMock
-        from tools.session_search_tool import session_search
+    def test_limit_clamped_to_max_10(self, db):
+        _seed_modpack_sessions(db)
+        # Pass a huge limit; the call should not error and the result count should be capped at 10
+        result = json.loads(session_search(query="modpack", limit=999, db=db))
+        assert result["count"] <= 10

-        mock_db = MagicMock()
-        current_sid = "20260304_120000_abc123"
-        other_sid = "20260303_100000_def456"
+    def test_limit_floor_to_1(self, db):
+        _seed_modpack_sessions(db)
+        result = json.loads(session_search(query="modpack", limit=0, db=db))
+        # limit=0 should be floored to 1; count depends on hits, so this only smoke-checks the call (>= 0 is always true)
+        assert result["count"] >= 0

-        mock_db.search_messages.return_value = [
-            {"session_id": current_sid, "content": "match 1", "source": "cli",
-             "session_started": 1709500000, "model": "test"},
-            {"session_id": other_sid, "content": "match 2", "source": "telegram",
-             "session_started": 1709400000, "model": "test"},
-        ]
-        mock_db.get_session.return_value = {"parent_session_id": None}
-        mock_db.get_messages_as_conversation.return_value = [
-            {"role": "user", "content": "hello"},
-            {"role": "assistant", "content": "hi there"},
-        ]
+    def test_non_int_limit_falls_back(self, db):
+        _seed_modpack_sessions(db)
+        result = json.loads(session_search(query="modpack", limit="bogus", db=db))
+        assert result["success"] is True

-        # Mock async_call_llm to raise RuntimeError → summarizer returns None
-        from unittest.mock import AsyncMock, patch as _patch
-        with _patch("tools.session_search_tool.async_call_llm",
-                     new_callable=AsyncMock,
-                     side_effect=RuntimeError("no provider")):
+    def test_current_session_filtered_out(self, db):
+        _seed_modpack_sessions(db)
+        result = json.loads(session_search(query="modpack", db=db, current_session_id="s_newest"))
+        sids = [r["session_id"] for r in result["results"]]
+        assert "s_newest" not in sids
+
+
+class TestDiscoverySort:
+    def test_sort_newest_orders_by_recency(self, db):
+        _seed_modpack_sessions(db)
+        result = json.loads(session_search(query="modpack", limit=3, sort="newest", db=db))
+        # First result should be the most recent session
+        first = result["results"][0]
+        assert first["session_id"] == "s_newest" or "Newest" in (first.get("title") or "")
+
+    def test_sort_oldest_orders_by_age(self, db):
+        _seed_modpack_sessions(db)
+        result = json.loads(session_search(query="modpack", limit=3, sort="oldest", db=db))
+        first = result["results"][0]
+        assert first["session_id"] == "s_oldest"
+
+    def test_invalid_sort_silently_ignored(self, db):
+        _seed_modpack_sessions(db)
+        # Should not error
+        result = json.loads(session_search(query="modpack", sort="bogus", db=db))
+        assert result["success"] is True
+
+
+class TestRoleFilter:
+    def test_default_excludes_tool_role(self, db):
+        db.create_session("s1", source="cli")
+        db.append_message("s1", role="user", content="modpack question")
+        db.append_message("s1", role="tool", content="modpack tool output", tool_name="x")
+        result = json.loads(session_search(query="modpack", db=db))
+        # The FTS5 match should be on the user message, not the tool message
+        if result["count"] > 0:
+            matched_role = result["results"][0]["matched_role"]
+            assert matched_role in ("user", "assistant")
+
+    def test_explicit_tool_role_includes_tool(self, db):
+        db.create_session("s1", source="cli")
+        db.append_message("s1", role="tool", content="modpack tool output", tool_name="x")
+        result = json.loads(session_search(query="modpack", role_filter="tool", db=db))
+        # Should now match the tool message
+        if result["count"] > 0:
+            assert result["results"][0]["matched_role"] == "tool"
+
+
+# =========================================================================
+# Scroll shape (session_id + around_message_id)
+# =========================================================================
+
+class TestScrollShape:
+    def test_scroll_returns_window_without_bookends(self, db):
+        _seed_modpack_sessions(db)
+        # Get an anchor first via discovery
+        disc = json.loads(session_search(query="modpack", limit=1, db=db))
+        anchor_sid = disc["results"][0]["session_id"]
+        anchor_mid = disc["results"][0]["match_message_id"]
+
+        # Now scroll
+        result = json.loads(session_search(
+            session_id=anchor_sid, around_message_id=anchor_mid, window=2, db=db
+        ))
+        assert result["success"] is True
+        assert result["mode"] == "scroll"
+        assert "messages" in result
+        # Scroll shape has no bookends
+        assert "bookend_start" not in result
+        assert "bookend_end" not in result
+
+    def test_scroll_window_clamped_to_20(self, db):
+        _seed_modpack_sessions(db)
+        disc = json.loads(session_search(query="modpack", limit=1, db=db))
+        anchor_sid = disc["results"][0]["session_id"]
+        anchor_mid = disc["results"][0]["match_message_id"]
+        result = json.loads(session_search(
+            session_id=anchor_sid, around_message_id=anchor_mid, window=999, db=db
+        ))
+        assert result["window"] == 20
+
+    def test_scroll_window_floor_to_1(self, db):
+        _seed_modpack_sessions(db)
+        disc = json.loads(session_search(query="modpack", limit=1, db=db))
+        anchor_sid = disc["results"][0]["session_id"]
+        anchor_mid = disc["results"][0]["match_message_id"]
+        result = json.loads(session_search(
+            session_id=anchor_sid, around_message_id=anchor_mid, window=-5, db=db
+        ))
+        assert result["window"] == 1
+
+    def test_scroll_returns_messages_before_after_counts(self, db):
+        _seed_modpack_sessions(db)
+        disc = json.loads(session_search(query="modpack", limit=1, db=db))
+        anchor_sid = disc["results"][0]["session_id"]
+        anchor_mid = disc["results"][0]["match_message_id"]
+        result = json.loads(session_search(
+            session_id=anchor_sid, around_message_id=anchor_mid, window=3, db=db
+        ))
+        assert "messages_before" in result
+        assert "messages_after" in result
+
+    def test_scroll_anchor_in_window(self, db):
+        _seed_modpack_sessions(db)
+        disc = json.loads(session_search(query="modpack", limit=1, db=db))
+        anchor_sid = disc["results"][0]["session_id"]
+        anchor_mid = disc["results"][0]["match_message_id"]
+        result = json.loads(session_search(
+            session_id=anchor_sid, around_message_id=anchor_mid, window=2, db=db
+        ))
+        anchor_in_window = [m for m in result["messages"] if m["id"] == anchor_mid]
+        assert len(anchor_in_window) == 1
+        assert anchor_in_window[0].get("anchor") is True
+
+    def test_scroll_missing_anchor_errors(self, db):
+        _seed_modpack_sessions(db)
+        result = json.loads(session_search(
+            session_id="s_oldest", around_message_id=999999, db=db
+        ))
+        assert result["success"] is False
+        assert "not in" in result.get("error", "")
+
+    def test_scroll_missing_session_errors(self, db):
+        result = json.loads(session_search(
+            session_id="nonexistent", around_message_id=1, db=db
+        ))
+        assert result["success"] is False
+
+    def test_scroll_rejects_current_session_lineage(self, db):
+        _seed_modpack_sessions(db)
+        # Grab a valid message id from s_oldest via a discovery search
+        disc = json.loads(session_search(query="modpack", limit=3, db=db))
+        match = [r for r in disc["results"] if r["session_id"] == "s_oldest"]
+        if match:
+            mid = match[0]["match_message_id"]
            result = json.loads(session_search(
-                query="test", db=mock_db, current_session_id=current_sid,
+                session_id="s_oldest", around_message_id=mid, db=db,
+                current_session_id="s_oldest",
            ))
+            assert result["success"] is False
+            assert "current session" in result.get("error", "").lower()

-        assert result["success"] is True
-        # Current session should be skipped, only other_sid should appear
-        assert result["sessions_searched"] == 1
-        assert current_sid not in [r.get("session_id") for r in result.get("results", [])]
-
-    def test_current_child_session_excludes_parent_lineage(self):
-        """Compression/delegation parents should be excluded for the active child session."""
-        from unittest.mock import MagicMock
-        from tools.session_search_tool import session_search
-
-        mock_db = MagicMock()
-        mock_db.search_messages.return_value = [
-            {"session_id": "parent_sid", "content": "match", "source": "cli",
-             "session_started": 1709500000, "model": "test"},
-        ]
-
-        def _get_session(session_id):
-            if session_id == "child_sid":
-                return {"parent_session_id": "parent_sid"}
-            if session_id == "parent_sid":
-                return {"parent_session_id": None}
-            return None
-
-        mock_db.get_session.side_effect = _get_session
-
+    def test_scroll_invalid_around_message_id_errors(self, db):
+        _seed_modpack_sessions(db)
        result = json.loads(session_search(
-            query="test", db=mock_db, current_session_id="child_sid",
+            session_id="s_oldest", around_message_id="not-an-int", db=db
        ))
+        assert result["success"] is False

-        assert result["success"] is True
-        assert result["count"] == 0
-        assert result["results"] == []
-        assert result["sessions_searched"] == 0

-    def test_limit_none_coerced_to_default(self):
-        """Model sends limit=null → should fall back to 3, not TypeError."""
-        from unittest.mock import MagicMock
-        from tools.session_search_tool import session_search
+class TestScrollPattern:
+    """The forward/backward scroll loop using tool output."""

-        mock_db = MagicMock()
-        mock_db.search_messages.return_value = []
+    def test_scroll_forward_from_last_id(self, db):
+        # Build a long (20-message) session so the window has room to scroll forward
+        db.create_session("s_long", source="cli")
+        ids = []
+        for i in range(20):
+            ids.append(db.append_message("s_long", role="user" if i % 2 == 0 else "assistant",
+                                         content=f"long session msg {i}"))

+        v1 = json.loads(session_search(
+            session_id="s_long", around_message_id=ids[5], window=3, db=db
+        ))
+        last_id = v1["messages"][-1]["id"]
+        v2 = json.loads(session_search(
+            session_id="s_long", around_message_id=last_id, window=3, db=db
+        ))
+        # Forward scroll: v2 should reach further than v1
+        assert max(m["id"] for m in v2["messages"]) > max(m["id"] for m in v1["messages"])
+        # Boundary id appears in both
+        assert last_id in [m["id"] for m in v1["messages"]]
+        assert last_id in [m["id"] for m in v2["messages"]]
+
+
+# =========================================================================
+# Shape precedence
+# =========================================================================
+
+class TestShapePrecedence:
+    def test_scroll_args_beat_query(self, db):
+        _seed_modpack_sessions(db)
+        disc = json.loads(session_search(query="modpack", limit=1, db=db))
+        anchor_sid = disc["results"][0]["session_id"]
+        anchor_mid = disc["results"][0]["match_message_id"]
+        # Pass both query and scroll args — scroll should win
        result = json.loads(session_search(
-            query="test", db=mock_db, limit=None,
+            query="modpack",  # would normally trigger discovery
+            session_id=anchor_sid, around_message_id=anchor_mid, db=db,
        ))
-        assert result["success"] is True
+        assert result["mode"] == "scroll"

-    def test_limit_type_object_coerced_to_default(self):
-        """Model sends limit as a type object → should fall back to 3, not TypeError."""
-        from unittest.mock import MagicMock
-        from tools.session_search_tool import session_search
+    def test_empty_query_falls_back_to_browse(self, db):
+        _seed_modpack_sessions(db)
+        result = json.loads(session_search(query="   ", db=db))
+        assert result["mode"] == "browse"

-        mock_db = MagicMock()
-        mock_db.search_messages.return_value = []
-
-        result = json.loads(session_search(
-            query="test", db=mock_db, limit=int,
-        ))
-        assert result["success"] is True
-
-    def test_limit_string_coerced(self):
-        """Model sends limit as string '2' → should coerce to int."""
-        from unittest.mock import MagicMock
-        from tools.session_search_tool import session_search
-
-        mock_db = MagicMock()
-        mock_db.search_messages.return_value = []
-
-        result = json.loads(session_search(
-            query="test", db=mock_db, limit="2",
-        ))
-        assert result["success"] is True
-
-    def test_limit_clamped_to_range(self):
-        """Negative or zero limit should be clamped to 1."""
-        from unittest.mock import MagicMock
-        from tools.session_search_tool import session_search
-
-        mock_db = MagicMock()
-        mock_db.search_messages.return_value = []
-
-        result = json.loads(session_search(
-            query="test", db=mock_db, limit=-5,
-        ))
-        assert result["success"] is True
-
-        result = json.loads(session_search(
-            query="test", db=mock_db, limit=0,
-        ))
-        assert result["success"] is True
-
-    def test_current_root_session_excludes_child_lineage(self):
-        """Delegation child hits should be excluded when they resolve to the current root session."""
-        from unittest.mock import MagicMock
-        from tools.session_search_tool import session_search
-
-        mock_db = MagicMock()
-        mock_db.search_messages.return_value = [
-            {"session_id": "child_sid", "content": "match", "source": "cli",
-             "session_started": 1709500000, "model": "test"},
-        ]
-
-        def _get_session(session_id):
-            if session_id == "root_sid":
-                return {"parent_session_id": None}
-            if session_id == "child_sid":
-                return {"parent_session_id": "root_sid"}
-            return None
-
-        mock_db.get_session.side_effect = _get_session
-
-        result = json.loads(session_search(
-            query="test", db=mock_db, current_session_id="root_sid",
-        ))
-
-        assert result["success"] is True
-        assert result["count"] == 0
-        assert result["results"] == []
-        assert result["sessions_searched"] == 0
-
-    def test_source_from_resolved_parent_not_fts5_child(self):
-        """source in output must reflect the resolved parent session, not the child that matched FTS5.
-
-        Regression test for #15909: when a delegation child session (source='telegram')
-        resolves to a parent (source='api_server'), the result entry must report
-        'api_server', not 'telegram'.
-        """
-        from unittest.mock import MagicMock, AsyncMock, patch as _patch
-        from tools.session_search_tool import session_search
-
-        mock_db = MagicMock()
-        # FTS5 hit is in the child delegation session which carries source='telegram'
-        mock_db.search_messages.return_value = [
-            {
-                "session_id": "child_sid",
-                "content": "hello world",
-                "source": "telegram",       # child session source — wrong value to surface
-                "session_started": 1709400000,
-                "model": "gpt-4o-mini",
-            },
-        ]
-
-        def _get_session(session_id):
-            if session_id == "child_sid":
-                return {
-                    "id": "child_sid",
-                    "parent_session_id": "parent_sid",
-                    "source": "telegram",
-                    "started_at": 1709400000,
-                    "model": "gpt-4o-mini",
-                }
-            if session_id == "parent_sid":
-                return {
-                    "id": "parent_sid",
-                    "parent_session_id": None,
-                    "source": "api_server",  # correct parent source
-                    "started_at": 1709300000,
-                    "model": "gpt-4o-mini",
-                }
-            return None
-
-        mock_db.get_session.side_effect = _get_session
-        mock_db.get_messages_as_conversation.return_value = [
-            {"role": "user", "content": "hello world"},
-            {"role": "assistant", "content": "hi there"},
-        ]
-
-        with _patch(
-            "tools.session_search_tool.async_call_llm",
-            new_callable=AsyncMock,
-            side_effect=RuntimeError("no provider"),
-        ):
-            result = json.loads(session_search(query="hello world", db=mock_db))
-
-        assert result["success"] is True
-        assert result["count"] == 1
-        entry = result["results"][0]
-        assert entry["session_id"] == "parent_sid", "should report resolved parent session ID"
-        assert entry["source"] == "api_server", (
-            f"source should be parent's 'api_server', got {entry['source']!r}"
-        )
+    def test_non_string_query_falls_back_to_browse(self, db):
+        _seed_modpack_sessions(db)
+        result = json.loads(session_search(query=None, db=db))  # type: ignore
+        assert result["mode"] == "browse"
--- a/tools/session_search_tool.py
+++ b/tools/session_search_tool.py
--- a/website/docs/reference/tools-reference.md
+++ b/website/docs/reference/tools-reference.md
@ -152,7 +152,7 @@ Registered only when the agent is spawned by the kanban dispatcher (`HERMES_KANB

 | Tool | Description | Requires environment |
 |------|-------------|----------------------|
-| `session_search` | Search your long-term memory of past conversations. This is your recall -- every past session is searchable, and this tool summarizes what happened. USE THIS PROACTIVELY when: - The user says 'we did this before', 'remember when', 'last ti… | — |
+| `session_search` | Search past sessions stored in the local session DB, or scroll inside one. FTS5-backed retrieval; returns actual messages from the DB (no LLM calls). Three shapes: discovery (pass `query`), scroll (pass `session_id` + `around_message_id`), browse (no args). | — |

 ## `skills` toolset

--- a/website/docs/user-guide/configuration.md
+++ b/website/docs/user-guide/configuration.md
@ -780,7 +780,6 @@ $ hermes model

 [ ] vision               currently: auto / main model
 [ ] web_extract          currently: auto / main model
-[ ] session_search       currently: openrouter / google/gemini-2.5-flash
 [ ] title_generation     currently: openrouter / google/gemini-3-flash-preview
 [ ] compression          currently: auto / main model
 [ ] approval             currently: auto / main model
@ -862,16 +861,6 @@ auxiliary:
  compression:
    timeout: 120               # seconds — compression summarizes long conversations, needs more time

-  # Session search — summarizes past session matches
-  session_search:
-    provider: "auto"
-    model: ""
-    base_url: ""
-    api_key: ""
-    timeout: 30
-    max_concurrency: 3       # Limit parallel summaries to reduce request-burst 429s
-    extra_body: {}           # Provider-specific OpenAI-compatible request fields
-
  # Skills hub — skill matching and search
  skills_hub:
    provider: "auto"
@ -909,34 +898,6 @@ Each auxiliary task has a configurable `timeout` (in seconds). Defaults: vision
 Context compression has its own `compression:` block for thresholds and an `auxiliary.compression:` block for model/provider settings — see [Context Compression](#context-compression) above. The fallback model uses a `fallback_model:` block — see [Fallback Model](/docs/integrations/providers#fallback-model). All three follow the same provider/model/base_url pattern.
 :::

-### Session Search Tuning
-
-If you use a reasoning-heavy model for `auxiliary.session_search`, Hermes now gives you two built-in controls:
-
- `auxiliary.session_search.max_concurrency`: limits how many matched sessions Hermes summarizes at once
- `auxiliary.session_search.extra_body`: forwards provider-specific OpenAI-compatible request fields on the summarization calls
-
-Example:
-
-```yaml
-auxiliary:
-  session_search:
-    provider: "main"
-    model: "glm-4.5-air"
-    timeout: 60
-    max_concurrency: 2
-    extra_body:
-      enable_thinking: false
-```
-
-Use `max_concurrency` when your provider rate-limits request bursts and you want `session_search` to trade some parallelism for stability.
-
-Use `extra_body` only when your provider documents OpenAI-compatible request-body fields you want Hermes to pass through for that task. Hermes forwards the object as-is.
-
-:::warning
-`extra_body` is only effective when your provider actually supports the field you send. If the provider does not expose a native OpenAI-compatible reasoning-off flag, Hermes cannot synthesize one on its behalf.
-:::
-
 ### OpenRouter routing & Pareto Code for auxiliary tasks

 When an auxiliary task resolves to OpenRouter (either explicitly or via `provider: "main"` while your main agent is on OpenRouter), the main agent's `provider_routing` and `openrouter.min_coding_score` settings **do not propagate** — by design, each auxiliary task is independent. To set OpenRouter provider preferences or use the [Pareto Code router](/docs/integrations/providers#openrouter-pareto-code-router) for a specific aux task, set them per-task via `extra_body`:
--- a/website/docs/user-guide/features/fallback-providers.md
+++ b/website/docs/user-guide/features/fallback-providers.md
@ -188,7 +188,6 @@ Hermes uses separate lightweight models for side tasks. Each task has its own pr
 | Vision | Image analysis, browser screenshots | `auxiliary.vision` |
 | Web Extract | Web page summarization | `auxiliary.web_extract` |
 | Compression | Context compression summaries | `auxiliary.compression` |
-| Session Search | Past session summarization | `auxiliary.session_search` |
 | Skills Hub | Skill search and discovery | `auxiliary.skills_hub` |
 | MCP | MCP helper operations | `auxiliary.mcp` |
 | Approval | Smart command-approval classification | `auxiliary.approval` |
@ -235,13 +234,6 @@ auxiliary:
    provider: "auto"
    model: ""

-  session_search:
-    provider: "auto"
-    model: ""
-    timeout: 30
-    max_concurrency: 3
-    extra_body: {}
-
  skills_hub:
    provider: "auto"
    model: ""
@ -270,25 +262,6 @@ fallback_model:
  # base_url: http://localhost:8000/v1               # Optional custom endpoint
 ```

-For `auxiliary.session_search`, Hermes also supports:
-
- `max_concurrency` to limit how many session summaries run at once
- `extra_body` to pass provider-specific OpenAI-compatible request fields through on the summarization calls
-
-Example:
-
-```yaml
-auxiliary:
-  session_search:
-    provider: main
-    model: glm-4.5-air
-    max_concurrency: 2
-    extra_body:
-      enable_thinking: false
-```
-
-If your provider does not support a native OpenAI-compatible reasoning-control field, `extra_body` will not help for that part; in that case `max_concurrency` is still useful for reducing request-burst 429s.
-
 All three — auxiliary, compression, fallback — work the same way: set `provider` to pick who handles the request, `model` to pick which model, and `base_url` to point at a custom endpoint (overrides provider).

 ### Provider Options for Auxiliary Tasks
@ -432,7 +405,6 @@ See [Scheduled Tasks (Cron)](/docs/user-guide/features/cron) for full configurat
 | Vision | Layered (see above) + internal OpenRouter retry | `auxiliary.vision` |
 | Web extraction | Layered (see above) + internal OpenRouter retry | `auxiliary.web_extract` |
 | Context compression | Layered (see above); degrades to no-summary if all layers unavailable | `auxiliary.compression` |
-| Session search | Layered (see above) | `auxiliary.session_search` |
 | Skills hub | Layered (see above) | `auxiliary.skills_hub` |
 | MCP helpers | Layered (see above) | `auxiliary.mcp` |
 | Approval classification | Layered (see above) | `auxiliary.approval` |