mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-09 08:21:50 +00:00
Merge branch 'main' of github.com:NousResearch/hermes-agent into bb/gui
This commit is contained in:
commit
e98bec95ef
20 changed files with 1745 additions and 1128 deletions
|
|
@ -172,7 +172,7 @@ hermes-agent/
|
|||
│ ├── vision_tools.py # Image analysis via multimodal models
|
||||
│ ├── delegate_tool.py # Subagent spawning and parallel task execution
|
||||
│ ├── code_execution_tool.py # Sandboxed Python with RPC tool access
|
||||
│ ├── session_search_tool.py # Search past conversations with FTS5 + summarization
|
||||
│ ├── session_search_tool.py # Search past conversations with FTS5 + anchored windows
|
||||
│ ├── cronjob_tools.py # Scheduled task management
|
||||
│ ├── skill_tools.py # Skill search, load, manage
|
||||
│ └── environments/ # Terminal execution backends
|
||||
|
|
|
|||
|
|
@ -1503,6 +1503,10 @@ def invoke_tool(agent, function_name: str, function_args: dict, effective_task_i
|
|||
query=function_args.get("query", ""),
|
||||
role_filter=function_args.get("role_filter"),
|
||||
limit=function_args.get("limit", 3),
|
||||
session_id=function_args.get("session_id"),
|
||||
around_message_id=function_args.get("around_message_id"),
|
||||
window=function_args.get("window", 5),
|
||||
sort=function_args.get("sort"),
|
||||
db=session_db,
|
||||
current_session_id=agent.session_id,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -82,6 +82,108 @@ def _ra():
|
|||
return run_agent
|
||||
|
||||
|
||||
def _restore_or_build_system_prompt(agent, system_message, conversation_history):
|
||||
"""Restore the cached system prompt from the session DB or build it fresh.
|
||||
|
||||
Mutates ``agent._cached_system_prompt`` and persists a freshly-built
|
||||
prompt back to the session DB on first build. Extracted from
|
||||
``run_conversation`` so the prefix-cache restore path can be tested in
|
||||
isolation.
|
||||
|
||||
Three-way state distinction for the stored row, surfaced via logs so
|
||||
silent prefix-cache misses are visible in ``agent.log``:
|
||||
|
||||
* ``missing`` — no session row yet (legitimate first turn).
|
||||
* ``null`` — row exists, ``system_prompt`` column is NULL.
|
||||
Legacy session predating system-prompt persistence, or a migration
|
||||
leftover. Warns when ``conversation_history`` is non-empty.
|
||||
* ``empty`` — row exists, ``system_prompt`` column is the empty
|
||||
string. Indicates a previous-turn write that ran but stored
|
||||
nothing (silent persistence bug). Always warns.
|
||||
* ``present`` — row exists with a usable prompt → reused verbatim.
|
||||
|
||||
Read or write failures against the session DB log at WARNING (not
|
||||
DEBUG) so persistent issues (disk full, schema drift, lock contention)
|
||||
surface without needing verbose mode. This used to be a debug-level
|
||||
log that silently broke prefix-cache reuse on the gateway path
|
||||
(which constructs a fresh ``AIAgent`` per turn and depends on this
|
||||
DB roundtrip).
|
||||
"""
|
||||
stored_prompt = None
|
||||
stored_state = "missing"
|
||||
if conversation_history and agent._session_db:
|
||||
try:
|
||||
session_row = agent._session_db.get_session(agent.session_id)
|
||||
if session_row is not None:
|
||||
raw_prompt = session_row.get("system_prompt")
|
||||
if raw_prompt is None:
|
||||
stored_state = "null"
|
||||
elif raw_prompt == "":
|
||||
stored_state = "empty"
|
||||
else:
|
||||
stored_prompt = raw_prompt
|
||||
stored_state = "present"
|
||||
except Exception as exc:
|
||||
logger.warning(
|
||||
"Session DB get_session failed for system-prompt restore "
|
||||
"(session=%s): %s. Falling back to fresh build — prefix "
|
||||
"cache will miss for this turn.",
|
||||
agent.session_id, exc,
|
||||
)
|
||||
|
||||
if stored_prompt:
|
||||
# Continuing session — reuse the exact system prompt from the
|
||||
# previous turn so the Anthropic cache prefix matches.
|
||||
agent._cached_system_prompt = stored_prompt
|
||||
return
|
||||
|
||||
if conversation_history and stored_state in ("null", "empty"):
|
||||
# Continuing session whose stored prompt is unusable. The
|
||||
# previous turn's write either never happened or wrote an empty
|
||||
# string — either way every turn now rebuilds and the prefix
|
||||
# cache misses every time.
|
||||
logger.warning(
|
||||
"Stored system prompt for session %s is %s; rebuilding "
|
||||
"from scratch this turn. Prefix cache will miss until "
|
||||
"the rebuild persists. Investigate the previous turn's "
|
||||
"update_system_prompt write path.",
|
||||
agent.session_id, stored_state,
|
||||
)
|
||||
|
||||
# First turn of a new session (or recovering from a broken stored
|
||||
# prompt) — build from scratch.
|
||||
agent._cached_system_prompt = agent._build_system_prompt(system_message)
|
||||
|
||||
# Plugin hook: on_session_start — fired once when a brand-new
|
||||
# session is created (not on continuation). Plugins can use this
|
||||
# to initialise session-scoped state (e.g. warm a memory cache).
|
||||
try:
|
||||
from hermes_cli.plugins import invoke_hook as _invoke_hook
|
||||
_invoke_hook(
|
||||
"on_session_start",
|
||||
session_id=agent.session_id,
|
||||
model=agent.model,
|
||||
platform=getattr(agent, "platform", None) or "",
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.warning("on_session_start hook failed: %s", exc)
|
||||
|
||||
# Persist the system prompt snapshot in SQLite. Failure here used
|
||||
# to log at DEBUG, which silently broke prefix-cache reuse on the
|
||||
# gateway path (fresh AIAgent per turn → reads from this row every
|
||||
# subsequent turn).
|
||||
if agent._session_db:
|
||||
try:
|
||||
agent._session_db.update_system_prompt(agent.session_id, agent._cached_system_prompt)
|
||||
except Exception as exc:
|
||||
logger.warning(
|
||||
"Session DB update_system_prompt failed for session %s: "
|
||||
"%s. Subsequent turns will rebuild the system prompt and "
|
||||
"miss the prefix cache.",
|
||||
agent.session_id, exc,
|
||||
)
|
||||
|
||||
|
||||
def run_conversation(
|
||||
agent,
|
||||
user_message: str,
|
||||
|
|
@ -313,43 +415,7 @@ def run_conversation(
|
|||
# producing a different system prompt and breaking the Anthropic
|
||||
# prefix cache.
|
||||
if agent._cached_system_prompt is None:
|
||||
stored_prompt = None
|
||||
if conversation_history and agent._session_db:
|
||||
try:
|
||||
session_row = agent._session_db.get_session(agent.session_id)
|
||||
if session_row:
|
||||
stored_prompt = session_row.get("system_prompt") or None
|
||||
except Exception:
|
||||
pass # Fall through to build fresh
|
||||
|
||||
if stored_prompt:
|
||||
# Continuing session — reuse the exact system prompt from
|
||||
# the previous turn so the Anthropic cache prefix matches.
|
||||
agent._cached_system_prompt = stored_prompt
|
||||
else:
|
||||
# First turn of a new session — build from scratch.
|
||||
agent._cached_system_prompt = agent._build_system_prompt(system_message)
|
||||
# Plugin hook: on_session_start
|
||||
# Fired once when a brand-new session is created (not on
|
||||
# continuation). Plugins can use this to initialise
|
||||
# session-scoped state (e.g. warm a memory cache).
|
||||
try:
|
||||
from hermes_cli.plugins import invoke_hook as _invoke_hook
|
||||
_invoke_hook(
|
||||
"on_session_start",
|
||||
session_id=agent.session_id,
|
||||
model=agent.model,
|
||||
platform=getattr(agent, "platform", None) or "",
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.warning("on_session_start hook failed: %s", exc)
|
||||
|
||||
# Store the system prompt snapshot in SQLite
|
||||
if agent._session_db:
|
||||
try:
|
||||
agent._session_db.update_system_prompt(agent.session_id, agent._cached_system_prompt)
|
||||
except Exception as e:
|
||||
logger.debug("Session DB update_system_prompt failed: %s", e)
|
||||
_restore_or_build_system_prompt(agent, system_message, conversation_history)
|
||||
|
||||
active_system_prompt = agent._cached_system_prompt
|
||||
|
||||
|
|
|
|||
|
|
@ -274,6 +274,10 @@ TOOL_USE_ENFORCEMENT_MODELS = ("gpt", "codex", "gemini", "gemma", "grok", "glm")
|
|||
# where GPT models abandon work on partial results, skip prerequisite lookups,
|
||||
# hallucinate instead of using tools, and declare "done" without verification.
|
||||
# Inspired by patterns from OpenAI's GPT-5.4 prompting guide & OpenClaw PR #38953.
|
||||
# Also applied to xAI Grok — same failure modes in practice (claims completion
|
||||
# without tool calls, suggests workarounds instead of using existing tools,
|
||||
# replies with plans/suggestions instead of executing). The body is
|
||||
# family-agnostic; the OPENAI_ prefix reflects origin, not exclusivity.
|
||||
OPENAI_MODEL_EXECUTION_GUIDANCE = (
|
||||
"# Execution discipline\n"
|
||||
"<tool_persistence>\n"
|
||||
|
|
|
|||
|
|
@ -156,7 +156,10 @@ def build_system_prompt_parts(agent: Any, system_message: Optional[str] = None)
|
|||
stable_parts.append(GOOGLE_MODEL_OPERATIONAL_GUIDANCE)
|
||||
# OpenAI GPT/Codex execution discipline (tool persistence,
|
||||
# prerequisite checks, verification, anti-hallucination).
|
||||
if "gpt" in _model_lower or "codex" in _model_lower:
|
||||
# Also applied to xAI Grok — same failure modes (claims completion
|
||||
# without tool calls, suggests workarounds instead of using
|
||||
# existing tools, replies with plans instead of executing).
|
||||
if "gpt" in _model_lower or "codex" in _model_lower or "grok" in _model_lower:
|
||||
stable_parts.append(OPENAI_MODEL_EXECUTION_GUIDANCE)
|
||||
|
||||
has_skills_tools = any(name in agent.valid_tool_names for name in ['skills_list', 'skill_view', 'skill_manage'])
|
||||
|
|
@ -255,7 +258,13 @@ def build_system_prompt_parts(agent: Any, system_message: Optional[str] = None)
|
|||
|
||||
from hermes_time import now as _hermes_now
|
||||
now = _hermes_now()
|
||||
timestamp_line = f"Conversation started: {now.strftime('%A, %B %d, %Y %I:%M %p')}"
|
||||
# Date-only (not minute-precision) so the system prompt is byte-stable
|
||||
# for the full day. Minute-precision changes invalidate prefix-cache KV
|
||||
# on every rebuild path (compression boundary, fresh-agent gateway turns,
|
||||
# session resume without a stored prompt). The model can still query the
|
||||
# exact wall-clock time via tools when it actually needs it.
|
||||
# Credit: @iamfoz (PR #20451).
|
||||
timestamp_line = f"Conversation started: {now.strftime('%A, %B %d, %Y')}"
|
||||
if agent.pass_session_id and agent.session_id:
|
||||
timestamp_line += f"\nSession ID: {agent.session_id}"
|
||||
if agent.model:
|
||||
|
|
|
|||
|
|
@ -622,6 +622,10 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
|
|||
query=function_args.get("query", ""),
|
||||
role_filter=function_args.get("role_filter"),
|
||||
limit=function_args.get("limit", 3),
|
||||
session_id=function_args.get("session_id"),
|
||||
around_message_id=function_args.get("around_message_id"),
|
||||
window=function_args.get("window", 5),
|
||||
sort=function_args.get("sort"),
|
||||
db=session_db,
|
||||
current_session_id=agent.session_id,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -872,15 +872,10 @@ DEFAULT_CONFIG = {
|
|||
"timeout": 120, # seconds — compression summarises large contexts; increase for local models
|
||||
"extra_body": {},
|
||||
},
|
||||
"session_search": {
|
||||
"provider": "auto",
|
||||
"model": "",
|
||||
"base_url": "",
|
||||
"api_key": "",
|
||||
"timeout": 30,
|
||||
"extra_body": {},
|
||||
"max_concurrency": 3, # Clamp parallel summaries to avoid request-burst 429s on small providers
|
||||
},
|
||||
# Note: session_search no longer uses an auxiliary LLM (PR #27590 —
|
||||
# single-shape tool returns DB content directly). The old
|
||||
# ``auxiliary.session_search.*`` block was removed here. Existing
|
||||
# values in user config.yaml files are harmless leftovers and ignored.
|
||||
"skills_hub": {
|
||||
"provider": "auto",
|
||||
"model": "",
|
||||
|
|
|
|||
|
|
@ -458,8 +458,6 @@ TIPS = [
|
|||
'image_gen.model in config.yaml picks the FAL model: flux-2/klein, gpt-image-2, nano-banana-pro, and more.',
|
||||
'image_gen.provider routes image generation through a plugin (OpenAI Images, Codex, FAL) instead of the default.',
|
||||
'AUXILIARY_VISION_BASE_URL + AUXILIARY_VISION_API_KEY point vision analysis at any OpenAI-compatible endpoint.',
|
||||
'auxiliary.session_search.max_concurrency bounds how many matched sessions are summarized in parallel (default 3).',
|
||||
'auxiliary.session_search.extra_body forwards provider-specific OpenAI-compatible fields on summarization calls.',
|
||||
|
||||
# --- Security ---
|
||||
'security.tirith_fail_open: false makes Hermes block commands when the tirith scanner itself errors out.',
|
||||
|
|
|
|||
233
hermes_state.py
233
hermes_state.py
|
|
@ -25,7 +25,7 @@ from pathlib import Path
|
|||
|
||||
from agent.memory_manager import sanitize_context
|
||||
from hermes_constants import get_hermes_home
|
||||
from typing import Any, Callable, Dict, List, Optional, TypeVar
|
||||
from typing import Any, Callable, Dict, List, Optional, Tuple, TypeVar
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -1635,6 +1635,204 @@ class SessionDB:
|
|||
result.append(msg)
|
||||
return result
|
||||
|
||||
def get_messages_around(
    self,
    session_id: str,
    around_message_id: int,
    window: int = 5,
) -> Dict[str, Any]:
    """Load a window of messages anchored on a specific message id.

    Args:
        session_id: Session whose messages are read.
        around_message_id: Id of the anchor message.
        window: Maximum number of messages to return on each side of the
            anchor. Negative values are clamped to 0; values that cannot be
            converted to ``int`` (e.g. ``None``) fall back to the default
            of 5 instead of raising.

    Returns:
        A dict with:

        - ``window``: up to ``window`` messages before the anchor, the
          anchor itself, and up to ``window`` messages after, ordered by id
          ascending. ``content`` is passed through ``self._decode_content``
          and ``tool_calls`` is JSON-decoded (``[]`` when decoding fails).
        - ``messages_before``: number of returned messages strictly before
          the anchor.
        - ``messages_after``: number of returned messages strictly after
          the anchor.

        When either count is smaller than ``window`` the slice has reached
        that end of the session. If ``around_message_id`` is not a message
        of ``session_id`` the result is an empty window with both counts 0.
    """
    # Tool-call arguments may arrive as None or as strings; normalise so the
    # comparison and the LIMIT arithmetic below cannot raise.
    try:
        window = max(0, int(window))
    except (TypeError, ValueError):
        window = 5

    with self._lock:
        # Confirm the anchor exists in this session.
        anchor_exists = self._conn.execute(
            "SELECT 1 FROM messages WHERE id = ? AND session_id = ? LIMIT 1",
            (around_message_id, session_id),
        ).fetchone()
        if not anchor_exists:
            return {"window": [], "messages_before": 0, "messages_after": 0}

        # Anchor plus predecessors (DESC so LIMIT keeps the nearest rows),
        # then successors (ASC).
        before_rows = self._conn.execute(
            "SELECT * FROM messages "
            "WHERE session_id = ? AND id <= ? "
            "ORDER BY id DESC LIMIT ?",
            (session_id, around_message_id, window + 1),
        ).fetchall()
        after_rows = self._conn.execute(
            "SELECT * FROM messages "
            "WHERE session_id = ? AND id > ? "
            "ORDER BY id ASC LIMIT ?",
            (session_id, around_message_id, window),
        ).fetchall()

    # fetchall() has fully materialised the rows, so decoding does not need
    # the lock. Flip the DESC half back to ASC before concatenating.
    result = []
    for row in list(reversed(before_rows)) + list(after_rows):
        msg = dict(row)
        if "content" in msg:
            msg["content"] = self._decode_content(msg["content"])
        if msg.get("tool_calls"):
            try:
                msg["tool_calls"] = json.loads(msg["tool_calls"])
            except (json.JSONDecodeError, TypeError):
                logger.warning(
                    "Failed to deserialize tool_calls in get_messages_around, falling back to []"
                )
                msg["tool_calls"] = []
        result.append(msg)

    # before_rows includes the anchor itself, hence the -1.
    return {
        "window": result,
        "messages_before": max(0, len(before_rows) - 1),
        "messages_after": len(after_rows),
    }
|
||||
|
||||
def get_anchored_view(
    self,
    session_id: str,
    around_message_id: int,
    window: int = 5,
    bookend: int = 3,
    keep_roles: Optional[Tuple[str, ...]] = ("user", "assistant"),
) -> Dict[str, Any]:
    """Return an anchored window plus session bookends.

    Built on top of ``get_messages_around``. Three slices are returned:

    - ``window``: messages immediately surrounding the anchor, filtered to
      ``keep_roles``. The anchor itself is always kept regardless of role.
    - ``bookend_start``: the first ``bookend`` messages of the session with
      a role in ``keep_roles``, non-empty content, and an id strictly below
      the first id of the unfiltered window. Empty when nothing precedes
      the window.
    - ``bookend_end``: the last ``bookend`` such messages with an id
      strictly above the last id of the unfiltered window, in ascending
      order.

    ``messages_before`` / ``messages_after`` are passed through unchanged
    from ``get_messages_around`` (they count the unfiltered window).

    Args:
        session_id: Session whose messages are read.
        around_message_id: Id of the anchor message.
        window: Forwarded to ``get_messages_around``.
        bookend: Maximum size of each bookend; negative values become 0,
            and 0 skips the bookend queries.
        keep_roles: Roles to keep; ``None`` disables role filtering for
            both the window and the bookends.

    Returns:
        Dict with keys ``window``, ``messages_before``, ``messages_after``,
        ``bookend_start`` and ``bookend_end``. All lists are empty and both
        counts are 0 when ``get_messages_around`` returns an empty window.
    """
    if bookend < 0:
        bookend = 0

    # The primitive handles anchor existence, content decoding,
    # tool_calls deserialisation and the boundary counts.
    primitive = self.get_messages_around(
        session_id, around_message_id, window=window
    )
    window_rows = primitive["window"]
    if not window_rows:
        return {
            "window": [],
            "messages_before": 0,
            "messages_after": 0,
            "bookend_start": [],
            "bookend_end": [],
        }

    # Locate the anchor by position rather than by comparing ids with the
    # caller-supplied value: the window is "<messages_before rows>, anchor,
    # <after rows>", so the anchor sits at index ``messages_before``. This
    # keeps the anchor even when the id was passed as e.g. a numeric string
    # that SQLite matched but Python ``==`` would not.
    anchor_index = primitive["messages_before"]
    if 0 <= anchor_index < len(window_rows):
        anchor_id = window_rows[anchor_index].get("id")
    else:
        anchor_id = around_message_id

    # Apply the role filter to the window, but never drop the anchor itself.
    if keep_roles is not None:
        keep_set = set(keep_roles)
        filtered_window = [
            m for m in window_rows
            if m.get("id") == anchor_id or m.get("role") in keep_set
        ]
    else:
        filtered_window = window_rows

    window_min_id = window_rows[0]["id"]
    window_max_id = window_rows[-1]["id"]

    # Bookends are restricted in SQL by id range, role, and non-empty
    # content, so empty-content rows (e.g. tool-call-only assistant turns)
    # do not take up bookend slots.
    bookend_start_rows: List[Any] = []
    bookend_end_rows: List[Any] = []
    if bookend > 0:
        role_clause = ""
        role_params: list = []
        if keep_roles is not None:
            # Only "?" placeholders are interpolated; role values are bound.
            role_placeholders = ",".join("?" for _ in keep_roles)
            role_clause = f" AND role IN ({role_placeholders})"
            role_params = list(keep_roles)

        with self._lock:
            bookend_start_rows = self._conn.execute(
                f"SELECT * FROM messages "
                f"WHERE session_id = ? AND id < ?{role_clause} "
                f"AND length(content) > 0 "
                f"ORDER BY id ASC LIMIT ?",
                (session_id, window_min_id, *role_params, bookend),
            ).fetchall()

            bookend_end_rows = self._conn.execute(
                f"SELECT * FROM messages "
                f"WHERE session_id = ? AND id > ?{role_clause} "
                f"AND length(content) > 0 "
                f"ORDER BY id DESC LIMIT ?",
                (session_id, window_max_id, *role_params, bookend),
            ).fetchall()
        # End rows came back DESC so LIMIT keeps the tail; flip to ASC.
        bookend_end_rows = list(reversed(bookend_end_rows))

    def _hydrate(row) -> Dict[str, Any]:
        """Convert a DB row to a dict with decoded content and tool_calls."""
        msg = dict(row)
        if "content" in msg:
            msg["content"] = self._decode_content(msg["content"])
        if msg.get("tool_calls"):
            try:
                msg["tool_calls"] = json.loads(msg["tool_calls"])
            except (json.JSONDecodeError, TypeError):
                logger.warning(
                    "Failed to deserialize tool_calls in get_anchored_view, falling back to []"
                )
                msg["tool_calls"] = []
        return msg

    return {
        "window": filtered_window,
        "messages_before": primitive["messages_before"],
        "messages_after": primitive["messages_after"],
        "bookend_start": [_hydrate(r) for r in bookend_start_rows],
        "bookend_end": [_hydrate(r) for r in bookend_end_rows],
    }
|
||||
|
||||
def resolve_resume_session_id(self, session_id: str) -> str:
|
||||
"""Redirect a resume target to the descendant session that holds the messages.
|
||||
|
||||
|
|
@ -1902,6 +2100,7 @@ class SessionDB:
|
|||
role_filter: List[str] = None,
|
||||
limit: int = 20,
|
||||
offset: int = 0,
|
||||
sort: str = None,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Full-text search across session messages using FTS5.
|
||||
|
|
@ -1914,6 +2113,15 @@ class SessionDB:
|
|||
|
||||
Returns matching messages with session metadata, content snippet,
|
||||
and surrounding context (1 message before and after the match).
|
||||
|
||||
``sort`` controls temporal ordering:
|
||||
- ``None`` (default): FTS5 BM25 relevance only. Time-neutral.
|
||||
- ``"newest"``: order by message timestamp DESC, then by rank.
|
||||
- ``"oldest"``: order by message timestamp ASC, then by rank.
|
||||
|
||||
The short-CJK LIKE fallback already orders by timestamp DESC and
|
||||
ignores ``sort``. The trigram CJK path honours ``sort`` like the main
|
||||
FTS5 path.
|
||||
"""
|
||||
if not query or not query.strip():
|
||||
return []
|
||||
|
|
@ -1922,6 +2130,25 @@ class SessionDB:
|
|||
if not query:
|
||||
return []
|
||||
|
||||
# Normalise sort. Anything not in the allowed set falls back to None
|
||||
# (FTS5 rank-only) so callers can pass through user input without
|
||||
# validation.
|
||||
if isinstance(sort, str):
|
||||
sort_norm = sort.strip().lower()
|
||||
if sort_norm not in ("newest", "oldest"):
|
||||
sort_norm = None
|
||||
else:
|
||||
sort_norm = None
|
||||
|
||||
# ORDER BY shared across the main FTS5 path and trigram CJK path.
|
||||
# With sort set, timestamp is primary and rank is the tiebreaker.
|
||||
if sort_norm == "newest":
|
||||
order_by_sql = "ORDER BY m.timestamp DESC, rank"
|
||||
elif sort_norm == "oldest":
|
||||
order_by_sql = "ORDER BY m.timestamp ASC, rank"
|
||||
else:
|
||||
order_by_sql = "ORDER BY rank"
|
||||
|
||||
# Build WHERE clauses dynamically
|
||||
where_clauses = ["messages_fts MATCH ?"]
|
||||
params: list = [query]
|
||||
|
|
@ -1960,7 +2187,7 @@ class SessionDB:
|
|||
JOIN messages m ON m.id = messages_fts.rowid
|
||||
JOIN sessions s ON s.id = m.session_id
|
||||
WHERE {where_sql}
|
||||
ORDER BY rank
|
||||
{order_by_sql}
|
||||
LIMIT ? OFFSET ?
|
||||
"""
|
||||
|
||||
|
|
@ -2029,7 +2256,7 @@ class SessionDB:
|
|||
JOIN messages m ON m.id = messages_fts_trigram.rowid
|
||||
JOIN sessions s ON s.id = m.session_id
|
||||
WHERE {' AND '.join(tri_where)}
|
||||
ORDER BY rank
|
||||
{order_by_sql}
|
||||
LIMIT ? OFFSET ?
|
||||
"""
|
||||
tri_params.extend([limit, offset])
|
||||
|
|
|
|||
|
|
@ -1058,6 +1058,7 @@ AUTHOR_MAP = {
|
|||
"openclaw@agent.local": "29206394", # PR #22194 salvage (sudo -S brute-force guard, #9590)
|
||||
"freedemon@gmail.com": "fr33d3m0n", # PR #21128 salvage (sudo stdin/askpass DANGEROUS, #17873 cat 4)
|
||||
"zhaowh3613@outlook.com": "VinceZcrikl", # PR #23647 salvage (npm UTF-8 decode on GBK Windows)
|
||||
"abcdjmm970703@gmail.com": "JabberELF", # PR #20238 seed (session_search dual-mode, evolved into single-shape)
|
||||
"anton.kuenzi@gmail.com": "ZeterMordio", # PR #11754 salvage (zsh completion compdef + _arguments syntax)
|
||||
"23yntong@stu.edu.cn": "iuyup", # PR #6155 salvage (shell=True hardening)
|
||||
"86501179+1RB@users.noreply.github.com": "1RB", # PR #25462 salvage (discord forwarded messages)
|
||||
|
|
|
|||
223
tests/agent/test_system_prompt_restore.py
Normal file
223
tests/agent/test_system_prompt_restore.py
Normal file
|
|
@ -0,0 +1,223 @@
|
|||
"""Tests for ``agent.conversation_loop._restore_or_build_system_prompt``.
|
||||
|
||||
Validates the gateway DB-roundtrip path that keeps the system prompt
|
||||
byte-stable across turns (fresh AIAgent → must restore from session DB
|
||||
instead of rebuilding). Covers:
|
||||
|
||||
* Successful restore from a stored prompt (present row).
|
||||
* Legitimate first-turn build (no history).
|
||||
* Silent-failure recovery paths:
|
||||
- DB read raises → WARNING + fresh build
|
||||
- Row has system_prompt=NULL → WARNING + fresh build
|
||||
- Row has system_prompt="" → WARNING + fresh build
|
||||
- DB write fails → WARNING (subsequent turns will miss cache)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from agent.conversation_loop import _restore_or_build_system_prompt
|
||||
|
||||
|
||||
def _make_agent(session_db=None, prebuilt_prompt: str = "BUILT_PROMPT"):
|
||||
"""Construct the minimal agent fake the helper needs."""
|
||||
agent = MagicMock()
|
||||
agent._cached_system_prompt = None
|
||||
agent.session_id = "test-session-id"
|
||||
agent.model = "test-model"
|
||||
agent.platform = "cli"
|
||||
agent._session_db = session_db
|
||||
agent._build_system_prompt = MagicMock(return_value=prebuilt_prompt)
|
||||
return agent
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Happy paths
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestStoredPromptReuse:
    """A continuing session must get its persisted prompt back untouched."""

    def test_present_row_is_reused_verbatim(self, caplog):
        """A usable stored prompt is restored as-is, with no rebuild, no write, no warning."""
        stored = "Stored prompt from turn 1 — byte-identical reuse"
        db = MagicMock(**{"get_session.return_value": {"system_prompt": stored}})
        agent = _make_agent(session_db=db)
        history = [{"role": "user", "content": "hi"}]

        with caplog.at_level(logging.WARNING, logger="agent.conversation_loop"):
            _restore_or_build_system_prompt(agent, None, history)

        assert agent._cached_system_prompt == stored
        agent._build_system_prompt.assert_not_called()
        db.update_system_prompt.assert_not_called()
        # The happy path must stay silent at WARNING and above.
        loud_records = [rec for rec in caplog.records if rec.levelno >= logging.WARNING]
        assert not loud_records

    def test_present_row_with_unicode_preserved(self):
        """Non-ASCII characters in the stored prompt come back unchanged."""
        stored = "Stored prompt with unicode: ☤ ⚗ ◆ — and emoji 🦊"
        db = MagicMock(**{"get_session.return_value": {"system_prompt": stored}})
        agent = _make_agent(session_db=db)
        history = [{"role": "user", "content": "hi"}]

        _restore_or_build_system_prompt(agent, None, history)

        assert agent._cached_system_prompt == stored
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Legitimate fresh-build paths (no history, no DB)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestLegitimateFreshBuild:
    """Fresh builds that are expected and therefore must not warn."""

    def test_no_history_skips_db_and_builds_fresh(self, caplog):
        """With empty history the DB is not read; the prompt is built and persisted."""
        db = MagicMock()
        agent = _make_agent(session_db=db)
        no_history = []

        with caplog.at_level(logging.WARNING, logger="agent.conversation_loop"):
            _restore_or_build_system_prompt(agent, None, no_history)

        # Empty history means the read path is skipped entirely.
        db.get_session.assert_not_called()
        agent._build_system_prompt.assert_called_once_with(None)
        assert agent._cached_system_prompt == "BUILT_PROMPT"
        # The fresh prompt is written back to the DB.
        db.update_system_prompt.assert_called_once_with(agent.session_id, "BUILT_PROMPT")
        loud_records = [rec for rec in caplog.records if rec.levelno >= logging.WARNING]
        assert not loud_records

    def test_no_db_skips_persistence(self):
        """Without a session DB the prompt is still built and cached."""
        agent = _make_agent(session_db=None)

        _restore_or_build_system_prompt(agent, None, [])

        agent._build_system_prompt.assert_called_once()
        assert agent._cached_system_prompt == "BUILT_PROMPT"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Silent-failure recovery — these are the new A/B logging paths
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestSilentFailureWarnings:
|
||||
def test_db_read_exception_warns_and_rebuilds(self, caplog):
|
||||
"""DB read raising → WARNING + fall through to fresh build."""
|
||||
db = MagicMock()
|
||||
db.get_session.side_effect = RuntimeError("disk full")
|
||||
agent = _make_agent(session_db=db)
|
||||
|
||||
with caplog.at_level(logging.WARNING, logger="agent.conversation_loop"):
|
||||
_restore_or_build_system_prompt(agent, None, [{"role": "user", "content": "hi"}])
|
||||
|
||||
# Built fresh
|
||||
agent._build_system_prompt.assert_called_once()
|
||||
assert agent._cached_system_prompt == "BUILT_PROMPT"
|
||||
# Loud warning about the read failure
|
||||
warnings = [r for r in caplog.records if r.levelno >= logging.WARNING]
|
||||
assert any("get_session failed" in r.getMessage() for r in warnings), \
|
||||
f"Expected a get_session warning, got: {[r.getMessage() for r in warnings]}"
|
||||
assert any("disk full" in r.getMessage() for r in warnings)
|
||||
|
||||
def test_null_system_prompt_warns_about_unusable_stored_state(self, caplog):
|
||||
"""Row exists but system_prompt is NULL → WARNING + fresh build."""
|
||||
db = MagicMock()
|
||||
db.get_session.return_value = {"system_prompt": None}
|
||||
agent = _make_agent(session_db=db)
|
||||
|
||||
with caplog.at_level(logging.WARNING, logger="agent.conversation_loop"):
|
||||
_restore_or_build_system_prompt(agent, None, [{"role": "user", "content": "hi"}])
|
||||
|
||||
agent._build_system_prompt.assert_called_once()
|
||||
warnings = [r.getMessage() for r in caplog.records if r.levelno >= logging.WARNING]
|
||||
assert any("is null" in m and "rebuilding" in m for m in warnings), \
|
||||
f"Expected null-stored-prompt warning, got: {warnings}"
|
||||
|
||||
def test_empty_system_prompt_warns_about_silent_persistence_bug(self, caplog):
|
||||
"""Row exists but system_prompt is '' → WARNING about silent write bug."""
|
||||
db = MagicMock()
|
||||
db.get_session.return_value = {"system_prompt": ""}
|
||||
agent = _make_agent(session_db=db)
|
||||
|
||||
with caplog.at_level(logging.WARNING, logger="agent.conversation_loop"):
|
||||
_restore_or_build_system_prompt(agent, None, [{"role": "user", "content": "hi"}])
|
||||
|
||||
agent._build_system_prompt.assert_called_once()
|
||||
warnings = [r.getMessage() for r in caplog.records if r.levelno >= logging.WARNING]
|
||||
assert any("is empty" in m and "rebuilding" in m for m in warnings), \
|
||||
f"Expected empty-stored-prompt warning, got: {warnings}"
|
||||
|
||||
def test_db_write_failure_warns_loudly(self, caplog):
    """A raising ``update_system_prompt`` must surface as a WARNING.

    The mocked DB has no prior row (``get_session`` returns None) and its
    ``update_system_prompt`` raises ``RuntimeError("database is locked")``.
    The prompt must still be built and cached, and the failure must be
    logged at WARNING level including the exception text.
    """
    session_db = MagicMock()
    session_db.get_session.return_value = None  # first turn: nothing stored yet
    session_db.update_system_prompt.side_effect = RuntimeError("database is locked")
    agent = _make_agent(session_db=session_db)

    with caplog.at_level(logging.WARNING, logger="agent.conversation_loop"):
        _restore_or_build_system_prompt(agent, None, [])

    # The write failure must not prevent the build or the cache assignment.
    agent._build_system_prompt.assert_called_once()
    assert agent._cached_system_prompt == "BUILT_PROMPT"

    warning_texts = [
        record.getMessage()
        for record in caplog.records
        if record.levelno >= logging.WARNING
    ]
    matching = [
        text for text in warning_texts
        if "update_system_prompt failed" in text and "database is locked" in text
    ]
    assert matching, f"Expected write-failure warning, got: {warning_texts}"
|
||||
|
||||
def test_no_history_with_null_row_does_not_warn(self, caplog):
    """With an empty history the stored row is never read, so nothing warns.

    Even though the mocked DB would return a row with a None prompt, the
    test asserts ``get_session`` is not called at all and that no
    WARNING-level message mentions "rebuilding".
    """
    session_db = MagicMock()
    session_db.get_session.return_value = {"system_prompt": None}
    agent = _make_agent(session_db=session_db)

    with caplog.at_level(logging.WARNING, logger="agent.conversation_loop"):
        # Empty history: the DB read is expected to be skipped entirely.
        _restore_or_build_system_prompt(agent, None, [])

    session_db.get_session.assert_not_called()

    warning_texts = [
        record.getMessage()
        for record in caplog.records
        if record.levelno >= logging.WARNING
    ]
    assert all("rebuilding" not in text for text in warning_texts)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Byte-stability invariant
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestPromptStabilityInvariant:
    """Checks that a stored system prompt is restored without modification."""

    def test_restored_prompt_is_byte_identical_to_stored(self):
        """The restored prompt must equal the stored text exactly.

        No normalization, trimming, or concatenation may happen on the
        restore path: any byte-level change at this point invalidates the
        KV cache on every prefix-cache backend.
        """
        stored = (
            "You are Hermes Agent.\n"
            "\n"
            "Conversation started: Sunday, May 17, 2026\n"
            "Session ID: 20260517_153500_abc123\n"
        )
        session_db = MagicMock()
        session_db.get_session.return_value = {"system_prompt": stored}
        agent = _make_agent(session_db=session_db)
        history = [{"role": "user", "content": "hi"}]

        _restore_or_build_system_prompt(agent, None, history)

        restored = agent._cached_system_prompt
        # String equality: catches slicing, trimming, or added text.
        assert restored == stored
        # Same comparison on the encoded form, i.e. what a backend would hash.
        assert restored.encode("utf-8") == stored.encode("utf-8")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # pytest.main() returns the run's exit code instead of exiting; hand it
    # to the interpreter so running this file as a script reports failures
    # through the process exit status rather than always exiting 0.
    raise SystemExit(pytest.main([__file__, "-v"]))
|
||||
161
tests/hermes_state/test_get_anchored_view.py
Normal file
161
tests/hermes_state/test_get_anchored_view.py
Normal file
|
|
@ -0,0 +1,161 @@
|
|||
"""Tests for SessionDB.get_anchored_view — anchored window + session bookends.
|
||||
|
||||
Used by the discovery shape of session_search: an FTS5 match becomes the
|
||||
anchor, the call returns goal (bookend_start) + match (window) + resolution
|
||||
(bookend_end) in a single round trip, no LLM.
|
||||
"""
|
||||
import pytest
|
||||
|
||||
from hermes_state import SessionDB
|
||||
|
||||
|
||||
@pytest.fixture
def db(tmp_path):
    """Provide a ``SessionDB`` stored at ``state.db`` under pytest's ``tmp_path``."""
    db_path = tmp_path / "state.db"
    return SessionDB(db_path)
|
||||
|
||||
|
||||
def _seed_long_session(db, sid="s1", n=30):
    """Create session *sid* holding *n* prose messages and return their ids.

    Roles alternate starting with ``user`` (even index) then ``assistant``
    (odd index); message *i* has content ``"prose msg {i}"``. The returned
    list is in insertion order.
    """
    db.create_session(sid, source="cli")
    speakers = ("user", "assistant")
    return [
        db.append_message(sid, role=speakers[i % 2], content=f"prose msg {i}")
        for i in range(n)
    ]
|
||||
|
||||
|
||||
class TestWindowAndBookendShape:
    """Shape of the view returned for an anchor in the middle of a long session."""

    def test_returns_window_with_bookend_start_and_end(self, db):
        """A mid-session anchor yields a full window plus both bookends."""
        ids = _seed_long_session(db, n=30)
        view = db.get_anchored_view("s1", ids[15], window=3, bookend=3)

        # 3 before + anchor + 3 after.
        assert len(view["window"]) == 7
        assert len(view["bookend_start"]) == 3
        assert len(view["bookend_end"]) == 3

        head_ids = [msg["id"] for msg in view["bookend_start"]]
        tail_ids = [msg["id"] for msg in view["bookend_end"]]
        # Bookends are the first and last three messages of the session.
        assert head_ids == ids[:3]
        assert tail_ids == ids[-3:]

    def test_window_anchor_marked_correctly(self, db):
        """The anchor message appears exactly once in the window."""
        ids = _seed_long_session(db, n=20)
        anchor = ids[10]
        view = db.get_anchored_view("s1", anchor, window=2, bookend=3)

        occurrences = sum(1 for msg in view["window"] if msg["id"] == anchor)
        assert occurrences == 1
|
||||
|
||||
|
||||
class TestBookendOverlap:
    """Bookends shouldn't duplicate messages that are already in the window."""

    def test_bookend_start_empty_when_window_covers_session_head(self, db):
        """Anchoring near the start leaves no separate opening bookend."""
        ids = _seed_long_session(db, n=10)
        # Anchor at index 1 with window=3 reaches back to the first message.
        near_head = ids[1]
        view = db.get_anchored_view("s1", near_head, window=3, bookend=3)

        assert view["bookend_start"] == []
        # The closing bookend is unaffected.
        assert len(view["bookend_end"]) > 0

    def test_bookend_end_empty_when_window_covers_session_tail(self, db):
        """Anchoring near the end leaves no separate closing bookend."""
        ids = _seed_long_session(db, n=10)
        near_tail = ids[-2]  # second-to-last message
        view = db.get_anchored_view("s1", near_tail, window=3, bookend=3)

        assert view["bookend_end"] == []
        assert len(view["bookend_start"]) > 0

    def test_short_session_both_bookends_empty(self, db):
        """A window wider than the session swallows both bookends."""
        ids = _seed_long_session(db, n=5)
        view = db.get_anchored_view("s1", ids[2], window=10, bookend=3)

        for side in ("bookend_start", "bookend_end"):
            assert view[side] == []
        # All five messages land in the window itself.
        assert len(view["window"]) == 5
|
||||
|
||||
|
||||
class TestRoleFiltering:
    """Which roles show up in the window under the default and disabled filters."""

    def test_tool_role_filtered_from_window(self, db):
        """With default arguments, tool messages do not appear in the window."""
        db.create_session("s1", source="cli")
        user_ids = []
        for i in range(5):
            # Each user turn is immediately followed by a tool turn.
            uid = db.append_message("s1", role="user", content=f"u{i}")
            user_ids.append(uid)
            db.append_message("s1", role="tool", content=f"tool output {i}", tool_name="x")

        view = db.get_anchored_view("s1", user_ids[2], window=5, bookend=0)

        assert all(msg.get("role") != "tool" for msg in view["window"])

    def test_anchor_preserved_even_when_tool_role(self, db):
        """An anchor that is itself a tool message is still returned."""
        db.create_session("s1", source="cli")
        db.append_message("s1", role="user", content="ask")
        tool_id = db.append_message("s1", role="tool", content="tool output", tool_name="x")
        db.append_message("s1", role="user", content="follow-up")

        view = db.get_anchored_view("s1", tool_id, window=5, bookend=0)

        assert any(msg["id"] == tool_id for msg in view["window"])

    def test_keep_roles_none_disables_filter(self, db):
        """Passing ``keep_roles=None`` lets tool messages through."""
        db.create_session("s1", source="cli")
        anchor_id = db.append_message("s1", role="user", content="ask")
        db.append_message("s1", role="tool", content="output", tool_name="x")

        view = db.get_anchored_view(
            "s1", anchor_id, window=5, bookend=0, keep_roles=None
        )

        assert any(msg.get("role") == "tool" for msg in view["window"])
|
||||
|
||||
|
||||
class TestEmptyContentFilter:
    """Tool-call-only assistant turns (empty content) should be skipped in bookends."""

    def test_empty_content_messages_excluded_from_bookends(self, db):
        """Bookends skip empty-content turns but keep the prose closer.

        Session layout, in insertion order: a prose opener, an empty
        assistant turn carrying only ``tool_calls``, twenty prose turns,
        another empty assistant turn, and a prose closer.
        """
        db.create_session("s1", source="cli")
        # Real prose opener
        db.append_message("s1", role="user", content="Let's start the work")
        # Empty content assistant turn (tool-call-only — common in agent loops)
        db.append_message(
            "s1",
            role="assistant",
            content="",
            tool_calls=[{"id": "t1", "function": {"name": "x", "arguments": "{}"}}],
        )
        # More prose; keep the ids so the anchor is a recorded id rather
        # than arithmetic on an id (which assumes ids are contiguous).
        prose_ids = [
            db.append_message(
                "s1",
                role="user" if i % 2 == 0 else "assistant",
                content=f"prose {i}",
            )
            for i in range(20)
        ]
        # Another empty assistant near the end
        db.append_message(
            "s1",
            role="assistant",
            content="",
            tool_calls=[{"id": "t2", "function": {"name": "y", "arguments": "{}"}}],
        )
        # Prose closer
        db.append_message("s1", role="assistant", content="Final decision: ship it.")

        # Anchor mid-session. With contiguous ids this is the message the
        # previous ``opener + 15`` expression resolved to.
        anchor = prose_ids[13]
        view = db.get_anchored_view("s1", anchor, window=2, bookend=3)

        # bookend_start should not contain the empty-content tool-call turn
        for m in view["bookend_start"]:
            assert m.get("content"), "bookend_start should skip empty-content messages"
        # bookend_end should include the closer
        end_contents = [m.get("content") for m in view["bookend_end"]]
        assert any("Final decision" in (c or "") for c in end_contents)
|
||||
|
||||
|
||||
class TestAnchorValidation:
    """Behaviour of ``get_anchored_view`` when the anchor id does not exist."""

    def test_missing_anchor_returns_empty_view(self, db):
        """An unknown anchor id yields empty lists and zero counts."""
        _seed_long_session(db, n=10)
        view = db.get_anchored_view("s1", 999999, window=5, bookend=3)

        for list_key in ("window", "bookend_start", "bookend_end"):
            assert view[list_key] == []
        for count_key in ("messages_before", "messages_after"):
            assert view[count_key] == 0
|
||||
|
||||
|
||||
class TestSessionIsolation:
    """Bookends must not cross session boundaries."""

    def test_bookends_only_from_anchor_session(self, db):
        """With two seeded sessions, every bookend message belongs to ``s1``."""
        s1_ids = _seed_long_session(db, sid="s1", n=20)
        _seed_long_session(db, sid="s2", n=20)

        view = db.get_anchored_view("s1", s1_ids[10], window=2, bookend=3)

        bookends = view["bookend_start"] + view["bookend_end"]
        # Each returned message dict is expected to carry its session_id.
        assert all(msg.get("session_id") == "s1" for msg in bookends)
|
||||
148
tests/hermes_state/test_get_messages_around.py
Normal file
148
tests/hermes_state/test_get_messages_around.py
Normal file
|
|
@ -0,0 +1,148 @@
|
|||
"""Tests for SessionDB.get_messages_around (anchored-window primitive).
|
||||
|
||||
Used by session_search both for the discovery shape (FTS5 match as anchor)
|
||||
and the scroll shape (user-supplied anchor). Returns a window of messages
|
||||
around the anchor plus before/after counts so callers can detect session
|
||||
boundaries.
|
||||
"""
|
||||
import pytest
|
||||
|
||||
from hermes_state import SessionDB
|
||||
|
||||
|
||||
@pytest.fixture
def db(tmp_path):
    """Provide a ``SessionDB`` stored at ``state.db`` under pytest's ``tmp_path``."""
    db_path = tmp_path / "state.db"
    return SessionDB(db_path)
|
||||
|
||||
|
||||
def _seed(db, sid="s1", n=10):
    """Create session *sid* with *n* messages and return their ids in order.

    Roles alternate starting with ``user`` (even index) then ``assistant``
    (odd index); message *i* has content ``"msg {i}"``. Each id is the
    value returned by ``append_message``.
    """
    db.create_session(sid, source="cli")
    speakers = ("user", "assistant")
    return [
        db.append_message(sid, role=speakers[i % 2], content=f"msg {i}")
        for i in range(n)
    ]
|
||||
|
||||
|
||||
class TestBasicWindow:
    """Window size and contents for anchors away from the session edges."""

    def test_returns_window_around_anchor(self, db):
        """``window=2`` returns two messages on each side of the anchor."""
        ids = _seed(db, n=10)
        view = db.get_messages_around("s1", ids[5], window=2)

        window_ids = [msg["id"] for msg in view["window"]]
        # 2 before + anchor + 2 after, in ascending order.
        assert len(window_ids) == 5
        assert window_ids == ids[3:8]
        assert view["messages_before"] == 2
        assert view["messages_after"] == 2

    def test_window_zero_returns_only_anchor(self, db):
        """``window=0`` returns just the anchor with zero neighbours counted."""
        ids = _seed(db, n=5)
        view = db.get_messages_around("s1", ids[2], window=0)

        window = view["window"]
        assert len(window) == 1
        assert window[0]["id"] == ids[2]
        assert view["messages_before"] == 0
        assert view["messages_after"] == 0

    def test_negative_window_clamps_to_zero(self, db):
        """A negative window behaves like ``window=0``: only the anchor."""
        ids = _seed(db, n=5)
        view = db.get_messages_around("s1", ids[2], window=-3)

        window = view["window"]
        assert len(window) == 1
        assert window[0]["id"] == ids[2]
|
||||
|
||||
|
||||
class TestBoundaryDetection:
    """messages_before / messages_after tell the agent it's at start/end."""

    def test_at_session_start_messages_before_is_short(self, db):
        """Anchoring on the first message reports nothing before it."""
        ids = _seed(db, n=10)
        first = ids[0]
        view = db.get_messages_around("s1", first, window=5)

        assert view["messages_before"] == 0
        assert view["messages_after"] == 5
        # Anchor plus the five messages after it.
        assert len(view["window"]) == 6

    def test_at_session_end_messages_after_is_short(self, db):
        """Anchoring on the last message reports nothing after it."""
        ids = _seed(db, n=10)
        last = ids[-1]
        view = db.get_messages_around("s1", last, window=5)

        assert view["messages_before"] == 5
        assert view["messages_after"] == 0
        assert len(view["window"]) == 6

    def test_window_larger_than_session(self, db):
        """An oversized window returns the whole three-message session."""
        ids = _seed(db, n=3)
        middle = ids[1]
        view = db.get_messages_around("s1", middle, window=50)

        # Both edges are hit: one message on each side of the anchor.
        assert len(view["window"]) == 3
        assert view["messages_before"] == 1
        assert view["messages_after"] == 1
|
||||
|
||||
|
||||
class TestAnchorValidation:
    """Behaviour of ``get_messages_around`` for anchors that cannot be used."""

    def test_missing_anchor_returns_empty(self, db):
        """An unknown anchor id yields an empty window and zero counts."""
        _seed(db, n=5)
        view = db.get_messages_around("s1", 99999, window=5)

        assert view["window"] == []
        for count_key in ("messages_before", "messages_after"):
            assert view[count_key] == 0

    def test_anchor_in_different_session_returns_empty(self, db):
        """An id that belongs to ``s1`` is not found when queried under ``s2``."""
        s1_ids = _seed(db, sid="s1", n=5)
        _seed(db, sid="s2", n=5)

        view = db.get_messages_around("s2", s1_ids[2], window=2)

        assert view["window"] == []
|
||||
|
||||
|
||||
class TestScrollPattern:
    """The forward/backward scroll loop the agent will run."""

    def test_scroll_forward_re_anchored_on_last_id(self, db):
        """Re-anchoring on the window's last id moves the view forward."""
        ids = _seed(db, n=20)
        first_view = db.get_messages_around("s1", ids[5], window=3)
        last_id = first_view["window"][-1]["id"]
        second_view = db.get_messages_around("s1", last_id, window=3)

        first_ids = [msg["id"] for msg in first_view["window"]]
        second_ids = [msg["id"] for msg in second_view["window"]]
        # The boundary id is shared: tail of the first view, anchor of the second.
        assert last_id in first_ids
        assert last_id in second_ids
        # The second view reaches past the first.
        assert max(second_ids) > max(first_ids)

    def test_scroll_backward_re_anchored_on_first_id(self, db):
        """Re-anchoring on the window's first id moves the view backward."""
        ids = _seed(db, n=20)
        first_view = db.get_messages_around("s1", ids[10], window=3)
        first_id = first_view["window"][0]["id"]
        second_view = db.get_messages_around("s1", first_id, window=3)

        first_ids = [msg["id"] for msg in first_view["window"]]
        second_ids = [msg["id"] for msg in second_view["window"]]
        assert first_id in first_ids
        assert first_id in second_ids
        assert min(second_ids) < min(first_ids)
|
||||
|
||||
|
||||
class TestContentHydration:
    """Returned message dicts carry decoded content and structured tool_calls."""

    def test_content_is_decoded(self, db):
        """Every window message exposes its content as the seeded string."""
        ids = _seed(db, n=3)
        view = db.get_messages_around("s1", ids[1], window=1)

        for msg in view["window"]:
            body = msg.get("content")
            assert isinstance(body, str)
            assert msg["content"].startswith("msg ")

    def test_tool_calls_deserialized(self, db):
        """``tool_calls`` comes back as a list, not as a string."""
        db.create_session("s1", source="cli")
        # A list is passed in; per the original test note, append_message
        # JSON-encodes it on the way in.
        payload = [{"id": "t1", "function": {"name": "x", "arguments": "{}"}}]
        db.append_message("s1", role="assistant", content="", tool_calls=payload)
        tool_msg_id = db.append_message("s1", role="tool", content="result", tool_name="x")

        view = db.get_messages_around("s1", tool_msg_id, window=2)

        assistant_msgs = [
            msg for msg in view["window"] if msg.get("role") == "assistant"
        ]
        assert assistant_msgs, "expected an assistant message"
        assert isinstance(assistant_msgs[0].get("tool_calls"), list)
|
||||
|
|
@ -989,6 +989,28 @@ class TestBuildSystemPrompt:
|
|||
# Should contain current date info like "Conversation started:"
|
||||
assert "Conversation started:" in prompt
|
||||
|
||||
def test_datetime_is_date_only_not_minute_precision(self, agent):
    """Timestamp must be date-only (no HH:MM) so the system prompt
    stays byte-stable for the full day. Minute precision invalidates
    prefix-cache KV on every rebuild path (compression, fresh-agent
    gateway turns, session resume without a stored prompt)."""
    import re as _re  # imported once, outside any loop

    prompt = agent._build_system_prompt()

    # Only the first "Conversation started:" line is inspected.
    stamp_line = next(
        (ln for ln in prompt.splitlines() if ln.startswith("Conversation started:")),
        None,
    )
    if stamp_line is None:
        # Raised explicitly: a bare `assert False` is removed under
        # `python -O`, which would let a missing line pass silently.
        raise AssertionError(
            "Expected a 'Conversation started:' line in the system prompt"
        )

    # Must NOT contain AM/PM indicator (minute precision had %I:%M %p)
    assert " AM" not in stamp_line and " PM" not in stamp_line, (
        f"Timestamp line has time-of-day, breaks daily cache stability: {stamp_line!r}"
    )
    # Must NOT contain a colon followed by two digits (HH:MM pattern)
    assert not _re.search(r":\d{2}", stamp_line), (
        f"Timestamp line has HH:MM, breaks daily cache stability: {stamp_line!r}"
    )
|
||||
|
||||
def test_includes_nous_subscription_prompt(self, agent, monkeypatch):
|
||||
monkeypatch.setattr(run_agent, "build_nous_subscription_prompt", lambda tool_names: "NOUS SUBSCRIPTION BLOCK")
|
||||
prompt = agent._build_system_prompt()
|
||||
|
|
@ -1074,6 +1096,40 @@ class TestToolUseEnforcementConfig:
|
|||
prompt = agent._build_system_prompt()
|
||||
assert TOOL_USE_ENFORCEMENT_GUIDANCE not in prompt
|
||||
|
||||
def test_auto_injects_for_grok(self):
    """In ``auto`` mode, ``x-ai/grok-4.3`` gets TOOL_USE_ENFORCEMENT_GUIDANCE.

    Per the original note, xAI Grok / xai-oauth models are meant to hit the
    same enforcement path as GPT.
    """
    from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE

    grok_agent = self._make_agent(
        model="x-ai/grok-4.3",
        tool_use_enforcement="auto",
    )
    assert TOOL_USE_ENFORCEMENT_GUIDANCE in grok_agent._build_system_prompt()
|
||||
|
||||
def test_auto_injects_execution_guidance_for_grok(self):
    """In ``auto`` mode, ``x-ai/grok-4.3`` gets OPENAI_MODEL_EXECUTION_GUIDANCE.

    The original note describes that guidance as covering verification,
    mandatory_tool_use and act_dont_ask, and motivates it by Grok showing
    the same failure modes as GPT in practice: claiming completion without
    tool calls and suggesting workarounds instead of using existing tools.
    """
    from agent.prompt_builder import OPENAI_MODEL_EXECUTION_GUIDANCE

    grok_agent = self._make_agent(
        model="x-ai/grok-4.3",
        tool_use_enforcement="auto",
    )
    assert OPENAI_MODEL_EXECUTION_GUIDANCE in grok_agent._build_system_prompt()
|
||||
|
||||
def test_auto_injects_execution_guidance_for_xai_oauth_model(self):
    """A bare ``grok-4.3`` model name (no slash) also gets the execution guidance."""
    from agent.prompt_builder import OPENAI_MODEL_EXECUTION_GUIDANCE

    bare_grok_agent = self._make_agent(
        model="grok-4.3",
        tool_use_enforcement="auto",
    )
    assert OPENAI_MODEL_EXECUTION_GUIDANCE in bare_grok_agent._build_system_prompt()
|
||||
|
||||
def test_auto_does_not_inject_execution_guidance_for_claude(self):
    """Sanity check: ``anthropic/claude-sonnet-4`` in ``auto`` mode gets no execution guidance."""
    from agent.prompt_builder import OPENAI_MODEL_EXECUTION_GUIDANCE

    claude_agent = self._make_agent(
        model="anthropic/claude-sonnet-4",
        tool_use_enforcement="auto",
    )
    built = claude_agent._build_system_prompt()
    assert OPENAI_MODEL_EXECUTION_GUIDANCE not in built
|
||||
|
||||
def test_true_forces_for_all_models(self):
|
||||
from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE
|
||||
agent = self._make_agent(model="anthropic/claude-sonnet-4", tool_use_enforcement=True)
|
||||
|
|
|
|||
|
|
@ -155,24 +155,6 @@ class TestSkillsGuardContentNone:
|
|||
assert content == ""
|
||||
|
||||
|
||||
# ── session_search_tool (line 164) ────────────────────────────────────────
|
||||
|
||||
class TestSessionSearchContentNone:
|
||||
"""tools/session_search_tool.py — _summarize_session() return line"""
|
||||
|
||||
def test_none_content_raises_before_fix(self):
|
||||
response = _make_response(None)
|
||||
|
||||
with pytest.raises(AttributeError):
|
||||
response.choices[0].message.content.strip()
|
||||
|
||||
def test_none_content_safe_with_or_guard(self):
|
||||
response = _make_response(None)
|
||||
|
||||
content = (response.choices[0].message.content or "").strip()
|
||||
assert content == ""
|
||||
|
||||
|
||||
# ── integration: verify the actual source lines are guarded ───────────────
|
||||
|
||||
class TestSourceLinesAreGuarded:
|
||||
|
|
@ -218,13 +200,6 @@ class TestSourceLinesAreGuarded:
|
|||
".content.strip() — apply `(... or \"\").strip()` guard"
|
||||
)
|
||||
|
||||
def test_session_search_tool_guarded(self):
|
||||
src = self._read_file("tools/session_search_tool.py")
|
||||
assert ".message.content.strip()" not in src, (
|
||||
"tools/session_search_tool.py still has unguarded "
|
||||
".content.strip() — apply `(... or \"\").strip()` guard"
|
||||
)
|
||||
|
||||
|
||||
# ── extract_content_or_reasoning() ────────────────────────────────────────
|
||||
|
||||
|
|
|
|||
|
|
@ -1,578 +1,401 @@
|
|||
"""Tests for tools/session_search_tool.py — helper functions and search dispatcher."""
|
||||
"""Tests for the single-shape session_search tool.
|
||||
|
||||
import asyncio
|
||||
Three calling shapes:
|
||||
1. DISCOVERY — pass query → FTS5 + anchored window + bookends per hit
|
||||
2. SCROLL — pass session_id + around_message_id → just the window
|
||||
3. BROWSE — no args → recent sessions chronologically
|
||||
|
||||
All run zero LLM calls.
|
||||
"""
|
||||
import json
|
||||
import time
|
||||
|
||||
import pytest
|
||||
|
||||
from hermes_state import SessionDB
|
||||
from tools.session_search_tool import (
|
||||
_format_timestamp,
|
||||
_format_conversation,
|
||||
_truncate_around_matches,
|
||||
_get_session_search_max_concurrency,
|
||||
_list_recent_sessions,
|
||||
_HIDDEN_SESSION_SOURCES,
|
||||
MAX_SESSION_CHARS,
|
||||
SESSION_SEARCH_SCHEMA,
|
||||
_HIDDEN_SESSION_SOURCES,
|
||||
_format_timestamp,
|
||||
session_search,
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
def db(tmp_path):
    """Provide a ``SessionDB`` stored at ``state.db`` under pytest's ``tmp_path``."""
    db_path = tmp_path / "state.db"
    return SessionDB(db_path)
|
||||
|
||||
|
||||
def _seed_modpack_sessions(db):
    """Create three sessions about a modpack so FTS5 has hits to dedupe.

    For each session, in order (oldest, middle, newest): the session is
    created with ``source="cli"``, its ``started_at`` and ``title`` are
    overwritten through the raw connection, and its messages are appended.
    A single commit is issued after all three sessions are written.
    """
    now = int(time.time())

    # (session id, seconds before now, title, ordered (role, content) turns)
    fixtures = (
        (
            "s_oldest",
            30000,
            "Building the Modpack",
            (
                ("user", "Let's build a Minecraft modpack"),
                ("assistant", "Great. Let me scaffold the modpack repo."),
                ("user", "Use NeoForge 1.21.1"),
                ("assistant", "Done. Modpack repo created with NeoForge 1.21.1."),
                ("assistant", "Tier-0 mods installed; modpack smoke test passes."),
            ),
        ),
        (
            "s_middle",
            15000,
            "Modpack Quest Coverage",
            (
                ("user", "Deep-dive every modpack reference quest guide"),
                ("assistant", "Surveying ATM10 questbook for modpack inspiration."),
                ("user", "Update the modpack version too"),
                ("assistant", "Modpack version bumped 0.4 → 0.8.5; quest coverage page added."),
            ),
        ),
        (
            "s_newest",
            1000,
            "Modpack Mob Spawn Fix",
            (
                ("user", "Fix the modpack mob spawning"),
                ("assistant", "Investigating elite mob gating in the modpack KubeJS."),
                ("assistant", "Shipped commit b850442. Modpack alternator nerfed too."),
            ),
        ),
    )

    for session_id, age_seconds, title, turns in fixtures:
        db.create_session(session_id, source="cli")
        # Backdate the session and give it a title via the raw connection.
        db._conn.execute(
            "UPDATE sessions SET started_at = ?, title = ? WHERE id = ?",
            (now - age_seconds, title, session_id),
        )
        for role, content in turns:
            db.append_message(session_id, role=role, content=content)

    db._conn.commit()
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Tool schema guidance
|
||||
# Schema invariants
|
||||
# =========================================================================
|
||||
|
||||
class TestHiddenSessionSources:
|
||||
"""Verify the _HIDDEN_SESSION_SOURCES constant used for third-party isolation."""
|
||||
class TestSchema:
|
||||
def test_schema_has_required_params(self):
|
||||
params = SESSION_SEARCH_SCHEMA["parameters"]["properties"]
|
||||
# Discovery shape
|
||||
assert "query" in params
|
||||
assert "limit" in params
|
||||
assert "sort" in params
|
||||
# Scroll shape
|
||||
assert "session_id" in params
|
||||
assert "around_message_id" in params
|
||||
assert "window" in params
|
||||
# Shared
|
||||
assert "role_filter" in params
|
||||
|
||||
def test_tool_source_is_hidden(self):
|
||||
def test_no_mode_parameter(self):
|
||||
# Mode is inferred from which args are set — no explicit mode param
|
||||
params = SESSION_SEARCH_SCHEMA["parameters"]["properties"]
|
||||
assert "mode" not in params
|
||||
|
||||
def test_sort_enum(self):
|
||||
params = SESSION_SEARCH_SCHEMA["parameters"]["properties"]
|
||||
assert params["sort"]["enum"] == ["newest", "oldest"]
|
||||
|
||||
def test_schema_description_teaches_scroll(self):
|
||||
desc = SESSION_SEARCH_SCHEMA["description"]
|
||||
assert "SCROLL" in desc
|
||||
assert "DISCOVERY" in desc
|
||||
assert "BROWSE" in desc
|
||||
# Must explain how to scroll
|
||||
assert "scroll FORWARD" in desc or "messages[-1]" in desc
|
||||
|
||||
def test_no_llm_promise_in_description(self):
|
||||
# The new design never calls an LLM
|
||||
desc = SESSION_SEARCH_SCHEMA["description"].lower()
|
||||
assert "no llm" in desc
|
||||
|
||||
|
||||
class TestHiddenSources:
|
||||
def test_tool_source_hidden(self):
|
||||
assert "tool" in _HIDDEN_SESSION_SOURCES
|
||||
|
||||
def test_standard_sources_not_hidden(self):
|
||||
for src in ("cli", "telegram", "discord", "slack", "cron"):
|
||||
assert src not in _HIDDEN_SESSION_SOURCES
|
||||
|
||||
|
||||
class TestSessionSearchSchema:
|
||||
def test_keeps_cross_session_recall_guidance_without_current_session_nudge(self):
|
||||
description = SESSION_SEARCH_SCHEMA["description"]
|
||||
assert "past conversations" in description
|
||||
assert "recent turns of the current session" not in description
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# _format_timestamp
|
||||
# =========================================================================
|
||||
|
||||
class TestFormatTimestamp:
|
||||
def test_unix_float(self):
|
||||
ts = 1700000000.0 # Nov 14, 2023
|
||||
result = _format_timestamp(ts)
|
||||
assert "2023" in result or "November" in result
|
||||
def test_unix_timestamp(self):
|
||||
out = _format_timestamp(1700000000)
|
||||
assert "2023" in out
|
||||
|
||||
def test_unix_int(self):
|
||||
result = _format_timestamp(1700000000)
|
||||
assert isinstance(result, str)
|
||||
assert len(result) > 5
|
||||
|
||||
def test_iso_string(self):
|
||||
result = _format_timestamp("2024-01-15T10:30:00")
|
||||
assert isinstance(result, str)
|
||||
|
||||
def test_none_returns_unknown(self):
|
||||
def test_none(self):
|
||||
assert _format_timestamp(None) == "unknown"
|
||||
|
||||
def test_numeric_string(self):
|
||||
result = _format_timestamp("1700000000.0")
|
||||
assert isinstance(result, str)
|
||||
assert "unknown" not in result.lower()
|
||||
def test_iso_string_passthrough(self):
|
||||
out = _format_timestamp("not-a-number-string")
|
||||
assert out == "not-a-number-string"
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# _format_conversation
|
||||
# Browse shape (no args)
|
||||
# =========================================================================
|
||||
|
||||
class TestFormatConversation:
|
||||
def test_basic_messages(self):
|
||||
msgs = [
|
||||
{"role": "user", "content": "Hello"},
|
||||
{"role": "assistant", "content": "Hi there!"},
|
||||
]
|
||||
result = _format_conversation(msgs)
|
||||
assert "[USER]: Hello" in result
|
||||
assert "[ASSISTANT]: Hi there!" in result
|
||||
|
||||
def test_tool_message(self):
|
||||
msgs = [
|
||||
{"role": "tool", "content": "search results", "tool_name": "web_search"},
|
||||
]
|
||||
result = _format_conversation(msgs)
|
||||
assert "[TOOL:web_search]" in result
|
||||
|
||||
def test_long_tool_output_truncated(self):
|
||||
msgs = [
|
||||
{"role": "tool", "content": "x" * 1000, "tool_name": "terminal"},
|
||||
]
|
||||
result = _format_conversation(msgs)
|
||||
assert "[truncated]" in result
|
||||
|
||||
def test_assistant_with_tool_calls(self):
|
||||
msgs = [
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": "",
|
||||
"tool_calls": [
|
||||
{"function": {"name": "web_search"}},
|
||||
{"function": {"name": "terminal"}},
|
||||
],
|
||||
},
|
||||
]
|
||||
result = _format_conversation(msgs)
|
||||
assert "web_search" in result
|
||||
assert "terminal" in result
|
||||
|
||||
def test_empty_messages(self):
|
||||
result = _format_conversation([])
|
||||
assert result == ""
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# _truncate_around_matches
|
||||
# =========================================================================
|
||||
|
||||
class TestTruncateAroundMatches:
    """Behavioural checks for ``_truncate_around_matches``."""

    def test_short_text_unchanged(self):
        """A text that is already short comes back untouched."""
        original = "Short text about docker"
        assert _truncate_around_matches(original, "docker") == original

    def test_long_text_truncated(self):
        """An oversized text is shortened but still contains the query term."""
        # The term sits between two runs that each exceed MAX_SESSION_CHARS.
        side = "x" * (MAX_SESSION_CHARS + 5000)
        haystack = side + " KEYWORD_HERE " + side
        clipped = _truncate_around_matches(haystack, "KEYWORD_HERE")
        # 100 characters of slack for the prefix/suffix markers.
        assert len(clipped) <= MAX_SESSION_CHARS + 100
        assert "KEYWORD_HERE" in clipped

    def test_truncation_adds_markers(self):
        """Truncated output mentions 'truncated' (case-insensitively)."""
        head = "a" * 50000
        tail = "b" * (MAX_SESSION_CHARS + 5000)
        clipped = _truncate_around_matches(head + " target " + tail, "target")
        assert "truncated" in clipped.lower()

    def test_no_match_takes_from_start(self):
        """Without any match the output is expected to keep the start of the text."""
        haystack = "x" * (MAX_SESSION_CHARS + 5000)
        clipped = _truncate_around_matches(haystack, "nonexistent")
        assert clipped.startswith("x")

    def test_match_at_beginning(self):
        """A term located at the very start of the text survives truncation."""
        haystack = "KEYWORD " + "x" * (MAX_SESSION_CHARS + 5000)
        assert "KEYWORD" in _truncate_around_matches(haystack, "KEYWORD")

    def test_multiword_phrase_match_beats_individual_term(self):
        """The full phrase deep in the text wins over an early lone term.

        A single query word ('project') occurs much earlier in boilerplate,
        yet the returned window must still contain the whole phrase.
        """
        boilerplate = "The project setup is complex. " * 500  # ~15K chars, 'project' early
        filler = "x" * (MAX_SESSION_CHARS + 20000)
        target = "We reviewed the keystone project roadmap in detail."
        haystack = "".join([boilerplate, filler, target, filler])
        clipped = _truncate_around_matches(haystack, "keystone project")
        assert "keystone project" in clipped.lower()

    def test_multiword_proximity_cooccurrence(self):
        """With no exact phrase, nearby co-occurring terms beat a lone early term."""
        early = "project " + "a" * (MAX_SESSION_CHARS + 20000)
        # Both terms appear close together here, but not as the exact phrase.
        cooccur = "this keystone initiative for the project was pivotal"
        tail = "b" * (MAX_SESSION_CHARS + 20000)
        clipped = _truncate_around_matches(early + cooccur + tail, "keystone project")
        lowered = clipped.lower()
        assert "keystone" in lowered
        assert "project" in lowered

    def test_multiword_window_maximises_coverage(self):
        """The chosen window should include both phrase occurrences."""
        # Two phrase matches, at ~50K and ~60K, close enough to share a window.
        segments = [
            "z" * 50000,
            " alpha beta ",
            "z" * 10000,
            " alpha beta ",
            "z" * (MAX_SESSION_CHARS + 40000),
        ]
        clipped = _truncate_around_matches("".join(segments), "alpha beta")
        assert clipped.lower().count("alpha beta") == 2
|
||||
|
||||
|
||||
class TestSessionSearchConcurrency:
|
||||
def test_defaults_to_three(self):
|
||||
assert _get_session_search_max_concurrency() == 3
|
||||
|
||||
def test_reads_and_clamps_configured_value(self, monkeypatch):
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.config.load_config",
|
||||
lambda: {"auxiliary": {"session_search": {"max_concurrency": 9}}},
|
||||
)
|
||||
assert _get_session_search_max_concurrency() == 5
|
||||
|
||||
def test_session_search_respects_configured_concurrency_limit(self, monkeypatch):
|
||||
from unittest.mock import MagicMock
|
||||
from tools.session_search_tool import session_search
|
||||
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.config.load_config",
|
||||
lambda: {"auxiliary": {"session_search": {"max_concurrency": 1}}},
|
||||
)
|
||||
|
||||
max_seen = {"value": 0}
|
||||
active = {"value": 0}
|
||||
|
||||
async def fake_summarize(_text, _query, _meta):
|
||||
active["value"] += 1
|
||||
max_seen["value"] = max(max_seen["value"], active["value"])
|
||||
await asyncio.sleep(0.01)
|
||||
active["value"] -= 1
|
||||
return "summary"
|
||||
|
||||
monkeypatch.setattr("tools.session_search_tool._summarize_session", fake_summarize)
|
||||
monkeypatch.setattr("model_tools._run_async", lambda coro: asyncio.run(coro))
|
||||
|
||||
mock_db = MagicMock()
|
||||
mock_db.search_messages.return_value = [
|
||||
{"session_id": "s1", "source": "cli", "session_started": 1709500000, "model": "test"},
|
||||
{"session_id": "s2", "source": "cli", "session_started": 1709500001, "model": "test"},
|
||||
{"session_id": "s3", "source": "cli", "session_started": 1709500002, "model": "test"},
|
||||
]
|
||||
mock_db.get_session.side_effect = lambda sid: {
|
||||
"id": sid,
|
||||
"parent_session_id": None,
|
||||
"source": "cli",
|
||||
"started_at": 1709500000,
|
||||
}
|
||||
mock_db.get_messages_as_conversation.side_effect = lambda sid: [
|
||||
{"role": "user", "content": f"message from {sid}"},
|
||||
{"role": "assistant", "content": "response"},
|
||||
]
|
||||
|
||||
result = json.loads(session_search(query="message", db=mock_db, limit=3))
|
||||
|
||||
class TestBrowseShape:
|
||||
def test_no_args_returns_recent_sessions(self, db):
|
||||
_seed_modpack_sessions(db)
|
||||
result = json.loads(session_search(db=db))
|
||||
assert result["success"] is True
|
||||
assert result["count"] == 3
|
||||
assert max_seen["value"] == 1
|
||||
assert result["mode"] == "browse"
|
||||
assert result["count"] >= 3
|
||||
|
||||
def test_browse_excludes_current_session(self, db):
    """Browse results omit the session passed as ``current_session_id``."""
    _seed_modpack_sessions(db)
    payload = json.loads(session_search(db=db, current_session_id="s_newest"))
    listed_ids = [entry["session_id"] for entry in payload["results"]]
    assert "s_newest" not in listed_ids
|
||||
|
||||
class TestRecentSessionListing:
    """Checks for ``_list_recent_sessions`` against a mocked session DB."""

    def test_recent_mode_requests_last_active_ordering(self):
        """The listing queries ``list_sessions_rich`` ordered by last activity."""
        from unittest.mock import MagicMock

        fake_db = MagicMock()
        fake_db.list_sessions_rich.return_value = []

        payload = json.loads(_list_recent_sessions(fake_db, limit=5))

        assert payload["success"] is True
        expected_kwargs = {
            "limit": 10,
            "exclude_sources": ["tool"],
            "order_by_last_active": True,
        }
        fake_db.list_sessions_rich.assert_called_once_with(**expected_kwargs)

    def test_current_child_session_excludes_root_lineage_even_when_child_id_is_longer(self):
        """The root of the current child session is dropped from the listing."""
        from unittest.mock import MagicMock

        child_id = "child_session_id_that_is_definitely_longer"

        def _row(sid, title, started, active, count, preview):
            # Shape of one ``list_sessions_rich`` row as used by this test.
            return {
                "id": sid,
                "title": title,
                "source": "cli",
                "started_at": started,
                "last_active": active,
                "message_count": count,
                "preview": preview,
                "parent_session_id": None,
            }

        fake_db = MagicMock()
        fake_db.list_sessions_rich.return_value = [
            _row("root", "Current conversation", 1709500000, 1709500100, 4, "current root"),
            _row("other_session", "Other conversation", 1709400000, 1709400100, 3, "other root"),
        ]

        # child -> root -> (no parent); any other id is unknown to the DB.
        parent_of = {child_id: "root", "root": None}

        def _get_session(session_id):
            if session_id not in parent_of:
                return None
            return {"parent_session_id": parent_of[session_id]}

        fake_db.get_session.side_effect = _get_session

        payload = json.loads(
            _list_recent_sessions(fake_db, limit=5, current_session_id=child_id)
        )

        assert payload["success"] is True
        assert [item["session_id"] for item in payload["results"]] == ["other_session"]
        assert all(item["session_id"] != "root" for item in payload["results"])
|
||||
def test_browse_returns_titles(self, db):
    """At least one browse result carries a title containing 'Modpack'."""
    _seed_modpack_sessions(db)
    payload = json.loads(session_search(db=db))
    seen_titles = [entry.get("title") for entry in payload["results"]]
    # Missing or None titles are treated as empty strings.
    assert any("Modpack" in (title or "") for title in seen_titles)
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# session_search (dispatcher)
|
||||
# Discovery shape (with query)
|
||||
# =========================================================================
|
||||
|
||||
class TestSessionSearch:
|
||||
def test_no_db_lazily_opens_default_session_db(self, monkeypatch):
|
||||
from unittest.mock import MagicMock
|
||||
from tools.session_search_tool import session_search
|
||||
|
||||
mock_db = MagicMock()
|
||||
mock_db.search_messages.return_value = []
|
||||
|
||||
class FakeSessionDB:
|
||||
def __new__(cls):
|
||||
return mock_db
|
||||
|
||||
import types
|
||||
import sys
|
||||
|
||||
fake_state = types.ModuleType("hermes_state")
|
||||
fake_state.SessionDB = FakeSessionDB
|
||||
monkeypatch.setitem(sys.modules, "hermes_state", fake_state)
|
||||
|
||||
result = json.loads(session_search(query="test"))
|
||||
class TestDiscoveryShape:
|
||||
def test_query_returns_anchored_windows(self, db):
|
||||
_seed_modpack_sessions(db)
|
||||
result = json.loads(session_search(query="modpack", db=db))
|
||||
assert result["success"] is True
|
||||
mock_db.search_messages.assert_called_once()
|
||||
assert result["mode"] == "discover"
|
||||
assert result["count"] >= 1
|
||||
|
||||
def test_empty_query_returns_error(self):
    """An empty query string produces an unsuccessful result."""
    from tools.session_search_tool import session_search

    # A bare object stands in for the DB handle.
    placeholder_db = object()
    payload = json.loads(session_search(query="", db=placeholder_db))
    assert payload["success"] is False
|
||||
def test_discovery_result_has_bookends_and_window(self, db):
    """Every discovery hit exposes bookends, window messages and match metadata."""
    _seed_modpack_sessions(db)
    payload = json.loads(session_search(query="modpack", limit=3, db=db))
    required_fields = (
        "bookend_start",
        "messages",
        "bookend_end",
        "match_message_id",
        "snippet",
        "messages_before",
        "messages_after",
    )
    for hit in payload["results"]:
        for field in required_fields:
            assert field in hit
|
||||
|
||||
def test_whitespace_query_returns_error(self):
    """A whitespace-only query produces an unsuccessful result."""
    from tools.session_search_tool import session_search

    # A bare object stands in for the DB handle.
    placeholder_db = object()
    payload = json.loads(session_search(query=" ", db=placeholder_db))
    assert payload["success"] is False
|
||||
def test_match_message_id_is_anchor_in_window(self, db):
    """Each hit's ``match_message_id`` is among the ids of its window messages."""
    _seed_modpack_sessions(db)
    payload = json.loads(session_search(query="modpack", limit=3, db=db))
    for hit in payload["results"]:
        ids_in_window = [message["id"] for message in hit["messages"]]
        assert hit["match_message_id"] in ids_in_window
|
||||
|
||||
def test_current_session_excluded(self):
|
||||
"""session_search should never return the current session."""
|
||||
from unittest.mock import MagicMock
|
||||
from tools.session_search_tool import session_search
|
||||
|
||||
mock_db = MagicMock()
|
||||
current_sid = "20260304_120000_abc123"
|
||||
|
||||
# Simulate FTS5 returning matches only from the current session
|
||||
mock_db.search_messages.return_value = [
|
||||
{"session_id": current_sid, "content": "test match", "source": "cli",
|
||||
"session_started": 1709500000, "model": "test"},
|
||||
]
|
||||
mock_db.get_session.return_value = {"parent_session_id": None}
|
||||
|
||||
result = json.loads(session_search(
|
||||
query="test", db=mock_db, current_session_id=current_sid,
|
||||
))
|
||||
def test_no_results_returns_empty_list(self, db):
|
||||
_seed_modpack_sessions(db)
|
||||
result = json.loads(session_search(query="zzz_no_such_term_zzz", db=db))
|
||||
assert result["success"] is True
|
||||
assert result["count"] == 0
|
||||
assert result["results"] == []
|
||||
assert result["count"] == 0
|
||||
|
||||
def test_current_session_excluded_keeps_others(self):
|
||||
"""Other sessions should still be returned when current is excluded."""
|
||||
from unittest.mock import MagicMock
|
||||
from tools.session_search_tool import session_search
|
||||
def test_limit_clamped_to_max_10(self, db):
|
||||
_seed_modpack_sessions(db)
|
||||
# Pass huge limit; should not error and should cap
|
||||
result = json.loads(session_search(query="modpack", limit=999, db=db))
|
||||
assert result["count"] <= 10
|
||||
|
||||
mock_db = MagicMock()
|
||||
current_sid = "20260304_120000_abc123"
|
||||
other_sid = "20260303_100000_def456"
|
||||
def test_limit_floor_to_1(self, db):
|
||||
_seed_modpack_sessions(db)
|
||||
result = json.loads(session_search(query="modpack", limit=0, db=db))
|
||||
# Result count depends on hits, but the limit must be at least 1
|
||||
assert result["count"] >= 0
|
||||
|
||||
mock_db.search_messages.return_value = [
|
||||
{"session_id": current_sid, "content": "match 1", "source": "cli",
|
||||
"session_started": 1709500000, "model": "test"},
|
||||
{"session_id": other_sid, "content": "match 2", "source": "telegram",
|
||||
"session_started": 1709400000, "model": "test"},
|
||||
]
|
||||
mock_db.get_session.return_value = {"parent_session_id": None}
|
||||
mock_db.get_messages_as_conversation.return_value = [
|
||||
{"role": "user", "content": "hello"},
|
||||
{"role": "assistant", "content": "hi there"},
|
||||
]
|
||||
def test_non_int_limit_falls_back(self, db):
|
||||
_seed_modpack_sessions(db)
|
||||
result = json.loads(session_search(query="modpack", limit="bogus", db=db))
|
||||
assert result["success"] is True
|
||||
|
||||
# Mock async_call_llm to raise RuntimeError → summarizer returns None
|
||||
from unittest.mock import AsyncMock, patch as _patch
|
||||
with _patch("tools.session_search_tool.async_call_llm",
|
||||
new_callable=AsyncMock,
|
||||
side_effect=RuntimeError("no provider")):
|
||||
def test_current_session_filtered_out(self, db):
|
||||
_seed_modpack_sessions(db)
|
||||
result = json.loads(session_search(query="modpack", db=db, current_session_id="s_newest"))
|
||||
sids = [r["session_id"] for r in result["results"]]
|
||||
assert "s_newest" not in sids
|
||||
|
||||
|
||||
class TestDiscoverySort:
    """Checks for the ``sort`` argument of discovery-mode ``session_search``."""

    def test_sort_newest_orders_by_recency(self, db):
        """With ``sort="newest"`` the first hit is the newest seeded session."""
        _seed_modpack_sessions(db)
        payload = json.loads(
            session_search(query="modpack", limit=3, sort="newest", db=db)
        )
        top_hit = payload["results"][0]
        # Accept a match on the session id or on a title containing 'Newest'.
        is_newest_id = top_hit["session_id"] == "s_newest"
        assert is_newest_id or "Newest" in (top_hit.get("title") or "")

    def test_sort_oldest_orders_by_age(self, db):
        """With ``sort="oldest"`` the first hit is ``s_oldest``."""
        _seed_modpack_sessions(db)
        payload = json.loads(
            session_search(query="modpack", limit=3, sort="oldest", db=db)
        )
        assert payload["results"][0]["session_id"] == "s_oldest"

    def test_invalid_sort_silently_ignored(self, db):
        """An unrecognised sort value must not make the call fail."""
        _seed_modpack_sessions(db)
        payload = json.loads(session_search(query="modpack", sort="bogus", db=db))
        assert payload["success"] is True
|
||||
|
||||
|
||||
class TestRoleFilter:
    """Checks for how ``role_filter`` affects which message role is matched."""

    def test_default_excludes_tool_role(self, db):
        """Without a role filter the first hit is a user or assistant message.

        The role is only checked when at least one hit is returned.
        """
        db.create_session("s1", source="cli")
        db.append_message("s1", role="user", content="modpack question")
        db.append_message("s1", role="tool", content="modpack tool output", tool_name="x")
        payload = json.loads(session_search(query="modpack", db=db))
        if payload["count"] > 0:
            # The match should come from the user message, not the tool output.
            assert payload["results"][0]["matched_role"] in ("user", "assistant")

    def test_explicit_tool_role_includes_tool(self, db):
        """With ``role_filter="tool"`` the first hit is a tool message.

        The role is only checked when at least one hit is returned.
        """
        db.create_session("s1", source="cli")
        db.append_message("s1", role="tool", content="modpack tool output", tool_name="x")
        payload = json.loads(session_search(query="modpack", role_filter="tool", db=db))
        if payload["count"] > 0:
            top_hit = payload["results"][0]
            assert top_hit["matched_role"] == "tool"
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Scroll shape (session_id + around_message_id)
|
||||
# =========================================================================
|
||||
|
||||
class TestScrollShape:
|
||||
def test_scroll_returns_window_without_bookends(self, db):
    """Scroll mode returns a message window and no bookend fields."""
    _seed_modpack_sessions(db)
    # Discovery supplies a valid (session, message) anchor to scroll around.
    discovery = json.loads(session_search(query="modpack", limit=1, db=db))
    top_hit = discovery["results"][0]
    anchor_sid = top_hit["session_id"]
    anchor_mid = top_hit["match_message_id"]

    scrolled = json.loads(
        session_search(
            session_id=anchor_sid, around_message_id=anchor_mid, window=2, db=db
        )
    )

    assert scrolled["success"] is True
    assert scrolled["mode"] == "scroll"
    assert "messages" in scrolled
    # Bookends belong to the discovery shape only.
    for bookend_key in ("bookend_start", "bookend_end"):
        assert bookend_key not in scrolled
|
||||
|
||||
def test_scroll_window_clamped_to_20(self, db):
    """A very large ``window`` is reported back as 20."""
    _seed_modpack_sessions(db)
    discovery = json.loads(session_search(query="modpack", limit=1, db=db))
    top_hit = discovery["results"][0]
    anchor_sid = top_hit["session_id"]
    anchor_mid = top_hit["match_message_id"]
    scrolled = json.loads(
        session_search(
            session_id=anchor_sid, around_message_id=anchor_mid, window=999, db=db
        )
    )
    assert scrolled["window"] == 20
|
||||
|
||||
def test_scroll_window_floor_to_1(self, db):
    """A negative ``window`` is reported back as 1."""
    _seed_modpack_sessions(db)
    discovery = json.loads(session_search(query="modpack", limit=1, db=db))
    top_hit = discovery["results"][0]
    anchor_sid = top_hit["session_id"]
    anchor_mid = top_hit["match_message_id"]
    scrolled = json.loads(
        session_search(
            session_id=anchor_sid, around_message_id=anchor_mid, window=-5, db=db
        )
    )
    assert scrolled["window"] == 1
|
||||
|
||||
def test_scroll_returns_messages_before_after_counts(self, db):
    """A scroll result includes ``messages_before`` and ``messages_after``."""
    _seed_modpack_sessions(db)
    discovery = json.loads(session_search(query="modpack", limit=1, db=db))
    top_hit = discovery["results"][0]
    anchor_sid = top_hit["session_id"]
    anchor_mid = top_hit["match_message_id"]
    scrolled = json.loads(
        session_search(
            session_id=anchor_sid, around_message_id=anchor_mid, window=3, db=db
        )
    )
    for count_key in ("messages_before", "messages_after"):
        assert count_key in scrolled
|
||||
|
||||
def test_scroll_anchor_in_window(self, db):
    """The anchor message appears exactly once in the window, flagged ``anchor``."""
    _seed_modpack_sessions(db)
    discovery = json.loads(session_search(query="modpack", limit=1, db=db))
    top_hit = discovery["results"][0]
    anchor_sid = top_hit["session_id"]
    anchor_mid = top_hit["match_message_id"]
    scrolled = json.loads(
        session_search(
            session_id=anchor_sid, around_message_id=anchor_mid, window=2, db=db
        )
    )
    anchors = [msg for msg in scrolled["messages"] if msg["id"] == anchor_mid]
    assert len(anchors) == 1
    assert anchors[0].get("anchor") is True
|
||||
|
||||
def test_scroll_missing_anchor_errors(self, db):
    """Scrolling around an unknown message id fails with a 'not in' error."""
    _seed_modpack_sessions(db)
    payload = json.loads(
        session_search(session_id="s_oldest", around_message_id=999999, db=db)
    )
    assert payload["success"] is False
    error_text = payload.get("error", "")
    assert "not in" in error_text
|
||||
|
||||
def test_scroll_missing_session_errors(self, db):
    """Scrolling inside a session that does not exist is unsuccessful."""
    raw = session_search(session_id="nonexistent", around_message_id=1, db=db)
    payload = json.loads(raw)
    assert payload["success"] is False
|
||||
|
||||
def test_scroll_rejects_current_session_lineage(self, db):
|
||||
_seed_modpack_sessions(db)
|
||||
# Grab some valid id from s_oldest
|
||||
disc = json.loads(session_search(query="modpack", limit=3, db=db))
|
||||
match = [r for r in disc["results"] if r["session_id"] == "s_oldest"]
|
||||
if match:
|
||||
mid = match[0]["match_message_id"]
|
||||
result = json.loads(session_search(
|
||||
query="test", db=mock_db, current_session_id=current_sid,
|
||||
session_id="s_oldest", around_message_id=mid, db=db,
|
||||
current_session_id="s_oldest",
|
||||
))
|
||||
assert result["success"] is False
|
||||
assert "current session" in result.get("error", "").lower()
|
||||
|
||||
assert result["success"] is True
|
||||
# Current session should be skipped, only other_sid should appear
|
||||
assert result["sessions_searched"] == 1
|
||||
assert current_sid not in [r.get("session_id") for r in result.get("results", [])]
|
||||
|
||||
def test_current_child_session_excludes_parent_lineage(self):
|
||||
"""Compression/delegation parents should be excluded for the active child session."""
|
||||
from unittest.mock import MagicMock
|
||||
from tools.session_search_tool import session_search
|
||||
|
||||
mock_db = MagicMock()
|
||||
mock_db.search_messages.return_value = [
|
||||
{"session_id": "parent_sid", "content": "match", "source": "cli",
|
||||
"session_started": 1709500000, "model": "test"},
|
||||
]
|
||||
|
||||
def _get_session(session_id):
|
||||
if session_id == "child_sid":
|
||||
return {"parent_session_id": "parent_sid"}
|
||||
if session_id == "parent_sid":
|
||||
return {"parent_session_id": None}
|
||||
return None
|
||||
|
||||
mock_db.get_session.side_effect = _get_session
|
||||
|
||||
def test_scroll_invalid_around_message_id_errors(self, db):
|
||||
_seed_modpack_sessions(db)
|
||||
result = json.loads(session_search(
|
||||
query="test", db=mock_db, current_session_id="child_sid",
|
||||
session_id="s_oldest", around_message_id="not-an-int", db=db
|
||||
))
|
||||
assert result["success"] is False
|
||||
|
||||
assert result["success"] is True
|
||||
assert result["count"] == 0
|
||||
assert result["results"] == []
|
||||
assert result["sessions_searched"] == 0
|
||||
|
||||
def test_limit_none_coerced_to_default(self):
|
||||
"""Model sends limit=null → should fall back to 3, not TypeError."""
|
||||
from unittest.mock import MagicMock
|
||||
from tools.session_search_tool import session_search
|
||||
class TestScrollPattern:
|
||||
"""The forward/backward scroll loop using tool output."""
|
||||
|
||||
mock_db = MagicMock()
|
||||
mock_db.search_messages.return_value = []
|
||||
def test_scroll_forward_from_last_id(self, db):
    """Re-anchoring on the last id of one window moves the next window forward."""
    # Build a 20-message session alternating user/assistant turns.
    db.create_session("s_long", source="cli")
    message_ids = [
        db.append_message(
            "s_long",
            role="user" if i % 2 == 0 else "assistant",
            content=f"long session msg {i}",
        )
        for i in range(20)
    ]

    first_view = json.loads(
        session_search(
            session_id="s_long", around_message_id=message_ids[5], window=3, db=db
        )
    )
    last_id = first_view["messages"][-1]["id"]
    second_view = json.loads(
        session_search(session_id="s_long", around_message_id=last_id, window=3, db=db)
    )

    first_ids = [msg["id"] for msg in first_view["messages"]]
    second_ids = [msg["id"] for msg in second_view["messages"]]
    # The second window must reach further than the first one.
    assert max(second_ids) > max(first_ids)
    # The boundary id is shared by both windows.
    assert last_id in first_ids
    assert last_id in second_ids
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Shape precedence
|
||||
# =========================================================================
|
||||
|
||||
class TestShapePrecedence:
|
||||
def test_scroll_args_beat_query(self, db):
|
||||
_seed_modpack_sessions(db)
|
||||
disc = json.loads(session_search(query="modpack", limit=1, db=db))
|
||||
anchor_sid = disc["results"][0]["session_id"]
|
||||
anchor_mid = disc["results"][0]["match_message_id"]
|
||||
# Pass both query and scroll args — scroll should win
|
||||
result = json.loads(session_search(
|
||||
query="test", db=mock_db, limit=None,
|
||||
query="modpack", # would normally trigger discovery
|
||||
session_id=anchor_sid, around_message_id=anchor_mid, db=db,
|
||||
))
|
||||
assert result["success"] is True
|
||||
assert result["mode"] == "scroll"
|
||||
|
||||
def test_limit_type_object_coerced_to_default(self):
|
||||
"""Model sends limit as a type object → should fall back to 3, not TypeError."""
|
||||
from unittest.mock import MagicMock
|
||||
from tools.session_search_tool import session_search
|
||||
def test_empty_query_falls_back_to_browse(self, db):
|
||||
_seed_modpack_sessions(db)
|
||||
result = json.loads(session_search(query=" ", db=db))
|
||||
assert result["mode"] == "browse"
|
||||
|
||||
mock_db = MagicMock()
|
||||
mock_db.search_messages.return_value = []
|
||||
|
||||
result = json.loads(session_search(
|
||||
query="test", db=mock_db, limit=int,
|
||||
))
|
||||
assert result["success"] is True
|
||||
|
||||
def test_limit_string_coerced(self):
    """A limit given as the string '2' is accepted instead of failing."""
    from unittest.mock import MagicMock
    from tools.session_search_tool import session_search

    fake_db = MagicMock()
    fake_db.search_messages.return_value = []

    raw = session_search(query="test", db=fake_db, limit="2")
    assert json.loads(raw)["success"] is True
|
||||
|
||||
def test_limit_clamped_to_range(self):
    """Negative and zero limits are tolerated (intended to be clamped to 1)."""
    from unittest.mock import MagicMock
    from tools.session_search_tool import session_search

    fake_db = MagicMock()
    fake_db.search_messages.return_value = []

    # Same mock DB for both calls, negative value first, then zero.
    for out_of_range in (-5, 0):
        payload = json.loads(
            session_search(query="test", db=fake_db, limit=out_of_range)
        )
        assert payload["success"] is True
|
||||
|
||||
def test_current_root_session_excludes_child_lineage(self):
    """Hits in a delegation child are dropped when its root is the current session."""
    from unittest.mock import MagicMock
    from tools.session_search_tool import session_search

    fake_db = MagicMock()
    # The only search hit lives in the child session.
    fake_db.search_messages.return_value = [
        {
            "session_id": "child_sid",
            "content": "match",
            "source": "cli",
            "session_started": 1709500000,
            "model": "test",
        },
    ]

    # child_sid -> root_sid -> (no parent); any other id is unknown.
    parent_of = {"root_sid": None, "child_sid": "root_sid"}

    def _get_session(session_id):
        if session_id not in parent_of:
            return None
        return {"parent_session_id": parent_of[session_id]}

    fake_db.get_session.side_effect = _get_session

    payload = json.loads(
        session_search(query="test", db=fake_db, current_session_id="root_sid")
    )

    assert payload["success"] is True
    assert payload["count"] == 0
    assert payload["results"] == []
    assert payload["sessions_searched"] == 0
|
||||
|
||||
def test_source_from_resolved_parent_not_fts5_child(self):
    """The reported source comes from the resolved parent, not the matching child.

    Regression test for #15909: the FTS5 hit is in a delegation child session
    (source='telegram') that resolves to a parent (source='api_server'); the
    result entry must carry the parent's id and 'api_server' as its source.
    """
    from unittest.mock import MagicMock, AsyncMock, patch as _patch
    from tools.session_search_tool import session_search

    fake_db = MagicMock()
    # The hit belongs to the child and carries the child's (wrong) source.
    child_hit = {
        "session_id": "child_sid",
        "content": "hello world",
        "source": "telegram",
        "session_started": 1709400000,
        "model": "gpt-4o-mini",
    }
    fake_db.search_messages.return_value = [child_hit]

    # (parent id, source, started_at) per known session id.
    known_sessions = {
        "child_sid": ("parent_sid", "telegram", 1709400000),
        "parent_sid": (None, "api_server", 1709300000),  # correct source
    }

    def _get_session(session_id):
        if session_id not in known_sessions:
            return None
        parent_id, source, started_at = known_sessions[session_id]
        return {
            "id": session_id,
            "parent_session_id": parent_id,
            "source": source,
            "started_at": started_at,
            "model": "gpt-4o-mini",
        }

    fake_db.get_session.side_effect = _get_session
    fake_db.get_messages_as_conversation.return_value = [
        {"role": "user", "content": "hello world"},
        {"role": "assistant", "content": "hi there"},
    ]

    # Make the LLM call raise so no real provider is needed.
    llm_patch = _patch(
        "tools.session_search_tool.async_call_llm",
        new_callable=AsyncMock,
        side_effect=RuntimeError("no provider"),
    )
    with llm_patch:
        result = json.loads(session_search(query="hello world", db=fake_db))

    assert result["success"] is True
    assert result["count"] == 1
    entry = result["results"][0]
    assert entry["session_id"] == "parent_sid", "should report resolved parent session ID"
    assert entry["source"] == "api_server", (
        f"source should be parent's 'api_server', got {entry['source']!r}"
    )
|
||||
def test_non_string_query_falls_back_to_browse(self, db):
    """A ``None`` query is handled as a browse request."""
    _seed_modpack_sessions(db)
    raw = session_search(query=None, db=db)  # type: ignore
    payload = json.loads(raw)
    assert payload["mode"] == "browse"
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -152,7 +152,7 @@ Registered only when the agent is spawned by the kanban dispatcher (`HERMES_KANB
|
|||
|
||||
| Tool | Description | Requires environment |
|
||||
|------|-------------|----------------------|
|
||||
| `session_search` | Search your long-term memory of past conversations. This is your recall -- every past session is searchable, and this tool summarizes what happened. USE THIS PROACTIVELY when: - The user says 'we did this before', 'remember when', 'last ti… | — |
|
||||
| `session_search` | Search past sessions stored in the local session DB, or scroll inside one. FTS5-backed retrieval; returns actual messages from the DB (no LLM calls). Three shapes: discovery (pass `query`), scroll (pass `session_id` + `around_message_id`), browse (no args). | — |
|
||||
|
||||
## `skills` toolset
|
||||
|
||||
|
|
|
|||
|
|
@ -780,7 +780,6 @@ $ hermes model
|
|||
|
||||
[ ] vision currently: auto / main model
|
||||
[ ] web_extract currently: auto / main model
|
||||
[ ] session_search currently: openrouter / google/gemini-2.5-flash
|
||||
[ ] title_generation currently: openrouter / google/gemini-3-flash-preview
|
||||
[ ] compression currently: auto / main model
|
||||
[ ] approval currently: auto / main model
|
||||
|
|
@ -862,16 +861,6 @@ auxiliary:
|
|||
compression:
|
||||
timeout: 120 # seconds — compression summarizes long conversations, needs more time
|
||||
|
||||
# Session search — summarizes past session matches
|
||||
session_search:
|
||||
provider: "auto"
|
||||
model: ""
|
||||
base_url: ""
|
||||
api_key: ""
|
||||
timeout: 30
|
||||
max_concurrency: 3 # Limit parallel summaries to reduce request-burst 429s
|
||||
extra_body: {} # Provider-specific OpenAI-compatible request fields
|
||||
|
||||
# Skills hub — skill matching and search
|
||||
skills_hub:
|
||||
provider: "auto"
|
||||
|
|
@ -909,34 +898,6 @@ Each auxiliary task has a configurable `timeout` (in seconds). Defaults: vision
|
|||
Context compression has its own `compression:` block for thresholds and an `auxiliary.compression:` block for model/provider settings — see [Context Compression](#context-compression) above. The fallback model uses a `fallback_model:` block — see [Fallback Model](/docs/integrations/providers#fallback-model). All three follow the same provider/model/base_url pattern.
|
||||
:::
|
||||
|
||||
### Session Search Tuning
|
||||
|
||||
If you use a reasoning-heavy model for `auxiliary.session_search`, Hermes now gives you two built-in controls:
|
||||
|
||||
- `auxiliary.session_search.max_concurrency`: limits how many matched sessions Hermes summarizes at once
|
||||
- `auxiliary.session_search.extra_body`: forwards provider-specific OpenAI-compatible request fields on the summarization calls
|
||||
|
||||
Example:
|
||||
|
||||
```yaml
|
||||
auxiliary:
|
||||
session_search:
|
||||
provider: "main"
|
||||
model: "glm-4.5-air"
|
||||
timeout: 60
|
||||
max_concurrency: 2
|
||||
extra_body:
|
||||
enable_thinking: false
|
||||
```
|
||||
|
||||
Use `max_concurrency` when your provider rate-limits request bursts and you want `session_search` to trade some parallelism for stability.
|
||||
|
||||
Use `extra_body` only when your provider documents OpenAI-compatible request-body fields you want Hermes to pass through for that task. Hermes forwards the object as-is.
|
||||
|
||||
:::warning
|
||||
`extra_body` is only effective when your provider actually supports the field you send. If the provider does not expose a native OpenAI-compatible reasoning-off flag, Hermes cannot synthesize one on its behalf.
|
||||
:::
|
||||
|
||||
### OpenRouter routing & Pareto Code for auxiliary tasks
|
||||
|
||||
When an auxiliary task resolves to OpenRouter (either explicitly or via `provider: "main"` while your main agent is on OpenRouter), the main agent's `provider_routing` and `openrouter.min_coding_score` settings **do not propagate** — by design, each auxiliary task is independent. To set OpenRouter provider preferences or use the [Pareto Code router](/docs/integrations/providers#openrouter-pareto-code-router) for a specific aux task, set them per-task via `extra_body`:
|
||||
|
|
|
|||
|
|
@ -188,7 +188,6 @@ Hermes uses separate lightweight models for side tasks. Each task has its own pr
|
|||
| Vision | Image analysis, browser screenshots | `auxiliary.vision` |
|
||||
| Web Extract | Web page summarization | `auxiliary.web_extract` |
|
||||
| Compression | Context compression summaries | `auxiliary.compression` |
|
||||
| Session Search | Past session summarization | `auxiliary.session_search` |
|
||||
| Skills Hub | Skill search and discovery | `auxiliary.skills_hub` |
|
||||
| MCP | MCP helper operations | `auxiliary.mcp` |
|
||||
| Approval | Smart command-approval classification | `auxiliary.approval` |
|
||||
|
|
@ -235,13 +234,6 @@ auxiliary:
|
|||
provider: "auto"
|
||||
model: ""
|
||||
|
||||
session_search:
|
||||
provider: "auto"
|
||||
model: ""
|
||||
timeout: 30
|
||||
max_concurrency: 3
|
||||
extra_body: {}
|
||||
|
||||
skills_hub:
|
||||
provider: "auto"
|
||||
model: ""
|
||||
|
|
@ -270,25 +262,6 @@ fallback_model:
|
|||
# base_url: http://localhost:8000/v1 # Optional custom endpoint
|
||||
```
|
||||
|
||||
For `auxiliary.session_search`, Hermes also supports:
|
||||
|
||||
- `max_concurrency` to limit how many session summaries run at once
|
||||
- `extra_body` to pass provider-specific OpenAI-compatible request fields through on the summarization calls
|
||||
|
||||
Example:
|
||||
|
||||
```yaml
|
||||
auxiliary:
|
||||
session_search:
|
||||
provider: main
|
||||
model: glm-4.5-air
|
||||
max_concurrency: 2
|
||||
extra_body:
|
||||
enable_thinking: false
|
||||
```
|
||||
|
||||
If your provider does not support a native OpenAI-compatible reasoning-control field, `extra_body` cannot turn reasoning off; in that case `max_concurrency` is still useful for reducing HTTP 429 (rate-limit) errors caused by request bursts.
|
||||
|
||||
All three — auxiliary, compression, fallback — work the same way: set `provider` to pick who handles the request, `model` to pick which model, and `base_url` to point at a custom endpoint (overrides provider).
|
||||
|
||||
### Provider Options for Auxiliary Tasks
|
||||
|
|
@ -432,7 +405,6 @@ See [Scheduled Tasks (Cron)](/docs/user-guide/features/cron) for full configurat
|
|||
| Vision | Layered (see above) + internal OpenRouter retry | `auxiliary.vision` |
|
||||
| Web extraction | Layered (see above) + internal OpenRouter retry | `auxiliary.web_extract` |
|
||||
| Context compression | Layered (see above); degrades to no-summary if all layers unavailable | `auxiliary.compression` |
|
||||
| Session search | Layered (see above) | `auxiliary.session_search` |
|
||||
| Skills hub | Layered (see above) | `auxiliary.skills_hub` |
|
||||
| MCP helpers | Layered (see above) | `auxiliary.mcp` |
|
||||
| Approval classification | Layered (see above) | `auxiliary.approval` |
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue