diff --git a/plugins/memory/holographic/retrieval.py b/plugins/memory/holographic/retrieval.py
index a673dcef84..fc7557c160 100644
--- a/plugins/memory/holographic/retrieval.py
+++ b/plugins/memory/holographic/retrieval.py
@@ -10,6 +10,8 @@ import math
 from datetime import datetime, timezone
 from typing import TYPE_CHECKING
 
+from .store import _sanitize_fts5_query
+
 if TYPE_CHECKING:
     from .store import MemoryStore
 
@@ -496,7 +498,7 @@ class FactRetriever:
         # We need to join facts_fts with facts to get all columns
         params: list = []
         where_clauses = ["facts_fts MATCH ?"]
-        params.append(query)
+        params.append(_sanitize_fts5_query(query))
 
         if category:
             where_clauses.append("f.category = ?")
diff --git a/plugins/memory/holographic/store.py b/plugins/memory/holographic/store.py
index 3dc66d6864..12bb7e33c8 100644
--- a/plugins/memory/holographic/store.py
+++ b/plugins/memory/holographic/store.py
@@ -95,6 +95,41 @@ def _clamp_trust(value: float) -> float:
     return max(_TRUST_MIN, min(_TRUST_MAX, value))
 
 
+def _sanitize_fts5_query(query: str) -> str:
+    """Make free-form user input safe to use in an FTS5 MATCH query.
+
+    Follows the SessionDB FTS5 query handling so memory fact search treats
+    hyphenated and dotted tokens the same way as transcript search.
+    """
+    protected: list[str] = []
+
+    def _stash(match: re.Match[str]) -> str:
+        # Swap each balanced "..." phrase for a NUL-delimited placeholder
+        # to shield it from the substitutions below.
+        protected.append(match.group(0))
+        return f"\x00Q{len(protected) - 1}\x00"
+
+    text = re.sub(r'"[^"]*"', _stash, query)
+
+    # Applied strictly in this order: (pattern, replacement, strip input first).
+    steps = (
+        (r'[+{}()"^]', " ", False),  # remaining special characters
+        (r"\*+", "*", False),  # collapse runs of "*"
+        (r"(^|\s)\*", r"\1", False),  # drop "*" with no term before it
+        (r"(?i)^(AND|OR|NOT)\b\s*", "", True),  # leading boolean keyword
+        (r"(?i)\s+(AND|OR|NOT)\s*$", "", True),  # trailing boolean keyword
+        (r"\b(\w+(?:[.-]\w+)+)\b", r'"\1"', False),  # quote a-b / a.b tokens
+    )
+    for pattern, replacement, strip_first in steps:
+        subject = text.strip() if strip_first else text
+        text = re.sub(pattern, replacement, subject)
+
+    for position, phrase in enumerate(protected):
+        text = text.replace(f"\x00Q{position}\x00", phrase)
+
+    return text.strip()
+
+
 class MemoryStore:
     """SQLite-backed fact store with entity resolution and trust scoring."""
 
@@ -197,7 +232,7 @@ class MemoryStore:
         descending. Also increments retrieval_count for matched facts.
         """
         with self._lock:
-            query = query.strip()
+            query = _sanitize_fts5_query(query)
             if not query:
                 return []
 
diff --git a/tests/plugins/memory/test_holographic_provider.py b/tests/plugins/memory/test_holographic_provider.py
new file mode 100644
index 0000000000..c5f5de0d42
--- /dev/null
+++ b/tests/plugins/memory/test_holographic_provider.py
@@ -0,0 +1,42 @@
+"""Regression tests for FTS search in the holographic memory provider."""
+
+from plugins.memory.holographic.retrieval import FactRetriever
+from plugins.memory.holographic.store import MemoryStore, _sanitize_fts5_query
+
+
+def _add_hardware_fact(store):
+    """Insert the single hardware fact that both search tests look up."""
+    store.add_fact(
+        "PVE-01 hardware: i5-13500T, IP 10.20.90.00",
+        category="hardware",
+        tags="pve-01,homelab",
+    )
+
+
+def test_sanitize_fts5_query_quotes_hyphenated_terms():
+    """A bare hyphenated term gets quoted; a quoted one passes through."""
+    for raw in ("pve-01", '"pve-01"'):
+        assert _sanitize_fts5_query(raw) == '"pve-01"'
+
+
+def test_store_search_facts_matches_hyphenated_terms(tmp_path):
+    """``MemoryStore.search_facts`` returns the fact for a hyphenated query."""
+    memory = MemoryStore(db_path=tmp_path / "memory_store.db")
+    _add_hardware_fact(memory)
+
+    hits = memory.search_facts("pve-01", category="hardware", limit=10)
+
+    assert len(hits) == 1
+    assert hits[0]["content"].startswith("PVE-01 hardware")
+
+
+def test_retriever_search_matches_hyphenated_terms(tmp_path):
+    """``FactRetriever.search`` returns the fact for a hyphenated query."""
+    memory = MemoryStore(db_path=tmp_path / "memory_store.db")
+    searcher = FactRetriever(memory)
+    _add_hardware_fact(memory)
+
+    hits = searcher.search("pve-01", category="hardware", limit=10)
+
+    assert len(hits) == 1
+    assert hits[0]["content"].startswith("PVE-01 hardware")