This commit is contained in:
LeonSGP 2026-04-24 17:29:40 -05:00 committed by GitHub
commit a6d2654f42
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 68 additions and 2 deletions

View file

@ -10,6 +10,8 @@ import math
from datetime import datetime, timezone
from typing import TYPE_CHECKING
from .store import _sanitize_fts5_query
if TYPE_CHECKING:
from .store import MemoryStore
@ -496,7 +498,7 @@ class FactRetriever:
# We need to join facts_fts with facts to get all columns
params: list = []
where_clauses = ["facts_fts MATCH ?"]
params.append(query)
params.append(_sanitize_fts5_query(query))
if category:
where_clauses.append("f.category = ?")

View file

@ -95,6 +95,32 @@ def _clamp_trust(value: float) -> float:
return max(_TRUST_MIN, min(_TRUST_MAX, value))
def _sanitize_fts5_query(query: str) -> str:
"""Sanitize user input for safe use in FTS5 MATCH queries.
Mirrors the SessionDB FTS5 query handling so memory fact search treats
hyphenated and dotted tokens the same way as transcript search.
"""
quoted_parts: list[str] = []
def _preserve_quoted(match: re.Match[str]) -> str:
quoted_parts.append(match.group(0))
return f"\x00Q{len(quoted_parts) - 1}\x00"
sanitized = re.sub(r'"[^"]*"', _preserve_quoted, query)
sanitized = re.sub(r'[+{}()"^]', " ", sanitized)
sanitized = re.sub(r"\*+", "*", sanitized)
sanitized = re.sub(r"(^|\s)\*", r"\1", sanitized)
sanitized = re.sub(r"(?i)^(AND|OR|NOT)\b\s*", "", sanitized.strip())
sanitized = re.sub(r"(?i)\s+(AND|OR|NOT)\s*$", "", sanitized.strip())
sanitized = re.sub(r"\b(\w+(?:[.-]\w+)+)\b", r'"\1"', sanitized)
for idx, quoted in enumerate(quoted_parts):
sanitized = sanitized.replace(f"\x00Q{idx}\x00", quoted)
return sanitized.strip()
class MemoryStore:
"""SQLite-backed fact store with entity resolution and trust scoring."""
@ -197,7 +223,7 @@ class MemoryStore:
descending. Also increments retrieval_count for matched facts.
"""
with self._lock:
query = query.strip()
query = _sanitize_fts5_query(query)
if not query:
return []

View file

@ -0,0 +1,38 @@
"""Regression tests for the holographic memory provider FTS search."""
from plugins.memory.holographic.retrieval import FactRetriever
from plugins.memory.holographic.store import MemoryStore, _sanitize_fts5_query
def test_sanitize_fts5_query_quotes_hyphenated_terms():
    """Bare hyphenated terms come back quoted; quoted ones are unchanged."""
    expected = '"pve-01"'
    for raw_query in ("pve-01", '"pve-01"'):
        assert _sanitize_fts5_query(raw_query) == expected
def test_store_search_facts_matches_hyphenated_terms(tmp_path):
    """A hyphenated query finds the stored fact via ``search_facts``."""
    db_file = tmp_path / "memory_store.db"
    memory = MemoryStore(db_path=db_file)
    memory.add_fact(
        "PVE-01 hardware: i5-13500T, IP 10.20.90.00",
        category="hardware",
        tags="pve-01,homelab",
    )

    hits = memory.search_facts("pve-01", category="hardware", limit=10)

    assert len(hits) == 1
    first_content = hits[0]["content"]
    assert first_content.startswith("PVE-01 hardware")
def test_retriever_search_matches_hyphenated_terms(tmp_path):
    """A hyphenated query finds the stored fact via ``FactRetriever.search``."""
    db_file = tmp_path / "memory_store.db"
    memory = MemoryStore(db_path=db_file)
    fact_retriever = FactRetriever(memory)
    memory.add_fact(
        "PVE-01 hardware: i5-13500T, IP 10.20.90.00",
        category="hardware",
        tags="pve-01,homelab",
    )

    hits = fact_retriever.search("pve-01", category="hardware", limit=10)

    assert len(hits) == 1
    first_content = hits[0]["content"]
    assert first_content.startswith("PVE-01 hardware")