diff --git a/hermes_state.py b/hermes_state.py index 5a38e73f4..77d1a1ab4 100644 --- a/hermes_state.py +++ b/hermes_state.py @@ -1009,8 +1009,9 @@ class SessionDB: Strategy: - Preserve properly paired quoted phrases (``"exact phrase"``) - Strip unmatched FTS5-special characters that would cause errors - - Wrap unquoted hyphenated terms in quotes so FTS5 matches them - as exact phrases instead of splitting on the hyphen + - Wrap unquoted hyphenated and dotted terms in quotes so FTS5 + matches them as exact phrases instead of splitting on the + hyphen/dot (e.g. ``chat-send``, ``P2.2``, ``my-app.config.ts``) """ # Step 1: Extract balanced double-quoted phrases and protect them # from further processing via numbered placeholders. @@ -1035,17 +1036,13 @@ class SessionDB: sanitized = re.sub(r"(?i)^(AND|OR|NOT)\b\s*", "", sanitized.strip()) sanitized = re.sub(r"(?i)\s+(AND|OR|NOT)\s*$", "", sanitized.strip()) - # Step 5: Wrap unquoted dotted terms (e.g. ``P2.2``, ``simulate.p2.test.ts``) - # in double quotes. In practice, FTS5 query parsing can treat dots as - # syntax boundaries, which may produce OperationalError or zero results. - # Quoting forces phrase semantics and avoids query parse edge cases. - sanitized = re.sub(r"\b([\w-]+(?:\.[\w-]+)+)\b", r'"\1"', sanitized) - - # Step 6: Wrap unquoted hyphenated terms (e.g. ``chat-send``) in - # double quotes. FTS5's tokenizer splits on hyphens, turning - # ``chat-send`` into ``chat AND send``. Quoting preserves the - # intended phrase match. - sanitized = re.sub(r"\b(\w+(?:-\w+)+)\b", r'"\1"', sanitized) + # Step 5: Wrap unquoted dotted and/or hyphenated terms in double + # quotes. FTS5's tokenizer splits on dots and hyphens, turning + # ``chat-send`` into ``chat AND send`` and ``P2.2`` into ``p2 AND 2``. + # Quoting preserves phrase semantics. A single pass avoids the + # double-quoting bug that would occur if dotted and hyphenated + # patterns were applied sequentially (e.g. ``my-app.config``). + sanitized = re.sub(r"\b(\w+(?:[.-]\w+)+)\b", r'"\1"', sanitized) # Step 6: Restore preserved quoted phrases for i, quoted in enumerate(_quoted_parts): diff --git a/tests/test_hermes_state.py b/tests/test_hermes_state.py index f38d97016..a0630858c 100644 --- a/tests/test_hermes_state.py +++ b/tests/test_hermes_state.py @@ -474,6 +474,10 @@ class TestFTS5Search: assert '"P2.2"' in result assert '"simulate.p2"' in result + # Mixed dots and hyphens — single pass avoids double-quoting + assert s('my-app.config') == '"my-app.config"' + assert s('my-app.config.ts') == '"my-app.config.ts"' + # ========================================================================= # Session search and listing