test: add multi-word query tests for truncation match strategy

Tests phrase matching, proximity co-occurrence, and sliding window
coverage maximisation — the three new tiers from the truncation fix.
This commit is contained in:
Teknium 2026-04-13 04:52:35 -07:00 committed by Teknium
parent a5bc698b9a
commit acdff020b7

View file

@ -146,6 +146,40 @@ class TestTruncateAroundMatches:
result = _truncate_around_matches(text, "KEYWORD")
assert "KEYWORD" in result
def test_multiword_phrase_match_beats_individual_term(self):
"""Full phrase deep in text should be found even when a single term
appears much earlier in boilerplate."""
boilerplate = "The project setup is complex. " * 500 # ~15K, has 'project' early
filler = "x" * (MAX_SESSION_CHARS + 20000)
target = "We reviewed the keystone project roadmap in detail."
text = boilerplate + filler + target + filler
result = _truncate_around_matches(text, "keystone project")
assert "keystone project" in result.lower()
def test_multiword_proximity_cooccurrence(self):
"""When exact phrase is absent, terms co-occurring within proximity
should be preferred over a lone early term."""
early = "project " + "a" * (MAX_SESSION_CHARS + 20000)
# Place 'keystone' and 'project' near each other (but not as exact phrase)
cooccur = "this keystone initiative for the project was pivotal"
tail = "b" * (MAX_SESSION_CHARS + 20000)
text = early + cooccur + tail
result = _truncate_around_matches(text, "keystone project")
assert "keystone" in result.lower()
assert "project" in result.lower()
def test_multiword_window_maximises_coverage(self):
"""Sliding window should capture as many match clusters as possible."""
# Place two phrase matches: one at ~50K, one at ~60K, both should fit
pre = "z" * 50000
match1 = " alpha beta "
gap = "z" * 10000
match2 = " alpha beta "
post = "z" * (MAX_SESSION_CHARS + 40000)
text = pre + match1 + gap + match2 + post
result = _truncate_around_matches(text, "alpha beta")
assert result.lower().count("alpha beta") == 2
# =========================================================================
# session_search (dispatcher)