diff --git a/tests/tools/test_session_search.py b/tests/tools/test_session_search.py index acb64d62fb..852ac7b9e8 100644 --- a/tests/tools/test_session_search.py +++ b/tests/tools/test_session_search.py @@ -146,6 +146,40 @@ class TestTruncateAroundMatches: result = _truncate_around_matches(text, "KEYWORD") assert "KEYWORD" in result + def test_multiword_phrase_match_beats_individual_term(self): + """Full phrase deep in text should be found even when a single term + appears much earlier in boilerplate.""" + boilerplate = "The project setup is complex. " * 500 # ~15K, has 'project' early + filler = "x" * (MAX_SESSION_CHARS + 20000) + target = "We reviewed the keystone project roadmap in detail." + text = boilerplate + filler + target + filler + result = _truncate_around_matches(text, "keystone project") + assert "keystone project" in result.lower() + + def test_multiword_proximity_cooccurrence(self): + """When exact phrase is absent, terms co-occurring within proximity + should be preferred over a lone early term.""" + early = "project " + "a" * (MAX_SESSION_CHARS + 20000) + # Place 'keystone' and 'project' near each other (but not as exact phrase) + cooccur = "this keystone initiative for the project was pivotal" + tail = "b" * (MAX_SESSION_CHARS + 20000) + text = early + cooccur + tail + result = _truncate_around_matches(text, "keystone project") + assert "keystone" in result.lower() + assert "project" in result.lower() + + def test_multiword_window_maximises_coverage(self): + """Sliding window should capture as many match clusters as possible.""" + # Place two phrase matches: one at ~50K, one at ~60K, both should fit + pre = "z" * 50000 + match1 = " alpha beta " + gap = "z" * 10000 + match2 = " alpha beta " + post = "z" * (MAX_SESSION_CHARS + 40000) + text = pre + match1 + gap + match2 + post + result = _truncate_around_matches(text, "alpha beta") + assert result.lower().count("alpha beta") == 2 + # ========================================================================= # session_search (dispatcher)