mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-27 01:11:40 +00:00
test: add multi-word query tests for truncation match strategy
Tests phrase matching, proximity co-occurrence, and sliding window coverage maximisation — the three new tiers from the truncation fix.
This commit is contained in:
parent
a5bc698b9a
commit
acdff020b7
1 changed files with 34 additions and 0 deletions
|
|
@ -146,6 +146,40 @@ class TestTruncateAroundMatches:
|
|||
result = _truncate_around_matches(text, "KEYWORD")
|
||||
assert "KEYWORD" in result
|
||||
|
||||
def test_multiword_phrase_match_beats_individual_term(self):
|
||||
"""Full phrase deep in text should be found even when a single term
|
||||
appears much earlier in boilerplate."""
|
||||
boilerplate = "The project setup is complex. " * 500 # ~15K, has 'project' early
|
||||
filler = "x" * (MAX_SESSION_CHARS + 20000)
|
||||
target = "We reviewed the keystone project roadmap in detail."
|
||||
text = boilerplate + filler + target + filler
|
||||
result = _truncate_around_matches(text, "keystone project")
|
||||
assert "keystone project" in result.lower()
|
||||
|
||||
def test_multiword_proximity_cooccurrence(self):
|
||||
"""When exact phrase is absent, terms co-occurring within proximity
|
||||
should be preferred over a lone early term."""
|
||||
early = "project " + "a" * (MAX_SESSION_CHARS + 20000)
|
||||
# Place 'keystone' and 'project' near each other (but not as exact phrase)
|
||||
cooccur = "this keystone initiative for the project was pivotal"
|
||||
tail = "b" * (MAX_SESSION_CHARS + 20000)
|
||||
text = early + cooccur + tail
|
||||
result = _truncate_around_matches(text, "keystone project")
|
||||
assert "keystone" in result.lower()
|
||||
assert "project" in result.lower()
|
||||
|
||||
def test_multiword_window_maximises_coverage(self):
|
||||
"""Sliding window should capture as many match clusters as possible."""
|
||||
# Place two phrase matches: one at ~50K, one at ~60K, both should fit
|
||||
pre = "z" * 50000
|
||||
match1 = " alpha beta "
|
||||
gap = "z" * 10000
|
||||
match2 = " alpha beta "
|
||||
post = "z" * (MAX_SESSION_CHARS + 40000)
|
||||
text = pre + match1 + gap + match2 + post
|
||||
result = _truncate_around_matches(text, "alpha beta")
|
||||
assert result.lower().count("alpha beta") == 2
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# session_search (dispatcher)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue