feat(openviking): add full recall prefetch policy

Salvage of PR #48927 by @ehz0ah, which consolidates OpenViking recall
work from #41706 (@huangxun375-stack), #33260, #49975, and #32444.

Replaces stale background post-turn prefetch warming with synchronous
current-query recall. The old queue_prefetch warmed the PREVIOUS user
message while turn-start recall consumed the CURRENT one, so injected
context was always about the wrong topic.

Changes:
- prefetch() now does session-aware /api/v1/search/search with the
  current query, falls back to /api/v1/search/find on failure
- Contract-safe payloads: limit, score_threshold, context_type,
  session_id — no top_k, no search-body mode, no target_uri
- L2 content reads for items with level=2 or empty abstracts, capped
  at full_read_limit (default 2)
- Local ranking (score + query-token overlap + leaf boost), dedup,
  score threshold, and injected-char budget
- queue_prefetch() is now a no-op (background warming removed)
- Additive batched viking_read: uris param accepts up to 3 URIs
- Per-request timeout support on _VikingClient.get/post/delete
- Removes stale _prefetch_result/_prefetch_thread/_prefetch_generation
  state and _invalidate_prefetch_state()
- Strengthened system_prompt_block guidance

Salvage follow-up fixes:
- Expose all 8 recall config knobs in get_config_schema() (PR #48927
  had removed them; #41706 correctly exposed them). Env vars remain
  as internal mechanism but are now visible in setup wizard.
- Lower default timeout 8s→4s, request_timeout 6s→3s, full_read_limit
  3→2 to reduce per-turn blocking latency.

Co-authored-by: Hao Zhe <haozhe4547@gmail.com>
Co-authored-by: Eurekaxun <eurekaxun@163.com>
This commit is contained in:
kshitijk4poor 2026-06-24 18:53:49 +05:30
parent 89540d592b
commit ab9134bf16
3 changed files with 1443 additions and 200 deletions

View file

@ -1,6 +1,7 @@
import json
import os
import stat
import time
import zipfile
from types import SimpleNamespace
from unittest.mock import MagicMock
@ -1208,6 +1209,7 @@ def test_tool_search_sends_limit_not_legacy_top_k():
payload = provider._client.post.call_args.args[1]
assert payload["limit"] == 7
assert "top_k" not in payload
assert "mode" not in payload
def test_tool_search_uses_find_for_normal_search():
@ -1222,6 +1224,7 @@ def test_tool_search_uses_find_for_normal_search():
provider._client.post.assert_called_once_with("/api/v1/search/find", {
"query": "simple lookup",
})
assert "mode" not in provider._client.post.call_args.args[1]
def test_tool_search_uses_session_search_for_deep_search():
@ -1238,6 +1241,7 @@ def test_tool_search_uses_session_search_for_deep_search():
"query": "connect facts",
"session_id": "session-123",
})
assert "mode" not in provider._client.post.call_args.args[1]
def test_tool_add_resource_uploads_existing_local_file(tmp_path):
@ -1590,6 +1594,36 @@ def test_viking_client_delete_uses_identity_headers(monkeypatch):
assert captured["kwargs"]["headers"]["X-OpenViking-Actor-Peer"] == "hermes"
def test_viking_client_post_allows_per_request_timeout(monkeypatch):
client = _VikingClient(
"https://example.com",
api_key="test-key",
account="acct",
user="alice",
agent="hermes",
)
captured = {}
def capture_post(url, **kwargs):
captured["url"] = url
captured["kwargs"] = kwargs
return SimpleNamespace(
status_code=200,
text="",
json=lambda: {"status": "ok", "result": {}},
raise_for_status=lambda: None,
)
monkeypatch.setattr(client._httpx, "post", capture_post)
assert client.post("/api/v1/search/find", {"query": "anything"}, timeout=1.25) == {
"status": "ok",
"result": {},
}
assert captured["url"] == "https://example.com/api/v1/search/find"
assert captured["kwargs"]["timeout"] == 1.25
def test_viking_client_upload_temp_file_uses_multipart_identity_headers(tmp_path, monkeypatch):
sample = tmp_path / "sample.md"
sample.write_text("# Local resource\n", encoding="utf-8")
@ -2140,10 +2174,8 @@ def test_on_session_switch_commits_pending_tokens_without_turn_count():
assert provider._turn_count == 0
def test_on_session_switch_rewound_same_session_only_invalidates_prefetch():
def test_on_session_switch_rewound_same_session_skips_commit_and_rotation():
provider = _make_provider_with_session("same-sid", turn_count=3)
provider._prefetch_generation = 9
provider._prefetch_result = "stale recall"
provider.on_session_switch("same-sid", rewound=True)
@ -2151,17 +2183,6 @@ def test_on_session_switch_rewound_same_session_only_invalidates_prefetch():
provider._client.post.assert_not_called()
assert provider._session_id == "same-sid"
assert provider._turn_count == 3
assert provider._prefetch_generation == 10
assert provider._prefetch_result == ""
def test_on_session_switch_clears_stale_prefetch_result():
provider = _make_provider_with_session("old-sid", turn_count=1)
provider._prefetch_result = "stale recall from old session"
provider.on_session_switch("new-sid")
assert provider._prefetch_result == ""
def test_on_session_switch_waits_for_inflight_sync_thread():
@ -2856,17 +2877,7 @@ def test_on_memory_write_ignores_non_add_actions(action, content, monkeypatch):
assert spawned == []
# ---------------------------------------------------------------------------
# Prefetch staleness: a prefetch worker that finishes AFTER a session switch
# must drop its result instead of repopulating the new session with stale
# recall from the old generation. Bump the generation directly (rather than
# calling on_session_switch, whose own join blocks on the test worker) so
# the test isolates the generation-gating behavior.
# ---------------------------------------------------------------------------
def test_queue_prefetch_drops_result_when_generation_changed_mid_flight():
import threading
def _make_prefetch_provider() -> OpenVikingMemoryProvider:
provider = OpenVikingMemoryProvider()
provider._client = MagicMock()
provider._endpoint = "http://test"
@ -2874,76 +2885,355 @@ def test_queue_prefetch_drops_result_when_generation_changed_mid_flight():
provider._account = "acct"
provider._user = "usr"
provider._agent = "hermes"
provider._session_id = "old-sid"
return provider
started = threading.Event()
release = threading.Event()
def test_queue_prefetch_is_noop_for_openviking_recall(monkeypatch):
provider = _make_prefetch_provider()
constructed_clients = []
class StubClient:
def __init__(self, *a, **kw):
constructed_clients.append((a, kw))
monkeypatch.setattr(openviking_module, "_VikingClient", StubClient)
provider.queue_prefetch("anything", session_id="sid-123")
assert constructed_clients == []
def test_prefetch_sends_contract_safe_memory_context_payload(monkeypatch):
provider = _make_prefetch_provider()
captured_calls = []
class StubClient:
def __init__(self, *a, **kw):
pass
def post(self, path, payload=None, **kwargs):
started.set()
release.wait(timeout=2.0)
captured_calls.append((path, payload))
return {"result": {"memories": [], "resources": []}}
monkeypatch.setattr(openviking_module, "_VikingClient", StubClient)
provider.prefetch("anything")
assert captured_calls == [
(
"/api/v1/search/find",
{
"query": "anything",
"limit": 24,
"score_threshold": 0,
"context_type": "memory",
},
)
]
payload = captured_calls[0][1]
assert "top_k" not in payload
assert "mode" not in payload
assert "target_uri" not in payload
def test_prefetch_uses_session_search_when_session_id_available(monkeypatch):
provider = _make_prefetch_provider()
captured_calls = []
class StubClient:
def __init__(self, *a, **kw):
pass
def post(self, path, payload=None, **kwargs):
captured_calls.append((path, payload))
return {
"result": {
"memories": [
{"uri": "viking://memories/old", "score": 0.9,
"abstract": "stale from old session"},
{
"uri": "viking://user/peers/hermes/memories/events/mem_1.md",
"score": 0.9,
"abstract": "session-aware memory",
},
],
"resources": [],
"skills": [],
}
}
import plugins.memory.openviking as _mod
real_client_cls = _mod._VikingClient
_mod._VikingClient = StubClient
try:
provider.queue_prefetch("anything")
assert started.wait(timeout=2.0), "prefetch worker never entered post()"
# Simulate a session switch by bumping the generation directly.
# The worker captured the pre-bump generation when it was spawned.
provider._prefetch_generation += 1
release.set()
if provider._prefetch_thread:
provider._prefetch_thread.join(timeout=2.0)
finally:
_mod._VikingClient = real_client_cls
monkeypatch.setattr(openviking_module, "_VikingClient", StubClient)
# The stale result from the pre-bump generation must NOT have been written
# into the new generation's prefetch slot.
assert provider._prefetch_result == ""
result = provider.prefetch("anything", session_id="sid-123")
assert captured_calls == [
(
"/api/v1/search/search",
{
"query": "anything",
"limit": 24,
"score_threshold": 0,
"context_type": "memory",
"session_id": "sid-123",
},
)
]
payload = captured_calls[0][1]
assert "top_k" not in payload
assert "mode" not in payload
assert "target_uri" not in payload
assert "session-aware memory" in result
def test_queue_prefetch_sends_limit_not_legacy_top_k():
provider = OpenVikingMemoryProvider()
provider._client = MagicMock()
provider._endpoint = "http://test"
provider._api_key = ""
provider._account = "acct"
provider._user = "usr"
provider._agent = "hermes"
def test_prefetch_falls_back_to_find_when_session_search_fails(monkeypatch):
provider = _make_prefetch_provider()
captured_payloads = []
captured_calls = []
class StubClient:
def __init__(self, *a, **kw):
pass
def post(self, path, payload=None, **kwargs):
captured_payloads.append(payload)
return {"result": {"memories": [], "resources": []}}
captured_calls.append((path, payload))
if path == "/api/v1/search/search":
raise RuntimeError("session unavailable")
return {
"result": {
"memories": [
{
"uri": "viking://user/peers/hermes/memories/events/mem_2.md",
"score": 0.8,
"abstract": "non-session fallback",
},
],
"resources": [],
"skills": [],
}
}
import plugins.memory.openviking as _mod
real_client_cls = _mod._VikingClient
_mod._VikingClient = StubClient
try:
provider.queue_prefetch("anything")
if provider._prefetch_thread:
provider._prefetch_thread.join(timeout=2.0)
finally:
_mod._VikingClient = real_client_cls
monkeypatch.setattr(openviking_module, "_VikingClient", StubClient)
assert captured_payloads == [{"query": "anything", "limit": 5}]
assert "top_k" not in captured_payloads[0]
result = provider.prefetch("anything", session_id="sid-123")
assert captured_calls == [
(
"/api/v1/search/search",
{
"query": "anything",
"limit": 24,
"score_threshold": 0,
"context_type": "memory",
"session_id": "sid-123",
},
),
(
"/api/v1/search/find",
{
"query": "anything",
"limit": 24,
"score_threshold": 0,
"context_type": "memory",
},
),
]
for _path, payload in captured_calls:
assert "top_k" not in payload
assert "mode" not in payload
assert "target_uri" not in payload
assert "non-session fallback" in result
def test_prefetch_budget_exhaustion_skips_find_fallback_log(caplog):
class StubClient:
def post(self, path, payload=None, **kwargs):
raise AssertionError("local budget exhaustion should not issue HTTP calls")
with caplog.at_level("DEBUG", logger=openviking_module.__name__):
with pytest.raises(TimeoutError):
OpenVikingMemoryProvider._post_prefetch_search(
StubClient(),
"anything",
"sid-123",
limit=24,
context_type="memory",
deadline=time.monotonic() - 1.0,
request_timeout=4.0,
)
assert "falling back to search/find" not in caplog.text
def test_prefetch_reads_l2_content_and_ignores_skills_by_default(monkeypatch):
provider = _make_prefetch_provider()
captured_reads = []
class StubClient:
def __init__(self, *a, **kw):
pass
def post(self, path, payload=None, **kwargs):
return {
"result": {
"memories": [
{
"uri": "viking://user/peers/hermes/memories/events/mem_3.md",
"score": 0.9,
"level": 2,
"category": "events",
"abstract": "short abstract",
},
],
"resources": [],
"skills": [
{
"uri": "viking://user/skills/release-triage",
"score": 0.7,
"abstract": "skill context",
},
],
}
}
def get(self, path, params=None, **kwargs):
captured_reads.append((path, params or {}))
return {"result": {"content": "full memory content\nwith useful context"}}
monkeypatch.setattr(openviking_module, "_VikingClient", StubClient)
context = provider.prefetch("anything")
assert captured_reads == [
(
"/api/v1/content/read",
{"uri": "viking://user/peers/hermes/memories/events/mem_3.md"},
)
]
assert "full memory content" in context
assert "short abstract" not in context
assert "skill context" not in context
def test_prefetch_reads_empty_abstract_content_within_budget(monkeypatch):
provider = _make_prefetch_provider()
captured_reads = []
class StubClient:
def __init__(self, *a, **kw):
pass
def post(self, path, payload=None, **kwargs):
return {
"result": {
"memories": [
{
"uri": "viking://user/peers/hermes/memories/one.md",
"score": 0.9,
"abstract": "",
},
],
"resources": [],
"skills": [],
}
}
def get(self, path, params=None, **kwargs):
captured_reads.append((path, params or {}))
uri = (params or {}).get("uri", "")
return {"result": f"content for {uri}"}
monkeypatch.setattr(openviking_module, "_VikingClient", StubClient)
context = provider.prefetch("anything")
assert [params["uri"] for _path, params in captured_reads] == [
"viking://user/peers/hermes/memories/one.md",
]
assert (
"content for viking://user/peers/hermes/memories/one.md"
in context
)
def test_prefetch_caps_full_content_reads(monkeypatch):
provider = _make_prefetch_provider()
captured_reads = []
class StubClient:
def __init__(self, *a, **kw):
pass
def post(self, path, payload=None, **kwargs):
return {
"result": {
"memories": [
{
"uri": f"viking://user/peers/hermes/memories/events/mem_{idx}.md",
"score": 0.9 - (idx * 0.01),
"level": 2,
"category": "events",
"abstract": f"short abstract {idx}",
}
for idx in range(6)
],
"resources": [],
"skills": [],
}
}
def get(self, path, params=None, **kwargs):
captured_reads.append((path, params or {}))
uri = (params or {}).get("uri", "")
return {"result": {"content": f"full content for {uri}"}}
monkeypatch.setattr(openviking_module, "_VikingClient", StubClient)
context = provider.prefetch("anything")
assert len(captured_reads) == 2
assert "full content for viking://user/peers/hermes/memories/events/mem_0.md" in context
assert "full content for viking://user/peers/hermes/memories/events/mem_1.md" in context
assert "short abstract 2" in context
def test_prefetch_uses_bounded_http_timeouts(monkeypatch):
provider = _make_prefetch_provider()
captured_post_kwargs = []
captured_get_kwargs = []
class StubClient:
def __init__(self, *a, **kw):
pass
def post(self, path, payload=None, **kwargs):
captured_post_kwargs.append(kwargs)
return {
"result": {
"memories": [
{
"uri": "viking://user/peers/hermes/memories/events/mem_timeout.md",
"score": 0.9,
"level": 2,
"category": "events",
"abstract": "short abstract",
},
],
"resources": [],
"skills": [],
}
}
def get(self, path, params=None, **kwargs):
captured_get_kwargs.append(kwargs)
return {"result": {"content": "full memory content"}}
monkeypatch.setattr(openviking_module, "_VikingClient", StubClient)
provider.prefetch("anything", session_id="sid-123")
assert 0 < captured_post_kwargs[0]["timeout"] < openviking_module._TIMEOUT
assert 0 < captured_get_kwargs[0]["timeout"] < openviking_module._TIMEOUT