feat(openviking): add full recall prefetch policy

Salvage of PR #48927 by @ehz0ah, which consolidates OpenViking recall work from #41706 (@huangxun375-stack), #33260, #49975, and #32444. Replaces stale background post-turn prefetch warming with synchronous current-query recall. The old queue_prefetch warmed the PREVIOUS user message while turn-start recall consumed the CURRENT one, so injected context was always about the wrong topic. Changes: - prefetch() now does session-aware /api/v1/search/search with the current query, falls back to /api/v1/search/find on failure - Contract-safe payloads: limit, score_threshold, context_type, session_id — no top_k, no search-body mode, no target_uri - L2 content reads for items with level=2 or empty abstracts, capped at full_read_limit (default 2) - Local ranking (score + query-token overlap + leaf boost), dedup, score threshold, and injected-char budget - queue_prefetch() is now a no-op (background warming removed) - Additive batched viking_read: uris param accepts up to 3 URIs - Per-request timeout support on _VikingClient.get/post/delete - Removes stale _prefetch_result/_prefetch_thread/_prefetch_generation state and _invalidate_prefetch_state() - Strengthened system_prompt_block guidance Salvage follow-up fixes: - Expose all 8 recall config knobs in get_config_schema() (PR #48927 had removed them; #41706 correctly exposed them). Env vars remain as internal mechanism but are now visible in setup wizard. - Lower default timeout 8s→4s, request_timeout 6s→3s, full_read_limit 3→2 to reduce per-turn blocking latency. Co-authored-by: Hao Zhe <haozhe4547@gmail.com> Co-authored-by: Eurekaxun <eurekaxun@163.com>
2026-06-28 11:32:22 +00:00 · 2026-06-24 18:53:49 +05:30 · 2026-06-24 18:53:49 +05:30 · ab9134bf16
commit ab9134bf16
parent 89540d592b
3 changed files with 1443 additions and 200 deletions
--- a/tests/plugins/memory/test_openviking_provider.py
+++ b/tests/plugins/memory/test_openviking_provider.py
@ -1,6 +1,7 @@
 import json
 import os
 import stat
+import time
 import zipfile
 from types import SimpleNamespace
 from unittest.mock import MagicMock
@ -1208,6 +1209,7 @@ def test_tool_search_sends_limit_not_legacy_top_k():
    payload = provider._client.post.call_args.args[1]
    assert payload["limit"] == 7
    assert "top_k" not in payload
+    assert "mode" not in payload


 def test_tool_search_uses_find_for_normal_search():
@ -1222,6 +1224,7 @@ def test_tool_search_uses_find_for_normal_search():
    provider._client.post.assert_called_once_with("/api/v1/search/find", {
        "query": "simple lookup",
    })
+    assert "mode" not in provider._client.post.call_args.args[1]


 def test_tool_search_uses_session_search_for_deep_search():
@ -1238,6 +1241,7 @@ def test_tool_search_uses_session_search_for_deep_search():
        "query": "connect facts",
        "session_id": "session-123",
    })
+    assert "mode" not in provider._client.post.call_args.args[1]


 def test_tool_add_resource_uploads_existing_local_file(tmp_path):
@ -1590,6 +1594,36 @@ def test_viking_client_delete_uses_identity_headers(monkeypatch):
    assert captured["kwargs"]["headers"]["X-OpenViking-Actor-Peer"] == "hermes"


+def test_viking_client_post_allows_per_request_timeout(monkeypatch):
+    client = _VikingClient(
+        "https://example.com",
+        api_key="test-key",
+        account="acct",
+        user="alice",
+        agent="hermes",
+    )
+    captured = {}
+
+    def capture_post(url, **kwargs):
+        captured["url"] = url
+        captured["kwargs"] = kwargs
+        return SimpleNamespace(
+            status_code=200,
+            text="",
+            json=lambda: {"status": "ok", "result": {}},
+            raise_for_status=lambda: None,
+        )
+
+    monkeypatch.setattr(client._httpx, "post", capture_post)
+
+    assert client.post("/api/v1/search/find", {"query": "anything"}, timeout=1.25) == {
+        "status": "ok",
+        "result": {},
+    }
+    assert captured["url"] == "https://example.com/api/v1/search/find"
+    assert captured["kwargs"]["timeout"] == 1.25
+
+
 def test_viking_client_upload_temp_file_uses_multipart_identity_headers(tmp_path, monkeypatch):
    sample = tmp_path / "sample.md"
    sample.write_text("# Local resource\n", encoding="utf-8")
@ -2140,10 +2174,8 @@ def test_on_session_switch_commits_pending_tokens_without_turn_count():
    assert provider._turn_count == 0


-def test_on_session_switch_rewound_same_session_only_invalidates_prefetch():
+def test_on_session_switch_rewound_same_session_skips_commit_and_rotation():
    provider = _make_provider_with_session("same-sid", turn_count=3)
-    provider._prefetch_generation = 9
-    provider._prefetch_result = "stale recall"

    provider.on_session_switch("same-sid", rewound=True)

@ -2151,17 +2183,6 @@ def test_on_session_switch_rewound_same_session_only_invalidates_prefetch():
    provider._client.post.assert_not_called()
    assert provider._session_id == "same-sid"
    assert provider._turn_count == 3
-    assert provider._prefetch_generation == 10
-    assert provider._prefetch_result == ""
-
-
-def test_on_session_switch_clears_stale_prefetch_result():
-    provider = _make_provider_with_session("old-sid", turn_count=1)
-    provider._prefetch_result = "stale recall from old session"
-
-    provider.on_session_switch("new-sid")
-
-    assert provider._prefetch_result == ""


 def test_on_session_switch_waits_for_inflight_sync_thread():
@ -2856,17 +2877,7 @@ def test_on_memory_write_ignores_non_add_actions(action, content, monkeypatch):
    assert spawned == []


-# ---------------------------------------------------------------------------
-# Prefetch staleness: a prefetch worker that finishes AFTER a session switch
-# must drop its result instead of repopulating the new session with stale
-# recall from the old generation. Bump the generation directly (rather than
-# calling on_session_switch, whose own join blocks on the test worker) so
-# the test isolates the generation-gating behavior.
-# ---------------------------------------------------------------------------
-
-def test_queue_prefetch_drops_result_when_generation_changed_mid_flight():
-    import threading
-
+def _make_prefetch_provider() -> OpenVikingMemoryProvider:
    provider = OpenVikingMemoryProvider()
    provider._client = MagicMock()
    provider._endpoint = "http://test"
@ -2874,76 +2885,355 @@ def test_queue_prefetch_drops_result_when_generation_changed_mid_flight():
    provider._account = "acct"
    provider._user = "usr"
    provider._agent = "hermes"
-    provider._session_id = "old-sid"
+    return provider

-    started = threading.Event()
-    release = threading.Event()
+
+def test_queue_prefetch_is_noop_for_openviking_recall(monkeypatch):
+    provider = _make_prefetch_provider()
+    constructed_clients = []
+
+    class StubClient:
+        def __init__(self, *a, **kw):
+            constructed_clients.append((a, kw))
+
+    monkeypatch.setattr(openviking_module, "_VikingClient", StubClient)
+
+    provider.queue_prefetch("anything", session_id="sid-123")
+
+    assert constructed_clients == []
+
+
+def test_prefetch_sends_contract_safe_memory_context_payload(monkeypatch):
+    provider = _make_prefetch_provider()
+
+    captured_calls = []

    class StubClient:
        def __init__(self, *a, **kw):
            pass

        def post(self, path, payload=None, **kwargs):
-            started.set()
-            release.wait(timeout=2.0)
+            captured_calls.append((path, payload))
+            return {"result": {"memories": [], "resources": []}}
+
+    monkeypatch.setattr(openviking_module, "_VikingClient", StubClient)
+
+    provider.prefetch("anything")
+
+    assert captured_calls == [
+        (
+            "/api/v1/search/find",
+            {
+                "query": "anything",
+                "limit": 24,
+                "score_threshold": 0,
+                "context_type": "memory",
+            },
+        )
+    ]
+    payload = captured_calls[0][1]
+    assert "top_k" not in payload
+    assert "mode" not in payload
+    assert "target_uri" not in payload
+
+
+def test_prefetch_uses_session_search_when_session_id_available(monkeypatch):
+    provider = _make_prefetch_provider()
+
+    captured_calls = []
+
+    class StubClient:
+        def __init__(self, *a, **kw):
+            pass
+
+        def post(self, path, payload=None, **kwargs):
+            captured_calls.append((path, payload))
            return {
                "result": {
                    "memories": [
-                        {"uri": "viking://memories/old", "score": 0.9,
-                         "abstract": "stale from old session"},
+                        {
+                            "uri": "viking://user/peers/hermes/memories/events/mem_1.md",
+                            "score": 0.9,
+                            "abstract": "session-aware memory",
+                        },
                    ],
                    "resources": [],
+                    "skills": [],
                }
            }

-    import plugins.memory.openviking as _mod
-    real_client_cls = _mod._VikingClient
-    _mod._VikingClient = StubClient
-    try:
-        provider.queue_prefetch("anything")
-        assert started.wait(timeout=2.0), "prefetch worker never entered post()"
-        # Simulate a session switch by bumping the generation directly.
-        # The worker captured the pre-bump generation when it was spawned.
-        provider._prefetch_generation += 1
-        release.set()
-        if provider._prefetch_thread:
-            provider._prefetch_thread.join(timeout=2.0)
-    finally:
-        _mod._VikingClient = real_client_cls
+    monkeypatch.setattr(openviking_module, "_VikingClient", StubClient)

-    # The stale result from the pre-bump generation must NOT have been written
-    # into the new generation's prefetch slot.
-    assert provider._prefetch_result == ""
+    result = provider.prefetch("anything", session_id="sid-123")
+
+    assert captured_calls == [
+        (
+            "/api/v1/search/search",
+            {
+                "query": "anything",
+                "limit": 24,
+                "score_threshold": 0,
+                "context_type": "memory",
+                "session_id": "sid-123",
+            },
+        )
+    ]
+    payload = captured_calls[0][1]
+    assert "top_k" not in payload
+    assert "mode" not in payload
+    assert "target_uri" not in payload
+    assert "session-aware memory" in result


-def test_queue_prefetch_sends_limit_not_legacy_top_k():
-    provider = OpenVikingMemoryProvider()
-    provider._client = MagicMock()
-    provider._endpoint = "http://test"
-    provider._api_key = ""
-    provider._account = "acct"
-    provider._user = "usr"
-    provider._agent = "hermes"
+def test_prefetch_falls_back_to_find_when_session_search_fails(monkeypatch):
+    provider = _make_prefetch_provider()

-    captured_payloads = []
+    captured_calls = []

    class StubClient:
        def __init__(self, *a, **kw):
            pass

        def post(self, path, payload=None, **kwargs):
-            captured_payloads.append(payload)
-            return {"result": {"memories": [], "resources": []}}
+            captured_calls.append((path, payload))
+            if path == "/api/v1/search/search":
+                raise RuntimeError("session unavailable")
+            return {
+                "result": {
+                    "memories": [
+                        {
+                            "uri": "viking://user/peers/hermes/memories/events/mem_2.md",
+                            "score": 0.8,
+                            "abstract": "non-session fallback",
+                        },
+                    ],
+                    "resources": [],
+                    "skills": [],
+                }
+            }

-    import plugins.memory.openviking as _mod
-    real_client_cls = _mod._VikingClient
-    _mod._VikingClient = StubClient
-    try:
-        provider.queue_prefetch("anything")
-        if provider._prefetch_thread:
-            provider._prefetch_thread.join(timeout=2.0)
-    finally:
-        _mod._VikingClient = real_client_cls
+    monkeypatch.setattr(openviking_module, "_VikingClient", StubClient)

-    assert captured_payloads == [{"query": "anything", "limit": 5}]
-    assert "top_k" not in captured_payloads[0]
+    result = provider.prefetch("anything", session_id="sid-123")
+
+    assert captured_calls == [
+        (
+            "/api/v1/search/search",
+            {
+                "query": "anything",
+                "limit": 24,
+                "score_threshold": 0,
+                "context_type": "memory",
+                "session_id": "sid-123",
+            },
+        ),
+        (
+            "/api/v1/search/find",
+            {
+                "query": "anything",
+                "limit": 24,
+                "score_threshold": 0,
+                "context_type": "memory",
+            },
+        ),
+    ]
+    for _path, payload in captured_calls:
+        assert "top_k" not in payload
+        assert "mode" not in payload
+        assert "target_uri" not in payload
+    assert "non-session fallback" in result
+
+
+def test_prefetch_budget_exhaustion_skips_find_fallback_log(caplog):
+    class StubClient:
+        def post(self, path, payload=None, **kwargs):
+            raise AssertionError("local budget exhaustion should not issue HTTP calls")
+
+    with caplog.at_level("DEBUG", logger=openviking_module.__name__):
+        with pytest.raises(TimeoutError):
+            OpenVikingMemoryProvider._post_prefetch_search(
+                StubClient(),
+                "anything",
+                "sid-123",
+                limit=24,
+                context_type="memory",
+                deadline=time.monotonic() - 1.0,
+                request_timeout=4.0,
+            )
+
+    assert "falling back to search/find" not in caplog.text
+
+
+def test_prefetch_reads_l2_content_and_ignores_skills_by_default(monkeypatch):
+    provider = _make_prefetch_provider()
+
+    captured_reads = []
+
+    class StubClient:
+        def __init__(self, *a, **kw):
+            pass
+
+        def post(self, path, payload=None, **kwargs):
+            return {
+                "result": {
+                    "memories": [
+                        {
+                            "uri": "viking://user/peers/hermes/memories/events/mem_3.md",
+                            "score": 0.9,
+                            "level": 2,
+                            "category": "events",
+                            "abstract": "short abstract",
+                        },
+                    ],
+                    "resources": [],
+                    "skills": [
+                        {
+                            "uri": "viking://user/skills/release-triage",
+                            "score": 0.7,
+                            "abstract": "skill context",
+                        },
+                    ],
+                }
+            }
+
+        def get(self, path, params=None, **kwargs):
+            captured_reads.append((path, params or {}))
+            return {"result": {"content": "full memory content\nwith useful context"}}
+
+    monkeypatch.setattr(openviking_module, "_VikingClient", StubClient)
+
+    context = provider.prefetch("anything")
+
+    assert captured_reads == [
+        (
+            "/api/v1/content/read",
+            {"uri": "viking://user/peers/hermes/memories/events/mem_3.md"},
+        )
+    ]
+    assert "full memory content" in context
+    assert "short abstract" not in context
+    assert "skill context" not in context
+
+
+def test_prefetch_reads_empty_abstract_content_within_budget(monkeypatch):
+    provider = _make_prefetch_provider()
+
+    captured_reads = []
+
+    class StubClient:
+        def __init__(self, *a, **kw):
+            pass
+
+        def post(self, path, payload=None, **kwargs):
+            return {
+                "result": {
+                    "memories": [
+                        {
+                            "uri": "viking://user/peers/hermes/memories/one.md",
+                            "score": 0.9,
+                            "abstract": "",
+                        },
+                    ],
+                    "resources": [],
+                    "skills": [],
+                }
+            }
+
+        def get(self, path, params=None, **kwargs):
+            captured_reads.append((path, params or {}))
+            uri = (params or {}).get("uri", "")
+            return {"result": f"content for {uri}"}
+
+    monkeypatch.setattr(openviking_module, "_VikingClient", StubClient)
+
+    context = provider.prefetch("anything")
+
+    assert [params["uri"] for _path, params in captured_reads] == [
+        "viking://user/peers/hermes/memories/one.md",
+    ]
+    assert (
+        "content for viking://user/peers/hermes/memories/one.md"
+        in context
+    )
+
+
+def test_prefetch_caps_full_content_reads(monkeypatch):
+    provider = _make_prefetch_provider()
+
+    captured_reads = []
+
+    class StubClient:
+        def __init__(self, *a, **kw):
+            pass
+
+        def post(self, path, payload=None, **kwargs):
+            return {
+                "result": {
+                    "memories": [
+                        {
+                            "uri": f"viking://user/peers/hermes/memories/events/mem_{idx}.md",
+                            "score": 0.9 - (idx * 0.01),
+                            "level": 2,
+                            "category": "events",
+                            "abstract": f"short abstract {idx}",
+                        }
+                        for idx in range(6)
+                    ],
+                    "resources": [],
+                    "skills": [],
+                }
+            }
+
+        def get(self, path, params=None, **kwargs):
+            captured_reads.append((path, params or {}))
+            uri = (params or {}).get("uri", "")
+            return {"result": {"content": f"full content for {uri}"}}
+
+    monkeypatch.setattr(openviking_module, "_VikingClient", StubClient)
+
+    context = provider.prefetch("anything")
+
+    assert len(captured_reads) == 2
+    assert "full content for viking://user/peers/hermes/memories/events/mem_0.md" in context
+    assert "full content for viking://user/peers/hermes/memories/events/mem_1.md" in context
+    assert "short abstract 2" in context
+
+
+def test_prefetch_uses_bounded_http_timeouts(monkeypatch):
+    provider = _make_prefetch_provider()
+
+    captured_post_kwargs = []
+    captured_get_kwargs = []
+
+    class StubClient:
+        def __init__(self, *a, **kw):
+            pass
+
+        def post(self, path, payload=None, **kwargs):
+            captured_post_kwargs.append(kwargs)
+            return {
+                "result": {
+                    "memories": [
+                        {
+                            "uri": "viking://user/peers/hermes/memories/events/mem_timeout.md",
+                            "score": 0.9,
+                            "level": 2,
+                            "category": "events",
+                            "abstract": "short abstract",
+                        },
+                    ],
+                    "resources": [],
+                    "skills": [],
+                }
+            }
+
+        def get(self, path, params=None, **kwargs):
+            captured_get_kwargs.append(kwargs)
+            return {"result": {"content": "full memory content"}}
+
+    monkeypatch.setattr(openviking_module, "_VikingClient", StubClient)
+
+    provider.prefetch("anything", session_id="sid-123")
+
+    assert 0 < captured_post_kwargs[0]["timeout"] < openviking_module._TIMEOUT
+    assert 0 < captured_get_kwargs[0]["timeout"] < openviking_module._TIMEOUT