mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-28 11:32:22 +00:00
feat(openviking): add full recall prefetch policy
Salvage of PR #48927 by @ehz0ah, which consolidates OpenViking recall work from #41706 (@huangxun375-stack), #33260, #49975, and #32444. Replaces stale background post-turn prefetch warming with synchronous current-query recall. The old queue_prefetch warmed the PREVIOUS user message while turn-start recall consumed the CURRENT one, so injected context was always about the wrong topic. Changes: - prefetch() now does session-aware /api/v1/search/search with the current query, falls back to /api/v1/search/find on failure - Contract-safe payloads: limit, score_threshold, context_type, session_id — no top_k, no search-body mode, no target_uri - L2 content reads for items with level=2 or empty abstracts, capped at full_read_limit (default 2) - Local ranking (score + query-token overlap + leaf boost), dedup, score threshold, and injected-char budget - queue_prefetch() is now a no-op (background warming removed) - Additive batched viking_read: uris param accepts up to 3 URIs - Per-request timeout support on _VikingClient.get/post/delete - Removes stale _prefetch_result/_prefetch_thread/_prefetch_generation state and _invalidate_prefetch_state() - Strengthened system_prompt_block guidance Salvage follow-up fixes: - Expose all 8 recall config knobs in get_config_schema() (PR #48927 had removed them; #41706 correctly exposed them). Env vars remain as internal mechanism but are now visible in setup wizard. - Lower default timeout 8s→4s, request_timeout 6s→3s, full_read_limit 3→2 to reduce per-turn blocking latency. Co-authored-by: Hao Zhe <haozhe4547@gmail.com> Co-authored-by: Eurekaxun <eurekaxun@163.com>
This commit is contained in:
parent
89540d592b
commit
ab9134bf16
3 changed files with 1443 additions and 200 deletions
|
|
@ -1,6 +1,7 @@
|
|||
import json
|
||||
import os
|
||||
import stat
|
||||
import time
|
||||
import zipfile
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import MagicMock
|
||||
|
|
@ -1208,6 +1209,7 @@ def test_tool_search_sends_limit_not_legacy_top_k():
|
|||
payload = provider._client.post.call_args.args[1]
|
||||
assert payload["limit"] == 7
|
||||
assert "top_k" not in payload
|
||||
assert "mode" not in payload
|
||||
|
||||
|
||||
def test_tool_search_uses_find_for_normal_search():
|
||||
|
|
@ -1222,6 +1224,7 @@ def test_tool_search_uses_find_for_normal_search():
|
|||
provider._client.post.assert_called_once_with("/api/v1/search/find", {
|
||||
"query": "simple lookup",
|
||||
})
|
||||
assert "mode" not in provider._client.post.call_args.args[1]
|
||||
|
||||
|
||||
def test_tool_search_uses_session_search_for_deep_search():
|
||||
|
|
@ -1238,6 +1241,7 @@ def test_tool_search_uses_session_search_for_deep_search():
|
|||
"query": "connect facts",
|
||||
"session_id": "session-123",
|
||||
})
|
||||
assert "mode" not in provider._client.post.call_args.args[1]
|
||||
|
||||
|
||||
def test_tool_add_resource_uploads_existing_local_file(tmp_path):
|
||||
|
|
@ -1590,6 +1594,36 @@ def test_viking_client_delete_uses_identity_headers(monkeypatch):
|
|||
assert captured["kwargs"]["headers"]["X-OpenViking-Actor-Peer"] == "hermes"
|
||||
|
||||
|
||||
def test_viking_client_post_allows_per_request_timeout(monkeypatch):
|
||||
client = _VikingClient(
|
||||
"https://example.com",
|
||||
api_key="test-key",
|
||||
account="acct",
|
||||
user="alice",
|
||||
agent="hermes",
|
||||
)
|
||||
captured = {}
|
||||
|
||||
def capture_post(url, **kwargs):
|
||||
captured["url"] = url
|
||||
captured["kwargs"] = kwargs
|
||||
return SimpleNamespace(
|
||||
status_code=200,
|
||||
text="",
|
||||
json=lambda: {"status": "ok", "result": {}},
|
||||
raise_for_status=lambda: None,
|
||||
)
|
||||
|
||||
monkeypatch.setattr(client._httpx, "post", capture_post)
|
||||
|
||||
assert client.post("/api/v1/search/find", {"query": "anything"}, timeout=1.25) == {
|
||||
"status": "ok",
|
||||
"result": {},
|
||||
}
|
||||
assert captured["url"] == "https://example.com/api/v1/search/find"
|
||||
assert captured["kwargs"]["timeout"] == 1.25
|
||||
|
||||
|
||||
def test_viking_client_upload_temp_file_uses_multipart_identity_headers(tmp_path, monkeypatch):
|
||||
sample = tmp_path / "sample.md"
|
||||
sample.write_text("# Local resource\n", encoding="utf-8")
|
||||
|
|
@ -2140,10 +2174,8 @@ def test_on_session_switch_commits_pending_tokens_without_turn_count():
|
|||
assert provider._turn_count == 0
|
||||
|
||||
|
||||
def test_on_session_switch_rewound_same_session_only_invalidates_prefetch():
|
||||
def test_on_session_switch_rewound_same_session_skips_commit_and_rotation():
|
||||
provider = _make_provider_with_session("same-sid", turn_count=3)
|
||||
provider._prefetch_generation = 9
|
||||
provider._prefetch_result = "stale recall"
|
||||
|
||||
provider.on_session_switch("same-sid", rewound=True)
|
||||
|
||||
|
|
@ -2151,17 +2183,6 @@ def test_on_session_switch_rewound_same_session_only_invalidates_prefetch():
|
|||
provider._client.post.assert_not_called()
|
||||
assert provider._session_id == "same-sid"
|
||||
assert provider._turn_count == 3
|
||||
assert provider._prefetch_generation == 10
|
||||
assert provider._prefetch_result == ""
|
||||
|
||||
|
||||
def test_on_session_switch_clears_stale_prefetch_result():
|
||||
provider = _make_provider_with_session("old-sid", turn_count=1)
|
||||
provider._prefetch_result = "stale recall from old session"
|
||||
|
||||
provider.on_session_switch("new-sid")
|
||||
|
||||
assert provider._prefetch_result == ""
|
||||
|
||||
|
||||
def test_on_session_switch_waits_for_inflight_sync_thread():
|
||||
|
|
@ -2856,17 +2877,7 @@ def test_on_memory_write_ignores_non_add_actions(action, content, monkeypatch):
|
|||
assert spawned == []
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Prefetch staleness: a prefetch worker that finishes AFTER a session switch
|
||||
# must drop its result instead of repopulating the new session with stale
|
||||
# recall from the old generation. Bump the generation directly (rather than
|
||||
# calling on_session_switch, whose own join blocks on the test worker) so
|
||||
# the test isolates the generation-gating behavior.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_queue_prefetch_drops_result_when_generation_changed_mid_flight():
|
||||
import threading
|
||||
|
||||
def _make_prefetch_provider() -> OpenVikingMemoryProvider:
|
||||
provider = OpenVikingMemoryProvider()
|
||||
provider._client = MagicMock()
|
||||
provider._endpoint = "http://test"
|
||||
|
|
@ -2874,76 +2885,355 @@ def test_queue_prefetch_drops_result_when_generation_changed_mid_flight():
|
|||
provider._account = "acct"
|
||||
provider._user = "usr"
|
||||
provider._agent = "hermes"
|
||||
provider._session_id = "old-sid"
|
||||
return provider
|
||||
|
||||
started = threading.Event()
|
||||
release = threading.Event()
|
||||
|
||||
def test_queue_prefetch_is_noop_for_openviking_recall(monkeypatch):
|
||||
provider = _make_prefetch_provider()
|
||||
constructed_clients = []
|
||||
|
||||
class StubClient:
|
||||
def __init__(self, *a, **kw):
|
||||
constructed_clients.append((a, kw))
|
||||
|
||||
monkeypatch.setattr(openviking_module, "_VikingClient", StubClient)
|
||||
|
||||
provider.queue_prefetch("anything", session_id="sid-123")
|
||||
|
||||
assert constructed_clients == []
|
||||
|
||||
|
||||
def test_prefetch_sends_contract_safe_memory_context_payload(monkeypatch):
|
||||
provider = _make_prefetch_provider()
|
||||
|
||||
captured_calls = []
|
||||
|
||||
class StubClient:
|
||||
def __init__(self, *a, **kw):
|
||||
pass
|
||||
|
||||
def post(self, path, payload=None, **kwargs):
|
||||
started.set()
|
||||
release.wait(timeout=2.0)
|
||||
captured_calls.append((path, payload))
|
||||
return {"result": {"memories": [], "resources": []}}
|
||||
|
||||
monkeypatch.setattr(openviking_module, "_VikingClient", StubClient)
|
||||
|
||||
provider.prefetch("anything")
|
||||
|
||||
assert captured_calls == [
|
||||
(
|
||||
"/api/v1/search/find",
|
||||
{
|
||||
"query": "anything",
|
||||
"limit": 24,
|
||||
"score_threshold": 0,
|
||||
"context_type": "memory",
|
||||
},
|
||||
)
|
||||
]
|
||||
payload = captured_calls[0][1]
|
||||
assert "top_k" not in payload
|
||||
assert "mode" not in payload
|
||||
assert "target_uri" not in payload
|
||||
|
||||
|
||||
def test_prefetch_uses_session_search_when_session_id_available(monkeypatch):
|
||||
provider = _make_prefetch_provider()
|
||||
|
||||
captured_calls = []
|
||||
|
||||
class StubClient:
|
||||
def __init__(self, *a, **kw):
|
||||
pass
|
||||
|
||||
def post(self, path, payload=None, **kwargs):
|
||||
captured_calls.append((path, payload))
|
||||
return {
|
||||
"result": {
|
||||
"memories": [
|
||||
{"uri": "viking://memories/old", "score": 0.9,
|
||||
"abstract": "stale from old session"},
|
||||
{
|
||||
"uri": "viking://user/peers/hermes/memories/events/mem_1.md",
|
||||
"score": 0.9,
|
||||
"abstract": "session-aware memory",
|
||||
},
|
||||
],
|
||||
"resources": [],
|
||||
"skills": [],
|
||||
}
|
||||
}
|
||||
|
||||
import plugins.memory.openviking as _mod
|
||||
real_client_cls = _mod._VikingClient
|
||||
_mod._VikingClient = StubClient
|
||||
try:
|
||||
provider.queue_prefetch("anything")
|
||||
assert started.wait(timeout=2.0), "prefetch worker never entered post()"
|
||||
# Simulate a session switch by bumping the generation directly.
|
||||
# The worker captured the pre-bump generation when it was spawned.
|
||||
provider._prefetch_generation += 1
|
||||
release.set()
|
||||
if provider._prefetch_thread:
|
||||
provider._prefetch_thread.join(timeout=2.0)
|
||||
finally:
|
||||
_mod._VikingClient = real_client_cls
|
||||
monkeypatch.setattr(openviking_module, "_VikingClient", StubClient)
|
||||
|
||||
# The stale result from the pre-bump generation must NOT have been written
|
||||
# into the new generation's prefetch slot.
|
||||
assert provider._prefetch_result == ""
|
||||
result = provider.prefetch("anything", session_id="sid-123")
|
||||
|
||||
assert captured_calls == [
|
||||
(
|
||||
"/api/v1/search/search",
|
||||
{
|
||||
"query": "anything",
|
||||
"limit": 24,
|
||||
"score_threshold": 0,
|
||||
"context_type": "memory",
|
||||
"session_id": "sid-123",
|
||||
},
|
||||
)
|
||||
]
|
||||
payload = captured_calls[0][1]
|
||||
assert "top_k" not in payload
|
||||
assert "mode" not in payload
|
||||
assert "target_uri" not in payload
|
||||
assert "session-aware memory" in result
|
||||
|
||||
|
||||
def test_queue_prefetch_sends_limit_not_legacy_top_k():
|
||||
provider = OpenVikingMemoryProvider()
|
||||
provider._client = MagicMock()
|
||||
provider._endpoint = "http://test"
|
||||
provider._api_key = ""
|
||||
provider._account = "acct"
|
||||
provider._user = "usr"
|
||||
provider._agent = "hermes"
|
||||
def test_prefetch_falls_back_to_find_when_session_search_fails(monkeypatch):
|
||||
provider = _make_prefetch_provider()
|
||||
|
||||
captured_payloads = []
|
||||
captured_calls = []
|
||||
|
||||
class StubClient:
|
||||
def __init__(self, *a, **kw):
|
||||
pass
|
||||
|
||||
def post(self, path, payload=None, **kwargs):
|
||||
captured_payloads.append(payload)
|
||||
return {"result": {"memories": [], "resources": []}}
|
||||
captured_calls.append((path, payload))
|
||||
if path == "/api/v1/search/search":
|
||||
raise RuntimeError("session unavailable")
|
||||
return {
|
||||
"result": {
|
||||
"memories": [
|
||||
{
|
||||
"uri": "viking://user/peers/hermes/memories/events/mem_2.md",
|
||||
"score": 0.8,
|
||||
"abstract": "non-session fallback",
|
||||
},
|
||||
],
|
||||
"resources": [],
|
||||
"skills": [],
|
||||
}
|
||||
}
|
||||
|
||||
import plugins.memory.openviking as _mod
|
||||
real_client_cls = _mod._VikingClient
|
||||
_mod._VikingClient = StubClient
|
||||
try:
|
||||
provider.queue_prefetch("anything")
|
||||
if provider._prefetch_thread:
|
||||
provider._prefetch_thread.join(timeout=2.0)
|
||||
finally:
|
||||
_mod._VikingClient = real_client_cls
|
||||
monkeypatch.setattr(openviking_module, "_VikingClient", StubClient)
|
||||
|
||||
assert captured_payloads == [{"query": "anything", "limit": 5}]
|
||||
assert "top_k" not in captured_payloads[0]
|
||||
result = provider.prefetch("anything", session_id="sid-123")
|
||||
|
||||
assert captured_calls == [
|
||||
(
|
||||
"/api/v1/search/search",
|
||||
{
|
||||
"query": "anything",
|
||||
"limit": 24,
|
||||
"score_threshold": 0,
|
||||
"context_type": "memory",
|
||||
"session_id": "sid-123",
|
||||
},
|
||||
),
|
||||
(
|
||||
"/api/v1/search/find",
|
||||
{
|
||||
"query": "anything",
|
||||
"limit": 24,
|
||||
"score_threshold": 0,
|
||||
"context_type": "memory",
|
||||
},
|
||||
),
|
||||
]
|
||||
for _path, payload in captured_calls:
|
||||
assert "top_k" not in payload
|
||||
assert "mode" not in payload
|
||||
assert "target_uri" not in payload
|
||||
assert "non-session fallback" in result
|
||||
|
||||
|
||||
def test_prefetch_budget_exhaustion_skips_find_fallback_log(caplog):
|
||||
class StubClient:
|
||||
def post(self, path, payload=None, **kwargs):
|
||||
raise AssertionError("local budget exhaustion should not issue HTTP calls")
|
||||
|
||||
with caplog.at_level("DEBUG", logger=openviking_module.__name__):
|
||||
with pytest.raises(TimeoutError):
|
||||
OpenVikingMemoryProvider._post_prefetch_search(
|
||||
StubClient(),
|
||||
"anything",
|
||||
"sid-123",
|
||||
limit=24,
|
||||
context_type="memory",
|
||||
deadline=time.monotonic() - 1.0,
|
||||
request_timeout=4.0,
|
||||
)
|
||||
|
||||
assert "falling back to search/find" not in caplog.text
|
||||
|
||||
|
||||
def test_prefetch_reads_l2_content_and_ignores_skills_by_default(monkeypatch):
|
||||
provider = _make_prefetch_provider()
|
||||
|
||||
captured_reads = []
|
||||
|
||||
class StubClient:
|
||||
def __init__(self, *a, **kw):
|
||||
pass
|
||||
|
||||
def post(self, path, payload=None, **kwargs):
|
||||
return {
|
||||
"result": {
|
||||
"memories": [
|
||||
{
|
||||
"uri": "viking://user/peers/hermes/memories/events/mem_3.md",
|
||||
"score": 0.9,
|
||||
"level": 2,
|
||||
"category": "events",
|
||||
"abstract": "short abstract",
|
||||
},
|
||||
],
|
||||
"resources": [],
|
||||
"skills": [
|
||||
{
|
||||
"uri": "viking://user/skills/release-triage",
|
||||
"score": 0.7,
|
||||
"abstract": "skill context",
|
||||
},
|
||||
],
|
||||
}
|
||||
}
|
||||
|
||||
def get(self, path, params=None, **kwargs):
|
||||
captured_reads.append((path, params or {}))
|
||||
return {"result": {"content": "full memory content\nwith useful context"}}
|
||||
|
||||
monkeypatch.setattr(openviking_module, "_VikingClient", StubClient)
|
||||
|
||||
context = provider.prefetch("anything")
|
||||
|
||||
assert captured_reads == [
|
||||
(
|
||||
"/api/v1/content/read",
|
||||
{"uri": "viking://user/peers/hermes/memories/events/mem_3.md"},
|
||||
)
|
||||
]
|
||||
assert "full memory content" in context
|
||||
assert "short abstract" not in context
|
||||
assert "skill context" not in context
|
||||
|
||||
|
||||
def test_prefetch_reads_empty_abstract_content_within_budget(monkeypatch):
|
||||
provider = _make_prefetch_provider()
|
||||
|
||||
captured_reads = []
|
||||
|
||||
class StubClient:
|
||||
def __init__(self, *a, **kw):
|
||||
pass
|
||||
|
||||
def post(self, path, payload=None, **kwargs):
|
||||
return {
|
||||
"result": {
|
||||
"memories": [
|
||||
{
|
||||
"uri": "viking://user/peers/hermes/memories/one.md",
|
||||
"score": 0.9,
|
||||
"abstract": "",
|
||||
},
|
||||
],
|
||||
"resources": [],
|
||||
"skills": [],
|
||||
}
|
||||
}
|
||||
|
||||
def get(self, path, params=None, **kwargs):
|
||||
captured_reads.append((path, params or {}))
|
||||
uri = (params or {}).get("uri", "")
|
||||
return {"result": f"content for {uri}"}
|
||||
|
||||
monkeypatch.setattr(openviking_module, "_VikingClient", StubClient)
|
||||
|
||||
context = provider.prefetch("anything")
|
||||
|
||||
assert [params["uri"] for _path, params in captured_reads] == [
|
||||
"viking://user/peers/hermes/memories/one.md",
|
||||
]
|
||||
assert (
|
||||
"content for viking://user/peers/hermes/memories/one.md"
|
||||
in context
|
||||
)
|
||||
|
||||
|
||||
def test_prefetch_caps_full_content_reads(monkeypatch):
|
||||
provider = _make_prefetch_provider()
|
||||
|
||||
captured_reads = []
|
||||
|
||||
class StubClient:
|
||||
def __init__(self, *a, **kw):
|
||||
pass
|
||||
|
||||
def post(self, path, payload=None, **kwargs):
|
||||
return {
|
||||
"result": {
|
||||
"memories": [
|
||||
{
|
||||
"uri": f"viking://user/peers/hermes/memories/events/mem_{idx}.md",
|
||||
"score": 0.9 - (idx * 0.01),
|
||||
"level": 2,
|
||||
"category": "events",
|
||||
"abstract": f"short abstract {idx}",
|
||||
}
|
||||
for idx in range(6)
|
||||
],
|
||||
"resources": [],
|
||||
"skills": [],
|
||||
}
|
||||
}
|
||||
|
||||
def get(self, path, params=None, **kwargs):
|
||||
captured_reads.append((path, params or {}))
|
||||
uri = (params or {}).get("uri", "")
|
||||
return {"result": {"content": f"full content for {uri}"}}
|
||||
|
||||
monkeypatch.setattr(openviking_module, "_VikingClient", StubClient)
|
||||
|
||||
context = provider.prefetch("anything")
|
||||
|
||||
assert len(captured_reads) == 2
|
||||
assert "full content for viking://user/peers/hermes/memories/events/mem_0.md" in context
|
||||
assert "full content for viking://user/peers/hermes/memories/events/mem_1.md" in context
|
||||
assert "short abstract 2" in context
|
||||
|
||||
|
||||
def test_prefetch_uses_bounded_http_timeouts(monkeypatch):
|
||||
provider = _make_prefetch_provider()
|
||||
|
||||
captured_post_kwargs = []
|
||||
captured_get_kwargs = []
|
||||
|
||||
class StubClient:
|
||||
def __init__(self, *a, **kw):
|
||||
pass
|
||||
|
||||
def post(self, path, payload=None, **kwargs):
|
||||
captured_post_kwargs.append(kwargs)
|
||||
return {
|
||||
"result": {
|
||||
"memories": [
|
||||
{
|
||||
"uri": "viking://user/peers/hermes/memories/events/mem_timeout.md",
|
||||
"score": 0.9,
|
||||
"level": 2,
|
||||
"category": "events",
|
||||
"abstract": "short abstract",
|
||||
},
|
||||
],
|
||||
"resources": [],
|
||||
"skills": [],
|
||||
}
|
||||
}
|
||||
|
||||
def get(self, path, params=None, **kwargs):
|
||||
captured_get_kwargs.append(kwargs)
|
||||
return {"result": {"content": "full memory content"}}
|
||||
|
||||
monkeypatch.setattr(openviking_module, "_VikingClient", StubClient)
|
||||
|
||||
provider.prefetch("anything", session_id="sid-123")
|
||||
|
||||
assert 0 < captured_post_kwargs[0]["timeout"] < openviking_module._TIMEOUT
|
||||
assert 0 < captured_get_kwargs[0]["timeout"] < openviking_module._TIMEOUT
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue