From 7137cccbd134bf2b349af6e23f9af63f18550eaf Mon Sep 17 00:00:00 2001 From: nan Date: Wed, 15 Apr 2026 15:45:22 +0200 Subject: [PATCH] fix(memory): support OpenViking local resource uploads --- plugins/memory/openviking/__init__.py | 137 ++++++++++++++++-- .../memory/test_openviking_provider.py | 68 ++++++++- 2 files changed, 192 insertions(+), 13 deletions(-) diff --git a/plugins/memory/openviking/__init__.py b/plugins/memory/openviking/__init__.py index 8ea4a4bedc..db9a4c1501 100644 --- a/plugins/memory/openviking/__init__.py +++ b/plugins/memory/openviking/__init__.py @@ -27,8 +27,13 @@ from __future__ import annotations import atexit import json import logging +import mimetypes import os +import tempfile import threading +import uuid +import zipfile +from pathlib import Path from typing import Any, Dict, List, Optional from agent.memory_provider import MemoryProvider @@ -105,20 +110,72 @@ class _VikingClient: def _url(self, path: str) -> str: return f"{self._endpoint}{path}" + def _auth_headers(self) -> dict: + h = { + "X-OpenViking-Account": self._account, + "X-OpenViking-User": self._user, + } + if self._api_key: + h["X-API-Key"] = self._api_key + return h + + def _parse_response(self, resp) -> dict: + try: + data = resp.json() + except Exception: + data = None + + if resp.status_code >= 400: + if isinstance(data, dict): + error = data.get("error") + if isinstance(error, dict): + code = error.get("code", "HTTP_ERROR") + message = error.get("message", resp.text) + raise RuntimeError(f"{code}: {message}") + if data.get("status") == "error": + raise RuntimeError(str(data)) + resp.raise_for_status() + + if isinstance(data, dict) and data.get("status") == "error": + error = data.get("error") + if isinstance(error, dict): + code = error.get("code", "OPENVIKING_ERROR") + message = error.get("message", "") + raise RuntimeError(f"{code}: {message}") + raise RuntimeError(str(data)) + + if data is None: + return {} + return data + def get(self, path: str, **kwargs) -> dict: resp = self._httpx.get( self._url(path), headers=self._headers(), timeout=_TIMEOUT, **kwargs ) - resp.raise_for_status() - return resp.json() + return self._parse_response(resp) def post(self, path: str, payload: dict = None, **kwargs) -> dict: resp = self._httpx.post( self._url(path), json=payload or {}, headers=self._headers(), timeout=_TIMEOUT, **kwargs ) - resp.raise_for_status() - return resp.json() + return self._parse_response(resp) + + def upload_temp_file(self, file_path: Path) -> str: + mime_type = mimetypes.guess_type(file_path.name)[0] or "application/octet-stream" + with file_path.open("rb") as f: + resp = self._httpx.post( + self._url("/api/v1/resources/temp_upload"), + files={"file": (file_path.name, f, mime_type)}, + headers=self._auth_headers(), + timeout=_TIMEOUT, + ) + data = self._parse_response(resp) + result = data.get("result", {}) + temp_file_id = result.get("temp_file_id", "") + if not temp_file_id: + raise RuntimeError("OpenViking temp upload did not return temp_file_id") + return temp_file_id def health(self) -> bool: try: @@ -230,24 +287,56 @@ REMEMBER_SCHEMA = { ADD_RESOURCE_SCHEMA = { "name": "viking_add_resource", "description": ( - "Add a URL or document to the OpenViking knowledge base. " - "Supports web pages, GitHub repos, PDFs, markdown, code files. " + "Add a remote URL or local file/directory to the OpenViking knowledge base. " + "Remote resources must be public http(s), git, or ssh URLs. " + "Local files are uploaded first using OpenViking temp_upload. " "The system automatically parses, indexes, and generates summaries." ), "parameters": { "type": "object", "properties": { - "url": {"type": "string", "description": "URL or path of the resource to add."}, + "url": {"type": "string", "description": "Remote URL or local file/directory path to add."}, "reason": { "type": "string", "description": "Why this resource is relevant (improves search).", }, + "to": { + "type": "string", + "description": "Optional target viking:// URI for the resource.", + }, + "parent": { + "type": "string", + "description": "Optional parent viking:// URI. Cannot be used with to.", + }, + "instruction": { + "type": "string", + "description": "Optional processing instruction for semantic extraction.", + }, + "wait": { + "type": "boolean", + "description": "Whether to wait for processing to complete.", + }, + "timeout": { + "type": "number", + "description": "Timeout in seconds when wait is true.", + }, }, "required": ["url"], }, } +def _zip_directory(dir_path: Path) -> Path: + """Create a temporary zip file containing a directory tree.""" + zip_path = Path(tempfile.gettempdir()) / f"openviking_upload_{uuid.uuid4().hex}.zip" + with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zipf: + for file_path in dir_path.rglob("*"): + if file_path.is_file(): + arcname = str(file_path.relative_to(dir_path)).replace("\\", "/") + zipf.write(file_path, arcname=arcname) + return zip_path + + # --------------------------------------------------------------------------- # MemoryProvider implementation # --------------------------------------------------------------------------- @@ -744,12 +833,36 @@ class OpenVikingMemoryProvider(MemoryProvider): if not url: return tool_error("url is required") - payload: Dict[str, Any] = {"path": url} - if args.get("reason"): - payload["reason"] = args["reason"] + if args.get("to") and args.get("parent"): + return tool_error("Cannot specify both 'to' and 'parent'") - resp = self._client.post("/api/v1/resources", payload) - result = resp.get("result", {}) + payload: Dict[str, Any] = {} + for key in ("reason", "to", "parent", "instruction", "wait", "timeout"): + if key in args and args[key] not in (None, ""): + payload[key] = args[key] + + source_path = Path(url).expanduser() + cleanup_path: Optional[Path] = None + if source_path.exists(): + if source_path.is_dir(): + payload["source_name"] = source_path.name + cleanup_path = _zip_directory(source_path) + upload_path = cleanup_path + elif source_path.is_file(): + payload["source_name"] = source_path.name + upload_path = source_path + else: + return tool_error(f"Unsupported local resource path: {url}") + payload["temp_file_id"] = self._client.upload_temp_file(upload_path) + else: + payload["path"] = url + + try: + resp = self._client.post("/api/v1/resources", payload) + result = resp.get("result", {}) + finally: + if cleanup_path: + cleanup_path.unlink(missing_ok=True) return json.dumps({ "status": "added", diff --git a/tests/plugins/memory/test_openviking_provider.py b/tests/plugins/memory/test_openviking_provider.py index c2408f0ae7..467c8bd30e 100644 --- a/tests/plugins/memory/test_openviking_provider.py +++ b/tests/plugins/memory/test_openviking_provider.py @@ -1,7 +1,10 @@ import json +from types import SimpleNamespace from unittest.mock import MagicMock -from plugins.memory.openviking import OpenVikingMemoryProvider +import pytest + +from plugins.memory.openviking import OpenVikingMemoryProvider, _VikingClient def test_tool_search_sorts_by_raw_score_across_buckets(): @@ -60,3 +63,66 @@ def test_tool_search_sorts_missing_raw_score_after_negative_scores(): ] assert [entry["score"] for entry in result["results"]] == [0.1, 0.0, -0.25] assert result["total"] == 3 + + +def test_tool_add_resource_uploads_existing_local_file(tmp_path): + sample = tmp_path / "sample.md" + sample.write_text("# Local resource\n", encoding="utf-8") + provider = OpenVikingMemoryProvider() + provider._client = MagicMock() + provider._client.upload_temp_file.return_value = "upload_sample.md" + provider._client.post.return_value = { + "status": "ok", + "result": {"root_uri": "viking://resources/sample"}, + } + + result = json.loads(provider._tool_add_resource({ + "url": str(sample), + "reason": "local test", + "wait": True, + })) + + provider._client.upload_temp_file.assert_called_once_with(sample) + provider._client.post.assert_called_once_with("/api/v1/resources", { + "reason": "local test", + "wait": True, + "source_name": "sample.md", + "temp_file_id": "upload_sample.md", + }) + assert result["status"] == "added" + assert result["root_uri"] == "viking://resources/sample" + + +def test_tool_add_resource_sends_remote_url_as_path(): + provider = OpenVikingMemoryProvider() + provider._client = MagicMock() + provider._client.post.return_value = { + "status": "ok", + "result": {"root_uri": "viking://resources/remote"}, + } + + provider._tool_add_resource({"url": "https://example.com/doc.md"}) + + provider._client.upload_temp_file.assert_not_called() + provider._client.post.assert_called_once_with("/api/v1/resources", { + "path": "https://example.com/doc.md", + }) + + +def test_viking_client_raises_structured_server_error(): + client = _VikingClient.__new__(_VikingClient) + response = SimpleNamespace( + status_code=403, + text='{"status":"error"}', + json=lambda: { + "status": "error", + "error": { + "code": "PERMISSION_DENIED", + "message": "direct host filesystem paths are not allowed", + }, + }, + raise_for_status=lambda: None, + ) + + with pytest.raises(RuntimeError, match="PERMISSION_DENIED"): + client._parse_response(response)