hermes-agent/plugins/memory/mem0_oss/__init__.py

"""Mem0 OSS (self-hosted) memory plugin — MemoryProvider interface.

LLM-powered fact extraction, semantic vector search, and automatic
deduplication using the open-source ``mem0ai`` library — no cloud API key
required.  All data is stored locally on disk.

Backend choices:
  Vector store: Qdrant (local path, no server) — default
  LLM / Embedder: resolved from ``auxiliary.mem0_oss`` in config.yaml, then
                  from ``MEM0_OSS_*`` env vars, then auto-detected.

Primary config — config.yaml (auxiliary.mem0_oss):
  provider   — Hermes provider name: "auto", "aws_bedrock", "bedrock",
               "openai", "openrouter", "ollama", "anthropic", or "custom".
               "auto" follows the standard Hermes auxiliary resolution chain.
  model      — LLM model id (provider-specific slug).  Empty = provider default.
  base_url   — OpenAI-compatible endpoint (forces provider="custom").
  api_key    — API key for that endpoint.  Falls back to MEM0_OSS_API_KEY.

Secondary config — environment variables:
  MEM0_OSS_VECTOR_STORE_PATH   — on-disk path for Qdrant (default: $HERMES_HOME/mem0_oss/qdrant)
  MEM0_OSS_HISTORY_DB_PATH     — SQLite history path  (default: $HERMES_HOME/mem0_oss/history.db)
  MEM0_OSS_COLLECTION          — Qdrant collection name (default: hermes)
  MEM0_OSS_USER_ID             — memory namespace (default: hermes-user)
  MEM0_OSS_LLM_PROVIDER        — override auxiliary.mem0_oss.provider
  MEM0_OSS_LLM_MODEL           — override auxiliary.mem0_oss.model
  MEM0_OSS_EMBEDDER_PROVIDER   — mem0 embedder provider (default: matches llm provider)
  MEM0_OSS_EMBEDDER_MODEL      — embedder model id
  MEM0_OSS_EMBEDDER_DIMS       — embedding dimensions (default: auto per provider)
  MEM0_OSS_TOP_K               — max results returned per search (default: 10)

Secret config:
  MEM0_OSS_API_KEY             — dedicated API key for mem0 LLM calls; takes
                                 precedence over auxiliary.mem0_oss.api_key.
                                 Falls back to the provider's standard env var
                                 (OPENAI_API_KEY, ANTHROPIC_API_KEY,
                                 OPENROUTER_API_KEY, etc.) resolved via the
                                 Hermes provider registry — so no extra key is
                                 needed when a main Hermes provider is already
                                 configured.
  (AWS Bedrock uses AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY / AWS_REGION.)

Optional $HERMES_HOME/mem0_oss.json for non-secret overrides:
  {
    "llm_provider": "aws_bedrock",
    "llm_model": "us.anthropic.claude-haiku-4-5-20251001-v1:0",
    "embedder_provider": "aws_bedrock",
    "embedder_model": "amazon.titan-embed-text-v2:0",
    "embedder_dims": 1024,
    "collection": "hermes",
    "user_id": "hermes-user",
    "top_k": 10
  }
"""

from __future__ import annotations

import json
import logging
import os
import threading
import time
from typing import Any, Dict, List, Optional

from agent.memory_provider import MemoryProvider
from hermes_constants import get_hermes_home
from tools.registry import tool_error

logger = logging.getLogger(__name__)

# Circuit breaker: after this many consecutive failures, pause for cooldown.
_BREAKER_THRESHOLD = 5
_BREAKER_COOLDOWN_SECS = 120

# Qdrant embedded lock error substring — used to detect contention gracefully.
_QDRANT_LOCK_ERROR = "already accessed by another instance"

# Retry parameters for Qdrant lock contention in _get_memory().
# Two processes (WebUI + gateway) may briefly overlap; retry resolves it.
# Prefetch + sync operations hold the lock during an LLM call (~1-3s),
# so we retry for up to 15s total with jitter to avoid thundering herd.
_LOCK_RETRY_ATTEMPTS = 10   # total attempts
_LOCK_RETRY_DELAY_S = 0.8   # base seconds between retries (with jitter, up to ~0.4s extra)


# ---------------------------------------------------------------------------
# Config helpers
# ---------------------------------------------------------------------------

def _get_aux_config() -> dict:
    """Read auxiliary.mem0_oss from config.yaml, with fallback to auxiliary.default.

    Keys not present in auxiliary.mem0_oss are inherited from auxiliary.default
    (if set) so that a single default auxiliary provider covers all aux tasks.
    Returns {} on any failure.
    """
    try:
        from hermes_cli.config import load_config
        config = load_config()
    except Exception:
        return {}
    aux = config.get("auxiliary", {}) if isinstance(config, dict) else {}
    if not isinstance(aux, dict):
        return {}
    default = aux.get("default", {}) or {}
    task = aux.get("mem0_oss", {}) or {}
    # task-specific keys win; default fills in anything not set
    merged = {**default, **task}
    return merged


def _resolve_auto_credentials(aux_provider: str, aux_model: str,
                               aux_base_url: str, aux_api_key: str):
    """When no specific provider is set, fall through to the default auxiliary chain.

    Mirrors the Hermes auxiliary auto-detection priority order so that users
    with a main provider configured (OPENROUTER_API_KEY, ANTHROPIC_API_KEY, …)
    don't need to also set MEM0_OSS_API_KEY.

    If an explicit provider is already configured (aux_provider is non-empty and
    not "auto"), this function is a no-op and returns the inputs unchanged.

    Returns (hermes_provider, model, base_url, api_key) — all strings, never None.
    """
    # Only kick in when no explicit provider was configured
    if aux_provider and aux_provider.lower() not in ("", "auto"):
        return aux_provider, aux_model, aux_base_url, aux_api_key

    # If task-level config.yaml has a specific auxiliary.mem0_oss entry with a
    # provider set, _resolve_task_provider_model returns that; otherwise "auto".
    # Rather than creating a full OpenAI client we probe env vars directly in
    # the same priority order the auxiliary auto-detect chain uses.
    try:
        from agent.auxiliary_client import _resolve_task_provider_model
        h_provider, h_model, h_base_url, h_api_key, _api_mode = (
            _resolve_task_provider_model("mem0_oss")
        )
        # If the task config actually resolved a specific non-auto provider,
        # use that directly (covers auxiliary.mem0_oss.provider = "openrouter" etc.)
        if h_provider and h_provider != "auto":
            resolved_provider = h_provider
            resolved_model = aux_model or h_model or ""
            resolved_base_url = aux_base_url or h_base_url or ""
            resolved_api_key = aux_api_key or h_api_key or ""
            # Still try to fill missing key from provider registry
            if not resolved_api_key and resolved_provider not in (
                    "aws_bedrock", "bedrock", "aws", "ollama", "lmstudio"):
                try:
                    from hermes_cli.auth import resolve_api_key_provider_credentials
                    creds = resolve_api_key_provider_credentials(resolved_provider)
                    resolved_api_key = str(creds.get("api_key", "") or "").strip()
                    if not resolved_base_url:
                        resolved_base_url = str(creds.get("base_url", "") or "").strip()
                except Exception:
                    pass
            return resolved_provider, resolved_model, resolved_base_url, resolved_api_key
    except Exception:
        pass

    # Full auto-detect: first try to mirror the main runtime provider so that
    # mem0 uses the same provider as the rest of Hermes.  Fall back to env-var
    # probe only when the main provider isn't usable for aux tasks.
    try:
        from agent.auxiliary_client import _read_main_provider
        main_provider = (_read_main_provider() or "").strip().lower()
        if main_provider in ("bedrock", "aws_bedrock", "aws"):
            return "aws_bedrock", aux_model, aux_base_url, aux_api_key
        if main_provider == "anthropic":
            anthropic_key = os.environ.get("ANTHROPIC_API_KEY", "").strip()
            if anthropic_key:
                return "anthropic", aux_model, aux_base_url, aux_api_key or anthropic_key
        if main_provider == "openai":
            openai_key = os.environ.get("OPENAI_API_KEY", "").strip()
            if openai_key:
                base_url = os.environ.get("OPENAI_BASE_URL", "").strip()
                return "openai", aux_model, aux_base_url or base_url, aux_api_key or openai_key
        if main_provider == "openrouter":
            openrouter_key = os.environ.get("OPENROUTER_API_KEY", "").strip()
            if openrouter_key:
                base_url = os.environ.get("OPENROUTER_BASE_URL",
                                           "https://openrouter.ai/api/v1").strip()
                return "openrouter", aux_model, aux_base_url or base_url, aux_api_key or openrouter_key
    except Exception:
        pass

    # Fallback env-var probe (no main provider available)
    openrouter_key = os.environ.get("OPENROUTER_API_KEY", "").strip()
    if openrouter_key:
        base_url = os.environ.get("OPENROUTER_BASE_URL",
                                   "https://openrouter.ai/api/v1").strip()
        return "openrouter", aux_model, aux_base_url or base_url, aux_api_key or openrouter_key

    anthropic_key = os.environ.get("ANTHROPIC_API_KEY", "").strip()
    if anthropic_key:
        return "anthropic", aux_model, aux_base_url, aux_api_key or anthropic_key

    openai_key = os.environ.get("OPENAI_API_KEY", "").strip()
    if openai_key:
        base_url = os.environ.get("OPENAI_BASE_URL", "").strip()
        return "openai", aux_model, aux_base_url or base_url, aux_api_key or openai_key

    # Bedrock: no API key needed, boto3 reads from env/profile automatically
    if os.environ.get("AWS_ACCESS_KEY_ID") or os.environ.get("AWS_PROFILE"):
        return "aws_bedrock", aux_model, aux_base_url, aux_api_key

    # Nothing found — return "auto" and let _load_config fall back to aws_bedrock default
    return aux_provider or "auto", aux_model, aux_base_url, aux_api_key


def _load_config() -> dict:
    """Load config from env vars, with $HERMES_HOME/mem0_oss.json overrides.

    Priority for LLM provider/model/api_key (highest → lowest):
      1. MEM0_OSS_LLM_PROVIDER / MEM0_OSS_LLM_MODEL env vars
      2. auxiliary.mem0_oss.provider / .model in config.yaml
      3. Default auxiliary chain (auto-detect from Hermes config) — uses the
         provider's standard env var (OPENROUTER_API_KEY, ANTHROPIC_API_KEY, …)
         so MEM0_OSS_API_KEY is not required when a main provider is configured.
      4. Defaults (aws_bedrock)

    Priority for API key:
      1. MEM0_OSS_API_KEY env var
      2. auxiliary.mem0_oss.api_key in config.yaml
      3. Provider standard env var (OPENROUTER_API_KEY, ANTHROPIC_API_KEY, etc.)
         resolved via the Hermes provider registry.

    Environment variables are the base; the JSON file (if present) overrides
    individual keys.  Neither source is required — sensible defaults apply.
    """
    hermes_home = get_hermes_home()
    qdrant_path = str(hermes_home / "mem0_oss" / "qdrant")
    history_path = str(hermes_home / "mem0_oss" / "history.db")

    aux = _get_aux_config()
    aux_provider = str(aux.get("provider", "") or "").strip()
    aux_model = str(aux.get("model", "") or "").strip()
    aux_base_url = str(aux.get("base_url", "") or "").strip()
    aux_api_key = str(aux.get("api_key", "") or "").strip()

    # MEM0_OSS_API_KEY is the dedicated key; falls back to aux config key, then
    # to the provider's standard env var via _resolve_auto_credentials below.
    explicit_api_key = (
        os.environ.get("MEM0_OSS_API_KEY", "").strip()
        or aux_api_key
    )
    # base_url: env var wins, then aux config
    explicit_base_url = (
        os.environ.get("MEM0_OSS_OPENAI_BASE_URL", "").strip()
        or aux_base_url
    )

    # When no specific provider is configured, fall through to the default
    # auxiliary chain so we inherit the user's main Hermes provider + key.
    auto_provider, auto_model, auto_base_url, auto_api_key = _resolve_auto_credentials(
        aux_provider, aux_model, explicit_base_url, explicit_api_key
    )

    resolved_api_key = explicit_api_key or auto_api_key
    resolved_base_url = explicit_base_url or auto_base_url

    # LLM provider: env > aux config > auto-detected > default
    default_llm_provider = aux_provider or auto_provider or "openai"
    llm_provider = os.environ.get("MEM0_OSS_LLM_PROVIDER", default_llm_provider).strip()
    # Normalise Hermes provider aliases → mem0 provider keys
    llm_provider = _normalise_provider(llm_provider)

    # LLM model: env > aux config > auto-detected > per-provider default
    default_llm_model = aux_model or auto_model or _default_model_for(llm_provider)
    llm_model = os.environ.get("MEM0_OSS_LLM_MODEL", default_llm_model).strip()

    # Embedder defaults mirror the LLM provider
    default_emb_provider = _default_embedder_provider(llm_provider)
    default_emb_model = _default_embedder_model(default_emb_provider)
    default_emb_dims = _default_embedder_dims(default_emb_provider)

    config: dict = {
        "vector_store_path": os.environ.get("MEM0_OSS_VECTOR_STORE_PATH", qdrant_path),
        "history_db_path": os.environ.get("MEM0_OSS_HISTORY_DB_PATH", history_path),
        "collection": os.environ.get("MEM0_OSS_COLLECTION", "hermes"),
        "user_id": os.environ.get("MEM0_OSS_USER_ID", "hermes-user"),
        "llm_provider": llm_provider,
        "llm_model": llm_model,
        "embedder_provider": _normalise_provider(
            os.environ.get("MEM0_OSS_EMBEDDER_PROVIDER", default_emb_provider)
        ),
        "embedder_model": os.environ.get("MEM0_OSS_EMBEDDER_MODEL", default_emb_model),
        "embedder_dims": int(os.environ.get("MEM0_OSS_EMBEDDER_DIMS", str(default_emb_dims))),
        "top_k": int(os.environ.get("MEM0_OSS_TOP_K", "10")),
        # Resolved credentials / endpoint
        "api_key": resolved_api_key,
        "base_url": resolved_base_url,
        # Legacy key kept for backwards compat with tests and mem0_oss.json
        "openai_api_key": resolved_api_key,
        "openai_base_url": resolved_base_url,
    }

    config_path = hermes_home / "mem0_oss.json"
    if config_path.exists():
        try:
            file_cfg = json.loads(config_path.read_text(encoding="utf-8"))
            config.update({k: v for k, v in file_cfg.items() if v is not None and v != ""})
        except Exception as exc:
            logger.warning("mem0_oss: failed to read config file %s: %s", config_path, exc)

    return config


# ---------------------------------------------------------------------------
# Provider normalisation helpers
# ---------------------------------------------------------------------------

# Maps Hermes provider names / aliases → mem0 LLM provider keys
_HERMES_TO_MEM0_PROVIDER: dict = {
    "bedrock": "aws_bedrock",
    "aws": "aws_bedrock",
    "aws_bedrock": "aws_bedrock",
    "openai": "openai",
    "openrouter": "openai",   # mem0 uses OpenAI adapter with OR base URL
    "anthropic": "anthropic",
    "ollama": "ollama",
    "lmstudio": "lmstudio",
    "custom": "openai",       # custom base_url → OpenAI-compatible adapter
    "auto": "aws_bedrock",    # resolved later in is_available(); placeholder
}

_PROVIDER_DEFAULTS: dict = {
    "aws_bedrock":  ("us.anthropic.claude-haiku-4-5-20251001-v1:0",
                     "aws_bedrock", "amazon.titan-embed-text-v2:0", 1024),
    # --- ordering note: openai is the last-resort default (most widely available) ---
    "openai":       ("gpt-4o-mini",    "openai", "text-embedding-3-small", 1536),
    "anthropic":    ("claude-haiku-4-5-20251001", "openai", "text-embedding-3-small", 1536),
    "ollama":       ("llama3.1",        "ollama", "nomic-embed-text", 768),
    "lmstudio":     ("llama-3.2-1b-instruct", "openai", "text-embedding-nomic-embed-text-v1.5", 768),
    "openrouter":   ("openai/gpt-4o-mini", "openai", "text-embedding-3-small", 1536),
}


def _normalise_provider(p: str) -> str:
    p = (p or "").strip().lower()
    return _HERMES_TO_MEM0_PROVIDER.get(p, p) or "openai"


def _default_model_for(mem0_provider: str) -> str:
    return _PROVIDER_DEFAULTS.get(mem0_provider, _PROVIDER_DEFAULTS["openai"])[0]


def _default_embedder_provider(mem0_provider: str) -> str:
    return _PROVIDER_DEFAULTS.get(mem0_provider, _PROVIDER_DEFAULTS["openai"])[1]


def _default_embedder_model(mem0_emb_provider: str) -> str:
    for _llm_p, (_, emb_p, emb_m, _) in _PROVIDER_DEFAULTS.items():
        if emb_p == mem0_emb_provider:
            return emb_m
    return "text-embedding-3-small"


def _default_embedder_dims(mem0_emb_provider: str) -> int:
    for _llm_p, (_, emb_p, _, emb_d) in _PROVIDER_DEFAULTS.items():
        if emb_p == mem0_emb_provider:
            return emb_d
    return 1536


def _build_mem0_config(cfg: dict) -> dict:
    """Build a mem0 MemoryConfig-compatible dict from our flattened config.

    Translates Hermes/mem0 provider names into the provider-specific config
    structures that mem0ai expects, including credentials and base URLs.
    """
    llm_provider = cfg["llm_provider"]
    llm_model = cfg["llm_model"]
    embedder_provider = cfg["embedder_provider"]
    embedder_model = cfg["embedder_model"]
    embedder_dims = cfg["embedder_dims"]
    api_key = cfg.get("api_key") or cfg.get("openai_api_key") or ""
    base_url = cfg.get("base_url") or cfg.get("openai_base_url") or ""

    llm_cfg = _build_llm_cfg(llm_provider, llm_model, api_key, base_url)
    emb_cfg = _build_embedder_cfg(embedder_provider, embedder_model, embedder_dims, api_key, base_url)

    vs_cfg = {
        "collection_name": cfg["collection"],
        "path": cfg["vector_store_path"],
        "embedding_model_dims": embedder_dims,
        "on_disk": True,
    }

    return {
        "vector_store": {
            "provider": "qdrant",
            "config": vs_cfg,
        },
        "llm": {
            "provider": llm_provider,
            "config": llm_cfg,
        },
        "embedder": {
            "provider": embedder_provider,
            "config": emb_cfg,
        },
        "history_db_path": cfg["history_db_path"],
        "version": "v1.1",
    }


def _build_llm_cfg(provider: str, model: str, api_key: str, base_url: str) -> dict:
    """Build the provider-specific LLM config dict for mem0ai."""
    cfg: dict = {"model": model}

    if provider == "aws_bedrock":
        # Bedrock reads creds from env vars automatically; we don't pass them
        # explicitly unless they're set (boto3 picks them up from the environment).
        pass

    elif provider in ("openai", "anthropic", "lmstudio"):
        if api_key:
            cfg["api_key"] = api_key
        if base_url and provider == "openai":
            cfg["openai_base_url"] = base_url

    elif provider == "ollama":
        # Ollama uses openai_base_url pointing at the local server
        cfg["openai_base_url"] = base_url or "http://localhost:11434"

    # openrouter is handled as openai with OR base URL — normalised upstream,
    # so if it reaches here with provider=="openai" it already has base_url set.

    return cfg


def _build_embedder_cfg(provider: str, model: str, dims: int,
                         api_key: str, base_url: str) -> dict:
    """Build the provider-specific embedder config dict for mem0ai."""
    cfg: dict = {"model": model}

    if provider == "aws_bedrock":
        cfg["embedding_dims"] = dims

    elif provider in ("openai",):
        cfg["embedding_dims"] = dims
        if api_key:
            cfg["api_key"] = api_key
        if base_url:
            cfg["openai_base_url"] = base_url

    elif provider == "ollama":
        cfg["embedding_dims"] = dims
        cfg["ollama_base_url"] = base_url or "http://localhost:11434"

    elif provider == "lmstudio":
        cfg["embedding_dims"] = dims
        if api_key:
            cfg["api_key"] = api_key

    return cfg


# ---------------------------------------------------------------------------
# Tool schemas
# ---------------------------------------------------------------------------

SEARCH_SCHEMA = {
    "name": "mem0_oss_search",
    "description": (
        "Search long-term memory using semantic similarity. Returns facts and context "
        "ranked by relevance.  Use this when you need information from past sessions "
        "that is not already in the current conversation."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "query": {"type": "string", "description": "What to search for."},
            "top_k": {
                "type": "integer",
                "description": "Max results (default: 10, max: 50).",
            },
        },
        "required": ["query"],
    },
}

ADD_SCHEMA = {
    "name": "mem0_oss_add",
    "description": (
        "Store a fact, preference, or piece of context to long-term memory. "
        "mem0 deduplicates automatically — safe to call for any important detail."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "content": {"type": "string", "description": "The information to store."},
        },
        "required": ["content"],
    },
}


# ---------------------------------------------------------------------------
# Provider class
# ---------------------------------------------------------------------------

class Mem0OSSMemoryProvider(MemoryProvider):
    """Self-hosted mem0 memory provider backed by a local Qdrant vector store.

    No cloud account required — all data stays on disk.  Uses AWS Bedrock
    (or OpenAI / Ollama) for LLM fact-extraction and embedding.
    """

    def __init__(self):
        # Config / identity
        self._cfg: dict = {}
        self._user_id: str = "hermes-user"
        self._top_k: int = 10
        self._session_id: str = ""
        self._agent_context: str = "primary"
        # Circuit-breaker state (lock-protected)
        self._lock = threading.Lock()
        self._fail_count: int = 0
        self._last_fail_ts: float = 0.0
        # Background thread state
        self._sync_thread: Optional[threading.Thread] = None
        self._prefetch_thread: Optional[threading.Thread] = None
        self._prefetch_result: str = ""

    # -- MemoryProvider identity --------------------------------------------

    @property
    def name(self) -> str:
        return "mem0_oss"

    # -- Availability -------------------------------------------------------

    def is_available(self) -> bool:
        """True if mem0ai is installed and at least one LLM backend is usable.

        We only check imports and credentials — no network calls here.
        """
        try:
            import mem0  # noqa: F401
        except ImportError:
            return False

        cfg = _load_config()
        llm_provider = cfg.get("llm_provider", "openai")

        if llm_provider == "aws_bedrock":
            if os.environ.get("AWS_ACCESS_KEY_ID") or os.environ.get("AWS_PROFILE"):
                return True
            try:
                from agent.bedrock_adapter import has_aws_credentials
                return has_aws_credentials()
            except Exception:
                return False
        if llm_provider == "anthropic":
            return bool(
                cfg.get("api_key")
                or os.environ.get("ANTHROPIC_API_KEY")
            )
        if llm_provider == "openai":
            return bool(
                cfg.get("api_key")
                or cfg.get("openai_api_key")
                or os.environ.get("OPENAI_API_KEY")
            )
        if llm_provider in ("ollama", "lmstudio"):
            return True  # local, always assumed available
        # Generic / custom base_url: trust the user's config
        return True

    # -- Lifecycle ----------------------------------------------------------

    def initialize(self, session_id: str, **kwargs) -> None:
        """Build the mem0 Memory instance for this session."""
        self._session_id = session_id
        self._agent_context = kwargs.get("agent_context", "primary")
        self._cfg = _load_config()
        self._user_id = self._cfg["user_id"]
        self._top_k = self._cfg["top_k"]
        # Reset circuit-breaker and prefetch state for this session.
        # (Lock is created in __init__ and reused across sessions.)
        with self._lock:
            self._fail_count = 0
            self._last_fail_ts = 0.0
        self._prefetch_result = ""
        import pathlib
        pathlib.Path(self._cfg["vector_store_path"]).mkdir(parents=True, exist_ok=True)
        pathlib.Path(self._cfg["history_db_path"]).parent.mkdir(parents=True, exist_ok=True)

    def _get_memory(self) -> Any:
        """Create a fresh mem0 Memory instance for each call.

        We intentionally do NOT cache the instance.  The embedded Qdrant store
        uses a portalocker (fcntl) exclusive lock that is held for the lifetime
        of the client object.  When both the WebUI and the gateway run on the
        same host they compete for this lock.

        We retry up to _LOCK_RETRY_ATTEMPTS times with _LOCK_RETRY_DELAY_S
        seconds between attempts so that brief overlaps (e.g. a concurrent
        prefetch in another process) are automatically resolved.
        """
        import time as _time

        last_exc: Optional[Exception] = None
        for attempt in range(_LOCK_RETRY_ATTEMPTS):
            try:
                from mem0 import Memory
                from mem0.configs.base import MemoryConfig

                mem0_dict = _build_mem0_config(self._cfg)
                mem_cfg = MemoryConfig(**{
                    "vector_store": mem0_dict["vector_store"],
                    "llm": mem0_dict["llm"],
                    "embedder": mem0_dict["embedder"],
                    "history_db_path": mem0_dict["history_db_path"],
                    "version": mem0_dict["version"],
                })
                return Memory(config=mem_cfg)
            except Exception as exc:
                last_exc = exc
                if _QDRANT_LOCK_ERROR in str(exc):
                    if attempt < _LOCK_RETRY_ATTEMPTS - 1:
                        import random as _random
                        jitter = _random.uniform(0, _LOCK_RETRY_DELAY_S * 0.5)
                        delay = _LOCK_RETRY_DELAY_S + jitter
                        logger.debug(
                            "mem0_oss: Qdrant lock busy (attempt %d/%d), retrying in %.2fs",
                            attempt + 1, _LOCK_RETRY_ATTEMPTS, delay,
                        )
                        _time.sleep(delay)
                        continue
                    # Last attempt also a lock error — fall through to raise below
                else:
                    # Non-lock error — fail fast, no retry
                    logger.error("mem0_oss: failed to initialize Memory: %s", exc)
                    raise
        logger.warning(
            "mem0_oss: Qdrant lock still held after %d attempts — giving up: %s",
            _LOCK_RETRY_ATTEMPTS, last_exc,
        )
        raise last_exc  # type: ignore[misc]

    # -- Circuit breaker helpers -------------------------------------------

    def _is_tripped(self) -> bool:
        with self._lock:
            if self._fail_count < _BREAKER_THRESHOLD:
                return False
            if time.monotonic() - self._last_fail_ts >= _BREAKER_COOLDOWN_SECS:
                self._fail_count = 0
                return False
            return True

    def _record_failure(self) -> None:
        with self._lock:
            self._fail_count += 1
            self._last_fail_ts = time.monotonic()

    def _record_success(self) -> None:
        with self._lock:
            self._fail_count = 0

    # -- System prompt block -----------------------------------------------

    def system_prompt_block(self) -> str:
        return (
            "## Mem0 OSS Memory (self-hosted)\n"
            "You have access to long-term memory stored locally via mem0.\n"
            "- Use `mem0_oss_search` to recall relevant facts before answering.\n"
            "- Use `mem0_oss_add` to store important new facts, preferences, or context.\n"
            "- Facts are extracted and deduplicated automatically on each turn.\n"
            "- Search is semantic — natural-language queries work well.\n"
        )

    # -- Prefetch (background recall before each turn) ---------------------

    def queue_prefetch(self, query: str, *, session_id: str = "") -> None:
        """Start a background thread to recall context for the upcoming turn."""
        if self._is_tripped():
            return

        self._prefetch_result = ""
        self._prefetch_thread = threading.Thread(
            target=self._do_prefetch,
            args=(query,),
            daemon=True,
            name="mem0-oss-prefetch",
        )
        self._prefetch_thread.start()

    def _do_prefetch(self, query: str) -> None:
        try:
            mem = self._get_memory()
            results = mem.search(
                query=query[:500],
                top_k=self._top_k,
                filters={"user_id": self._user_id},
            )
            del mem  # release Qdrant lock ASAP — before any further processing
            memories = _extract_results(results)
            if memories:
                lines = "\n".join(f"- {m}" for m in memories)
                self._prefetch_result = f"Mem0 OSS Memory:\n{lines}"
            self._record_success()
        except Exception as exc:
            if _QDRANT_LOCK_ERROR in str(exc):
                logger.debug("mem0_oss: prefetch skipped — Qdrant lock held by another process")
                return  # not a real failure; don't trip the circuit breaker
            self._record_failure()
            logger.debug("mem0_oss: prefetch error: %s", exc)

    def prefetch(self, query: str, *, session_id: str = "") -> str:
        """Return prefetched results (join background thread first)."""
        if self._prefetch_thread is not None:
            self._prefetch_thread.join(timeout=15.0)
            self._prefetch_thread = None
        return self._prefetch_result

    # -- Sync turn (auto-extract after each turn) --------------------------

    def sync_turn(
        self, user_content: str, assistant_content: str, *, session_id: str = ""
    ) -> None:
        """Spawn a background thread to extract and store facts from the turn."""
        if self._agent_context != "primary":
            return
        if self._is_tripped():
            return

        messages = [
            {"role": "user", "content": user_content},
            {"role": "assistant", "content": assistant_content},
        ]
        self._sync_thread = threading.Thread(
            target=self._do_sync,
            args=(messages,),
            daemon=True,
            name="mem0-oss-sync",
        )
        self._sync_thread.start()

    def _do_sync(self, messages: List[dict]) -> None:
        try:
            mem = self._get_memory()
            mem.add(messages=messages, user_id=self._user_id, infer=True)
            del mem  # release Qdrant lock ASAP
            self._record_success()
        except Exception as exc:
            if _QDRANT_LOCK_ERROR in str(exc):
                logger.debug("mem0_oss: sync_turn skipped — Qdrant lock held by another process")
                return  # not a real failure; don't trip the circuit breaker
            self._record_failure()
            logger.debug("mem0_oss: sync_turn error: %s", exc)

    # -- Tool schemas & dispatch -------------------------------------------

    def get_tool_schemas(self) -> List[dict]:
        return [SEARCH_SCHEMA, ADD_SCHEMA]

    def handle_tool_call(self, tool_name: str, args: Dict[str, Any], **kwargs) -> str:
        if tool_name == "mem0_oss_search":
            return self._handle_search(args)
        if tool_name == "mem0_oss_add":
            return self._handle_add(args)
        return tool_error(f"Unknown tool: {tool_name}")

    def _handle_search(self, args: Dict[str, Any]) -> str:
        query = args.get("query", "").strip()
        if not query:
            return tool_error("mem0_oss_search requires 'query'")

        top_k = min(int(args.get("top_k", self._top_k)), 50)

        try:
            mem = self._get_memory()
            results = mem.search(
                query=query,
                top_k=top_k,
                filters={"user_id": self._user_id},
            )
            del mem  # release Qdrant lock ASAP
            memories = _extract_results(results)
            self._record_success()
            if not memories:
                return json.dumps({"result": "No relevant memories found."})
            return json.dumps({"result": "\n".join(f"- {m}" for m in memories)})
        except Exception as exc:
            if _QDRANT_LOCK_ERROR in str(exc):
                self._record_failure()  # already handled by retry in _get_memory, but track it
                logger.warning("mem0_oss: Qdrant lock held by another process — search skipped")
                return json.dumps({"result": "Memory temporarily unavailable (storage locked by another process)."})
            self._record_failure()
            logger.error("mem0_oss: search error: %s", exc)
            return tool_error(f"mem0_oss_search failed: {exc}")

    def _handle_add(self, args: Dict[str, Any]) -> str:
        content = args.get("content", "").strip()
        if not content:
            return tool_error("mem0_oss_add requires 'content'")

        try:
            mem = self._get_memory()
            mem.add(
                messages=[{"role": "user", "content": content}],
                user_id=self._user_id,
                infer=True,
            )
            del mem  # release Qdrant lock ASAP
            self._record_success()
            return json.dumps({"result": "Memory stored successfully."})
        except Exception as exc:
            if _QDRANT_LOCK_ERROR in str(exc):
                self._record_failure()
                logger.warning("mem0_oss: Qdrant lock held by another process — add skipped")
                return json.dumps({"result": "Memory temporarily unavailable (storage locked by another process)."})
            self._record_failure()
            logger.error("mem0_oss: add error: %s", exc)
            return tool_error(f"mem0_oss_add failed: {exc}")

    # -- Config schema (for setup wizard) ----------------------------------

    def get_config_schema(self) -> List[dict]:
        return [
            {
                "key": "llm_provider",
                "label": "LLM provider",
                "description": "mem0 LLM provider key (openai, aws_bedrock, ollama, ...)",
                "default": "openai",
                "env": "MEM0_OSS_LLM_PROVIDER",
                "required": False,
            },
            {
                "key": "llm_model",
                "label": "LLM model",
                "description": "Model id passed to the LLM provider",
                "default": "gpt-4o-mini",
                "env": "MEM0_OSS_LLM_MODEL",
                "required": False,
            },
            {
                "key": "embedder_provider",
                "label": "Embedder provider",
                "description": "mem0 embedder provider key (openai, aws_bedrock, ...)",
                "default": "openai",
                "env": "MEM0_OSS_EMBEDDER_PROVIDER",
                "required": False,
            },
            {
                "key": "embedder_model",
                "label": "Embedding model id",
                "description": "Embedding model id",
                "default": "text-embedding-3-small",
                "env": "MEM0_OSS_EMBEDDER_MODEL",
                "required": False,
            },
            {
                "key": "embedder_dims",
                "label": "Embedding dimensions",
                "description": "Dimensions of the embedding model (must match the model)",
                "default": 1024,
                "env": "MEM0_OSS_EMBEDDER_DIMS",
                "required": False,
            },
            {
                "key": "collection",
                "label": "Qdrant collection name",
                "description": "Name of the Qdrant collection storing memories",
                "default": "hermes",
                "env": "MEM0_OSS_COLLECTION",
                "required": False,
            },
            {
                "key": "user_id",
                "label": "User ID",
                "description": "Memory namespace / user identifier",
                "default": "hermes-user",
                "env": "MEM0_OSS_USER_ID",
                "required": False,
            },
            {
                "key": "top_k",
                "label": "Top-K results",
                "description": "Default number of memories returned per search",
                "default": 10,
                "env": "MEM0_OSS_TOP_K",
                "required": False,
            },
            {
                "key": "api_key",
                "label": "API key (mem0 LLM)",
                "description": (
                    "Dedicated API key for mem0 LLM/embedder calls.  "
                    "Takes precedence over auxiliary.mem0_oss.api_key in config.yaml "
                    "and over OPENAI_API_KEY / ANTHROPIC_API_KEY.  "
                    "Not needed for AWS Bedrock (uses AWS_ACCESS_KEY_ID)."
                ),
                "default": "",
                "env": "MEM0_OSS_API_KEY",
                "secret": True,
                "required": False,
            },
            {
                "key": "openai_api_key",
                "label": "API key (legacy alias)",
                "description": "Legacy alias for api_key — prefer MEM0_OSS_API_KEY.",
                "default": "",
                "env": "MEM0_OSS_OPENAI_API_KEY",
                "secret": True,
                "required": False,
            },
            {
                "key": "base_url",
                "label": "OpenAI-compatible base URL",
                "description": (
                    "Custom LLM endpoint (e.g. http://localhost:11434/v1 for Ollama, "
                    "or an OpenRouter-compatible URL).  Also settable via "
                    "auxiliary.mem0_oss.base_url in config.yaml."
                ),
                "default": "",
                "env": "MEM0_OSS_OPENAI_BASE_URL",
                "required": False,
            },
        ]

    def save_config(self, values: dict, hermes_home) -> None:
        """Write non-secret config to $HERMES_HOME/mem0_oss.json.

        Merges ``values`` into any existing file so that only the supplied keys
        are overwritten.  Secret keys (api_key, openai_api_key) should be stored
        in ``.env`` instead; this method stores them only if explicitly passed.
        """
        import json
        from pathlib import Path

        config_path = Path(hermes_home) / "mem0_oss.json"
        existing: dict = {}
        if config_path.exists():
            try:
                existing = json.loads(config_path.read_text(encoding="utf-8"))
            except Exception:
                pass
        existing.update(values)
        config_path.write_text(json.dumps(existing, indent=2), encoding="utf-8")

    # -- Shutdown ----------------------------------------------------------

    def on_memory_write(self, action: str, target: str, content: str) -> None:
        """Mirror built-in memory tool writes into mem0 store.

        Called by the framework whenever the agent uses the builtin memory tool,
        so writes go to mem0 automatically without the agent needing to call
        mem0_oss_add explicitly.
        """
        if action != "add" or not (content or "").strip():
            return

        def _write():
            try:
                mem = self._get_memory()
                mem.add(
                    messages=[{"role": "user", "content": content.strip()}],
                    user_id=self._user_id,
                    infer=False,
                    metadata={"source": "hermes_memory_tool", "target": target},
                )
            except Exception as e:
                if _QDRANT_LOCK_ERROR in str(e):
                    logger.debug("mem0_oss on_memory_write skipped — Qdrant lock held by another process")
                    return
                logger.debug("mem0_oss on_memory_write failed: %s", e)

        t = threading.Thread(target=_write, daemon=True, name="mem0-oss-memwrite")
        t.start()

    def shutdown(self) -> None:
        """Wait for any in-flight background threads."""
        for thread in (self._sync_thread, self._prefetch_thread):
            if thread is not None and thread.is_alive():
                thread.join(timeout=10.0)


# ---------------------------------------------------------------------------
# Result extraction helper
# ---------------------------------------------------------------------------

def _extract_results(results: Any) -> List[str]:
    """Normalize mem0 search results (v1 list or v2 dict) to plain strings."""
    if isinstance(results, dict) and "results" in results:
        items = results["results"]
    elif isinstance(results, list):
        items = results
    else:
        return []

    memories = []
    for item in items:
        if isinstance(item, dict):
            mem = item.get("memory") or item.get("text") or ""
        else:
            mem = str(item)
        if mem:
            memories.append(mem)
    return memories


# ---------------------------------------------------------------------------
# Plugin registration
# ---------------------------------------------------------------------------

def register(ctx) -> None:
    ctx.register_memory_provider(Mem0OSSMemoryProvider())