diff --git a/plugins/memory/honcho/__init__.py b/plugins/memory/honcho/__init__.py index 394b85828a..83298edaf3 100644 --- a/plugins/memory/honcho/__init__.py +++ b/plugins/memory/honcho/__init__.py @@ -18,6 +18,7 @@ from __future__ import annotations import json import logging import threading +from pathlib import Path from typing import Any, Dict, List, Optional from agent.memory_provider import MemoryProvider @@ -108,6 +109,9 @@ CONCLUDE_SCHEMA = { } +ALL_TOOL_SCHEMAS = [PROFILE_SCHEMA, SEARCH_SCHEMA, CONTEXT_SCHEMA, CONCLUDE_SCHEMA] + + # --------------------------------------------------------------------------- # MemoryProvider implementation # --------------------------------------------------------------------------- @@ -124,6 +128,34 @@ class HonchoMemoryProvider(MemoryProvider): self._prefetch_thread: Optional[threading.Thread] = None self._sync_thread: Optional[threading.Thread] = None + # B1: recall_mode — set during initialize from config + self._recall_mode = "hybrid" # "context", "tools", or "hybrid" + + # B4: First-turn context baking + self._first_turn_context: Optional[str] = None + self._first_turn_lock = threading.Lock() + + # B5: Cost-awareness turn counting and cadence + self._turn_count = 0 + self._injection_frequency = "every-turn" # or "first-turn" + self._context_cadence = 1 # minimum turns between context API calls + self._dialectic_cadence = 1 # minimum turns between dialectic API calls + self._reasoning_level_cap: Optional[str] = None # "minimal", "low", "mid", "high" + self._last_context_turn = -999 + self._last_dialectic_turn = -999 + + # B2: peer_memory_mode gating (stub) + self._suppress_memory = False + self._suppress_user_profile = False + + # Port #1957: lazy session init for tools-only mode + self._session_initialized = False + self._lazy_init_kwargs: Optional[dict] = None + self._lazy_init_session_id: Optional[str] = None + + # Port #4053: cron guard — when True, plugin is fully inactive + self._cron_skipped = False + @property def name(self) -> str: return "honcho" @@ -133,6 +165,7 @@ class HonchoMemoryProvider(MemoryProvider): try: from plugins.memory.honcho.client import HonchoClientConfig cfg = HonchoClientConfig.from_global_config() + # Port #2645: baseUrl-only verification — api_key OR base_url suffices return cfg.enabled and bool(cfg.api_key or cfg.base_url) except Exception: return False @@ -158,8 +191,22 @@ class HonchoMemoryProvider(MemoryProvider): ] def initialize(self, session_id: str, **kwargs) -> None: - """Initialize Honcho session manager.""" + """Initialize Honcho session manager. + + Handles: cron guard, recall_mode, session name resolution, + peer memory mode, SOUL.md ai_peer sync, memory file migration, + and pre-warming context at init. + """ try: + # ----- Port #4053: cron guard ----- + agent_context = kwargs.get("agent_context", "") + platform = kwargs.get("platform", "cli") + if agent_context in ("cron", "flush") or platform == "cron": + logger.debug("Honcho skipped: cron/flush context (agent_context=%s, platform=%s)", + agent_context, platform) + self._cron_skipped = True + return + from plugins.memory.honcho.client import HonchoClientConfig, get_honcho_client from plugins.memory.honcho.session import HonchoSessionManager @@ -169,20 +216,78 @@ class HonchoMemoryProvider(MemoryProvider): return self._config = cfg - client = get_honcho_client(cfg) - self._manager = HonchoSessionManager( - honcho=client, - config=cfg, - context_tokens=cfg.context_tokens, - ) - # Build session key from kwargs or session_id - platform = kwargs.get("platform", "cli") - user_id = kwargs.get("user_id", "") - if user_id: - self._session_key = f"{platform}:{user_id}" - else: - self._session_key = session_id + # ----- B1: recall_mode from config ----- + self._recall_mode = cfg.recall_mode # "context", "tools", or "hybrid" + logger.debug("Honcho recall_mode: %s", self._recall_mode) + + # ----- B5: cost-awareness config ----- + try: + raw = cfg.raw or {} + self._injection_frequency = raw.get("injectionFrequency", "every-turn") + self._context_cadence = int(raw.get("contextCadence", 1)) + self._dialectic_cadence = int(raw.get("dialecticCadence", 1)) + cap = raw.get("reasoningLevelCap") + if cap and cap in ("minimal", "low", "mid", "high"): + self._reasoning_level_cap = cap + except Exception as e: + logger.debug("Honcho cost-awareness config parse error: %s", e) + + # ----- Port #1969: aiPeer sync from SOUL.md ----- + try: + hermes_home = kwargs.get("hermes_home", "") + if hermes_home and not cfg.raw.get("aiPeer"): + soul_path = Path(hermes_home) / "SOUL.md" + if soul_path.exists(): + soul_text = soul_path.read_text(encoding="utf-8").strip() + if soul_text: + # Try YAML frontmatter: "name: Foo" + first_line = soul_text.split("\n")[0].strip() + if first_line.startswith("---"): + # Look for name: in frontmatter + for line in soul_text.split("\n")[1:]: + line = line.strip() + if line == "---": + break + if line.lower().startswith("name:"): + name_val = line.split(":", 1)[1].strip().strip("\"'") + if name_val: + cfg.ai_peer = name_val + logger.debug("Honcho ai_peer set from SOUL.md: %s", name_val) + break + elif first_line.startswith("# "): + # Markdown heading: "# AgentName" + name_val = first_line[2:].strip() + if name_val: + cfg.ai_peer = name_val + logger.debug("Honcho ai_peer set from SOUL.md heading: %s", name_val) + except Exception as e: + logger.debug("Honcho SOUL.md ai_peer sync failed: %s", e) + + # ----- B2: peer_memory_mode gating (stub) ----- + try: + ai_mode = cfg.peer_memory_mode(cfg.ai_peer) + user_mode = cfg.peer_memory_mode(cfg.peer_name or "user") + # "honcho" means Honcho owns memory; suppress built-in + self._suppress_memory = (ai_mode == "honcho") + self._suppress_user_profile = (user_mode == "honcho") + logger.debug("Honcho peer_memory_mode: ai=%s (suppress_memory=%s), user=%s (suppress_user_profile=%s)", + ai_mode, self._suppress_memory, user_mode, self._suppress_user_profile) + except Exception as e: + logger.debug("Honcho peer_memory_mode check failed: %s", e) + + # ----- Port #1957: lazy session init for tools-only mode ----- + if self._recall_mode == "tools": + # Defer actual session creation until first tool call + self._lazy_init_kwargs = kwargs + self._lazy_init_session_id = session_id + # Still need a client reference for _ensure_session + self._config = cfg + logger.debug("Honcho tools-only mode — deferring session init until first tool call") + return + + # ----- Eager init (context or hybrid mode) ----- + self._do_session_init(cfg, session_id, **kwargs) except ImportError: logger.debug("honcho-ai package not installed — plugin inactive") @@ -190,19 +295,180 @@ class HonchoMemoryProvider(MemoryProvider): logger.warning("Honcho init failed: %s", e) self._manager = None - def system_prompt_block(self) -> str: - if not self._manager or not self._session_key: - return "" - return ( - "# Honcho Memory\n" - "Active. AI-native cross-session user modeling.\n" - "Use honcho_profile for a quick factual snapshot, " - "honcho_search for raw excerpts, honcho_context for synthesized answers, " - "honcho_conclude to save facts about the user." + def _do_session_init(self, cfg, session_id: str, **kwargs) -> None: + """Shared session initialization logic for both eager and lazy paths.""" + from plugins.memory.honcho.client import get_honcho_client + from plugins.memory.honcho.session import HonchoSessionManager + + client = get_honcho_client(cfg) + self._manager = HonchoSessionManager( + honcho=client, + config=cfg, + context_tokens=cfg.context_tokens, ) + # ----- B3: resolve_session_name ----- + session_title = kwargs.get("session_title") + self._session_key = ( + cfg.resolve_session_name(session_title=session_title, session_id=session_id) + or session_id + or "hermes-default" + ) + logger.debug("Honcho session key resolved: %s", self._session_key) + + # Create session eagerly + session = self._manager.get_or_create(self._session_key) + self._session_initialized = True + + # ----- B6: Memory file migration (one-time, for new sessions) ----- + try: + if not session.messages: + from hermes_constants import get_hermes_home + mem_dir = str(get_hermes_home() / "memories") + self._manager.migrate_memory_files(self._session_key, mem_dir) + logger.debug("Honcho memory file migration attempted for new session: %s", self._session_key) + except Exception as e: + logger.debug("Honcho memory file migration skipped: %s", e) + + # ----- B7: Pre-warming context at init ----- + if self._recall_mode in ("context", "hybrid"): + try: + self._manager.prefetch_context(self._session_key) + self._manager.prefetch_dialectic(self._session_key, "What should I know about this user?") + logger.debug("Honcho pre-warm threads started for session: %s", self._session_key) + except Exception as e: + logger.debug("Honcho pre-warm failed: %s", e) + + def _ensure_session(self) -> bool: + """Lazily initialize the Honcho session (for tools-only mode). + + Returns True if the manager is ready, False otherwise. + """ + if self._manager and self._session_initialized: + return True + if self._cron_skipped: + return False + if not self._config or not self._lazy_init_kwargs: + return False + + try: + self._do_session_init( + self._config, + self._lazy_init_session_id or "hermes-default", + **self._lazy_init_kwargs, + ) + # Clear lazy refs + self._lazy_init_kwargs = None + self._lazy_init_session_id = None + return self._manager is not None + except Exception as e: + logger.warning("Honcho lazy session init failed: %s", e) + return False + + def _format_first_turn_context(self, ctx: dict) -> str: + """Format the prefetch context dict into a readable system prompt block.""" + parts = [] + + rep = ctx.get("representation", "") + if rep: + parts.append(f"## User Representation\n{rep}") + + card = ctx.get("card", "") + if card: + parts.append(f"## User Peer Card\n{card}") + + ai_rep = ctx.get("ai_representation", "") + if ai_rep: + parts.append(f"## AI Self-Representation\n{ai_rep}") + + ai_card = ctx.get("ai_card", "") + if ai_card: + parts.append(f"## AI Identity Card\n{ai_card}") + + if not parts: + return "" + return "\n\n".join(parts) + + def system_prompt_block(self) -> str: + """Return system prompt text, adapted by recall_mode. + + B4: On the FIRST call, fetch and bake the full Honcho context + (user representation, peer card, AI representation, continuity synthesis). + Subsequent calls return the cached block for prompt caching stability. + """ + if self._cron_skipped: + return "" + if not self._manager or not self._session_key: + # tools-only mode without session yet still returns a minimal block + if self._recall_mode == "tools" and self._config: + return ( + "# Honcho Memory\n" + "Active (tools-only mode). Use honcho_profile, honcho_search, " + "honcho_context, and honcho_conclude tools to access user memory." + ) + return "" + + # ----- B4: First-turn context baking ----- + first_turn_block = "" + if self._recall_mode in ("context", "hybrid"): + with self._first_turn_lock: + if self._first_turn_context is None: + # First call — fetch and cache + try: + ctx = self._manager.get_prefetch_context(self._session_key) + self._first_turn_context = self._format_first_turn_context(ctx) if ctx else "" + except Exception as e: + logger.debug("Honcho first-turn context fetch failed: %s", e) + self._first_turn_context = "" + first_turn_block = self._first_turn_context + + # ----- B1: adapt text based on recall_mode ----- + if self._recall_mode == "context": + header = ( + "# Honcho Memory\n" + "Active (context-injection mode). Relevant user context is automatically " + "injected before each turn. No memory tools are available — context is " + "managed automatically." + ) + elif self._recall_mode == "tools": + header = ( + "# Honcho Memory\n" + "Active (tools-only mode). Use honcho_profile for a quick factual snapshot, " + "honcho_search for raw excerpts, honcho_context for synthesized answers, " + "honcho_conclude to save facts about the user. " + "No automatic context injection — you must use tools to access memory." + ) + else: # hybrid + header = ( + "# Honcho Memory\n" + "Active (hybrid mode). Relevant context is auto-injected AND memory tools are available. " + "Use honcho_profile for a quick factual snapshot, " + "honcho_search for raw excerpts, honcho_context for synthesized answers, " + "honcho_conclude to save facts about the user." + ) + + if first_turn_block: + return f"{header}\n\n{first_turn_block}" + return header + def prefetch(self, query: str, *, session_id: str = "") -> str: - """Return prefetched dialectic context from background thread.""" + """Return prefetched dialectic context from background thread. + + B1: Returns empty when recall_mode is "tools" (no injection). + B5: Respects injection_frequency — "first-turn" returns cached/empty after turn 0. + Port #3265: Truncates to context_tokens budget. + """ + if self._cron_skipped: + return "" + + # B1: tools-only mode — no auto-injection + if self._recall_mode == "tools": + return "" + + # B5: injection_frequency — if "first-turn" and past first turn, return empty + if self._injection_frequency == "first-turn" and self._turn_count > 0: + return "" + if self._prefetch_thread and self._prefetch_thread.is_alive(): self._prefetch_thread.join(timeout=3.0) with self._prefetch_lock: @@ -210,13 +476,49 @@ class HonchoMemoryProvider(MemoryProvider): self._prefetch_result = "" if not result: return "" + + # ----- Port #3265: token budget enforcement ----- + result = self._truncate_to_budget(result) + return f"## Honcho Context\n{result}" + def _truncate_to_budget(self, text: str) -> str: + """Truncate text to fit within context_tokens budget if set.""" + if not self._config or not self._config.context_tokens: + return text + budget_chars = self._config.context_tokens * 4 # conservative char estimate + if len(text) <= budget_chars: + return text + # Truncate at word boundary + truncated = text[:budget_chars] + last_space = truncated.rfind(" ") + if last_space > budget_chars * 0.8: + truncated = truncated[:last_space] + return truncated + " …" + def queue_prefetch(self, query: str, *, session_id: str = "") -> None: - """Fire a background dialectic query for the upcoming turn.""" + """Fire a background dialectic query for the upcoming turn. + + B5: Checks cadence before firing background threads. + """ + if self._cron_skipped: + return if not self._manager or not self._session_key or not query: return + # B1: tools-only mode — no prefetch + if self._recall_mode == "tools": + return + + # B5: cadence check — skip if too soon since last dialectic call + if self._dialectic_cadence > 1: + if (self._turn_count - self._last_dialectic_turn) < self._dialectic_cadence: + logger.debug("Honcho dialectic prefetch skipped: cadence %d, turns since last: %d", + self._dialectic_cadence, self._turn_count - self._last_dialectic_turn) + return + + self._last_dialectic_turn = self._turn_count + def _run(): try: result = self._manager.dialectic_query( @@ -233,14 +535,28 @@ class HonchoMemoryProvider(MemoryProvider): ) self._prefetch_thread.start() + # Also fire context prefetch if cadence allows + if self._context_cadence <= 1 or (self._turn_count - self._last_context_turn) >= self._context_cadence: + self._last_context_turn = self._turn_count + try: + self._manager.prefetch_context(self._session_key, query) + except Exception as e: + logger.debug("Honcho context prefetch failed: %s", e) + + def on_turn_start(self, turn_number: int, message: str, **kwargs) -> None: + """Track turn count for cadence and injection_frequency logic.""" + self._turn_count = turn_number + def sync_turn(self, user_content: str, assistant_content: str, *, session_id: str = "") -> None: """Record the conversation turn in Honcho (non-blocking).""" + if self._cron_skipped: + return if not self._manager or not self._session_key: return def _sync(): try: - session = self._manager.get_or_create_session(self._session_key) + session = self._manager.get_or_create(self._session_key) session.add_message("user", user_content[:4000]) session.add_message("assistant", assistant_content[:4000]) # Flush to Honcho API @@ -259,6 +575,8 @@ class HonchoMemoryProvider(MemoryProvider): """Mirror built-in user profile writes as Honcho conclusions.""" if action != "add" or target != "user" or not content: return + if self._cron_skipped: + return if not self._manager or not self._session_key: return @@ -273,6 +591,8 @@ class HonchoMemoryProvider(MemoryProvider): def on_session_end(self, messages: List[Dict[str, Any]]) -> None: """Flush all pending messages to Honcho on session end.""" + if self._cron_skipped: + return if not self._manager: return # Wait for pending sync @@ -284,9 +604,26 @@ class HonchoMemoryProvider(MemoryProvider): logger.debug("Honcho session-end flush failed: %s", e) def get_tool_schemas(self) -> List[Dict[str, Any]]: - return [PROFILE_SCHEMA, SEARCH_SCHEMA, CONTEXT_SCHEMA, CONCLUDE_SCHEMA] + """Return tool schemas, respecting recall_mode. + + B1: context-only mode hides all tools. + """ + if self._cron_skipped: + return [] + if self._recall_mode == "context": + return [] + return list(ALL_TOOL_SCHEMAS) def handle_tool_call(self, tool_name: str, args: dict, **kwargs) -> str: + """Handle a Honcho tool call, with lazy session init for tools-only mode.""" + if self._cron_skipped: + return json.dumps({"error": "Honcho is not active (cron context)."}) + + # Port #1957: ensure session is initialized for tools-only mode + if not self._session_initialized: + if not self._ensure_session(): + return json.dumps({"error": "Honcho session could not be initialized."}) + if not self._manager or not self._session_key: return json.dumps({"error": "Honcho is not active for this session."})