fix(honcho): move context injection from system prompt to prefetch, fix honcho_context crash

- system_prompt_block() now returns a static header only (matching the ABC contract).
  All other providers already did this; Honcho was the only one baking live
  user data into the system prompt, freezing it on turn 1 forever

- prefetch() assembles two layers:
  Layer 1: base context (representation + card) from peer.context(), cached
  and refreshed on context_cadence (not frozen).
  Layer 2: dialectic supplement, refreshed on dialectic_cadence

- Context and dialectic cadence now checked independently in queue_prefetch()
  Previously context refresh was gated behind dialectic cadence

- Fix honcho_context tool crash: Honcho SDK Message objects use .peer_id
  not .role — was silently returning 'No context available yet' due to
  AttributeError caught by broad except
This commit is contained in:
Erosika 2026-04-14 18:41:23 -04:00
parent af5bbda7d2
commit ef7f31562b
2 changed files with 64 additions and 36 deletions

View file

@ -196,9 +196,9 @@ class HonchoMemoryProvider(MemoryProvider):
# B1: recall_mode — set during initialize from config
self._recall_mode = "hybrid" # "context", "tools", or "hybrid"
# B4: First-turn context baking
self._first_turn_context: Optional[str] = None
self._first_turn_lock = threading.Lock()
# Base context cache — refreshed on context_cadence, not frozen
self._base_context_cache: Optional[str] = None
self._base_context_lock = threading.Lock()
# B5: Cost-awareness turn counting and cadence
self._turn_count = 0
@ -447,9 +447,9 @@ class HonchoMemoryProvider(MemoryProvider):
def system_prompt_block(self) -> str:
"""Return system prompt text, adapted by recall_mode.
B4: On the FIRST call, fetch and bake the full Honcho context
(user representation, peer card, AI representation, continuity synthesis).
Subsequent calls return the cached block for prompt caching stability.
Returns only the mode header and tool instructions static text
that doesn't change between turns (prompt-cache friendly).
Live context (representation, card) is injected via prefetch().
"""
if self._cron_skipped:
return ""
@ -463,20 +463,6 @@ class HonchoMemoryProvider(MemoryProvider):
)
return ""
# ----- B4: First-turn context baking -----
first_turn_block = ""
if self._recall_mode in ("context", "hybrid"):
with self._first_turn_lock:
if self._first_turn_context is None:
# First call — fetch and cache
try:
ctx = self._manager.get_prefetch_context(self._session_key)
self._first_turn_context = self._format_first_turn_context(ctx) if ctx else ""
except Exception as e:
logger.debug("Honcho first-turn context fetch failed: %s", e)
self._first_turn_context = ""
first_turn_block = self._first_turn_context
# ----- B1: adapt text based on recall_mode -----
if self._recall_mode == "context":
header = (
@ -504,12 +490,14 @@ class HonchoMemoryProvider(MemoryProvider):
"honcho_conclude to save facts about the user."
)
if first_turn_block:
return f"{header}\n\n{first_turn_block}"
return header
def prefetch(self, query: str, *, session_id: str = "") -> str:
"""Return prefetched dialectic context from background thread.
"""Return base context (representation + card) plus dialectic supplement.
Assembles two layers:
1. Base context from peer.context() cached, refreshed on context_cadence
2. Dialectic supplement cached, refreshed on dialectic_cadence
B1: Returns empty when recall_mode is "tools" (no injection).
B5: Respects injection_frequency "first-turn" returns cached/empty after turn 0.
@ -526,14 +514,51 @@ class HonchoMemoryProvider(MemoryProvider):
if self._injection_frequency == "first-turn" and self._turn_count > 0:
return ""
parts = []
# ----- Layer 1: Base context (representation + card) -----
# On first call, fetch synchronously so turn 1 isn't empty.
# After that, serve from cache and refresh in background on cadence.
with self._base_context_lock:
if self._base_context_cache is None:
# First call — synchronous fetch
try:
ctx = self._manager.get_prefetch_context(self._session_key)
self._base_context_cache = self._format_first_turn_context(ctx) if ctx else ""
self._last_context_turn = self._turn_count
except Exception as e:
logger.debug("Honcho base context fetch failed: %s", e)
self._base_context_cache = ""
base_context = self._base_context_cache
# Check if background context prefetch has a fresher result
if self._manager:
fresh_ctx = self._manager.pop_context_result(self._session_key)
if fresh_ctx:
formatted = self._format_first_turn_context(fresh_ctx)
if formatted:
with self._base_context_lock:
self._base_context_cache = formatted
base_context = formatted
if base_context:
parts.append(base_context)
# ----- Layer 2: Dialectic supplement -----
if self._prefetch_thread and self._prefetch_thread.is_alive():
self._prefetch_thread.join(timeout=3.0)
with self._prefetch_lock:
result = self._prefetch_result
dialectic_result = self._prefetch_result
self._prefetch_result = ""
if not result:
if dialectic_result and dialectic_result.strip():
parts.append(dialectic_result)
if not parts:
return ""
result = "\n\n".join(parts)
# ----- Port #3265: token budget enforcement -----
result = self._truncate_to_budget(result)
@ -554,9 +579,11 @@ class HonchoMemoryProvider(MemoryProvider):
return truncated + ""
def queue_prefetch(self, query: str, *, session_id: str = "") -> None:
"""Fire a background dialectic query for the upcoming turn.
"""Fire background prefetch threads for the upcoming turn.
B5: Checks cadence before firing background threads.
B5: Checks cadence independently for dialectic and context refresh.
Context refresh updates the base layer (representation + card).
Dialectic fires the LLM reasoning supplement.
"""
if self._cron_skipped:
return
@ -567,6 +594,15 @@ class HonchoMemoryProvider(MemoryProvider):
if self._recall_mode == "tools":
return
# ----- Context refresh (base layer) — independent cadence -----
if self._context_cadence <= 1 or (self._turn_count - self._last_context_turn) >= self._context_cadence:
self._last_context_turn = self._turn_count
try:
self._manager.prefetch_context(self._session_key, query)
except Exception as e:
logger.debug("Honcho context prefetch failed: %s", e)
# ----- Dialectic prefetch (supplement layer) -----
# B5: cadence check — skip if too soon since last dialectic call
if self._dialectic_cadence > 1:
if (self._turn_count - self._last_dialectic_turn) < self._dialectic_cadence:
@ -592,14 +628,6 @@ class HonchoMemoryProvider(MemoryProvider):
)
self._prefetch_thread.start()
# Also fire context prefetch if cadence allows
if self._context_cadence <= 1 or (self._turn_count - self._last_context_turn) >= self._context_cadence:
self._last_context_turn = self._turn_count
try:
self._manager.prefetch_context(self._session_key, query)
except Exception as e:
logger.debug("Honcho context prefetch failed: %s", e)
def on_turn_start(self, turn_number: int, message: str, **kwargs) -> None:
"""Track turn count for cadence and injection_frequency logic."""
self._turn_count = turn_number

View file

@ -942,7 +942,7 @@ class HonchoSessionManager:
if ctx.messages:
recent = ctx.messages[-10:] # last 10 messages
result["recent_messages"] = [
{"role": m.role, "content": m.content[:500]}
{"role": getattr(m, "peer_id", "unknown"), "content": (m.content or "")[:500]}
for m in recent
]