mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
feat(honcho): context injection overhaul, 5-tool surface, cost safety, session isolation (#10619)
Salvaged from PR #9884 by erosika. Cherry-picked plugin changes onto current main with minimal core modifications. Plugin changes (plugins/memory/honcho/): - New honcho_reasoning tool (5th tool, splits LLM calls from honcho_context) - Two-layer context injection: base context (summary + representation + card) on contextCadence, dialectic supplement on dialecticCadence - Multi-pass dialectic depth (1-3 passes) with early bail-out on strong signal - Cold/warm prompt selection based on session state - dialecticCadence defaults to 3 (was 1) — ~66% fewer Honcho LLM calls - Session summary injection for conversational continuity - Bidirectional peer targeting on all 5 tools - Correctness fixes: peer param fallback, None guard on set_peer_card, schema validation, signal_sufficient anchored regex, mid->medium level fix Core changes (~20 lines across 3 files): - agent/memory_manager.py: Enhanced sanitize_context() to strip full <memory-context> blocks and system notes (prevents leak from saveMessages) - run_agent.py: gateway_session_key param for stable per-chat Honcho sessions, on_turn_start() call before prefetch_all() for cadence tracking, sanitize_context() on user messages to strip leaked memory blocks - gateway/run.py: skip_memory=True on 2 temp agents (prevents orphan sessions), gateway_session_key threading to main agent Tests: 509 passed (3 skipped — honcho SDK not installed locally) Docs: Updated honcho.md, memory-providers.md, tools-reference.md, SKILL.md Co-authored-by: erosika <erosika@users.noreply.github.com>
This commit is contained in:
parent
00ff9a26cd
commit
cc6e8941db
17 changed files with 2632 additions and 396 deletions
|
|
@ -17,6 +17,7 @@ from __future__ import annotations
|
|||
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import threading
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
|
|
@ -33,20 +34,33 @@ logger = logging.getLogger(__name__)
|
|||
PROFILE_SCHEMA = {
|
||||
"name": "honcho_profile",
|
||||
"description": (
|
||||
"Retrieve the user's peer card from Honcho — a curated list of key facts "
|
||||
"about them (name, role, preferences, communication style, patterns). "
|
||||
"Fast, no LLM reasoning, minimal cost. "
|
||||
"Use this at conversation start or when you need a quick factual snapshot."
|
||||
"Retrieve or update a peer card from Honcho — a curated list of key facts "
|
||||
"about that peer (name, role, preferences, communication style, patterns). "
|
||||
"Pass `card` to update; omit `card` to read."
|
||||
),
|
||||
"parameters": {"type": "object", "properties": {}, "required": []},
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"peer": {
|
||||
"type": "string",
|
||||
"description": "Peer to query. Built-in aliases: 'user' (default), 'ai'. Or pass any peer ID from this workspace.",
|
||||
},
|
||||
"card": {
|
||||
"type": "array",
|
||||
"items": {"type": "string"},
|
||||
"description": "New peer card as a list of fact strings. Omit to read the current card.",
|
||||
},
|
||||
},
|
||||
"required": [],
|
||||
},
|
||||
}
|
||||
|
||||
SEARCH_SCHEMA = {
|
||||
"name": "honcho_search",
|
||||
"description": (
|
||||
"Semantic search over Honcho's stored context about the user. "
|
||||
"Semantic search over Honcho's stored context about a peer. "
|
||||
"Returns raw excerpts ranked by relevance — no LLM synthesis. "
|
||||
"Cheaper and faster than honcho_context. "
|
||||
"Cheaper and faster than honcho_reasoning. "
|
||||
"Good when you want to find specific past facts and reason over them yourself."
|
||||
),
|
||||
"parameters": {
|
||||
|
|
@ -60,17 +74,23 @@ SEARCH_SCHEMA = {
|
|||
"type": "integer",
|
||||
"description": "Token budget for returned context (default 800, max 2000).",
|
||||
},
|
||||
"peer": {
|
||||
"type": "string",
|
||||
"description": "Peer to query. Built-in aliases: 'user' (default), 'ai'. Or pass any peer ID from this workspace.",
|
||||
},
|
||||
},
|
||||
"required": ["query"],
|
||||
},
|
||||
}
|
||||
|
||||
CONTEXT_SCHEMA = {
|
||||
"name": "honcho_context",
|
||||
REASONING_SCHEMA = {
|
||||
"name": "honcho_reasoning",
|
||||
"description": (
|
||||
"Ask Honcho a natural language question and get a synthesized answer. "
|
||||
"Uses Honcho's LLM (dialectic reasoning) — higher cost than honcho_profile or honcho_search. "
|
||||
"Can query about any peer: the user (default) or the AI assistant."
|
||||
"Can query about any peer via alias or explicit peer ID. "
|
||||
"Pass reasoning_level to control depth: minimal (fast/cheap), low (default), "
|
||||
"medium, high, max (deep/expensive). Omit for configured default."
|
||||
),
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
|
|
@ -79,37 +99,87 @@ CONTEXT_SCHEMA = {
|
|||
"type": "string",
|
||||
"description": "A natural language question.",
|
||||
},
|
||||
"reasoning_level": {
|
||||
"type": "string",
|
||||
"description": (
|
||||
"Override the default reasoning depth. "
|
||||
"Omit to use the configured default (typically low). "
|
||||
"Guide:\n"
|
||||
"- minimal: quick factual lookups (name, role, simple preference)\n"
|
||||
"- low: straightforward questions with clear answers\n"
|
||||
"- medium: multi-aspect questions requiring synthesis across observations\n"
|
||||
"- high: complex behavioral patterns, contradictions, deep analysis\n"
|
||||
"- max: thorough audit-level analysis, leave no stone unturned"
|
||||
),
|
||||
"enum": ["minimal", "low", "medium", "high", "max"],
|
||||
},
|
||||
"peer": {
|
||||
"type": "string",
|
||||
"description": "Which peer to query about: 'user' (default) or 'ai'.",
|
||||
"description": "Peer to query. Built-in aliases: 'user' (default), 'ai'. Or pass any peer ID from this workspace.",
|
||||
},
|
||||
},
|
||||
"required": ["query"],
|
||||
},
|
||||
}
|
||||
|
||||
# Tool schema: honcho_context — raw session snapshot (summary, representation,
# peer card, recent messages). No dialectic LLM call, so cheaper than
# honcho_reasoning. Both parameters are optional.
CONTEXT_SCHEMA = {
    "name": "honcho_context",
    "description": (
        "Retrieve full session context from Honcho — summary, peer representation, "
        "peer card, and recent messages. No LLM synthesis. "
        "Cheaper than honcho_reasoning. Use this to see what Honcho knows about "
        "the current conversation and the specified peer."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "query": {
                "type": "string",
                "description": "Optional focus query to filter context. Omit for full session context snapshot.",
            },
            "peer": {
                "type": "string",
                "description": "Peer to query. Built-in aliases: 'user' (default), 'ai'. Or pass any peer ID from this workspace.",
            },
        },
        "required": [],
    },
}
|
||||
|
||||
# Tool schema: honcho_conclude — create OR delete a persistent conclusion.
# Exactly one of `conclusion` / `delete_id` must be supplied; the JSON-Schema
# `anyOf` expresses "at least one of the two is required" (the mutual-exclusion
# half of the contract is enforced in the handler, not here).
CONCLUDE_SCHEMA = {
    "name": "honcho_conclude",
    "description": (
        "Write or delete a conclusion about a peer in Honcho's memory. "
        "Conclusions are persistent facts that build a peer's profile. "
        "You MUST pass exactly one of: `conclusion` (to create) or `delete_id` (to delete). "
        "Passing neither is an error. "
        "Deletion is only for PII removal — Honcho self-heals incorrect conclusions over time."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "conclusion": {
                "type": "string",
                "description": "A factual statement to persist. Required when not using delete_id.",
            },
            "delete_id": {
                "type": "string",
                "description": "Conclusion ID to delete (for PII removal). Required when not using conclusion.",
            },
            "peer": {
                "type": "string",
                "description": "Peer to query. Built-in aliases: 'user' (default), 'ai'. Or pass any peer ID from this workspace.",
            },
        },
        "anyOf": [
            {"required": ["conclusion"]},
            {"required": ["delete_id"]},
        ],
    },
}
|
||||
|
||||
|
||||
ALL_TOOL_SCHEMAS = [PROFILE_SCHEMA, SEARCH_SCHEMA, CONTEXT_SCHEMA, CONCLUDE_SCHEMA]
|
||||
ALL_TOOL_SCHEMAS = [PROFILE_SCHEMA, SEARCH_SCHEMA, REASONING_SCHEMA, CONTEXT_SCHEMA, CONCLUDE_SCHEMA]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -131,16 +201,18 @@ class HonchoMemoryProvider(MemoryProvider):
|
|||
# B1: recall_mode — set during initialize from config
|
||||
self._recall_mode = "hybrid" # "context", "tools", or "hybrid"
|
||||
|
||||
# B4: First-turn context baking
|
||||
self._first_turn_context: Optional[str] = None
|
||||
self._first_turn_lock = threading.Lock()
|
||||
# Base context cache — refreshed on context_cadence, not frozen
|
||||
self._base_context_cache: Optional[str] = None
|
||||
self._base_context_lock = threading.Lock()
|
||||
|
||||
# B5: Cost-awareness turn counting and cadence
|
||||
self._turn_count = 0
|
||||
self._injection_frequency = "every-turn" # or "first-turn"
|
||||
self._context_cadence = 1 # minimum turns between context API calls
|
||||
self._dialectic_cadence = 1 # minimum turns between dialectic API calls
|
||||
self._reasoning_level_cap: Optional[str] = None # "minimal", "low", "mid", "high"
|
||||
self._dialectic_cadence = 3 # minimum turns between dialectic API calls
|
||||
self._dialectic_depth = 1 # how many .chat() calls per dialectic cycle (1-3)
|
||||
self._dialectic_depth_levels: list[str] | None = None # per-pass reasoning levels
|
||||
self._reasoning_level_cap: Optional[str] = None # "minimal", "low", "medium", "high"
|
||||
self._last_context_turn = -999
|
||||
self._last_dialectic_turn = -999
|
||||
|
||||
|
|
@ -236,9 +308,11 @@ class HonchoMemoryProvider(MemoryProvider):
|
|||
raw = cfg.raw or {}
|
||||
self._injection_frequency = raw.get("injectionFrequency", "every-turn")
|
||||
self._context_cadence = int(raw.get("contextCadence", 1))
|
||||
self._dialectic_cadence = int(raw.get("dialecticCadence", 1))
|
||||
self._dialectic_cadence = int(raw.get("dialecticCadence", 3))
|
||||
self._dialectic_depth = max(1, min(cfg.dialectic_depth, 3))
|
||||
self._dialectic_depth_levels = cfg.dialectic_depth_levels
|
||||
cap = raw.get("reasoningLevelCap")
|
||||
if cap and cap in ("minimal", "low", "mid", "high"):
|
||||
if cap and cap in ("minimal", "low", "medium", "high"):
|
||||
self._reasoning_level_cap = cap
|
||||
except Exception as e:
|
||||
logger.debug("Honcho cost-awareness config parse error: %s", e)
|
||||
|
|
@ -251,9 +325,7 @@ class HonchoMemoryProvider(MemoryProvider):
|
|||
# ----- Port #1957: lazy session init for tools-only mode -----
|
||||
if self._recall_mode == "tools":
|
||||
if cfg.init_on_session_start:
|
||||
# Eager init: create session now so sync_turn() works from turn 1.
|
||||
# Does NOT enable auto-injection — prefetch() still returns empty.
|
||||
logger.debug("Honcho tools-only mode — eager session init (initOnSessionStart=true)")
|
||||
# Eager init even in tools mode (opt-in)
|
||||
self._do_session_init(cfg, session_id, **kwargs)
|
||||
return
|
||||
# Defer actual session creation until first tool call
|
||||
|
|
@ -287,8 +359,13 @@ class HonchoMemoryProvider(MemoryProvider):
|
|||
|
||||
# ----- B3: resolve_session_name -----
|
||||
session_title = kwargs.get("session_title")
|
||||
gateway_session_key = kwargs.get("gateway_session_key")
|
||||
self._session_key = (
|
||||
cfg.resolve_session_name(session_title=session_title, session_id=session_id)
|
||||
cfg.resolve_session_name(
|
||||
session_title=session_title,
|
||||
session_id=session_id,
|
||||
gateway_session_key=gateway_session_key,
|
||||
)
|
||||
or session_id
|
||||
or "hermes-default"
|
||||
)
|
||||
|
|
@ -299,12 +376,21 @@ class HonchoMemoryProvider(MemoryProvider):
|
|||
self._session_initialized = True
|
||||
|
||||
# ----- B6: Memory file migration (one-time, for new sessions) -----
|
||||
# Skip under per-session strategy: every Hermes run creates a fresh
|
||||
# Honcho session by design, so uploading MEMORY.md/USER.md/SOUL.md to
|
||||
# each one would flood the backend with short-lived duplicates instead
|
||||
# of performing a one-time migration.
|
||||
try:
|
||||
if not session.messages:
|
||||
if not session.messages and cfg.session_strategy != "per-session":
|
||||
from hermes_constants import get_hermes_home
|
||||
mem_dir = str(get_hermes_home() / "memories")
|
||||
self._manager.migrate_memory_files(self._session_key, mem_dir)
|
||||
logger.debug("Honcho memory file migration attempted for new session: %s", self._session_key)
|
||||
elif cfg.session_strategy == "per-session":
|
||||
logger.debug(
|
||||
"Honcho memory file migration skipped: per-session strategy creates a fresh session per run (%s)",
|
||||
self._session_key,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug("Honcho memory file migration skipped: %s", e)
|
||||
|
||||
|
|
@ -347,6 +433,11 @@ class HonchoMemoryProvider(MemoryProvider):
|
|||
"""Format the prefetch context dict into a readable system prompt block."""
|
||||
parts = []
|
||||
|
||||
# Session summary — session-scoped context, placed first for relevance
|
||||
summary = ctx.get("summary", "")
|
||||
if summary:
|
||||
parts.append(f"## Session Summary\n{summary}")
|
||||
|
||||
rep = ctx.get("representation", "")
|
||||
if rep:
|
||||
parts.append(f"## User Representation\n{rep}")
|
||||
|
|
@ -370,9 +461,9 @@ class HonchoMemoryProvider(MemoryProvider):
|
|||
def system_prompt_block(self) -> str:
|
||||
"""Return system prompt text, adapted by recall_mode.
|
||||
|
||||
B4: On the FIRST call, fetch and bake the full Honcho context
|
||||
(user representation, peer card, AI representation, continuity synthesis).
|
||||
Subsequent calls return the cached block for prompt caching stability.
|
||||
Returns only the mode header and tool instructions — static text
|
||||
that doesn't change between turns (prompt-cache friendly).
|
||||
Live context (representation, card) is injected via prefetch().
|
||||
"""
|
||||
if self._cron_skipped:
|
||||
return ""
|
||||
|
|
@ -382,24 +473,10 @@ class HonchoMemoryProvider(MemoryProvider):
|
|||
return (
|
||||
"# Honcho Memory\n"
|
||||
"Active (tools-only mode). Use honcho_profile, honcho_search, "
|
||||
"honcho_context, and honcho_conclude tools to access user memory."
|
||||
"honcho_reasoning, honcho_context, and honcho_conclude tools to access user memory."
|
||||
)
|
||||
return ""
|
||||
|
||||
# ----- B4: First-turn context baking -----
|
||||
first_turn_block = ""
|
||||
if self._recall_mode in ("context", "hybrid"):
|
||||
with self._first_turn_lock:
|
||||
if self._first_turn_context is None:
|
||||
# First call — fetch and cache
|
||||
try:
|
||||
ctx = self._manager.get_prefetch_context(self._session_key)
|
||||
self._first_turn_context = self._format_first_turn_context(ctx) if ctx else ""
|
||||
except Exception as e:
|
||||
logger.debug("Honcho first-turn context fetch failed: %s", e)
|
||||
self._first_turn_context = ""
|
||||
first_turn_block = self._first_turn_context
|
||||
|
||||
# ----- B1: adapt text based on recall_mode -----
|
||||
if self._recall_mode == "context":
|
||||
header = (
|
||||
|
|
@ -412,7 +489,8 @@ class HonchoMemoryProvider(MemoryProvider):
|
|||
header = (
|
||||
"# Honcho Memory\n"
|
||||
"Active (tools-only mode). Use honcho_profile for a quick factual snapshot, "
|
||||
"honcho_search for raw excerpts, honcho_context for synthesized answers, "
|
||||
"honcho_search for raw excerpts, honcho_context for raw peer context, "
|
||||
"honcho_reasoning for synthesized answers, "
|
||||
"honcho_conclude to save facts about the user. "
|
||||
"No automatic context injection — you must use tools to access memory."
|
||||
)
|
||||
|
|
@ -421,16 +499,19 @@ class HonchoMemoryProvider(MemoryProvider):
|
|||
"# Honcho Memory\n"
|
||||
"Active (hybrid mode). Relevant context is auto-injected AND memory tools are available. "
|
||||
"Use honcho_profile for a quick factual snapshot, "
|
||||
"honcho_search for raw excerpts, honcho_context for synthesized answers, "
|
||||
"honcho_search for raw excerpts, honcho_context for raw peer context, "
|
||||
"honcho_reasoning for synthesized answers, "
|
||||
"honcho_conclude to save facts about the user."
|
||||
)
|
||||
|
||||
if first_turn_block:
|
||||
return f"{header}\n\n{first_turn_block}"
|
||||
return header
|
||||
|
||||
def prefetch(self, query: str, *, session_id: str = "") -> str:
|
||||
"""Return prefetched dialectic context from background thread.
|
||||
"""Return base context (representation + card) plus dialectic supplement.
|
||||
|
||||
Assembles two layers:
|
||||
1. Base context from peer.context() — cached, refreshed on context_cadence
|
||||
2. Dialectic supplement — cached, refreshed on dialectic_cadence
|
||||
|
||||
B1: Returns empty when recall_mode is "tools" (no injection).
|
||||
B5: Respects injection_frequency — "first-turn" returns cached/empty after turn 0.
|
||||
|
|
@ -443,22 +524,95 @@ class HonchoMemoryProvider(MemoryProvider):
|
|||
if self._recall_mode == "tools":
|
||||
return ""
|
||||
|
||||
# B5: injection_frequency — if "first-turn" and past first turn, return empty
|
||||
if self._injection_frequency == "first-turn" and self._turn_count > 0:
|
||||
# B5: injection_frequency — if "first-turn" and past first turn, return empty.
|
||||
# _turn_count is 1-indexed (first user message = 1), so > 1 means "past first".
|
||||
if self._injection_frequency == "first-turn" and self._turn_count > 1:
|
||||
return ""
|
||||
|
||||
parts = []
|
||||
|
||||
# ----- Layer 1: Base context (representation + card) -----
|
||||
# On first call, fetch synchronously so turn 1 isn't empty.
|
||||
# After that, serve from cache and refresh in background on cadence.
|
||||
with self._base_context_lock:
|
||||
if self._base_context_cache is None:
|
||||
# First call — synchronous fetch
|
||||
try:
|
||||
ctx = self._manager.get_prefetch_context(self._session_key)
|
||||
self._base_context_cache = self._format_first_turn_context(ctx) if ctx else ""
|
||||
self._last_context_turn = self._turn_count
|
||||
except Exception as e:
|
||||
logger.debug("Honcho base context fetch failed: %s", e)
|
||||
self._base_context_cache = ""
|
||||
base_context = self._base_context_cache
|
||||
|
||||
# Check if background context prefetch has a fresher result
|
||||
if self._manager:
|
||||
fresh_ctx = self._manager.pop_context_result(self._session_key)
|
||||
if fresh_ctx:
|
||||
formatted = self._format_first_turn_context(fresh_ctx)
|
||||
if formatted:
|
||||
with self._base_context_lock:
|
||||
self._base_context_cache = formatted
|
||||
base_context = formatted
|
||||
|
||||
if base_context:
|
||||
parts.append(base_context)
|
||||
|
||||
# ----- Layer 2: Dialectic supplement -----
|
||||
# On the very first turn, no queue_prefetch() has run yet so the
|
||||
# dialectic result is empty. Run with a bounded timeout so a slow
|
||||
# Honcho connection doesn't block the first response indefinitely.
|
||||
# On timeout the result is skipped and queue_prefetch() will pick it
|
||||
# up at the next cadence-allowed turn.
|
||||
if self._last_dialectic_turn == -999 and query:
|
||||
_first_turn_timeout = (
|
||||
self._config.timeout if self._config and self._config.timeout else 8.0
|
||||
)
|
||||
_result_holder: list[str] = []
|
||||
|
||||
def _run_first_turn() -> None:
|
||||
try:
|
||||
_result_holder.append(self._run_dialectic_depth(query))
|
||||
except Exception as exc:
|
||||
logger.debug("Honcho first-turn dialectic failed: %s", exc)
|
||||
|
||||
_t = threading.Thread(target=_run_first_turn, daemon=True)
|
||||
_t.start()
|
||||
_t.join(timeout=_first_turn_timeout)
|
||||
if not _t.is_alive():
|
||||
first_turn_dialectic = _result_holder[0] if _result_holder else ""
|
||||
if first_turn_dialectic and first_turn_dialectic.strip():
|
||||
with self._prefetch_lock:
|
||||
self._prefetch_result = first_turn_dialectic
|
||||
self._last_dialectic_turn = self._turn_count
|
||||
else:
|
||||
logger.debug(
|
||||
"Honcho first-turn dialectic timed out (%.1fs) — "
|
||||
"will inject at next cadence-allowed turn",
|
||||
_first_turn_timeout,
|
||||
)
|
||||
# Don't update _last_dialectic_turn: queue_prefetch() will
|
||||
# retry at the next cadence-allowed turn via the async path.
|
||||
|
||||
if self._prefetch_thread and self._prefetch_thread.is_alive():
|
||||
self._prefetch_thread.join(timeout=3.0)
|
||||
with self._prefetch_lock:
|
||||
result = self._prefetch_result
|
||||
dialectic_result = self._prefetch_result
|
||||
self._prefetch_result = ""
|
||||
if not result:
|
||||
|
||||
if dialectic_result and dialectic_result.strip():
|
||||
parts.append(dialectic_result)
|
||||
|
||||
if not parts:
|
||||
return ""
|
||||
|
||||
result = "\n\n".join(parts)
|
||||
|
||||
# ----- Port #3265: token budget enforcement -----
|
||||
result = self._truncate_to_budget(result)
|
||||
|
||||
return f"## Honcho Context\n{result}"
|
||||
return result
|
||||
|
||||
def _truncate_to_budget(self, text: str) -> str:
|
||||
"""Truncate text to fit within context_tokens budget if set."""
|
||||
|
|
@ -475,9 +629,11 @@ class HonchoMemoryProvider(MemoryProvider):
|
|||
return truncated + " …"
|
||||
|
||||
def queue_prefetch(self, query: str, *, session_id: str = "") -> None:
|
||||
"""Fire a background dialectic query for the upcoming turn.
|
||||
"""Fire background prefetch threads for the upcoming turn.
|
||||
|
||||
B5: Checks cadence before firing background threads.
|
||||
B5: Checks cadence independently for dialectic and context refresh.
|
||||
Context refresh updates the base layer (representation + card).
|
||||
Dialectic fires the LLM reasoning supplement.
|
||||
"""
|
||||
if self._cron_skipped:
|
||||
return
|
||||
|
|
@ -488,6 +644,15 @@ class HonchoMemoryProvider(MemoryProvider):
|
|||
if self._recall_mode == "tools":
|
||||
return
|
||||
|
||||
# ----- Context refresh (base layer) — independent cadence -----
|
||||
if self._context_cadence <= 1 or (self._turn_count - self._last_context_turn) >= self._context_cadence:
|
||||
self._last_context_turn = self._turn_count
|
||||
try:
|
||||
self._manager.prefetch_context(self._session_key, query)
|
||||
except Exception as e:
|
||||
logger.debug("Honcho context prefetch failed: %s", e)
|
||||
|
||||
# ----- Dialectic prefetch (supplement layer) -----
|
||||
# B5: cadence check — skip if too soon since last dialectic call
|
||||
if self._dialectic_cadence > 1:
|
||||
if (self._turn_count - self._last_dialectic_turn) < self._dialectic_cadence:
|
||||
|
|
@ -499,9 +664,7 @@ class HonchoMemoryProvider(MemoryProvider):
|
|||
|
||||
def _run():
|
||||
try:
|
||||
result = self._manager.dialectic_query(
|
||||
self._session_key, query, peer="user"
|
||||
)
|
||||
result = self._run_dialectic_depth(query)
|
||||
if result and result.strip():
|
||||
with self._prefetch_lock:
|
||||
self._prefetch_result = result
|
||||
|
|
@ -513,13 +676,140 @@ class HonchoMemoryProvider(MemoryProvider):
|
|||
)
|
||||
self._prefetch_thread.start()
|
||||
|
||||
# Also fire context prefetch if cadence allows
|
||||
if self._context_cadence <= 1 or (self._turn_count - self._last_context_turn) >= self._context_cadence:
|
||||
self._last_context_turn = self._turn_count
|
||||
try:
|
||||
self._manager.prefetch_context(self._session_key, query)
|
||||
except Exception as e:
|
||||
logger.debug("Honcho context prefetch failed: %s", e)
|
||||
# ----- Dialectic depth: multi-pass .chat() with cold/warm prompts -----
|
||||
|
||||
# Proportional reasoning levels per depth/pass when dialecticDepthLevels
|
||||
# is not configured. The base level is dialecticReasoningLevel.
|
||||
# Index: (depth, pass) → level relative to base.
|
||||
_PROPORTIONAL_LEVELS: dict[tuple[int, int], str] = {
|
||||
# depth 1: single pass at base level
|
||||
(1, 0): "base",
|
||||
# depth 2: pass 0 lighter, pass 1 at base
|
||||
(2, 0): "minimal",
|
||||
(2, 1): "base",
|
||||
# depth 3: pass 0 lighter, pass 1 at base, pass 2 one above minimal
|
||||
(3, 0): "minimal",
|
||||
(3, 1): "base",
|
||||
(3, 2): "low",
|
||||
}
|
||||
|
||||
_LEVEL_ORDER = ("minimal", "low", "medium", "high", "max")
|
||||
|
||||
def _resolve_pass_level(self, pass_idx: int) -> str:
|
||||
"""Resolve reasoning level for a given pass index.
|
||||
|
||||
Uses dialecticDepthLevels if configured, otherwise proportional
|
||||
defaults relative to dialecticReasoningLevel.
|
||||
"""
|
||||
if self._dialectic_depth_levels and pass_idx < len(self._dialectic_depth_levels):
|
||||
return self._dialectic_depth_levels[pass_idx]
|
||||
|
||||
base = (self._config.dialectic_reasoning_level if self._config else "low")
|
||||
mapping = self._PROPORTIONAL_LEVELS.get((self._dialectic_depth, pass_idx))
|
||||
if mapping is None or mapping == "base":
|
||||
return base
|
||||
return mapping
|
||||
|
||||
def _build_dialectic_prompt(self, pass_idx: int, prior_results: list[str], is_cold: bool) -> str:
|
||||
"""Build the prompt for a given dialectic pass.
|
||||
|
||||
Pass 0: cold start (general user query) or warm (session-scoped).
|
||||
Pass 1: self-audit / targeted synthesis against gaps from pass 0.
|
||||
Pass 2: reconciliation / contradiction check across prior passes.
|
||||
"""
|
||||
if pass_idx == 0:
|
||||
if is_cold:
|
||||
return (
|
||||
"Who is this person? What are their preferences, goals, "
|
||||
"and working style? Focus on facts that would help an AI "
|
||||
"assistant be immediately useful."
|
||||
)
|
||||
return (
|
||||
"Given what's been discussed in this session so far, what "
|
||||
"context about this user is most relevant to the current "
|
||||
"conversation? Prioritize active context over biographical facts."
|
||||
)
|
||||
elif pass_idx == 1:
|
||||
prior = prior_results[-1] if prior_results else ""
|
||||
return (
|
||||
f"Given this initial assessment:\n\n{prior}\n\n"
|
||||
"What gaps remain in your understanding that would help "
|
||||
"going forward? Synthesize what you actually know about "
|
||||
"the user's current state and immediate needs, grounded "
|
||||
"in evidence from recent sessions."
|
||||
)
|
||||
else:
|
||||
# pass 2: reconciliation
|
||||
return (
|
||||
f"Prior passes produced:\n\n"
|
||||
f"Pass 1:\n{prior_results[0] if len(prior_results) > 0 else '(empty)'}\n\n"
|
||||
f"Pass 2:\n{prior_results[1] if len(prior_results) > 1 else '(empty)'}\n\n"
|
||||
"Do these assessments cohere? Reconcile any contradictions "
|
||||
"and produce a final, concise synthesis of what matters most "
|
||||
"for the current conversation."
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _signal_sufficient(result: str) -> bool:
|
||||
"""Check if a dialectic pass returned enough signal to skip further passes.
|
||||
|
||||
Heuristic: a response longer than 100 chars with some structure
|
||||
(section headers, bullets, or an ordered list) is considered sufficient.
|
||||
"""
|
||||
if not result or len(result.strip()) < 100:
|
||||
return False
|
||||
# Structured output with sections/bullets is strong signal
|
||||
if "\n" in result and (
|
||||
"##" in result
|
||||
or "•" in result
|
||||
or re.search(r"^[*-] ", result, re.MULTILINE)
|
||||
or re.search(r"^\s*\d+\. ", result, re.MULTILINE)
|
||||
):
|
||||
return True
|
||||
# Long enough even without structure
|
||||
return len(result.strip()) > 300
|
||||
|
||||
def _run_dialectic_depth(self, query: str) -> str:
    """Execute up to dialecticDepth .chat() calls with conditional bail-out.

    Cold start (no base context): general user-oriented query.
    Warm session (base context exists): session-scoped query.
    Each pass is conditional — bails early if the prior pass returned
    strong signal (see _signal_sufficient). Returns the deepest
    non-empty result, or "" when nothing usable came back.
    """
    # Guard: nothing to query against before session init.
    if not self._manager or not self._session_key:
        return ""

    is_cold = not self._base_context_cache
    results: list[str] = []

    for i in range(self._dialectic_depth):
        if i == 0:
            prompt = self._build_dialectic_prompt(0, results, is_cold)
        else:
            # Skip further passes if prior pass delivered strong signal
            if results and self._signal_sufficient(results[-1]):
                logger.debug("Honcho dialectic depth %d: pass %d skipped, prior signal sufficient",
                             self._dialectic_depth, i)
                break
            prompt = self._build_dialectic_prompt(i, results, is_cold)

        level = self._resolve_pass_level(i)
        logger.debug("Honcho dialectic depth %d: pass %d, level=%s, cold=%s",
                     self._dialectic_depth, i, level, is_cold)

        result = self._manager.dialectic_query(
            self._session_key, prompt,
            reasoning_level=level,
            peer="user",
        )
        results.append(result or "")

    # Return the last non-empty result (deepest pass that ran)
    for r in reversed(results):
        if r and r.strip():
            return r
    return ""
|
||||
|
||||
def on_turn_start(self, turn_number: int, message: str, **kwargs) -> None:
|
||||
"""Track turn count for cadence and injection_frequency logic."""
|
||||
|
|
@ -659,7 +949,14 @@ class HonchoMemoryProvider(MemoryProvider):
|
|||
|
||||
try:
|
||||
if tool_name == "honcho_profile":
|
||||
card = self._manager.get_peer_card(self._session_key)
|
||||
peer = args.get("peer", "user")
|
||||
card_update = args.get("card")
|
||||
if card_update:
|
||||
result = self._manager.set_peer_card(self._session_key, card_update, peer=peer)
|
||||
if result is None:
|
||||
return tool_error("Failed to update peer card.")
|
||||
return json.dumps({"result": f"Peer card updated ({len(result)} facts).", "card": result})
|
||||
card = self._manager.get_peer_card(self._session_key, peer=peer)
|
||||
if not card:
|
||||
return json.dumps({"result": "No profile facts available yet."})
|
||||
return json.dumps({"result": card})
|
||||
|
|
@ -669,30 +966,64 @@ class HonchoMemoryProvider(MemoryProvider):
|
|||
if not query:
|
||||
return tool_error("Missing required parameter: query")
|
||||
max_tokens = min(int(args.get("max_tokens", 800)), 2000)
|
||||
peer = args.get("peer", "user")
|
||||
result = self._manager.search_context(
|
||||
self._session_key, query, max_tokens=max_tokens
|
||||
self._session_key, query, max_tokens=max_tokens, peer=peer
|
||||
)
|
||||
if not result:
|
||||
return json.dumps({"result": "No relevant context found."})
|
||||
return json.dumps({"result": result})
|
||||
|
||||
elif tool_name == "honcho_context":
|
||||
elif tool_name == "honcho_reasoning":
|
||||
query = args.get("query", "")
|
||||
if not query:
|
||||
return tool_error("Missing required parameter: query")
|
||||
peer = args.get("peer", "user")
|
||||
reasoning_level = args.get("reasoning_level")
|
||||
result = self._manager.dialectic_query(
|
||||
self._session_key, query, peer=peer
|
||||
self._session_key, query,
|
||||
reasoning_level=reasoning_level,
|
||||
peer=peer,
|
||||
)
|
||||
# Update cadence tracker so auto-injection respects the gap after an explicit call
|
||||
self._last_dialectic_turn = self._turn_count
|
||||
return json.dumps({"result": result or "No result from Honcho."})
|
||||
|
||||
elif tool_name == "honcho_context":
|
||||
peer = args.get("peer", "user")
|
||||
ctx = self._manager.get_session_context(self._session_key, peer=peer)
|
||||
if not ctx:
|
||||
return json.dumps({"result": "No context available yet."})
|
||||
parts = []
|
||||
if ctx.get("summary"):
|
||||
parts.append(f"## Summary\n{ctx['summary']}")
|
||||
if ctx.get("representation"):
|
||||
parts.append(f"## Representation\n{ctx['representation']}")
|
||||
if ctx.get("card"):
|
||||
parts.append(f"## Card\n{ctx['card']}")
|
||||
if ctx.get("recent_messages"):
|
||||
msgs = ctx["recent_messages"]
|
||||
msg_str = "\n".join(
|
||||
f" [{m['role']}] {m['content'][:200]}"
|
||||
for m in msgs[-5:] # last 5 for brevity
|
||||
)
|
||||
parts.append(f"## Recent messages\n{msg_str}")
|
||||
return json.dumps({"result": "\n\n".join(parts) or "No context available."})
|
||||
|
||||
elif tool_name == "honcho_conclude":
|
||||
delete_id = args.get("delete_id")
|
||||
peer = args.get("peer", "user")
|
||||
if delete_id:
|
||||
ok = self._manager.delete_conclusion(self._session_key, delete_id, peer=peer)
|
||||
if ok:
|
||||
return json.dumps({"result": f"Conclusion {delete_id} deleted."})
|
||||
return tool_error(f"Failed to delete conclusion {delete_id}.")
|
||||
conclusion = args.get("conclusion", "")
|
||||
if not conclusion:
|
||||
return tool_error("Missing required parameter: conclusion")
|
||||
ok = self._manager.create_conclusion(self._session_key, conclusion)
|
||||
return tool_error("Missing required parameter: conclusion or delete_id")
|
||||
ok = self._manager.create_conclusion(self._session_key, conclusion, peer=peer)
|
||||
if ok:
|
||||
return json.dumps({"result": f"Conclusion saved: {conclusion}"})
|
||||
return json.dumps({"result": f"Conclusion saved for {peer}: {conclusion}"})
|
||||
return tool_error("Failed to save conclusion.")
|
||||
|
||||
return tool_error(f"Unknown tool: {tool_name}")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue