feat(honcho): context injection overhaul, 5-tool surface, cost safety, session isolation (#10619)

Salvaged from PR #9884 by erosika. Cherry-picked plugin changes onto current main with minimal core modifications.

Plugin changes (plugins/memory/honcho/):
- New honcho_reasoning tool (5th tool, splits LLM calls from honcho_context)
- Two-layer context injection: base context (summary + representation + card) on contextCadence, dialectic supplement on dialecticCadence
- Multi-pass dialectic depth (1-3 passes) with early bail-out on strong signal
- Cold/warm prompt selection based on session state
- dialecticCadence defaults to 3 (was 1) — ~66% fewer Honcho LLM calls
- Session summary injection for conversational continuity
- Bidirectional peer targeting on all 5 tools
- Correctness fixes: peer param fallback, None guard on set_peer_card, schema validation, signal_sufficient anchored regex, mid->medium level fix

Core changes (~20 lines across 3 files):
- agent/memory_manager.py: Enhanced sanitize_context() to strip full <memory-context> blocks and system notes (prevents leak from saveMessages)
- run_agent.py: gateway_session_key param for stable per-chat Honcho sessions, on_turn_start() call before prefetch_all() for cadence tracking, sanitize_context() on user messages to strip leaked memory blocks
- gateway/run.py: skip_memory=True on 2 temp agents (prevents orphan sessions), gateway_session_key threading to main agent

Tests: 509 passed (3 skipped — honcho SDK not installed locally)

Docs: Updated honcho.md, memory-providers.md, tools-reference.md, SKILL.md

Co-authored-by: erosika <erosika@users.noreply.github.com>
2026-04-25 00:51:20 +00:00 · 2026-04-15 19:12:19 -07:00 · 2026-04-15 19:12:19 -07:00 · cc6e8941db
commit cc6e8941db
parent 00ff9a26cd
17 changed files with 2632 additions and 396 deletions
--- a/plugins/memory/honcho/client.py
+++ b/plugins/memory/honcho/client.py
@ -58,7 +58,8 @@ def resolve_config_path() -> Path:

    Resolution order:
      1. $HERMES_HOME/honcho.json      (profile-local, if it exists)
-      2. ~/.honcho/config.json          (global, cross-app interop)
+      2. ~/.hermes/honcho.json          (default profile — shared host blocks live here)
+      3. ~/.honcho/config.json          (global, cross-app interop)

    Returns the global path if none exist (for first-time setup writes).
    """
@ -66,6 +67,11 @@ def resolve_config_path() -> Path:
    if local_path.exists():
        return local_path

+    # Default profile's config — host blocks accumulate here via setup/clone
+    default_path = Path.home() / ".hermes" / "honcho.json"
+    if default_path != local_path and default_path.exists():
+        return default_path
+
    return GLOBAL_CONFIG_PATH


@ -88,6 +94,68 @@ def _resolve_bool(host_val, root_val, *, default: bool) -> bool:
    return default


+def _parse_context_tokens(host_val, root_val) -> int | None:
+    """Parse contextTokens: host wins, then root, then None (uncapped)."""
+    for val in (host_val, root_val):
+        if val is not None:
+            try:
+                return int(val)
+            except (ValueError, TypeError):
+                pass
+    return None
+
+
+def _parse_dialectic_depth(host_val, root_val) -> int:
+    """Parse dialecticDepth: host wins, then root, then 1. Clamped to 1-3."""
+    for val in (host_val, root_val):
+        if val is not None:
+            try:
+                return max(1, min(int(val), 3))
+            except (ValueError, TypeError):
+                pass
+    return 1
+
+
+_VALID_REASONING_LEVELS = ("minimal", "low", "medium", "high", "max")
+
+
+def _parse_dialectic_depth_levels(host_val, root_val, depth: int) -> list[str] | None:
+    """Parse dialecticDepthLevels: optional array of reasoning levels per pass.
+
+    Returns None when not configured (use proportional defaults).
+    When configured, replaces unrecognized levels with "low" and truncates/pads (with "low") to match depth.
+    """
+    for val in (host_val, root_val):
+        if val is not None and isinstance(val, list):
+            levels = [
+                lvl if lvl in _VALID_REASONING_LEVELS else "low"
+                for lvl in val[:depth]
+            ]
+            # Pad with "low" if array is shorter than depth
+            while len(levels) < depth:
+                levels.append("low")
+            return levels
+    return None
+
+
+def _resolve_optional_float(*values: Any) -> float | None:
+    """Return the first value that parses to a positive float; skip blank, invalid, or non-positive values (None if none qualify)."""
+    for value in values:
+        if value is None:
+            continue
+        if isinstance(value, str):
+            value = value.strip()
+            if not value:
+                continue
+        try:
+            parsed = float(value)
+        except (TypeError, ValueError):
+            continue
+        if parsed > 0:
+            return parsed
+    return None
+
+
 _VALID_OBSERVATION_MODES = {"unified", "directional"}
 _OBSERVATION_MODE_ALIASES = {"shared": "unified", "separate": "directional", "cross": "directional"}

@ -153,6 +221,8 @@ class HonchoClientConfig:
    environment: str = "production"
    # Optional base URL for self-hosted Honcho (overrides environment mapping)
    base_url: str | None = None
+    # Optional request timeout in seconds for Honcho SDK HTTP calls
+    timeout: float | None = None
    # Identity
    peer_name: str | None = None
    ai_peer: str = "hermes"
@ -162,17 +232,25 @@ class HonchoClientConfig:
    # Write frequency: "async" (background thread), "turn" (sync per turn),
    # "session" (flush on session end), or int (every N turns)
    write_frequency: str | int = "async"
-    # Prefetch budget
+    # Prefetch budget (None = no cap; set to an integer to bound auto-injected context)
    context_tokens: int | None = None
    # Dialectic (peer.chat) settings
    # reasoning_level: "minimal" | "low" | "medium" | "high" | "max"
    dialectic_reasoning_level: str = "low"
-    # dynamic: auto-bump reasoning level based on query length
-    #   true  — low->medium (120+ chars), low->high (400+ chars), capped at "high"
-    #   false — always use dialecticReasoningLevel as-is
+    # When true, the model can override reasoning_level per-call via the
+    # honcho_reasoning tool param (agentic). When false, always uses
+    # dialecticReasoningLevel and ignores model-provided overrides.
    dialectic_dynamic: bool = True
    # Max chars of dialectic result to inject into Hermes system prompt
    dialectic_max_chars: int = 600
+    # Dialectic depth: how many .chat() calls per dialectic cycle (1-3).
+    # Depth 1: single call. Depth 2: self-audit + targeted synthesis.
+    # Depth 3: self-audit + synthesis + reconciliation.
+    dialectic_depth: int = 1
+    # Optional per-pass reasoning level override. Array of reasoning levels
+    # matching dialectic_depth length. When None, uses proportional defaults
+    # derived from dialectic_reasoning_level.
+    dialectic_depth_levels: list[str] | None = None
    # Honcho API limits — configurable for self-hosted instances
    # Max chars per message sent via add_messages() (Honcho cloud: 25000)
    message_max_chars: int = 25000
@ -183,10 +261,8 @@ class HonchoClientConfig:
    # "context" — auto-injected context only, Honcho tools removed
    # "tools"   — Honcho tools only, no auto-injected context
    recall_mode: str = "hybrid"
-    # When True and recallMode is "tools", create the Honcho session eagerly
-    # during initialize() instead of deferring to the first tool call.
-    # This ensures sync_turn() can write from the very first turn.
-    # Does NOT enable automatic context injection — only changes init timing.
+    # Eager init in tools mode — when true, initializes session during
+    # initialize() instead of deferring to first tool call
    init_on_session_start: bool = False
    # Observation mode: legacy string shorthand ("directional" or "unified").
    # Kept for backward compat; granular per-peer booleans below are preferred.
@ -218,12 +294,14 @@ class HonchoClientConfig:
        resolved_host = host or resolve_active_host()
        api_key = os.environ.get("HONCHO_API_KEY")
        base_url = os.environ.get("HONCHO_BASE_URL", "").strip() or None
+        timeout = _resolve_optional_float(os.environ.get("HONCHO_TIMEOUT"))
        return cls(
            host=resolved_host,
            workspace_id=workspace_id,
            api_key=api_key,
            environment=os.environ.get("HONCHO_ENVIRONMENT", "production"),
            base_url=base_url,
+            timeout=timeout,
            ai_peer=resolved_host,
            enabled=bool(api_key or base_url),
        )
@ -284,6 +362,11 @@ class HonchoClientConfig:
            or os.environ.get("HONCHO_BASE_URL", "").strip()
            or None
        )
+        timeout = _resolve_optional_float(
+            raw.get("timeout"),
+            raw.get("requestTimeout"),
+            os.environ.get("HONCHO_TIMEOUT"),
+        )

        # Auto-enable when API key or base_url is present (unless explicitly disabled)
        # Host-level enabled wins, then root-level, then auto-enable if key/url exists.
@ -329,12 +412,16 @@ class HonchoClientConfig:
            api_key=api_key,
            environment=environment,
            base_url=base_url,
+            timeout=timeout,
            peer_name=host_block.get("peerName") or raw.get("peerName"),
            ai_peer=ai_peer,
            enabled=enabled,
            save_messages=save_messages,
            write_frequency=write_frequency,
-            context_tokens=host_block.get("contextTokens") or raw.get("contextTokens"),
+            context_tokens=_parse_context_tokens(
+                host_block.get("contextTokens"),
+                raw.get("contextTokens"),
+            ),
            dialectic_reasoning_level=(
                host_block.get("dialecticReasoningLevel")
                or raw.get("dialecticReasoningLevel")
@ -350,6 +437,15 @@ class HonchoClientConfig:
                or raw.get("dialecticMaxChars")
                or 600
            ),
+            dialectic_depth=_parse_dialectic_depth(
+                host_block.get("dialecticDepth"),
+                raw.get("dialecticDepth"),
+            ),
+            dialectic_depth_levels=_parse_dialectic_depth_levels(
+                host_block.get("dialecticDepthLevels"),
+                raw.get("dialecticDepthLevels"),
+                depth=_parse_dialectic_depth(host_block.get("dialecticDepth"), raw.get("dialecticDepth")),
+            ),
            message_max_chars=int(
                host_block.get("messageMaxChars")
                or raw.get("messageMaxChars")
@ -416,16 +512,18 @@ class HonchoClientConfig:
        cwd: str | None = None,
        session_title: str | None = None,
        session_id: str | None = None,
+        gateway_session_key: str | None = None,
    ) -> str | None:
        """Resolve Honcho session name.

        Resolution order:
          1. Manual directory override from sessions map
          2. Hermes session title (from /title command)
-          3. per-session strategy — Hermes session_id ({timestamp}_{hex})
-          4. per-repo strategy — git repo root directory name
-          5. per-directory strategy — directory basename
-          6. global strategy — workspace name
+          3. Gateway session key (stable per-chat identifier from gateway platforms)
+          4. per-session strategy — Hermes session_id ({timestamp}_{hex})
+          5. per-repo strategy — git repo root directory name
+          6. per-directory strategy — directory basename
+          7. global strategy — workspace name
        """
        import re

@ -439,12 +537,22 @@ class HonchoClientConfig:

        # /title mid-session remap
        if session_title:
-            sanitized = re.sub(r'[^a-zA-Z0-9_-]', '-', session_title).strip('-')
+            sanitized = re.sub(r'[^a-zA-Z0-9_-]+', '-', session_title).strip('-')
            if sanitized:
                if self.session_peer_prefix and self.peer_name:
                    return f"{self.peer_name}-{sanitized}"
                return sanitized

+        # Gateway session key: stable per-chat identifier passed by the gateway
+        # (e.g. "agent:main:telegram:dm:8439114563"). Sanitize colons to hyphens
+        # for Honcho session ID compatibility. This takes priority over strategy-
+        # based resolution because gateway platforms need per-chat isolation that
+        # cwd-based strategies cannot provide.
+        if gateway_session_key:
+            sanitized = re.sub(r'[^a-zA-Z0-9_-]+', '-', gateway_session_key).strip('-')
+            if sanitized:
+                return sanitized
+
        # per-session: inherit Hermes session_id (new Honcho session each run)
        if self.session_strategy == "per-session" and session_id:
            if self.session_peer_prefix and self.peer_name:
@ -506,13 +614,20 @@ def get_honcho_client(config: HonchoClientConfig | None = None) -> Honcho:
    # mapping, enabling remote self-hosted Honcho deployments without
    # requiring the server to live on localhost.
    resolved_base_url = config.base_url
-    if not resolved_base_url:
+    resolved_timeout = config.timeout
+    if not resolved_base_url or resolved_timeout is None:
        try:
            from hermes_cli.config import load_config
            hermes_cfg = load_config()
            honcho_cfg = hermes_cfg.get("honcho", {})
            if isinstance(honcho_cfg, dict):
-                resolved_base_url = honcho_cfg.get("base_url", "").strip() or None
+                if not resolved_base_url:
+                    resolved_base_url = honcho_cfg.get("base_url", "").strip() or None
+                if resolved_timeout is None:
+                    resolved_timeout = _resolve_optional_float(
+                        honcho_cfg.get("timeout"),
+                        honcho_cfg.get("request_timeout"),
+                    )
        except Exception:
            pass

@ -547,6 +662,8 @@ def get_honcho_client(config: HonchoClientConfig | None = None) -> Honcho:
    }
    if resolved_base_url:
        kwargs["base_url"] = resolved_base_url
+    if resolved_timeout is not None:
+        kwargs["timeout"] = resolved_timeout

    _honcho_client = Honcho(**kwargs)