diff --git a/optional-skills/autonomous-ai-agents/honcho/SKILL.md b/optional-skills/autonomous-ai-agents/honcho/SKILL.md index c60d2c635..5d03a5498 100644 --- a/optional-skills/autonomous-ai-agents/honcho/SKILL.md +++ b/optional-skills/autonomous-ai-agents/honcho/SKILL.md @@ -145,10 +145,10 @@ Controls **how often** dialectic and context calls happen. | Key | Default | Description | |-----|---------|-------------| | `contextCadence` | `1` | Min turns between context API calls | -| `dialecticCadence` | `3` | Min turns between dialectic API calls | +| `dialecticCadence` | `1` | Min turns between dialectic API calls | | `injectionFrequency` | `every-turn` | `every-turn` or `first-turn` for base context injection | -Higher cadence values reduce API calls and cost. `dialecticCadence: 3` (default) means the dialectic engine fires at most every 3rd turn. +Higher cadence values reduce API calls and cost. `dialecticCadence: 1` (default) fires every turn; set to `3` or higher to throttle for cost. ### Depth (how many) @@ -368,7 +368,7 @@ Config file: `$HERMES_HOME/honcho.json` (profile-local) or `~/.honcho/config.jso | `contextTokens` | uncapped | Max tokens for the combined base context injection (summary + representation + card). Opt-in cap — omit to leave uncapped, set to an integer to bound injection size. | | `injectionFrequency` | `every-turn` | `every-turn` or `first-turn` | | `contextCadence` | `1` | Min turns between context API calls | -| `dialecticCadence` | `3` | Min turns between dialectic LLM calls | +| `dialecticCadence` | `1` | Min turns between dialectic LLM calls | The `contextTokens` budget is enforced at injection time. If the session summary + representation + card exceed the budget, Honcho trims the summary first, then the representation, preserving the card. This prevents context blowup in long sessions. 
diff --git a/plugins/memory/honcho/__init__.py b/plugins/memory/honcho/__init__.py index ca44ce601..ac0f60279 100644 --- a/plugins/memory/honcho/__init__.py +++ b/plugins/memory/honcho/__init__.py @@ -206,10 +206,11 @@ class HonchoMemoryProvider(MemoryProvider): self._turn_count = 0 self._injection_frequency = "every-turn" # or "first-turn" self._context_cadence = 1 # minimum turns between context API calls - self._dialectic_cadence = 3 # minimum turns between dialectic API calls + self._dialectic_cadence = 1 # minimum turns between dialectic API calls self._dialectic_depth = 1 # how many .chat() calls per dialectic cycle (1-3) self._dialectic_depth_levels: list[str] | None = None # per-pass reasoning levels - self._reasoning_level_cap: Optional[str] = None # "minimal", "low", "medium", "high" + self._reasoning_heuristic: bool = True # scale base level by query length + self._reasoning_level_cap: str = "high" # ceiling for auto-selected level self._last_context_turn = -999 self._last_dialectic_turn = -999 @@ -305,12 +306,12 @@ class HonchoMemoryProvider(MemoryProvider): raw = cfg.raw or {} self._injection_frequency = raw.get("injectionFrequency", "every-turn") self._context_cadence = int(raw.get("contextCadence", 1)) - self._dialectic_cadence = int(raw.get("dialecticCadence", 3)) + self._dialectic_cadence = int(raw.get("dialecticCadence", 1)) self._dialectic_depth = max(1, min(cfg.dialectic_depth, 3)) self._dialectic_depth_levels = cfg.dialectic_depth_levels - cap = raw.get("reasoningLevelCap") - if cap and cap in ("minimal", "low", "medium", "high"): - self._reasoning_level_cap = cap + self._reasoning_heuristic = cfg.reasoning_heuristic + if cfg.reasoning_level_cap in self._LEVEL_ORDER: + self._reasoning_level_cap = cfg.reasoning_level_cap except Exception as e: logger.debug("Honcho cost-awareness config parse error: %s", e) @@ -391,14 +392,42 @@ class HonchoMemoryProvider(MemoryProvider): except Exception as e: logger.debug("Honcho memory file migration skipped: 
%s", e) - # ----- B7: Pre-warming context at init ----- + # ----- B7: Pre-warming at init ----- + # Context prewarm: warms peer.context() cache (base layer), consumed + # via pop_context_result() in prefetch(). + # Dialectic prewarm: fires a depth-aware cycle against the plugin's + # own _prefetch_result so turn 1 can consume it directly. Without this + # the first-turn sync path pays for a duplicate .chat() — and at + # depth>1 a single-pass session-start dialectic often returns weak + # output that multi-pass audit/reconciliation is meant to catch. if self._recall_mode in ("context", "hybrid"): try: self._manager.prefetch_context(self._session_key) - self._manager.prefetch_dialectic(self._session_key, "What should I know about this user?") - logger.debug("Honcho pre-warm threads started for session: %s", self._session_key) except Exception as e: - logger.debug("Honcho pre-warm failed: %s", e) + logger.debug("Honcho context prewarm failed: %s", e) + + _prewarm_query = ( + "Summarize what you know about this user. " + "Focus on preferences, current projects, and working style." + ) + + def _prewarm_dialectic() -> None: + try: + r = self._run_dialectic_depth(_prewarm_query) + except Exception as exc: + logger.debug("Honcho dialectic prewarm failed: %s", exc) + return + if r and r.strip(): + with self._prefetch_lock: + self._prefetch_result = r + # Treat prewarm as turn 0 so cadence gating starts clean. + self._last_dialectic_turn = 0 + + self._prefetch_thread = threading.Thread( + target=_prewarm_dialectic, daemon=True, name="honcho-prewarm-dialectic" + ) + self._prefetch_thread.start() + logger.debug("Honcho pre-warm started for session: %s", self._session_key) def _ensure_session(self) -> bool: """Lazily initialize the Honcho session (for tools-only mode). 
@@ -526,6 +555,11 @@ class HonchoMemoryProvider(MemoryProvider): if self._injection_frequency == "first-turn" and self._turn_count > 1: return "" + # Skip trivial prompts — "ok", "yes", slash commands carry no semantic signal, + # so injecting user context there just burns tokens and can derail the reply. + if self._is_trivial_prompt(query): + return "" + parts = [] # ----- Layer 1: Base context (representation + card) ----- @@ -560,37 +594,46 @@ class HonchoMemoryProvider(MemoryProvider): # On the very first turn, no queue_prefetch() has run yet so the # dialectic result is empty. Run with a bounded timeout so a slow # Honcho connection doesn't block the first response indefinitely. - # On timeout the result is skipped and queue_prefetch() will pick it - # up at the next cadence-allowed turn. + # On timeout we let the thread keep running and write its result into + # _prefetch_result under the lock, so the next turn picks it up. + # + # Skip if the session-start prewarm already filled _prefetch_result — + # firing another .chat() would be duplicate work. 
+ with self._prefetch_lock: + _prewarm_landed = bool(self._prefetch_result) + if _prewarm_landed and self._last_dialectic_turn == -999: + self._last_dialectic_turn = self._turn_count + if self._last_dialectic_turn == -999 and query: _first_turn_timeout = ( self._config.timeout if self._config and self._config.timeout else 8.0 ) - _result_holder: list[str] = [] + _fired_at = self._turn_count def _run_first_turn() -> None: try: - _result_holder.append(self._run_dialectic_depth(query)) + r = self._run_dialectic_depth(query) except Exception as exc: logger.debug("Honcho first-turn dialectic failed: %s", exc) - - _t = threading.Thread(target=_run_first_turn, daemon=True) - _t.start() - _t.join(timeout=_first_turn_timeout) - if not _t.is_alive(): - first_turn_dialectic = _result_holder[0] if _result_holder else "" - if first_turn_dialectic and first_turn_dialectic.strip(): + return + if r and r.strip(): with self._prefetch_lock: - self._prefetch_result = first_turn_dialectic - self._last_dialectic_turn = self._turn_count - else: + self._prefetch_result = r + # Only advance cadence on a non-empty result so failures + # don't burn the configured cadence window on nothing. + self._last_dialectic_turn = _fired_at + + self._prefetch_thread = threading.Thread( + target=_run_first_turn, daemon=True, name="honcho-prefetch-first" + ) + self._prefetch_thread.start() + self._prefetch_thread.join(timeout=_first_turn_timeout) + if self._prefetch_thread.is_alive(): logger.debug( - "Honcho first-turn dialectic timed out (%.1fs) — " - "will inject at next cadence-allowed turn", + "Honcho first-turn dialectic still running after %.1fs — " + "will surface on next turn", _first_turn_timeout, ) - # Don't update _last_dialectic_turn: queue_prefetch() will - # retry at the next cadence-allowed turn via the async path. 
if self._prefetch_thread and self._prefetch_thread.is_alive(): self._prefetch_thread.join(timeout=3.0) @@ -641,6 +684,10 @@ class HonchoMemoryProvider(MemoryProvider): if self._recall_mode == "tools": return + # Trivial prompts don't warrant either a context refresh or a dialectic call. + if self._is_trivial_prompt(query): + return + # ----- Context refresh (base layer) — independent cadence ----- if self._context_cadence <= 1 or (self._turn_count - self._last_context_turn) >= self._context_cadence: self._last_context_turn = self._turn_count @@ -650,23 +697,35 @@ class HonchoMemoryProvider(MemoryProvider): logger.debug("Honcho context prefetch failed: %s", e) # ----- Dialectic prefetch (supplement layer) ----- - # B5: cadence check — skip if too soon since last dialectic call - if self._dialectic_cadence > 1: - if (self._turn_count - self._last_dialectic_turn) < self._dialectic_cadence: - logger.debug("Honcho dialectic prefetch skipped: cadence %d, turns since last: %d", - self._dialectic_cadence, self._turn_count - self._last_dialectic_turn) - return + # Guard against thread pile-up: if a prior dialectic is still in flight, + # let it finish instead of stacking races on _prefetch_result. + if self._prefetch_thread and self._prefetch_thread.is_alive(): + logger.debug("Honcho dialectic prefetch skipped: prior thread still running") + return - self._last_dialectic_turn = self._turn_count + # B5: cadence check — skip if too soon since last *successful* dialectic call. + # The gate applies uniformly (including cadence=1): "every turn" means once + # per turn, not twice on the same turn when first-turn sync already fired. 
+ if (self._turn_count - self._last_dialectic_turn) < self._dialectic_cadence: + logger.debug("Honcho dialectic prefetch skipped: cadence %d, turns since last: %d", + self._dialectic_cadence, self._turn_count - self._last_dialectic_turn) + return + + # Advance cadence only on a non-empty result — otherwise a silent failure + # (empty dialectic, transient API error) would burn the full cadence window + # before the next retry, making it look like dialectic "never fires again". + _fired_at = self._turn_count def _run(): try: result = self._run_dialectic_depth(query) - if result and result.strip(): - with self._prefetch_lock: - self._prefetch_result = result except Exception as e: logger.debug("Honcho prefetch failed: %s", e) + return + if result and result.strip(): + with self._prefetch_lock: + self._prefetch_result = result + self._last_dialectic_turn = _fired_at self._prefetch_thread = threading.Thread( target=_run, daemon=True, name="honcho-prefetch" @@ -692,11 +751,42 @@ class HonchoMemoryProvider(MemoryProvider): _LEVEL_ORDER = ("minimal", "low", "medium", "high", "max") - def _resolve_pass_level(self, pass_idx: int) -> str: + # Reasoning-level heuristic thresholds (restored from pre-9a0ab34c behavior). + # Promoted to class constants so tests can override without widening the + # config surface. Bump to config fields only if real use shows they're needed. + _HEURISTIC_LENGTH_MEDIUM = 120 + _HEURISTIC_LENGTH_HIGH = 400 + + def _apply_reasoning_heuristic(self, base: str, query: str) -> str: + """Scale `base` up by query length, clamped at reasoning_level_cap. + + Char-count heuristic: +1 at >=120 chars, +2 at >=400. Ceiling is + reasoning_level_cap (default 'high' — 'max' is reserved for + explicit tool-path selection). 
+ """ + if not self._reasoning_heuristic or not query: + return base + if base not in self._LEVEL_ORDER: + return base + n = len(query) + if n < self._HEURISTIC_LENGTH_MEDIUM: + bump = 0 + elif n < self._HEURISTIC_LENGTH_HIGH: + bump = 1 + else: + bump = 2 + base_idx = self._LEVEL_ORDER.index(base) + cap_idx = self._LEVEL_ORDER.index(self._reasoning_level_cap) + return self._LEVEL_ORDER[min(base_idx + bump, cap_idx)] + + def _resolve_pass_level(self, pass_idx: int, query: str = "") -> str: """Resolve reasoning level for a given pass index. - Uses dialecticDepthLevels if configured, otherwise proportional - defaults relative to dialecticReasoningLevel. + Precedence: + 1. dialecticDepthLevels (explicit per-pass) — wins absolutely + 2. _PROPORTIONAL_LEVELS table (depth>1 lighter-early passes) + 3. Base level = dialecticReasoningLevel, optionally scaled by the + reasoning heuristic when the mapping falls through to 'base' """ if self._dialectic_depth_levels and pass_idx < len(self._dialectic_depth_levels): return self._dialectic_depth_levels[pass_idx] @@ -704,7 +794,7 @@ class HonchoMemoryProvider(MemoryProvider): base = (self._config.dialectic_reasoning_level if self._config else "low") mapping = self._PROPORTIONAL_LEVELS.get((self._dialectic_depth, pass_idx)) if mapping is None or mapping == "base": - return base + return self._apply_reasoning_heuristic(base, query) return mapping def _build_dialectic_prompt(self, pass_idx: int, prior_results: list[str], is_cold: bool) -> str: @@ -791,7 +881,7 @@ class HonchoMemoryProvider(MemoryProvider): break prompt = self._build_dialectic_prompt(i, results, is_cold) - level = self._resolve_pass_level(i) + level = self._resolve_pass_level(i, query=query) logger.debug("Honcho dialectic depth %d: pass %d, level=%s, cold=%s", self._dialectic_depth, i, level, is_cold) @@ -808,6 +898,29 @@ class HonchoMemoryProvider(MemoryProvider): return r return "" + # Prompts that carry no semantic signal — trivial acknowledgements, slash + # 
commands, empty input. Skipping injection here saves tokens and prevents + # stale user-model context from derailing one-word replies. + _TRIVIAL_PROMPT_RE = re.compile( + r'^(yes|no|ok|okay|sure|thanks|thank you|y|n|yep|nope|yeah|nah|' + r'continue|go ahead|do it|proceed|got it|cool|nice|great|done|next|lgtm|k)$', + re.IGNORECASE, + ) + + @classmethod + def _is_trivial_prompt(cls, text: str) -> bool: + """Return True if the prompt is too trivial to warrant context injection.""" + if not text: + return True + stripped = text.strip() + if not stripped: + return True + if stripped.startswith("/"): + return True + if cls._TRIVIAL_PROMPT_RE.match(stripped): + return True + return False + def on_turn_start(self, turn_number: int, message: str, **kwargs) -> None: """Track turn count for cadence and injection_frequency logic.""" self._turn_count = turn_number diff --git a/plugins/memory/honcho/cli.py b/plugins/memory/honcho/cli.py index 536d34002..478bf39d8 100644 --- a/plugins/memory/honcho/cli.py +++ b/plugins/memory/honcho/cli.py @@ -460,17 +460,17 @@ def cmd_setup(args) -> None: pass # keep current # --- 7b. Dialectic cadence --- - current_dialectic = str(hermes_host.get("dialecticCadence") or cfg.get("dialecticCadence") or "3") + current_dialectic = str(hermes_host.get("dialecticCadence") or cfg.get("dialecticCadence") or "1") print("\n Dialectic cadence:") print(" How often Honcho rebuilds its user model (LLM call on Honcho backend).") - print(" 1 = every turn (aggressive), 3 = every 3 turns (recommended), 5+ = sparse.") + print(" 1 = every turn (default), 3+ = sparse (cost-saving).") new_dialectic = _prompt("Dialectic cadence", default=current_dialectic) try: val = int(new_dialectic) if val >= 1: hermes_host["dialecticCadence"] = val except (ValueError, TypeError): - hermes_host["dialecticCadence"] = 3 + hermes_host["dialecticCadence"] = 1 # --- 8. 
Session strategy --- current_strat = hermes_host.get("sessionStrategy") or cfg.get("sessionStrategy", "per-session") @@ -636,7 +636,7 @@ def cmd_status(args) -> None: print(f" Recall mode: {hcfg.recall_mode}") print(f" Context budget: {hcfg.context_tokens or '(uncapped)'} tokens") raw = getattr(hcfg, "raw", None) or {} - dialectic_cadence = raw.get("dialecticCadence") or 3 + dialectic_cadence = raw.get("dialecticCadence") or 1 print(f" Dialectic cad: every {dialectic_cadence} turn{'s' if dialectic_cadence != 1 else ''}") print(f" Observation: user(me={hcfg.user_observe_me},others={hcfg.user_observe_others}) ai(me={hcfg.ai_observe_me},others={hcfg.ai_observe_others})") print(f" Write freq: {hcfg.write_frequency}") diff --git a/plugins/memory/honcho/client.py b/plugins/memory/honcho/client.py index 2474d3a2b..136b1e60d 100644 --- a/plugins/memory/honcho/client.py +++ b/plugins/memory/honcho/client.py @@ -251,6 +251,14 @@ class HonchoClientConfig: # matching dialectic_depth length. When None, uses proportional defaults # derived from dialectic_reasoning_level. dialectic_depth_levels: list[str] | None = None + # Reasoning-level heuristic for auto-injected dialectic calls. When true, + # scales the base level up on longer queries (restored from pre-#10619 + # behavior; see plugins/memory/honcho/__init__.py for thresholds). + # Never auto-selects a level above reasoning_level_cap. + reasoning_heuristic: bool = True + # Ceiling for heuristic-selected reasoning level. "max" is reserved for + # explicit tool-path selection; default "high" matches the old behavior. 
+ reasoning_level_cap: str = "high" # Honcho API limits — configurable for self-hosted instances # Max chars per message sent via add_messages() (Honcho cloud: 25000) message_max_chars: int = 25000 @@ -446,6 +454,16 @@ class HonchoClientConfig: raw.get("dialecticDepthLevels"), depth=_parse_dialectic_depth(host_block.get("dialecticDepth"), raw.get("dialecticDepth")), ), + reasoning_heuristic=_resolve_bool( + host_block.get("reasoningHeuristic"), + raw.get("reasoningHeuristic"), + default=True, + ), + reasoning_level_cap=( + host_block.get("reasoningLevelCap") + or raw.get("reasoningLevelCap") + or "high" + ), message_max_chars=int( host_block.get("messageMaxChars") or raw.get("messageMaxChars") diff --git a/plugins/memory/honcho/session.py b/plugins/memory/honcho/session.py index fd91ee3b3..7344b517e 100644 --- a/plugins/memory/honcho/session.py +++ b/plugins/memory/honcho/session.py @@ -100,9 +100,11 @@ class HonchoSessionManager: self._write_frequency = write_frequency self._turn_counter: int = 0 - # Prefetch caches: session_key → last result (consumed once per turn) + # Prefetch cache: session_key → last context result (consumed once per turn). + # Dialectic results are cached on the plugin side (HonchoMemoryProvider + # ._prefetch_result) so session-start prewarm and turn-driven fires share + # one source of truth; see __init__.py _do_session_init for the prewarm. self._context_cache: dict[str, dict] = {} - self._dialectic_cache: dict[str, str] = {} self._prefetch_cache_lock = threading.Lock() self._dialectic_reasoning_level: str = ( config.dialectic_reasoning_level if config else "low" @@ -499,8 +501,8 @@ class HonchoSessionManager: Query Honcho's dialectic endpoint about a peer. Runs an LLM on Honcho's backend against the target peer's full - representation. Higher latency than context() — call async via - prefetch_dialectic() to avoid blocking the response. + representation. 
Higher latency than context() — callers run this in + a background thread (see HonchoMemoryProvider) to avoid blocking. Args: session_key: The session key to query against. @@ -555,42 +557,6 @@ class HonchoSessionManager: logger.warning("Honcho dialectic query failed: %s", e) return "" - def prefetch_dialectic(self, session_key: str, query: str) -> None: - """ - Fire a dialectic_query in a background thread, caching the result. - - Non-blocking. The result is available via pop_dialectic_result() - on the next call (typically the following turn). Reasoning level - is selected dynamically based on query complexity. - - Args: - session_key: The session key to query against. - query: The user's current message, used as the query. - """ - def _run(): - result = self.dialectic_query(session_key, query) - if result: - self.set_dialectic_result(session_key, result) - - t = threading.Thread(target=_run, name="honcho-dialectic-prefetch", daemon=True) - t.start() - - def set_dialectic_result(self, session_key: str, result: str) -> None: - """Store a prefetched dialectic result in a thread-safe way.""" - if not result: - return - with self._prefetch_cache_lock: - self._dialectic_cache[session_key] = result - - def pop_dialectic_result(self, session_key: str) -> str: - """ - Return and clear the cached dialectic result for this session. - - Returns empty string if no result is ready yet. - """ - with self._prefetch_cache_lock: - return self._dialectic_cache.pop(session_key, "") - def prefetch_context(self, session_key: str, user_message: str | None = None) -> None: """ Fire get_prefetch_context in a background thread, caching the result. 
diff --git a/tests/agent/test_memory_provider.py b/tests/agent/test_memory_provider.py index 9301960b7..5cd0d8ab4 100644 --- a/tests/agent/test_memory_provider.py +++ b/tests/agent/test_memory_provider.py @@ -971,8 +971,6 @@ class TestHonchoCadenceTracking: class FakeManager: def prefetch_context(self, key, query=None): pass - def prefetch_dialectic(self, key, query): - pass p._manager = FakeManager() diff --git a/tests/honcho_plugin/test_async_memory.py b/tests/honcho_plugin/test_async_memory.py index 936f47884..5df8d2745 100644 --- a/tests/honcho_plugin/test_async_memory.py +++ b/tests/honcho_plugin/test_async_memory.py @@ -460,10 +460,3 @@ class TestPrefetchCacheAccessors: assert mgr.pop_context_result("cli:test") == payload assert mgr.pop_context_result("cli:test") == {} - def test_set_and_pop_dialectic_result(self): - mgr = _make_manager(write_frequency="turn") - - mgr.set_dialectic_result("cli:test", "Resume with toolset cleanup") - - assert mgr.pop_dialectic_result("cli:test") == "Resume with toolset cleanup" - assert mgr.pop_dialectic_result("cli:test") == "" diff --git a/tests/honcho_plugin/test_session.py b/tests/honcho_plugin/test_session.py index 9784959d3..b0282b196 100644 --- a/tests/honcho_plugin/test_session.py +++ b/tests/honcho_plugin/test_session.py @@ -815,6 +815,24 @@ class TestDialecticInputGuard: # --------------------------------------------------------------------------- +def _settle_prewarm(provider): + """Wait for the session-start prewarm dialectic thread, then return the + provider to a clean 'nothing fired yet' state so cadence/first-turn/ + trivial-prompt tests can assert from a known baseline.""" + if provider._prefetch_thread: + provider._prefetch_thread.join(timeout=3.0) + with provider._prefetch_lock: + provider._prefetch_result = "" + provider._prefetch_thread = None + provider._last_dialectic_turn = -999 + if getattr(provider, "_manager", None) is not None: + try: + provider._manager.dialectic_query.reset_mock() + 
provider._manager.prefetch_context.reset_mock() + except AttributeError: + pass + + class TestDialecticCadenceDefaults: """Regression tests for dialectic_cadence default value.""" @@ -840,12 +858,15 @@ class TestDialecticCadenceDefaults: patch("hermes_constants.get_hermes_home", return_value=MagicMock()): provider.initialize(session_id="test-session-001") + _settle_prewarm(provider) return provider - def test_default_is_3(self): - """Default dialectic_cadence should be 3 to avoid per-turn LLM calls.""" + def test_default_is_1(self): + """Default dialectic_cadence should be 1 (every turn) — restored from + pre-#10619 behavior to avoid a silent regression on upgrade for users + who never set dialecticCadence explicitly.""" provider = self._make_provider() - assert provider._dialectic_cadence == 3 + assert provider._dialectic_cadence == 1 def test_config_override(self): """dialecticCadence from config overrides the default.""" @@ -908,6 +929,7 @@ class TestDialecticDepth: patch("hermes_constants.get_hermes_home", return_value=MagicMock()): provider.initialize(session_id="test-session-001") + _settle_prewarm(provider) return provider def test_default_depth_is_1(self): @@ -1062,7 +1084,8 @@ class TestDialecticDepth: provider.prefetch("hello") assert provider._manager.dialectic_query.call_count == 1 - # Now queue_prefetch on same turn should skip (cadence: 0 - 0 < 3) + # Now queue_prefetch on same turn should skip — _last_dialectic_turn + # was just set to _turn_count by the sync path, so (0 - 0 = 0) < cadence. 
provider._manager.dialectic_query.reset_mock() provider.queue_prefetch("hello") assert provider._manager.dialectic_query.call_count == 0 @@ -1083,6 +1106,453 @@ class TestDialecticDepth: assert provider._manager.dialectic_query.call_count == 1 +# --------------------------------------------------------------------------- +# Trivial-prompt heuristic + dialectic cadence silent-failure guards +# --------------------------------------------------------------------------- + + +class TestTrivialPromptHeuristic: + """Trivial prompts ('ok', 'y', slash commands) must short-circuit injection. + + Restored after accidental removal during the two-layer prefetch refactor. + """ + + @staticmethod + def _make_provider(): + from unittest.mock import patch, MagicMock + from plugins.memory.honcho.client import HonchoClientConfig + + cfg = HonchoClientConfig(api_key="test-key", enabled=True, recall_mode="hybrid") + provider = HonchoMemoryProvider() + mock_manager = MagicMock() + mock_session = MagicMock() + mock_session.messages = [] + mock_manager.get_or_create.return_value = mock_session + + with patch("plugins.memory.honcho.client.HonchoClientConfig.from_global_config", return_value=cfg), \ + patch("plugins.memory.honcho.client.get_honcho_client", return_value=MagicMock()), \ + patch("plugins.memory.honcho.session.HonchoSessionManager", return_value=mock_manager), \ + patch("hermes_constants.get_hermes_home", return_value=MagicMock()): + provider.initialize(session_id="test-session-trivial") + _settle_prewarm(provider) + return provider + + def test_classifier_catches_common_trivial_forms(self): + for t in ("ok", "OK", " ok ", "y", "yes", "sure", "thanks", "lgtm", "/help", "", " "): + assert HonchoMemoryProvider._is_trivial_prompt(t), f"expected trivial: {t!r}" + + def test_classifier_lets_substantive_prompts_through(self): + for t in ("hello world", "what's my name", "explain this", "ok so what's next"): + assert not HonchoMemoryProvider._is_trivial_prompt(t), f"expected 
non-trivial: {t!r}" + + def test_prefetch_skips_on_trivial_prompt(self): + provider = self._make_provider() + provider._session_key = "test" + provider._base_context_cache = "cached base" + provider._last_dialectic_turn = 0 + provider._turn_count = 5 + + assert provider.prefetch("ok") == "" + assert provider.prefetch("/help") == "" + # Dialectic should not have fired + assert provider._manager.dialectic_query.call_count == 0 + + def test_queue_prefetch_skips_on_trivial_prompt(self): + provider = self._make_provider() + provider._session_key = "test" + provider._turn_count = 10 + provider._last_dialectic_turn = -999 # would otherwise fire + # initialize() pre-warms; clear call counts before the assertion. + provider._manager.prefetch_context.reset_mock() + provider._manager.dialectic_query.reset_mock() + + provider.queue_prefetch("y") + # Trivial prompts short-circuit both context refresh and dialectic fire. + assert provider._manager.prefetch_context.call_count == 0 + assert provider._manager.dialectic_query.call_count == 0 + + +class TestDialecticCadenceAdvancesOnSuccess: + """Cadence tracker must only advance when the dialectic call actually returned. + + A silent failure (empty result, API blip) used to burn the full cadence window + before retrying — making it look like dialectic 'never fires again'. 
+ """ + + @staticmethod + def _make_provider(): + from unittest.mock import patch, MagicMock + from plugins.memory.honcho.client import HonchoClientConfig + + cfg = HonchoClientConfig( + api_key="test-key", enabled=True, recall_mode="hybrid", dialectic_depth=1, + ) + provider = HonchoMemoryProvider() + mock_manager = MagicMock() + mock_session = MagicMock() + mock_session.messages = [] + mock_manager.get_or_create.return_value = mock_session + + with patch("plugins.memory.honcho.client.HonchoClientConfig.from_global_config", return_value=cfg), \ + patch("plugins.memory.honcho.client.get_honcho_client", return_value=MagicMock()), \ + patch("plugins.memory.honcho.session.HonchoSessionManager", return_value=mock_manager), \ + patch("hermes_constants.get_hermes_home", return_value=MagicMock()): + provider.initialize(session_id="test-session-retry") + _settle_prewarm(provider) + return provider + + def test_empty_dialectic_result_does_not_advance_cadence(self): + import time as _time + provider = self._make_provider() + provider._session_key = "test" + provider._manager.dialectic_query.return_value = "" # silent failure + provider._turn_count = 5 + provider._last_dialectic_turn = 0 # would fire (5 - 0 = 5 ≥ default cadence 1) + + provider.queue_prefetch("hello") + # wait for the background thread to settle + if provider._prefetch_thread: + provider._prefetch_thread.join(timeout=2.0) + + # Dialectic call was attempted + assert provider._manager.dialectic_query.call_count == 1 + # But cadence tracker did NOT advance — next turn should retry + assert provider._last_dialectic_turn == 0 + + def test_non_empty_dialectic_result_advances_cadence(self): + provider = self._make_provider() + provider._session_key = "test" + provider._manager.dialectic_query.return_value = "real synthesis output" + provider._turn_count = 5 + provider._last_dialectic_turn = 0 + + provider.queue_prefetch("hello") + if provider._prefetch_thread: + provider._prefetch_thread.join(timeout=2.0) + + assert 
provider._last_dialectic_turn == 5 + + def test_in_flight_thread_is_not_stacked(self): + import threading as _threading + provider = self._make_provider() + provider._session_key = "test" + provider._turn_count = 10 + provider._last_dialectic_turn = 0 + + # Simulate a prior thread still running + hold = _threading.Event() + + def _block(): + hold.wait(timeout=5.0) + + stale = _threading.Thread(target=_block, daemon=True) + stale.start() + provider._prefetch_thread = stale + + provider.queue_prefetch("hello") + # Should have short-circuited — no new dialectic call + assert provider._manager.dialectic_query.call_count == 0 + hold.set() + stale.join(timeout=2.0) + + +class TestSessionStartDialecticPrewarm: + """Session-start prewarm fires a depth-aware dialectic whose result is + consumed by turn 1 — no duplicate .chat() and no dead-cache orphaning.""" + + @staticmethod + def _make_provider(cfg_extra=None, dialectic_result="prewarm synthesis"): + from unittest.mock import patch, MagicMock + from plugins.memory.honcho.client import HonchoClientConfig + + defaults = dict(api_key="test-key", enabled=True, recall_mode="hybrid") + if cfg_extra: + defaults.update(cfg_extra) + cfg = HonchoClientConfig(**defaults) + provider = HonchoMemoryProvider() + mock_manager = MagicMock() + mock_manager.get_or_create.return_value = MagicMock(messages=[]) + mock_manager.get_prefetch_context.return_value = None + mock_manager.pop_context_result.return_value = None + mock_manager.dialectic_query.return_value = dialectic_result + + with patch("plugins.memory.honcho.client.HonchoClientConfig.from_global_config", return_value=cfg), \ + patch("plugins.memory.honcho.client.get_honcho_client", return_value=MagicMock()), \ + patch("plugins.memory.honcho.session.HonchoSessionManager", return_value=mock_manager), \ + patch("hermes_constants.get_hermes_home", return_value=MagicMock()): + provider.initialize(session_id="test-prewarm") + return provider + + def 
test_prewarm_populates_prefetch_result(self): + p = self._make_provider() + # Wait for prewarm thread to land + if p._prefetch_thread: + p._prefetch_thread.join(timeout=3.0) + with p._prefetch_lock: + assert p._prefetch_result == "prewarm synthesis" + assert p._last_dialectic_turn == 0 + + def test_turn1_consumes_prewarm_without_duplicate_dialectic(self): + """With prewarm result already in _prefetch_result, turn 1 prefetch + should NOT fire another dialectic.""" + p = self._make_provider() + if p._prefetch_thread: + p._prefetch_thread.join(timeout=3.0) + p._manager.dialectic_query.reset_mock() + p._session_key = "test-prewarm" + p._base_context_cache = "" + p._turn_count = 1 + + result = p.prefetch("hello world") + assert "prewarm synthesis" in result + # The sync first-turn path must NOT have fired another .chat() + assert p._manager.dialectic_query.call_count == 0 + + def test_turn1_falls_back_to_sync_when_prewarm_missing(self): + """If the prewarm produced nothing (empty graph, API blip), turn 1 + still fires its own sync dialectic.""" + p = self._make_provider(dialectic_result="") # prewarm returns empty + if p._prefetch_thread: + p._prefetch_thread.join(timeout=3.0) + with p._prefetch_lock: + assert p._prefetch_result == "" # prewarm landed nothing + # Switch dialectic_query to return something on the sync first-turn call + p._manager.dialectic_query.return_value = "sync recovery" + p._manager.dialectic_query.reset_mock() + p._session_key = "test-prewarm" + p._base_context_cache = "" + p._turn_count = 1 + + result = p.prefetch("hello world") + assert "sync recovery" in result + assert p._manager.dialectic_query.call_count == 1 + + +class TestDialecticLifecycleSmoke: + """End-to-end smoke: walks a realistic multi-turn session through every + behavior we care about — prewarm → turn 1 consume → trivial skip → cadence + fire → silent-failure retry → heuristic bump → session-end flush. 
+ + This is the 'velvet circuit' test: one provider, one flow, one set of + assertions. If the suite above lies about intent, this one catches it. + """ + + @staticmethod + def _make_provider(cfg_extra=None): + from unittest.mock import patch, MagicMock + from plugins.memory.honcho.client import HonchoClientConfig + + defaults = dict( + api_key="test-key", enabled=True, recall_mode="hybrid", + dialectic_reasoning_level="low", reasoning_heuristic=True, + reasoning_level_cap="high", dialectic_depth=1, + ) + if cfg_extra: + defaults.update(cfg_extra) + cfg = HonchoClientConfig(**defaults) + provider = HonchoMemoryProvider() + mock_manager = MagicMock() + mock_session = MagicMock() + mock_session.messages = [] + mock_manager.get_or_create.return_value = mock_session + mock_manager.get_prefetch_context.return_value = None + mock_manager.pop_context_result.return_value = None + + with patch("plugins.memory.honcho.client.HonchoClientConfig.from_global_config", return_value=cfg), \ + patch("plugins.memory.honcho.client.get_honcho_client", return_value=MagicMock()), \ + patch("plugins.memory.honcho.session.HonchoSessionManager", return_value=mock_manager), \ + patch("hermes_constants.get_hermes_home", return_value=MagicMock()): + return provider, mock_manager, cfg + + def _await_thread(self, provider): + if provider._prefetch_thread: + provider._prefetch_thread.join(timeout=3.0) + + def test_full_multi_turn_session(self): + """Walks init → turns 1..8 → session end. Asserts at every step that + the plugin did exactly what it should and nothing more. + + Uses dialecticCadence=3 so we can exercise skip-turns between fires + and the silent-failure retry path without their gates tripping each + other. Trivial + slash skips apply independent of cadence. + """ + from unittest.mock import patch, MagicMock + provider, mgr, cfg = self._make_provider( + cfg_extra={"raw": {"dialecticCadence": 3}} + ) + + # Program the dialectic responses in the exact order they'll be requested. 
+ # An extra or missing call fails the test — strong smoke signal. + responses = iter([ + "prewarm: user is eri, works on hermes", # session-start prewarm + "cadence fire: long query synthesis", # turn 4 queue_prefetch + "", # turn 7 fire: silent failure + "retry success: fresh synthesis", # turn 8 queue_prefetch retry + ]) + mgr.dialectic_query.side_effect = lambda *a, **kw: next(responses) + + # ---- init: prewarm fires ---- + with patch("plugins.memory.honcho.client.HonchoClientConfig.from_global_config", return_value=cfg), \ + patch("plugins.memory.honcho.client.get_honcho_client", return_value=MagicMock()), \ + patch("plugins.memory.honcho.session.HonchoSessionManager", return_value=mgr), \ + patch("hermes_constants.get_hermes_home", return_value=MagicMock()): + provider.initialize(session_id="smoke-test") + + self._await_thread(provider) + with provider._prefetch_lock: + assert provider._prefetch_result.startswith("prewarm"), \ + "session-start prewarm must land in _prefetch_result" + assert provider._last_dialectic_turn == 0, "prewarm marks turn 0" + assert mgr.dialectic_query.call_count == 1 + + # ---- turn 1: consume prewarm, no duplicate dialectic ---- + provider.on_turn_start(1, "hey") + inject1 = provider.prefetch("hey") + assert "prewarm" in inject1, "turn 1 must surface prewarm" + provider.sync_turn("hey", "hi there") + provider.queue_prefetch("hey") # cadence gate: (1-0)<3 → skip + self._await_thread(provider) + assert mgr.dialectic_query.call_count == 1, \ + "turn 1 must not fire — prewarm covered it and cadence skips" + + # ---- turn 2: trivial 'ok' → skip everything ---- + mgr.prefetch_context.reset_mock() + provider.on_turn_start(2, "ok") + assert provider.prefetch("ok") == "", "trivial prompt must short-circuit injection" + provider.sync_turn("ok", "cool") + provider.queue_prefetch("ok") + self._await_thread(provider) + assert mgr.dialectic_query.call_count == 1, "trivial must not fire dialectic" + assert mgr.prefetch_context.call_count == 0, 
"trivial must not fire context refresh" + + # ---- turn 3: slash '/help' → also skip ---- + provider.on_turn_start(3, "/help") + assert provider.prefetch("/help") == "" + provider.queue_prefetch("/help") + assert mgr.dialectic_query.call_count == 1 + + # ---- turn 4: long query → cadence fires + heuristic bumps ---- + long_q = "walk me through " + ("x " * 100) # ~200 chars → heuristic +1 + provider.on_turn_start(4, long_q) + provider.prefetch(long_q) + provider.sync_turn(long_q, "sure") + provider.queue_prefetch(long_q) # (4-0)≥3 → fires + self._await_thread(provider) + assert mgr.dialectic_query.call_count == 2, "turn 4 cadence fire" + _, kwargs = mgr.dialectic_query.call_args + assert kwargs.get("reasoning_level") in ("medium", "high"), \ + f"long query must bump reasoning level above 'low'; got {kwargs.get('reasoning_level')}" + assert provider._last_dialectic_turn == 4, "cadence tracker advances on success" + + # ---- turns 5–6: cadence cooldown, no fires ---- + for t in (5, 6): + provider.on_turn_start(t, "tell me more") + provider.queue_prefetch("tell me more") + self._await_thread(provider) + assert mgr.dialectic_query.call_count == 2, "turns 5–6 blocked by cadence window" + + # ---- turn 7: fires but silent failure (empty dialectic) ---- + provider.on_turn_start(7, "and then what") + provider.queue_prefetch("and then what") # (7-4)≥3 → fires + self._await_thread(provider) + assert mgr.dialectic_query.call_count == 3, "turn 7 fires" + assert provider._last_dialectic_turn == 4, \ + "silent failure must NOT burn the cadence window" + + # ---- turn 8: retries because cadence didn't advance ---- + provider.on_turn_start(8, "try again") + provider.queue_prefetch("try again") # (8-4)≥3 → fires again + self._await_thread(provider) + assert mgr.dialectic_query.call_count == 4, \ + "turn 8 retries because turn 7's empty result didn't advance cadence" + assert provider._last_dialectic_turn == 8, "retry success advances" + + # ---- session end: flush messages ---- + 
provider.on_session_end([]) + mgr.flush_all.assert_called() + + +class TestReasoningHeuristic: + """Restored char-count heuristic for auto-injected dialectic reasoning level. + + Pre-9a0ab34c behavior: scale base up by query length, capped at + reasoning_level_cap. 'max' is reserved for explicit tool-path selection. + """ + + @staticmethod + def _make_provider(cfg_extra=None): + from unittest.mock import patch, MagicMock + from plugins.memory.honcho.client import HonchoClientConfig + + defaults = dict( + api_key="test-key", enabled=True, recall_mode="hybrid", + dialectic_reasoning_level="low", reasoning_heuristic=True, + reasoning_level_cap="high", + ) + if cfg_extra: + defaults.update(cfg_extra) + cfg = HonchoClientConfig(**defaults) + provider = HonchoMemoryProvider() + mock_manager = MagicMock() + mock_manager.get_or_create.return_value = MagicMock(messages=[]) + with patch("plugins.memory.honcho.client.HonchoClientConfig.from_global_config", return_value=cfg), \ + patch("plugins.memory.honcho.client.get_honcho_client", return_value=MagicMock()), \ + patch("plugins.memory.honcho.session.HonchoSessionManager", return_value=mock_manager), \ + patch("hermes_constants.get_hermes_home", return_value=MagicMock()): + provider.initialize(session_id="test-heuristic") + _settle_prewarm(provider) + return provider + + def test_short_query_stays_at_base(self): + p = self._make_provider() + assert p._apply_reasoning_heuristic("low", "hey") == "low" + + def test_medium_query_bumps_one_level(self): + p = self._make_provider() + q = "x" * 150 + assert p._apply_reasoning_heuristic("low", q) == "medium" + + def test_long_query_bumps_two_levels(self): + p = self._make_provider() + q = "x" * 500 + assert p._apply_reasoning_heuristic("low", q) == "high" + + def test_bump_respects_cap(self): + p = self._make_provider(cfg_extra={"reasoning_level_cap": "medium"}) + q = "x" * 500 # would hit 'high' without the cap + assert p._apply_reasoning_heuristic("low", q) == "medium" + + def 
test_max_never_auto_selected_with_default_cap(self): + p = self._make_provider(cfg_extra={"dialectic_reasoning_level": "high"}) + q = "x" * 500 # base=high, bump would push to 'max' + assert p._apply_reasoning_heuristic("high", q) == "high" + + def test_heuristic_disabled_returns_base(self): + p = self._make_provider(cfg_extra={"reasoning_heuristic": False}) + q = "x" * 500 + assert p._apply_reasoning_heuristic("low", q) == "low" + + def test_resolve_pass_level_applies_heuristic_at_base_mapping(self): + """Depth=1, pass 0 maps to 'base' → heuristic applies.""" + p = self._make_provider() + q = "x" * 150 + assert p._resolve_pass_level(0, query=q) == "medium" + + def test_resolve_pass_level_does_not_touch_explicit_per_pass(self): + """dialecticDepthLevels wins absolutely — no heuristic scaling.""" + p = self._make_provider(cfg_extra={"dialectic_depth_levels": ["minimal"]}) + q = "x" * 500 # heuristic would otherwise bump to 'high' + assert p._resolve_pass_level(0, query=q) == "minimal" + + def test_resolve_pass_level_does_not_touch_lighter_passes(self): + """Depth 3 pass 0 is hardcoded 'minimal' — heuristic must not bump it.""" + p = self._make_provider(cfg_extra={"dialectic_depth": 3}) + q = "x" * 500 + assert p._resolve_pass_level(0, query=q) == "minimal" + # But the 'base' pass (idx 1 for depth 3) does get heuristic + assert p._resolve_pass_level(1, query=q) == "high" + + # --------------------------------------------------------------------------- # set_peer_card None guard # --------------------------------------------------------------------------- diff --git a/website/docs/user-guide/features/honcho.md b/website/docs/user-guide/features/honcho.md index 2040949d2..906a7c030 100644 --- a/website/docs/user-guide/features/honcho.md +++ b/website/docs/user-guide/features/honcho.md @@ -77,7 +77,7 @@ Cost and depth are controlled by three independent knobs: | Knob | Controls | Default | |------|----------|---------| | `contextCadence` | Turns between `context()` API 
calls (base layer refresh) | `1` | -| `dialecticCadence` | Turns between `peer.chat()` LLM calls (dialectic layer refresh) | `3` | +| `dialecticCadence` | Turns between `peer.chat()` LLM calls (dialectic layer refresh) | `1` | | `dialecticDepth` | Number of `.chat()` passes per dialectic invocation (1–3) | `1` | These are orthogonal — you can have frequent context refreshes with infrequent dialectic, or deep multi-pass dialectic at low frequency. Example: `contextCadence: 1, dialecticCadence: 5, dialecticDepth: 2` refreshes base context every turn, runs dialectic every 5 turns, and each dialectic run makes 2 passes. @@ -104,7 +104,7 @@ Honcho is configured in `~/.honcho/config.json` (global) or `$HERMES_HOME/honcho |-----|---------|-------------| | `contextTokens` | `null` (uncapped) | Token budget for auto-injected context per turn. Set to an integer (e.g. 1200) to cap. Truncates at word boundaries | | `contextCadence` | `1` | Minimum turns between `context()` API calls (base layer refresh) | -| `dialecticCadence` | `3` | Minimum turns between `peer.chat()` LLM calls (dialectic layer). In `tools` mode, irrelevant — model calls explicitly | +| `dialecticCadence` | `1` | Minimum turns between `peer.chat()` LLM calls (dialectic layer). In `tools` mode, irrelevant — model calls explicitly | | `dialecticDepth` | `1` | Number of `.chat()` passes per dialectic invocation. Clamped to 1–3 | | `dialecticDepthLevels` | `null` | Optional array of reasoning levels per pass, e.g. `["minimal", "low", "medium"]`. 
Overrides proportional defaults | | `dialecticReasoningLevel` | `'low'` | Base reasoning level: `minimal`, `low`, `medium`, `high`, `max` | diff --git a/website/docs/user-guide/features/memory-providers.md b/website/docs/user-guide/features/memory-providers.md index f571c7d48..181f30f7f 100644 --- a/website/docs/user-guide/features/memory-providers.md +++ b/website/docs/user-guide/features/memory-providers.md @@ -82,7 +82,7 @@ hermes memory setup # select "honcho" | `workspace` | host key | Shared workspace ID | | `contextTokens` | `null` (uncapped) | Token budget for auto-injected context per turn. Truncates at word boundaries | | `contextCadence` | `1` | Minimum turns between `context()` API calls (base layer refresh) | -| `dialecticCadence` | `3` | Minimum turns between `peer.chat()` LLM calls. Only applies to `hybrid`/`context` modes | +| `dialecticCadence` | `1` | Minimum turns between `peer.chat()` LLM calls. Only applies to `hybrid`/`context` modes | | `dialecticDepth` | `1` | Number of `.chat()` passes per dialectic invocation. Clamped 1–3. Pass 0: cold/warm prompt, pass 1: self-audit, pass 2: reconciliation | | `dialecticDepthLevels` | `null` | Optional array of reasoning levels per pass, e.g. `["minimal", "low", "medium"]`. Overrides proportional defaults | | `dialecticReasoningLevel` | `'low'` | Base reasoning level: `minimal`, `low`, `medium`, `high`, `max` | @@ -181,7 +181,7 @@ This inherits settings from the default `hermes` host block and creates new AI p }, "dialecticReasoningLevel": "low", "dialecticDynamic": true, - "dialecticCadence": 3, + "dialecticCadence": 1, "dialecticDepth": 1, "dialecticMaxChars": 600, "contextCadence": 1,