diff --git a/optional-skills/autonomous-ai-agents/honcho/SKILL.md b/optional-skills/autonomous-ai-agents/honcho/SKILL.md index e79875aa0..1c099ca60 100644 --- a/optional-skills/autonomous-ai-agents/honcho/SKILL.md +++ b/optional-skills/autonomous-ai-agents/honcho/SKILL.md @@ -145,7 +145,7 @@ Controls **how often** dialectic and context calls happen. | Key | Default | Description | |-----|---------|-------------| | `contextCadence` | `1` | Min turns between context API calls | -| `dialecticCadence` | `1` (wizard: `2`) | Min turns between dialectic API calls. Unset → every turn; wizard pre-fills `2` | +| `dialecticCadence` | `2` (wizard); `1` if unset | Min turns between dialectic API calls. Recommended 1–5. Configs without the key fall back to `1` (every turn) | | `injectionFrequency` | `every-turn` | `every-turn` or `first-turn` for base context injection | Higher cadence values fire the dialectic LLM less often. `dialecticCadence: 2` means the engine fires every other turn. Setting it to `1` fires every turn. @@ -370,7 +370,7 @@ Config file: `$HERMES_HOME/honcho.json` (profile-local) or `~/.honcho/config.jso | `contextTokens` | uncapped | Max tokens for the combined base context injection (summary + representation + card). Opt-in cap — omit to leave uncapped, set to an integer to bound injection size. | | `injectionFrequency` | `every-turn` | `every-turn` or `first-turn` | | `contextCadence` | `1` | Min turns between context API calls | -| `dialecticCadence` | `1` (wizard: `2`) | Min turns between dialectic LLM calls | +| `dialecticCadence` | `2` (wizard); `1` if unset | Min turns between dialectic LLM calls (recommended 1–5) | The `contextTokens` budget is enforced at injection time. If the session summary + representation + card exceed the budget, Honcho trims the summary first, then the representation, preserving the card. This prevents context blowup in long sessions. 
diff --git a/plugins/memory/honcho/__init__.py b/plugins/memory/honcho/__init__.py index d104deb5d..6ca32c1dc 100644 --- a/plugins/memory/honcho/__init__.py +++ b/plugins/memory/honcho/__init__.py @@ -207,7 +207,7 @@ class HonchoMemoryProvider(MemoryProvider): self._turn_count = 0 self._injection_frequency = "every-turn" # or "first-turn" self._context_cadence = 1 # minimum turns between context API calls - self._dialectic_cadence = 1 # minimum turns between dialectic API calls + self._dialectic_cadence = 1 # backwards-compat fallback; wizard writes 2 on new configs self._dialectic_depth = 1 # how many .chat() calls per dialectic cycle (1-3) self._dialectic_depth_levels: list[str] | None = None # per-pass reasoning levels self._reasoning_heuristic: bool = True # scale base level by query length @@ -304,6 +304,10 @@ class HonchoMemoryProvider(MemoryProvider): raw = cfg.raw or {} self._injection_frequency = raw.get("injectionFrequency", "every-turn") self._context_cadence = int(raw.get("contextCadence", 1)) + # Backwards-compat: unset dialecticCadence falls back to 1 + # (every turn) so existing honcho.json configs without the key + # behave as they did before. New setups via `hermes honcho setup` + # get dialecticCadence=2 written explicitly by the wizard. self._dialectic_cadence = int(raw.get("dialecticCadence", 1)) self._dialectic_depth = max(1, min(cfg.dialectic_depth, 3)) self._dialectic_depth_levels = cfg.dialectic_depth_levels @@ -844,9 +848,7 @@ class HonchoMemoryProvider(MemoryProvider): def _apply_reasoning_heuristic(self, base: str, query: str) -> str: """Scale `base` up by query length, clamped at reasoning_level_cap. - Char-count heuristic: +1 at >=120 chars, +2 at >=400. Ceiling is - reasoning_level_cap (default 'high' — 'max' is reserved for - explicit tool-path selection). + Char-count heuristic: +1 at >=120 chars, +2 at >=400. 
""" if not self._reasoning_heuristic or not query: return base diff --git a/plugins/memory/honcho/cli.py b/plugins/memory/honcho/cli.py index eb21c48ea..5c829a4c9 100644 --- a/plugins/memory/honcho/cli.py +++ b/plugins/memory/honcho/cli.py @@ -463,7 +463,8 @@ def cmd_setup(args) -> None: current_dialectic = str(hermes_host.get("dialecticCadence") or cfg.get("dialecticCadence") or "2") print("\n Dialectic cadence:") print(" How often Honcho rebuilds its user model (LLM call on Honcho backend).") - print(" 1 = every turn, 2 = every other turn (wizard default), 3+ = sparse.") + print(" 1 = every turn, 2 = every other turn, 3+ = sparser.") + print(" Recommended: 1-5.") new_dialectic = _prompt("Dialectic cadence", default=current_dialectic) try: val = int(new_dialectic) @@ -472,6 +473,25 @@ def cmd_setup(args) -> None: except (ValueError, TypeError): hermes_host["dialecticCadence"] = 2 + # --- 7c. Dialectic reasoning level --- + current_reasoning = ( + hermes_host.get("dialecticReasoningLevel") + or cfg.get("dialecticReasoningLevel") + or "low" + ) + print("\n Dialectic reasoning level:") + print(" Depth Honcho uses when synthesizing user context on auto-injected calls.") + print(" minimal -- quick factual lookups") + print(" low -- straightforward questions (default)") + print(" medium -- multi-aspect synthesis") + print(" high -- complex behavioral patterns") + print(" max -- thorough audit-level analysis") + new_reasoning = _prompt("Reasoning level", default=current_reasoning) + if new_reasoning in ("minimal", "low", "medium", "high", "max"): + hermes_host["dialecticReasoningLevel"] = new_reasoning + else: + hermes_host["dialecticReasoningLevel"] = "low" + # --- 8. 
Session strategy --- current_strat = hermes_host.get("sessionStrategy") or cfg.get("sessionStrategy", "per-session") print("\n Session strategy:") diff --git a/plugins/memory/honcho/client.py b/plugins/memory/honcho/client.py index 346c2b76e..fef2e2d58 100644 --- a/plugins/memory/honcho/client.py +++ b/plugins/memory/honcho/client.py @@ -254,8 +254,7 @@ class HonchoClientConfig: # When true, the auto-injected dialectic scales reasoning level up on # longer queries. See HonchoMemoryProvider for thresholds. reasoning_heuristic: bool = True - # Ceiling for the heuristic-selected reasoning level. "max" is reserved - # for explicit tool-path selection. + # Ceiling for the heuristic-selected reasoning level. reasoning_level_cap: str = "high" # Honcho API limits — configurable for self-hosted instances # Max chars per message sent via add_messages() (Honcho cloud: 25000) diff --git a/tests/honcho_plugin/test_session.py b/tests/honcho_plugin/test_session.py index f2a660292..254261183 100644 --- a/tests/honcho_plugin/test_session.py +++ b/tests/honcho_plugin/test_session.py @@ -865,8 +865,10 @@ class TestDialecticCadenceDefaults: _settle_prewarm(provider) return provider - def test_default_is_1(self): - """Default dialectic_cadence is 1 — fires every turn unless overridden.""" + def test_unset_falls_back_to_1(self): + """Unset dialecticCadence falls back to 1 (every turn) for backwards + compatibility with existing configs that predate the setting. The + setup wizard writes 2 explicitly on new configs.""" provider = self._make_provider() assert provider._dialectic_cadence == 1 @@ -1569,8 +1571,7 @@ class TestDialecticLifecycleSmoke: class TestReasoningHeuristic: """Char-count heuristic that scales the auto-injected reasoning level by - query length, clamped at reasoning_level_cap. 
'max' is reserved for - explicit tool-path selection.""" + query length, clamped at reasoning_level_cap.""" @staticmethod def _make_provider(cfg_extra=None): diff --git a/website/docs/user-guide/features/honcho.md b/website/docs/user-guide/features/honcho.md index bf4b5c6bc..60e82b4b0 100644 --- a/website/docs/user-guide/features/honcho.md +++ b/website/docs/user-guide/features/honcho.md @@ -77,7 +77,7 @@ Cost and depth are controlled by three independent knobs: | Knob | Controls | Default | |------|----------|---------| | `contextCadence` | Turns between `context()` API calls (base layer refresh) | `1` | -| `dialecticCadence` | Turns between `peer.chat()` LLM calls (dialectic layer refresh) | `1` (code default) / `2` (setup wizard default) | +| `dialecticCadence` | Turns between `peer.chat()` LLM calls (dialectic layer refresh) | `2` via setup wizard; `1` if the key is unset (recommended 1–5) | | `dialecticDepth` | Number of `.chat()` passes per dialectic invocation (1–3) | `1` | These are orthogonal — you can have frequent context refreshes with infrequent dialectic, or deep multi-pass dialectic at low frequency. Example: `contextCadence: 1, dialecticCadence: 5, dialecticDepth: 2` refreshes base context every turn, runs dialectic every 5 turns, and each dialectic run makes 2 passes. @@ -100,7 +100,7 @@ On session init, Honcho fires a dialectic call in the background at the full con ### Query-Adaptive Reasoning Level -The auto-injected dialectic scales `dialecticReasoningLevel` by query length: +1 level at ≥120 chars, +2 at ≥400, clamped at `reasoningLevelCap` (default `"high"`). Disable with `reasoningHeuristic: false` to pin every auto call to `dialecticReasoningLevel`. `"max"` is reserved for explicit tool-path selection via `honcho_reasoning`. +The auto-injected dialectic scales `dialecticReasoningLevel` by query length: +1 level at ≥120 chars, +2 at ≥400, clamped at `reasoningLevelCap` (default `"high"`). Disable with `reasoningHeuristic: false` to pin every auto call to `dialecticReasoningLevel`. 
Available levels: `minimal`, `low`, `medium`, `high`, `max`. ## Configuration Options @@ -112,7 +112,7 @@ Honcho is configured in `~/.honcho/config.json` (global) or `$HERMES_HOME/honcho |-----|---------|-------------| | `contextTokens` | `null` (uncapped) | Token budget for auto-injected context per turn. Set to an integer (e.g. 1200) to cap. Truncates at word boundaries | | `contextCadence` | `1` | Minimum turns between `context()` API calls (base layer refresh) | -| `dialecticCadence` | `1` (wizard sets `2`) | Minimum turns between `peer.chat()` LLM calls (dialectic layer). Code default fires every turn when the key is unset; the setup wizard pre-fills `2`. In `tools` mode, irrelevant — model calls explicitly | +| `dialecticCadence` | `2` (wizard); `1` if unset | Minimum turns between `peer.chat()` LLM calls (dialectic layer). Recommended 1–5. Configs without the key fall back to `1` (every turn). In `tools` mode, irrelevant — model calls explicitly | | `dialecticDepth` | `1` | Number of `.chat()` passes per dialectic invocation. Clamped to 1–3 | | `dialecticDepthLevels` | `null` | Optional array of reasoning levels per pass, e.g. `["minimal", "low", "medium"]`. Overrides proportional defaults | | `dialecticReasoningLevel` | `'low'` | Base reasoning level: `minimal`, `low`, `medium`, `high`, `max` | @@ -183,7 +183,7 @@ Common patterns: | AI shouldn't re-model the user from its own replies | `"ai": {"observeMe": true, "observeOthers": false}` | | Strong persona the AI peer shouldn't update from self-observation | `"ai": {"observeMe": false, "observeOthers": true}` | -Server-side toggles set via the Honcho dashboard win over local defaults — Hermes syncs them back at session init. +Server-side toggles set via the [Honcho dashboard](https://app.honcho.dev) win over local defaults — Hermes syncs them back at session init. 
## Tools diff --git a/website/docs/user-guide/features/memory-providers.md b/website/docs/user-guide/features/memory-providers.md index b2469a13e..d11c36657 100644 --- a/website/docs/user-guide/features/memory-providers.md +++ b/website/docs/user-guide/features/memory-providers.md @@ -82,7 +82,7 @@ hermes memory setup # select "honcho" | `workspace` | host key | Shared workspace ID | | `contextTokens` | `null` (uncapped) | Token budget for auto-injected context per turn. Truncates at word boundaries | | `contextCadence` | `1` | Minimum turns between `context()` API calls (base layer refresh) | -| `dialecticCadence` | `1` (wizard sets `2`) | Minimum turns between `peer.chat()` LLM calls. Unset → every turn; wizard pre-fills `2`. Only applies to `hybrid`/`context` modes | +| `dialecticCadence` | `2` (wizard); `1` if unset | Minimum turns between `peer.chat()` LLM calls. Recommended 1–5. Configs without the key fall back to `1` (every turn). Only applies to `hybrid`/`context` modes | | `dialecticDepth` | `1` | Number of `.chat()` passes per dialectic invocation. Clamped 1–3. Pass 0: cold/warm prompt, pass 1: self-audit, pass 2: reconciliation | | `dialecticDepthLevels` | `null` | Optional array of reasoning levels per pass, e.g. `["minimal", "low", "medium"]`. Overrides proportional defaults | | `dialecticReasoningLevel` | `'low'` | Base reasoning level: `minimal`, `low`, `medium`, `high`, `max` |