mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
feat(honcho): context injection overhaul, 5-tool surface, cost safety, session isolation (#10619)
Salvaged from PR #9884 by erosika. Cherry-picked plugin changes onto current main with minimal core modifications. Plugin changes (plugins/memory/honcho/): - New honcho_reasoning tool (5th tool, splits LLM calls from honcho_context) - Two-layer context injection: base context (summary + representation + card) on contextCadence, dialectic supplement on dialecticCadence - Multi-pass dialectic depth (1-3 passes) with early bail-out on strong signal - Cold/warm prompt selection based on session state - dialecticCadence defaults to 3 (was 1) — ~66% fewer Honcho LLM calls - Session summary injection for conversational continuity - Bidirectional peer targeting on all 5 tools - Correctness fixes: peer param fallback, None guard on set_peer_card, schema validation, signal_sufficient anchored regex, mid->medium level fix Core changes (~20 lines across 3 files): - agent/memory_manager.py: Enhanced sanitize_context() to strip full <memory-context> blocks and system notes (prevents leak from saveMessages) - run_agent.py: gateway_session_key param for stable per-chat Honcho sessions, on_turn_start() call before prefetch_all() for cadence tracking, sanitize_context() on user messages to strip leaked memory blocks - gateway/run.py: skip_memory=True on 2 temp agents (prevents orphan sessions), gateway_session_key threading to main agent Tests: 509 passed (3 skipped — honcho SDK not installed locally) Docs: Updated honcho.md, memory-providers.md, tools-reference.md, SKILL.md Co-authored-by: erosika <erosika@users.noreply.github.com>
This commit is contained in:
parent
00ff9a26cd
commit
cc6e8941db
17 changed files with 2632 additions and 396 deletions
|
|
@ -58,7 +58,8 @@ def resolve_config_path() -> Path:
|
|||
|
||||
Resolution order:
|
||||
1. $HERMES_HOME/honcho.json (profile-local, if it exists)
|
||||
2. ~/.honcho/config.json (global, cross-app interop)
|
||||
2. ~/.hermes/honcho.json (default profile — shared host blocks live here)
|
||||
3. ~/.honcho/config.json (global, cross-app interop)
|
||||
|
||||
Returns the global path if none exist (for first-time setup writes).
|
||||
"""
|
||||
|
|
@ -66,6 +67,11 @@ def resolve_config_path() -> Path:
|
|||
if local_path.exists():
|
||||
return local_path
|
||||
|
||||
# Default profile's config — host blocks accumulate here via setup/clone
|
||||
default_path = Path.home() / ".hermes" / "honcho.json"
|
||||
if default_path != local_path and default_path.exists():
|
||||
return default_path
|
||||
|
||||
return GLOBAL_CONFIG_PATH
|
||||
|
||||
|
||||
|
|
@ -88,6 +94,68 @@ def _resolve_bool(host_val, root_val, *, default: bool) -> bool:
|
|||
return default
|
||||
|
||||
|
||||
def _parse_context_tokens(host_val, root_val) -> int | None:
|
||||
"""Parse contextTokens: host wins, then root, then None (uncapped)."""
|
||||
for val in (host_val, root_val):
|
||||
if val is not None:
|
||||
try:
|
||||
return int(val)
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
return None
|
||||
|
||||
|
||||
def _parse_dialectic_depth(host_val, root_val) -> int:
|
||||
"""Parse dialecticDepth: host wins, then root, then 1. Clamped to 1-3."""
|
||||
for val in (host_val, root_val):
|
||||
if val is not None:
|
||||
try:
|
||||
return max(1, min(int(val), 3))
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
return 1
|
||||
|
||||
|
||||
_VALID_REASONING_LEVELS = ("minimal", "low", "medium", "high", "max")
|
||||
|
||||
|
||||
def _parse_dialectic_depth_levels(host_val, root_val, depth: int) -> list[str] | None:
|
||||
"""Parse dialecticDepthLevels: optional array of reasoning levels per pass.
|
||||
|
||||
Returns None when not configured (use proportional defaults).
|
||||
When configured, validates each level and truncates/pads to match depth.
|
||||
"""
|
||||
for val in (host_val, root_val):
|
||||
if val is not None and isinstance(val, list):
|
||||
levels = [
|
||||
lvl if lvl in _VALID_REASONING_LEVELS else "low"
|
||||
for lvl in val[:depth]
|
||||
]
|
||||
# Pad with "low" if array is shorter than depth
|
||||
while len(levels) < depth:
|
||||
levels.append("low")
|
||||
return levels
|
||||
return None
|
||||
|
||||
|
||||
def _resolve_optional_float(*values: Any) -> float | None:
|
||||
"""Return the first non-empty value coerced to a positive float."""
|
||||
for value in values:
|
||||
if value is None:
|
||||
continue
|
||||
if isinstance(value, str):
|
||||
value = value.strip()
|
||||
if not value:
|
||||
continue
|
||||
try:
|
||||
parsed = float(value)
|
||||
except (TypeError, ValueError):
|
||||
continue
|
||||
if parsed > 0:
|
||||
return parsed
|
||||
return None
|
||||
|
||||
|
||||
# Canonical observation-mode values.
_VALID_OBSERVATION_MODES = {"unified", "directional"}
# Synonym spellings normalized onto the canonical modes above.
_OBSERVATION_MODE_ALIASES = {"shared": "unified", "separate": "directional", "cross": "directional"}
|
||||
|
||||
|
|
@ -153,6 +221,8 @@ class HonchoClientConfig:
|
|||
environment: str = "production"
|
||||
# Optional base URL for self-hosted Honcho (overrides environment mapping)
|
||||
base_url: str | None = None
|
||||
# Optional request timeout in seconds for Honcho SDK HTTP calls
|
||||
timeout: float | None = None
|
||||
# Identity
|
||||
peer_name: str | None = None
|
||||
ai_peer: str = "hermes"
|
||||
|
|
@ -162,17 +232,25 @@ class HonchoClientConfig:
|
|||
# Write frequency: "async" (background thread), "turn" (sync per turn),
|
||||
# "session" (flush on session end), or int (every N turns)
|
||||
write_frequency: str | int = "async"
|
||||
# Prefetch budget
|
||||
# Prefetch budget (None = no cap; set to an integer to bound auto-injected context)
|
||||
context_tokens: int | None = None
|
||||
# Dialectic (peer.chat) settings
|
||||
# reasoning_level: "minimal" | "low" | "medium" | "high" | "max"
|
||||
dialectic_reasoning_level: str = "low"
|
||||
# dynamic: auto-bump reasoning level based on query length
|
||||
# true — low->medium (120+ chars), low->high (400+ chars), capped at "high"
|
||||
# false — always use dialecticReasoningLevel as-is
|
||||
# When true, the model can override reasoning_level per-call via the
|
||||
# honcho_reasoning tool param (agentic). When false, always uses
|
||||
# dialecticReasoningLevel and ignores model-provided overrides.
|
||||
dialectic_dynamic: bool = True
|
||||
# Max chars of dialectic result to inject into Hermes system prompt
|
||||
dialectic_max_chars: int = 600
|
||||
# Dialectic depth: how many .chat() calls per dialectic cycle (1-3).
|
||||
# Depth 1: single call. Depth 2: self-audit + targeted synthesis.
|
||||
# Depth 3: self-audit + synthesis + reconciliation.
|
||||
dialectic_depth: int = 1
|
||||
# Optional per-pass reasoning level override. Array of reasoning levels
|
||||
# matching dialectic_depth length. When None, uses proportional defaults
|
||||
# derived from dialectic_reasoning_level.
|
||||
dialectic_depth_levels: list[str] | None = None
|
||||
# Honcho API limits — configurable for self-hosted instances
|
||||
# Max chars per message sent via add_messages() (Honcho cloud: 25000)
|
||||
message_max_chars: int = 25000
|
||||
|
|
@ -183,10 +261,8 @@ class HonchoClientConfig:
|
|||
# "context" — auto-injected context only, Honcho tools removed
|
||||
# "tools" — Honcho tools only, no auto-injected context
|
||||
recall_mode: str = "hybrid"
|
||||
# When True and recallMode is "tools", create the Honcho session eagerly
|
||||
# during initialize() instead of deferring to the first tool call.
|
||||
# This ensures sync_turn() can write from the very first turn.
|
||||
# Does NOT enable automatic context injection — only changes init timing.
|
||||
# Eager init in tools mode — when true, initializes session during
|
||||
# initialize() instead of deferring to first tool call
|
||||
init_on_session_start: bool = False
|
||||
# Observation mode: legacy string shorthand ("directional" or "unified").
|
||||
# Kept for backward compat; granular per-peer booleans below are preferred.
|
||||
|
|
@ -218,12 +294,14 @@ class HonchoClientConfig:
|
|||
resolved_host = host or resolve_active_host()
|
||||
api_key = os.environ.get("HONCHO_API_KEY")
|
||||
base_url = os.environ.get("HONCHO_BASE_URL", "").strip() or None
|
||||
timeout = _resolve_optional_float(os.environ.get("HONCHO_TIMEOUT"))
|
||||
return cls(
|
||||
host=resolved_host,
|
||||
workspace_id=workspace_id,
|
||||
api_key=api_key,
|
||||
environment=os.environ.get("HONCHO_ENVIRONMENT", "production"),
|
||||
base_url=base_url,
|
||||
timeout=timeout,
|
||||
ai_peer=resolved_host,
|
||||
enabled=bool(api_key or base_url),
|
||||
)
|
||||
|
|
@ -284,6 +362,11 @@ class HonchoClientConfig:
|
|||
or os.environ.get("HONCHO_BASE_URL", "").strip()
|
||||
or None
|
||||
)
|
||||
timeout = _resolve_optional_float(
|
||||
raw.get("timeout"),
|
||||
raw.get("requestTimeout"),
|
||||
os.environ.get("HONCHO_TIMEOUT"),
|
||||
)
|
||||
|
||||
# Auto-enable when API key or base_url is present (unless explicitly disabled)
|
||||
# Host-level enabled wins, then root-level, then auto-enable if key/url exists.
|
||||
|
|
@ -329,12 +412,16 @@ class HonchoClientConfig:
|
|||
api_key=api_key,
|
||||
environment=environment,
|
||||
base_url=base_url,
|
||||
timeout=timeout,
|
||||
peer_name=host_block.get("peerName") or raw.get("peerName"),
|
||||
ai_peer=ai_peer,
|
||||
enabled=enabled,
|
||||
save_messages=save_messages,
|
||||
write_frequency=write_frequency,
|
||||
context_tokens=host_block.get("contextTokens") or raw.get("contextTokens"),
|
||||
context_tokens=_parse_context_tokens(
|
||||
host_block.get("contextTokens"),
|
||||
raw.get("contextTokens"),
|
||||
),
|
||||
dialectic_reasoning_level=(
|
||||
host_block.get("dialecticReasoningLevel")
|
||||
or raw.get("dialecticReasoningLevel")
|
||||
|
|
@ -350,6 +437,15 @@ class HonchoClientConfig:
|
|||
or raw.get("dialecticMaxChars")
|
||||
or 600
|
||||
),
|
||||
dialectic_depth=_parse_dialectic_depth(
|
||||
host_block.get("dialecticDepth"),
|
||||
raw.get("dialecticDepth"),
|
||||
),
|
||||
dialectic_depth_levels=_parse_dialectic_depth_levels(
|
||||
host_block.get("dialecticDepthLevels"),
|
||||
raw.get("dialecticDepthLevels"),
|
||||
depth=_parse_dialectic_depth(host_block.get("dialecticDepth"), raw.get("dialecticDepth")),
|
||||
),
|
||||
message_max_chars=int(
|
||||
host_block.get("messageMaxChars")
|
||||
or raw.get("messageMaxChars")
|
||||
|
|
@ -416,16 +512,18 @@ class HonchoClientConfig:
|
|||
cwd: str | None = None,
|
||||
session_title: str | None = None,
|
||||
session_id: str | None = None,
|
||||
gateway_session_key: str | None = None,
|
||||
) -> str | None:
|
||||
"""Resolve Honcho session name.
|
||||
|
||||
Resolution order:
|
||||
1. Manual directory override from sessions map
|
||||
2. Hermes session title (from /title command)
|
||||
3. per-session strategy — Hermes session_id ({timestamp}_{hex})
|
||||
4. per-repo strategy — git repo root directory name
|
||||
5. per-directory strategy — directory basename
|
||||
6. global strategy — workspace name
|
||||
3. Gateway session key (stable per-chat identifier from gateway platforms)
|
||||
4. per-session strategy — Hermes session_id ({timestamp}_{hex})
|
||||
5. per-repo strategy — git repo root directory name
|
||||
6. per-directory strategy — directory basename
|
||||
7. global strategy — workspace name
|
||||
"""
|
||||
import re
|
||||
|
||||
|
|
@ -439,12 +537,22 @@ class HonchoClientConfig:
|
|||
|
||||
# /title mid-session remap
|
||||
if session_title:
|
||||
sanitized = re.sub(r'[^a-zA-Z0-9_-]', '-', session_title).strip('-')
|
||||
sanitized = re.sub(r'[^a-zA-Z0-9_-]+', '-', session_title).strip('-')
|
||||
if sanitized:
|
||||
if self.session_peer_prefix and self.peer_name:
|
||||
return f"{self.peer_name}-{sanitized}"
|
||||
return sanitized
|
||||
|
||||
# Gateway session key: stable per-chat identifier passed by the gateway
|
||||
# (e.g. "agent:main:telegram:dm:8439114563"). Sanitize colons to hyphens
|
||||
# for Honcho session ID compatibility. This takes priority over strategy-
|
||||
# based resolution because gateway platforms need per-chat isolation that
|
||||
# cwd-based strategies cannot provide.
|
||||
if gateway_session_key:
|
||||
sanitized = re.sub(r'[^a-zA-Z0-9_-]+', '-', gateway_session_key).strip('-')
|
||||
if sanitized:
|
||||
return sanitized
|
||||
|
||||
# per-session: inherit Hermes session_id (new Honcho session each run)
|
||||
if self.session_strategy == "per-session" and session_id:
|
||||
if self.session_peer_prefix and self.peer_name:
|
||||
|
|
@ -506,13 +614,20 @@ def get_honcho_client(config: HonchoClientConfig | None = None) -> Honcho:
|
|||
# mapping, enabling remote self-hosted Honcho deployments without
|
||||
# requiring the server to live on localhost.
|
||||
resolved_base_url = config.base_url
|
||||
if not resolved_base_url:
|
||||
resolved_timeout = config.timeout
|
||||
if not resolved_base_url or resolved_timeout is None:
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
hermes_cfg = load_config()
|
||||
honcho_cfg = hermes_cfg.get("honcho", {})
|
||||
if isinstance(honcho_cfg, dict):
|
||||
resolved_base_url = honcho_cfg.get("base_url", "").strip() or None
|
||||
if not resolved_base_url:
|
||||
resolved_base_url = honcho_cfg.get("base_url", "").strip() or None
|
||||
if resolved_timeout is None:
|
||||
resolved_timeout = _resolve_optional_float(
|
||||
honcho_cfg.get("timeout"),
|
||||
honcho_cfg.get("request_timeout"),
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
|
@ -547,6 +662,8 @@ def get_honcho_client(config: HonchoClientConfig | None = None) -> Honcho:
|
|||
}
|
||||
if resolved_base_url:
|
||||
kwargs["base_url"] = resolved_base_url
|
||||
if resolved_timeout is not None:
|
||||
kwargs["timeout"] = resolved_timeout
|
||||
|
||||
_honcho_client = Honcho(**kwargs)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue