mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-02 02:01:47 +00:00
feat(honcho): context injection overhaul, 5-tool surface, cost safety, session isolation
Context Injection Overhaul:
- Base layer: peer.context() (representation + card) cached with 5-minute TTL
- Dialectic supplement: cadence-gated, cached until next refresh
- Trivial prompt skip: short inputs/slash commands skip injection
- New peer guard: dialectic skipped at session start when peer has no context
- Targeted warm prompt for better dialectic quality

Tool Surface (5 bidirectional tools):
- honcho_profile: read or update peer card
- honcho_search: semantic search over context
- honcho_context: full session context (summary, representation, card, messages)
- honcho_reasoning: synthesized answer, reasoning_level param
- honcho_conclude: create or delete conclusions (PII removal)

Cost Safety:
- dialectic_cadence defaults to 3 (~66% fewer LLM calls)
- context_tokens defaults to uncapped (cap opt-in via config/wizard)
- on_turn_start hook wired up (fixes broken cadence/injection gating)

Correctness:
- Explicit target= on peer context/card fetches (fixes identity blur)
- honcho_search perspective fix under directional observation
- Timeout config plumbing
- peerName precedence over gateway user_id
- skip_memory on temp agents (orphan session prevention)
- gateway_session_key for stable per-chat session continuity
- initOnSessionStart for eager tools-mode init
- get_session_context fallback respects peer param
- mid -> medium in reasoning level validation

ABC changes (minimal, honcho-only):
- run_agent.py: gateway_session_key param + memory provider wiring (+5 lines)
- gateway/run.py: skip_memory on 2 temp agents, gateway_session_key on main agent (+3 lines)
- agent/memory_manager.py: sanitize regex for context tag variants (+9 lines)
This commit is contained in:
parent
95d11dfd8e
commit
11b4c9ecf9
16 changed files with 1283 additions and 331 deletions
|
|
@ -94,6 +94,35 @@ def _resolve_bool(host_val, root_val, *, default: bool) -> bool:
|
|||
return default
|
||||
|
||||
|
||||
def _parse_context_tokens(host_val, root_val) -> int | None:
|
||||
"""Parse contextTokens: host wins, then root, then None (uncapped)."""
|
||||
for val in (host_val, root_val):
|
||||
if val is not None:
|
||||
try:
|
||||
return int(val)
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
return None
|
||||
|
||||
|
||||
def _resolve_optional_float(*values: Any) -> float | None:
|
||||
"""Return the first non-empty value coerced to a positive float."""
|
||||
for value in values:
|
||||
if value is None:
|
||||
continue
|
||||
if isinstance(value, str):
|
||||
value = value.strip()
|
||||
if not value:
|
||||
continue
|
||||
try:
|
||||
parsed = float(value)
|
||||
except (TypeError, ValueError):
|
||||
continue
|
||||
if parsed > 0:
|
||||
return parsed
|
||||
return None
|
||||
|
||||
|
||||
_VALID_OBSERVATION_MODES = {"unified", "directional"}
|
||||
_OBSERVATION_MODE_ALIASES = {"shared": "unified", "separate": "directional", "cross": "directional"}
|
||||
|
||||
|
|
@ -159,6 +188,8 @@ class HonchoClientConfig:
|
|||
environment: str = "production"
|
||||
# Optional base URL for self-hosted Honcho (overrides environment mapping)
|
||||
base_url: str | None = None
|
||||
# Optional request timeout in seconds for Honcho SDK HTTP calls
|
||||
timeout: float | None = None
|
||||
# Identity
|
||||
peer_name: str | None = None
|
||||
ai_peer: str = "hermes"
|
||||
|
|
@ -168,14 +199,14 @@ class HonchoClientConfig:
|
|||
# Write frequency: "async" (background thread), "turn" (sync per turn),
|
||||
# "session" (flush on session end), or int (every N turns)
|
||||
write_frequency: str | int = "async"
|
||||
# Prefetch budget
|
||||
# Prefetch budget (None = no cap; set to an integer to bound auto-injected context)
|
||||
context_tokens: int | None = None
|
||||
# Dialectic (peer.chat) settings
|
||||
# reasoning_level: "minimal" | "low" | "medium" | "high" | "max"
|
||||
dialectic_reasoning_level: str = "low"
|
||||
# dynamic: auto-bump reasoning level based on query length
|
||||
# true — low->medium (120+ chars), low->high (400+ chars), capped at "high"
|
||||
# false — always use dialecticReasoningLevel as-is
|
||||
# When true, the model can override reasoning_level per-call via the
|
||||
# honcho_reasoning tool param (agentic). When false, always uses
|
||||
# dialecticReasoningLevel and ignores model-provided overrides.
|
||||
dialectic_dynamic: bool = True
|
||||
# Max chars of dialectic result to inject into Hermes system prompt
|
||||
dialectic_max_chars: int = 600
|
||||
|
|
@ -189,10 +220,8 @@ class HonchoClientConfig:
|
|||
# "context" — auto-injected context only, Honcho tools removed
|
||||
# "tools" — Honcho tools only, no auto-injected context
|
||||
recall_mode: str = "hybrid"
|
||||
# When True and recallMode is "tools", create the Honcho session eagerly
|
||||
# during initialize() instead of deferring to the first tool call.
|
||||
# This ensures sync_turn() can write from the very first turn.
|
||||
# Does NOT enable automatic context injection — only changes init timing.
|
||||
# Eager init in tools mode — when true, initializes session during
|
||||
# initialize() instead of deferring to first tool call
|
||||
init_on_session_start: bool = False
|
||||
# Observation mode: legacy string shorthand ("directional" or "unified").
|
||||
# Kept for backward compat; granular per-peer booleans below are preferred.
|
||||
|
|
@ -224,12 +253,14 @@ class HonchoClientConfig:
|
|||
resolved_host = host or resolve_active_host()
|
||||
api_key = os.environ.get("HONCHO_API_KEY")
|
||||
base_url = os.environ.get("HONCHO_BASE_URL", "").strip() or None
|
||||
timeout = _resolve_optional_float(os.environ.get("HONCHO_TIMEOUT"))
|
||||
return cls(
|
||||
host=resolved_host,
|
||||
workspace_id=workspace_id,
|
||||
api_key=api_key,
|
||||
environment=os.environ.get("HONCHO_ENVIRONMENT", "production"),
|
||||
base_url=base_url,
|
||||
timeout=timeout,
|
||||
ai_peer=resolved_host,
|
||||
enabled=bool(api_key or base_url),
|
||||
)
|
||||
|
|
@ -290,6 +321,11 @@ class HonchoClientConfig:
|
|||
or os.environ.get("HONCHO_BASE_URL", "").strip()
|
||||
or None
|
||||
)
|
||||
timeout = _resolve_optional_float(
|
||||
raw.get("timeout"),
|
||||
raw.get("requestTimeout"),
|
||||
os.environ.get("HONCHO_TIMEOUT"),
|
||||
)
|
||||
|
||||
# Auto-enable when API key or base_url is present (unless explicitly disabled)
|
||||
# Host-level enabled wins, then root-level, then auto-enable if key/url exists.
|
||||
|
|
@ -335,17 +371,22 @@ class HonchoClientConfig:
|
|||
api_key=api_key,
|
||||
environment=environment,
|
||||
base_url=base_url,
|
||||
timeout=timeout,
|
||||
peer_name=host_block.get("peerName") or raw.get("peerName"),
|
||||
ai_peer=ai_peer,
|
||||
enabled=enabled,
|
||||
save_messages=save_messages,
|
||||
write_frequency=write_frequency,
|
||||
context_tokens=host_block.get("contextTokens") or raw.get("contextTokens"),
|
||||
context_tokens=_parse_context_tokens(
|
||||
host_block.get("contextTokens"),
|
||||
raw.get("contextTokens"),
|
||||
),
|
||||
dialectic_reasoning_level=(
|
||||
host_block.get("dialecticReasoningLevel")
|
||||
or raw.get("dialecticReasoningLevel")
|
||||
or "low"
|
||||
),
|
||||
|
||||
dialectic_dynamic=_resolve_bool(
|
||||
host_block.get("dialecticDynamic"),
|
||||
raw.get("dialecticDynamic"),
|
||||
|
|
@ -422,16 +463,18 @@ class HonchoClientConfig:
|
|||
cwd: str | None = None,
|
||||
session_title: str | None = None,
|
||||
session_id: str | None = None,
|
||||
gateway_session_key: str | None = None,
|
||||
) -> str | None:
|
||||
"""Resolve Honcho session name.
|
||||
|
||||
Resolution order:
|
||||
1. Manual directory override from sessions map
|
||||
2. Hermes session title (from /title command)
|
||||
3. per-session strategy — Hermes session_id ({timestamp}_{hex})
|
||||
4. per-repo strategy — git repo root directory name
|
||||
5. per-directory strategy — directory basename
|
||||
6. global strategy — workspace name
|
||||
3. Gateway session key (stable per-chat identifier from gateway platforms)
|
||||
4. per-session strategy — Hermes session_id ({timestamp}_{hex})
|
||||
5. per-repo strategy — git repo root directory name
|
||||
6. per-directory strategy — directory basename
|
||||
7. global strategy — workspace name
|
||||
"""
|
||||
import re
|
||||
|
||||
|
|
@ -451,6 +494,16 @@ class HonchoClientConfig:
|
|||
return f"{self.peer_name}-{sanitized}"
|
||||
return sanitized
|
||||
|
||||
# Gateway session key: stable per-chat identifier passed by the gateway
|
||||
# (e.g. "agent:main:telegram:dm:8439114563"). Sanitize colons to hyphens
|
||||
# for Honcho session ID compatibility. This takes priority over strategy-
|
||||
# based resolution because gateway platforms need per-chat isolation that
|
||||
# cwd-based strategies cannot provide.
|
||||
if gateway_session_key:
|
||||
sanitized = re.sub(r'[^a-zA-Z0-9_-]', '-', gateway_session_key).strip('-')
|
||||
if sanitized:
|
||||
return sanitized
|
||||
|
||||
# per-session: inherit Hermes session_id (new Honcho session each run)
|
||||
if self.session_strategy == "per-session" and session_id:
|
||||
if self.session_peer_prefix and self.peer_name:
|
||||
|
|
@ -512,13 +565,20 @@ def get_honcho_client(config: HonchoClientConfig | None = None) -> Honcho:
|
|||
# mapping, enabling remote self-hosted Honcho deployments without
|
||||
# requiring the server to live on localhost.
|
||||
resolved_base_url = config.base_url
|
||||
if not resolved_base_url:
|
||||
resolved_timeout = config.timeout
|
||||
if not resolved_base_url or resolved_timeout is None:
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
hermes_cfg = load_config()
|
||||
honcho_cfg = hermes_cfg.get("honcho", {})
|
||||
if isinstance(honcho_cfg, dict):
|
||||
resolved_base_url = honcho_cfg.get("base_url", "").strip() or None
|
||||
if not resolved_base_url:
|
||||
resolved_base_url = honcho_cfg.get("base_url", "").strip() or None
|
||||
if resolved_timeout is None:
|
||||
resolved_timeout = _resolve_optional_float(
|
||||
honcho_cfg.get("timeout"),
|
||||
honcho_cfg.get("request_timeout"),
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
|
@ -553,6 +613,8 @@ def get_honcho_client(config: HonchoClientConfig | None = None) -> Honcho:
|
|||
}
|
||||
if resolved_base_url:
|
||||
kwargs["base_url"] = resolved_base_url
|
||||
if resolved_timeout is not None:
|
||||
kwargs["timeout"] = resolved_timeout
|
||||
|
||||
_honcho_client = Honcho(**kwargs)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue