This commit is contained in:
Eri Barrett 2026-04-24 19:05:34 -04:00 committed by GitHub
commit 2b06dd23dd
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
20 changed files with 2283 additions and 78 deletions

View file

@ -63,6 +63,117 @@ def sanitize_context(text: str) -> str:
return text
class StreamingContextScrubber:
"""Stateful scrubber for streaming text that may contain split memory-context spans.
The one-shot ``sanitize_context`` regex cannot survive chunk boundaries:
a ``<memory-context>`` opened in one delta and closed in a later delta
leaks its payload to the UI because the non-greedy block regex needs
both tags in one string. This scrubber runs a small state machine
across deltas, holding back partial-tag tails and discarding
everything inside a span (including the system-note line).
Usage::
scrubber = StreamingContextScrubber()
for delta in stream:
visible = scrubber.feed(delta)
if visible:
emit(visible)
trailing = scrubber.flush() # at end of stream
if trailing:
emit(trailing)
The scrubber is re-entrant per agent instance. Callers building new
top-level responses (new turn) should create a fresh scrubber or call
``reset()``.
"""
_OPEN_TAG = "<memory-context>"
_CLOSE_TAG = "</memory-context>"
def __init__(self) -> None:
self._in_span: bool = False
self._buf: str = ""
def reset(self) -> None:
self._in_span = False
self._buf = ""
def feed(self, text: str) -> str:
"""Return the visible portion of ``text`` after scrubbing.
Any trailing fragment that could be the start of an open/close tag
is held back in the internal buffer and surfaced on the next
``feed()`` call or discarded/emitted by ``flush()``.
"""
if not text:
return ""
buf = self._buf + text
self._buf = ""
out: list[str] = []
while buf:
if self._in_span:
idx = buf.lower().find(self._CLOSE_TAG)
if idx == -1:
# Hold back a potential partial close tag; drop the rest
held = self._max_partial_suffix(buf, self._CLOSE_TAG)
self._buf = buf[-held:] if held else ""
return "".join(out)
# Found close — skip span content + tag, continue
buf = buf[idx + len(self._CLOSE_TAG):]
self._in_span = False
else:
idx = buf.lower().find(self._OPEN_TAG)
if idx == -1:
# No open tag — hold back a potential partial open tag
held = self._max_partial_suffix(buf, self._OPEN_TAG)
if held:
out.append(buf[:-held])
self._buf = buf[-held:]
else:
out.append(buf)
return "".join(out)
# Emit text before the tag, enter span
if idx > 0:
out.append(buf[:idx])
buf = buf[idx + len(self._OPEN_TAG):]
self._in_span = True
return "".join(out)
def flush(self) -> str:
"""Emit any held-back buffer at end-of-stream.
If we're still inside an unterminated span the remaining content is
discarded (safer: leaking partial memory context is worse than a
truncated answer). Otherwise the held-back partial-tag tail is
emitted verbatim (it turned out not to be a real tag).
"""
if self._in_span:
self._buf = ""
self._in_span = False
return ""
tail = self._buf
self._buf = ""
return tail
@staticmethod
def _max_partial_suffix(buf: str, tag: str) -> int:
"""Return the length of the longest buf-suffix that is a tag-prefix.
Case-insensitive. Returns 0 if no suffix could start the tag.
"""
tag_lower = tag.lower()
buf_lower = buf.lower()
max_check = min(len(buf_lower), len(tag_lower) - 1)
for i in range(max_check, 0, -1):
if tag_lower.startswith(buf_lower[-i:]):
return i
return 0
def build_memory_context_block(raw_context: str) -> str:
"""Wrap prefetched memory in a fenced block with system note.

View file

@ -8253,6 +8253,7 @@ class GatewayRunner:
The enriched message string with vision descriptions prepended.
"""
from tools.vision_tools import vision_analyze_tool
from agent.memory_manager import sanitize_context
analysis_prompt = (
"Describe everything visible in this image in thorough detail. "
@ -8271,6 +8272,14 @@ class GatewayRunner:
result = json.loads(result_json)
if result.get("success"):
description = result.get("analysis", "")
# The auxiliary vision LLM can echo injected system-prompt
# memory context back into its output (#5719). Scrub any
# <memory-context> fences and the "## Honcho Context"
# section before the description lands in a user-visible
# message.
description = sanitize_context(description)
if "## Honcho Context" in description:
description = description.split("## Honcho Context", 1)[0].rstrip()
enriched_parts.append(
f"[The user sent an image~ Here's what I can see:\n{description}]\n"
f"[If you need a closer look, use vision_analyze with "

View file

@ -22,6 +22,8 @@ import sqlite3
import threading
import time
from pathlib import Path
from agent.memory_manager import sanitize_context
from hermes_constants import get_hermes_home
from typing import Any, Callable, Dict, List, Optional, TypeVar
@ -1119,7 +1121,10 @@ class SessionDB:
rows = cursor.fetchall()
messages = []
for row in rows:
msg = {"role": row["role"], "content": row["content"]}
content = row["content"]
if row["role"] in {"user", "assistant"} and isinstance(content, str):
content = sanitize_context(content).strip()
msg = {"role": row["role"], "content": content}
if row["tool_call_id"]:
msg["tool_call_id"] = row["tool_call_id"]
if row["tool_name"]:

View file

@ -22,7 +22,14 @@ import threading
import time
from typing import Any, Dict, List, Optional
from agent.memory_manager import sanitize_context
from agent.memory_provider import MemoryProvider
from plugins.memory.honcho.sync_worker import (
CircuitBreaker,
HonchoLatencyTracker,
SyncTask,
SyncWorker,
)
from tools.registry import tool_error
logger = logging.getLogger(__name__)
@ -194,7 +201,22 @@ class HonchoMemoryProvider(MemoryProvider):
self._prefetch_result = ""
self._prefetch_lock = threading.Lock()
self._prefetch_thread: Optional[threading.Thread] = None
self._sync_thread: Optional[threading.Thread] = None
# Post-response write path (sync_turn / on_memory_write). See
# plugins/memory/honcho/sync_worker.py. The tracker + breaker are
# shared with the Honcho SDK client so adaptive timeouts and
# degraded-mode behaviour are consistent across the plugin.
self._latency_tracker = HonchoLatencyTracker()
self._breaker = CircuitBreaker()
self._sync_worker = SyncWorker(
latency_tracker=self._latency_tracker,
breaker=self._breaker,
thread_name="honcho-sync-worker",
)
# Durable backlog of tasks that couldn't reach Honcho (breaker open
# or queue overflow). Drained on recovery — see _drain_backlog().
self._backlog: List[SyncTask] = []
self._backlog_lock = threading.Lock()
# B1: recall_mode — set during initialize from config
self._recall_mode = "hybrid" # "context", "tools", or "hybrid"
@ -1056,8 +1078,82 @@ class HonchoMemoryProvider(MemoryProvider):
return chunks
# -- backlog management (Layer 3) ----------------------------------------
_BACKLOG_MAX = 256
def _enqueue_with_backlog(self, task: SyncTask) -> None:
"""Submit a task to the worker with backlog fall-through on defer.
Wraps the caller's task with a failure hook that captures the
task itself (not just the error) so it can be appended to the
durable backlog when the breaker is open or the queue is full.
"""
original_on_failure = task.on_failure
def _on_failure(error: BaseException) -> None:
reason = str(error)
# Only backlog tasks that were deferred, not ones that crashed
# inside Honcho itself — those are unlikely to succeed on replay.
if any(marker in reason for marker in (
"circuit breaker open",
"sync queue full",
"sync queue overflow",
"shutting down",
)):
with self._backlog_lock:
if len(self._backlog) < self._BACKLOG_MAX:
self._backlog.append(task)
else:
logger.debug("Honcho backlog full; dropping %s", task.name)
else:
logger.debug(
"Honcho sync task %s failed (not backlogged): %s",
task.name, error,
)
if original_on_failure is not None:
try:
original_on_failure(error)
except Exception:
pass
task.on_failure = _on_failure
self._sync_worker.enqueue(task)
def _drain_backlog_if_healthy(self) -> None:
"""Opportunistic replay of backlogged tasks when the breaker closes.
Called from the happy path of ``sync_turn``; never blocks the
user. Walks the backlog, re-enqueueing everything on the worker
if the breaker is still open, tasks will bounce back into
the backlog via their on_failure handler.
Also nudges the Honcho client's HTTP timeout toward the tracker's
observed p95 so stalled backends fail fast on subsequent calls
instead of burning 30s per request.
"""
if self._breaker.state != self._breaker.STATE_CLOSED:
return
# Layer 2: adaptive timeout rebuild (cheap no-op if below threshold).
try:
from plugins.memory.honcho.client import rebuild_honcho_client_with_timeout
rebuild_honcho_client_with_timeout(self._latency_tracker.timeout())
except Exception as e:
logger.debug("Honcho timeout rebuild skipped: %s", e)
with self._backlog_lock:
if not self._backlog:
return
pending = self._backlog
self._backlog = []
for task in pending:
self._sync_worker.enqueue(task)
def sync_turn(self, user_content: str, assistant_content: str, *, session_id: str = "") -> None:
"""Record the conversation turn in Honcho (non-blocking).
"""Record the conversation turn in Honcho (fire-and-forget).
Enqueues the sync on the persistent worker thread and returns
immediately. Callers never wait on Honcho the run_conversation
return path is fully decoupled from post-response writes.
Messages exceeding the Honcho API limit (default 25k chars) are
split into multiple messages with continuation markers.
@ -1068,27 +1164,34 @@ class HonchoMemoryProvider(MemoryProvider):
return
msg_limit = self._config.message_max_chars if self._config else 25000
clean_user_content = sanitize_context(user_content or "").strip()
clean_assistant_content = sanitize_context(assistant_content or "").strip()
session_key = self._session_key
def _sync():
try:
session = self._manager.get_or_create(self._session_key)
for chunk in self._chunk_message(user_content, msg_limit):
session.add_message("user", chunk)
for chunk in self._chunk_message(assistant_content, msg_limit):
session.add_message("assistant", chunk)
self._manager._flush_session(session)
except Exception as e:
logger.debug("Honcho sync_turn failed: %s", e)
session = self._manager.get_or_create(session_key)
for chunk in self._chunk_message(clean_user_content, msg_limit):
session.add_message("user", chunk)
for chunk in self._chunk_message(clean_assistant_content, msg_limit):
session.add_message("assistant", chunk)
self._manager._flush_session(session)
if self._sync_thread and self._sync_thread.is_alive():
self._sync_thread.join(timeout=5.0)
self._sync_thread = threading.Thread(
target=_sync, daemon=True, name="honcho-sync"
task = SyncTask(
fn=_sync,
name="sync_turn",
)
self._sync_thread.start()
self._enqueue_with_backlog(task)
# If the breaker transitioned back to closed between turns, try to
# drain anything that piled up while Honcho was unreachable.
self._drain_backlog_if_healthy()
def on_memory_write(self, action: str, target: str, content: str) -> None:
"""Mirror built-in user profile writes as Honcho conclusions."""
"""Mirror built-in user profile writes as Honcho conclusions.
Enqueued on the shared sync worker so every post-response write
path (turn sync + conclusion mirror) observes the same breaker
and backlog.
"""
if action != "add" or target != "user" or not content:
return
if self._cron_skipped:
@ -1096,14 +1199,17 @@ class HonchoMemoryProvider(MemoryProvider):
if not self._manager or not self._session_key:
return
def _write():
try:
self._manager.create_conclusion(self._session_key, content)
except Exception as e:
logger.debug("Honcho memory mirror failed: %s", e)
session_key = self._session_key
payload = content
t = threading.Thread(target=_write, daemon=True, name="honcho-memwrite")
t.start()
def _write():
self._manager.create_conclusion(session_key, payload)
task = SyncTask(
fn=_write,
name="memory_mirror",
)
self._enqueue_with_backlog(task)
def on_session_end(self, messages: List[Dict[str, Any]]) -> None:
"""Flush all pending messages to Honcho on session end."""
@ -1111,9 +1217,10 @@ class HonchoMemoryProvider(MemoryProvider):
return
if not self._manager:
return
# Wait for pending sync
if self._sync_thread and self._sync_thread.is_alive():
self._sync_thread.join(timeout=10.0)
# Wait briefly for any in-flight sync tasks to drain. We can't
# block session-end indefinitely, but giving the worker 10s to
# finish a pending turn-sync matches the previous behaviour.
self._sync_worker.shutdown(timeout=10.0)
try:
self._manager.flush_all()
except Exception as e:
@ -1233,9 +1340,10 @@ class HonchoMemoryProvider(MemoryProvider):
return tool_error(f"Honcho {tool_name} failed: {e}")
def shutdown(self) -> None:
for t in (self._prefetch_thread, self._sync_thread):
if t and t.is_alive():
t.join(timeout=5.0)
# Drain the prefetch thread (legacy, unchanged) + the sync worker.
if self._prefetch_thread and self._prefetch_thread.is_alive():
self._prefetch_thread.join(timeout=5.0)
self._sync_worker.shutdown(timeout=5.0)
# Flush any remaining messages
if self._manager:
try:

View file

@ -273,9 +273,28 @@ def _write_config(cfg: dict, path: Path | None = None) -> None:
def _resolve_api_key(cfg: dict) -> str:
"""Resolve API key with host -> root -> env fallback."""
"""Resolve API key with host -> root -> env fallback.
For self-hosted instances configured with ``baseUrl`` instead of an API
key, returns ``"local"`` so that credential guards throughout the CLI
don't reject a valid configuration. The ``baseUrl`` is scheme-validated
(http/https only) so that a typo like ``baseUrl: true`` can't silently
pass the guard.
"""
host_key = ((cfg.get("hosts") or {}).get(_host_key()) or {}).get("apiKey")
return host_key or cfg.get("apiKey", "") or os.environ.get("HONCHO_API_KEY", "")
key = host_key or cfg.get("apiKey", "") or os.environ.get("HONCHO_API_KEY", "")
if not key:
base_url = cfg.get("baseUrl") or cfg.get("base_url") or os.environ.get("HONCHO_BASE_URL", "")
base_url = (base_url or "").strip()
if base_url:
from urllib.parse import urlparse
try:
parsed = urlparse(base_url)
except (TypeError, ValueError):
parsed = None
if parsed and parsed.scheme in ("http", "https") and parsed.netloc:
return "local"
return key
def _prompt(label: str, default: str | None = None, secret: bool = False) -> str:

View file

@ -16,6 +16,7 @@ from __future__ import annotations
import json
import os
import logging
import hashlib
from dataclasses import dataclass, field
from pathlib import Path
@ -27,7 +28,6 @@ if TYPE_CHECKING:
logger = logging.getLogger(__name__)
GLOBAL_CONFIG_PATH = Path.home() / ".honcho" / "config.json"
HOST = "hermes"
@ -53,6 +53,11 @@ def resolve_active_host() -> str:
return HOST
def resolve_global_config_path() -> Path:
"""Return the shared Honcho config path for the current HOME."""
return Path.home() / ".honcho" / "config.json"
def resolve_config_path() -> Path:
"""Return the active Honcho config path.
@ -72,7 +77,7 @@ def resolve_config_path() -> Path:
if default_path != local_path and default_path.exists():
return default_path
return GLOBAL_CONFIG_PATH
return resolve_global_config_path()
_RECALL_MODE_ALIASES = {"auto": "hybrid"}
@ -138,6 +143,15 @@ def _parse_dialectic_depth_levels(host_val, root_val, depth: int) -> list[str] |
return None
# Default HTTP timeout (seconds) applied when no explicit timeout is
# configured via HonchoClientConfig.timeout, honcho.timeout / requestTimeout,
# or HONCHO_TIMEOUT. Honcho calls happen on the post-response path of
# run_conversation; without a cap the agent can block indefinitely when
# the Honcho backend is unreachable, preventing the gateway from
# delivering the already-generated response.
_DEFAULT_HTTP_TIMEOUT = 30.0
def _resolve_optional_float(*values: Any) -> float | None:
"""Return the first non-empty value coerced to a positive float."""
for value in values:
@ -226,6 +240,13 @@ class HonchoClientConfig:
# Identity
peer_name: str | None = None
ai_peer: str = "hermes"
# When True, ``peer_name`` wins over any gateway-supplied runtime
# identity (Telegram UID, Discord ID, …) when resolving the user peer.
# This keeps memory unified across platforms for single-user deployments
# where Honcho's one peer-name is an unambiguous identity — otherwise
# each platform would fork memory into its own peer (#14984). Default
# ``False`` preserves existing multi-user behaviour.
pin_peer_name: bool = False
# Toggles
enabled: bool = False
save_messages: bool = True
@ -420,6 +441,11 @@ class HonchoClientConfig:
timeout=timeout,
peer_name=host_block.get("peerName") or raw.get("peerName"),
ai_peer=ai_peer,
pin_peer_name=_resolve_bool(
host_block.get("pinPeerName"),
raw.get("pinPeerName"),
default=False,
),
enabled=enabled,
save_messages=save_messages,
write_frequency=write_frequency,
@ -522,6 +548,39 @@ class HonchoClientConfig:
pass
return None
# Honcho enforces a 100-char limit on session IDs. Long gateway session keys
# (Matrix "!room:server" + thread event IDs, Telegram supergroup reply
# chains, Slack thread IDs with long workspace prefixes) can overflow this
# limit after sanitization; the Honcho API then rejects every call for that
# session with "session_id too long". See issue #13868.
_HONCHO_SESSION_ID_MAX_LEN = 100
_HONCHO_SESSION_ID_HASH_LEN = 8
@classmethod
def _enforce_session_id_limit(cls, sanitized: str, original: str) -> str:
"""Truncate a sanitized session ID to Honcho's 100-char limit.
The common case (short keys) short-circuits with no modification.
For over-limit keys, keep a prefix of the sanitized ID and append a
deterministic ``-<sha256 prefix>`` suffix so two distinct long keys
that share a leading segment don't collide onto the same truncated ID.
The hash is taken over the *original* pre-sanitization key, so two
inputs that sanitize to the same string still collide intentionally
(same logical session), but two inputs that only share a prefix do not.
"""
max_len = cls._HONCHO_SESSION_ID_MAX_LEN
if len(sanitized) <= max_len:
return sanitized
hash_len = cls._HONCHO_SESSION_ID_HASH_LEN
digest = hashlib.sha256(original.encode("utf-8")).hexdigest()[:hash_len]
# max_len - hash_len - 1 (for the '-' separator) chars of the sanitized
# prefix, then '-<hash>'. Strip any trailing hyphen from the prefix so
# the result doesn't double up on separators.
prefix_len = max_len - hash_len - 1
prefix = sanitized[:prefix_len].rstrip("-")
return f"{prefix}-{digest}"
def resolve_session_name(
self,
cwd: str | None = None,
@ -566,7 +625,7 @@ class HonchoClientConfig:
if gateway_session_key:
sanitized = re.sub(r'[^a-zA-Z0-9_-]+', '-', gateway_session_key).strip('-')
if sanitized:
return sanitized
return self._enforce_session_id_limit(sanitized, gateway_session_key)
# per-session: inherit Hermes session_id (new Honcho session each run)
if self.session_strategy == "per-session" and session_id:
@ -593,6 +652,8 @@ class HonchoClientConfig:
_honcho_client: Honcho | None = None
_honcho_client_kwargs: dict | None = None
_honcho_client_kwargs_active_timeout: float | None = None
def get_honcho_client(config: HonchoClientConfig | None = None) -> Honcho:
@ -601,7 +662,7 @@ def get_honcho_client(config: HonchoClientConfig | None = None) -> Honcho:
When no config is provided, attempts to load ~/.honcho/config.json
first, falling back to environment variables.
"""
global _honcho_client
global _honcho_client, _honcho_client_kwargs, _honcho_client_kwargs_active_timeout
if _honcho_client is not None:
return _honcho_client
@ -646,6 +707,11 @@ def get_honcho_client(config: HonchoClientConfig | None = None) -> Honcho:
except Exception:
pass
# Fall back to the default so an unconfigured install cannot hang
# indefinitely on a stalled Honcho request.
if resolved_timeout is None:
resolved_timeout = _DEFAULT_HTTP_TIMEOUT
if resolved_base_url:
logger.info("Initializing Honcho client (base_url: %s, workspace: %s)", resolved_base_url, config.workspace_id)
else:
@ -681,11 +747,56 @@ def get_honcho_client(config: HonchoClientConfig | None = None) -> Honcho:
kwargs["timeout"] = resolved_timeout
_honcho_client = Honcho(**kwargs)
_honcho_client_kwargs = dict(kwargs)
_honcho_client_kwargs_active_timeout = resolved_timeout
return _honcho_client
def rebuild_honcho_client_with_timeout(new_timeout: float) -> None:
"""Rebuild the singleton Honcho client with a new HTTP timeout.
Called by the provider's sync worker once enough latency samples
have accumulated for the :class:`HonchoLatencyTracker` to recommend
a timeout meaningfully different from the one the current client
was built with. Safe to call concurrently; no-op if the delta is
below a 20% threshold so small jitter doesn't thrash the client.
The previous client is discarded the Honcho SDK uses a pooled
httpx.Client internally but doesn't expose it for in-place timeout
mutation, so rebuild is the only portable option.
"""
global _honcho_client, _honcho_client_kwargs_active_timeout
if _honcho_client is None or _honcho_client_kwargs is None:
return
active = _honcho_client_kwargs_active_timeout or _DEFAULT_HTTP_TIMEOUT
if active <= 0:
return
ratio = new_timeout / active
if 0.8 <= ratio <= 1.2:
# Not a meaningful change; skip rebuild.
return
try:
from honcho import Honcho
except ImportError:
return
new_kwargs = dict(_honcho_client_kwargs)
new_kwargs["timeout"] = new_timeout
logger.info(
"Adapting Honcho HTTP timeout: %.1fs -> %.1fs (tracker p95)",
active, new_timeout,
)
_honcho_client = Honcho(**new_kwargs)
_honcho_client_kwargs_active_timeout = new_timeout
def reset_honcho_client() -> None:
"""Reset the Honcho client singleton (useful for testing)."""
global _honcho_client
global _honcho_client, _honcho_client_kwargs, _honcho_client_kwargs_active_timeout
_honcho_client = None
_honcho_client_kwargs = None
_honcho_client_kwargs_active_timeout = None

View file

@ -95,6 +95,7 @@ class HonchoSessionManager:
self._config = config
self._runtime_user_peer_name = runtime_user_peer_name
self._cache: dict[str, HonchoSession] = {}
self._cache_lock = threading.RLock()
self._peers_cache: dict[str, Any] = {}
self._sessions_cache: dict[str, Any] = {}
@ -273,17 +274,35 @@ class HonchoSessionManager:
Returns:
The session.
"""
if key in self._cache:
logger.debug("Local session cache hit: %s", key)
return self._cache[key]
with self._cache_lock:
if key in self._cache:
logger.debug("Local session cache hit: %s", key)
return self._cache[key]
# Gateway sessions should use the runtime user identity when available.
if self._runtime_user_peer_name:
# Determine peer IDs — no lock needed (read-only, no shared state mutation).
# Gateway sessions normally use the runtime user identity (the
# platform-native ID: Telegram UID, Discord snowflake, Slack user,
# etc.) so multi-user bots scope memory per user. For a single-user
# deployment the config-supplied ``peer_name`` is an unambiguous
# identity and we should keep it unified across platforms — see
# #14984. Opt into that with ``hosts.<host>.pinPeerName: true`` in
# ``honcho.json`` (or root-level ``pinPeerName: true``).
# `is True` (not `bool(...)`) is deliberate: several multi-user tests
# pass a ``MagicMock`` for ``config`` where ``mock.pin_peer_name``
# silently returns another MagicMock — truthy by default. Requiring
# strict ``True`` keeps pinning as opt-in even for callers that
# haven't updated their mocks yet; real configs built via
# ``from_global_config`` always produce a proper boolean.
pin_peer_name = (
self._config is not None
and bool(getattr(self._config, "peer_name", None))
and getattr(self._config, "pin_peer_name", False) is True
)
if self._runtime_user_peer_name and not pin_peer_name:
user_peer_id = self._sanitize_id(self._runtime_user_peer_name)
elif self._config and self._config.peer_name:
user_peer_id = self._sanitize_id(self._config.peer_name)
else:
# Fallback: derive from session key
parts = key.split(":", 1)
channel = parts[0] if len(parts) > 1 else "default"
chat_id = parts[1] if len(parts) > 1 else key
@ -293,19 +312,14 @@ class HonchoSessionManager:
self._config.ai_peer if self._config else "hermes-assistant"
)
# Sanitize session ID for Honcho
# All expensive I/O outside the lock — Honcho's persistence is source of truth
honcho_session_id = self._sanitize_id(key)
# Get or create peers
user_peer = self._get_or_create_peer(user_peer_id)
assistant_peer = self._get_or_create_peer(assistant_peer_id)
# Get or create Honcho session
honcho_session, existing_messages = self._get_or_create_honcho_session(
honcho_session_id, user_peer, assistant_peer
)
# Convert Honcho messages to local format
local_messages = []
for msg in existing_messages:
role = "assistant" if msg.peer_id == assistant_peer_id else "user"
@ -313,10 +327,9 @@ class HonchoSessionManager:
"role": role,
"content": msg.content,
"timestamp": msg.created_at.isoformat() if msg.created_at else "",
"_synced": True, # Already in Honcho
"_synced": True,
})
# Create local session wrapper with existing messages
session = HonchoSession(
key=key,
user_peer_id=user_peer_id,
@ -325,7 +338,9 @@ class HonchoSessionManager:
messages=local_messages,
)
self._cache[key] = session
# Write to cache under lock — only one writer wins
with self._cache_lock:
self._cache[key] = session
return session
def _flush_session(self, session: HonchoSession) -> bool:
@ -356,13 +371,15 @@ class HonchoSessionManager:
for msg in new_messages:
msg["_synced"] = True
logger.debug("Synced %d messages to Honcho for %s", len(honcho_messages), session.key)
self._cache[session.key] = session
with self._cache_lock:
self._cache[session.key] = session
return True
except Exception as e:
for msg in new_messages:
msg["_synced"] = False
logger.error("Failed to sync messages to Honcho: %s", e)
self._cache[session.key] = session
with self._cache_lock:
self._cache[session.key] = session
return False
def _async_writer_loop(self) -> None:
@ -434,7 +451,9 @@ class HonchoSessionManager:
Called at session end for "session" write_frequency, or to force
a sync before process exit regardless of mode.
"""
for session in list(self._cache.values()):
with self._cache_lock:
sessions = list(self._cache.values())
for session in sessions:
try:
self._flush_session(session)
except Exception as e:
@ -459,9 +478,10 @@ class HonchoSessionManager:
def delete(self, key: str) -> bool:
"""Delete a session from local cache."""
if key in self._cache:
del self._cache[key]
return True
with self._cache_lock:
if key in self._cache:
del self._cache[key]
return True
return False
def new_session(self, key: str) -> HonchoSession:
@ -473,20 +493,25 @@ class HonchoSessionManager:
"""
import time
# Remove old session from caches (but don't delete from Honcho)
old_session = self._cache.pop(key, None)
if old_session:
self._sessions_cache.pop(old_session.honcho_session_id, None)
# Hold the reentrant lock across get_or_create so a concurrent caller
# can't observe the (old-popped, new-not-yet-inserted) gap and create
# its own session under the raw key. `_cache_lock` is an RLock so
# nested reacquisition inside get_or_create is safe.
with self._cache_lock:
# Remove old session from caches (but don't delete from Honcho)
old_session = self._cache.pop(key, None)
if old_session:
self._sessions_cache.pop(old_session.honcho_session_id, None)
# Create new session with timestamp suffix
timestamp = int(time.time())
new_key = f"{key}:{timestamp}"
# Create new session with timestamp suffix
timestamp = int(time.time())
new_key = f"{key}:{timestamp}"
# get_or_create will create a fresh session
session = self.get_or_create(new_key)
# get_or_create will create a fresh session
session = self.get_or_create(new_key)
# Cache under the original key so callers find it by the expected name
self._cache[key] = session
# Cache under the original key so callers find it by the expected name
self._cache[key] = session
logger.info("Created new session for %s (honcho: %s)", key, session.honcho_session_id)
return session

View file

@ -0,0 +1,394 @@
"""Background sync worker for the Honcho memory provider.
The post-response sync path was previously a per-turn thread spawn with a
best-effort ``join(timeout=5.0)`` on the prior turn's thread. That forced
``run_conversation`` to wait up to 5 seconds at the start of every turn if
the previous turn's sync was still in flight, and it serialized all sync
work on a single transient thread that the caller had to coordinate with.
This module replaces that pattern with:
- :class:`SyncWorker` persistent daemon thread draining a bounded
queue of sync/write tasks. Tasks are submitted with ``enqueue()`` and
return immediately; the caller is never blocked by Honcho latency.
- :class:`HonchoLatencyTracker` rolling p95 observer that gives the
client an adaptive timeout with sensible cold-start defaults (Layer 2
of the timeout-ceiling rework).
- :class:`CircuitBreaker` consecutive-failure tripwire that flips to
a degraded state after repeated timeouts and probes for recovery in
the background (Layer 3). While open, sync tasks are persisted to a
local backlog so the outage's worth of writes can be drained once
Honcho is reachable again.
The three primitives compose: ``SyncWorker`` consults the breaker before
each task, records the outcome in the latency tracker, and feeds timeout
+ failure observations back to the breaker. Nothing here depends on
``HonchoMemoryProvider`` the worker takes plain callables so tests can
exercise each primitive in isolation.
"""
from __future__ import annotations
import collections
import logging
import queue
import threading
import time
from dataclasses import dataclass
from typing import Callable, Deque, Optional
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Latency tracker — Layer 2
# ---------------------------------------------------------------------------
class HonchoLatencyTracker:
"""Rolling p95 observer for Honcho call latencies.
Provides an adaptive HTTP timeout that scales with observed backend
latency. Hosted Honcho settles to ~1-3s; self-hosted instances with
slow cold starts naturally scale up. Thread-safe: the worker thread
records observations, any thread may read the current timeout.
"""
def __init__(
self,
*,
window: int = 20,
default: float = 30.0,
floor: float = 5.0,
headroom: float = 3.0,
warmup_samples: int = 5,
) -> None:
self._samples: Deque[float] = collections.deque(maxlen=window)
self._default = float(default)
self._floor = float(floor)
self._headroom = float(headroom)
self._warmup = int(warmup_samples)
self._lock = threading.Lock()
def observe(self, seconds: float) -> None:
"""Record a successful call's wall-clock latency (seconds)."""
if seconds < 0 or seconds != seconds: # NaN check
return
with self._lock:
self._samples.append(float(seconds))
def timeout(self) -> float:
"""Return the adaptive timeout for the next call.
During warmup (< warmup_samples observations) returns the default.
Once warm, returns ``max(floor, headroom × p95(samples))``.
"""
with self._lock:
n = len(self._samples)
if n < self._warmup:
return self._default
sorted_samples = sorted(self._samples)
# Nearest-rank p95: index = ceil(0.95 * n) - 1, clamped.
idx = min(n - 1, max(0, int(round(0.95 * (n - 1)))))
p95 = sorted_samples[idx]
return max(self._floor, self._headroom * p95)
def reset(self) -> None:
with self._lock:
self._samples.clear()
# ---------------------------------------------------------------------------
# Circuit breaker — Layer 3
# ---------------------------------------------------------------------------
class CircuitBreaker:
"""Consecutive-failure tripwire with periodic probe recovery.
States:
- ``closed`` traffic flows normally (the happy path)
- ``open`` recent consecutive failures > threshold; skip calls
- ``half_open`` probe window; one test call is allowed through
Transitions:
- closed open after ``failure_threshold`` consecutive failures
- open half_open after ``probe_interval`` seconds
- half_open closed on a successful probe
- half_open open on a failed probe
Thread-safe. The worker consults ``allow()`` before each task and
reports the outcome via ``record_success()`` / ``record_failure()``.
"""
STATE_CLOSED = "closed"
STATE_OPEN = "open"
STATE_HALF_OPEN = "half_open"
def __init__(
self,
*,
failure_threshold: int = 3,
probe_interval: float = 60.0,
time_fn: Callable[[], float] = time.monotonic,
) -> None:
self._failure_threshold = int(failure_threshold)
self._probe_interval = float(probe_interval)
self._time_fn = time_fn
self._lock = threading.Lock()
self._state = self.STATE_CLOSED
self._consecutive_failures = 0
self._opened_at: Optional[float] = None
@property
def state(self) -> str:
with self._lock:
self._maybe_transition_to_probe()
return self._state
def allow(self) -> bool:
"""Return True iff a call should proceed now."""
with self._lock:
self._maybe_transition_to_probe()
return self._state != self.STATE_OPEN
def record_success(self) -> None:
with self._lock:
self._consecutive_failures = 0
if self._state != self.STATE_CLOSED:
logger.info("Honcho circuit breaker: recovered → closed")
self._state = self.STATE_CLOSED
self._opened_at = None
def record_failure(self) -> None:
with self._lock:
self._consecutive_failures += 1
if self._state == self.STATE_HALF_OPEN:
self._state = self.STATE_OPEN
self._opened_at = self._time_fn()
logger.warning("Honcho circuit breaker: probe failed → open")
return
if (
self._state == self.STATE_CLOSED
and self._consecutive_failures >= self._failure_threshold
):
self._state = self.STATE_OPEN
self._opened_at = self._time_fn()
logger.warning(
"Honcho circuit breaker: %d consecutive failures → open",
self._consecutive_failures,
)
def reset(self) -> None:
with self._lock:
self._state = self.STATE_CLOSED
self._consecutive_failures = 0
self._opened_at = None
def _maybe_transition_to_probe(self) -> None:
# Caller must hold the lock.
if self._state == self.STATE_OPEN and self._opened_at is not None:
if self._time_fn() - self._opened_at >= self._probe_interval:
self._state = self.STATE_HALF_OPEN
logger.info(
"Honcho circuit breaker: probe window → half_open"
)
# ---------------------------------------------------------------------------
# Sync worker — Layer 1
# ---------------------------------------------------------------------------
@dataclass
class SyncTask:
"""A unit of work for the sync worker.
``fn`` runs on the worker thread. ``name`` is a human-readable label
used in logs and for backlog replay. ``on_failure`` is optional: if
set, it's called with the exception on breaker-open deferral or when
all retries are exhausted so callers can persist the task to a
durable backlog.
"""
fn: Callable[[], None]
name: str = "sync"
on_failure: Optional[Callable[[BaseException], None]] = None
class SyncWorker:
"""Persistent daemon thread draining a bounded task queue.
This replaces the per-turn ``threading.Thread(target=_sync).start()``
pattern so ``sync_turn`` returns immediately instead of coordinating
thread handoff on every turn. Runs ``SyncTask`` callables serially
on a dedicated thread serialization is intentional because Honcho
session writes must be ordered per-session to avoid re-ordering
messages, and the worker handles one session per provider.
Queue overflow (producer faster than Honcho can drain) drops the
OLDEST task rather than blocking the producer. This favors user-
facing responsiveness over write fidelity in the pathological case,
and the dropped task still has its ``on_failure`` callback invoked
so it can be appended to a durable backlog.
The worker is lazy: the thread starts on first ``enqueue()`` call
and runs until ``shutdown()``. ``shutdown()`` is idempotent.
"""
def __init__(
self,
*,
max_queue: int = 64,
latency_tracker: Optional[HonchoLatencyTracker] = None,
breaker: Optional[CircuitBreaker] = None,
thread_name: str = "honcho-sync-worker",
) -> None:
self._queue: queue.Queue[Optional[SyncTask]] = queue.Queue(maxsize=max_queue)
self._thread: Optional[threading.Thread] = None
self._thread_name = thread_name
self._shutdown = False
self._lock = threading.Lock()
self._latency_tracker = latency_tracker
self._breaker = breaker
self._dropped = 0
# -- lifecycle -----------------------------------------------------------
def _ensure_started(self) -> None:
with self._lock:
if self._thread is not None and self._thread.is_alive():
return
if self._shutdown:
return
self._thread = threading.Thread(
target=self._run,
name=self._thread_name,
daemon=True,
)
self._thread.start()
def shutdown(self, *, timeout: float = 5.0) -> None:
"""Signal the worker to drain and exit; wait up to ``timeout`` seconds."""
with self._lock:
if self._shutdown:
return
self._shutdown = True
thread = self._thread
try:
# Sentinel triggers clean exit from the loop.
self._queue.put_nowait(None)
except queue.Full:
pass
if thread is not None:
thread.join(timeout=timeout)
# -- producer interface --------------------------------------------------
def enqueue(self, task: SyncTask) -> bool:
"""Submit a task. Returns False if the task was dropped.
Breaker-open tasks are dropped synchronously and ``on_failure`` is
called so the caller can persist them. Queue-full tasks evict
the oldest task (which also gets its ``on_failure`` called) to
keep the pipeline moving under load.
"""
if self._shutdown:
if task.on_failure is not None:
try:
task.on_failure(RuntimeError("sync worker is shutting down"))
except Exception:
pass
return False
breaker = self._breaker
if breaker is not None and not breaker.allow():
if task.on_failure is not None:
try:
task.on_failure(RuntimeError("circuit breaker open"))
except Exception:
pass
return False
self._ensure_started()
try:
self._queue.put_nowait(task)
return True
except queue.Full:
# Evict the oldest queued task to make room; its failure
# callback still fires so the backlog can capture it.
try:
victim = self._queue.get_nowait()
self._dropped += 1
if victim is not None and victim.on_failure is not None:
try:
victim.on_failure(
RuntimeError("sync queue overflow — task dropped")
)
except Exception:
pass
except queue.Empty:
pass
try:
self._queue.put_nowait(task)
return True
except queue.Full:
if task.on_failure is not None:
try:
task.on_failure(RuntimeError("sync queue full"))
except Exception:
pass
return False
# -- worker loop ---------------------------------------------------------
def _run(self) -> None:
while True:
try:
task = self._queue.get(timeout=1.0)
except queue.Empty:
if self._shutdown:
return
continue
if task is None: # sentinel
return
started = time.monotonic()
ok = False
error: Optional[BaseException] = None
try:
task.fn()
ok = True
except BaseException as e: # task bodies may raise anything
error = e
logger.debug("Honcho sync task %s failed: %s", task.name, e)
elapsed = time.monotonic() - started
if ok:
if self._latency_tracker is not None:
self._latency_tracker.observe(elapsed)
if self._breaker is not None:
self._breaker.record_success()
else:
if self._breaker is not None:
self._breaker.record_failure()
if task.on_failure is not None and error is not None:
try:
task.on_failure(error)
except Exception:
pass
# -- introspection (for hermes honcho status etc.) -----------------------
def qsize(self) -> int:
return self._queue.qsize()
def dropped(self) -> int:
return self._dropped
def is_running(self) -> bool:
return self._thread is not None and self._thread.is_alive()

View file

@ -79,7 +79,7 @@ from tools.browser_tool import cleanup_browser
# Agent internals extracted to agent/ package for modularity
from agent.memory_manager import build_memory_context_block, sanitize_context
from agent.memory_manager import StreamingContextScrubber, build_memory_context_block, sanitize_context
from agent.retry_utils import jittered_backoff
from agent.error_classifier import classify_api_error, FailoverReason
from agent.prompt_builder import (
@ -1208,6 +1208,10 @@ class AIAgent:
# Deferred paragraph break flag — set after tool iterations so a
# single "\n\n" is prepended to the next real text delta.
self._stream_needs_break = False
# Stateful scrubber for <memory-context> spans split across stream
# deltas (#5719). sanitize_context() alone can't survive chunk
# boundaries because the block regex needs both tags in one string.
self._stream_context_scrubber = StreamingContextScrubber()
# Visible assistant text already delivered through live token callbacks
# during the current model response. Used to avoid re-sending the same
# commentary when the provider later returns it as a completed interim
@ -5784,6 +5788,20 @@ class AIAgent:
def _reset_stream_delivery_tracking(self) -> None:
"""Reset tracking for text delivered during the current model response."""
# Flush any benign partial-tag tail held by the context scrubber so it
# reaches the UI before we clear state for the next model call. If
# the scrubber is mid-span, flush() drops the orphaned content.
scrubber = getattr(self, "_stream_context_scrubber", None)
if scrubber is not None:
tail = scrubber.flush()
if tail:
callbacks = [cb for cb in (self.stream_delta_callback, self._stream_callback) if cb is not None]
for cb in callbacks:
try:
cb(tail)
except Exception:
pass
self._record_streamed_assistant_text(tail)
self._current_streamed_assistant_text = ""
def _record_streamed_assistant_text(self, text: str) -> None:
@ -5816,7 +5834,7 @@ class AIAgent:
if cb is None or not isinstance(assistant_msg, dict):
return
content = assistant_msg.get("content")
visible = self._strip_think_blocks(content or "").strip()
visible = sanitize_context(self._strip_think_blocks(content or "")).strip()
if not visible or visible == "(empty)":
return
already_streamed = self._interim_content_was_streamed(visible)
@ -5834,6 +5852,25 @@ class AIAgent:
if getattr(self, "_stream_needs_break", False) and text and text.strip():
self._stream_needs_break = False
text = "\n\n" + text
prepended_break = True
else:
prepended_break = False
if isinstance(text, str):
# Strip <think> blocks first (per-delta is safe for closed pairs; the
# unterminated-tag path is handled downstream by stream_consumer).
# Then feed through the stateful context scrubber so memory-context
# spans split across chunks cannot leak to the UI (#5719).
text = self._strip_think_blocks(text or "")
scrubber = getattr(self, "_stream_context_scrubber", None)
if scrubber is not None:
text = scrubber.feed(text)
else:
# Defensive: legacy callers without the scrubber attribute.
text = sanitize_context(text)
if not prepended_break:
text = text.lstrip("\n")
if not text:
return
callbacks = [cb for cb in (self.stream_delta_callback, self._stream_callback) if cb is not None]
delivered = False
for cb in callbacks:
@ -7612,7 +7649,7 @@ class AIAgent:
# API replay, session transcript, gateway delivery, CLI display,
# compression, title generation.
if isinstance(_san_content, str) and _san_content:
_san_content = self._strip_think_blocks(_san_content).strip()
_san_content = sanitize_context(self._strip_think_blocks(_san_content)).strip()
msg = {
"role": "assistant",
@ -9378,6 +9415,13 @@ class AIAgent:
# Track user turns for memory flush and periodic nudge logic
self._user_turn_count += 1
# Reset the streaming context scrubber at the top of each turn so a
# hung span from a prior interrupted stream can't taint this turn's
# output.
scrubber = getattr(self, "_stream_context_scrubber", None)
if scrubber is not None:
scrubber.reset()
# Preserve the original user message (no nudge injection).
original_user_message = persist_user_message if persist_user_message is not None else user_message
@ -12339,8 +12383,9 @@ class AIAgent:
truncated_response_prefix = ""
length_continue_retries = 0
# Strip <think> blocks from user-facing response (keep raw in messages for trajectory)
final_response = self._strip_think_blocks(final_response).strip()
# Strip internal context / reasoning wrappers from the user-facing
# response (keep only clean visible text in transcript + UI).
final_response = sanitize_context(self._strip_think_blocks(final_response)).strip()
final_msg = self._build_assistant_message(assistant_message, finish_reason)

View file

@ -0,0 +1,150 @@
"""Unit tests for StreamingContextScrubber (agent/memory_manager.py).
Regression coverage for #5719 — memory-context spans split across stream
deltas must not leak payload to the UI. The one-shot sanitize_context()
regex can't survive chunk boundaries, so _fire_stream_delta routes deltas
through a stateful scrubber.
"""
from agent.memory_manager import StreamingContextScrubber, sanitize_context
class TestStreamingContextScrubberBasics:
def test_empty_input_returns_empty(self):
s = StreamingContextScrubber()
assert s.feed("") == ""
assert s.flush() == ""
def test_plain_text_passes_through(self):
s = StreamingContextScrubber()
assert s.feed("hello world") == "hello world"
assert s.flush() == ""
def test_complete_block_in_single_delta(self):
"""Regression: the one-shot test case from #13672 must still work."""
s = StreamingContextScrubber()
leaked = (
"<memory-context>\n"
"[System note: The following is recalled memory context, NOT new "
"user input. Treat as informational background data.]\n\n"
"## Honcho Context\nstale memory\n"
"</memory-context>\n\nVisible answer"
)
out = s.feed(leaked) + s.flush()
assert out == "\n\nVisible answer"
def test_open_and_close_in_separate_deltas_strips_payload(self):
"""The real streaming case: tag pair split across deltas."""
s = StreamingContextScrubber()
deltas = [
"Hello ",
"<memory-context>\npayload ",
"more payload\n",
"</memory-context> world",
]
out = "".join(s.feed(d) for d in deltas) + s.flush()
assert out == "Hello world"
assert "payload" not in out
def test_realistic_fragmented_chunks_strip_memory_payload(self):
"""Exact leak scenario from the reviewer's comment — 4 realistic chunks.
This is the case the original #13672 fix silently leaks on: the open
tag, system note, payload, and close tag each arrive in their own
delta because providers emit 1-80 char chunks.
"""
s = StreamingContextScrubber()
deltas = [
"<memory-context>\n[System note: The following",
" is recalled memory context, NOT new user input. "
"Treat as informational background data.]\n\n",
"## Honcho Context\nstale memory\n",
"</memory-context>\n\nVisible answer",
]
out = "".join(s.feed(d) for d in deltas) + s.flush()
assert out == "\n\nVisible answer"
# The system-note line and payload must never reach the UI.
assert "System note" not in out
assert "Honcho Context" not in out
assert "stale memory" not in out
def test_open_tag_split_across_two_deltas(self):
"""The open tag itself arriving in two fragments."""
s = StreamingContextScrubber()
out = (
s.feed("pre <memory")
+ s.feed("-context>leak</memory-context> post")
+ s.flush()
)
assert out == "pre post"
assert "leak" not in out
def test_close_tag_split_across_two_deltas(self):
"""The close tag arriving in two fragments."""
s = StreamingContextScrubber()
out = (
s.feed("pre <memory-context>leak</memory")
+ s.feed("-context> post")
+ s.flush()
)
assert out == "pre post"
assert "leak" not in out
class TestStreamingContextScrubberPartialTagFalsePositives:
def test_partial_open_tag_tail_emitted_on_flush(self):
"""Bare '<mem' at end of stream is not really a memory-context tag."""
s = StreamingContextScrubber()
out = s.feed("hello <mem") + s.feed("ory other") + s.flush()
assert out == "hello <memory other"
def test_partial_tag_released_when_disambiguated(self):
"""A held-back partial tag that turns out to be prose gets released."""
s = StreamingContextScrubber()
# '< ' should not look like the start of any tag.
out = s.feed("price < ") + s.feed("10 dollars") + s.flush()
assert out == "price < 10 dollars"
class TestStreamingContextScrubberUnterminatedSpan:
def test_unterminated_span_drops_payload(self):
"""Provider drops close tag — better to lose output than to leak."""
s = StreamingContextScrubber()
out = s.feed("pre <memory-context>secret never closed") + s.flush()
assert out == "pre "
assert "secret" not in out
def test_reset_clears_hung_span(self):
"""Cross-turn scrubber reset drops a hung span so next turn is clean."""
s = StreamingContextScrubber()
s.feed("pre <memory-context>half")
s.reset()
out = s.feed("clean text") + s.flush()
assert out == "clean text"
class TestStreamingContextScrubberCaseInsensitivity:
def test_uppercase_tags_still_scrubbed(self):
s = StreamingContextScrubber()
out = (
s.feed("<MEMORY-CONTEXT>secret")
+ s.feed("</Memory-Context>visible")
+ s.flush()
)
assert out == "visible"
assert "secret" not in out
class TestSanitizeContextUnchanged:
"""Smoke test that the one-shot sanitize_context still works for whole strings."""
def test_whole_block_still_sanitized(self):
leaked = (
"<memory-context>\n"
"[System note: The following is recalled memory context, NOT new "
"user input. Treat as informational background data.]\n"
"payload\n"
"</memory-context>\nVisible"
)
out = sanitize_context(leaked).strip()
assert out == "Visible"

View file

@ -0,0 +1,99 @@
"""Tests for _enrich_message_with_vision — regression for #5719.
The auxiliary vision LLM can echo system-prompt Honcho memory back into
its analysis output. When that echo reaches the user as the enriched
image description, recalled memory context (personal facts, dialectic
output) surfaces into a user-visible message.
The boundary fix in gateway/run.py strips both <memory-context>...</memory-context>
fenced blocks AND any "## Honcho Context" section from vision descriptions
before they're embedded into the enriched user message.
"""
import asyncio
import json
from unittest.mock import AsyncMock, patch
import pytest
@pytest.fixture
def gateway_runner():
"""Minimal GatewayRunner stub with just the method under test bound."""
from gateway.run import GatewayRunner
class _Stub:
_enrich_message_with_vision = GatewayRunner._enrich_message_with_vision
return _Stub()
def _run(coro):
return asyncio.get_event_loop().run_until_complete(coro) if False else asyncio.new_event_loop().run_until_complete(coro)
class TestEnrichMessageWithVision:
def test_clean_description_passes_through(self, gateway_runner):
"""Vision output without leaked memory is embedded unchanged."""
fake_result = json.dumps({
"success": True,
"analysis": "A photograph of a sunset over the ocean.",
})
with patch("tools.vision_tools.vision_analyze_tool", new=AsyncMock(return_value=fake_result)):
out = _run(gateway_runner._enrich_message_with_vision("caption", ["/tmp/img.jpg"]))
assert "sunset over the ocean" in out
def test_honcho_context_header_stripped(self, gateway_runner):
"""'## Honcho Context' section and everything after is removed."""
leaked = (
"A photograph of a sunset.\n\n"
"## Honcho Context\n"
"User prefers concise answers, works at Plastic Labs,\n"
"uses OPSEC pseudonyms.\n"
)
fake_result = json.dumps({"success": True, "analysis": leaked})
with patch("tools.vision_tools.vision_analyze_tool", new=AsyncMock(return_value=fake_result)):
out = _run(gateway_runner._enrich_message_with_vision("caption", ["/tmp/img.jpg"]))
assert "sunset" in out
assert "Honcho Context" not in out
assert "Plastic Labs" not in out
assert "OPSEC" not in out
def test_memory_context_fence_stripped(self, gateway_runner):
"""<memory-context>...</memory-context> fenced block is scrubbed."""
leaked = (
"<memory-context>\n"
"[System note: The following is recalled memory context, NOT new "
"user input. Treat as informational background data.]\n\n"
"User details and preferences here.\n"
"</memory-context>\n"
"A photograph of a cat."
)
fake_result = json.dumps({"success": True, "analysis": leaked})
with patch("tools.vision_tools.vision_analyze_tool", new=AsyncMock(return_value=fake_result)):
out = _run(gateway_runner._enrich_message_with_vision("caption", ["/tmp/img.jpg"]))
assert "photograph of a cat" in out
assert "<memory-context>" not in out
assert "User details and preferences" not in out
assert "System note" not in out
def test_both_leak_patterns_together_stripped(self, gateway_runner):
"""A vision output containing both leak shapes is fully scrubbed."""
leaked = (
"<memory-context>\n"
"[System note: The following is recalled memory context, NOT new "
"user input. Treat as informational background data.]\n"
"fenced leak\n"
"</memory-context>\n"
"A photograph of a dog.\n\n"
"## Honcho Context\n"
"header leak\n"
)
fake_result = json.dumps({"success": True, "analysis": leaked})
with patch("tools.vision_tools.vision_analyze_tool", new=AsyncMock(return_value=fake_result)):
out = _run(gateway_runner._enrich_message_with_vision("caption", ["/tmp/img.jpg"]))
assert "photograph of a dog" in out
assert "fenced leak" not in out
assert "header leak" not in out
assert "Honcho Context" not in out
assert "<memory-context>" not in out

View file

@ -3,6 +3,72 @@
from types import SimpleNamespace
class TestResolveApiKey:
"""Test _resolve_api_key with various config shapes."""
def test_returns_api_key_from_root(self, monkeypatch):
import plugins.memory.honcho.cli as honcho_cli
monkeypatch.setattr(honcho_cli, "_host_key", lambda: "hermes")
monkeypatch.delenv("HONCHO_API_KEY", raising=False)
assert honcho_cli._resolve_api_key({"apiKey": "root-key"}) == "root-key"
def test_returns_api_key_from_host_block(self, monkeypatch):
import plugins.memory.honcho.cli as honcho_cli
monkeypatch.setattr(honcho_cli, "_host_key", lambda: "hermes")
monkeypatch.delenv("HONCHO_API_KEY", raising=False)
cfg = {"hosts": {"hermes": {"apiKey": "host-key"}}, "apiKey": "root-key"}
assert honcho_cli._resolve_api_key(cfg) == "host-key"
def test_returns_local_for_base_url_without_api_key(self, monkeypatch):
import plugins.memory.honcho.cli as honcho_cli
monkeypatch.setattr(honcho_cli, "_host_key", lambda: "hermes")
monkeypatch.delenv("HONCHO_API_KEY", raising=False)
monkeypatch.delenv("HONCHO_BASE_URL", raising=False)
cfg = {"baseUrl": "http://localhost:8000"}
assert honcho_cli._resolve_api_key(cfg) == "local"
def test_returns_local_for_base_url_env_var(self, monkeypatch):
import plugins.memory.honcho.cli as honcho_cli
monkeypatch.setattr(honcho_cli, "_host_key", lambda: "hermes")
monkeypatch.delenv("HONCHO_API_KEY", raising=False)
monkeypatch.setenv("HONCHO_BASE_URL", "http://10.0.0.5:8000")
assert honcho_cli._resolve_api_key({}) == "local"
def test_returns_empty_when_nothing_configured(self, monkeypatch):
import plugins.memory.honcho.cli as honcho_cli
monkeypatch.setattr(honcho_cli, "_host_key", lambda: "hermes")
monkeypatch.delenv("HONCHO_API_KEY", raising=False)
monkeypatch.delenv("HONCHO_BASE_URL", raising=False)
assert honcho_cli._resolve_api_key({}) == ""
def test_rejects_garbage_base_url_without_scheme(self, monkeypatch):
"""A non-URL string in baseUrl (typo) must not pass the guard."""
import plugins.memory.honcho.cli as honcho_cli
monkeypatch.setattr(honcho_cli, "_host_key", lambda: "hermes")
monkeypatch.delenv("HONCHO_API_KEY", raising=False)
monkeypatch.delenv("HONCHO_BASE_URL", raising=False)
for garbage in ("true", "yes", "1", "localhost", "10.0.0.5"):
assert honcho_cli._resolve_api_key({"baseUrl": garbage}) == "", \
f"expected empty for garbage {garbage!r}"
def test_rejects_non_http_scheme_base_url(self, monkeypatch):
"""Only http/https schemes are accepted; file:// / ftp:// are not."""
import plugins.memory.honcho.cli as honcho_cli
monkeypatch.setattr(honcho_cli, "_host_key", lambda: "hermes")
monkeypatch.delenv("HONCHO_API_KEY", raising=False)
monkeypatch.delenv("HONCHO_BASE_URL", raising=False)
for bad in ("file:///etc/passwd", "ftp://host/", "ws://host/"):
assert honcho_cli._resolve_api_key({"baseUrl": bad}) == "", \
f"expected empty for scheme of {bad!r}"
def test_accepts_https_base_url(self, monkeypatch):
import plugins.memory.honcho.cli as honcho_cli
monkeypatch.setattr(honcho_cli, "_host_key", lambda: "hermes")
monkeypatch.delenv("HONCHO_API_KEY", raising=False)
monkeypatch.delenv("HONCHO_BASE_URL", raising=False)
assert honcho_cli._resolve_api_key({"baseUrl": "https://honcho.example.com"}) == "local"
class TestCmdStatus:
def test_reports_connection_failure_when_session_setup_fails(self, monkeypatch, capsys, tmp_path):
import plugins.memory.honcho.cli as honcho_cli

View file

@ -14,7 +14,7 @@ from plugins.memory.honcho.client import (
reset_honcho_client,
resolve_active_host,
resolve_config_path,
GLOBAL_CONFIG_PATH,
resolve_global_config_path,
HOST,
)
@ -360,7 +360,7 @@ class TestResolveConfigPath:
with patch.dict(os.environ, {"HERMES_HOME": str(hermes_home)}), \
patch.object(Path, "home", return_value=fake_home):
result = resolve_config_path()
assert result == GLOBAL_CONFIG_PATH
assert result == fake_home / ".honcho" / "config.json"
def test_falls_back_to_global_without_hermes_home_env(self, tmp_path):
fake_home = tmp_path / "fakehome"
@ -370,7 +370,18 @@ class TestResolveConfigPath:
patch.object(Path, "home", return_value=fake_home):
os.environ.pop("HERMES_HOME", None)
result = resolve_config_path()
assert result == GLOBAL_CONFIG_PATH
assert result == fake_home / ".honcho" / "config.json"
def test_global_fallback_uses_home_at_call_time(self, tmp_path):
fake_home = tmp_path / "fakehome"
fake_home.mkdir()
hermes_home = tmp_path / "hermes"
hermes_home.mkdir()
with patch.dict(os.environ, {"HERMES_HOME": str(hermes_home)}), \
patch.object(Path, "home", return_value=fake_home):
assert resolve_global_config_path() == fake_home / ".honcho" / "config.json"
assert resolve_config_path() == fake_home / ".honcho" / "config.json"
def test_from_global_config_uses_local_path(self, tmp_path):
hermes_home = tmp_path / "hermes"
@ -589,6 +600,28 @@ class TestGetHonchoClient:
mock_honcho.assert_called_once()
assert mock_honcho.call_args.kwargs["timeout"] == 88.0
@pytest.mark.skipif(
not importlib.util.find_spec("honcho"),
reason="honcho SDK not installed"
)
def test_defaults_to_30s_when_no_timeout_configured(self):
from plugins.memory.honcho.client import _DEFAULT_HTTP_TIMEOUT
fake_honcho = MagicMock(name="Honcho")
cfg = HonchoClientConfig(
api_key="test-key",
workspace_id="hermes",
environment="production",
)
with patch("honcho.Honcho", return_value=fake_honcho) as mock_honcho, \
patch("hermes_cli.config.load_config", return_value={}):
client = get_honcho_client(cfg)
assert client is fake_honcho
mock_honcho.assert_called_once()
assert mock_honcho.call_args.kwargs["timeout"] == _DEFAULT_HTTP_TIMEOUT
@pytest.mark.skipif(
not importlib.util.find_spec("honcho"),
reason="honcho SDK not installed"
@ -656,6 +689,82 @@ class TestResolveSessionNameGatewayKey:
assert ":" not in result
class TestResolveSessionNameLengthLimit:
"""Regression tests for Honcho's 100-char session ID limit (issue #13868).
Long gateway session keys (Matrix room+event IDs, Telegram supergroup
reply chains, Slack thread IDs with long workspace prefixes) can overflow
Honcho's 100-char session_id limit after sanitization. Before this fix,
every Honcho API call for those sessions 400'd with "session_id too long".
"""
HONCHO_MAX = 100
def test_short_gateway_key_unchanged(self):
"""Short keys must not get a hash suffix appended."""
config = HonchoClientConfig()
result = config.resolve_session_name(
gateway_session_key="agent:main:telegram:dm:8439114563",
)
# Unchanged fast-path: sanitize only, no truncation, no hash suffix.
assert result == "agent-main-telegram-dm-8439114563"
assert len(result) <= self.HONCHO_MAX
def test_key_at_exact_limit_unchanged(self):
"""A sanitized key that is exactly 100 chars must be returned as-is."""
key = "a" * self.HONCHO_MAX
config = HonchoClientConfig()
result = config.resolve_session_name(gateway_session_key=key)
assert result == key
assert len(result) == self.HONCHO_MAX
def test_long_gateway_key_truncated_to_limit(self):
"""An over-limit sanitized key must truncate to exactly 100 chars."""
key = "!roomid:matrix.example.org|" + "$event_" + ("a" * 300)
config = HonchoClientConfig()
result = config.resolve_session_name(gateway_session_key=key)
assert result is not None
assert len(result) == self.HONCHO_MAX
def test_truncation_is_deterministic(self):
"""Same long key must always produce the same truncated session ID."""
key = "matrix-" + ("a" * 300)
config = HonchoClientConfig()
first = config.resolve_session_name(gateway_session_key=key)
second = config.resolve_session_name(gateway_session_key=key)
assert first == second
def test_truncated_result_respects_char_allowlist(self):
"""Truncated result must still match Honcho's [a-zA-Z0-9_-] allowlist."""
import re
key = "slack:T12345:thread-reply:" + ("x" * 300) + ":with:colons:and:slashes/here"
config = HonchoClientConfig()
result = config.resolve_session_name(gateway_session_key=key)
assert result is not None
assert re.fullmatch(r"[a-zA-Z0-9_-]+", result)
def test_distinct_long_keys_do_not_collide(self):
"""Two long keys sharing a prefix must produce different truncated IDs."""
prefix = "matrix:!room:example.org|" + "a" * 200
key_a = prefix + "-suffix-alpha"
key_b = prefix + "-suffix-beta"
config = HonchoClientConfig()
result_a = config.resolve_session_name(gateway_session_key=key_a)
result_b = config.resolve_session_name(gateway_session_key=key_b)
assert result_a != result_b
assert len(result_a) == self.HONCHO_MAX
assert len(result_b) == self.HONCHO_MAX
def test_truncated_result_has_hash_suffix(self):
"""Truncated IDs must end with '-<8 hex chars>' for collision resistance."""
import re
key = "matrix-" + ("a" * 300)
config = HonchoClientConfig()
result = config.resolve_session_name(gateway_session_key=key)
# Last 9 chars: '-' + 8 hex chars.
assert re.search(r"-[0-9a-f]{8}$", result)
class TestResetHonchoClient:
def test_reset_clears_singleton(self):
import plugins.memory.honcho.client as mod

View file

@ -0,0 +1,307 @@
"""Tests for the ``pinPeerName`` config flag (#14984).
By default, when Hermes runs under a gateway (Telegram, Discord, Slack, ...)
it passes the platform-native user ID as ``runtime_user_peer_name`` into
``HonchoSessionManager``. That ID wins over any configured ``peer_name``
so multi-user bots scope memory per user.
For a single-user personal deployment where the user connects over multiple
platforms, that default forks memory into one Honcho peer per platform
(Telegram UID, Discord snowflake, Slack user ID, ...). The user asked for
an opt-in knob that pins the user peer to ``peer_name`` from ``honcho.json``
so the same person's memory stays unified regardless of which platform the
turn arrived on ``hosts.<host>.pinPeerName: true`` (or root-level
``pinPeerName: true``).
These tests exercise both the config parsing (``client.py::from_global_config``)
and the resolution order (``session.py::get_or_create``). We stub the
Honcho API calls so we can assert the chosen ``user_peer_id`` without
touching the network.
"""
import json
from unittest.mock import MagicMock
import pytest
from plugins.memory.honcho.client import HonchoClientConfig
from plugins.memory.honcho.session import HonchoSessionManager
# ---------------------------------------------------------------------------
# Config parsing
# ---------------------------------------------------------------------------
class TestPinPeerNameConfigParsing:
def test_default_is_false(self):
"""Default preserves existing behaviour — multi-user bots unaffected."""
config = HonchoClientConfig()
assert config.pin_peer_name is False
def test_root_level_true(self, tmp_path, monkeypatch):
config_file = tmp_path / "honcho.json"
config_file.write_text(json.dumps({
"apiKey": "k",
"peerName": "Igor",
"pinPeerName": True,
}))
monkeypatch.setenv("HERMES_HOME", str(tmp_path / "isolated"))
config = HonchoClientConfig.from_global_config(config_path=config_file)
assert config.pin_peer_name is True
assert config.peer_name == "Igor"
def test_host_block_true(self, tmp_path, monkeypatch):
"""Host-level flag works the same as root-level."""
config_file = tmp_path / "honcho.json"
config_file.write_text(json.dumps({
"apiKey": "k",
"peerName": "Igor",
"hosts": {
"hermes": {"pinPeerName": True},
},
}))
monkeypatch.setenv("HERMES_HOME", str(tmp_path / "isolated"))
config = HonchoClientConfig.from_global_config(config_path=config_file)
assert config.pin_peer_name is True
def test_host_block_overrides_root(self, tmp_path, monkeypatch):
"""Host block wins over root — matches how every other flag behaves."""
config_file = tmp_path / "honcho.json"
config_file.write_text(json.dumps({
"apiKey": "k",
"peerName": "Igor",
"pinPeerName": True,
"hosts": {
"hermes": {"pinPeerName": False},
},
}))
monkeypatch.setenv("HERMES_HOME", str(tmp_path / "isolated"))
config = HonchoClientConfig.from_global_config(config_path=config_file)
assert config.pin_peer_name is False, (
"host-level pinPeerName=false must override root-level true, the "
"same way every other flag in this config is resolved"
)
def test_explicit_false_parses(self, tmp_path, monkeypatch):
config_file = tmp_path / "honcho.json"
config_file.write_text(json.dumps({
"apiKey": "k",
"peerName": "Igor",
"pinPeerName": False,
}))
monkeypatch.setenv("HERMES_HOME", str(tmp_path / "isolated"))
config = HonchoClientConfig.from_global_config(config_path=config_file)
assert config.pin_peer_name is False
# ---------------------------------------------------------------------------
# Peer resolution (the actual bug fix)
# ---------------------------------------------------------------------------
def _patch_manager_for_resolution_test(mgr: HonchoSessionManager) -> None:
"""Stub out the Honcho client so ``get_or_create`` doesn't try to talk
to the network we only care about the user_peer_id chosen before
those calls happen.
"""
fake_peer = MagicMock()
mgr._get_or_create_peer = MagicMock(return_value=fake_peer)
mgr._get_or_create_honcho_session = MagicMock(
return_value=(MagicMock(), [])
)
class TestPeerResolutionOrder:
"""Matrix of (runtime_id, pin_peer_name, peer_name) → expected user_peer_id."""
def _config(self, *, peer_name: str | None, pin_peer_name: bool) -> HonchoClientConfig:
# The test doesn't need auth / Honcho — disable the provider so
# the manager doesn't try to open a real client.
return HonchoClientConfig(
api_key="test-key",
peer_name=peer_name,
pin_peer_name=pin_peer_name,
enabled=False,
write_frequency="turn", # avoid spawning the async writer thread
)
def test_runtime_wins_when_pin_is_false(self):
"""Regression guard: default behaviour must stay unchanged.
Multi-user bots rely on the platform-native ID winning."""
mgr = HonchoSessionManager(
honcho=MagicMock(),
config=self._config(peer_name="Igor", pin_peer_name=False),
runtime_user_peer_name="86701400", # e.g. Telegram UID
)
_patch_manager_for_resolution_test(mgr)
session = mgr.get_or_create("telegram:86701400")
assert session.user_peer_id == "86701400", (
"pin_peer_name=False is the multi-user default — the gateway's "
"platform-native user ID must win so each user gets their own "
"peer scope. If this regresses, every Telegram/Discord/Slack "
"bot immediately merges memory across users."
)
def test_config_wins_when_pin_is_true(self):
"""The #14984 fix: single-user deployments opt into config pinning."""
mgr = HonchoSessionManager(
honcho=MagicMock(),
config=self._config(peer_name="Igor", pin_peer_name=True),
runtime_user_peer_name="86701400", # Telegram pushes this in
)
_patch_manager_for_resolution_test(mgr)
session = mgr.get_or_create("telegram:86701400")
assert session.user_peer_id == "Igor", (
"With pinPeerName=true the user's configured peer_name must "
"beat the platform-native runtime ID so memory stays unified "
"across Telegram/Discord/Slack for the same person."
)
def test_pin_noop_when_peer_name_missing(self):
"""Safety: pinPeerName alone (no peer_name) must not silently drop
the runtime identity. Without a configured peer_name there's
nothing to pin to fall back to runtime as before."""
mgr = HonchoSessionManager(
honcho=MagicMock(),
config=self._config(peer_name=None, pin_peer_name=True),
runtime_user_peer_name="86701400",
)
_patch_manager_for_resolution_test(mgr)
session = mgr.get_or_create("telegram:86701400")
assert session.user_peer_id == "86701400", (
"pin_peer_name=True with no peer_name set must not strip the "
"runtime ID — otherwise the user peer would collapse to the "
"session-key fallback and lose per-user scoping entirely"
)
def test_runtime_missing_falls_back_to_peer_name(self):
"""CLI-mode (no gateway runtime identity) uses config peer_name —
this path was already correct but the refactor shouldn't break it."""
mgr = HonchoSessionManager(
honcho=MagicMock(),
config=self._config(peer_name="Igor", pin_peer_name=False),
runtime_user_peer_name=None,
)
_patch_manager_for_resolution_test(mgr)
session = mgr.get_or_create("cli:local")
assert session.user_peer_id == "Igor"
def test_everything_missing_falls_back_to_session_key(self):
"""Deepest fallback: no runtime identity, no peer_name, no pin.
Must still produce a deterministic peer_id from the session key."""
# Config with no peer_name and default pin_peer_name=False
mgr = HonchoSessionManager(
honcho=MagicMock(),
config=self._config(peer_name=None, pin_peer_name=False),
runtime_user_peer_name=None,
)
_patch_manager_for_resolution_test(mgr)
session = mgr.get_or_create("telegram:123")
assert session.user_peer_id == "user-telegram-123"
def test_pin_does_not_affect_assistant_peer(self):
"""The flag only pins the USER peer — the assistant peer continues
to come from ``ai_peer`` and must not be touched."""
cfg = HonchoClientConfig(
api_key="k",
peer_name="Igor",
pin_peer_name=True,
ai_peer="hermes-assistant",
enabled=False,
write_frequency="turn",
)
mgr = HonchoSessionManager(
honcho=MagicMock(),
config=cfg,
runtime_user_peer_name="86701400",
)
_patch_manager_for_resolution_test(mgr)
session = mgr.get_or_create("telegram:86701400")
assert session.user_peer_id == "Igor"
assert session.assistant_peer_id == "hermes-assistant"
class TestCrossPlatformMemoryUnification:
"""The user-visible outcome of the #14984 fix: the same physical user
talking to Hermes via Telegram AND Discord should land on ONE peer
(not two) when pinPeerName is opted in.
"""
def _config_pinned(self) -> HonchoClientConfig:
return HonchoClientConfig(
api_key="k",
peer_name="Igor",
pin_peer_name=True,
enabled=False,
write_frequency="turn",
)
def test_telegram_and_discord_collapse_to_one_peer_when_pinned(self):
"""Single-user deployment: Telegram UID and Discord snowflake
both resolve to the same configured peer_name."""
# Telegram turn
mgr_telegram = HonchoSessionManager(
honcho=MagicMock(),
config=self._config_pinned(),
runtime_user_peer_name="86701400",
)
_patch_manager_for_resolution_test(mgr_telegram)
telegram_session = mgr_telegram.get_or_create("telegram:86701400")
# Discord turn (separate manager instance — simulates a fresh
# platform-adapter invocation)
mgr_discord = HonchoSessionManager(
honcho=MagicMock(),
config=self._config_pinned(),
runtime_user_peer_name="1348750102029926454",
)
_patch_manager_for_resolution_test(mgr_discord)
discord_session = mgr_discord.get_or_create("discord:1348750102029926454")
assert telegram_session.user_peer_id == "Igor"
assert discord_session.user_peer_id == "Igor"
assert telegram_session.user_peer_id == discord_session.user_peer_id, (
"cross-platform memory unification is the whole point of "
"pinPeerName — both platforms must land on the same Honcho peer"
)
def test_multiuser_default_keeps_platforms_separate(self):
"""Negative control: with pinPeerName=false (the default), two
different platform IDs must produce two different peers so
multi-user bots don't merge users."""
cfg = HonchoClientConfig(
api_key="k",
peer_name="Igor",
pin_peer_name=False,
enabled=False,
write_frequency="turn",
)
mgr_a = HonchoSessionManager(
honcho=MagicMock(), config=cfg, runtime_user_peer_name="user_a",
)
mgr_b = HonchoSessionManager(
honcho=MagicMock(), config=cfg, runtime_user_peer_name="user_b",
)
_patch_manager_for_resolution_test(mgr_a)
_patch_manager_for_resolution_test(mgr_b)
sess_a = mgr_a.get_or_create("telegram:a")
sess_b = mgr_b.get_or_create("telegram:b")
assert sess_a.user_peer_id == "user_a"
assert sess_b.user_peer_id == "user_b"
assert sess_a.user_peer_id != sess_b.user_peer_id, (
"multi-user default MUST keep users separate — a regression "
"here would silently merge unrelated users' memory"
)

View file

@ -0,0 +1,146 @@
"""Integration tests for the sync worker's integration into HonchoMemoryProvider.
Layer 1 (fire-and-forget): sync_turn must return in < 20ms even when the
Honcho backend would block for seconds.
Layer 3 (breaker + backlog): when the breaker trips open, sync_turn tasks
land in the provider's in-memory backlog instead of running. When the
breaker closes (via probe recovery), the backlog drains on the next
sync_turn call.
"""
from __future__ import annotations
import threading
import time
from unittest.mock import MagicMock
from plugins.memory.honcho import HonchoMemoryProvider
from plugins.memory.honcho.sync_worker import SyncTask
def _wait_until(predicate, timeout: float = 2.0, interval: float = 0.01) -> bool:
deadline = time.monotonic() + timeout
while time.monotonic() < deadline:
if predicate():
return True
time.sleep(interval)
return False
def _make_provider() -> HonchoMemoryProvider:
provider = HonchoMemoryProvider()
provider._manager = MagicMock()
session = MagicMock()
provider._manager.get_or_create.return_value = session
provider._session_key = "agent:main:test"
provider._cron_skipped = False
provider._config = MagicMock(message_max_chars=25000)
return provider
class TestLayer1FireAndForget:
def test_sync_turn_returns_immediately_with_slow_backend(self):
"""sync_turn must not block even if the backend flush takes seconds."""
provider = _make_provider()
# Make the flush block for up to 2s.
flush_started = threading.Event()
release_flush = threading.Event()
def slow_flush(_session):
flush_started.set()
release_flush.wait(timeout=3.0)
provider._manager._flush_session.side_effect = slow_flush
try:
t0 = time.monotonic()
provider.sync_turn("hello", "world")
elapsed = time.monotonic() - t0
assert elapsed < 0.1, f"sync_turn blocked for {elapsed:.3f}s"
# Confirm the worker did pick it up
assert flush_started.wait(timeout=1.0)
finally:
release_flush.set()
provider.shutdown()
def test_multiple_sync_turns_do_not_serialize_caller(self):
"""Back-to-back sync_turns must not block on prior turn's completion."""
provider = _make_provider()
gate = threading.Event()
provider._manager._flush_session.side_effect = lambda _s: gate.wait(timeout=3.0)
try:
t0 = time.monotonic()
for _ in range(5):
provider.sync_turn("u", "a")
elapsed = time.monotonic() - t0
# Without fire-and-forget, the old code would serialize on
# the previous turn's join(timeout=5.0). 5 turns × 5s = 25s
# worst case. We assert << 1s.
assert elapsed < 0.2, f"5 sync_turns took {elapsed:.3f}s"
finally:
gate.set()
provider.shutdown()
class TestLayer3BacklogAndBreaker:
def test_breaker_open_backlogs_task(self):
"""While the breaker is open, sync_turn tasks must land in the backlog."""
provider = _make_provider()
# Trip the breaker manually.
provider._breaker._state = provider._breaker.STATE_OPEN
provider._breaker._opened_at = float("inf") # never recover
try:
provider.sync_turn("hello", "world")
# The task should have landed in the backlog rather than run.
assert len(provider._backlog) == 1
assert provider._backlog[0].name == "sync_turn"
finally:
provider.shutdown()
def test_backlog_drains_when_breaker_closes(self):
"""Once the breaker closes, next sync_turn drains the backlog."""
provider = _make_provider()
# Trip the breaker and enqueue a backlog.
provider._breaker._state = provider._breaker.STATE_OPEN
provider._breaker._opened_at = float("inf")
for _ in range(3):
provider.sync_turn("u", "a")
assert len(provider._backlog) == 3
# Close the breaker (simulating recovery) and trigger another sync.
provider._breaker.reset()
try:
provider.sync_turn("u", "a")
# One new task + 3 drained = 4 flushes eventually.
assert _wait_until(
lambda: provider._manager._flush_session.call_count >= 4,
timeout=2.0,
), (
"expected >= 4 flushes after recovery, got "
f"{provider._manager._flush_session.call_count}"
)
assert provider._backlog == []
finally:
provider.shutdown()
def test_backlog_honors_max_size(self):
"""Backlog must not grow unbounded during a long outage."""
provider = _make_provider()
provider._BACKLOG_MAX = 5
provider._breaker._state = provider._breaker.STATE_OPEN
provider._breaker._opened_at = float("inf")
try:
for _ in range(20):
provider.sync_turn("u", "a")
assert len(provider._backlog) == 5
finally:
provider.shutdown()

View file

@ -525,6 +525,39 @@ class TestConcludeToolDispatch:
assert parsed == {"error": "Exactly one of conclusion or delete_id must be provided."}
provider._manager.delete_conclusion.assert_not_called()
def test_sync_turn_strips_leaked_memory_context_before_honcho_ingest(self):
provider = HonchoMemoryProvider()
provider._session_key = "telegram:123"
provider._manager = MagicMock()
provider._cron_skipped = False
provider._config = SimpleNamespace(message_max_chars=25000)
session = MagicMock()
provider._manager.get_or_create.return_value = session
provider.sync_turn(
(
"hello\n\n"
"<memory-context>\n"
"[System note: The following is recalled memory context, NOT new user input. Treat as informational background data.]\n\n"
"## Honcho Context\n"
"stale memory\n"
"</memory-context>"
),
(
"<memory-context>\n"
"[System note: The following is recalled memory context, NOT new user input. Treat as informational background data.]\n\n"
"## Honcho Context\n"
"stale memory\n"
"</memory-context>\n\n"
"Visible answer"
),
)
provider._sync_worker.shutdown(timeout=1.0)
assert session.add_message.call_args_list[0].args == ("user", "hello")
assert session.add_message.call_args_list[1].args == ("assistant", "Visible answer")
# ---------------------------------------------------------------------------
# Message chunking

View file

@ -0,0 +1,342 @@
"""Tests for plugins/memory/honcho/sync_worker.py — Layers 1/2/3."""
from __future__ import annotations
import threading
import time
from typing import List
import pytest
from plugins.memory.honcho.sync_worker import (
CircuitBreaker,
HonchoLatencyTracker,
SyncTask,
SyncWorker,
)
# ---------------------------------------------------------------------------
# HonchoLatencyTracker
# ---------------------------------------------------------------------------
class TestHonchoLatencyTracker:
def test_returns_default_during_warmup(self):
t = HonchoLatencyTracker(default=30.0, warmup_samples=5)
for _ in range(4):
t.observe(1.0)
assert t.timeout() == 30.0
def test_adapts_to_observed_p95(self):
t = HonchoLatencyTracker(default=30.0, floor=5.0, headroom=3.0, warmup_samples=5)
# 10 samples at 1s, 10 samples at 2s — p95 should land at the 2s end
for _ in range(10):
t.observe(1.0)
for _ in range(10):
t.observe(2.0)
t_out = t.timeout()
assert 5.0 <= t_out <= 7.0 # 3 × 2.0 with some rounding latitude
def test_respects_floor(self):
t = HonchoLatencyTracker(default=30.0, floor=5.0, headroom=3.0, warmup_samples=3)
# Very fast samples — 3 × 0.1 = 0.3 < floor → floor applies
for _ in range(10):
t.observe(0.1)
assert t.timeout() == 5.0
def test_rejects_nan_and_negative(self):
t = HonchoLatencyTracker(warmup_samples=1)
t.observe(float("nan"))
t.observe(-1.0)
# No valid samples → still default
assert t.timeout() == t._default
def test_rolling_window_discards_old(self):
t = HonchoLatencyTracker(window=5, default=30.0, floor=0.1, headroom=1.0, warmup_samples=1)
for _ in range(5):
t.observe(100.0)
assert t.timeout() >= 50.0 # dominated by 100s samples
for _ in range(5):
t.observe(0.5)
# Old samples rolled out, now dominated by 0.5s
assert t.timeout() <= 1.0
def test_thread_safe_concurrent_observations(self):
t = HonchoLatencyTracker(window=1000, warmup_samples=1)
def observer(val: float):
for _ in range(200):
t.observe(val)
threads = [
threading.Thread(target=observer, args=(i * 0.1,)) for i in range(5)
]
for th in threads:
th.start()
for th in threads:
th.join()
# No crash + timeout() returns a real number
assert t.timeout() > 0
# ---------------------------------------------------------------------------
# CircuitBreaker
# ---------------------------------------------------------------------------
class _Clock:
"""Test double for time.monotonic — manually advanced."""
def __init__(self, start: float = 0.0) -> None:
self.now = start
def __call__(self) -> float:
return self.now
class TestCircuitBreaker:
def test_starts_closed(self):
cb = CircuitBreaker()
assert cb.state == cb.STATE_CLOSED
assert cb.allow() is True
def test_opens_after_threshold_failures(self):
cb = CircuitBreaker(failure_threshold=3)
for _ in range(2):
cb.record_failure()
assert cb.state == cb.STATE_CLOSED
cb.record_failure()
assert cb.state == cb.STATE_OPEN
assert cb.allow() is False
def test_resets_counter_on_success(self):
cb = CircuitBreaker(failure_threshold=3)
cb.record_failure()
cb.record_failure()
cb.record_success()
cb.record_failure()
assert cb.state == cb.STATE_CLOSED
def test_transitions_to_half_open_after_probe_interval(self):
clock = _Clock()
cb = CircuitBreaker(failure_threshold=2, probe_interval=60.0, time_fn=clock)
cb.record_failure()
cb.record_failure()
assert cb.state == cb.STATE_OPEN
clock.now = 30.0
assert cb.state == cb.STATE_OPEN # still within probe window
clock.now = 61.0
assert cb.state == cb.STATE_HALF_OPEN
assert cb.allow() is True # probe permitted
def test_half_open_success_closes_breaker(self):
clock = _Clock()
cb = CircuitBreaker(failure_threshold=2, probe_interval=60.0, time_fn=clock)
cb.record_failure()
cb.record_failure()
clock.now = 61.0
_ = cb.state # transition
cb.record_success()
assert cb.state == cb.STATE_CLOSED
def test_half_open_failure_reopens_breaker(self):
clock = _Clock()
cb = CircuitBreaker(failure_threshold=2, probe_interval=60.0, time_fn=clock)
cb.record_failure()
cb.record_failure()
clock.now = 61.0
_ = cb.state
cb.record_failure()
assert cb.state == cb.STATE_OPEN
def test_reset_returns_to_closed(self):
cb = CircuitBreaker(failure_threshold=1)
cb.record_failure()
assert cb.state == cb.STATE_OPEN
cb.reset()
assert cb.state == cb.STATE_CLOSED
assert cb.allow() is True
# ---------------------------------------------------------------------------
# SyncWorker
# ---------------------------------------------------------------------------
def _wait_until(predicate, timeout: float = 2.0, interval: float = 0.01) -> bool:
deadline = time.monotonic() + timeout
while time.monotonic() < deadline:
if predicate():
return True
time.sleep(interval)
return False
class TestSyncWorkerBasics:
def test_enqueue_runs_task_on_worker_thread(self):
worker = SyncWorker()
try:
results: List[str] = []
worker.enqueue(SyncTask(fn=lambda: results.append("ran"), name="t"))
assert _wait_until(lambda: results == ["ran"])
finally:
worker.shutdown(timeout=2.0)
def test_enqueue_returns_immediately(self):
worker = SyncWorker()
try:
slow_event = threading.Event()
def slow_task():
slow_event.wait(timeout=2.0)
t0 = time.monotonic()
worker.enqueue(SyncTask(fn=slow_task, name="slow"))
elapsed = time.monotonic() - t0
assert elapsed < 0.1, f"enqueue blocked for {elapsed}s"
slow_event.set()
finally:
worker.shutdown(timeout=2.0)
def test_tasks_execute_in_fifo_order(self):
worker = SyncWorker()
try:
results: List[int] = []
for i in range(10):
worker.enqueue(SyncTask(fn=lambda i=i: results.append(i), name=f"t{i}"))
assert _wait_until(lambda: len(results) == 10)
assert results == list(range(10))
finally:
worker.shutdown(timeout=2.0)
def test_task_exception_does_not_kill_worker(self):
worker = SyncWorker()
try:
survived: List[str] = []
worker.enqueue(SyncTask(fn=lambda: (_ for _ in ()).throw(RuntimeError("boom")), name="boom"))
worker.enqueue(SyncTask(fn=lambda: survived.append("ok"), name="ok"))
assert _wait_until(lambda: survived == ["ok"])
finally:
worker.shutdown(timeout=2.0)
def test_on_failure_callback_invoked_when_task_raises(self):
worker = SyncWorker()
try:
failures: List[BaseException] = []
worker.enqueue(
SyncTask(
fn=lambda: (_ for _ in ()).throw(ValueError("nope")),
name="fail",
on_failure=failures.append,
)
)
assert _wait_until(lambda: len(failures) == 1)
assert isinstance(failures[0], ValueError)
finally:
worker.shutdown(timeout=2.0)
class TestSyncWorkerBackpressure:
def test_queue_overflow_drops_oldest_task(self):
worker = SyncWorker(max_queue=3)
try:
block = threading.Event()
ran: List[int] = []
dropped: List[int] = []
# Fill the queue with a blocker + 3 more waiting tasks.
worker.enqueue(SyncTask(fn=lambda: block.wait(timeout=3.0), name="blocker"))
for i in range(3):
worker.enqueue(
SyncTask(
fn=lambda i=i: ran.append(i),
name=f"t{i}",
on_failure=lambda e, i=i: dropped.append(i),
)
)
# Now try to enqueue a 4th task — should evict the oldest queued
# (task 0) to make room.
worker.enqueue(SyncTask(fn=lambda: ran.append(99), name="overflow"))
# Queue overflow dropped exactly one task (task 0).
block.set()
assert _wait_until(lambda: 99 in ran)
assert 0 in dropped or ran == [1, 2, 99] or ran == [1, 2, 3, 99]
finally:
worker.shutdown(timeout=3.0)
class TestSyncWorkerIntegration:
def test_breaker_open_skips_task(self):
breaker = CircuitBreaker(failure_threshold=1)
breaker.record_failure()
assert breaker.state == breaker.STATE_OPEN
worker = SyncWorker(breaker=breaker)
try:
failures: List[BaseException] = []
ran: List[str] = []
worker.enqueue(
SyncTask(
fn=lambda: ran.append("should_not_run"),
name="blocked",
on_failure=failures.append,
)
)
# Give the worker a moment — but the task should never run.
time.sleep(0.2)
assert ran == []
assert len(failures) == 1
assert "circuit breaker open" in str(failures[0])
finally:
worker.shutdown(timeout=2.0)
def test_successful_task_feeds_latency_tracker_and_resets_breaker(self):
tracker = HonchoLatencyTracker(warmup_samples=1)
breaker = CircuitBreaker(failure_threshold=2)
breaker.record_failure()
assert breaker._consecutive_failures == 1
worker = SyncWorker(latency_tracker=tracker, breaker=breaker)
try:
worker.enqueue(SyncTask(fn=lambda: time.sleep(0.05), name="t"))
assert _wait_until(lambda: len(tracker._samples) >= 1)
assert breaker.state == breaker.STATE_CLOSED
assert breaker._consecutive_failures == 0
finally:
worker.shutdown(timeout=2.0)
def test_failed_task_increments_breaker(self):
breaker = CircuitBreaker(failure_threshold=2)
worker = SyncWorker(breaker=breaker)
try:
for _ in range(2):
worker.enqueue(
SyncTask(
fn=lambda: (_ for _ in ()).throw(RuntimeError("x")),
name="fail",
)
)
assert _wait_until(lambda: breaker.state == breaker.STATE_OPEN)
finally:
worker.shutdown(timeout=2.0)
class TestSyncWorkerShutdown:
def test_shutdown_is_idempotent(self):
worker = SyncWorker()
worker.enqueue(SyncTask(fn=lambda: None, name="t"))
worker.shutdown(timeout=2.0)
worker.shutdown(timeout=2.0) # Must not raise
assert not worker.is_running()
def test_enqueue_after_shutdown_calls_on_failure(self):
worker = SyncWorker()
worker.shutdown(timeout=2.0)
failures: List[BaseException] = []
ok = worker.enqueue(
SyncTask(fn=lambda: None, name="late", on_failure=failures.append)
)
assert ok is False
assert len(failures) == 1

View file

@ -1441,6 +1441,20 @@ class TestBuildAssistantMessage:
result = agent._build_assistant_message(msg, "stop")
assert result["content"] == "No thinking here."
def test_memory_context_stripped_from_stored_content(self, agent):
msg = _mock_assistant_msg(
content=(
"<memory-context>\n"
"[System note: The following is recalled memory context, NOT new user input. Treat as informational background data.]\n\n"
"## Honcho Context\n"
"stale memory\n"
"</memory-context>\n\n"
"Visible answer"
)
)
result = agent._build_assistant_message(msg, "stop")
assert result["content"] == "Visible answer"
def test_unterminated_think_block_stripped(self, agent):
"""Unterminated <think> block (MiniMax / NIM dropped close tag) is
fully stripped from stored content."""

View file

@ -1088,6 +1088,100 @@ def test_interim_commentary_is_not_marked_already_streamed_when_stream_callback_
}
def test_interim_commentary_strips_leaked_memory_context(monkeypatch):
agent = _build_agent(monkeypatch)
observed = {}
agent.interim_assistant_callback = lambda text, *, already_streamed=False: observed.update(
{"text": text, "already_streamed": already_streamed}
)
leaked = (
"<memory-context>\n"
"[System note: The following is recalled memory context, NOT new user input. Treat as informational background data.]\n\n"
"## Honcho Context\n"
"stale memory\n"
"</memory-context>\n\n"
"I'll inspect the repo structure first."
)
agent._emit_interim_assistant_message({"role": "assistant", "content": leaked})
assert observed == {
"text": "I'll inspect the repo structure first.",
"already_streamed": False,
}
def test_stream_delta_strips_leaked_memory_context(monkeypatch):
agent = _build_agent(monkeypatch)
observed = []
agent.stream_delta_callback = observed.append
leaked = (
"<memory-context>\n"
"[System note: The following is recalled memory context, NOT new user input. Treat as informational background data.]\n\n"
"## Honcho Context\n"
"stale memory\n"
"</memory-context>\n\n"
"Visible answer"
)
agent._fire_stream_delta(leaked)
assert observed == ["Visible answer"]
def test_stream_delta_strips_leaked_memory_context_across_chunks(monkeypatch):
"""Regression for #5719 — the real streaming case.
Providers typically emit 1-80 char chunks, so the memory-context open
tag, system-note line, payload, and close tag each arrive in separate
deltas. The per-delta sanitize_context() regex cannot survive that
only a stateful scrubber can. None of the payload, system-note
text, or "## Honcho Context" header may reach the delta callback.
"""
agent = _build_agent(monkeypatch)
observed = []
agent.stream_delta_callback = observed.append
deltas = [
"<memory-context>\n[System note: The following",
" is recalled memory context, NOT new user input. ",
"Treat as informational background data.]\n\n",
"## Honcho Context\n",
"stale memory about eri\n",
"</memory-context>\n\n",
"Visible answer",
]
for d in deltas:
agent._fire_stream_delta(d)
combined = "".join(observed)
assert "Visible answer" in combined
# None of the leaked payload may surface.
assert "System note" not in combined
assert "Honcho Context" not in combined
assert "stale memory" not in combined
assert "<memory-context>" not in combined
assert "</memory-context>" not in combined
def test_stream_delta_scrubber_resets_between_turns(monkeypatch):
"""An unterminated span from a prior turn must not taint the next turn."""
agent = _build_agent(monkeypatch)
# Simulate a hung span carried over — directly populate the scrubber.
agent._stream_context_scrubber.feed("pre <memory-context>leaked")
# Normally run_conversation() resets the scrubber at turn start.
agent._stream_context_scrubber.reset()
observed = []
agent.stream_delta_callback = observed.append
agent._fire_stream_delta("clean new turn text")
assert "".join(observed) == "clean new turn text"
def test_run_conversation_codex_continues_after_commentary_phase_message(monkeypatch):
agent = _build_agent(monkeypatch)
responses = [

View file

@ -229,6 +229,24 @@ class TestMessageStorage:
messages = db.get_messages("s1")
assert messages[0]["finish_reason"] == "stop"
def test_get_messages_as_conversation_strips_leaked_memory_context(self, db):
db.create_session(session_id="s1", source="cli")
db.append_message(
"s1",
role="assistant",
content=(
"<memory-context>\n"
"[System note: The following is recalled memory context, NOT new user input. Treat as informational background data.]\n\n"
"## Honcho Context\n"
"stale memory\n"
"</memory-context>\n\n"
"Visible answer"
),
)
conv = db.get_messages_as_conversation("s1")
assert conv == [{"role": "assistant", "content": "Visible answer"}]
def test_reasoning_persisted_and_restored(self, db):
"""Reasoning text is stored for assistant messages and restored by
get_messages_as_conversation() so providers receive coherent multi-turn