mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
Merge f512fdf697 into 4fade39c90
This commit is contained in:
commit
2b06dd23dd
20 changed files with 2283 additions and 78 deletions
|
|
@ -63,6 +63,117 @@ def sanitize_context(text: str) -> str:
|
|||
return text
|
||||
|
||||
|
||||
class StreamingContextScrubber:
    """Stateful scrubber for streaming text that may contain split memory-context spans.

    The one-shot ``sanitize_context`` regex cannot survive chunk boundaries:
    a ``<memory-context>`` opened in one delta and closed in a later delta
    leaks its payload to the UI because the non-greedy block regex needs
    both tags in one string. This scrubber instead runs a tiny state
    machine across deltas, holding back any tail that could be the start
    of a tag and dropping everything inside a span (including the
    system-note line).

    Usage::

        scrubber = StreamingContextScrubber()
        for delta in stream:
            visible = scrubber.feed(delta)
            if visible:
                emit(visible)
        trailing = scrubber.flush()  # at end of stream
        if trailing:
            emit(trailing)

    The scrubber is re-entrant per agent instance. Callers building new
    top-level responses (new turn) should create a fresh scrubber or call
    ``reset()``.
    """

    _OPEN_TAG = "<memory-context>"
    _CLOSE_TAG = "</memory-context>"

    def __init__(self) -> None:
        self._in_span: bool = False
        self._buf: str = ""

    def reset(self) -> None:
        """Forget any held-back tail and leave span-tracking state."""
        self._in_span = False
        self._buf = ""

    def feed(self, text: str) -> str:
        """Return the visible portion of ``text`` after scrubbing.

        A trailing fragment that might be the beginning of an open/close
        tag is buffered internally and reconsidered on the next ``feed()``
        call, or resolved by ``flush()`` at end of stream.
        """
        if not text:
            return ""
        pending = self._buf + text
        self._buf = ""
        visible: list[str] = []
        while pending:
            # Look for whichever tag would flip the current state.
            tag = self._CLOSE_TAG if self._in_span else self._OPEN_TAG
            pos = pending.lower().find(tag)
            if pos == -1:
                # Tag absent — but its beginning may straddle the chunk
                # boundary, so hold back the longest suffix that could
                # still grow into the tag. Span content is dropped;
                # out-of-span content is shown.
                keep = self._max_partial_suffix(pending, tag)
                if not self._in_span:
                    visible.append(pending[: len(pending) - keep] if keep else pending)
                if keep:
                    self._buf = pending[-keep:]
                break
            if not self._in_span and pos > 0:
                # Text before an opening tag is safe to show.
                visible.append(pending[:pos])
            # Skip past the tag (and, when closing, the span payload
            # before it), then flip in/out-of-span state.
            pending = pending[pos + len(tag):]
            self._in_span = not self._in_span
        return "".join(visible)

    def flush(self) -> str:
        """Emit any held-back buffer at end-of-stream.

        Inside an unterminated span the remainder is discarded — leaking
        partial memory context is worse than a truncated answer. Outside
        a span, the held-back tail is emitted verbatim: it turned out not
        to be a real tag after all.
        """
        held, self._buf = self._buf, ""
        if self._in_span:
            self._in_span = False
            return ""
        return held

    @staticmethod
    def _max_partial_suffix(buf: str, tag: str) -> int:
        """Length of the longest ``buf`` suffix that is a proper ``tag`` prefix.

        Case-insensitive. Returns 0 if no suffix could start the tag.
        """
        tag_cf = tag.lower()
        buf_cf = buf.lower()
        longest = min(len(buf_cf), len(tag_cf) - 1)
        for size in range(longest, 0, -1):
            if buf_cf.endswith(tag_cf[:size]):
                return size
        return 0
|
||||
|
||||
|
||||
def build_memory_context_block(raw_context: str) -> str:
|
||||
"""Wrap prefetched memory in a fenced block with system note.
|
||||
|
||||
|
|
|
|||
|
|
@ -8253,6 +8253,7 @@ class GatewayRunner:
|
|||
The enriched message string with vision descriptions prepended.
|
||||
"""
|
||||
from tools.vision_tools import vision_analyze_tool
|
||||
from agent.memory_manager import sanitize_context
|
||||
|
||||
analysis_prompt = (
|
||||
"Describe everything visible in this image in thorough detail. "
|
||||
|
|
@ -8271,6 +8272,14 @@ class GatewayRunner:
|
|||
result = json.loads(result_json)
|
||||
if result.get("success"):
|
||||
description = result.get("analysis", "")
|
||||
# The auxiliary vision LLM can echo injected system-prompt
|
||||
# memory context back into its output (#5719). Scrub any
|
||||
# <memory-context> fences and the "## Honcho Context"
|
||||
# section before the description lands in a user-visible
|
||||
# message.
|
||||
description = sanitize_context(description)
|
||||
if "## Honcho Context" in description:
|
||||
description = description.split("## Honcho Context", 1)[0].rstrip()
|
||||
enriched_parts.append(
|
||||
f"[The user sent an image~ Here's what I can see:\n{description}]\n"
|
||||
f"[If you need a closer look, use vision_analyze with "
|
||||
|
|
|
|||
|
|
@ -22,6 +22,8 @@ import sqlite3
|
|||
import threading
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
from agent.memory_manager import sanitize_context
|
||||
from hermes_constants import get_hermes_home
|
||||
from typing import Any, Callable, Dict, List, Optional, TypeVar
|
||||
|
||||
|
|
@ -1119,7 +1121,10 @@ class SessionDB:
|
|||
rows = cursor.fetchall()
|
||||
messages = []
|
||||
for row in rows:
|
||||
msg = {"role": row["role"], "content": row["content"]}
|
||||
content = row["content"]
|
||||
if row["role"] in {"user", "assistant"} and isinstance(content, str):
|
||||
content = sanitize_context(content).strip()
|
||||
msg = {"role": row["role"], "content": content}
|
||||
if row["tool_call_id"]:
|
||||
msg["tool_call_id"] = row["tool_call_id"]
|
||||
if row["tool_name"]:
|
||||
|
|
|
|||
|
|
@ -22,7 +22,14 @@ import threading
|
|||
import time
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from agent.memory_manager import sanitize_context
|
||||
from agent.memory_provider import MemoryProvider
|
||||
from plugins.memory.honcho.sync_worker import (
|
||||
CircuitBreaker,
|
||||
HonchoLatencyTracker,
|
||||
SyncTask,
|
||||
SyncWorker,
|
||||
)
|
||||
from tools.registry import tool_error
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
|
@ -194,7 +201,22 @@ class HonchoMemoryProvider(MemoryProvider):
|
|||
self._prefetch_result = ""
|
||||
self._prefetch_lock = threading.Lock()
|
||||
self._prefetch_thread: Optional[threading.Thread] = None
|
||||
self._sync_thread: Optional[threading.Thread] = None
|
||||
|
||||
# Post-response write path (sync_turn / on_memory_write). See
|
||||
# plugins/memory/honcho/sync_worker.py. The tracker + breaker are
|
||||
# shared with the Honcho SDK client so adaptive timeouts and
|
||||
# degraded-mode behaviour are consistent across the plugin.
|
||||
self._latency_tracker = HonchoLatencyTracker()
|
||||
self._breaker = CircuitBreaker()
|
||||
self._sync_worker = SyncWorker(
|
||||
latency_tracker=self._latency_tracker,
|
||||
breaker=self._breaker,
|
||||
thread_name="honcho-sync-worker",
|
||||
)
|
||||
# Durable backlog of tasks that couldn't reach Honcho (breaker open
|
||||
# or queue overflow). Drained on recovery — see _drain_backlog().
|
||||
self._backlog: List[SyncTask] = []
|
||||
self._backlog_lock = threading.Lock()
|
||||
|
||||
# B1: recall_mode — set during initialize from config
|
||||
self._recall_mode = "hybrid" # "context", "tools", or "hybrid"
|
||||
|
|
@ -1056,8 +1078,82 @@ class HonchoMemoryProvider(MemoryProvider):
|
|||
|
||||
return chunks
|
||||
|
||||
# -- backlog management (Layer 3) ----------------------------------------
|
||||
|
||||
_BACKLOG_MAX = 256
|
||||
|
||||
def _enqueue_with_backlog(self, task: SyncTask) -> None:
    """Submit ``task`` to the sync worker, falling back to the backlog.

    Replaces the task's failure hook with one that captures the task
    object itself (not just the error), so deferred work — breaker open
    or queue full — can be parked in the durable backlog for replay.
    The caller's original hook, if any, is still invoked afterwards.
    """
    chained_hook = task.on_failure
    # Failure reasons that mean the task was deferred rather than having
    # crashed inside Honcho itself; only deferred tasks are worth a replay.
    deferral_markers = (
        "circuit breaker open",
        "sync queue full",
        "sync queue overflow",
        "shutting down",
    )

    def _record_failure(error: BaseException) -> None:
        message = str(error)
        if any(marker in message for marker in deferral_markers):
            with self._backlog_lock:
                if len(self._backlog) < self._BACKLOG_MAX:
                    self._backlog.append(task)
                else:
                    logger.debug("Honcho backlog full; dropping %s", task.name)
        else:
            # Crashed inside Honcho — unlikely to succeed on replay.
            logger.debug(
                "Honcho sync task %s failed (not backlogged): %s",
                task.name, error,
            )
        if chained_hook is not None:
            try:
                chained_hook(error)
            except Exception:
                pass

    task.on_failure = _record_failure
    self._sync_worker.enqueue(task)
|
||||
|
||||
def _drain_backlog_if_healthy(self) -> None:
    """Replay backlogged sync tasks once the circuit breaker has closed.

    Runs on the happy path of ``sync_turn`` and never blocks the user:
    everything is handed straight back to the worker, and if the breaker
    is still open the tasks bounce back into the backlog through their
    failure hooks.

    Also nudges the Honcho client's HTTP timeout toward the latency
    tracker's observed p95, so a stalled backend fails fast on later
    calls instead of burning the full default timeout per request.
    """
    breaker = self._breaker
    if breaker.state != breaker.STATE_CLOSED:
        return

    # Layer 2: adaptive timeout rebuild (cheap no-op below threshold).
    try:
        from plugins.memory.honcho.client import rebuild_honcho_client_with_timeout

        rebuild_honcho_client_with_timeout(self._latency_tracker.timeout())
    except Exception as e:
        logger.debug("Honcho timeout rebuild skipped: %s", e)

    # Grab-and-clear under the lock; re-enqueue outside it.
    with self._backlog_lock:
        pending, self._backlog = self._backlog, []
    if not pending:
        return
    for queued in pending:
        self._sync_worker.enqueue(queued)
|
||||
|
||||
def sync_turn(self, user_content: str, assistant_content: str, *, session_id: str = "") -> None:
|
||||
"""Record the conversation turn in Honcho (non-blocking).
|
||||
"""Record the conversation turn in Honcho (fire-and-forget).
|
||||
|
||||
Enqueues the sync on the persistent worker thread and returns
|
||||
immediately. Callers never wait on Honcho — the run_conversation
|
||||
return path is fully decoupled from post-response writes.
|
||||
|
||||
Messages exceeding the Honcho API limit (default 25k chars) are
|
||||
split into multiple messages with continuation markers.
|
||||
|
|
@ -1068,27 +1164,34 @@ class HonchoMemoryProvider(MemoryProvider):
|
|||
return
|
||||
|
||||
msg_limit = self._config.message_max_chars if self._config else 25000
|
||||
clean_user_content = sanitize_context(user_content or "").strip()
|
||||
clean_assistant_content = sanitize_context(assistant_content or "").strip()
|
||||
session_key = self._session_key
|
||||
|
||||
def _sync():
|
||||
try:
|
||||
session = self._manager.get_or_create(self._session_key)
|
||||
for chunk in self._chunk_message(user_content, msg_limit):
|
||||
session.add_message("user", chunk)
|
||||
for chunk in self._chunk_message(assistant_content, msg_limit):
|
||||
session.add_message("assistant", chunk)
|
||||
self._manager._flush_session(session)
|
||||
except Exception as e:
|
||||
logger.debug("Honcho sync_turn failed: %s", e)
|
||||
session = self._manager.get_or_create(session_key)
|
||||
for chunk in self._chunk_message(clean_user_content, msg_limit):
|
||||
session.add_message("user", chunk)
|
||||
for chunk in self._chunk_message(clean_assistant_content, msg_limit):
|
||||
session.add_message("assistant", chunk)
|
||||
self._manager._flush_session(session)
|
||||
|
||||
if self._sync_thread and self._sync_thread.is_alive():
|
||||
self._sync_thread.join(timeout=5.0)
|
||||
self._sync_thread = threading.Thread(
|
||||
target=_sync, daemon=True, name="honcho-sync"
|
||||
task = SyncTask(
|
||||
fn=_sync,
|
||||
name="sync_turn",
|
||||
)
|
||||
self._sync_thread.start()
|
||||
self._enqueue_with_backlog(task)
|
||||
# If the breaker transitioned back to closed between turns, try to
|
||||
# drain anything that piled up while Honcho was unreachable.
|
||||
self._drain_backlog_if_healthy()
|
||||
|
||||
def on_memory_write(self, action: str, target: str, content: str) -> None:
|
||||
"""Mirror built-in user profile writes as Honcho conclusions."""
|
||||
"""Mirror built-in user profile writes as Honcho conclusions.
|
||||
|
||||
Enqueued on the shared sync worker so every post-response write
|
||||
path (turn sync + conclusion mirror) observes the same breaker
|
||||
and backlog.
|
||||
"""
|
||||
if action != "add" or target != "user" or not content:
|
||||
return
|
||||
if self._cron_skipped:
|
||||
|
|
@ -1096,14 +1199,17 @@ class HonchoMemoryProvider(MemoryProvider):
|
|||
if not self._manager or not self._session_key:
|
||||
return
|
||||
|
||||
def _write():
|
||||
try:
|
||||
self._manager.create_conclusion(self._session_key, content)
|
||||
except Exception as e:
|
||||
logger.debug("Honcho memory mirror failed: %s", e)
|
||||
session_key = self._session_key
|
||||
payload = content
|
||||
|
||||
t = threading.Thread(target=_write, daemon=True, name="honcho-memwrite")
|
||||
t.start()
|
||||
def _write():
|
||||
self._manager.create_conclusion(session_key, payload)
|
||||
|
||||
task = SyncTask(
|
||||
fn=_write,
|
||||
name="memory_mirror",
|
||||
)
|
||||
self._enqueue_with_backlog(task)
|
||||
|
||||
def on_session_end(self, messages: List[Dict[str, Any]]) -> None:
|
||||
"""Flush all pending messages to Honcho on session end."""
|
||||
|
|
@ -1111,9 +1217,10 @@ class HonchoMemoryProvider(MemoryProvider):
|
|||
return
|
||||
if not self._manager:
|
||||
return
|
||||
# Wait for pending sync
|
||||
if self._sync_thread and self._sync_thread.is_alive():
|
||||
self._sync_thread.join(timeout=10.0)
|
||||
# Wait briefly for any in-flight sync tasks to drain. We can't
|
||||
# block session-end indefinitely, but giving the worker 10s to
|
||||
# finish a pending turn-sync matches the previous behaviour.
|
||||
self._sync_worker.shutdown(timeout=10.0)
|
||||
try:
|
||||
self._manager.flush_all()
|
||||
except Exception as e:
|
||||
|
|
@ -1233,9 +1340,10 @@ class HonchoMemoryProvider(MemoryProvider):
|
|||
return tool_error(f"Honcho {tool_name} failed: {e}")
|
||||
|
||||
def shutdown(self) -> None:
|
||||
for t in (self._prefetch_thread, self._sync_thread):
|
||||
if t and t.is_alive():
|
||||
t.join(timeout=5.0)
|
||||
# Drain the prefetch thread (legacy, unchanged) + the sync worker.
|
||||
if self._prefetch_thread and self._prefetch_thread.is_alive():
|
||||
self._prefetch_thread.join(timeout=5.0)
|
||||
self._sync_worker.shutdown(timeout=5.0)
|
||||
# Flush any remaining messages
|
||||
if self._manager:
|
||||
try:
|
||||
|
|
|
|||
|
|
@ -273,9 +273,28 @@ def _write_config(cfg: dict, path: Path | None = None) -> None:
|
|||
|
||||
|
||||
def _resolve_api_key(cfg: dict) -> str:
    """Resolve API key with host -> root -> env fallback.

    For self-hosted instances configured with ``baseUrl`` instead of an API
    key, returns ``"local"`` so that credential guards throughout the CLI
    don't reject a valid configuration. The ``baseUrl`` is scheme-validated
    (http/https only) so that a typo like ``baseUrl: true`` can't silently
    pass the guard.
    """
    host_key = ((cfg.get("hosts") or {}).get(_host_key()) or {}).get("apiKey")
    key = host_key or cfg.get("apiKey", "") or os.environ.get("HONCHO_API_KEY", "")
    if not key:
        base_url = cfg.get("baseUrl") or cfg.get("base_url") or os.environ.get("HONCHO_BASE_URL", "")
        # Bug fix: a non-string config value (e.g. JSON ``baseUrl: true``)
        # used to raise AttributeError on ``.strip()`` instead of being
        # rejected by the guard. Treat anything non-string as "not set".
        base_url = base_url.strip() if isinstance(base_url, str) else ""
        if base_url:
            from urllib.parse import urlparse
            try:
                parsed = urlparse(base_url)
            except (TypeError, ValueError):
                parsed = None
            # Only a well-formed http(s) URL with a host counts as a
            # valid self-hosted configuration.
            if parsed and parsed.scheme in ("http", "https") and parsed.netloc:
                return "local"
    return key
|
||||
|
||||
|
||||
def _prompt(label: str, default: str | None = None, secret: bool = False) -> str:
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ from __future__ import annotations
|
|||
import json
|
||||
import os
|
||||
import logging
|
||||
import hashlib
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
|
||||
|
|
@ -27,7 +28,6 @@ if TYPE_CHECKING:
|
|||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
GLOBAL_CONFIG_PATH = Path.home() / ".honcho" / "config.json"
|
||||
HOST = "hermes"
|
||||
|
||||
|
||||
|
|
@ -53,6 +53,11 @@ def resolve_active_host() -> str:
|
|||
return HOST
|
||||
|
||||
|
||||
def resolve_global_config_path() -> Path:
|
||||
"""Return the shared Honcho config path for the current HOME."""
|
||||
return Path.home() / ".honcho" / "config.json"
|
||||
|
||||
|
||||
def resolve_config_path() -> Path:
|
||||
"""Return the active Honcho config path.
|
||||
|
||||
|
|
@ -72,7 +77,7 @@ def resolve_config_path() -> Path:
|
|||
if default_path != local_path and default_path.exists():
|
||||
return default_path
|
||||
|
||||
return GLOBAL_CONFIG_PATH
|
||||
return resolve_global_config_path()
|
||||
|
||||
|
||||
_RECALL_MODE_ALIASES = {"auto": "hybrid"}
|
||||
|
|
@ -138,6 +143,15 @@ def _parse_dialectic_depth_levels(host_val, root_val, depth: int) -> list[str] |
|
|||
return None
|
||||
|
||||
|
||||
# Default HTTP timeout (seconds) applied when no explicit timeout is
|
||||
# configured via HonchoClientConfig.timeout, honcho.timeout / requestTimeout,
|
||||
# or HONCHO_TIMEOUT. Honcho calls happen on the post-response path of
|
||||
# run_conversation; without a cap the agent can block indefinitely when
|
||||
# the Honcho backend is unreachable, preventing the gateway from
|
||||
# delivering the already-generated response.
|
||||
_DEFAULT_HTTP_TIMEOUT = 30.0
|
||||
|
||||
|
||||
def _resolve_optional_float(*values: Any) -> float | None:
|
||||
"""Return the first non-empty value coerced to a positive float."""
|
||||
for value in values:
|
||||
|
|
@ -226,6 +240,13 @@ class HonchoClientConfig:
|
|||
# Identity
|
||||
peer_name: str | None = None
|
||||
ai_peer: str = "hermes"
|
||||
# When True, ``peer_name`` wins over any gateway-supplied runtime
|
||||
# identity (Telegram UID, Discord ID, …) when resolving the user peer.
|
||||
# This keeps memory unified across platforms for single-user deployments
|
||||
# where Honcho's one peer-name is an unambiguous identity — otherwise
|
||||
# each platform would fork memory into its own peer (#14984). Default
|
||||
# ``False`` preserves existing multi-user behaviour.
|
||||
pin_peer_name: bool = False
|
||||
# Toggles
|
||||
enabled: bool = False
|
||||
save_messages: bool = True
|
||||
|
|
@ -420,6 +441,11 @@ class HonchoClientConfig:
|
|||
timeout=timeout,
|
||||
peer_name=host_block.get("peerName") or raw.get("peerName"),
|
||||
ai_peer=ai_peer,
|
||||
pin_peer_name=_resolve_bool(
|
||||
host_block.get("pinPeerName"),
|
||||
raw.get("pinPeerName"),
|
||||
default=False,
|
||||
),
|
||||
enabled=enabled,
|
||||
save_messages=save_messages,
|
||||
write_frequency=write_frequency,
|
||||
|
|
@ -522,6 +548,39 @@ class HonchoClientConfig:
|
|||
pass
|
||||
return None
|
||||
|
||||
# Honcho enforces a 100-char limit on session IDs. Long gateway session keys
|
||||
# (Matrix "!room:server" + thread event IDs, Telegram supergroup reply
|
||||
# chains, Slack thread IDs with long workspace prefixes) can overflow this
|
||||
# limit after sanitization; the Honcho API then rejects every call for that
|
||||
# session with "session_id too long". See issue #13868.
|
||||
_HONCHO_SESSION_ID_MAX_LEN = 100
|
||||
_HONCHO_SESSION_ID_HASH_LEN = 8
|
||||
|
||||
@classmethod
def _enforce_session_id_limit(cls, sanitized: str, original: str) -> str:
    """Truncate a sanitized session ID to Honcho's 100-char limit.

    The common case (short keys) short-circuits untouched. Over-limit
    keys keep a prefix of the sanitized ID plus a deterministic
    ``-<sha256 prefix>`` suffix, so two distinct long keys that share a
    leading segment don't collide onto the same truncated ID. The hash
    covers the *original* pre-sanitization key: inputs that sanitize to
    the same string still collide intentionally (same logical session),
    while inputs that only share a prefix do not.
    """
    limit = cls._HONCHO_SESSION_ID_MAX_LEN
    if len(sanitized) <= limit:
        return sanitized

    n_hash = cls._HONCHO_SESSION_ID_HASH_LEN
    suffix = hashlib.sha256(original.encode("utf-8")).hexdigest()[:n_hash]
    # Budget: limit - n_hash - 1 chars of prefix, then '-<hash>'. Trailing
    # hyphens are stripped so separators never double up.
    head = sanitized[: limit - n_hash - 1].rstrip("-")
    return "-".join((head, suffix))
|
||||
|
||||
def resolve_session_name(
|
||||
self,
|
||||
cwd: str | None = None,
|
||||
|
|
@ -566,7 +625,7 @@ class HonchoClientConfig:
|
|||
if gateway_session_key:
|
||||
sanitized = re.sub(r'[^a-zA-Z0-9_-]+', '-', gateway_session_key).strip('-')
|
||||
if sanitized:
|
||||
return sanitized
|
||||
return self._enforce_session_id_limit(sanitized, gateway_session_key)
|
||||
|
||||
# per-session: inherit Hermes session_id (new Honcho session each run)
|
||||
if self.session_strategy == "per-session" and session_id:
|
||||
|
|
@ -593,6 +652,8 @@ class HonchoClientConfig:
|
|||
|
||||
|
||||
_honcho_client: Honcho | None = None
|
||||
_honcho_client_kwargs: dict | None = None
|
||||
_honcho_client_kwargs_active_timeout: float | None = None
|
||||
|
||||
|
||||
def get_honcho_client(config: HonchoClientConfig | None = None) -> Honcho:
|
||||
|
|
@ -601,7 +662,7 @@ def get_honcho_client(config: HonchoClientConfig | None = None) -> Honcho:
|
|||
When no config is provided, attempts to load ~/.honcho/config.json
|
||||
first, falling back to environment variables.
|
||||
"""
|
||||
global _honcho_client
|
||||
global _honcho_client, _honcho_client_kwargs, _honcho_client_kwargs_active_timeout
|
||||
|
||||
if _honcho_client is not None:
|
||||
return _honcho_client
|
||||
|
|
@ -646,6 +707,11 @@ def get_honcho_client(config: HonchoClientConfig | None = None) -> Honcho:
|
|||
except Exception:
|
||||
pass
|
||||
|
||||
# Fall back to the default so an unconfigured install cannot hang
|
||||
# indefinitely on a stalled Honcho request.
|
||||
if resolved_timeout is None:
|
||||
resolved_timeout = _DEFAULT_HTTP_TIMEOUT
|
||||
|
||||
if resolved_base_url:
|
||||
logger.info("Initializing Honcho client (base_url: %s, workspace: %s)", resolved_base_url, config.workspace_id)
|
||||
else:
|
||||
|
|
@ -681,11 +747,56 @@ def get_honcho_client(config: HonchoClientConfig | None = None) -> Honcho:
|
|||
kwargs["timeout"] = resolved_timeout
|
||||
|
||||
_honcho_client = Honcho(**kwargs)
|
||||
_honcho_client_kwargs = dict(kwargs)
|
||||
_honcho_client_kwargs_active_timeout = resolved_timeout
|
||||
|
||||
return _honcho_client
|
||||
|
||||
|
||||
def rebuild_honcho_client_with_timeout(new_timeout: float) -> None:
    """Swap the singleton Honcho client for one with a new HTTP timeout.

    Invoked from the provider's sync worker once the
    :class:`HonchoLatencyTracker` has enough samples to recommend a
    timeout meaningfully different from the one the active client was
    built with. No-op when no client exists yet, when the ``honcho`` SDK
    is not importable, or when the change is within a 20% band — small
    jitter must not thrash the client. Safe to call concurrently.

    The previous client is simply dropped: the SDK's pooled httpx.Client
    is not exposed for in-place timeout mutation, so a rebuild is the
    only portable option.
    """
    global _honcho_client, _honcho_client_kwargs_active_timeout

    if _honcho_client is None or _honcho_client_kwargs is None:
        return

    current = _honcho_client_kwargs_active_timeout or _DEFAULT_HTTP_TIMEOUT
    if current <= 0:
        return
    # Skip rebuilds for jitter-sized changes (within ±20%).
    if 0.8 <= new_timeout / current <= 1.2:
        return

    try:
        from honcho import Honcho
    except ImportError:
        return

    rebuilt_kwargs = dict(_honcho_client_kwargs)
    rebuilt_kwargs["timeout"] = new_timeout
    logger.info(
        "Adapting Honcho HTTP timeout: %.1fs -> %.1fs (tracker p95)",
        current, new_timeout,
    )
    _honcho_client = Honcho(**rebuilt_kwargs)
    _honcho_client_kwargs_active_timeout = new_timeout
|
||||
|
||||
|
||||
def reset_honcho_client() -> None:
    """Reset the Honcho client singleton and its cached construction state.

    Clears not just the client itself but also the kwargs snapshot and the
    active-timeout marker consumed by ``rebuild_honcho_client_with_timeout``;
    resetting only the client would let a later adaptive rebuild resurrect
    a client from a previous configuration. Useful for testing.
    """
    global _honcho_client, _honcho_client_kwargs, _honcho_client_kwargs_active_timeout
    _honcho_client = None
    _honcho_client_kwargs = None
    _honcho_client_kwargs_active_timeout = None
|
||||
|
|
|
|||
|
|
@ -95,6 +95,7 @@ class HonchoSessionManager:
|
|||
self._config = config
|
||||
self._runtime_user_peer_name = runtime_user_peer_name
|
||||
self._cache: dict[str, HonchoSession] = {}
|
||||
self._cache_lock = threading.RLock()
|
||||
self._peers_cache: dict[str, Any] = {}
|
||||
self._sessions_cache: dict[str, Any] = {}
|
||||
|
||||
|
|
@ -273,17 +274,35 @@ class HonchoSessionManager:
|
|||
Returns:
|
||||
The session.
|
||||
"""
|
||||
if key in self._cache:
|
||||
logger.debug("Local session cache hit: %s", key)
|
||||
return self._cache[key]
|
||||
with self._cache_lock:
|
||||
if key in self._cache:
|
||||
logger.debug("Local session cache hit: %s", key)
|
||||
return self._cache[key]
|
||||
|
||||
# Gateway sessions should use the runtime user identity when available.
|
||||
if self._runtime_user_peer_name:
|
||||
# Determine peer IDs — no lock needed (read-only, no shared state mutation).
|
||||
# Gateway sessions normally use the runtime user identity (the
|
||||
# platform-native ID: Telegram UID, Discord snowflake, Slack user,
|
||||
# etc.) so multi-user bots scope memory per user. For a single-user
|
||||
# deployment the config-supplied ``peer_name`` is an unambiguous
|
||||
# identity and we should keep it unified across platforms — see
|
||||
# #14984. Opt into that with ``hosts.<host>.pinPeerName: true`` in
|
||||
# ``honcho.json`` (or root-level ``pinPeerName: true``).
|
||||
# `is True` (not `bool(...)`) is deliberate: several multi-user tests
|
||||
# pass a ``MagicMock`` for ``config`` where ``mock.pin_peer_name``
|
||||
# silently returns another MagicMock — truthy by default. Requiring
|
||||
# strict ``True`` keeps pinning as opt-in even for callers that
|
||||
# haven't updated their mocks yet; real configs built via
|
||||
# ``from_global_config`` always produce a proper boolean.
|
||||
pin_peer_name = (
|
||||
self._config is not None
|
||||
and bool(getattr(self._config, "peer_name", None))
|
||||
and getattr(self._config, "pin_peer_name", False) is True
|
||||
)
|
||||
if self._runtime_user_peer_name and not pin_peer_name:
|
||||
user_peer_id = self._sanitize_id(self._runtime_user_peer_name)
|
||||
elif self._config and self._config.peer_name:
|
||||
user_peer_id = self._sanitize_id(self._config.peer_name)
|
||||
else:
|
||||
# Fallback: derive from session key
|
||||
parts = key.split(":", 1)
|
||||
channel = parts[0] if len(parts) > 1 else "default"
|
||||
chat_id = parts[1] if len(parts) > 1 else key
|
||||
|
|
@ -293,19 +312,14 @@ class HonchoSessionManager:
|
|||
self._config.ai_peer if self._config else "hermes-assistant"
|
||||
)
|
||||
|
||||
# Sanitize session ID for Honcho
|
||||
# All expensive I/O outside the lock — Honcho's persistence is source of truth
|
||||
honcho_session_id = self._sanitize_id(key)
|
||||
|
||||
# Get or create peers
|
||||
user_peer = self._get_or_create_peer(user_peer_id)
|
||||
assistant_peer = self._get_or_create_peer(assistant_peer_id)
|
||||
|
||||
# Get or create Honcho session
|
||||
honcho_session, existing_messages = self._get_or_create_honcho_session(
|
||||
honcho_session_id, user_peer, assistant_peer
|
||||
)
|
||||
|
||||
# Convert Honcho messages to local format
|
||||
local_messages = []
|
||||
for msg in existing_messages:
|
||||
role = "assistant" if msg.peer_id == assistant_peer_id else "user"
|
||||
|
|
@ -313,10 +327,9 @@ class HonchoSessionManager:
|
|||
"role": role,
|
||||
"content": msg.content,
|
||||
"timestamp": msg.created_at.isoformat() if msg.created_at else "",
|
||||
"_synced": True, # Already in Honcho
|
||||
"_synced": True,
|
||||
})
|
||||
|
||||
# Create local session wrapper with existing messages
|
||||
session = HonchoSession(
|
||||
key=key,
|
||||
user_peer_id=user_peer_id,
|
||||
|
|
@ -325,7 +338,9 @@ class HonchoSessionManager:
|
|||
messages=local_messages,
|
||||
)
|
||||
|
||||
self._cache[key] = session
|
||||
# Write to cache under lock — only one writer wins
|
||||
with self._cache_lock:
|
||||
self._cache[key] = session
|
||||
return session
|
||||
|
||||
def _flush_session(self, session: HonchoSession) -> bool:
|
||||
|
|
@ -356,13 +371,15 @@ class HonchoSessionManager:
|
|||
for msg in new_messages:
|
||||
msg["_synced"] = True
|
||||
logger.debug("Synced %d messages to Honcho for %s", len(honcho_messages), session.key)
|
||||
self._cache[session.key] = session
|
||||
with self._cache_lock:
|
||||
self._cache[session.key] = session
|
||||
return True
|
||||
except Exception as e:
|
||||
for msg in new_messages:
|
||||
msg["_synced"] = False
|
||||
logger.error("Failed to sync messages to Honcho: %s", e)
|
||||
self._cache[session.key] = session
|
||||
with self._cache_lock:
|
||||
self._cache[session.key] = session
|
||||
return False
|
||||
|
||||
def _async_writer_loop(self) -> None:
|
||||
|
|
@ -434,7 +451,9 @@ class HonchoSessionManager:
|
|||
Called at session end for "session" write_frequency, or to force
|
||||
a sync before process exit regardless of mode.
|
||||
"""
|
||||
for session in list(self._cache.values()):
|
||||
with self._cache_lock:
|
||||
sessions = list(self._cache.values())
|
||||
for session in sessions:
|
||||
try:
|
||||
self._flush_session(session)
|
||||
except Exception as e:
|
||||
|
|
@ -459,9 +478,10 @@ class HonchoSessionManager:
|
|||
|
||||
def delete(self, key: str) -> bool:
    """Delete a session from the local cache.

    Holds ``_cache_lock`` across the membership test and the removal so a
    concurrent ``get_or_create``/``delete`` cannot race between the check
    and the ``del``. The Honcho-side session is left untouched.

    Returns:
        True if a cached session was removed, False if ``key`` was absent.
    """
    with self._cache_lock:
        if key in self._cache:
            del self._cache[key]
            return True
    return False
||||
|
||||
def new_session(self, key: str) -> HonchoSession:
|
||||
|
|
@ -473,20 +493,25 @@ class HonchoSessionManager:
|
|||
"""
|
||||
import time
|
||||
|
||||
# Remove old session from caches (but don't delete from Honcho)
|
||||
old_session = self._cache.pop(key, None)
|
||||
if old_session:
|
||||
self._sessions_cache.pop(old_session.honcho_session_id, None)
|
||||
# Hold the reentrant lock across get_or_create so a concurrent caller
|
||||
# can't observe the (old-popped, new-not-yet-inserted) gap and create
|
||||
# its own session under the raw key. `_cache_lock` is an RLock so
|
||||
# nested reacquisition inside get_or_create is safe.
|
||||
with self._cache_lock:
|
||||
# Remove old session from caches (but don't delete from Honcho)
|
||||
old_session = self._cache.pop(key, None)
|
||||
if old_session:
|
||||
self._sessions_cache.pop(old_session.honcho_session_id, None)
|
||||
|
||||
# Create new session with timestamp suffix
|
||||
timestamp = int(time.time())
|
||||
new_key = f"{key}:{timestamp}"
|
||||
# Create new session with timestamp suffix
|
||||
timestamp = int(time.time())
|
||||
new_key = f"{key}:{timestamp}"
|
||||
|
||||
# get_or_create will create a fresh session
|
||||
session = self.get_or_create(new_key)
|
||||
# get_or_create will create a fresh session
|
||||
session = self.get_or_create(new_key)
|
||||
|
||||
# Cache under the original key so callers find it by the expected name
|
||||
self._cache[key] = session
|
||||
# Cache under the original key so callers find it by the expected name
|
||||
self._cache[key] = session
|
||||
|
||||
logger.info("Created new session for %s (honcho: %s)", key, session.honcho_session_id)
|
||||
return session
|
||||
|
|
|
|||
394
plugins/memory/honcho/sync_worker.py
Normal file
394
plugins/memory/honcho/sync_worker.py
Normal file
|
|
@ -0,0 +1,394 @@
|
|||
"""Background sync worker for the Honcho memory provider.
|
||||
|
||||
The post-response sync path was previously a per-turn thread spawn with a
|
||||
best-effort ``join(timeout=5.0)`` on the prior turn's thread. That forced
|
||||
``run_conversation`` to wait up to 5 seconds at the start of every turn if
|
||||
the previous turn's sync was still in flight, and it serialized all sync
|
||||
work on a single transient thread that the caller had to coordinate with.
|
||||
|
||||
This module replaces that pattern with:
|
||||
|
||||
- :class:`SyncWorker` — persistent daemon thread draining a bounded
|
||||
queue of sync/write tasks. Tasks are submitted with ``enqueue()`` and
|
||||
return immediately; the caller is never blocked by Honcho latency.
|
||||
|
||||
- :class:`HonchoLatencyTracker` — rolling p95 observer that gives the
|
||||
client an adaptive timeout with sensible cold-start defaults (Layer 2
|
||||
of the timeout-ceiling rework).
|
||||
|
||||
- :class:`CircuitBreaker` — consecutive-failure tripwire that flips to
|
||||
a degraded state after repeated timeouts and probes for recovery in
|
||||
the background (Layer 3). While open, sync tasks are persisted to a
|
||||
local backlog so the outage's worth of writes can be drained once
|
||||
Honcho is reachable again.
|
||||
|
||||
The three primitives compose: ``SyncWorker`` consults the breaker before
|
||||
each task, records the outcome in the latency tracker, and feeds timeout
|
||||
+ failure observations back to the breaker. Nothing here depends on
|
||||
``HonchoMemoryProvider`` — the worker takes plain callables so tests can
|
||||
exercise each primitive in isolation.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import collections
|
||||
import logging
|
||||
import queue
|
||||
import threading
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from typing import Callable, Deque, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Latency tracker — Layer 2
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class HonchoLatencyTracker:
|
||||
"""Rolling p95 observer for Honcho call latencies.
|
||||
|
||||
Provides an adaptive HTTP timeout that scales with observed backend
|
||||
latency. Hosted Honcho settles to ~1-3s; self-hosted instances with
|
||||
slow cold starts naturally scale up. Thread-safe: the worker thread
|
||||
records observations, any thread may read the current timeout.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
window: int = 20,
|
||||
default: float = 30.0,
|
||||
floor: float = 5.0,
|
||||
headroom: float = 3.0,
|
||||
warmup_samples: int = 5,
|
||||
) -> None:
|
||||
self._samples: Deque[float] = collections.deque(maxlen=window)
|
||||
self._default = float(default)
|
||||
self._floor = float(floor)
|
||||
self._headroom = float(headroom)
|
||||
self._warmup = int(warmup_samples)
|
||||
self._lock = threading.Lock()
|
||||
|
||||
def observe(self, seconds: float) -> None:
|
||||
"""Record a successful call's wall-clock latency (seconds)."""
|
||||
if seconds < 0 or seconds != seconds: # NaN check
|
||||
return
|
||||
with self._lock:
|
||||
self._samples.append(float(seconds))
|
||||
|
||||
def timeout(self) -> float:
|
||||
"""Return the adaptive timeout for the next call.
|
||||
|
||||
During warmup (< warmup_samples observations) returns the default.
|
||||
Once warm, returns ``max(floor, headroom × p95(samples))``.
|
||||
"""
|
||||
with self._lock:
|
||||
n = len(self._samples)
|
||||
if n < self._warmup:
|
||||
return self._default
|
||||
sorted_samples = sorted(self._samples)
|
||||
# Nearest-rank p95: index = ceil(0.95 * n) - 1, clamped.
|
||||
idx = min(n - 1, max(0, int(round(0.95 * (n - 1)))))
|
||||
p95 = sorted_samples[idx]
|
||||
return max(self._floor, self._headroom * p95)
|
||||
|
||||
def reset(self) -> None:
|
||||
with self._lock:
|
||||
self._samples.clear()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Circuit breaker — Layer 3
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class CircuitBreaker:
|
||||
"""Consecutive-failure tripwire with periodic probe recovery.
|
||||
|
||||
States:
|
||||
- ``closed`` — traffic flows normally (the happy path)
|
||||
- ``open`` — recent consecutive failures > threshold; skip calls
|
||||
- ``half_open`` — probe window; one test call is allowed through
|
||||
|
||||
Transitions:
|
||||
- closed → open after ``failure_threshold`` consecutive failures
|
||||
- open → half_open after ``probe_interval`` seconds
|
||||
- half_open → closed on a successful probe
|
||||
- half_open → open on a failed probe
|
||||
|
||||
Thread-safe. The worker consults ``allow()`` before each task and
|
||||
reports the outcome via ``record_success()`` / ``record_failure()``.
|
||||
"""
|
||||
|
||||
STATE_CLOSED = "closed"
|
||||
STATE_OPEN = "open"
|
||||
STATE_HALF_OPEN = "half_open"
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
failure_threshold: int = 3,
|
||||
probe_interval: float = 60.0,
|
||||
time_fn: Callable[[], float] = time.monotonic,
|
||||
) -> None:
|
||||
self._failure_threshold = int(failure_threshold)
|
||||
self._probe_interval = float(probe_interval)
|
||||
self._time_fn = time_fn
|
||||
self._lock = threading.Lock()
|
||||
self._state = self.STATE_CLOSED
|
||||
self._consecutive_failures = 0
|
||||
self._opened_at: Optional[float] = None
|
||||
|
||||
@property
|
||||
def state(self) -> str:
|
||||
with self._lock:
|
||||
self._maybe_transition_to_probe()
|
||||
return self._state
|
||||
|
||||
def allow(self) -> bool:
|
||||
"""Return True iff a call should proceed now."""
|
||||
with self._lock:
|
||||
self._maybe_transition_to_probe()
|
||||
return self._state != self.STATE_OPEN
|
||||
|
||||
def record_success(self) -> None:
|
||||
with self._lock:
|
||||
self._consecutive_failures = 0
|
||||
if self._state != self.STATE_CLOSED:
|
||||
logger.info("Honcho circuit breaker: recovered → closed")
|
||||
self._state = self.STATE_CLOSED
|
||||
self._opened_at = None
|
||||
|
||||
def record_failure(self) -> None:
|
||||
with self._lock:
|
||||
self._consecutive_failures += 1
|
||||
if self._state == self.STATE_HALF_OPEN:
|
||||
self._state = self.STATE_OPEN
|
||||
self._opened_at = self._time_fn()
|
||||
logger.warning("Honcho circuit breaker: probe failed → open")
|
||||
return
|
||||
if (
|
||||
self._state == self.STATE_CLOSED
|
||||
and self._consecutive_failures >= self._failure_threshold
|
||||
):
|
||||
self._state = self.STATE_OPEN
|
||||
self._opened_at = self._time_fn()
|
||||
logger.warning(
|
||||
"Honcho circuit breaker: %d consecutive failures → open",
|
||||
self._consecutive_failures,
|
||||
)
|
||||
|
||||
def reset(self) -> None:
|
||||
with self._lock:
|
||||
self._state = self.STATE_CLOSED
|
||||
self._consecutive_failures = 0
|
||||
self._opened_at = None
|
||||
|
||||
def _maybe_transition_to_probe(self) -> None:
|
||||
# Caller must hold the lock.
|
||||
if self._state == self.STATE_OPEN and self._opened_at is not None:
|
||||
if self._time_fn() - self._opened_at >= self._probe_interval:
|
||||
self._state = self.STATE_HALF_OPEN
|
||||
logger.info(
|
||||
"Honcho circuit breaker: probe window → half_open"
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Sync worker — Layer 1
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@dataclass
|
||||
class SyncTask:
|
||||
"""A unit of work for the sync worker.
|
||||
|
||||
``fn`` runs on the worker thread. ``name`` is a human-readable label
|
||||
used in logs and for backlog replay. ``on_failure`` is optional: if
|
||||
set, it's called with the exception on breaker-open deferral or when
|
||||
all retries are exhausted so callers can persist the task to a
|
||||
durable backlog.
|
||||
"""
|
||||
|
||||
fn: Callable[[], None]
|
||||
name: str = "sync"
|
||||
on_failure: Optional[Callable[[BaseException], None]] = None
|
||||
|
||||
|
||||
class SyncWorker:
|
||||
"""Persistent daemon thread draining a bounded task queue.
|
||||
|
||||
This replaces the per-turn ``threading.Thread(target=_sync).start()``
|
||||
pattern so ``sync_turn`` returns immediately instead of coordinating
|
||||
thread handoff on every turn. Runs ``SyncTask`` callables serially
|
||||
on a dedicated thread — serialization is intentional because Honcho
|
||||
session writes must be ordered per-session to avoid re-ordering
|
||||
messages, and the worker handles one session per provider.
|
||||
|
||||
Queue overflow (producer faster than Honcho can drain) drops the
|
||||
OLDEST task rather than blocking the producer. This favors user-
|
||||
facing responsiveness over write fidelity in the pathological case,
|
||||
and the dropped task still has its ``on_failure`` callback invoked
|
||||
so it can be appended to a durable backlog.
|
||||
|
||||
The worker is lazy: the thread starts on first ``enqueue()`` call
|
||||
and runs until ``shutdown()``. ``shutdown()`` is idempotent.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
max_queue: int = 64,
|
||||
latency_tracker: Optional[HonchoLatencyTracker] = None,
|
||||
breaker: Optional[CircuitBreaker] = None,
|
||||
thread_name: str = "honcho-sync-worker",
|
||||
) -> None:
|
||||
self._queue: queue.Queue[Optional[SyncTask]] = queue.Queue(maxsize=max_queue)
|
||||
self._thread: Optional[threading.Thread] = None
|
||||
self._thread_name = thread_name
|
||||
self._shutdown = False
|
||||
self._lock = threading.Lock()
|
||||
self._latency_tracker = latency_tracker
|
||||
self._breaker = breaker
|
||||
self._dropped = 0
|
||||
|
||||
# -- lifecycle -----------------------------------------------------------
|
||||
|
||||
def _ensure_started(self) -> None:
|
||||
with self._lock:
|
||||
if self._thread is not None and self._thread.is_alive():
|
||||
return
|
||||
if self._shutdown:
|
||||
return
|
||||
self._thread = threading.Thread(
|
||||
target=self._run,
|
||||
name=self._thread_name,
|
||||
daemon=True,
|
||||
)
|
||||
self._thread.start()
|
||||
|
||||
def shutdown(self, *, timeout: float = 5.0) -> None:
|
||||
"""Signal the worker to drain and exit; wait up to ``timeout`` seconds."""
|
||||
with self._lock:
|
||||
if self._shutdown:
|
||||
return
|
||||
self._shutdown = True
|
||||
thread = self._thread
|
||||
try:
|
||||
# Sentinel triggers clean exit from the loop.
|
||||
self._queue.put_nowait(None)
|
||||
except queue.Full:
|
||||
pass
|
||||
if thread is not None:
|
||||
thread.join(timeout=timeout)
|
||||
|
||||
# -- producer interface --------------------------------------------------
|
||||
|
||||
def enqueue(self, task: SyncTask) -> bool:
|
||||
"""Submit a task. Returns False if the task was dropped.
|
||||
|
||||
Breaker-open tasks are dropped synchronously and ``on_failure`` is
|
||||
called so the caller can persist them. Queue-full tasks evict
|
||||
the oldest task (which also gets its ``on_failure`` called) to
|
||||
keep the pipeline moving under load.
|
||||
"""
|
||||
if self._shutdown:
|
||||
if task.on_failure is not None:
|
||||
try:
|
||||
task.on_failure(RuntimeError("sync worker is shutting down"))
|
||||
except Exception:
|
||||
pass
|
||||
return False
|
||||
|
||||
breaker = self._breaker
|
||||
if breaker is not None and not breaker.allow():
|
||||
if task.on_failure is not None:
|
||||
try:
|
||||
task.on_failure(RuntimeError("circuit breaker open"))
|
||||
except Exception:
|
||||
pass
|
||||
return False
|
||||
|
||||
self._ensure_started()
|
||||
|
||||
try:
|
||||
self._queue.put_nowait(task)
|
||||
return True
|
||||
except queue.Full:
|
||||
# Evict the oldest queued task to make room; its failure
|
||||
# callback still fires so the backlog can capture it.
|
||||
try:
|
||||
victim = self._queue.get_nowait()
|
||||
self._dropped += 1
|
||||
if victim is not None and victim.on_failure is not None:
|
||||
try:
|
||||
victim.on_failure(
|
||||
RuntimeError("sync queue overflow — task dropped")
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
except queue.Empty:
|
||||
pass
|
||||
try:
|
||||
self._queue.put_nowait(task)
|
||||
return True
|
||||
except queue.Full:
|
||||
if task.on_failure is not None:
|
||||
try:
|
||||
task.on_failure(RuntimeError("sync queue full"))
|
||||
except Exception:
|
||||
pass
|
||||
return False
|
||||
|
||||
# -- worker loop ---------------------------------------------------------
|
||||
|
||||
def _run(self) -> None:
|
||||
while True:
|
||||
try:
|
||||
task = self._queue.get(timeout=1.0)
|
||||
except queue.Empty:
|
||||
if self._shutdown:
|
||||
return
|
||||
continue
|
||||
|
||||
if task is None: # sentinel
|
||||
return
|
||||
|
||||
started = time.monotonic()
|
||||
ok = False
|
||||
error: Optional[BaseException] = None
|
||||
try:
|
||||
task.fn()
|
||||
ok = True
|
||||
except BaseException as e: # task bodies may raise anything
|
||||
error = e
|
||||
logger.debug("Honcho sync task %s failed: %s", task.name, e)
|
||||
|
||||
elapsed = time.monotonic() - started
|
||||
if ok:
|
||||
if self._latency_tracker is not None:
|
||||
self._latency_tracker.observe(elapsed)
|
||||
if self._breaker is not None:
|
||||
self._breaker.record_success()
|
||||
else:
|
||||
if self._breaker is not None:
|
||||
self._breaker.record_failure()
|
||||
if task.on_failure is not None and error is not None:
|
||||
try:
|
||||
task.on_failure(error)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# -- introspection (for hermes honcho status etc.) -----------------------
|
||||
|
||||
def qsize(self) -> int:
|
||||
return self._queue.qsize()
|
||||
|
||||
def dropped(self) -> int:
|
||||
return self._dropped
|
||||
|
||||
def is_running(self) -> bool:
|
||||
return self._thread is not None and self._thread.is_alive()
|
||||
55
run_agent.py
55
run_agent.py
|
|
@ -79,7 +79,7 @@ from tools.browser_tool import cleanup_browser
|
|||
|
||||
|
||||
# Agent internals extracted to agent/ package for modularity
|
||||
from agent.memory_manager import build_memory_context_block, sanitize_context
|
||||
from agent.memory_manager import StreamingContextScrubber, build_memory_context_block, sanitize_context
|
||||
from agent.retry_utils import jittered_backoff
|
||||
from agent.error_classifier import classify_api_error, FailoverReason
|
||||
from agent.prompt_builder import (
|
||||
|
|
@ -1208,6 +1208,10 @@ class AIAgent:
|
|||
# Deferred paragraph break flag — set after tool iterations so a
|
||||
# single "\n\n" is prepended to the next real text delta.
|
||||
self._stream_needs_break = False
|
||||
# Stateful scrubber for <memory-context> spans split across stream
|
||||
# deltas (#5719). sanitize_context() alone can't survive chunk
|
||||
# boundaries because the block regex needs both tags in one string.
|
||||
self._stream_context_scrubber = StreamingContextScrubber()
|
||||
# Visible assistant text already delivered through live token callbacks
|
||||
# during the current model response. Used to avoid re-sending the same
|
||||
# commentary when the provider later returns it as a completed interim
|
||||
|
|
@ -5784,6 +5788,20 @@ class AIAgent:
|
|||
|
||||
def _reset_stream_delivery_tracking(self) -> None:
|
||||
"""Reset tracking for text delivered during the current model response."""
|
||||
# Flush any benign partial-tag tail held by the context scrubber so it
|
||||
# reaches the UI before we clear state for the next model call. If
|
||||
# the scrubber is mid-span, flush() drops the orphaned content.
|
||||
scrubber = getattr(self, "_stream_context_scrubber", None)
|
||||
if scrubber is not None:
|
||||
tail = scrubber.flush()
|
||||
if tail:
|
||||
callbacks = [cb for cb in (self.stream_delta_callback, self._stream_callback) if cb is not None]
|
||||
for cb in callbacks:
|
||||
try:
|
||||
cb(tail)
|
||||
except Exception:
|
||||
pass
|
||||
self._record_streamed_assistant_text(tail)
|
||||
self._current_streamed_assistant_text = ""
|
||||
|
||||
def _record_streamed_assistant_text(self, text: str) -> None:
|
||||
|
|
@ -5816,7 +5834,7 @@ class AIAgent:
|
|||
if cb is None or not isinstance(assistant_msg, dict):
|
||||
return
|
||||
content = assistant_msg.get("content")
|
||||
visible = self._strip_think_blocks(content or "").strip()
|
||||
visible = sanitize_context(self._strip_think_blocks(content or "")).strip()
|
||||
if not visible or visible == "(empty)":
|
||||
return
|
||||
already_streamed = self._interim_content_was_streamed(visible)
|
||||
|
|
@ -5834,6 +5852,25 @@ class AIAgent:
|
|||
if getattr(self, "_stream_needs_break", False) and text and text.strip():
|
||||
self._stream_needs_break = False
|
||||
text = "\n\n" + text
|
||||
prepended_break = True
|
||||
else:
|
||||
prepended_break = False
|
||||
if isinstance(text, str):
|
||||
# Strip <think> blocks first (per-delta is safe for closed pairs; the
|
||||
# unterminated-tag path is handled downstream by stream_consumer).
|
||||
# Then feed through the stateful context scrubber so memory-context
|
||||
# spans split across chunks cannot leak to the UI (#5719).
|
||||
text = self._strip_think_blocks(text or "")
|
||||
scrubber = getattr(self, "_stream_context_scrubber", None)
|
||||
if scrubber is not None:
|
||||
text = scrubber.feed(text)
|
||||
else:
|
||||
# Defensive: legacy callers without the scrubber attribute.
|
||||
text = sanitize_context(text)
|
||||
if not prepended_break:
|
||||
text = text.lstrip("\n")
|
||||
if not text:
|
||||
return
|
||||
callbacks = [cb for cb in (self.stream_delta_callback, self._stream_callback) if cb is not None]
|
||||
delivered = False
|
||||
for cb in callbacks:
|
||||
|
|
@ -7612,7 +7649,7 @@ class AIAgent:
|
|||
# API replay, session transcript, gateway delivery, CLI display,
|
||||
# compression, title generation.
|
||||
if isinstance(_san_content, str) and _san_content:
|
||||
_san_content = self._strip_think_blocks(_san_content).strip()
|
||||
_san_content = sanitize_context(self._strip_think_blocks(_san_content)).strip()
|
||||
|
||||
msg = {
|
||||
"role": "assistant",
|
||||
|
|
@ -9378,6 +9415,13 @@ class AIAgent:
|
|||
# Track user turns for memory flush and periodic nudge logic
|
||||
self._user_turn_count += 1
|
||||
|
||||
# Reset the streaming context scrubber at the top of each turn so a
|
||||
# hung span from a prior interrupted stream can't taint this turn's
|
||||
# output.
|
||||
scrubber = getattr(self, "_stream_context_scrubber", None)
|
||||
if scrubber is not None:
|
||||
scrubber.reset()
|
||||
|
||||
# Preserve the original user message (no nudge injection).
|
||||
original_user_message = persist_user_message if persist_user_message is not None else user_message
|
||||
|
||||
|
|
@ -12339,8 +12383,9 @@ class AIAgent:
|
|||
truncated_response_prefix = ""
|
||||
length_continue_retries = 0
|
||||
|
||||
# Strip <think> blocks from user-facing response (keep raw in messages for trajectory)
|
||||
final_response = self._strip_think_blocks(final_response).strip()
|
||||
# Strip internal context / reasoning wrappers from the user-facing
|
||||
# response (keep only clean visible text in transcript + UI).
|
||||
final_response = sanitize_context(self._strip_think_blocks(final_response)).strip()
|
||||
|
||||
final_msg = self._build_assistant_message(assistant_message, finish_reason)
|
||||
|
||||
|
|
|
|||
150
tests/agent/test_streaming_context_scrubber.py
Normal file
150
tests/agent/test_streaming_context_scrubber.py
Normal file
|
|
@ -0,0 +1,150 @@
|
|||
"""Unit tests for StreamingContextScrubber (agent/memory_manager.py).
|
||||
|
||||
Regression coverage for #5719 — memory-context spans split across stream
|
||||
deltas must not leak payload to the UI. The one-shot sanitize_context()
|
||||
regex can't survive chunk boundaries, so _fire_stream_delta routes deltas
|
||||
through a stateful scrubber.
|
||||
"""
|
||||
|
||||
from agent.memory_manager import StreamingContextScrubber, sanitize_context
|
||||
|
||||
|
||||
class TestStreamingContextScrubberBasics:
|
||||
def test_empty_input_returns_empty(self):
|
||||
s = StreamingContextScrubber()
|
||||
assert s.feed("") == ""
|
||||
assert s.flush() == ""
|
||||
|
||||
def test_plain_text_passes_through(self):
|
||||
s = StreamingContextScrubber()
|
||||
assert s.feed("hello world") == "hello world"
|
||||
assert s.flush() == ""
|
||||
|
||||
def test_complete_block_in_single_delta(self):
|
||||
"""Regression: the one-shot test case from #13672 must still work."""
|
||||
s = StreamingContextScrubber()
|
||||
leaked = (
|
||||
"<memory-context>\n"
|
||||
"[System note: The following is recalled memory context, NOT new "
|
||||
"user input. Treat as informational background data.]\n\n"
|
||||
"## Honcho Context\nstale memory\n"
|
||||
"</memory-context>\n\nVisible answer"
|
||||
)
|
||||
out = s.feed(leaked) + s.flush()
|
||||
assert out == "\n\nVisible answer"
|
||||
|
||||
def test_open_and_close_in_separate_deltas_strips_payload(self):
|
||||
"""The real streaming case: tag pair split across deltas."""
|
||||
s = StreamingContextScrubber()
|
||||
deltas = [
|
||||
"Hello ",
|
||||
"<memory-context>\npayload ",
|
||||
"more payload\n",
|
||||
"</memory-context> world",
|
||||
]
|
||||
out = "".join(s.feed(d) for d in deltas) + s.flush()
|
||||
assert out == "Hello world"
|
||||
assert "payload" not in out
|
||||
|
||||
def test_realistic_fragmented_chunks_strip_memory_payload(self):
|
||||
"""Exact leak scenario from the reviewer's comment — 4 realistic chunks.
|
||||
|
||||
This is the case the original #13672 fix silently leaks on: the open
|
||||
tag, system note, payload, and close tag each arrive in their own
|
||||
delta because providers emit 1-80 char chunks.
|
||||
"""
|
||||
s = StreamingContextScrubber()
|
||||
deltas = [
|
||||
"<memory-context>\n[System note: The following",
|
||||
" is recalled memory context, NOT new user input. "
|
||||
"Treat as informational background data.]\n\n",
|
||||
"## Honcho Context\nstale memory\n",
|
||||
"</memory-context>\n\nVisible answer",
|
||||
]
|
||||
out = "".join(s.feed(d) for d in deltas) + s.flush()
|
||||
assert out == "\n\nVisible answer"
|
||||
# The system-note line and payload must never reach the UI.
|
||||
assert "System note" not in out
|
||||
assert "Honcho Context" not in out
|
||||
assert "stale memory" not in out
|
||||
|
||||
def test_open_tag_split_across_two_deltas(self):
|
||||
"""The open tag itself arriving in two fragments."""
|
||||
s = StreamingContextScrubber()
|
||||
out = (
|
||||
s.feed("pre <memory")
|
||||
+ s.feed("-context>leak</memory-context> post")
|
||||
+ s.flush()
|
||||
)
|
||||
assert out == "pre post"
|
||||
assert "leak" not in out
|
||||
|
||||
def test_close_tag_split_across_two_deltas(self):
|
||||
"""The close tag arriving in two fragments."""
|
||||
s = StreamingContextScrubber()
|
||||
out = (
|
||||
s.feed("pre <memory-context>leak</memory")
|
||||
+ s.feed("-context> post")
|
||||
+ s.flush()
|
||||
)
|
||||
assert out == "pre post"
|
||||
assert "leak" not in out
|
||||
|
||||
|
||||
class TestStreamingContextScrubberPartialTagFalsePositives:
|
||||
def test_partial_open_tag_tail_emitted_on_flush(self):
|
||||
"""Bare '<mem' at end of stream is not really a memory-context tag."""
|
||||
s = StreamingContextScrubber()
|
||||
out = s.feed("hello <mem") + s.feed("ory other") + s.flush()
|
||||
assert out == "hello <memory other"
|
||||
|
||||
def test_partial_tag_released_when_disambiguated(self):
|
||||
"""A held-back partial tag that turns out to be prose gets released."""
|
||||
s = StreamingContextScrubber()
|
||||
# '< ' should not look like the start of any tag.
|
||||
out = s.feed("price < ") + s.feed("10 dollars") + s.flush()
|
||||
assert out == "price < 10 dollars"
|
||||
|
||||
|
||||
class TestStreamingContextScrubberUnterminatedSpan:
|
||||
def test_unterminated_span_drops_payload(self):
|
||||
"""Provider drops close tag — better to lose output than to leak."""
|
||||
s = StreamingContextScrubber()
|
||||
out = s.feed("pre <memory-context>secret never closed") + s.flush()
|
||||
assert out == "pre "
|
||||
assert "secret" not in out
|
||||
|
||||
def test_reset_clears_hung_span(self):
|
||||
"""Cross-turn scrubber reset drops a hung span so next turn is clean."""
|
||||
s = StreamingContextScrubber()
|
||||
s.feed("pre <memory-context>half")
|
||||
s.reset()
|
||||
out = s.feed("clean text") + s.flush()
|
||||
assert out == "clean text"
|
||||
|
||||
|
||||
class TestStreamingContextScrubberCaseInsensitivity:
|
||||
def test_uppercase_tags_still_scrubbed(self):
|
||||
s = StreamingContextScrubber()
|
||||
out = (
|
||||
s.feed("<MEMORY-CONTEXT>secret")
|
||||
+ s.feed("</Memory-Context>visible")
|
||||
+ s.flush()
|
||||
)
|
||||
assert out == "visible"
|
||||
assert "secret" not in out
|
||||
|
||||
|
||||
class TestSanitizeContextUnchanged:
|
||||
"""Smoke test that the one-shot sanitize_context still works for whole strings."""
|
||||
|
||||
def test_whole_block_still_sanitized(self):
|
||||
leaked = (
|
||||
"<memory-context>\n"
|
||||
"[System note: The following is recalled memory context, NOT new "
|
||||
"user input. Treat as informational background data.]\n"
|
||||
"payload\n"
|
||||
"</memory-context>\nVisible"
|
||||
)
|
||||
out = sanitize_context(leaked).strip()
|
||||
assert out == "Visible"
|
||||
99
tests/gateway/test_vision_memory_leak.py
Normal file
99
tests/gateway/test_vision_memory_leak.py
Normal file
|
|
@ -0,0 +1,99 @@
|
|||
"""Tests for _enrich_message_with_vision — regression for #5719.
|
||||
|
||||
The auxiliary vision LLM can echo system-prompt Honcho memory back into
|
||||
its analysis output. When that echo reaches the user as the enriched
|
||||
image description, recalled memory context (personal facts, dialectic
|
||||
output) surfaces into a user-visible message.
|
||||
|
||||
The boundary fix in gateway/run.py strips both <memory-context>...</memory-context>
|
||||
fenced blocks AND any "## Honcho Context" section from vision descriptions
|
||||
before they're embedded into the enriched user message.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
from unittest.mock import AsyncMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def gateway_runner():
|
||||
"""Minimal GatewayRunner stub with just the method under test bound."""
|
||||
from gateway.run import GatewayRunner
|
||||
|
||||
class _Stub:
|
||||
_enrich_message_with_vision = GatewayRunner._enrich_message_with_vision
|
||||
|
||||
return _Stub()
|
||||
|
||||
|
||||
def _run(coro):
|
||||
return asyncio.get_event_loop().run_until_complete(coro) if False else asyncio.new_event_loop().run_until_complete(coro)
|
||||
|
||||
|
||||
class TestEnrichMessageWithVision:
|
||||
def test_clean_description_passes_through(self, gateway_runner):
|
||||
"""Vision output without leaked memory is embedded unchanged."""
|
||||
fake_result = json.dumps({
|
||||
"success": True,
|
||||
"analysis": "A photograph of a sunset over the ocean.",
|
||||
})
|
||||
with patch("tools.vision_tools.vision_analyze_tool", new=AsyncMock(return_value=fake_result)):
|
||||
out = _run(gateway_runner._enrich_message_with_vision("caption", ["/tmp/img.jpg"]))
|
||||
assert "sunset over the ocean" in out
|
||||
|
||||
def test_honcho_context_header_stripped(self, gateway_runner):
|
||||
"""'## Honcho Context' section and everything after is removed."""
|
||||
leaked = (
|
||||
"A photograph of a sunset.\n\n"
|
||||
"## Honcho Context\n"
|
||||
"User prefers concise answers, works at Plastic Labs,\n"
|
||||
"uses OPSEC pseudonyms.\n"
|
||||
)
|
||||
fake_result = json.dumps({"success": True, "analysis": leaked})
|
||||
with patch("tools.vision_tools.vision_analyze_tool", new=AsyncMock(return_value=fake_result)):
|
||||
out = _run(gateway_runner._enrich_message_with_vision("caption", ["/tmp/img.jpg"]))
|
||||
assert "sunset" in out
|
||||
assert "Honcho Context" not in out
|
||||
assert "Plastic Labs" not in out
|
||||
assert "OPSEC" not in out
|
||||
|
||||
def test_memory_context_fence_stripped(self, gateway_runner):
|
||||
"""<memory-context>...</memory-context> fenced block is scrubbed."""
|
||||
leaked = (
|
||||
"<memory-context>\n"
|
||||
"[System note: The following is recalled memory context, NOT new "
|
||||
"user input. Treat as informational background data.]\n\n"
|
||||
"User details and preferences here.\n"
|
||||
"</memory-context>\n"
|
||||
"A photograph of a cat."
|
||||
)
|
||||
fake_result = json.dumps({"success": True, "analysis": leaked})
|
||||
with patch("tools.vision_tools.vision_analyze_tool", new=AsyncMock(return_value=fake_result)):
|
||||
out = _run(gateway_runner._enrich_message_with_vision("caption", ["/tmp/img.jpg"]))
|
||||
assert "photograph of a cat" in out
|
||||
assert "<memory-context>" not in out
|
||||
assert "User details and preferences" not in out
|
||||
assert "System note" not in out
|
||||
|
||||
def test_both_leak_patterns_together_stripped(self, gateway_runner):
|
||||
"""A vision output containing both leak shapes is fully scrubbed."""
|
||||
leaked = (
|
||||
"<memory-context>\n"
|
||||
"[System note: The following is recalled memory context, NOT new "
|
||||
"user input. Treat as informational background data.]\n"
|
||||
"fenced leak\n"
|
||||
"</memory-context>\n"
|
||||
"A photograph of a dog.\n\n"
|
||||
"## Honcho Context\n"
|
||||
"header leak\n"
|
||||
)
|
||||
fake_result = json.dumps({"success": True, "analysis": leaked})
|
||||
with patch("tools.vision_tools.vision_analyze_tool", new=AsyncMock(return_value=fake_result)):
|
||||
out = _run(gateway_runner._enrich_message_with_vision("caption", ["/tmp/img.jpg"]))
|
||||
assert "photograph of a dog" in out
|
||||
assert "fenced leak" not in out
|
||||
assert "header leak" not in out
|
||||
assert "Honcho Context" not in out
|
||||
assert "<memory-context>" not in out
|
||||
|
|
@ -3,6 +3,72 @@
|
|||
from types import SimpleNamespace
|
||||
|
||||
|
||||
class TestResolveApiKey:
|
||||
"""Test _resolve_api_key with various config shapes."""
|
||||
|
||||
def test_returns_api_key_from_root(self, monkeypatch):
|
||||
import plugins.memory.honcho.cli as honcho_cli
|
||||
monkeypatch.setattr(honcho_cli, "_host_key", lambda: "hermes")
|
||||
monkeypatch.delenv("HONCHO_API_KEY", raising=False)
|
||||
assert honcho_cli._resolve_api_key({"apiKey": "root-key"}) == "root-key"
|
||||
|
||||
def test_returns_api_key_from_host_block(self, monkeypatch):
|
||||
import plugins.memory.honcho.cli as honcho_cli
|
||||
monkeypatch.setattr(honcho_cli, "_host_key", lambda: "hermes")
|
||||
monkeypatch.delenv("HONCHO_API_KEY", raising=False)
|
||||
cfg = {"hosts": {"hermes": {"apiKey": "host-key"}}, "apiKey": "root-key"}
|
||||
assert honcho_cli._resolve_api_key(cfg) == "host-key"
|
||||
|
||||
def test_returns_local_for_base_url_without_api_key(self, monkeypatch):
|
||||
import plugins.memory.honcho.cli as honcho_cli
|
||||
monkeypatch.setattr(honcho_cli, "_host_key", lambda: "hermes")
|
||||
monkeypatch.delenv("HONCHO_API_KEY", raising=False)
|
||||
monkeypatch.delenv("HONCHO_BASE_URL", raising=False)
|
||||
cfg = {"baseUrl": "http://localhost:8000"}
|
||||
assert honcho_cli._resolve_api_key(cfg) == "local"
|
||||
|
||||
def test_returns_local_for_base_url_env_var(self, monkeypatch):
|
||||
import plugins.memory.honcho.cli as honcho_cli
|
||||
monkeypatch.setattr(honcho_cli, "_host_key", lambda: "hermes")
|
||||
monkeypatch.delenv("HONCHO_API_KEY", raising=False)
|
||||
monkeypatch.setenv("HONCHO_BASE_URL", "http://10.0.0.5:8000")
|
||||
assert honcho_cli._resolve_api_key({}) == "local"
|
||||
|
||||
def test_returns_empty_when_nothing_configured(self, monkeypatch):
|
||||
import plugins.memory.honcho.cli as honcho_cli
|
||||
monkeypatch.setattr(honcho_cli, "_host_key", lambda: "hermes")
|
||||
monkeypatch.delenv("HONCHO_API_KEY", raising=False)
|
||||
monkeypatch.delenv("HONCHO_BASE_URL", raising=False)
|
||||
assert honcho_cli._resolve_api_key({}) == ""
|
||||
|
||||
def test_rejects_garbage_base_url_without_scheme(self, monkeypatch):
|
||||
"""A non-URL string in baseUrl (typo) must not pass the guard."""
|
||||
import plugins.memory.honcho.cli as honcho_cli
|
||||
monkeypatch.setattr(honcho_cli, "_host_key", lambda: "hermes")
|
||||
monkeypatch.delenv("HONCHO_API_KEY", raising=False)
|
||||
monkeypatch.delenv("HONCHO_BASE_URL", raising=False)
|
||||
for garbage in ("true", "yes", "1", "localhost", "10.0.0.5"):
|
||||
assert honcho_cli._resolve_api_key({"baseUrl": garbage}) == "", \
|
||||
f"expected empty for garbage {garbage!r}"
|
||||
|
||||
def test_rejects_non_http_scheme_base_url(self, monkeypatch):
|
||||
"""Only http/https schemes are accepted; file:// / ftp:// are not."""
|
||||
import plugins.memory.honcho.cli as honcho_cli
|
||||
monkeypatch.setattr(honcho_cli, "_host_key", lambda: "hermes")
|
||||
monkeypatch.delenv("HONCHO_API_KEY", raising=False)
|
||||
monkeypatch.delenv("HONCHO_BASE_URL", raising=False)
|
||||
for bad in ("file:///etc/passwd", "ftp://host/", "ws://host/"):
|
||||
assert honcho_cli._resolve_api_key({"baseUrl": bad}) == "", \
|
||||
f"expected empty for scheme of {bad!r}"
|
||||
|
||||
def test_accepts_https_base_url(self, monkeypatch):
|
||||
import plugins.memory.honcho.cli as honcho_cli
|
||||
monkeypatch.setattr(honcho_cli, "_host_key", lambda: "hermes")
|
||||
monkeypatch.delenv("HONCHO_API_KEY", raising=False)
|
||||
monkeypatch.delenv("HONCHO_BASE_URL", raising=False)
|
||||
assert honcho_cli._resolve_api_key({"baseUrl": "https://honcho.example.com"}) == "local"
|
||||
|
||||
|
||||
class TestCmdStatus:
|
||||
def test_reports_connection_failure_when_session_setup_fails(self, monkeypatch, capsys, tmp_path):
|
||||
import plugins.memory.honcho.cli as honcho_cli
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ from plugins.memory.honcho.client import (
|
|||
reset_honcho_client,
|
||||
resolve_active_host,
|
||||
resolve_config_path,
|
||||
GLOBAL_CONFIG_PATH,
|
||||
resolve_global_config_path,
|
||||
HOST,
|
||||
)
|
||||
|
||||
|
|
@ -360,7 +360,7 @@ class TestResolveConfigPath:
|
|||
with patch.dict(os.environ, {"HERMES_HOME": str(hermes_home)}), \
|
||||
patch.object(Path, "home", return_value=fake_home):
|
||||
result = resolve_config_path()
|
||||
assert result == GLOBAL_CONFIG_PATH
|
||||
assert result == fake_home / ".honcho" / "config.json"
|
||||
|
||||
def test_falls_back_to_global_without_hermes_home_env(self, tmp_path):
|
||||
fake_home = tmp_path / "fakehome"
|
||||
|
|
@ -370,7 +370,18 @@ class TestResolveConfigPath:
|
|||
patch.object(Path, "home", return_value=fake_home):
|
||||
os.environ.pop("HERMES_HOME", None)
|
||||
result = resolve_config_path()
|
||||
assert result == GLOBAL_CONFIG_PATH
|
||||
assert result == fake_home / ".honcho" / "config.json"
|
||||
|
||||
def test_global_fallback_uses_home_at_call_time(self, tmp_path):
|
||||
fake_home = tmp_path / "fakehome"
|
||||
fake_home.mkdir()
|
||||
hermes_home = tmp_path / "hermes"
|
||||
hermes_home.mkdir()
|
||||
|
||||
with patch.dict(os.environ, {"HERMES_HOME": str(hermes_home)}), \
|
||||
patch.object(Path, "home", return_value=fake_home):
|
||||
assert resolve_global_config_path() == fake_home / ".honcho" / "config.json"
|
||||
assert resolve_config_path() == fake_home / ".honcho" / "config.json"
|
||||
|
||||
def test_from_global_config_uses_local_path(self, tmp_path):
|
||||
hermes_home = tmp_path / "hermes"
|
||||
|
|
@ -589,6 +600,28 @@ class TestGetHonchoClient:
|
|||
mock_honcho.assert_called_once()
|
||||
assert mock_honcho.call_args.kwargs["timeout"] == 88.0
|
||||
|
||||
@pytest.mark.skipif(
|
||||
not importlib.util.find_spec("honcho"),
|
||||
reason="honcho SDK not installed"
|
||||
)
|
||||
def test_defaults_to_30s_when_no_timeout_configured(self):
|
||||
from plugins.memory.honcho.client import _DEFAULT_HTTP_TIMEOUT
|
||||
|
||||
fake_honcho = MagicMock(name="Honcho")
|
||||
cfg = HonchoClientConfig(
|
||||
api_key="test-key",
|
||||
workspace_id="hermes",
|
||||
environment="production",
|
||||
)
|
||||
|
||||
with patch("honcho.Honcho", return_value=fake_honcho) as mock_honcho, \
|
||||
patch("hermes_cli.config.load_config", return_value={}):
|
||||
client = get_honcho_client(cfg)
|
||||
|
||||
assert client is fake_honcho
|
||||
mock_honcho.assert_called_once()
|
||||
assert mock_honcho.call_args.kwargs["timeout"] == _DEFAULT_HTTP_TIMEOUT
|
||||
|
||||
@pytest.mark.skipif(
|
||||
not importlib.util.find_spec("honcho"),
|
||||
reason="honcho SDK not installed"
|
||||
|
|
@ -656,6 +689,82 @@ class TestResolveSessionNameGatewayKey:
|
|||
assert ":" not in result
|
||||
|
||||
|
||||
class TestResolveSessionNameLengthLimit:
|
||||
"""Regression tests for Honcho's 100-char session ID limit (issue #13868).
|
||||
|
||||
Long gateway session keys (Matrix room+event IDs, Telegram supergroup
|
||||
reply chains, Slack thread IDs with long workspace prefixes) can overflow
|
||||
Honcho's 100-char session_id limit after sanitization. Before this fix,
|
||||
every Honcho API call for those sessions 400'd with "session_id too long".
|
||||
"""
|
||||
|
||||
HONCHO_MAX = 100
|
||||
|
||||
def test_short_gateway_key_unchanged(self):
|
||||
"""Short keys must not get a hash suffix appended."""
|
||||
config = HonchoClientConfig()
|
||||
result = config.resolve_session_name(
|
||||
gateway_session_key="agent:main:telegram:dm:8439114563",
|
||||
)
|
||||
# Unchanged fast-path: sanitize only, no truncation, no hash suffix.
|
||||
assert result == "agent-main-telegram-dm-8439114563"
|
||||
assert len(result) <= self.HONCHO_MAX
|
||||
|
||||
def test_key_at_exact_limit_unchanged(self):
|
||||
"""A sanitized key that is exactly 100 chars must be returned as-is."""
|
||||
key = "a" * self.HONCHO_MAX
|
||||
config = HonchoClientConfig()
|
||||
result = config.resolve_session_name(gateway_session_key=key)
|
||||
assert result == key
|
||||
assert len(result) == self.HONCHO_MAX
|
||||
|
||||
def test_long_gateway_key_truncated_to_limit(self):
|
||||
"""An over-limit sanitized key must truncate to exactly 100 chars."""
|
||||
key = "!roomid:matrix.example.org|" + "$event_" + ("a" * 300)
|
||||
config = HonchoClientConfig()
|
||||
result = config.resolve_session_name(gateway_session_key=key)
|
||||
assert result is not None
|
||||
assert len(result) == self.HONCHO_MAX
|
||||
|
||||
def test_truncation_is_deterministic(self):
|
||||
"""Same long key must always produce the same truncated session ID."""
|
||||
key = "matrix-" + ("a" * 300)
|
||||
config = HonchoClientConfig()
|
||||
first = config.resolve_session_name(gateway_session_key=key)
|
||||
second = config.resolve_session_name(gateway_session_key=key)
|
||||
assert first == second
|
||||
|
||||
def test_truncated_result_respects_char_allowlist(self):
|
||||
"""Truncated result must still match Honcho's [a-zA-Z0-9_-] allowlist."""
|
||||
import re
|
||||
key = "slack:T12345:thread-reply:" + ("x" * 300) + ":with:colons:and:slashes/here"
|
||||
config = HonchoClientConfig()
|
||||
result = config.resolve_session_name(gateway_session_key=key)
|
||||
assert result is not None
|
||||
assert re.fullmatch(r"[a-zA-Z0-9_-]+", result)
|
||||
|
||||
def test_distinct_long_keys_do_not_collide(self):
|
||||
"""Two long keys sharing a prefix must produce different truncated IDs."""
|
||||
prefix = "matrix:!room:example.org|" + "a" * 200
|
||||
key_a = prefix + "-suffix-alpha"
|
||||
key_b = prefix + "-suffix-beta"
|
||||
config = HonchoClientConfig()
|
||||
result_a = config.resolve_session_name(gateway_session_key=key_a)
|
||||
result_b = config.resolve_session_name(gateway_session_key=key_b)
|
||||
assert result_a != result_b
|
||||
assert len(result_a) == self.HONCHO_MAX
|
||||
assert len(result_b) == self.HONCHO_MAX
|
||||
|
||||
def test_truncated_result_has_hash_suffix(self):
|
||||
"""Truncated IDs must end with '-<8 hex chars>' for collision resistance."""
|
||||
import re
|
||||
key = "matrix-" + ("a" * 300)
|
||||
config = HonchoClientConfig()
|
||||
result = config.resolve_session_name(gateway_session_key=key)
|
||||
# Last 9 chars: '-' + 8 hex chars.
|
||||
assert re.search(r"-[0-9a-f]{8}$", result)
|
||||
|
||||
|
||||
class TestResetHonchoClient:
|
||||
def test_reset_clears_singleton(self):
|
||||
import plugins.memory.honcho.client as mod
|
||||
|
|
|
|||
307
tests/honcho_plugin/test_pin_peer_name.py
Normal file
307
tests/honcho_plugin/test_pin_peer_name.py
Normal file
|
|
@ -0,0 +1,307 @@
|
|||
"""Tests for the ``pinPeerName`` config flag (#14984).
|
||||
|
||||
By default, when Hermes runs under a gateway (Telegram, Discord, Slack, ...)
|
||||
it passes the platform-native user ID as ``runtime_user_peer_name`` into
|
||||
``HonchoSessionManager``. That ID wins over any configured ``peer_name``
|
||||
so multi-user bots scope memory per user.
|
||||
|
||||
For a single-user personal deployment where the user connects over multiple
|
||||
platforms, that default forks memory into one Honcho peer per platform
|
||||
(Telegram UID, Discord snowflake, Slack user ID, ...). The user asked for
|
||||
an opt-in knob that pins the user peer to ``peer_name`` from ``honcho.json``
|
||||
so the same person's memory stays unified regardless of which platform the
|
||||
turn arrived on — ``hosts.<host>.pinPeerName: true`` (or root-level
|
||||
``pinPeerName: true``).
|
||||
|
||||
These tests exercise both the config parsing (``client.py::from_global_config``)
|
||||
and the resolution order (``session.py::get_or_create``). We stub the
|
||||
Honcho API calls so we can assert the chosen ``user_peer_id`` without
|
||||
touching the network.
|
||||
"""
|
||||
|
||||
import json
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from plugins.memory.honcho.client import HonchoClientConfig
|
||||
from plugins.memory.honcho.session import HonchoSessionManager
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Config parsing
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestPinPeerNameConfigParsing:
|
||||
def test_default_is_false(self):
|
||||
"""Default preserves existing behaviour — multi-user bots unaffected."""
|
||||
config = HonchoClientConfig()
|
||||
assert config.pin_peer_name is False
|
||||
|
||||
def test_root_level_true(self, tmp_path, monkeypatch):
|
||||
config_file = tmp_path / "honcho.json"
|
||||
config_file.write_text(json.dumps({
|
||||
"apiKey": "k",
|
||||
"peerName": "Igor",
|
||||
"pinPeerName": True,
|
||||
}))
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path / "isolated"))
|
||||
|
||||
config = HonchoClientConfig.from_global_config(config_path=config_file)
|
||||
assert config.pin_peer_name is True
|
||||
assert config.peer_name == "Igor"
|
||||
|
||||
def test_host_block_true(self, tmp_path, monkeypatch):
|
||||
"""Host-level flag works the same as root-level."""
|
||||
config_file = tmp_path / "honcho.json"
|
||||
config_file.write_text(json.dumps({
|
||||
"apiKey": "k",
|
||||
"peerName": "Igor",
|
||||
"hosts": {
|
||||
"hermes": {"pinPeerName": True},
|
||||
},
|
||||
}))
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path / "isolated"))
|
||||
|
||||
config = HonchoClientConfig.from_global_config(config_path=config_file)
|
||||
assert config.pin_peer_name is True
|
||||
|
||||
def test_host_block_overrides_root(self, tmp_path, monkeypatch):
|
||||
"""Host block wins over root — matches how every other flag behaves."""
|
||||
config_file = tmp_path / "honcho.json"
|
||||
config_file.write_text(json.dumps({
|
||||
"apiKey": "k",
|
||||
"peerName": "Igor",
|
||||
"pinPeerName": True,
|
||||
"hosts": {
|
||||
"hermes": {"pinPeerName": False},
|
||||
},
|
||||
}))
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path / "isolated"))
|
||||
|
||||
config = HonchoClientConfig.from_global_config(config_path=config_file)
|
||||
assert config.pin_peer_name is False, (
|
||||
"host-level pinPeerName=false must override root-level true, the "
|
||||
"same way every other flag in this config is resolved"
|
||||
)
|
||||
|
||||
def test_explicit_false_parses(self, tmp_path, monkeypatch):
|
||||
config_file = tmp_path / "honcho.json"
|
||||
config_file.write_text(json.dumps({
|
||||
"apiKey": "k",
|
||||
"peerName": "Igor",
|
||||
"pinPeerName": False,
|
||||
}))
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path / "isolated"))
|
||||
|
||||
config = HonchoClientConfig.from_global_config(config_path=config_file)
|
||||
assert config.pin_peer_name is False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Peer resolution (the actual bug fix)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _patch_manager_for_resolution_test(mgr: HonchoSessionManager) -> None:
|
||||
"""Stub out the Honcho client so ``get_or_create`` doesn't try to talk
|
||||
to the network — we only care about the user_peer_id chosen before
|
||||
those calls happen.
|
||||
"""
|
||||
fake_peer = MagicMock()
|
||||
mgr._get_or_create_peer = MagicMock(return_value=fake_peer)
|
||||
mgr._get_or_create_honcho_session = MagicMock(
|
||||
return_value=(MagicMock(), [])
|
||||
)
|
||||
|
||||
|
||||
class TestPeerResolutionOrder:
|
||||
"""Matrix of (runtime_id, pin_peer_name, peer_name) → expected user_peer_id."""
|
||||
|
||||
def _config(self, *, peer_name: str | None, pin_peer_name: bool) -> HonchoClientConfig:
|
||||
# The test doesn't need auth / Honcho — disable the provider so
|
||||
# the manager doesn't try to open a real client.
|
||||
return HonchoClientConfig(
|
||||
api_key="test-key",
|
||||
peer_name=peer_name,
|
||||
pin_peer_name=pin_peer_name,
|
||||
enabled=False,
|
||||
write_frequency="turn", # avoid spawning the async writer thread
|
||||
)
|
||||
|
||||
def test_runtime_wins_when_pin_is_false(self):
|
||||
"""Regression guard: default behaviour must stay unchanged.
|
||||
Multi-user bots rely on the platform-native ID winning."""
|
||||
mgr = HonchoSessionManager(
|
||||
honcho=MagicMock(),
|
||||
config=self._config(peer_name="Igor", pin_peer_name=False),
|
||||
runtime_user_peer_name="86701400", # e.g. Telegram UID
|
||||
)
|
||||
_patch_manager_for_resolution_test(mgr)
|
||||
|
||||
session = mgr.get_or_create("telegram:86701400")
|
||||
assert session.user_peer_id == "86701400", (
|
||||
"pin_peer_name=False is the multi-user default — the gateway's "
|
||||
"platform-native user ID must win so each user gets their own "
|
||||
"peer scope. If this regresses, every Telegram/Discord/Slack "
|
||||
"bot immediately merges memory across users."
|
||||
)
|
||||
|
||||
def test_config_wins_when_pin_is_true(self):
|
||||
"""The #14984 fix: single-user deployments opt into config pinning."""
|
||||
mgr = HonchoSessionManager(
|
||||
honcho=MagicMock(),
|
||||
config=self._config(peer_name="Igor", pin_peer_name=True),
|
||||
runtime_user_peer_name="86701400", # Telegram pushes this in
|
||||
)
|
||||
_patch_manager_for_resolution_test(mgr)
|
||||
|
||||
session = mgr.get_or_create("telegram:86701400")
|
||||
assert session.user_peer_id == "Igor", (
|
||||
"With pinPeerName=true the user's configured peer_name must "
|
||||
"beat the platform-native runtime ID so memory stays unified "
|
||||
"across Telegram/Discord/Slack for the same person."
|
||||
)
|
||||
|
||||
def test_pin_noop_when_peer_name_missing(self):
|
||||
"""Safety: pinPeerName alone (no peer_name) must not silently drop
|
||||
the runtime identity. Without a configured peer_name there's
|
||||
nothing to pin to — fall back to runtime as before."""
|
||||
mgr = HonchoSessionManager(
|
||||
honcho=MagicMock(),
|
||||
config=self._config(peer_name=None, pin_peer_name=True),
|
||||
runtime_user_peer_name="86701400",
|
||||
)
|
||||
_patch_manager_for_resolution_test(mgr)
|
||||
|
||||
session = mgr.get_or_create("telegram:86701400")
|
||||
assert session.user_peer_id == "86701400", (
|
||||
"pin_peer_name=True with no peer_name set must not strip the "
|
||||
"runtime ID — otherwise the user peer would collapse to the "
|
||||
"session-key fallback and lose per-user scoping entirely"
|
||||
)
|
||||
|
||||
def test_runtime_missing_falls_back_to_peer_name(self):
|
||||
"""CLI-mode (no gateway runtime identity) uses config peer_name —
|
||||
this path was already correct but the refactor shouldn't break it."""
|
||||
mgr = HonchoSessionManager(
|
||||
honcho=MagicMock(),
|
||||
config=self._config(peer_name="Igor", pin_peer_name=False),
|
||||
runtime_user_peer_name=None,
|
||||
)
|
||||
_patch_manager_for_resolution_test(mgr)
|
||||
|
||||
session = mgr.get_or_create("cli:local")
|
||||
assert session.user_peer_id == "Igor"
|
||||
|
||||
def test_everything_missing_falls_back_to_session_key(self):
|
||||
"""Deepest fallback: no runtime identity, no peer_name, no pin.
|
||||
Must still produce a deterministic peer_id from the session key."""
|
||||
# Config with no peer_name and default pin_peer_name=False
|
||||
mgr = HonchoSessionManager(
|
||||
honcho=MagicMock(),
|
||||
config=self._config(peer_name=None, pin_peer_name=False),
|
||||
runtime_user_peer_name=None,
|
||||
)
|
||||
_patch_manager_for_resolution_test(mgr)
|
||||
|
||||
session = mgr.get_or_create("telegram:123")
|
||||
assert session.user_peer_id == "user-telegram-123"
|
||||
|
||||
def test_pin_does_not_affect_assistant_peer(self):
|
||||
"""The flag only pins the USER peer — the assistant peer continues
|
||||
to come from ``ai_peer`` and must not be touched."""
|
||||
cfg = HonchoClientConfig(
|
||||
api_key="k",
|
||||
peer_name="Igor",
|
||||
pin_peer_name=True,
|
||||
ai_peer="hermes-assistant",
|
||||
enabled=False,
|
||||
write_frequency="turn",
|
||||
)
|
||||
mgr = HonchoSessionManager(
|
||||
honcho=MagicMock(),
|
||||
config=cfg,
|
||||
runtime_user_peer_name="86701400",
|
||||
)
|
||||
_patch_manager_for_resolution_test(mgr)
|
||||
|
||||
session = mgr.get_or_create("telegram:86701400")
|
||||
assert session.user_peer_id == "Igor"
|
||||
assert session.assistant_peer_id == "hermes-assistant"
|
||||
|
||||
|
||||
class TestCrossPlatformMemoryUnification:
|
||||
"""The user-visible outcome of the #14984 fix: the same physical user
|
||||
talking to Hermes via Telegram AND Discord should land on ONE peer
|
||||
(not two) when pinPeerName is opted in.
|
||||
"""
|
||||
|
||||
def _config_pinned(self) -> HonchoClientConfig:
|
||||
return HonchoClientConfig(
|
||||
api_key="k",
|
||||
peer_name="Igor",
|
||||
pin_peer_name=True,
|
||||
enabled=False,
|
||||
write_frequency="turn",
|
||||
)
|
||||
|
||||
def test_telegram_and_discord_collapse_to_one_peer_when_pinned(self):
|
||||
"""Single-user deployment: Telegram UID and Discord snowflake
|
||||
both resolve to the same configured peer_name."""
|
||||
# Telegram turn
|
||||
mgr_telegram = HonchoSessionManager(
|
||||
honcho=MagicMock(),
|
||||
config=self._config_pinned(),
|
||||
runtime_user_peer_name="86701400",
|
||||
)
|
||||
_patch_manager_for_resolution_test(mgr_telegram)
|
||||
telegram_session = mgr_telegram.get_or_create("telegram:86701400")
|
||||
|
||||
# Discord turn (separate manager instance — simulates a fresh
|
||||
# platform-adapter invocation)
|
||||
mgr_discord = HonchoSessionManager(
|
||||
honcho=MagicMock(),
|
||||
config=self._config_pinned(),
|
||||
runtime_user_peer_name="1348750102029926454",
|
||||
)
|
||||
_patch_manager_for_resolution_test(mgr_discord)
|
||||
discord_session = mgr_discord.get_or_create("discord:1348750102029926454")
|
||||
|
||||
assert telegram_session.user_peer_id == "Igor"
|
||||
assert discord_session.user_peer_id == "Igor"
|
||||
assert telegram_session.user_peer_id == discord_session.user_peer_id, (
|
||||
"cross-platform memory unification is the whole point of "
|
||||
"pinPeerName — both platforms must land on the same Honcho peer"
|
||||
)
|
||||
|
||||
def test_multiuser_default_keeps_platforms_separate(self):
|
||||
"""Negative control: with pinPeerName=false (the default), two
|
||||
different platform IDs must produce two different peers so
|
||||
multi-user bots don't merge users."""
|
||||
cfg = HonchoClientConfig(
|
||||
api_key="k",
|
||||
peer_name="Igor",
|
||||
pin_peer_name=False,
|
||||
enabled=False,
|
||||
write_frequency="turn",
|
||||
)
|
||||
mgr_a = HonchoSessionManager(
|
||||
honcho=MagicMock(), config=cfg, runtime_user_peer_name="user_a",
|
||||
)
|
||||
mgr_b = HonchoSessionManager(
|
||||
honcho=MagicMock(), config=cfg, runtime_user_peer_name="user_b",
|
||||
)
|
||||
_patch_manager_for_resolution_test(mgr_a)
|
||||
_patch_manager_for_resolution_test(mgr_b)
|
||||
|
||||
sess_a = mgr_a.get_or_create("telegram:a")
|
||||
sess_b = mgr_b.get_or_create("telegram:b")
|
||||
|
||||
assert sess_a.user_peer_id == "user_a"
|
||||
assert sess_b.user_peer_id == "user_b"
|
||||
assert sess_a.user_peer_id != sess_b.user_peer_id, (
|
||||
"multi-user default MUST keep users separate — a regression "
|
||||
"here would silently merge unrelated users' memory"
|
||||
)
|
||||
146
tests/honcho_plugin/test_provider_sync_integration.py
Normal file
146
tests/honcho_plugin/test_provider_sync_integration.py
Normal file
|
|
@ -0,0 +1,146 @@
|
|||
"""Integration tests for the sync worker's integration into HonchoMemoryProvider.
|
||||
|
||||
Layer 1 (fire-and-forget): sync_turn must return in < 20ms even when the
|
||||
Honcho backend would block for seconds.
|
||||
|
||||
Layer 3 (breaker + backlog): when the breaker trips open, sync_turn tasks
|
||||
land in the provider's in-memory backlog instead of running. When the
|
||||
breaker closes (via probe recovery), the backlog drains on the next
|
||||
sync_turn call.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import threading
|
||||
import time
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
from plugins.memory.honcho import HonchoMemoryProvider
|
||||
from plugins.memory.honcho.sync_worker import SyncTask
|
||||
|
||||
|
||||
def _wait_until(predicate, timeout: float = 2.0, interval: float = 0.01) -> bool:
|
||||
deadline = time.monotonic() + timeout
|
||||
while time.monotonic() < deadline:
|
||||
if predicate():
|
||||
return True
|
||||
time.sleep(interval)
|
||||
return False
|
||||
|
||||
|
||||
def _make_provider() -> HonchoMemoryProvider:
|
||||
provider = HonchoMemoryProvider()
|
||||
provider._manager = MagicMock()
|
||||
session = MagicMock()
|
||||
provider._manager.get_or_create.return_value = session
|
||||
provider._session_key = "agent:main:test"
|
||||
provider._cron_skipped = False
|
||||
provider._config = MagicMock(message_max_chars=25000)
|
||||
return provider
|
||||
|
||||
|
||||
class TestLayer1FireAndForget:
|
||||
def test_sync_turn_returns_immediately_with_slow_backend(self):
|
||||
"""sync_turn must not block even if the backend flush takes seconds."""
|
||||
provider = _make_provider()
|
||||
|
||||
# Make the flush block for up to 2s.
|
||||
flush_started = threading.Event()
|
||||
release_flush = threading.Event()
|
||||
|
||||
def slow_flush(_session):
|
||||
flush_started.set()
|
||||
release_flush.wait(timeout=3.0)
|
||||
|
||||
provider._manager._flush_session.side_effect = slow_flush
|
||||
|
||||
try:
|
||||
t0 = time.monotonic()
|
||||
provider.sync_turn("hello", "world")
|
||||
elapsed = time.monotonic() - t0
|
||||
assert elapsed < 0.1, f"sync_turn blocked for {elapsed:.3f}s"
|
||||
# Confirm the worker did pick it up
|
||||
assert flush_started.wait(timeout=1.0)
|
||||
finally:
|
||||
release_flush.set()
|
||||
provider.shutdown()
|
||||
|
||||
def test_multiple_sync_turns_do_not_serialize_caller(self):
|
||||
"""Back-to-back sync_turns must not block on prior turn's completion."""
|
||||
provider = _make_provider()
|
||||
|
||||
gate = threading.Event()
|
||||
provider._manager._flush_session.side_effect = lambda _s: gate.wait(timeout=3.0)
|
||||
|
||||
try:
|
||||
t0 = time.monotonic()
|
||||
for _ in range(5):
|
||||
provider.sync_turn("u", "a")
|
||||
elapsed = time.monotonic() - t0
|
||||
# Without fire-and-forget, the old code would serialize on
|
||||
# the previous turn's join(timeout=5.0). 5 turns × 5s = 25s
|
||||
# worst case. We assert << 1s.
|
||||
assert elapsed < 0.2, f"5 sync_turns took {elapsed:.3f}s"
|
||||
finally:
|
||||
gate.set()
|
||||
provider.shutdown()
|
||||
|
||||
|
||||
class TestLayer3BacklogAndBreaker:
|
||||
def test_breaker_open_backlogs_task(self):
|
||||
"""While the breaker is open, sync_turn tasks must land in the backlog."""
|
||||
provider = _make_provider()
|
||||
|
||||
# Trip the breaker manually.
|
||||
provider._breaker._state = provider._breaker.STATE_OPEN
|
||||
provider._breaker._opened_at = float("inf") # never recover
|
||||
|
||||
try:
|
||||
provider.sync_turn("hello", "world")
|
||||
# The task should have landed in the backlog rather than run.
|
||||
assert len(provider._backlog) == 1
|
||||
assert provider._backlog[0].name == "sync_turn"
|
||||
finally:
|
||||
provider.shutdown()
|
||||
|
||||
def test_backlog_drains_when_breaker_closes(self):
|
||||
"""Once the breaker closes, next sync_turn drains the backlog."""
|
||||
provider = _make_provider()
|
||||
|
||||
# Trip the breaker and enqueue a backlog.
|
||||
provider._breaker._state = provider._breaker.STATE_OPEN
|
||||
provider._breaker._opened_at = float("inf")
|
||||
for _ in range(3):
|
||||
provider.sync_turn("u", "a")
|
||||
assert len(provider._backlog) == 3
|
||||
|
||||
# Close the breaker (simulating recovery) and trigger another sync.
|
||||
provider._breaker.reset()
|
||||
|
||||
try:
|
||||
provider.sync_turn("u", "a")
|
||||
# One new task + 3 drained = 4 flushes eventually.
|
||||
assert _wait_until(
|
||||
lambda: provider._manager._flush_session.call_count >= 4,
|
||||
timeout=2.0,
|
||||
), (
|
||||
"expected >= 4 flushes after recovery, got "
|
||||
f"{provider._manager._flush_session.call_count}"
|
||||
)
|
||||
assert provider._backlog == []
|
||||
finally:
|
||||
provider.shutdown()
|
||||
|
||||
def test_backlog_honors_max_size(self):
|
||||
"""Backlog must not grow unbounded during a long outage."""
|
||||
provider = _make_provider()
|
||||
provider._BACKLOG_MAX = 5
|
||||
provider._breaker._state = provider._breaker.STATE_OPEN
|
||||
provider._breaker._opened_at = float("inf")
|
||||
|
||||
try:
|
||||
for _ in range(20):
|
||||
provider.sync_turn("u", "a")
|
||||
assert len(provider._backlog) == 5
|
||||
finally:
|
||||
provider.shutdown()
|
||||
|
|
@ -525,6 +525,39 @@ class TestConcludeToolDispatch:
|
|||
assert parsed == {"error": "Exactly one of conclusion or delete_id must be provided."}
|
||||
provider._manager.delete_conclusion.assert_not_called()
|
||||
|
||||
def test_sync_turn_strips_leaked_memory_context_before_honcho_ingest(self):
|
||||
provider = HonchoMemoryProvider()
|
||||
provider._session_key = "telegram:123"
|
||||
provider._manager = MagicMock()
|
||||
provider._cron_skipped = False
|
||||
provider._config = SimpleNamespace(message_max_chars=25000)
|
||||
|
||||
session = MagicMock()
|
||||
provider._manager.get_or_create.return_value = session
|
||||
|
||||
provider.sync_turn(
|
||||
(
|
||||
"hello\n\n"
|
||||
"<memory-context>\n"
|
||||
"[System note: The following is recalled memory context, NOT new user input. Treat as informational background data.]\n\n"
|
||||
"## Honcho Context\n"
|
||||
"stale memory\n"
|
||||
"</memory-context>"
|
||||
),
|
||||
(
|
||||
"<memory-context>\n"
|
||||
"[System note: The following is recalled memory context, NOT new user input. Treat as informational background data.]\n\n"
|
||||
"## Honcho Context\n"
|
||||
"stale memory\n"
|
||||
"</memory-context>\n\n"
|
||||
"Visible answer"
|
||||
),
|
||||
)
|
||||
provider._sync_worker.shutdown(timeout=1.0)
|
||||
|
||||
assert session.add_message.call_args_list[0].args == ("user", "hello")
|
||||
assert session.add_message.call_args_list[1].args == ("assistant", "Visible answer")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Message chunking
|
||||
|
|
|
|||
342
tests/honcho_plugin/test_sync_worker.py
Normal file
342
tests/honcho_plugin/test_sync_worker.py
Normal file
|
|
@ -0,0 +1,342 @@
|
|||
"""Tests for plugins/memory/honcho/sync_worker.py — Layers 1/2/3."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import threading
|
||||
import time
|
||||
from typing import List
|
||||
|
||||
import pytest
|
||||
|
||||
from plugins.memory.honcho.sync_worker import (
|
||||
CircuitBreaker,
|
||||
HonchoLatencyTracker,
|
||||
SyncTask,
|
||||
SyncWorker,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# HonchoLatencyTracker
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestHonchoLatencyTracker:
    """Behavioral tests for the adaptive, p95-based Honcho timeout tracker."""

    def test_returns_default_during_warmup(self):
        tracker = HonchoLatencyTracker(default=30.0, warmup_samples=5)
        for _ in range(4):
            tracker.observe(1.0)
        # Fewer observations than warmup_samples -> default timeout applies.
        assert tracker.timeout() == 30.0

    def test_adapts_to_observed_p95(self):
        tracker = HonchoLatencyTracker(default=30.0, floor=5.0, headroom=3.0, warmup_samples=5)
        # 10 samples at 1s, 10 samples at 2s — p95 should land at the 2s end
        for _ in range(10):
            tracker.observe(1.0)
        for _ in range(10):
            tracker.observe(2.0)
        computed = tracker.timeout()
        assert 5.0 <= computed <= 7.0  # 3 × 2.0 with some rounding latitude

    def test_respects_floor(self):
        tracker = HonchoLatencyTracker(default=30.0, floor=5.0, headroom=3.0, warmup_samples=3)
        # Very fast samples — 3 × 0.1 = 0.3 < floor → floor applies
        for _ in range(10):
            tracker.observe(0.1)
        assert tracker.timeout() == 5.0

    def test_rejects_nan_and_negative(self):
        tracker = HonchoLatencyTracker(warmup_samples=1)
        tracker.observe(float("nan"))
        tracker.observe(-1.0)
        # No valid samples → still default
        assert tracker.timeout() == tracker._default

    def test_rolling_window_discards_old(self):
        tracker = HonchoLatencyTracker(window=5, default=30.0, floor=0.1, headroom=1.0, warmup_samples=1)
        for _ in range(5):
            tracker.observe(100.0)
        assert tracker.timeout() >= 50.0  # dominated by 100s samples
        for _ in range(5):
            tracker.observe(0.5)
        # Old samples rolled out, now dominated by 0.5s
        assert tracker.timeout() <= 1.0

    def test_thread_safe_concurrent_observations(self):
        tracker = HonchoLatencyTracker(window=1000, warmup_samples=1)

        def observer(val: float):
            for _ in range(200):
                tracker.observe(val)

        workers = [
            threading.Thread(target=observer, args=(i * 0.1,)) for i in range(5)
        ]
        for w in workers:
            w.start()
        for w in workers:
            w.join()
        # No crash + timeout() returns a real number
        assert tracker.timeout() > 0
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# CircuitBreaker
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class _Clock:
|
||||
"""Test double for time.monotonic — manually advanced."""
|
||||
|
||||
def __init__(self, start: float = 0.0) -> None:
|
||||
self.now = start
|
||||
|
||||
def __call__(self) -> float:
|
||||
return self.now
|
||||
|
||||
|
||||
class TestCircuitBreaker:
    """State-machine tests: closed → open → half-open → closed/open."""

    def test_starts_closed(self):
        breaker = CircuitBreaker()
        assert breaker.state == breaker.STATE_CLOSED
        assert breaker.allow() is True

    def test_opens_after_threshold_failures(self):
        breaker = CircuitBreaker(failure_threshold=3)
        for _ in range(2):
            breaker.record_failure()
        assert breaker.state == breaker.STATE_CLOSED
        breaker.record_failure()
        assert breaker.state == breaker.STATE_OPEN
        assert breaker.allow() is False

    def test_resets_counter_on_success(self):
        breaker = CircuitBreaker(failure_threshold=3)
        breaker.record_failure()
        breaker.record_failure()
        breaker.record_success()
        breaker.record_failure()
        # Success cleared the streak, so one new failure is below threshold.
        assert breaker.state == breaker.STATE_CLOSED

    def test_transitions_to_half_open_after_probe_interval(self):
        clock = _Clock()
        breaker = CircuitBreaker(failure_threshold=2, probe_interval=60.0, time_fn=clock)
        breaker.record_failure()
        breaker.record_failure()
        assert breaker.state == breaker.STATE_OPEN
        clock.now = 30.0
        assert breaker.state == breaker.STATE_OPEN  # still within probe window
        clock.now = 61.0
        assert breaker.state == breaker.STATE_HALF_OPEN
        assert breaker.allow() is True  # probe permitted

    def test_half_open_success_closes_breaker(self):
        clock = _Clock()
        breaker = CircuitBreaker(failure_threshold=2, probe_interval=60.0, time_fn=clock)
        breaker.record_failure()
        breaker.record_failure()
        clock.now = 61.0
        _ = breaker.state  # reading state performs the open → half-open transition
        breaker.record_success()
        assert breaker.state == breaker.STATE_CLOSED

    def test_half_open_failure_reopens_breaker(self):
        clock = _Clock()
        breaker = CircuitBreaker(failure_threshold=2, probe_interval=60.0, time_fn=clock)
        breaker.record_failure()
        breaker.record_failure()
        clock.now = 61.0
        _ = breaker.state
        breaker.record_failure()
        assert breaker.state == breaker.STATE_OPEN

    def test_reset_returns_to_closed(self):
        breaker = CircuitBreaker(failure_threshold=1)
        breaker.record_failure()
        assert breaker.state == breaker.STATE_OPEN
        breaker.reset()
        assert breaker.state == breaker.STATE_CLOSED
        assert breaker.allow() is True
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# SyncWorker
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _wait_until(predicate, timeout: float = 2.0, interval: float = 0.01) -> bool:
|
||||
deadline = time.monotonic() + timeout
|
||||
while time.monotonic() < deadline:
|
||||
if predicate():
|
||||
return True
|
||||
time.sleep(interval)
|
||||
return False
|
||||
|
||||
|
||||
class TestSyncWorkerBasics:
    """Core enqueue/execute semantics of SyncWorker."""

    def test_enqueue_runs_task_on_worker_thread(self):
        worker = SyncWorker()
        try:
            seen: List[str] = []
            worker.enqueue(SyncTask(fn=lambda: seen.append("ran"), name="t"))
            assert _wait_until(lambda: seen == ["ran"])
        finally:
            worker.shutdown(timeout=2.0)

    def test_enqueue_returns_immediately(self):
        worker = SyncWorker()
        try:
            slow_event = threading.Event()

            def slow_task():
                slow_event.wait(timeout=2.0)

            started = time.monotonic()
            worker.enqueue(SyncTask(fn=slow_task, name="slow"))
            elapsed = time.monotonic() - started
            assert elapsed < 0.1, f"enqueue blocked for {elapsed}s"
            slow_event.set()
        finally:
            worker.shutdown(timeout=2.0)

    def test_tasks_execute_in_fifo_order(self):
        worker = SyncWorker()
        try:
            order: List[int] = []
            for i in range(10):
                # i=i binds the loop value at definition time (late-binding fix).
                worker.enqueue(SyncTask(fn=lambda i=i: order.append(i), name=f"t{i}"))
            assert _wait_until(lambda: len(order) == 10)
            assert order == list(range(10))
        finally:
            worker.shutdown(timeout=2.0)

    def test_task_exception_does_not_kill_worker(self):
        worker = SyncWorker()
        try:
            survived: List[str] = []
            # Generator-throw trick: a lambda body that raises RuntimeError.
            worker.enqueue(SyncTask(fn=lambda: (_ for _ in ()).throw(RuntimeError("boom")), name="boom"))
            worker.enqueue(SyncTask(fn=lambda: survived.append("ok"), name="ok"))
            assert _wait_until(lambda: survived == ["ok"])
        finally:
            worker.shutdown(timeout=2.0)

    def test_on_failure_callback_invoked_when_task_raises(self):
        worker = SyncWorker()
        try:
            failures: List[BaseException] = []
            worker.enqueue(
                SyncTask(
                    fn=lambda: (_ for _ in ()).throw(ValueError("nope")),
                    name="fail",
                    on_failure=failures.append,
                )
            )
            assert _wait_until(lambda: len(failures) == 1)
            assert isinstance(failures[0], ValueError)
        finally:
            worker.shutdown(timeout=2.0)
||||
|
||||
class TestSyncWorkerBackpressure:
    """Bounded-queue overflow behavior."""

    def test_queue_overflow_drops_oldest_task(self):
        worker = SyncWorker(max_queue=3)
        try:
            gate = threading.Event()
            ran: List[int] = []
            dropped: List[int] = []

            # Fill the queue with a blocker + 3 more waiting tasks.
            worker.enqueue(SyncTask(fn=lambda: gate.wait(timeout=3.0), name="blocker"))
            for i in range(3):
                worker.enqueue(
                    SyncTask(
                        fn=lambda i=i: ran.append(i),
                        name=f"t{i}",
                        on_failure=lambda e, i=i: dropped.append(i),
                    )
                )
            # Now try to enqueue a 4th task — should evict the oldest queued
            # (task 0) to make room.
            worker.enqueue(SyncTask(fn=lambda: ran.append(99), name="overflow"))

            # Queue overflow dropped exactly one task (task 0).
            gate.set()
            assert _wait_until(lambda: 99 in ran)
            assert 0 in dropped or ran == [1, 2, 99] or ran == [1, 2, 3, 99]
        finally:
            worker.shutdown(timeout=3.0)
|
||||
|
||||
class TestSyncWorkerIntegration:
    """SyncWorker wired together with CircuitBreaker and latency tracker."""

    def test_breaker_open_skips_task(self):
        breaker = CircuitBreaker(failure_threshold=1)
        breaker.record_failure()
        assert breaker.state == breaker.STATE_OPEN

        worker = SyncWorker(breaker=breaker)
        try:
            failures: List[BaseException] = []
            ran: List[str] = []
            worker.enqueue(
                SyncTask(
                    fn=lambda: ran.append("should_not_run"),
                    name="blocked",
                    on_failure=failures.append,
                )
            )
            # Give the worker a moment — but the task should never run.
            time.sleep(0.2)
            assert ran == []
            assert len(failures) == 1
            assert "circuit breaker open" in str(failures[0])
        finally:
            worker.shutdown(timeout=2.0)

    def test_successful_task_feeds_latency_tracker_and_resets_breaker(self):
        tracker = HonchoLatencyTracker(warmup_samples=1)
        breaker = CircuitBreaker(failure_threshold=2)
        breaker.record_failure()
        assert breaker._consecutive_failures == 1

        worker = SyncWorker(latency_tracker=tracker, breaker=breaker)
        try:
            worker.enqueue(SyncTask(fn=lambda: time.sleep(0.05), name="t"))
            # A completed task must record a latency sample...
            assert _wait_until(lambda: len(tracker._samples) >= 1)
            # ...and clear the breaker's failure streak.
            assert breaker.state == breaker.STATE_CLOSED
            assert breaker._consecutive_failures == 0
        finally:
            worker.shutdown(timeout=2.0)

    def test_failed_task_increments_breaker(self):
        breaker = CircuitBreaker(failure_threshold=2)
        worker = SyncWorker(breaker=breaker)
        try:
            for _ in range(2):
                worker.enqueue(
                    SyncTask(
                        fn=lambda: (_ for _ in ()).throw(RuntimeError("x")),
                        name="fail",
                    )
                )
            assert _wait_until(lambda: breaker.state == breaker.STATE_OPEN)
        finally:
            worker.shutdown(timeout=2.0)
|
||||
|
||||
class TestSyncWorkerShutdown:
    """Shutdown semantics: idempotence and post-shutdown enqueue rejection."""

    def test_shutdown_is_idempotent(self):
        worker = SyncWorker()
        worker.enqueue(SyncTask(fn=lambda: None, name="t"))
        worker.shutdown(timeout=2.0)
        worker.shutdown(timeout=2.0)  # Must not raise
        assert not worker.is_running()

    def test_enqueue_after_shutdown_calls_on_failure(self):
        worker = SyncWorker()
        worker.shutdown(timeout=2.0)
        failures: List[BaseException] = []
        accepted = worker.enqueue(
            SyncTask(fn=lambda: None, name="late", on_failure=failures.append)
        )
        assert accepted is False
        assert len(failures) == 1
||||
|
|
@ -1441,6 +1441,20 @@ class TestBuildAssistantMessage:
|
|||
result = agent._build_assistant_message(msg, "stop")
|
||||
assert result["content"] == "No thinking here."
|
||||
|
||||
def test_memory_context_stripped_from_stored_content(self, agent):
    """A leaked <memory-context> span is removed from the stored content."""
    msg = _mock_assistant_msg(
        content=(
            "<memory-context>\n"
            "[System note: The following is recalled memory context, NOT new user input. Treat as informational background data.]\n\n"
            "## Honcho Context\n"
            "stale memory\n"
            "</memory-context>\n\n"
            "Visible answer"
        )
    )
    stored = agent._build_assistant_message(msg, "stop")
    assert stored["content"] == "Visible answer"
||||
|
||||
def test_unterminated_think_block_stripped(self, agent):
|
||||
"""Unterminated <think> block (MiniMax / NIM dropped close tag) is
|
||||
fully stripped from stored content."""
|
||||
|
|
|
|||
|
|
@ -1088,6 +1088,100 @@ def test_interim_commentary_is_not_marked_already_streamed_when_stream_callback_
|
|||
}
|
||||
|
||||
|
||||
def test_interim_commentary_strips_leaked_memory_context(monkeypatch):
    """A leaked <memory-context> span is scrubbed before the interim callback fires."""
    agent = _build_agent(monkeypatch)
    captured = {}
    agent.interim_assistant_callback = lambda text, *, already_streamed=False: captured.update(
        {"text": text, "already_streamed": already_streamed}
    )

    leaked = (
        "<memory-context>\n"
        "[System note: The following is recalled memory context, NOT new user input. Treat as informational background data.]\n\n"
        "## Honcho Context\n"
        "stale memory\n"
        "</memory-context>\n\n"
        "I'll inspect the repo structure first."
    )

    agent._emit_interim_assistant_message({"role": "assistant", "content": leaked})

    assert captured == {
        "text": "I'll inspect the repo structure first.",
        "already_streamed": False,
    }
|
||||
|
||||
def test_stream_delta_strips_leaked_memory_context(monkeypatch):
    """A single delta carrying a whole <memory-context> span is scrubbed."""
    agent = _build_agent(monkeypatch)
    chunks = []
    agent.stream_delta_callback = chunks.append

    leaked = (
        "<memory-context>\n"
        "[System note: The following is recalled memory context, NOT new user input. Treat as informational background data.]\n\n"
        "## Honcho Context\n"
        "stale memory\n"
        "</memory-context>\n\n"
        "Visible answer"
    )

    agent._fire_stream_delta(leaked)

    assert chunks == ["Visible answer"]
|
||||
|
||||
def test_stream_delta_strips_leaked_memory_context_across_chunks(monkeypatch):
    """Regression for #5719 — the real streaming case.

    Providers typically emit 1-80 char chunks, so the memory-context open
    tag, system-note line, payload, and close tag each arrive in separate
    deltas. A per-delta regex cannot match across those boundaries; only a
    stateful scrubber can. Nothing from inside the span — payload,
    system-note text, or the "## Honcho Context" header — may reach the
    delta callback.
    """
    agent = _build_agent(monkeypatch)
    emitted = []
    agent.stream_delta_callback = emitted.append

    deltas = [
        "<memory-context>\n[System note: The following",
        " is recalled memory context, NOT new user input. ",
        "Treat as informational background data.]\n\n",
        "## Honcho Context\n",
        "stale memory about eri\n",
        "</memory-context>\n\n",
        "Visible answer",
    ]
    for delta in deltas:
        agent._fire_stream_delta(delta)

    combined = "".join(emitted)
    assert "Visible answer" in combined
    # None of the leaked payload may surface.
    assert "System note" not in combined
    assert "Honcho Context" not in combined
    assert "stale memory" not in combined
    assert "<memory-context>" not in combined
    assert "</memory-context>" not in combined
|
||||
|
||||
def test_stream_delta_scrubber_resets_between_turns(monkeypatch):
    """An unterminated span from a prior turn must not taint the next turn."""
    agent = _build_agent(monkeypatch)

    # Leave the scrubber mid-span, as a hung prior turn would.
    agent._stream_context_scrubber.feed("pre <memory-context>leaked")

    # run_conversation() performs this reset at the start of each turn.
    agent._stream_context_scrubber.reset()

    chunks = []
    agent.stream_delta_callback = chunks.append
    agent._fire_stream_delta("clean new turn text")
    assert "".join(chunks) == "clean new turn text"
|
||||
|
||||
def test_run_conversation_codex_continues_after_commentary_phase_message(monkeypatch):
|
||||
agent = _build_agent(monkeypatch)
|
||||
responses = [
|
||||
|
|
|
|||
|
|
@ -229,6 +229,24 @@ class TestMessageStorage:
|
|||
messages = db.get_messages("s1")
|
||||
assert messages[0]["finish_reason"] == "stop"
|
||||
|
||||
def test_get_messages_as_conversation_strips_leaked_memory_context(self, db):
    """A leaked <memory-context> span in a stored message is stripped on restore."""
    db.create_session(session_id="s1", source="cli")
    db.append_message(
        "s1",
        role="assistant",
        content=(
            "<memory-context>\n"
            "[System note: The following is recalled memory context, NOT new user input. Treat as informational background data.]\n\n"
            "## Honcho Context\n"
            "stale memory\n"
            "</memory-context>\n\n"
            "Visible answer"
        ),
    )

    history = db.get_messages_as_conversation("s1")
    assert history == [{"role": "assistant", "content": "Visible answer"}]
||||
|
||||
def test_reasoning_persisted_and_restored(self, db):
|
||||
"""Reasoning text is stored for assistant messages and restored by
|
||||
get_messages_as_conversation() so providers receive coherent multi-turn
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue