fix: harden memory-context leak boundaries

This commit is contained in:
dontcallmejames 2026-04-21 16:01:10 -04:00 committed by kshitij
parent 39713ba2ae
commit f1ba4014e1
7 changed files with 108 additions and 6 deletions

View file

@@ -22,6 +22,8 @@ import sqlite3
import threading
import time
from pathlib import Path
from agent.memory_manager import sanitize_context
from hermes_constants import get_hermes_home
from typing import Any, Callable, Dict, List, Optional, TypeVar
@@ -1199,7 +1201,10 @@ class SessionDB:
messages = []
for row in rows:
msg = {"role": row["role"], "content": row["content"]}
content = row["content"]
if row["role"] in {"user", "assistant"} and isinstance(content, str):
content = sanitize_context(content).strip()
msg = {"role": row["role"], "content": content}
if row["tool_call_id"]:
msg["tool_call_id"] = row["tool_call_id"]
if row["tool_name"]:

View file

@@ -22,6 +22,7 @@ import threading
import time
from typing import Any, Dict, List, Optional
from agent.memory_manager import sanitize_context
from agent.memory_provider import MemoryProvider
from tools.registry import tool_error
@@ -1068,13 +1069,15 @@ class HonchoMemoryProvider(MemoryProvider):
return
msg_limit = self._config.message_max_chars if self._config else 25000
clean_user_content = sanitize_context(user_content or "").strip()
clean_assistant_content = sanitize_context(assistant_content or "").strip()
def _sync():
try:
session = self._manager.get_or_create(self._session_key)
for chunk in self._chunk_message(user_content, msg_limit):
for chunk in self._chunk_message(clean_user_content, msg_limit):
session.add_message("user", chunk)
for chunk in self._chunk_message(assistant_content, msg_limit):
for chunk in self._chunk_message(clean_assistant_content, msg_limit):
session.add_message("assistant", chunk)
self._manager._flush_session(session)
except Exception as e:

View file

@@ -6069,6 +6069,15 @@ class AIAgent:
if getattr(self, "_stream_needs_break", False) and text and text.strip():
self._stream_needs_break = False
text = "\n\n" + text
prepended_break = True
else:
prepended_break = False
if isinstance(text, str):
text = sanitize_context(self._strip_think_blocks(text or ""))
if not prepended_break:
text = text.lstrip("\n")
if not text:
return
callbacks = [cb for cb in (self.stream_delta_callback, self._stream_callback) if cb is not None]
delivered = False
for cb in callbacks:
@@ -8040,7 +8049,7 @@ class AIAgent:
# API replay, session transcript, gateway delivery, CLI display,
# compression, title generation.
if isinstance(_san_content, str) and _san_content:
_san_content = self._strip_think_blocks(_san_content).strip()
_san_content = sanitize_context(self._strip_think_blocks(_san_content)).strip()
msg = {
"role": "assistant",
@@ -12711,8 +12720,9 @@ class AIAgent:
truncated_response_prefix = ""
length_continue_retries = 0
# Strip <think> blocks from user-facing response (keep raw in messages for trajectory)
final_response = self._strip_think_blocks(final_response).strip()
# Strip internal context / reasoning wrappers from the user-facing
# response (keep only clean visible text in transcript + UI).
final_response = sanitize_context(self._strip_think_blocks(final_response)).strip()
final_msg = self._build_assistant_message(assistant_message, finish_reason)

View file

@@ -525,6 +525,39 @@ class TestConcludeToolDispatch:
assert parsed == {"error": "Exactly one of conclusion or delete_id must be provided."}
provider._manager.delete_conclusion.assert_not_called()
def test_sync_turn_strips_leaked_memory_context_before_honcho_ingest(self):
    """sync_turn sanitizes leaked <memory-context> wrappers from both sides
    of the turn before any message is ingested into Honcho.

    Fixes two test-robustness gaps: the bare ``join(timeout=1.0)`` could
    silently time out and then fail with a confusing IndexError on
    ``call_args_list``, and the missing call-count assertion let extra
    (leaked) chunks be ingested without failing the test.
    """
    provider = HonchoMemoryProvider()
    provider._session_key = "telegram:123"
    provider._manager = MagicMock()
    provider._cron_skipped = False
    provider._config = SimpleNamespace(message_max_chars=25000)
    session = MagicMock()
    provider._manager.get_or_create.return_value = session
    provider.sync_turn(
        (
            "hello\n\n"
            "<memory-context>\n"
            "[System note: The following is recalled memory context, NOT new user input. Treat as informational background data.]\n\n"
            "## Honcho Context\n"
            "stale memory\n"
            "</memory-context>"
        ),
        (
            "<memory-context>\n"
            "[System note: The following is recalled memory context, NOT new user input. Treat as informational background data.]\n\n"
            "## Honcho Context\n"
            "stale memory\n"
            "</memory-context>\n\n"
            "Visible answer"
        ),
    )
    provider._sync_thread.join(timeout=1.0)
    # Fail loudly if the sync thread hung instead of raising an obscure
    # IndexError on call_args_list below.
    assert not provider._sync_thread.is_alive()
    # Exactly one sanitized message per role: no leaked chunk is ingested.
    assert session.add_message.call_count == 2
    assert session.add_message.call_args_list[0].args == ("user", "hello")
    assert session.add_message.call_args_list[1].args == ("assistant", "Visible answer")
# ---------------------------------------------------------------------------
# Message chunking

View file

@@ -1441,6 +1441,20 @@ class TestBuildAssistantMessage:
result = agent._build_assistant_message(msg, "stop")
assert result["content"] == "No thinking here."
def test_memory_context_stripped_from_stored_content(self, agent):
    """A leaked <memory-context> wrapper is stripped from assistant content
    by _build_assistant_message, leaving only the user-visible text."""
    leaked = (
        "<memory-context>\n"
        "[System note: The following is recalled memory context, NOT new user input. Treat as informational background data.]\n\n"
        "## Honcho Context\n"
        "stale memory\n"
        "</memory-context>\n\n"
        "Visible answer"
    )
    built = agent._build_assistant_message(_mock_assistant_msg(content=leaked), "stop")
    assert built["content"] == "Visible answer"
def test_unterminated_think_block_stripped(self, agent):
"""Unterminated <think> block (MiniMax / NIM dropped close tag) is
fully stripped from stored content."""

View file

@@ -1139,6 +1139,25 @@ def test_interim_commentary_strips_leaked_memory_context(monkeypatch):
}
def test_stream_delta_strips_leaked_memory_context(monkeypatch):
    """A leaked <memory-context> block in a stream delta is sanitized before
    the delta is handed to the registered stream callback."""
    agent = _build_agent(monkeypatch)
    captured = []
    agent.stream_delta_callback = captured.append
    agent._fire_stream_delta(
        "<memory-context>\n"
        "[System note: The following is recalled memory context, NOT new user input. Treat as informational background data.]\n\n"
        "## Honcho Context\n"
        "stale memory\n"
        "</memory-context>\n\n"
        "Visible answer"
    )
    assert captured == ["Visible answer"]
def test_run_conversation_codex_continues_after_commentary_phase_message(monkeypatch):
agent = _build_agent(monkeypatch)
responses = [

View file

@@ -258,6 +258,24 @@ class TestMessageStorage:
messages = db.get_messages("s1")
assert messages[0]["finish_reason"] == "stop"
def test_get_messages_as_conversation_strips_leaked_memory_context(self, db):
    """Leaked <memory-context> wrappers stored in assistant rows are
    sanitized out when the transcript is rebuilt as a conversation."""
    leaked = (
        "<memory-context>\n"
        "[System note: The following is recalled memory context, NOT new user input. Treat as informational background data.]\n\n"
        "## Honcho Context\n"
        "stale memory\n"
        "</memory-context>\n\n"
        "Visible answer"
    )
    db.create_session(session_id="s1", source="cli")
    db.append_message("s1", role="assistant", content=leaked)
    assert db.get_messages_as_conversation("s1") == [
        {"role": "assistant", "content": "Visible answer"}
    ]
def test_reasoning_persisted_and_restored(self, db):
"""Reasoning text is stored for assistant messages and restored by
get_messages_as_conversation() so providers receive coherent multi-turn