diff --git a/run_agent.py b/run_agent.py
index 9324b1c2901..b10a68cf9d0 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -4386,6 +4386,21 @@ class AIAgent:
                     # owns the loop and the agent-loop tools dispatch.
                     if _parent_api_mode == "codex_app_server":
                         _parent_api_mode = "codex_responses"
+                    # skip_memory=True keeps the review fork from
+                    # touching external memory plugins (honcho, mem0,
+                    # supermemory, etc.). Without it, the fork's
+                    # __init__ rebuilds its own _memory_manager from
+                    # config, scoped to the parent's session_id, and
+                    # run_conversation() then leaks the harness prompt
+                    # into the user's real memory namespace via three
+                    # ingestion sites: on_turn_start (cadence + turn
+                    # message), prefetch_all (recall query), and
+                    # sync_all (harness prompt + review output recorded
+                    # as a (user, assistant) turn pair). Built-in
+                    # MEMORY.md / USER.md state is re-bound from the
+                    # parent below so memory(action="add") writes from
+                    # the review still land on disk; the review just
+                    # has zero side effects on external providers.
                     review_agent = AIAgent(
                         model=self.model,
                         max_iterations=16,
@@ -4397,6 +4412,7 @@ class AIAgent:
                         api_key=_parent_runtime.get("api_key") or None,
                         credential_pool=getattr(self, "_credential_pool", None),
                         parent_session_id=self.session_id,
+                        skip_memory=True,
                     )
                     review_agent._memory_write_origin = "background_review"
                     review_agent._memory_write_context = "background_review"
diff --git a/tests/run_agent/test_background_review.py b/tests/run_agent/test_background_review.py
index 2e79b10b346..89626f857d5 100644
--- a/tests/run_agent/test_background_review.py
+++ b/tests/run_agent/test_background_review.py
@@ -193,3 +193,51 @@ def test_background_review_summary_is_attributed_to_self_improvement_loop(monkey
     assert captured_bg_callback[0].startswith("💾 Self-improvement review:"), (
         captured_bg_callback[0]
     )
+
+
+def test_background_review_fork_skips_external_memory_plugins(monkeypatch):
+    """The background review fork must NOT touch external memory plugins.
+
+    Without skip_memory=True on the fork constructor, AIAgent.__init__
+    rebuilds its own _memory_manager from config, scoped to the parent's
+    session_id. The review fork's run_conversation() then leaks the
+    harness prompt into the user's real memory namespace via three
+    ingestion sites: on_turn_start (cadence + turn message),
+    prefetch_all (recall query), and sync_all (harness prompt + review
+    output recorded as a (user, assistant) turn pair). The fix is a
+    single kwarg on the fork constructor — this test guards it.
+    """
+    captured_kwargs: dict = {}
+
+    class FakeReviewAgent:  # stand-in for the fork: records ctor kwargs, lifecycle methods are no-ops
+        def __init__(self, **kwargs):
+            captured_kwargs.update(kwargs)  # expose the constructor kwargs to the assertion below
+            self._session_messages = []  # NOTE(review): presumably read back by _spawn_background_review — confirm
+
+        def run_conversation(self, **kwargs):
+            pass
+
+        def shutdown_memory_provider(self):
+            pass
+
+        def close(self):
+            pass
+
+    monkeypatch.setattr(run_agent_module, "AIAgent", FakeReviewAgent)  # the fork is now built from the stub
+    monkeypatch.setattr(run_agent_module.threading, "Thread", ImmediateThread)  # NOTE(review): ImmediateThread is defined outside this hunk; presumably runs the target inline — confirm
+
+    agent = _bare_agent()
+
+    AIAgent._spawn_background_review(
+        agent,
+        messages_snapshot=[{"role": "user", "content": "hello"}],
+        review_memory=True,
+    )
+
+    assert captured_kwargs.get("skip_memory") is True, (
+        "Background review fork must be constructed with skip_memory=True "
+        "so AIAgent.__init__ does not rebuild a _memory_manager wired to "
+        "external plugins (honcho, mem0, supermemory, ...). Without this "
+        "the fork leaks harness prompts into the user's real memory "
+        "namespace via on_turn_start / prefetch_all / sync_all."
+    )