fix(run_agent): isolate background review fork from external memory plugins (#27190)

Pass skip_memory=True to the AIAgent constructor used by _spawn_background_review() so the review fork's __init__ no longer rebuilds a _memory_manager wired to honcho / mem0 / supermemory / etc. under the parent's session_id.

Before this change, the review fork ingested its harness prompt (the 'Review the conversation above and update the skill library...' text) into the user's real memory namespace via three sites in run_conversation():

- on_turn_start(turn_count, prompt): cadence + turn message
- prefetch_all(prompt): recall query
- sync_all(prompt, review_output, ...): harness prompt + review output recorded as a (user, assistant) pair

Built-in MEMORY.md / USER.md state is still rebound from the parent right after construction, so memory(action='add') writes from the review continue to land on disk; only the external-plugin side effects are removed.

Reported by @Utku.
2026-05-18 04:41:56 +00:00 · 2026-05-16 20:33:38 -07:00 · 2026-05-16 20:33:38 -07:00 · 973f27e956
commit 973f27e956
parent 96b7f3da45
2 changed files with 64 additions and 0 deletions
--- a/run_agent.py
+++ b/run_agent.py
@ -4386,6 +4386,21 @@ class AIAgent:
                    # owns the loop and the agent-loop tools dispatch.
                    if _parent_api_mode == "codex_app_server":
                        _parent_api_mode = "codex_responses"
                    # skip_memory=True keeps the review fork from
                    # touching external memory plugins (honcho, mem0,
                    # supermemory, etc.).  Without it, the fork's
                    # __init__ rebuilds its own _memory_manager from
                    # config, scoped to the parent's session_id, and
                    # run_conversation() then leaks the harness prompt
                    # into the user's real memory namespace via three
                    # ingestion sites: on_turn_start (cadence + turn
                    # message), prefetch_all (recall query), and
                    # sync_all (harness prompt + review output recorded
                    # as a (user, assistant) turn pair).  Built-in
                    # MEMORY.md / USER.md state is re-bound from the
                    # parent below so memory(action="add") writes from
                    # the review still land on disk; the review just
                    # has zero side effects on external providers.
                    review_agent = AIAgent(
                        model=self.model,
                        max_iterations=16,
@ -4397,6 +4412,7 @@ class AIAgent:
                        api_key=_parent_runtime.get("api_key") or None,
                        credential_pool=getattr(self, "_credential_pool", None),
                        parent_session_id=self.session_id,
                        skip_memory=True,
                    )
                    review_agent._memory_write_origin = "background_review"
                    review_agent._memory_write_context = "background_review"
--- a/tests/run_agent/test_background_review.py
+++ b/tests/run_agent/test_background_review.py
@ -193,3 +193,51 @@ def test_background_review_summary_is_attributed_to_self_improvement_loop(monkey
    assert captured_bg_callback[0].startswith("💾 Self-improvement review:"), (
        captured_bg_callback[0]
    )
 def test_background_review_fork_skips_external_memory_plugins(monkeypatch):
    """Guard the ``skip_memory=True`` kwarg on the background review fork.

    The test swaps ``run_agent_module.AIAgent`` for a stub that records the
    keyword arguments it is constructed with, runs the review thread
    synchronously via ``ImmediateThread``, and then checks that the fork
    was built with ``skip_memory=True``.

    Why it matters: without that kwarg, ``AIAgent.__init__`` rebuilds its
    own ``_memory_manager`` from config under the parent's session_id, and
    the fork's ``run_conversation()`` then pushes the review harness prompt
    into the user's real memory namespace through ``on_turn_start``
    (cadence + turn message), ``prefetch_all`` (recall query) and
    ``sync_all`` (harness prompt + review output stored as a
    (user, assistant) turn pair).
    """
    # Accumulates every keyword argument handed to the stub constructor.
    fork_ctor_kwargs: dict = {}
    class StubReviewAgent:
        """Stand-in for AIAgent that only records its constructor kwargs."""
        def __init__(self, **ctor_kwargs):
            fork_ctor_kwargs.update(ctor_kwargs)
            self._session_messages = []
        def run_conversation(self, **_ignored):
            return None
        def shutdown_memory_provider(self):
            return None
        def close(self):
            return None
    # Replace the agent class the fork instantiates, and make the review
    # "thread" use ImmediateThread so the fork is built before the assert.
    monkeypatch.setattr(run_agent_module, "AIAgent", StubReviewAgent)
    monkeypatch.setattr(run_agent_module.threading, "Thread", ImmediateThread)
    snapshot = [{"role": "user", "content": "hello"}]
    AIAgent._spawn_background_review(
        _bare_agent(),
        messages_snapshot=snapshot,
        review_memory=True,
    )
    skip_flag = fork_ctor_kwargs.get("skip_memory")
    # Identity check on purpose: a merely truthy value must not pass.
    assert skip_flag is True, (
        "Background review fork must be constructed with skip_memory=True "
        "so AIAgent.__init__ does not rebuild a _memory_manager wired to "
        "external plugins (honcho, mem0, supermemory, ...).  Without this "
        "the fork leaks harness prompts into the user's real memory "
        "namespace via on_turn_start / prefetch_all / sync_all."
    )