fix(run_agent): isolate background review fork from external memory plugins (#27190)

Pass skip_memory=True to the AIAgent constructor used by
_spawn_background_review() so the review fork's __init__ no longer
rebuilds a _memory_manager wired to honcho / mem0 / supermemory /
etc. under the parent's session_id.

Before this change, the review fork ingested its harness prompt
(the 'Review the conversation above and update the skill library...'
text) into the user's real memory namespace via three sites in
run_conversation():
  - on_turn_start(turn_count, prompt)      cadence + turn-message
  - prefetch_all(prompt)                   recall query
  - sync_all(prompt, review_output, ...)   harness + review output
                                           recorded as a
                                           (user, assistant) pair

Built-in MEMORY.md / USER.md state is still rebound from the parent
right after construction, so memory(action='add') writes from the
review continue to land on disk; only the external-plugin side
effects are removed.

Reported by @Utku.
This commit is contained in:
Teknium 2026-05-16 20:33:38 -07:00 committed by GitHub
parent 96b7f3da45
commit 973f27e956
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 64 additions and 0 deletions

View file

@ -4386,6 +4386,21 @@ class AIAgent:
# owns the loop and the agent-loop tools dispatch. # owns the loop and the agent-loop tools dispatch.
if _parent_api_mode == "codex_app_server": if _parent_api_mode == "codex_app_server":
_parent_api_mode = "codex_responses" _parent_api_mode = "codex_responses"
# skip_memory=True keeps the review fork from
# touching external memory plugins (honcho, mem0,
# supermemory, etc.). Without it, the fork's
# __init__ rebuilds its own _memory_manager from
# config, scoped to the parent's session_id, and
# run_conversation() then leaks the harness prompt
# into the user's real memory namespace via three
# ingestion sites: on_turn_start (cadence + turn
# message), prefetch_all (recall query), and
# sync_all (harness prompt + review output recorded
# as a (user, assistant) turn pair). Built-in
# MEMORY.md / USER.md state is re-bound from the
# parent below so memory(action="add") writes from
# the review still land on disk; the review just
# has zero side effects on external providers.
review_agent = AIAgent( review_agent = AIAgent(
model=self.model, model=self.model,
max_iterations=16, max_iterations=16,
@ -4397,6 +4412,7 @@ class AIAgent:
api_key=_parent_runtime.get("api_key") or None, api_key=_parent_runtime.get("api_key") or None,
credential_pool=getattr(self, "_credential_pool", None), credential_pool=getattr(self, "_credential_pool", None),
parent_session_id=self.session_id, parent_session_id=self.session_id,
skip_memory=True,
) )
review_agent._memory_write_origin = "background_review" review_agent._memory_write_origin = "background_review"
review_agent._memory_write_context = "background_review" review_agent._memory_write_context = "background_review"

View file

@ -193,3 +193,51 @@ def test_background_review_summary_is_attributed_to_self_improvement_loop(monkey
assert captured_bg_callback[0].startswith("💾 Self-improvement review:"), ( assert captured_bg_callback[0].startswith("💾 Self-improvement review:"), (
captured_bg_callback[0] captured_bg_callback[0]
) )
def test_background_review_fork_skips_external_memory_plugins(monkeypatch):
    """Guard that the background review fork is built with skip_memory=True.

    The fork is created through the ``AIAgent`` name in the run_agent
    module, so that name is swapped for a recording stub and the keyword
    arguments it receives are inspected afterwards.

    Why the kwarg matters: without skip_memory=True, AIAgent.__init__
    rebuilds its own _memory_manager from config, scoped to the parent's
    session_id. The review fork's run_conversation() then leaks the
    harness prompt into the user's real memory namespace via three
    ingestion sites: on_turn_start (cadence + turn message),
    prefetch_all (recall query), and sync_all (harness prompt + review
    output recorded as a (user, assistant) turn pair). The fix is a
    single kwarg on the fork constructor; this test guards it.
    """
    # Accumulates every keyword argument handed to the stub constructor.
    ctor_kwargs_seen: dict = {}

    class RecordingReviewAgent:
        """Stand-in for the review fork that only records its kwargs."""

        def __init__(self, **ctor_kwargs):
            ctor_kwargs_seen.update(ctor_kwargs)
            self._session_messages = []

        def run_conversation(self, **_unused):
            """No-op: the review conversation itself is irrelevant here."""

        def shutdown_memory_provider(self):
            """No-op teardown hook."""

        def close(self):
            """No-op teardown hook."""

    # Patch the module-level names used while spawning the review: the
    # agent class becomes the recording stub, and Thread is replaced with
    # the test helper ImmediateThread.
    monkeypatch.setattr(run_agent_module, "AIAgent", RecordingReviewAgent)
    monkeypatch.setattr(run_agent_module.threading, "Thread", ImmediateThread)

    parent_agent = _bare_agent()
    snapshot = [{"role": "user", "content": "hello"}]
    AIAgent._spawn_background_review(
        parent_agent,
        messages_snapshot=snapshot,
        review_memory=True,
    )

    failure_hint = (
        "Background review fork must be constructed with skip_memory=True "
        "so AIAgent.__init__ does not rebuild a _memory_manager wired to "
        "external plugins (honcho, mem0, supermemory, ...). Without this "
        "the fork leaks harness prompts into the user's real memory "
        "namespace via on_turn_start / prefetch_all / sync_all."
    )
    skip_memory_flag = ctor_kwargs_seen.get("skip_memory")
    assert skip_memory_flag is True, failure_hint