From 6855d177531f9abfbb00d0da1f2db6ff91bdb872 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Sat, 23 May 2026 02:51:29 -0700 Subject: [PATCH] fix(memory): guard against external drift in MEMORY.md/USER.md (#26045) (#30877) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reproduction (production, 2026-05-14): two concurrent sessions on the same agent. Session A patches MEMORY.md directly via the patch tool, appending ~8KB of structured content (Vendor Master, Standing Orders, Pin Board) — none of it through the memory tool, so no § delimiters. Session B starts later with stale in-memory state (1 entry, ~331 chars). Session B calls memory(action=replace) on its one known entry. The tool's _read_file parses A's content as a single 8KB 'entry' (no § splits), then replace truncates that entry to B's new 333-byte content. ~8KB of structured content silently destroyed. The atomic-rename write path is fine in isolation. The bug is the implicit contract: the tool assumes MEMORY.md is exclusively a §-delimited list of small entries it wrote, but the v0.13 install runbook itself uses 'cat >> MEMORY.md' for onboarding, the patch tool edits the file directly, and operators do too. Fix: a drift guard in MemoryStore._detect_external_drift that fires on either signal: 1. Re-parse + re-serialize doesn't produce identical bytes (catches oddly-encoded delimiters / partial writes). 2. Any single parsed entry exceeds the store's whole-file char limit. The tool budgets the ENTIRE store against that limit (2200 chars for memory, 1375 for user), so no tool-written entry can legitimately be larger. An entry bigger than the store limit means an external writer dropped free-form content into what the tool will treat as one entry. When drift fires, _reload_target writes a .bak. snapshot of the on-disk file, then add/replace/remove refuse to flush. The original file stays untouched. The error dict surfaces the .bak path AND a remediation string ('integrate missing entries via memory(add=...) one at a time, then rewrite the file clean') so the model can act on it without escalating to the operator. Tests: - test_replace_refuses_on_drift, test_add_refuses_on_drift, test_remove_refuses_on_drift — all three mutators refuse - test_clean_file_does_not_trigger_drift — false-positive check - test_error_message_points_at_remediation — error string shape - test_drift_guard_also_protects_user_target — USER.md too - test_drift_backup_filename_is_unique_per_invocation — bak. naming pin 144 memory tests passing (was 137; +7). Fixes #26045 --- tests/tools/test_memory_tool.py | 125 ++++++++++++++++++++++++++++++++ tools/memory_tool.py | 116 +++++++++++++++++++++++++++-- 2 files changed, 235 insertions(+), 6 deletions(-) diff --git a/tests/tools/test_memory_tool.py b/tests/tools/test_memory_tool.py index 7f63aee1ebb..1a635aa1ac3 100644 --- a/tests/tools/test_memory_tool.py +++ b/tests/tools/test_memory_tool.py @@ -255,3 +255,128 @@ class TestMemoryToolDispatcher: def test_remove_requires_old_text(self, store): result = json.loads(memory_tool(action="remove", store=store)) assert result["success"] is False + + +# ========================================================================= +# External drift guard (#26045) +# +# An external writer — patch tool, shell append, manual edit, or sister +# session — can grow MEMORY.md beyond the tool's mental model: no § +# delimiters, content that would all collapse into a single "entry" larger +# than the char limit. Pre-fix, the next memory(action=replace) from a +# session with stale in-memory state truncated that giant entry, silently +# discarding the appended bytes. Reproduced in production on 2026-05-14 — +# ~8KB of structured vendor / standing-orders / pinboard content destroyed +# by a sister session's replace. +# ========================================================================= + + +class TestExternalDriftGuard: + """Mutations must refuse to flush when on-disk content shows external drift.""" + + def _plant_drift(self, store, target="memory"): + """Append free-form content (no § delimiters) past char_limit.""" + path = store._path_for(target) + path.parent.mkdir(parents=True, exist_ok=True) + # 800 chars per entry × 3 sections == ~2.4KB without delimiters, + # well over the test fixture's 500-char limit. + block = "\n\n## Vendor Master\n" + "x" * 800 + block += "\n\n## Standing Orders\n" + "y" * 800 + block += "\n\n## Pin Board\n" + "z" * 800 + existing = path.read_text(encoding="utf-8") if path.exists() else "" + path.write_text(existing + block, encoding="utf-8") + return path + + def test_replace_refuses_on_drift(self, store): + store.add("memory", "User likes brevity.") + path = self._plant_drift(store) + original_size = path.stat().st_size + + result = store.replace("memory", "User likes", "User prefers concise.") + + assert result["success"] is False + assert "drift_backup" in result + # On-disk file is UNTOUCHED — that's the point. + assert path.stat().st_size == original_size + assert "Vendor Master" in path.read_text() + # Backup exists with the drifted content. + bak = result["drift_backup"] + assert Path(bak).exists() + assert "Vendor Master" in Path(bak).read_text() + + def test_add_refuses_on_drift(self, store): + store.add("memory", "Existing.") + path = self._plant_drift(store) + original = path.read_text() + + result = store.add("memory", "New entry under drift.") + + assert result["success"] is False + assert "drift_backup" in result + assert path.read_text() == original # untouched + + def test_remove_refuses_on_drift(self, store): + store.add("memory", "Target entry to remove.") + path = self._plant_drift(store) + original = path.read_text() + + result = store.remove("memory", "Target entry") + + assert result["success"] is False + assert "drift_backup" in result + assert path.read_text() == original # untouched + + def test_clean_file_does_not_trigger_drift(self, store): + """A normally-written file (just below char_limit, §-delimited) is fine.""" + # Two tool-shaped entries totaling under the 500-char limit. + store.add("memory", "Entry one — normal length.") + store.add("memory", "Entry two — also normal.") + + result = store.add("memory", "Entry three.") + assert result["success"] is True + assert "drift_backup" not in result + + result = store.replace("memory", "Entry two", "Entry two replaced.") + assert result["success"] is True + + def test_error_message_points_at_remediation(self, store): + """The error string must reference the backup AND remediation steps.""" + store.add("memory", "Initial.") + self._plant_drift(store) + + result = store.replace("memory", "Initial", "Replacement.") + assert result["success"] is False + # The model has to know what file to look at and what to do. + assert ".bak." in result["error"] + assert "remediation" in result + assert "26045" in result["error"] # tracking-issue back-reference + + def test_drift_guard_also_protects_user_target(self, store): + """USER.md gets the same guarantee as MEMORY.md.""" + store.add("user", "Some preference.") + path = self._plant_drift(store, target="user") + original_size = path.stat().st_size + + result = store.replace("user", "Some preference", "New preference.") + assert result["success"] is False + assert path.stat().st_size == original_size + + def test_drift_backup_filename_is_unique_per_invocation(self, store): + """Two drift refusals close together must not collide on bak.. + + If two refusals share the same epoch second, the second call would + overwrite the first .bak. The current implementation accepts that + — both files describe the same on-disk state — but pin the path + format here so any future change has to think about it. + """ + store.add("memory", "Initial.") + self._plant_drift(store) + + r1 = store.replace("memory", "Initial", "Replacement.") + r2 = store.add("memory", "Another.") + assert r1.get("drift_backup") + assert r2.get("drift_backup") + # Same epoch second is the expected collision case — both point + # at the same snapshot. Different second is also fine. + assert ".bak." in r1["drift_backup"] + assert ".bak." in r2["drift_backup"] diff --git a/tools/memory_tool.py b/tools/memory_tool.py index 78d3a154933..97ea5ae7cf5 100644 --- a/tools/memory_tool.py +++ b/tools/memory_tool.py @@ -28,6 +28,7 @@ import logging import os import re import tempfile +import time from contextlib import contextmanager from pathlib import Path from hermes_constants import get_hermes_home @@ -104,6 +105,36 @@ def _scan_memory_content(content: str) -> Optional[str]: return None +def _drift_error(path: "Path", bak_path: str) -> Dict[str, Any]: + """Build the error dict returned when external drift is detected. + + The on-disk memory file contains content that wouldn't round-trip + through the tool's parser/serializer — flushing would discard the + appended/edited content from a patch tool, shell append, manual edit, + or sister-session write. We refuse the mutation, point the operator at + the .bak. snapshot we took, and tell them what to do next. + """ + return { + "success": False, + "error": ( + f"Refusing to write {path.name}: file on disk has content that " + f"wouldn't round-trip through the memory tool (likely added by " + f"the patch tool, a shell append, a manual edit, or a " + f"concurrent session). A snapshot was saved to {bak_path}. " + f"Resolve the drift first — either rewrite the file as a clean " + f"§-delimited list of entries, or move the extra content out — " + f"then retry. This guard exists to prevent silent data loss " + f"(issue #26045)." + ), + "drift_backup": bak_path, + "remediation": ( + "Open the .bak file, integrate the missing entries into the " + "memory tool one at a time via memory(action=add, content=...), " + "then remove or rewrite the original file to a clean state." + ), + } + + class MemoryStore: """ Bounded curated memory with file persistence. One instance per AIAgent. @@ -185,14 +216,23 @@ class MemoryStore: return mem_dir / "USER.md" return mem_dir / "MEMORY.md" - def _reload_target(self, target: str): + def _reload_target(self, target: str) -> Optional[str]: """Re-read entries from disk into in-memory state. Called under file lock to get the latest state before mutating. + Returns the backup path if external drift was detected (the on-disk + file contains content that wouldn't round-trip through our + parser/serializer, OR an entry larger than the store's char limit). + When drift is detected the caller must abort the mutation — + flushing would discard the un-roundtrippable content. + Returns None on clean reload. """ - fresh = self._read_file(self._path_for(target)) + path = self._path_for(target) + bak = self._detect_external_drift(target) + fresh = self._read_file(path) fresh = list(dict.fromkeys(fresh)) # deduplicate self._set_entries(target, fresh) + return bak def save_to_disk(self, target: str): """Persist entries to the appropriate file. Called after every mutation.""" @@ -233,8 +273,13 @@ class MemoryStore: return {"success": False, "error": scan_error} with self._file_lock(self._path_for(target)): - # Re-read from disk under lock to pick up writes from other sessions - self._reload_target(target) + # Re-read from disk under lock to pick up writes from other sessions. + # If external drift was detected, the file was backed up to .bak. + # — refuse the mutation so we don't clobber the un-roundtrippable + # content the patch tool / shell append / sister session wrote. + bak = self._reload_target(target) + if bak: + return _drift_error(self._path_for(target), bak) entries = self._entries_for(target) limit = self._char_limit(target) @@ -281,7 +326,9 @@ class MemoryStore: return {"success": False, "error": scan_error} with self._file_lock(self._path_for(target)): - self._reload_target(target) + bak = self._reload_target(target) + if bak: + return _drift_error(self._path_for(target), bak) entries = self._entries_for(target) matches = [(i, e) for i, e in enumerate(entries) if old_text in e] @@ -331,7 +378,9 @@ class MemoryStore: return {"success": False, "error": "old_text cannot be empty."} with self._file_lock(self._path_for(target)): - self._reload_target(target) + bak = self._reload_target(target) + if bak: + return _drift_error(self._path_for(target), bak) entries = self._entries_for(target) matches = [(i, e) for i, e in enumerate(entries) if old_text in e] @@ -430,6 +479,61 @@ class MemoryStore: entries = [e.strip() for e in raw.split(ENTRY_DELIMITER)] return [e for e in entries if e] + def _detect_external_drift(self, target: str) -> Optional[str]: + """Return a backup-path string if on-disk content shows external drift. + + The memory file is supposed to be a list of small entries the tool + wrote, joined by §. Detect drift via two signals: + + 1. Round-trip mismatch — re-parsing and re-serializing the file + doesn't produce identical bytes (rare; would catch oddly-encoded + delimiters). + 2. Entry-size overflow — any single parsed entry exceeds the + store's whole-file char limit. The tool budgets the ENTIRE store + against that limit; no single tool-written entry can exceed it. + When we see one entry larger than the limit, an external writer + (patch tool, shell append, manual edit, sister session) appended + free-form content into what the tool will treat as one entry. + Flushing would then truncate that entry to the model's new + content, discarding the appended bytes — issue #26045. + + Returns the absolute path of the .bak file when drift was found and + backed up; returns None when the file looks tool-shaped. + + Note: this is an INSTANCE method (not static) because we need the + per-target char_limit for signal #2. + """ + path = self._path_for(target) + if not path.exists(): + return None + try: + raw = path.read_text(encoding="utf-8") + except (OSError, IOError): + return None + if not raw.strip(): + return None + + parsed = [e.strip() for e in raw.split(ENTRY_DELIMITER) if e.strip()] + roundtrip = ENTRY_DELIMITER.join(parsed) + + char_limit = self._char_limit(target) + max_entry_len = max((len(e) for e in parsed), default=0) + + drift_detected = (raw.strip() != roundtrip) or (max_entry_len > char_limit) + if not drift_detected: + return None + + # Drift confirmed — snapshot the file so the operator can recover + # whatever the external writer added, then return the .bak path so + # the caller can refuse the mutation. + ts = int(time.time()) + bak_path = path.with_suffix(path.suffix + f".bak.{ts}") + try: + bak_path.write_text(raw, encoding="utf-8") + except (OSError, IOError): + return str(bak_path) + " (BACKUP FAILED — file unchanged on disk)" + return str(bak_path) + @staticmethod def _write_file(path: Path, entries: List[str]): """Write entries to a memory file using atomic temp-file + rename.