mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-29 06:31:32 +00:00
Reproduction (production, 2026-05-14): two concurrent sessions on the
same agent. Session A patches MEMORY.md directly via the patch tool,
appending ~8KB of structured content (Vendor Master, Standing Orders,
Pin Board) — none of it through the memory tool, so no § delimiters.
Session B starts later with stale in-memory state (1 entry, ~331
chars). Session B calls memory(action=replace) on its one known
entry. The tool's _read_file parses A's content as a single 8KB
'entry' (no § splits), then replace truncates that entry to B's new
333-byte content. ~8KB of structured content silently destroyed.
The atomic-rename write path is fine in isolation. The bug is the
implicit contract: the tool assumes MEMORY.md is exclusively a
§-delimited list of small entries it wrote, but the v0.13 install
runbook itself uses 'cat >> MEMORY.md' for onboarding, the patch tool
edits the file directly, and operators do too.
Fix: a drift guard in MemoryStore._detect_external_drift that fires
on either signal:
1. Re-parse + re-serialize doesn't produce identical bytes
(catches oddly-encoded delimiters / partial writes).
2. Any single parsed entry exceeds the store's whole-file char
limit. The tool budgets the ENTIRE store against that limit
(2200 chars for memory, 1375 for user), so no tool-written
entry can legitimately be larger. An entry bigger than the
store limit means an external writer dropped free-form content
into what the tool will treat as one entry.
When drift fires, _reload_target writes a .bak.<ts> snapshot of the
on-disk file, then add/replace/remove refuse to flush. The original
file stays untouched. The error dict surfaces the .bak path AND a
remediation string ('integrate missing entries via memory(add=...)
one at a time, then rewrite the file clean') so the model can act on
it without escalating to the operator.
Tests:
- test_replace_refuses_on_drift, test_add_refuses_on_drift,
test_remove_refuses_on_drift — all three mutators refuse
- test_clean_file_does_not_trigger_drift — false-positive check
- test_error_message_points_at_remediation — error string shape
- test_drift_guard_also_protects_user_target — USER.md too
- test_drift_backup_filename_is_unique_per_invocation — bak.<ts>
naming pin
144 memory tests passing (was 137; +7).
Fixes #26045
This commit is contained in:
parent
cc93053b42
commit
6855d17753
2 changed files with 235 additions and 6 deletions
|
|
@ -255,3 +255,128 @@ class TestMemoryToolDispatcher:
|
|||
def test_remove_requires_old_text(self, store):
|
||||
result = json.loads(memory_tool(action="remove", store=store))
|
||||
assert result["success"] is False
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# External drift guard (#26045)
|
||||
#
|
||||
# An external writer — patch tool, shell append, manual edit, or sister
|
||||
# session — can grow MEMORY.md beyond the tool's mental model: no §
|
||||
# delimiters, content that would all collapse into a single "entry" larger
|
||||
# than the char limit. Pre-fix, the next memory(action=replace) from a
|
||||
# session with stale in-memory state truncated that giant entry, silently
|
||||
# discarding the appended bytes. Reproduced in production on 2026-05-14 —
|
||||
# ~8KB of structured vendor / standing-orders / pinboard content destroyed
|
||||
# by a sister session's replace.
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestExternalDriftGuard:
|
||||
"""Mutations must refuse to flush when on-disk content shows external drift."""
|
||||
|
||||
def _plant_drift(self, store, target="memory"):
|
||||
"""Append free-form content (no § delimiters) past char_limit."""
|
||||
path = store._path_for(target)
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
# 800 chars per entry × 3 sections == ~2.4KB without delimiters,
|
||||
# well over the test fixture's 500-char limit.
|
||||
block = "\n\n## Vendor Master\n" + "x" * 800
|
||||
block += "\n\n## Standing Orders\n" + "y" * 800
|
||||
block += "\n\n## Pin Board\n" + "z" * 800
|
||||
existing = path.read_text(encoding="utf-8") if path.exists() else ""
|
||||
path.write_text(existing + block, encoding="utf-8")
|
||||
return path
|
||||
|
||||
def test_replace_refuses_on_drift(self, store):
|
||||
store.add("memory", "User likes brevity.")
|
||||
path = self._plant_drift(store)
|
||||
original_size = path.stat().st_size
|
||||
|
||||
result = store.replace("memory", "User likes", "User prefers concise.")
|
||||
|
||||
assert result["success"] is False
|
||||
assert "drift_backup" in result
|
||||
# On-disk file is UNTOUCHED — that's the point.
|
||||
assert path.stat().st_size == original_size
|
||||
assert "Vendor Master" in path.read_text()
|
||||
# Backup exists with the drifted content.
|
||||
bak = result["drift_backup"]
|
||||
assert Path(bak).exists()
|
||||
assert "Vendor Master" in Path(bak).read_text()
|
||||
|
||||
def test_add_refuses_on_drift(self, store):
|
||||
store.add("memory", "Existing.")
|
||||
path = self._plant_drift(store)
|
||||
original = path.read_text()
|
||||
|
||||
result = store.add("memory", "New entry under drift.")
|
||||
|
||||
assert result["success"] is False
|
||||
assert "drift_backup" in result
|
||||
assert path.read_text() == original # untouched
|
||||
|
||||
def test_remove_refuses_on_drift(self, store):
|
||||
store.add("memory", "Target entry to remove.")
|
||||
path = self._plant_drift(store)
|
||||
original = path.read_text()
|
||||
|
||||
result = store.remove("memory", "Target entry")
|
||||
|
||||
assert result["success"] is False
|
||||
assert "drift_backup" in result
|
||||
assert path.read_text() == original # untouched
|
||||
|
||||
def test_clean_file_does_not_trigger_drift(self, store):
|
||||
"""A normally-written file (just below char_limit, §-delimited) is fine."""
|
||||
# Two tool-shaped entries totaling under the 500-char limit.
|
||||
store.add("memory", "Entry one — normal length.")
|
||||
store.add("memory", "Entry two — also normal.")
|
||||
|
||||
result = store.add("memory", "Entry three.")
|
||||
assert result["success"] is True
|
||||
assert "drift_backup" not in result
|
||||
|
||||
result = store.replace("memory", "Entry two", "Entry two replaced.")
|
||||
assert result["success"] is True
|
||||
|
||||
def test_error_message_points_at_remediation(self, store):
|
||||
"""The error string must reference the backup AND remediation steps."""
|
||||
store.add("memory", "Initial.")
|
||||
self._plant_drift(store)
|
||||
|
||||
result = store.replace("memory", "Initial", "Replacement.")
|
||||
assert result["success"] is False
|
||||
# The model has to know what file to look at and what to do.
|
||||
assert ".bak." in result["error"]
|
||||
assert "remediation" in result
|
||||
assert "26045" in result["error"] # tracking-issue back-reference
|
||||
|
||||
def test_drift_guard_also_protects_user_target(self, store):
|
||||
"""USER.md gets the same guarantee as MEMORY.md."""
|
||||
store.add("user", "Some preference.")
|
||||
path = self._plant_drift(store, target="user")
|
||||
original_size = path.stat().st_size
|
||||
|
||||
result = store.replace("user", "Some preference", "New preference.")
|
||||
assert result["success"] is False
|
||||
assert path.stat().st_size == original_size
|
||||
|
||||
def test_drift_backup_filename_is_unique_per_invocation(self, store):
|
||||
"""Two drift refusals close together must not collide on bak.<ts>.
|
||||
|
||||
If two refusals share the same epoch second, the second call would
|
||||
overwrite the first .bak. The current implementation accepts that
|
||||
— both files describe the same on-disk state — but pin the path
|
||||
format here so any future change has to think about it.
|
||||
"""
|
||||
store.add("memory", "Initial.")
|
||||
self._plant_drift(store)
|
||||
|
||||
r1 = store.replace("memory", "Initial", "Replacement.")
|
||||
r2 = store.add("memory", "Another.")
|
||||
assert r1.get("drift_backup")
|
||||
assert r2.get("drift_backup")
|
||||
# Same epoch second is the expected collision case — both point
|
||||
# at the same snapshot. Different second is also fine.
|
||||
assert ".bak." in r1["drift_backup"]
|
||||
assert ".bak." in r2["drift_backup"]
|
||||
|
|
|
|||
|
|
@ -28,6 +28,7 @@ import logging
|
|||
import os
|
||||
import re
|
||||
import tempfile
|
||||
import time
|
||||
from contextlib import contextmanager
|
||||
from pathlib import Path
|
||||
from hermes_constants import get_hermes_home
|
||||
|
|
@ -104,6 +105,36 @@ def _scan_memory_content(content: str) -> Optional[str]:
|
|||
return None
|
||||
|
||||
|
||||
def _drift_error(path: "Path", bak_path: str) -> Dict[str, Any]:
|
||||
"""Build the error dict returned when external drift is detected.
|
||||
|
||||
The on-disk memory file contains content that wouldn't round-trip
|
||||
through the tool's parser/serializer — flushing would discard the
|
||||
appended/edited content from a patch tool, shell append, manual edit,
|
||||
or sister-session write. We refuse the mutation, point the operator at
|
||||
the .bak.<ts> snapshot we took, and tell them what to do next.
|
||||
"""
|
||||
return {
|
||||
"success": False,
|
||||
"error": (
|
||||
f"Refusing to write {path.name}: file on disk has content that "
|
||||
f"wouldn't round-trip through the memory tool (likely added by "
|
||||
f"the patch tool, a shell append, a manual edit, or a "
|
||||
f"concurrent session). A snapshot was saved to {bak_path}. "
|
||||
f"Resolve the drift first — either rewrite the file as a clean "
|
||||
f"§-delimited list of entries, or move the extra content out — "
|
||||
f"then retry. This guard exists to prevent silent data loss "
|
||||
f"(issue #26045)."
|
||||
),
|
||||
"drift_backup": bak_path,
|
||||
"remediation": (
|
||||
"Open the .bak file, integrate the missing entries into the "
|
||||
"memory tool one at a time via memory(action=add, content=...), "
|
||||
"then remove or rewrite the original file to a clean state."
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
class MemoryStore:
|
||||
"""
|
||||
Bounded curated memory with file persistence. One instance per AIAgent.
|
||||
|
|
@ -185,14 +216,23 @@ class MemoryStore:
|
|||
return mem_dir / "USER.md"
|
||||
return mem_dir / "MEMORY.md"
|
||||
|
||||
def _reload_target(self, target: str):
|
||||
def _reload_target(self, target: str) -> Optional[str]:
|
||||
"""Re-read entries from disk into in-memory state.
|
||||
|
||||
Called under file lock to get the latest state before mutating.
|
||||
Returns the backup path if external drift was detected (the on-disk
|
||||
file contains content that wouldn't round-trip through our
|
||||
parser/serializer, OR an entry larger than the store's char limit).
|
||||
When drift is detected the caller must abort the mutation —
|
||||
flushing would discard the un-roundtrippable content.
|
||||
Returns None on clean reload.
|
||||
"""
|
||||
fresh = self._read_file(self._path_for(target))
|
||||
path = self._path_for(target)
|
||||
bak = self._detect_external_drift(target)
|
||||
fresh = self._read_file(path)
|
||||
fresh = list(dict.fromkeys(fresh)) # deduplicate
|
||||
self._set_entries(target, fresh)
|
||||
return bak
|
||||
|
||||
def save_to_disk(self, target: str):
|
||||
"""Persist entries to the appropriate file. Called after every mutation."""
|
||||
|
|
@ -233,8 +273,13 @@ class MemoryStore:
|
|||
return {"success": False, "error": scan_error}
|
||||
|
||||
with self._file_lock(self._path_for(target)):
|
||||
# Re-read from disk under lock to pick up writes from other sessions
|
||||
self._reload_target(target)
|
||||
# Re-read from disk under lock to pick up writes from other sessions.
|
||||
# If external drift was detected, the file was backed up to .bak.<ts>
|
||||
# — refuse the mutation so we don't clobber the un-roundtrippable
|
||||
# content the patch tool / shell append / sister session wrote.
|
||||
bak = self._reload_target(target)
|
||||
if bak:
|
||||
return _drift_error(self._path_for(target), bak)
|
||||
|
||||
entries = self._entries_for(target)
|
||||
limit = self._char_limit(target)
|
||||
|
|
@ -281,7 +326,9 @@ class MemoryStore:
|
|||
return {"success": False, "error": scan_error}
|
||||
|
||||
with self._file_lock(self._path_for(target)):
|
||||
self._reload_target(target)
|
||||
bak = self._reload_target(target)
|
||||
if bak:
|
||||
return _drift_error(self._path_for(target), bak)
|
||||
|
||||
entries = self._entries_for(target)
|
||||
matches = [(i, e) for i, e in enumerate(entries) if old_text in e]
|
||||
|
|
@ -331,7 +378,9 @@ class MemoryStore:
|
|||
return {"success": False, "error": "old_text cannot be empty."}
|
||||
|
||||
with self._file_lock(self._path_for(target)):
|
||||
self._reload_target(target)
|
||||
bak = self._reload_target(target)
|
||||
if bak:
|
||||
return _drift_error(self._path_for(target), bak)
|
||||
|
||||
entries = self._entries_for(target)
|
||||
matches = [(i, e) for i, e in enumerate(entries) if old_text in e]
|
||||
|
|
@ -430,6 +479,61 @@ class MemoryStore:
|
|||
entries = [e.strip() for e in raw.split(ENTRY_DELIMITER)]
|
||||
return [e for e in entries if e]
|
||||
|
||||
def _detect_external_drift(self, target: str) -> Optional[str]:
|
||||
"""Return a backup-path string if on-disk content shows external drift.
|
||||
|
||||
The memory file is supposed to be a list of small entries the tool
|
||||
wrote, joined by §. Detect drift via two signals:
|
||||
|
||||
1. Round-trip mismatch — re-parsing and re-serializing the file
|
||||
doesn't produce identical bytes (rare; would catch oddly-encoded
|
||||
delimiters).
|
||||
2. Entry-size overflow — any single parsed entry exceeds the
|
||||
store's whole-file char limit. The tool budgets the ENTIRE store
|
||||
against that limit; no single tool-written entry can exceed it.
|
||||
When we see one entry larger than the limit, an external writer
|
||||
(patch tool, shell append, manual edit, sister session) appended
|
||||
free-form content into what the tool will treat as one entry.
|
||||
Flushing would then truncate that entry to the model's new
|
||||
content, discarding the appended bytes — issue #26045.
|
||||
|
||||
Returns the absolute path of the .bak file when drift was found and
|
||||
backed up; returns None when the file looks tool-shaped.
|
||||
|
||||
Note: this is an INSTANCE method (not static) because we need the
|
||||
per-target char_limit for signal #2.
|
||||
"""
|
||||
path = self._path_for(target)
|
||||
if not path.exists():
|
||||
return None
|
||||
try:
|
||||
raw = path.read_text(encoding="utf-8")
|
||||
except (OSError, IOError):
|
||||
return None
|
||||
if not raw.strip():
|
||||
return None
|
||||
|
||||
parsed = [e.strip() for e in raw.split(ENTRY_DELIMITER) if e.strip()]
|
||||
roundtrip = ENTRY_DELIMITER.join(parsed)
|
||||
|
||||
char_limit = self._char_limit(target)
|
||||
max_entry_len = max((len(e) for e in parsed), default=0)
|
||||
|
||||
drift_detected = (raw.strip() != roundtrip) or (max_entry_len > char_limit)
|
||||
if not drift_detected:
|
||||
return None
|
||||
|
||||
# Drift confirmed — snapshot the file so the operator can recover
|
||||
# whatever the external writer added, then return the .bak path so
|
||||
# the caller can refuse the mutation.
|
||||
ts = int(time.time())
|
||||
bak_path = path.with_suffix(path.suffix + f".bak.{ts}")
|
||||
try:
|
||||
bak_path.write_text(raw, encoding="utf-8")
|
||||
except (OSError, IOError):
|
||||
return str(bak_path) + " (BACKUP FAILED — file unchanged on disk)"
|
||||
return str(bak_path)
|
||||
|
||||
@staticmethod
|
||||
def _write_file(path: Path, entries: List[str]):
|
||||
"""Write entries to a memory file using atomic temp-file + rename.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue