🐛 fix(memory): require newline after context tag

This commit is contained in:
墨綠BG 2026-05-02 12:26:46 +08:00 committed by kshitij
parent 341c8d3030
commit 50e93f23f2
2 changed files with 46 additions and 7 deletions

View file

@ -126,7 +126,10 @@ class StreamingContextScrubber:
idx = self._find_boundary_open_tag(buf)
if idx == -1:
# No open tag — hold back a potential partial open tag
held = self._max_partial_suffix(buf, self._OPEN_TAG)
held = (
self._max_pending_open_suffix(buf)
or self._max_partial_suffix(buf, self._OPEN_TAG)
)
if held:
self._append_visible(out, buf[:-held])
self._buf = buf[-held:]
@ -179,10 +182,25 @@ class StreamingContextScrubber:
idx = buf_lower.find(self._OPEN_TAG, search_start)
if idx == -1:
return -1
if self._is_block_boundary(buf, idx):
if self._is_block_boundary(buf, idx) and self._has_block_opener_suffix(buf, idx):
return idx
search_start = idx + 1
def _max_pending_open_suffix(self, buf: str) -> int:
    """Return how many trailing chars of *buf* form a pending open tag.

    The result is ``len(self._OPEN_TAG)`` when the lower-cased *buf* ends
    with the complete open tag and ``_is_block_boundary`` accepts the
    position where that tag starts; otherwise it is 0.  A non-zero result
    lets the caller hold the tag back until the character that follows it
    is available.
    """
    tag_len = len(self._OPEN_TAG)
    ends_with_tag = buf.lower().endswith(self._OPEN_TAG)
    # Only consult the boundary check once a full trailing tag is present.
    if ends_with_tag and self._is_block_boundary(buf, len(buf) - tag_len):
        return tag_len
    return 0
def _has_block_opener_suffix(self, buf: str, idx: int) -> bool:
    """Tell whether the open tag at *idx* is immediately followed by a line break.

    *idx* is the position in *buf* where the open tag starts.  Returns
    False when nothing follows the tag yet (the tag ends the buffer), and
    True only when the next character is ``\\r`` or ``\\n``.
    """
    next_pos = idx + len(self._OPEN_TAG)
    # The bounds check comes first so a tag at the very end is "not confirmed".
    return next_pos < len(buf) and buf[next_pos] in ("\r", "\n")
def _is_block_boundary(self, buf: str, idx: int) -> bool:
if idx == 0:
return self._at_block_boundary

View file

@ -73,7 +73,18 @@ class TestStreamingContextScrubberBasics:
s = StreamingContextScrubber()
out = (
s.feed("pre \n<memory")
+ s.feed("-context>leak</memory-context> post")
+ s.feed("-context>\nleak</memory-context> post")
+ s.flush()
)
assert out == "pre \n post"
assert "leak" not in out
def test_open_tag_waits_for_newline_confirmation_across_deltas(self):
"""A boundary tag is only a leaked block when the next char is a newline."""
s = StreamingContextScrubber()
out = (
s.feed("pre \n<memory-context>")
+ s.feed("\nleak</memory-context> post")
+ s.flush()
)
assert out == "pre \n post"
@ -83,7 +94,7 @@ class TestStreamingContextScrubberBasics:
"""The close tag arriving in two fragments."""
s = StreamingContextScrubber()
out = (
s.feed("pre \n<memory-context>leak</memory")
s.feed("pre \n<memory-context>\nleak</memory")
+ s.feed("-context> post")
+ s.flush()
)
@ -116,18 +127,28 @@ class TestStreamingContextScrubberPartialTagFalsePositives:
)
assert out == "In that previous `<memory-context>` block, there was no matching fact."
def test_mid_sentence_memory_context_pair_is_not_scrubbed(self):
def test_mid_sentence_memory_context_mention_is_not_scrubbed(self):
    """A tag name mentioned in the middle of a sentence passes through untouched."""
    sentence = "The <memory-context> tag name is documented here."
    scrubber = StreamingContextScrubber()
    fed = scrubber.feed(sentence)
    flushed = scrubber.flush()
    # Nothing is scrubbed: the visible output equals the input exactly.
    assert fed + flushed == "The <memory-context> tag name is documented here."
def test_line_start_memory_context_mention_without_close_is_not_scrubbed(self):
    """A line that begins with the tag name but is ordinary prose stays visible."""
    scrubber = StreamingContextScrubber()
    deltas = (
        "Visible intro\n",
        "<memory-context> is the literal tag name mentioned here.",
    )
    # Feed the deltas in order, then flush whatever the scrubber still holds.
    pieces = [scrubber.feed(delta) for delta in deltas]
    pieces.append(scrubber.flush())
    visible = "".join(pieces)
    assert visible == "Visible intro\n<memory-context> is the literal tag name mentioned here."
class TestStreamingContextScrubberUnterminatedSpan:
def test_unterminated_span_drops_payload(self):
"""Provider drops close tag — better to lose output than to leak."""
s = StreamingContextScrubber()
out = s.feed("pre \n<memory-context>secret never closed") + s.flush()
out = s.feed("pre \n<memory-context>\nsecret never closed") + s.flush()
assert out == "pre \n"
assert "secret" not in out
@ -144,7 +165,7 @@ class TestStreamingContextScrubberCaseInsensitivity:
def test_uppercase_tags_still_scrubbed(self):
s = StreamingContextScrubber()
out = (
s.feed("<MEMORY-CONTEXT>secret")
s.feed("<MEMORY-CONTEXT>\nsecret")
+ s.feed("</Memory-Context>visible")
+ s.flush()
)