From 8e5b7592f8dee4c6f2ddfe418349843588288be3 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Thu, 11 Jun 2026 23:27:28 -0700
Subject: [PATCH] refactor(agent): hoist MEDIA-directive regex to module level

Avoid recompiling the pattern on every _serialize_for_summary call; name
it beside _PATH_MENTION_RE with the #14665 rationale.
---
 agent/context_compressor.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/agent/context_compressor.py b/agent/context_compressor.py
index c2f6fa3d241..4611616085f 100644
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@@ -150,6 +150,11 @@ _AUTO_FOCUS_MAX_CHARS = 700
 
 _PATH_MENTION_RE = re.compile(r"(?:/|~/?|[A-Za-z]:\\)[^\s`'\")\]}<>]+")
 
+# MEDIA delivery directives must not reach the summarizer — if one leaks into
+# the summary, the downstream model may re-emit it as an active directive on
+# the next turn, triggering bogus attachment sends (#14665).
+_MEDIA_DIRECTIVE_RE = re.compile(r"MEDIA:\S+")
+
 
 def _dedupe_append(items: list[str], value: str, *, limit: int) -> None:
     value = value.strip()
@@ -1006,16 +1011,11 @@ class ContextCompressor(ContextEngine):
         (API keys, tokens, passwords) from leaking into the summary that gets
         sent to the auxiliary model and persisted across compactions.
         """
-        # Strip MEDIA directives before sending to the summarizer — if they
-        # leak into the summary, the downstream model may re-emit them as
-        # active directives on the next turn (#14665).
-        _MEDIA_RE = re.compile(r'MEDIA:\S+')
-
         parts = []
         for msg in turns:
             role = msg.get("role", "unknown")
             content = redact_sensitive_text(msg.get("content") or "")
-            content = _MEDIA_RE.sub("[media attachment]", content)
+            content = _MEDIA_DIRECTIVE_RE.sub("[media attachment]", content)
 
             # Tool results: keep enough content for the summarizer
             if role == "tool":