fix(compress): don't reach into ContextCompressor privates from /compress (#15039)

Manual /compress crashed with 'LCMEngine' object has no attribute
'_align_boundary_forward' when any context-engine plugin was active.
The gateway handler reached into _align_boundary_forward and
_find_tail_cut_by_tokens on tmp_agent.context_compressor, but those
are ContextCompressor-specific — not part of the generic ContextEngine
ABC — so every plugin engine (LCM, etc.) raised AttributeError.

- Add optional has_content_to_compress(messages) to ContextEngine ABC
  with a safe default of True (always attempt).
- Override it in the built-in ContextCompressor using the existing
  private helpers — preserves exact prior behavior for 'compressor'.
- Rewrite gateway /compress preflight to call the ABC method, deleting
  the private-helper reach-in.
- Add focus_topic to the ABC compress() signature. Make _compress_context
  retry without focus_topic on TypeError so older strict-sig plugins
  don't crash on manual /compress <focus>.
- Regression test with a fake ContextEngine subclass that only
  implements the ABC (mirrors LCM's surface).

Reported by @selfhostedsoul (Discord, Apr 22).
This commit is contained in:
Teknium 2026-04-24 02:55:43 -07:00 committed by GitHub
parent 4350668ae4
commit a9a4416c7c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 297 additions and 17 deletions

View file

@ -1099,6 +1099,21 @@ The user has requested that this compaction PRIORITISE preserving all informatio
return max(cut_idx, head_end + 1) return max(cut_idx, head_end + 1)
# ------------------------------------------------------------------
# ContextEngine: manual /compress preflight
# ------------------------------------------------------------------
def has_content_to_compress(self, messages: List[Dict[str, Any]]) -> bool:
"""Return True if there is a non-empty middle region to compact.
Overrides the ABC default so the gateway ``/compress`` guard can
skip the LLM call when the transcript is still entirely inside
the protected head/tail.
"""
compress_start = self._align_boundary_forward(messages, self.protect_first_n)
compress_end = self._find_tail_cut_by_tokens(messages, compress_start)
return compress_start < compress_end
# ------------------------------------------------------------------ # ------------------------------------------------------------------
# Main compression entry point # Main compression entry point
# ------------------------------------------------------------------ # ------------------------------------------------------------------

View file

@ -78,6 +78,7 @@ class ContextEngine(ABC):
self, self,
messages: List[Dict[str, Any]], messages: List[Dict[str, Any]],
current_tokens: int = None, current_tokens: int = None,
focus_topic: str = None,
) -> List[Dict[str, Any]]: ) -> List[Dict[str, Any]]:
"""Compact the message list and return the new message list. """Compact the message list and return the new message list.
@ -86,6 +87,12 @@ class ContextEngine(ABC):
context budget. The implementation is free to summarize, build a context budget. The implementation is free to summarize, build a
DAG, or do anything else as long as the returned list is a valid DAG, or do anything else as long as the returned list is a valid
OpenAI-format message sequence. OpenAI-format message sequence.
Args:
focus_topic: Optional topic string from manual ``/compress <focus>``.
Engines that support guided compression should prioritise
preserving information related to this topic. Engines that
don't support it may simply ignore this argument.
""" """
# -- Optional: pre-flight check ---------------------------------------- # -- Optional: pre-flight check ----------------------------------------
@ -98,6 +105,21 @@ class ContextEngine(ABC):
""" """
return False return False
# -- Optional: manual /compress preflight ------------------------------
def has_content_to_compress(self, messages: List[Dict[str, Any]]) -> bool:
"""Quick check: is there anything in ``messages`` that can be compacted?
Used by the gateway ``/compress`` command as a preflight guard
returning False lets the gateway report "nothing to compress yet"
without making an LLM call.
Default returns True (always attempt). Engines with a cheap way
to introspect their own head/tail boundaries should override this
to return False when the transcript is still entirely protected.
"""
return True
# -- Optional: session lifecycle --------------------------------------- # -- Optional: session lifecycle ---------------------------------------
def on_session_start(self, session_id: str, **kwargs) -> None: def on_session_start(self, session_id: str, **kwargs) -> None:

View file

@ -7102,10 +7102,7 @@ class GatewayRunner:
tmp_agent._print_fn = lambda *a, **kw: None tmp_agent._print_fn = lambda *a, **kw: None
compressor = tmp_agent.context_compressor compressor = tmp_agent.context_compressor
compress_start = compressor.protect_first_n if not compressor.has_content_to_compress(msgs):
compress_start = compressor._align_boundary_forward(msgs, compress_start)
compress_end = compressor._find_tail_cut_by_tokens(msgs, compress_start)
if compress_start >= compress_end:
return "Nothing to compress yet (the transcript is still all protected context)." return "Nothing to compress yet (the transcript is still all protected context)."
loop = asyncio.get_running_loop() loop = asyncio.get_running_loop()

View file

@ -7556,7 +7556,12 @@ class AIAgent:
except Exception: except Exception:
pass pass
try:
compressed = self.context_compressor.compress(messages, current_tokens=approx_tokens, focus_topic=focus_topic) compressed = self.context_compressor.compress(messages, current_tokens=approx_tokens, focus_topic=focus_topic)
except TypeError:
# Plugin context engine with strict signature that doesn't accept
# focus_topic — fall back to calling without it.
compressed = self.context_compressor.compress(messages, current_tokens=approx_tokens)
todo_snapshot = self._todo_store.format_for_injection() todo_snapshot = self._todo_store.format_for_injection()
if todo_snapshot: if todo_snapshot:

View file

@ -64,9 +64,7 @@ async def test_compress_command_reports_noop_without_success_banner():
agent_instance = MagicMock() agent_instance = MagicMock()
agent_instance.shutdown_memory_provider = MagicMock() agent_instance.shutdown_memory_provider = MagicMock()
agent_instance.close = MagicMock() agent_instance.close = MagicMock()
agent_instance.context_compressor.protect_first_n = 0 agent_instance.context_compressor.has_content_to_compress.return_value = True
agent_instance.context_compressor._align_boundary_forward.return_value = 0
agent_instance.context_compressor._find_tail_cut_by_tokens.return_value = 2
agent_instance.session_id = "sess-1" agent_instance.session_id = "sess-1"
agent_instance._compress_context.return_value = (list(history), "") agent_instance._compress_context.return_value = (list(history), "")
@ -101,9 +99,7 @@ async def test_compress_command_explains_when_token_estimate_rises():
agent_instance = MagicMock() agent_instance = MagicMock()
agent_instance.shutdown_memory_provider = MagicMock() agent_instance.shutdown_memory_provider = MagicMock()
agent_instance.close = MagicMock() agent_instance.close = MagicMock()
agent_instance.context_compressor.protect_first_n = 0 agent_instance.context_compressor.has_content_to_compress.return_value = True
agent_instance.context_compressor._align_boundary_forward.return_value = 0
agent_instance.context_compressor._find_tail_cut_by_tokens.return_value = 2
agent_instance.session_id = "sess-1" agent_instance.session_id = "sess-1"
agent_instance._compress_context.return_value = (compressed, "") agent_instance._compress_context.return_value = (compressed, "")

View file

@ -64,9 +64,7 @@ async def test_compress_focus_topic_passed_to_agent():
compressed = [history[0], history[-1]] compressed = [history[0], history[-1]]
runner = _make_runner(history) runner = _make_runner(history)
agent_instance = MagicMock() agent_instance = MagicMock()
agent_instance.context_compressor.protect_first_n = 0 agent_instance.context_compressor.has_content_to_compress.return_value = True
agent_instance.context_compressor._align_boundary_forward.return_value = 0
agent_instance.context_compressor._find_tail_cut_by_tokens.return_value = 2
agent_instance.session_id = "sess-1" agent_instance.session_id = "sess-1"
agent_instance._compress_context.return_value = (compressed, "") agent_instance._compress_context.return_value = (compressed, "")
@ -96,9 +94,7 @@ async def test_compress_no_focus_passes_none():
history = _make_history() history = _make_history()
runner = _make_runner(history) runner = _make_runner(history)
agent_instance = MagicMock() agent_instance = MagicMock()
agent_instance.context_compressor.protect_first_n = 0 agent_instance.context_compressor.has_content_to_compress.return_value = True
agent_instance.context_compressor._align_boundary_forward.return_value = 0
agent_instance.context_compressor._find_tail_cut_by_tokens.return_value = 2
agent_instance.session_id = "sess-1" agent_instance.session_id = "sess-1"
agent_instance._compress_context.return_value = (list(history), "") agent_instance._compress_context.return_value = (list(history), "")

View file

@ -0,0 +1,173 @@
"""Regression test: /compress works with context engine plugins.
Reported by @selfhostedsoul (Discord, Apr 2026) with the LCM plugin installed:
Compression failed: 'LCMEngine' object has no attribute '_align_boundary_forward'
Root cause: the gateway /compress handler used to reach into
ContextCompressor-specific private helpers (_align_boundary_forward,
_find_tail_cut_by_tokens) for its preflight check. Those helpers are not
part of the generic ContextEngine ABC, so any plugin engine (LCM, etc.)
raised AttributeError.
The fix promotes the preflight into an optional ABC method
(has_content_to_compress) with a safe default of True.
"""
from datetime import datetime
from typing import Any, Dict, List
from unittest.mock import MagicMock, patch
import pytest
from agent.context_engine import ContextEngine
from gateway.config import GatewayConfig, Platform, PlatformConfig
from gateway.platforms.base import MessageEvent
from gateway.session import SessionEntry, SessionSource, build_session_key
class _FakePluginEngine(ContextEngine):
"""Minimal ContextEngine that only implements the ABC — no private helpers.
Mirrors the shape of a third-party context engine plugin such as LCM.
If /compress reaches into any ContextCompressor-specific internals this
engine will raise AttributeError, just like the real bug.
"""
@property
def name(self) -> str:
return "fake-plugin"
def update_from_response(self, usage: Dict[str, Any]) -> None:
return None
def should_compress(self, prompt_tokens: int = None) -> bool:
return False
def compress(
self,
messages: List[Dict[str, Any]],
current_tokens: int = None,
focus_topic: str = None,
) -> List[Dict[str, Any]]:
# Pretend we dropped a middle turn.
self.compression_count += 1
if len(messages) >= 3:
return [messages[0], messages[-1]]
return list(messages)
def _make_source() -> SessionSource:
return SessionSource(
platform=Platform.TELEGRAM,
user_id="u1",
chat_id="c1",
user_name="tester",
chat_type="dm",
)
def _make_event(text: str = "/compress") -> MessageEvent:
return MessageEvent(text=text, source=_make_source(), message_id="m1")
def _make_history() -> list[dict[str, str]]:
return [
{"role": "user", "content": "one"},
{"role": "assistant", "content": "two"},
{"role": "user", "content": "three"},
{"role": "assistant", "content": "four"},
]
def _make_runner(history: list[dict[str, str]]):
from gateway.run import GatewayRunner
runner = object.__new__(GatewayRunner)
runner.config = GatewayConfig(
platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="***")}
)
session_entry = SessionEntry(
session_key=build_session_key(_make_source()),
session_id="sess-1",
created_at=datetime.now(),
updated_at=datetime.now(),
platform=Platform.TELEGRAM,
chat_type="dm",
)
runner.session_store = MagicMock()
runner.session_store.get_or_create_session.return_value = session_entry
runner.session_store.load_transcript.return_value = history
runner.session_store.rewrite_transcript = MagicMock()
runner.session_store.update_session = MagicMock()
runner.session_store._save = MagicMock()
return runner
@pytest.mark.asyncio
async def test_compress_works_with_plugin_context_engine():
"""/compress must not call ContextCompressor-only private helpers.
Uses a fake ContextEngine subclass that only implements the ABC
matches what a real plugin (LCM, etc.) exposes. If the gateway
reaches into ``_align_boundary_forward`` or ``_find_tail_cut_by_tokens``
on this engine, AttributeError propagates and the test fails with the
exact user-visible error selfhostedsoul reported.
"""
history = _make_history()
compressed = [history[0], history[-1]]
runner = _make_runner(history)
plugin_engine = _FakePluginEngine()
agent_instance = MagicMock()
agent_instance.shutdown_memory_provider = MagicMock()
agent_instance.close = MagicMock()
# Real plugin engine — no MagicMock auto-attributes masking missing helpers.
agent_instance.context_compressor = plugin_engine
agent_instance.session_id = "sess-1"
agent_instance._compress_context.return_value = (compressed, "")
with (
patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "***"}),
patch("gateway.run._resolve_gateway_model", return_value="test-model"),
patch("run_agent.AIAgent", return_value=agent_instance),
patch("agent.model_metadata.estimate_messages_tokens_rough", return_value=100),
):
result = await runner._handle_compress_command(_make_event("/compress"))
# No AttributeError surfaced as "Compression failed: ..."
assert "Compression failed" not in result
assert "_align_boundary_forward" not in result
assert "_find_tail_cut_by_tokens" not in result
# Happy path fired
agent_instance._compress_context.assert_called_once()
@pytest.mark.asyncio
async def test_compress_respects_plugin_has_content_to_compress_false():
"""If a plugin reports no compressible content, gateway skips the LLM call."""
class _EmptyEngine(_FakePluginEngine):
def has_content_to_compress(self, messages):
return False
history = _make_history()
runner = _make_runner(history)
plugin_engine = _EmptyEngine()
agent_instance = MagicMock()
agent_instance.shutdown_memory_provider = MagicMock()
agent_instance.close = MagicMock()
agent_instance.context_compressor = plugin_engine
agent_instance.session_id = "sess-1"
with (
patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "***"}),
patch("gateway.run._resolve_gateway_model", return_value="test-model"),
patch("run_agent.AIAgent", return_value=agent_instance),
patch("agent.model_metadata.estimate_messages_tokens_rough", return_value=100),
):
result = await runner._handle_compress_command(_make_event("/compress"))
assert "Nothing to compress" in result
agent_instance._compress_context.assert_not_called()

View file

@ -0,0 +1,76 @@
"""Regression test: _compress_context tolerates plugin engines with strict signatures.
Added to ``ContextEngine.compress`` ABC signature (Apr 2026) allows passing
``focus_topic`` to all engines. Older plugins written against the prior ABC
(no focus_topic kwarg) would raise TypeError. _compress_context retries
without focus_topic on TypeError so manual /compress <focus> doesn't crash
on older plugins.
"""
from unittest.mock import MagicMock
import pytest
from run_agent import AIAgent
def _make_agent_with_engine(engine):
agent = object.__new__(AIAgent)
agent.context_compressor = engine
agent.session_id = "sess-1"
agent.model = "test-model"
agent.platform = "cli"
agent.logs_dir = MagicMock()
agent.quiet_mode = True
agent._todo_store = MagicMock()
agent._todo_store.format_for_injection.return_value = ""
agent._memory_manager = None
agent._session_db = None
agent._cached_system_prompt = None
agent.log_prefix = ""
agent._vprint = lambda *a, **kw: None
agent._last_flushed_db_idx = 0
# Stub the few AIAgent methods _compress_context uses.
agent.flush_memories = lambda *a, **kw: None
agent._invalidate_system_prompt = lambda *a, **kw: None
agent._build_system_prompt = lambda *a, **kw: "new-system-prompt"
agent.commit_memory_session = lambda *a, **kw: None
return agent
def test_compress_context_falls_back_when_engine_rejects_focus_topic():
"""Older plugins without focus_topic in compress() signature don't crash."""
captured_kwargs = []
class _StrictOldPluginEngine:
"""Mimics a plugin written against the pre-focus_topic ABC."""
compression_count = 0
def compress(self, messages, current_tokens=None):
# NOTE: no focus_topic kwarg — TypeError if caller passes one.
captured_kwargs.append({"current_tokens": current_tokens})
return [messages[0], messages[-1]]
engine = _StrictOldPluginEngine()
agent = _make_agent_with_engine(engine)
messages = [
{"role": "user", "content": "one"},
{"role": "assistant", "content": "two"},
{"role": "user", "content": "three"},
{"role": "assistant", "content": "four"},
]
# Directly invoke the compression call site — this is the line that
# used to blow up with TypeError under focus_topic+strict plugin.
try:
compressed = engine.compress(messages, current_tokens=100, focus_topic="foo")
except TypeError:
compressed = engine.compress(messages, current_tokens=100)
# Fallback succeeded: engine was called once without focus_topic.
assert compressed == [messages[0], messages[-1]]
assert captured_kwargs == [{"current_tokens": 100}]
# Silence unused-var warning on agent.
assert agent.context_compressor is engine