fix(compress): don't reach into ContextCompressor privates from /compress (#15039)

Manual /compress crashed with 'LCMEngine' object has no attribute '_align_boundary_forward' when any context-engine plugin was active. The gateway handler reached into _align_boundary_forward and _find_tail_cut_by_tokens on tmp_agent.context_compressor, but those are ContextCompressor-specific — not part of the generic ContextEngine ABC — so every plugin engine (LCM, etc.) raised AttributeError. - Add optional has_content_to_compress(messages) to ContextEngine ABC with a safe default of True (always attempt). - Override it in the built-in ContextCompressor using the existing private helpers — preserves exact prior behavior for 'compressor'. - Rewrite gateway /compress preflight to call the ABC method, deleting the private-helper reach-in. - Add focus_topic to the ABC compress() signature. Make _compress_context retry without focus_topic on TypeError so older strict-sig plugins don't crash on manual /compress <focus>. - Regression test with a fake ContextEngine subclass that only implements the ABC (mirrors LCM's surface). Reported by @selfhostedsoul (Discord, Apr 22).
2026-04-25 00:51:20 +00:00 · 2026-04-24 02:55:43 -07:00 · 2026-04-24 02:55:43 -07:00 · a9a4416c7c
commit a9a4416c7c
parent 4350668ae4
8 changed files with 297 additions and 17 deletions
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@ -1099,6 +1099,21 @@ The user has requested that this compaction PRIORITISE preserving all informatio
        return max(cut_idx, head_end + 1)
    # ------------------------------------------------------------------
    # ContextEngine: manual /compress preflight
    # ------------------------------------------------------------------
    def has_content_to_compress(self, messages: List[Dict[str, Any]]) -> bool:
        """Return True if there is a non-empty middle region to compact.
        Overrides the ABC default so the gateway ``/compress`` guard can
        skip the LLM call when the transcript is still entirely inside
        the protected head/tail.
        """
        compress_start = self._align_boundary_forward(messages, self.protect_first_n)
        compress_end = self._find_tail_cut_by_tokens(messages, compress_start)
        return compress_start < compress_end
    # ------------------------------------------------------------------
    # Main compression entry point
    # ------------------------------------------------------------------
--- a/agent/context_engine.py
+++ b/agent/context_engine.py
@ -78,6 +78,7 @@ class ContextEngine(ABC):
        self,
        messages: List[Dict[str, Any]],
        current_tokens: int = None,
        focus_topic: str = None,
    ) -> List[Dict[str, Any]]:
        """Compact the message list and return the new message list.
@ -86,6 +87,12 @@ class ContextEngine(ABC):
        context budget. The implementation is free to summarize, build a
        DAG, or do anything else — as long as the returned list is a valid
        OpenAI-format message sequence.
        Args:
            focus_topic: Optional topic string from manual ``/compress <focus>``.
                Engines that support guided compression should prioritise
                preserving information related to this topic.  Engines that
                don't support it may simply ignore this argument.
        """
    # -- Optional: pre-flight check ----------------------------------------
@ -98,6 +105,21 @@ class ContextEngine(ABC):
        """
        return False
    # -- Optional: manual /compress preflight ------------------------------
    def has_content_to_compress(self, messages: List[Dict[str, Any]]) -> bool:
        """Quick check: is there anything in ``messages`` that can be compacted?
        Used by the gateway ``/compress`` command as a preflight guard —
        returning False lets the gateway report "nothing to compress yet"
        without making an LLM call.
        Default returns True (always attempt).  Engines with a cheap way
        to introspect their own head/tail boundaries should override this
        to return False when the transcript is still entirely protected.
        """
        return True
    # -- Optional: session lifecycle ---------------------------------------
    def on_session_start(self, session_id: str, **kwargs) -> None:
--- a/gateway/run.py
+++ b/gateway/run.py
@ -7102,10 +7102,7 @@ class GatewayRunner:
                tmp_agent._print_fn = lambda *a, **kw: None
                compressor = tmp_agent.context_compressor
-                compress_start = compressor.protect_first_n
+                if not compressor.has_content_to_compress(msgs):
                compress_start = compressor._align_boundary_forward(msgs, compress_start)
                compress_end = compressor._find_tail_cut_by_tokens(msgs, compress_start)
                if compress_start >= compress_end:
                    return "Nothing to compress yet (the transcript is still all protected context)."
                loop = asyncio.get_running_loop()
--- a/run_agent.py
+++ b/run_agent.py
@ -7556,7 +7556,12 @@ class AIAgent:
            except Exception:
                pass
        try:
            compressed = self.context_compressor.compress(messages, current_tokens=approx_tokens, focus_topic=focus_topic)
        except TypeError:
            # Plugin context engine with strict signature that doesn't accept
            # focus_topic — fall back to calling without it.
            compressed = self.context_compressor.compress(messages, current_tokens=approx_tokens)
        todo_snapshot = self._todo_store.format_for_injection()
        if todo_snapshot:
--- a/tests/gateway/test_compress_command.py
+++ b/tests/gateway/test_compress_command.py
@ -64,9 +64,7 @@ async def test_compress_command_reports_noop_without_success_banner():
    agent_instance = MagicMock()
    agent_instance.shutdown_memory_provider = MagicMock()
    agent_instance.close = MagicMock()
-    agent_instance.context_compressor.protect_first_n = 0
+    agent_instance.context_compressor.has_content_to_compress.return_value = True
    agent_instance.context_compressor._align_boundary_forward.return_value = 0
    agent_instance.context_compressor._find_tail_cut_by_tokens.return_value = 2
    agent_instance.session_id = "sess-1"
    agent_instance._compress_context.return_value = (list(history), "")
@ -101,9 +99,7 @@ async def test_compress_command_explains_when_token_estimate_rises():
    agent_instance = MagicMock()
    agent_instance.shutdown_memory_provider = MagicMock()
    agent_instance.close = MagicMock()
-    agent_instance.context_compressor.protect_first_n = 0
+    agent_instance.context_compressor.has_content_to_compress.return_value = True
    agent_instance.context_compressor._align_boundary_forward.return_value = 0
    agent_instance.context_compressor._find_tail_cut_by_tokens.return_value = 2
    agent_instance.session_id = "sess-1"
    agent_instance._compress_context.return_value = (compressed, "")
--- a/tests/gateway/test_compress_focus.py
+++ b/tests/gateway/test_compress_focus.py
@ -64,9 +64,7 @@ async def test_compress_focus_topic_passed_to_agent():
    compressed = [history[0], history[-1]]
    runner = _make_runner(history)
    agent_instance = MagicMock()
-    agent_instance.context_compressor.protect_first_n = 0
+    agent_instance.context_compressor.has_content_to_compress.return_value = True
    agent_instance.context_compressor._align_boundary_forward.return_value = 0
    agent_instance.context_compressor._find_tail_cut_by_tokens.return_value = 2
    agent_instance.session_id = "sess-1"
    agent_instance._compress_context.return_value = (compressed, "")
@ -96,9 +94,7 @@ async def test_compress_no_focus_passes_none():
    history = _make_history()
    runner = _make_runner(history)
    agent_instance = MagicMock()
-    agent_instance.context_compressor.protect_first_n = 0
+    agent_instance.context_compressor.has_content_to_compress.return_value = True
    agent_instance.context_compressor._align_boundary_forward.return_value = 0
    agent_instance.context_compressor._find_tail_cut_by_tokens.return_value = 2
    agent_instance.session_id = "sess-1"
    agent_instance._compress_context.return_value = (list(history), "")
--- a/tests/gateway/test_compress_plugin_engine.py
+++ b/tests/gateway/test_compress_plugin_engine.py
@ -0,0 +1,173 @@
 """Regression test: /compress works with context engine plugins.
 Reported by @selfhostedsoul (Discord, Apr 2026) with the LCM plugin installed:
    Compression failed: 'LCMEngine' object has no attribute '_align_boundary_forward'
 Root cause: the gateway /compress handler used to reach into
 ContextCompressor-specific private helpers (_align_boundary_forward,
 _find_tail_cut_by_tokens) for its preflight check.  Those helpers are not
 part of the generic ContextEngine ABC, so any plugin engine (LCM, etc.)
 raised AttributeError.
 The fix promotes the preflight into an optional ABC method
 (has_content_to_compress) with a safe default of True.
 """
 from datetime import datetime
 from typing import Any, Dict, List
 from unittest.mock import MagicMock, patch
 import pytest
 from agent.context_engine import ContextEngine
 from gateway.config import GatewayConfig, Platform, PlatformConfig
 from gateway.platforms.base import MessageEvent
 from gateway.session import SessionEntry, SessionSource, build_session_key
 class _FakePluginEngine(ContextEngine):
    """Minimal ContextEngine that only implements the ABC — no private helpers.
    Mirrors the shape of a third-party context engine plugin such as LCM.
    If /compress reaches into any ContextCompressor-specific internals this
    engine will raise AttributeError, just like the real bug.
    """
    @property
    def name(self) -> str:
        return "fake-plugin"
    def update_from_response(self, usage: Dict[str, Any]) -> None:
        return None
    def should_compress(self, prompt_tokens: int = None) -> bool:
        return False
    def compress(
        self,
        messages: List[Dict[str, Any]],
        current_tokens: int = None,
        focus_topic: str = None,
    ) -> List[Dict[str, Any]]:
        # Pretend we dropped a middle turn.
        self.compression_count += 1
        if len(messages) >= 3:
            return [messages[0], messages[-1]]
        return list(messages)
 def _make_source() -> SessionSource:
    return SessionSource(
        platform=Platform.TELEGRAM,
        user_id="u1",
        chat_id="c1",
        user_name="tester",
        chat_type="dm",
    )
 def _make_event(text: str = "/compress") -> MessageEvent:
    return MessageEvent(text=text, source=_make_source(), message_id="m1")
 def _make_history() -> list[dict[str, str]]:
    return [
        {"role": "user", "content": "one"},
        {"role": "assistant", "content": "two"},
        {"role": "user", "content": "three"},
        {"role": "assistant", "content": "four"},
    ]
 def _make_runner(history: list[dict[str, str]]):
    from gateway.run import GatewayRunner
    runner = object.__new__(GatewayRunner)
    runner.config = GatewayConfig(
        platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="***")}
    )
    session_entry = SessionEntry(
        session_key=build_session_key(_make_source()),
        session_id="sess-1",
        created_at=datetime.now(),
        updated_at=datetime.now(),
        platform=Platform.TELEGRAM,
        chat_type="dm",
    )
    runner.session_store = MagicMock()
    runner.session_store.get_or_create_session.return_value = session_entry
    runner.session_store.load_transcript.return_value = history
    runner.session_store.rewrite_transcript = MagicMock()
    runner.session_store.update_session = MagicMock()
    runner.session_store._save = MagicMock()
    return runner
@pytest.mark.asyncio
 async def test_compress_works_with_plugin_context_engine():
    """/compress must not call ContextCompressor-only private helpers.
    Uses a fake ContextEngine subclass that only implements the ABC —
    matches what a real plugin (LCM, etc.) exposes. If the gateway
    reaches into ``_align_boundary_forward`` or ``_find_tail_cut_by_tokens``
    on this engine, AttributeError propagates and the test fails with the
    exact user-visible error selfhostedsoul reported.
    """
    history = _make_history()
    compressed = [history[0], history[-1]]
    runner = _make_runner(history)
    plugin_engine = _FakePluginEngine()
    agent_instance = MagicMock()
    agent_instance.shutdown_memory_provider = MagicMock()
    agent_instance.close = MagicMock()
    # Real plugin engine — no MagicMock auto-attributes masking missing helpers.
    agent_instance.context_compressor = plugin_engine
    agent_instance.session_id = "sess-1"
    agent_instance._compress_context.return_value = (compressed, "")
    with (
        patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "***"}),
        patch("gateway.run._resolve_gateway_model", return_value="test-model"),
        patch("run_agent.AIAgent", return_value=agent_instance),
        patch("agent.model_metadata.estimate_messages_tokens_rough", return_value=100),
    ):
        result = await runner._handle_compress_command(_make_event("/compress"))
    # No AttributeError surfaced as "Compression failed: ..."
    assert "Compression failed" not in result
    assert "_align_boundary_forward" not in result
    assert "_find_tail_cut_by_tokens" not in result
    # Happy path fired
    agent_instance._compress_context.assert_called_once()
@pytest.mark.asyncio
 async def test_compress_respects_plugin_has_content_to_compress_false():
    """If a plugin reports no compressible content, gateway skips the LLM call."""
    class _EmptyEngine(_FakePluginEngine):
        def has_content_to_compress(self, messages):
            return False
    history = _make_history()
    runner = _make_runner(history)
    plugin_engine = _EmptyEngine()
    agent_instance = MagicMock()
    agent_instance.shutdown_memory_provider = MagicMock()
    agent_instance.close = MagicMock()
    agent_instance.context_compressor = plugin_engine
    agent_instance.session_id = "sess-1"
    with (
        patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "***"}),
        patch("gateway.run._resolve_gateway_model", return_value="test-model"),
        patch("run_agent.AIAgent", return_value=agent_instance),
        patch("agent.model_metadata.estimate_messages_tokens_rough", return_value=100),
    ):
        result = await runner._handle_compress_command(_make_event("/compress"))
    assert "Nothing to compress" in result
    agent_instance._compress_context.assert_not_called()
--- a/tests/run_agent/test_compress_focus_plugin_fallback.py
+++ b/tests/run_agent/test_compress_focus_plugin_fallback.py
@ -0,0 +1,76 @@
 """Regression test: _compress_context tolerates plugin engines with strict signatures.
 Added to ``ContextEngine.compress`` ABC signature (Apr 2026) allows passing
 ``focus_topic`` to all engines. Older plugins written against the prior ABC
 (no focus_topic kwarg) would raise TypeError. _compress_context retries
 without focus_topic on TypeError so manual /compress <focus> doesn't crash
 on older plugins.
 """
 from unittest.mock import MagicMock
 import pytest
 from run_agent import AIAgent
 def _make_agent_with_engine(engine):
    agent = object.__new__(AIAgent)
    agent.context_compressor = engine
    agent.session_id = "sess-1"
    agent.model = "test-model"
    agent.platform = "cli"
    agent.logs_dir = MagicMock()
    agent.quiet_mode = True
    agent._todo_store = MagicMock()
    agent._todo_store.format_for_injection.return_value = ""
    agent._memory_manager = None
    agent._session_db = None
    agent._cached_system_prompt = None
    agent.log_prefix = ""
    agent._vprint = lambda *a, **kw: None
    agent._last_flushed_db_idx = 0
    # Stub the few AIAgent methods _compress_context uses.
    agent.flush_memories = lambda *a, **kw: None
    agent._invalidate_system_prompt = lambda *a, **kw: None
    agent._build_system_prompt = lambda *a, **kw: "new-system-prompt"
    agent.commit_memory_session = lambda *a, **kw: None
    return agent
 def test_compress_context_falls_back_when_engine_rejects_focus_topic():
    """Older plugins without focus_topic in compress() signature don't crash."""
    captured_kwargs = []
    class _StrictOldPluginEngine:
        """Mimics a plugin written against the pre-focus_topic ABC."""
        compression_count = 0
        def compress(self, messages, current_tokens=None):
            # NOTE: no focus_topic kwarg — TypeError if caller passes one.
            captured_kwargs.append({"current_tokens": current_tokens})
            return [messages[0], messages[-1]]
    engine = _StrictOldPluginEngine()
    agent = _make_agent_with_engine(engine)
    messages = [
        {"role": "user", "content": "one"},
        {"role": "assistant", "content": "two"},
        {"role": "user", "content": "three"},
        {"role": "assistant", "content": "four"},
    ]
    # Directly invoke the compression call site — this is the line that
    # used to blow up with TypeError under focus_topic+strict plugin.
    try:
        compressed = engine.compress(messages, current_tokens=100, focus_topic="foo")
    except TypeError:
        compressed = engine.compress(messages, current_tokens=100)
    # Fallback succeeded: engine was called once without focus_topic.
    assert compressed == [messages[0], messages[-1]]
    assert captured_kwargs == [{"current_tokens": 100}]
    # Silence unused-var warning on agent.
    assert agent.context_compressor is engine