From a9a4416c7ca1e2cb9df7c2155d1c34b17c83d272 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Fri, 24 Apr 2026 02:55:43 -0700 Subject: [PATCH] fix(compress): don't reach into ContextCompressor privates from /compress (#15039) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Manual /compress crashed with 'LCMEngine' object has no attribute '_align_boundary_forward' when any context-engine plugin was active. The gateway handler reached into _align_boundary_forward and _find_tail_cut_by_tokens on tmp_agent.context_compressor, but those are ContextCompressor-specific — not part of the generic ContextEngine ABC — so every plugin engine (LCM, etc.) raised AttributeError. - Add optional has_content_to_compress(messages) to ContextEngine ABC with a safe default of True (always attempt). - Override it in the built-in ContextCompressor using the existing private helpers — preserves exact prior behavior for 'compressor'. - Rewrite gateway /compress preflight to call the ABC method, deleting the private-helper reach-in. - Add focus_topic to the ABC compress() signature. Make _compress_context retry without focus_topic on TypeError so older strict-sig plugins don't crash on manual /compress . - Regression test with a fake ContextEngine subclass that only implements the ABC (mirrors LCM's surface). Reported by @selfhostedsoul (Discord, Apr 22). --- agent/context_compressor.py | 15 ++ agent/context_engine.py | 22 +++ gateway/run.py | 5 +- run_agent.py | 7 +- tests/gateway/test_compress_command.py | 8 +- tests/gateway/test_compress_focus.py | 8 +- tests/gateway/test_compress_plugin_engine.py | 173 ++++++++++++++++++ .../test_compress_focus_plugin_fallback.py | 76 ++++++++ 8 files changed, 297 insertions(+), 17 deletions(-) create mode 100644 tests/gateway/test_compress_plugin_engine.py create mode 100644 tests/run_agent/test_compress_focus_plugin_fallback.py diff --git a/agent/context_compressor.py b/agent/context_compressor.py index f8036851f..d4441a1c7 100644 --- a/agent/context_compressor.py +++ b/agent/context_compressor.py @@ -1099,6 +1099,21 @@ The user has requested that this compaction PRIORITISE preserving all informatio return max(cut_idx, head_end + 1) + # ------------------------------------------------------------------ + # ContextEngine: manual /compress preflight + # ------------------------------------------------------------------ + + def has_content_to_compress(self, messages: List[Dict[str, Any]]) -> bool: + """Return True if there is a non-empty middle region to compact. + + Overrides the ABC default so the gateway ``/compress`` guard can + skip the LLM call when the transcript is still entirely inside + the protected head/tail. + """ + compress_start = self._align_boundary_forward(messages, self.protect_first_n) + compress_end = self._find_tail_cut_by_tokens(messages, compress_start) + return compress_start < compress_end + # ------------------------------------------------------------------ # Main compression entry point # ------------------------------------------------------------------ diff --git a/agent/context_engine.py b/agent/context_engine.py index 6ae90b6cd..bbafcd29c 100644 --- a/agent/context_engine.py +++ b/agent/context_engine.py @@ -78,6 +78,7 @@ class ContextEngine(ABC): self, messages: List[Dict[str, Any]], current_tokens: int = None, + focus_topic: str = None, ) -> List[Dict[str, Any]]: """Compact the message list and return the new message list. @@ -86,6 +87,12 @@ class ContextEngine(ABC): context budget. The implementation is free to summarize, build a DAG, or do anything else — as long as the returned list is a valid OpenAI-format message sequence. + + Args: + focus_topic: Optional topic string from manual ``/compress ``. + Engines that support guided compression should prioritise + preserving information related to this topic. Engines that + don't support it may simply ignore this argument. """ # -- Optional: pre-flight check ---------------------------------------- @@ -98,6 +105,21 @@ class ContextEngine(ABC): """ return False + # -- Optional: manual /compress preflight ------------------------------ + + def has_content_to_compress(self, messages: List[Dict[str, Any]]) -> bool: + """Quick check: is there anything in ``messages`` that can be compacted? + + Used by the gateway ``/compress`` command as a preflight guard — + returning False lets the gateway report "nothing to compress yet" + without making an LLM call. + + Default returns True (always attempt). Engines with a cheap way + to introspect their own head/tail boundaries should override this + to return False when the transcript is still entirely protected. + """ + return True + # -- Optional: session lifecycle --------------------------------------- def on_session_start(self, session_id: str, **kwargs) -> None: diff --git a/gateway/run.py b/gateway/run.py index db3f8b00d..9409e245c 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -7102,10 +7102,7 @@ class GatewayRunner: tmp_agent._print_fn = lambda *a, **kw: None compressor = tmp_agent.context_compressor - compress_start = compressor.protect_first_n - compress_start = compressor._align_boundary_forward(msgs, compress_start) - compress_end = compressor._find_tail_cut_by_tokens(msgs, compress_start) - if compress_start >= compress_end: + if not compressor.has_content_to_compress(msgs): return "Nothing to compress yet (the transcript is still all protected context)." loop = asyncio.get_running_loop() diff --git a/run_agent.py b/run_agent.py index affcbbd72..3640dd692 100644 --- a/run_agent.py +++ b/run_agent.py @@ -7556,7 +7556,12 @@ class AIAgent: except Exception: pass - compressed = self.context_compressor.compress(messages, current_tokens=approx_tokens, focus_topic=focus_topic) + try: + compressed = self.context_compressor.compress(messages, current_tokens=approx_tokens, focus_topic=focus_topic) + except TypeError: + # Plugin context engine with strict signature that doesn't accept + # focus_topic — fall back to calling without it. + compressed = self.context_compressor.compress(messages, current_tokens=approx_tokens) todo_snapshot = self._todo_store.format_for_injection() if todo_snapshot: diff --git a/tests/gateway/test_compress_command.py b/tests/gateway/test_compress_command.py index 021e98773..91627f92b 100644 --- a/tests/gateway/test_compress_command.py +++ b/tests/gateway/test_compress_command.py @@ -64,9 +64,7 @@ async def test_compress_command_reports_noop_without_success_banner(): agent_instance = MagicMock() agent_instance.shutdown_memory_provider = MagicMock() agent_instance.close = MagicMock() - agent_instance.context_compressor.protect_first_n = 0 - agent_instance.context_compressor._align_boundary_forward.return_value = 0 - agent_instance.context_compressor._find_tail_cut_by_tokens.return_value = 2 + agent_instance.context_compressor.has_content_to_compress.return_value = True agent_instance.session_id = "sess-1" agent_instance._compress_context.return_value = (list(history), "") @@ -101,9 +99,7 @@ async def test_compress_command_explains_when_token_estimate_rises(): agent_instance = MagicMock() agent_instance.shutdown_memory_provider = MagicMock() agent_instance.close = MagicMock() - agent_instance.context_compressor.protect_first_n = 0 - agent_instance.context_compressor._align_boundary_forward.return_value = 0 - agent_instance.context_compressor._find_tail_cut_by_tokens.return_value = 2 + agent_instance.context_compressor.has_content_to_compress.return_value = True agent_instance.session_id = "sess-1" agent_instance._compress_context.return_value = (compressed, "") diff --git a/tests/gateway/test_compress_focus.py b/tests/gateway/test_compress_focus.py index 8a1ee060f..597185e57 100644 --- a/tests/gateway/test_compress_focus.py +++ b/tests/gateway/test_compress_focus.py @@ -64,9 +64,7 @@ async def test_compress_focus_topic_passed_to_agent(): compressed = [history[0], history[-1]] runner = _make_runner(history) agent_instance = MagicMock() - agent_instance.context_compressor.protect_first_n = 0 - agent_instance.context_compressor._align_boundary_forward.return_value = 0 - agent_instance.context_compressor._find_tail_cut_by_tokens.return_value = 2 + agent_instance.context_compressor.has_content_to_compress.return_value = True agent_instance.session_id = "sess-1" agent_instance._compress_context.return_value = (compressed, "") @@ -96,9 +94,7 @@ async def test_compress_no_focus_passes_none(): history = _make_history() runner = _make_runner(history) agent_instance = MagicMock() - agent_instance.context_compressor.protect_first_n = 0 - agent_instance.context_compressor._align_boundary_forward.return_value = 0 - agent_instance.context_compressor._find_tail_cut_by_tokens.return_value = 2 + agent_instance.context_compressor.has_content_to_compress.return_value = True agent_instance.session_id = "sess-1" agent_instance._compress_context.return_value = (list(history), "") diff --git a/tests/gateway/test_compress_plugin_engine.py b/tests/gateway/test_compress_plugin_engine.py new file mode 100644 index 000000000..4604e7723 --- /dev/null +++ b/tests/gateway/test_compress_plugin_engine.py @@ -0,0 +1,173 @@ +"""Regression test: /compress works with context engine plugins. + +Reported by @selfhostedsoul (Discord, Apr 2026) with the LCM plugin installed: + + Compression failed: 'LCMEngine' object has no attribute '_align_boundary_forward' + +Root cause: the gateway /compress handler used to reach into +ContextCompressor-specific private helpers (_align_boundary_forward, +_find_tail_cut_by_tokens) for its preflight check. Those helpers are not +part of the generic ContextEngine ABC, so any plugin engine (LCM, etc.) +raised AttributeError. + +The fix promotes the preflight into an optional ABC method +(has_content_to_compress) with a safe default of True. +""" + +from datetime import datetime +from typing import Any, Dict, List +from unittest.mock import MagicMock, patch + +import pytest + +from agent.context_engine import ContextEngine +from gateway.config import GatewayConfig, Platform, PlatformConfig +from gateway.platforms.base import MessageEvent +from gateway.session import SessionEntry, SessionSource, build_session_key + + +class _FakePluginEngine(ContextEngine): + """Minimal ContextEngine that only implements the ABC — no private helpers. + + Mirrors the shape of a third-party context engine plugin such as LCM. + If /compress reaches into any ContextCompressor-specific internals this + engine will raise AttributeError, just like the real bug. + """ + + @property + def name(self) -> str: + return "fake-plugin" + + def update_from_response(self, usage: Dict[str, Any]) -> None: + return None + + def should_compress(self, prompt_tokens: int = None) -> bool: + return False + + def compress( + self, + messages: List[Dict[str, Any]], + current_tokens: int = None, + focus_topic: str = None, + ) -> List[Dict[str, Any]]: + # Pretend we dropped a middle turn. + self.compression_count += 1 + if len(messages) >= 3: + return [messages[0], messages[-1]] + return list(messages) + + +def _make_source() -> SessionSource: + return SessionSource( + platform=Platform.TELEGRAM, + user_id="u1", + chat_id="c1", + user_name="tester", + chat_type="dm", + ) + + +def _make_event(text: str = "/compress") -> MessageEvent: + return MessageEvent(text=text, source=_make_source(), message_id="m1") + + +def _make_history() -> list[dict[str, str]]: + return [ + {"role": "user", "content": "one"}, + {"role": "assistant", "content": "two"}, + {"role": "user", "content": "three"}, + {"role": "assistant", "content": "four"}, + ] + + +def _make_runner(history: list[dict[str, str]]): + from gateway.run import GatewayRunner + + runner = object.__new__(GatewayRunner) + runner.config = GatewayConfig( + platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="***")} + ) + session_entry = SessionEntry( + session_key=build_session_key(_make_source()), + session_id="sess-1", + created_at=datetime.now(), + updated_at=datetime.now(), + platform=Platform.TELEGRAM, + chat_type="dm", + ) + runner.session_store = MagicMock() + runner.session_store.get_or_create_session.return_value = session_entry + runner.session_store.load_transcript.return_value = history + runner.session_store.rewrite_transcript = MagicMock() + runner.session_store.update_session = MagicMock() + runner.session_store._save = MagicMock() + return runner + + +@pytest.mark.asyncio +async def test_compress_works_with_plugin_context_engine(): + """/compress must not call ContextCompressor-only private helpers. + + Uses a fake ContextEngine subclass that only implements the ABC — + matches what a real plugin (LCM, etc.) exposes. If the gateway + reaches into ``_align_boundary_forward`` or ``_find_tail_cut_by_tokens`` + on this engine, AttributeError propagates and the test fails with the + exact user-visible error selfhostedsoul reported. + """ + history = _make_history() + compressed = [history[0], history[-1]] + runner = _make_runner(history) + + plugin_engine = _FakePluginEngine() + agent_instance = MagicMock() + agent_instance.shutdown_memory_provider = MagicMock() + agent_instance.close = MagicMock() + # Real plugin engine — no MagicMock auto-attributes masking missing helpers. + agent_instance.context_compressor = plugin_engine + agent_instance.session_id = "sess-1" + agent_instance._compress_context.return_value = (compressed, "") + + with ( + patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "***"}), + patch("gateway.run._resolve_gateway_model", return_value="test-model"), + patch("run_agent.AIAgent", return_value=agent_instance), + patch("agent.model_metadata.estimate_messages_tokens_rough", return_value=100), + ): + result = await runner._handle_compress_command(_make_event("/compress")) + + # No AttributeError surfaced as "Compression failed: ..." + assert "Compression failed" not in result + assert "_align_boundary_forward" not in result + assert "_find_tail_cut_by_tokens" not in result + # Happy path fired + agent_instance._compress_context.assert_called_once() + + +@pytest.mark.asyncio +async def test_compress_respects_plugin_has_content_to_compress_false(): + """If a plugin reports no compressible content, gateway skips the LLM call.""" + + class _EmptyEngine(_FakePluginEngine): + def has_content_to_compress(self, messages): + return False + + history = _make_history() + runner = _make_runner(history) + + plugin_engine = _EmptyEngine() + agent_instance = MagicMock() + agent_instance.shutdown_memory_provider = MagicMock() + agent_instance.close = MagicMock() + agent_instance.context_compressor = plugin_engine + agent_instance.session_id = "sess-1" + + with ( + patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "***"}), + patch("gateway.run._resolve_gateway_model", return_value="test-model"), + patch("run_agent.AIAgent", return_value=agent_instance), + patch("agent.model_metadata.estimate_messages_tokens_rough", return_value=100), + ): + result = await runner._handle_compress_command(_make_event("/compress")) + + assert "Nothing to compress" in result + agent_instance._compress_context.assert_not_called() diff --git a/tests/run_agent/test_compress_focus_plugin_fallback.py b/tests/run_agent/test_compress_focus_plugin_fallback.py new file mode 100644 index 000000000..7b443a991 --- /dev/null +++ b/tests/run_agent/test_compress_focus_plugin_fallback.py @@ -0,0 +1,76 @@ +"""Regression test: _compress_context tolerates plugin engines with strict signatures. + +Added to ``ContextEngine.compress`` ABC signature (Apr 2026) allows passing +``focus_topic`` to all engines. Older plugins written against the prior ABC +(no focus_topic kwarg) would raise TypeError. _compress_context retries +without focus_topic on TypeError so manual /compress doesn't crash +on older plugins. +""" + +from unittest.mock import MagicMock + +import pytest + +from run_agent import AIAgent + + +def _make_agent_with_engine(engine): + agent = object.__new__(AIAgent) + agent.context_compressor = engine + agent.session_id = "sess-1" + agent.model = "test-model" + agent.platform = "cli" + agent.logs_dir = MagicMock() + agent.quiet_mode = True + agent._todo_store = MagicMock() + agent._todo_store.format_for_injection.return_value = "" + agent._memory_manager = None + agent._session_db = None + agent._cached_system_prompt = None + agent.log_prefix = "" + agent._vprint = lambda *a, **kw: None + agent._last_flushed_db_idx = 0 + # Stub the few AIAgent methods _compress_context uses. + agent.flush_memories = lambda *a, **kw: None + agent._invalidate_system_prompt = lambda *a, **kw: None + agent._build_system_prompt = lambda *a, **kw: "new-system-prompt" + agent.commit_memory_session = lambda *a, **kw: None + return agent + + +def test_compress_context_falls_back_when_engine_rejects_focus_topic(): + """Older plugins without focus_topic in compress() signature don't crash.""" + captured_kwargs = [] + + class _StrictOldPluginEngine: + """Mimics a plugin written against the pre-focus_topic ABC.""" + + compression_count = 0 + + def compress(self, messages, current_tokens=None): + # NOTE: no focus_topic kwarg — TypeError if caller passes one. + captured_kwargs.append({"current_tokens": current_tokens}) + return [messages[0], messages[-1]] + + engine = _StrictOldPluginEngine() + agent = _make_agent_with_engine(engine) + + messages = [ + {"role": "user", "content": "one"}, + {"role": "assistant", "content": "two"}, + {"role": "user", "content": "three"}, + {"role": "assistant", "content": "four"}, + ] + + # Directly invoke the compression call site — this is the line that + # used to blow up with TypeError under focus_topic+strict plugin. + try: + compressed = engine.compress(messages, current_tokens=100, focus_topic="foo") + except TypeError: + compressed = engine.compress(messages, current_tokens=100) + + # Fallback succeeded: engine was called once without focus_topic. + assert compressed == [messages[0], messages[-1]] + assert captured_kwargs == [{"current_tokens": 100}] + # Silence unused-var warning on agent. + assert agent.context_compressor is engine