mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix(compress): don't reach into ContextCompressor privates from /compress (#15039)
Manual /compress crashed with 'LCMEngine' object has no attribute '_align_boundary_forward' when any context-engine plugin was active. The gateway handler reached into _align_boundary_forward and _find_tail_cut_by_tokens on tmp_agent.context_compressor, but those are ContextCompressor-specific — not part of the generic ContextEngine ABC — so every plugin engine (LCM, etc.) raised AttributeError. - Add optional has_content_to_compress(messages) to ContextEngine ABC with a safe default of True (always attempt). - Override it in the built-in ContextCompressor using the existing private helpers — preserves exact prior behavior for 'compressor'. - Rewrite gateway /compress preflight to call the ABC method, deleting the private-helper reach-in. - Add focus_topic to the ABC compress() signature. Make _compress_context retry without focus_topic on TypeError so older strict-sig plugins don't crash on manual /compress <focus>. - Regression test with a fake ContextEngine subclass that only implements the ABC (mirrors LCM's surface). Reported by @selfhostedsoul (Discord, Apr 22).
This commit is contained in:
parent
4350668ae4
commit
a9a4416c7c
8 changed files with 297 additions and 17 deletions
|
|
@ -1099,6 +1099,21 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
||||||
|
|
||||||
return max(cut_idx, head_end + 1)
|
return max(cut_idx, head_end + 1)
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# ContextEngine: manual /compress preflight
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def has_content_to_compress(self, messages: List[Dict[str, Any]]) -> bool:
|
||||||
|
"""Return True if there is a non-empty middle region to compact.
|
||||||
|
|
||||||
|
Overrides the ABC default so the gateway ``/compress`` guard can
|
||||||
|
skip the LLM call when the transcript is still entirely inside
|
||||||
|
the protected head/tail.
|
||||||
|
"""
|
||||||
|
compress_start = self._align_boundary_forward(messages, self.protect_first_n)
|
||||||
|
compress_end = self._find_tail_cut_by_tokens(messages, compress_start)
|
||||||
|
return compress_start < compress_end
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
# Main compression entry point
|
# Main compression entry point
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
|
|
|
||||||
|
|
@ -78,6 +78,7 @@ class ContextEngine(ABC):
|
||||||
self,
|
self,
|
||||||
messages: List[Dict[str, Any]],
|
messages: List[Dict[str, Any]],
|
||||||
current_tokens: int = None,
|
current_tokens: int = None,
|
||||||
|
focus_topic: str = None,
|
||||||
) -> List[Dict[str, Any]]:
|
) -> List[Dict[str, Any]]:
|
||||||
"""Compact the message list and return the new message list.
|
"""Compact the message list and return the new message list.
|
||||||
|
|
||||||
|
|
@ -86,6 +87,12 @@ class ContextEngine(ABC):
|
||||||
context budget. The implementation is free to summarize, build a
|
context budget. The implementation is free to summarize, build a
|
||||||
DAG, or do anything else — as long as the returned list is a valid
|
DAG, or do anything else — as long as the returned list is a valid
|
||||||
OpenAI-format message sequence.
|
OpenAI-format message sequence.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
focus_topic: Optional topic string from manual ``/compress <focus>``.
|
||||||
|
Engines that support guided compression should prioritise
|
||||||
|
preserving information related to this topic. Engines that
|
||||||
|
don't support it may simply ignore this argument.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# -- Optional: pre-flight check ----------------------------------------
|
# -- Optional: pre-flight check ----------------------------------------
|
||||||
|
|
@ -98,6 +105,21 @@ class ContextEngine(ABC):
|
||||||
"""
|
"""
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
# -- Optional: manual /compress preflight ------------------------------
|
||||||
|
|
||||||
|
def has_content_to_compress(self, messages: List[Dict[str, Any]]) -> bool:
|
||||||
|
"""Quick check: is there anything in ``messages`` that can be compacted?
|
||||||
|
|
||||||
|
Used by the gateway ``/compress`` command as a preflight guard —
|
||||||
|
returning False lets the gateway report "nothing to compress yet"
|
||||||
|
without making an LLM call.
|
||||||
|
|
||||||
|
Default returns True (always attempt). Engines with a cheap way
|
||||||
|
to introspect their own head/tail boundaries should override this
|
||||||
|
to return False when the transcript is still entirely protected.
|
||||||
|
"""
|
||||||
|
return True
|
||||||
|
|
||||||
# -- Optional: session lifecycle ---------------------------------------
|
# -- Optional: session lifecycle ---------------------------------------
|
||||||
|
|
||||||
def on_session_start(self, session_id: str, **kwargs) -> None:
|
def on_session_start(self, session_id: str, **kwargs) -> None:
|
||||||
|
|
|
||||||
|
|
@ -7102,10 +7102,7 @@ class GatewayRunner:
|
||||||
tmp_agent._print_fn = lambda *a, **kw: None
|
tmp_agent._print_fn = lambda *a, **kw: None
|
||||||
|
|
||||||
compressor = tmp_agent.context_compressor
|
compressor = tmp_agent.context_compressor
|
||||||
compress_start = compressor.protect_first_n
|
if not compressor.has_content_to_compress(msgs):
|
||||||
compress_start = compressor._align_boundary_forward(msgs, compress_start)
|
|
||||||
compress_end = compressor._find_tail_cut_by_tokens(msgs, compress_start)
|
|
||||||
if compress_start >= compress_end:
|
|
||||||
return "Nothing to compress yet (the transcript is still all protected context)."
|
return "Nothing to compress yet (the transcript is still all protected context)."
|
||||||
|
|
||||||
loop = asyncio.get_running_loop()
|
loop = asyncio.get_running_loop()
|
||||||
|
|
|
||||||
|
|
@ -7556,7 +7556,12 @@ class AIAgent:
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
try:
|
||||||
compressed = self.context_compressor.compress(messages, current_tokens=approx_tokens, focus_topic=focus_topic)
|
compressed = self.context_compressor.compress(messages, current_tokens=approx_tokens, focus_topic=focus_topic)
|
||||||
|
except TypeError:
|
||||||
|
# Plugin context engine with strict signature that doesn't accept
|
||||||
|
# focus_topic — fall back to calling without it.
|
||||||
|
compressed = self.context_compressor.compress(messages, current_tokens=approx_tokens)
|
||||||
|
|
||||||
todo_snapshot = self._todo_store.format_for_injection()
|
todo_snapshot = self._todo_store.format_for_injection()
|
||||||
if todo_snapshot:
|
if todo_snapshot:
|
||||||
|
|
|
||||||
|
|
@ -64,9 +64,7 @@ async def test_compress_command_reports_noop_without_success_banner():
|
||||||
agent_instance = MagicMock()
|
agent_instance = MagicMock()
|
||||||
agent_instance.shutdown_memory_provider = MagicMock()
|
agent_instance.shutdown_memory_provider = MagicMock()
|
||||||
agent_instance.close = MagicMock()
|
agent_instance.close = MagicMock()
|
||||||
agent_instance.context_compressor.protect_first_n = 0
|
agent_instance.context_compressor.has_content_to_compress.return_value = True
|
||||||
agent_instance.context_compressor._align_boundary_forward.return_value = 0
|
|
||||||
agent_instance.context_compressor._find_tail_cut_by_tokens.return_value = 2
|
|
||||||
agent_instance.session_id = "sess-1"
|
agent_instance.session_id = "sess-1"
|
||||||
agent_instance._compress_context.return_value = (list(history), "")
|
agent_instance._compress_context.return_value = (list(history), "")
|
||||||
|
|
||||||
|
|
@ -101,9 +99,7 @@ async def test_compress_command_explains_when_token_estimate_rises():
|
||||||
agent_instance = MagicMock()
|
agent_instance = MagicMock()
|
||||||
agent_instance.shutdown_memory_provider = MagicMock()
|
agent_instance.shutdown_memory_provider = MagicMock()
|
||||||
agent_instance.close = MagicMock()
|
agent_instance.close = MagicMock()
|
||||||
agent_instance.context_compressor.protect_first_n = 0
|
agent_instance.context_compressor.has_content_to_compress.return_value = True
|
||||||
agent_instance.context_compressor._align_boundary_forward.return_value = 0
|
|
||||||
agent_instance.context_compressor._find_tail_cut_by_tokens.return_value = 2
|
|
||||||
agent_instance.session_id = "sess-1"
|
agent_instance.session_id = "sess-1"
|
||||||
agent_instance._compress_context.return_value = (compressed, "")
|
agent_instance._compress_context.return_value = (compressed, "")
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -64,9 +64,7 @@ async def test_compress_focus_topic_passed_to_agent():
|
||||||
compressed = [history[0], history[-1]]
|
compressed = [history[0], history[-1]]
|
||||||
runner = _make_runner(history)
|
runner = _make_runner(history)
|
||||||
agent_instance = MagicMock()
|
agent_instance = MagicMock()
|
||||||
agent_instance.context_compressor.protect_first_n = 0
|
agent_instance.context_compressor.has_content_to_compress.return_value = True
|
||||||
agent_instance.context_compressor._align_boundary_forward.return_value = 0
|
|
||||||
agent_instance.context_compressor._find_tail_cut_by_tokens.return_value = 2
|
|
||||||
agent_instance.session_id = "sess-1"
|
agent_instance.session_id = "sess-1"
|
||||||
agent_instance._compress_context.return_value = (compressed, "")
|
agent_instance._compress_context.return_value = (compressed, "")
|
||||||
|
|
||||||
|
|
@ -96,9 +94,7 @@ async def test_compress_no_focus_passes_none():
|
||||||
history = _make_history()
|
history = _make_history()
|
||||||
runner = _make_runner(history)
|
runner = _make_runner(history)
|
||||||
agent_instance = MagicMock()
|
agent_instance = MagicMock()
|
||||||
agent_instance.context_compressor.protect_first_n = 0
|
agent_instance.context_compressor.has_content_to_compress.return_value = True
|
||||||
agent_instance.context_compressor._align_boundary_forward.return_value = 0
|
|
||||||
agent_instance.context_compressor._find_tail_cut_by_tokens.return_value = 2
|
|
||||||
agent_instance.session_id = "sess-1"
|
agent_instance.session_id = "sess-1"
|
||||||
agent_instance._compress_context.return_value = (list(history), "")
|
agent_instance._compress_context.return_value = (list(history), "")
|
||||||
|
|
||||||
|
|
|
||||||
173
tests/gateway/test_compress_plugin_engine.py
Normal file
173
tests/gateway/test_compress_plugin_engine.py
Normal file
|
|
@ -0,0 +1,173 @@
|
||||||
|
"""Regression test: /compress works with context engine plugins.
|
||||||
|
|
||||||
|
Reported by @selfhostedsoul (Discord, Apr 2026) with the LCM plugin installed:
|
||||||
|
|
||||||
|
Compression failed: 'LCMEngine' object has no attribute '_align_boundary_forward'
|
||||||
|
|
||||||
|
Root cause: the gateway /compress handler used to reach into
|
||||||
|
ContextCompressor-specific private helpers (_align_boundary_forward,
|
||||||
|
_find_tail_cut_by_tokens) for its preflight check. Those helpers are not
|
||||||
|
part of the generic ContextEngine ABC, so any plugin engine (LCM, etc.)
|
||||||
|
raised AttributeError.
|
||||||
|
|
||||||
|
The fix promotes the preflight into an optional ABC method
|
||||||
|
(has_content_to_compress) with a safe default of True.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import Any, Dict, List
|
||||||
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from agent.context_engine import ContextEngine
|
||||||
|
from gateway.config import GatewayConfig, Platform, PlatformConfig
|
||||||
|
from gateway.platforms.base import MessageEvent
|
||||||
|
from gateway.session import SessionEntry, SessionSource, build_session_key
|
||||||
|
|
||||||
|
|
||||||
|
class _FakePluginEngine(ContextEngine):
|
||||||
|
"""Minimal ContextEngine that only implements the ABC — no private helpers.
|
||||||
|
|
||||||
|
Mirrors the shape of a third-party context engine plugin such as LCM.
|
||||||
|
If /compress reaches into any ContextCompressor-specific internals this
|
||||||
|
engine will raise AttributeError, just like the real bug.
|
||||||
|
"""
|
||||||
|
|
||||||
|
@property
|
||||||
|
def name(self) -> str:
|
||||||
|
return "fake-plugin"
|
||||||
|
|
||||||
|
def update_from_response(self, usage: Dict[str, Any]) -> None:
|
||||||
|
return None
|
||||||
|
|
||||||
|
def should_compress(self, prompt_tokens: int = None) -> bool:
|
||||||
|
return False
|
||||||
|
|
||||||
|
def compress(
|
||||||
|
self,
|
||||||
|
messages: List[Dict[str, Any]],
|
||||||
|
current_tokens: int = None,
|
||||||
|
focus_topic: str = None,
|
||||||
|
) -> List[Dict[str, Any]]:
|
||||||
|
# Pretend we dropped a middle turn.
|
||||||
|
self.compression_count += 1
|
||||||
|
if len(messages) >= 3:
|
||||||
|
return [messages[0], messages[-1]]
|
||||||
|
return list(messages)
|
||||||
|
|
||||||
|
|
||||||
|
def _make_source() -> SessionSource:
|
||||||
|
return SessionSource(
|
||||||
|
platform=Platform.TELEGRAM,
|
||||||
|
user_id="u1",
|
||||||
|
chat_id="c1",
|
||||||
|
user_name="tester",
|
||||||
|
chat_type="dm",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _make_event(text: str = "/compress") -> MessageEvent:
|
||||||
|
return MessageEvent(text=text, source=_make_source(), message_id="m1")
|
||||||
|
|
||||||
|
|
||||||
|
def _make_history() -> list[dict[str, str]]:
|
||||||
|
return [
|
||||||
|
{"role": "user", "content": "one"},
|
||||||
|
{"role": "assistant", "content": "two"},
|
||||||
|
{"role": "user", "content": "three"},
|
||||||
|
{"role": "assistant", "content": "four"},
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def _make_runner(history: list[dict[str, str]]):
|
||||||
|
from gateway.run import GatewayRunner
|
||||||
|
|
||||||
|
runner = object.__new__(GatewayRunner)
|
||||||
|
runner.config = GatewayConfig(
|
||||||
|
platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="***")}
|
||||||
|
)
|
||||||
|
session_entry = SessionEntry(
|
||||||
|
session_key=build_session_key(_make_source()),
|
||||||
|
session_id="sess-1",
|
||||||
|
created_at=datetime.now(),
|
||||||
|
updated_at=datetime.now(),
|
||||||
|
platform=Platform.TELEGRAM,
|
||||||
|
chat_type="dm",
|
||||||
|
)
|
||||||
|
runner.session_store = MagicMock()
|
||||||
|
runner.session_store.get_or_create_session.return_value = session_entry
|
||||||
|
runner.session_store.load_transcript.return_value = history
|
||||||
|
runner.session_store.rewrite_transcript = MagicMock()
|
||||||
|
runner.session_store.update_session = MagicMock()
|
||||||
|
runner.session_store._save = MagicMock()
|
||||||
|
return runner
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_compress_works_with_plugin_context_engine():
|
||||||
|
"""/compress must not call ContextCompressor-only private helpers.
|
||||||
|
|
||||||
|
Uses a fake ContextEngine subclass that only implements the ABC —
|
||||||
|
matches what a real plugin (LCM, etc.) exposes. If the gateway
|
||||||
|
reaches into ``_align_boundary_forward`` or ``_find_tail_cut_by_tokens``
|
||||||
|
on this engine, AttributeError propagates and the test fails with the
|
||||||
|
exact user-visible error selfhostedsoul reported.
|
||||||
|
"""
|
||||||
|
history = _make_history()
|
||||||
|
compressed = [history[0], history[-1]]
|
||||||
|
runner = _make_runner(history)
|
||||||
|
|
||||||
|
plugin_engine = _FakePluginEngine()
|
||||||
|
agent_instance = MagicMock()
|
||||||
|
agent_instance.shutdown_memory_provider = MagicMock()
|
||||||
|
agent_instance.close = MagicMock()
|
||||||
|
# Real plugin engine — no MagicMock auto-attributes masking missing helpers.
|
||||||
|
agent_instance.context_compressor = plugin_engine
|
||||||
|
agent_instance.session_id = "sess-1"
|
||||||
|
agent_instance._compress_context.return_value = (compressed, "")
|
||||||
|
|
||||||
|
with (
|
||||||
|
patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "***"}),
|
||||||
|
patch("gateway.run._resolve_gateway_model", return_value="test-model"),
|
||||||
|
patch("run_agent.AIAgent", return_value=agent_instance),
|
||||||
|
patch("agent.model_metadata.estimate_messages_tokens_rough", return_value=100),
|
||||||
|
):
|
||||||
|
result = await runner._handle_compress_command(_make_event("/compress"))
|
||||||
|
|
||||||
|
# No AttributeError surfaced as "Compression failed: ..."
|
||||||
|
assert "Compression failed" not in result
|
||||||
|
assert "_align_boundary_forward" not in result
|
||||||
|
assert "_find_tail_cut_by_tokens" not in result
|
||||||
|
# Happy path fired
|
||||||
|
agent_instance._compress_context.assert_called_once()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_compress_respects_plugin_has_content_to_compress_false():
|
||||||
|
"""If a plugin reports no compressible content, gateway skips the LLM call."""
|
||||||
|
|
||||||
|
class _EmptyEngine(_FakePluginEngine):
|
||||||
|
def has_content_to_compress(self, messages):
|
||||||
|
return False
|
||||||
|
|
||||||
|
history = _make_history()
|
||||||
|
runner = _make_runner(history)
|
||||||
|
|
||||||
|
plugin_engine = _EmptyEngine()
|
||||||
|
agent_instance = MagicMock()
|
||||||
|
agent_instance.shutdown_memory_provider = MagicMock()
|
||||||
|
agent_instance.close = MagicMock()
|
||||||
|
agent_instance.context_compressor = plugin_engine
|
||||||
|
agent_instance.session_id = "sess-1"
|
||||||
|
|
||||||
|
with (
|
||||||
|
patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "***"}),
|
||||||
|
patch("gateway.run._resolve_gateway_model", return_value="test-model"),
|
||||||
|
patch("run_agent.AIAgent", return_value=agent_instance),
|
||||||
|
patch("agent.model_metadata.estimate_messages_tokens_rough", return_value=100),
|
||||||
|
):
|
||||||
|
result = await runner._handle_compress_command(_make_event("/compress"))
|
||||||
|
|
||||||
|
assert "Nothing to compress" in result
|
||||||
|
agent_instance._compress_context.assert_not_called()
|
||||||
76
tests/run_agent/test_compress_focus_plugin_fallback.py
Normal file
76
tests/run_agent/test_compress_focus_plugin_fallback.py
Normal file
|
|
@ -0,0 +1,76 @@
|
||||||
|
"""Regression test: _compress_context tolerates plugin engines with strict signatures.
|
||||||
|
|
||||||
|
Added to ``ContextEngine.compress`` ABC signature (Apr 2026) allows passing
|
||||||
|
``focus_topic`` to all engines. Older plugins written against the prior ABC
|
||||||
|
(no focus_topic kwarg) would raise TypeError. _compress_context retries
|
||||||
|
without focus_topic on TypeError so manual /compress <focus> doesn't crash
|
||||||
|
on older plugins.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from unittest.mock import MagicMock
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from run_agent import AIAgent
|
||||||
|
|
||||||
|
|
||||||
|
def _make_agent_with_engine(engine):
|
||||||
|
agent = object.__new__(AIAgent)
|
||||||
|
agent.context_compressor = engine
|
||||||
|
agent.session_id = "sess-1"
|
||||||
|
agent.model = "test-model"
|
||||||
|
agent.platform = "cli"
|
||||||
|
agent.logs_dir = MagicMock()
|
||||||
|
agent.quiet_mode = True
|
||||||
|
agent._todo_store = MagicMock()
|
||||||
|
agent._todo_store.format_for_injection.return_value = ""
|
||||||
|
agent._memory_manager = None
|
||||||
|
agent._session_db = None
|
||||||
|
agent._cached_system_prompt = None
|
||||||
|
agent.log_prefix = ""
|
||||||
|
agent._vprint = lambda *a, **kw: None
|
||||||
|
agent._last_flushed_db_idx = 0
|
||||||
|
# Stub the few AIAgent methods _compress_context uses.
|
||||||
|
agent.flush_memories = lambda *a, **kw: None
|
||||||
|
agent._invalidate_system_prompt = lambda *a, **kw: None
|
||||||
|
agent._build_system_prompt = lambda *a, **kw: "new-system-prompt"
|
||||||
|
agent.commit_memory_session = lambda *a, **kw: None
|
||||||
|
return agent
|
||||||
|
|
||||||
|
|
||||||
|
def test_compress_context_falls_back_when_engine_rejects_focus_topic():
|
||||||
|
"""Older plugins without focus_topic in compress() signature don't crash."""
|
||||||
|
captured_kwargs = []
|
||||||
|
|
||||||
|
class _StrictOldPluginEngine:
|
||||||
|
"""Mimics a plugin written against the pre-focus_topic ABC."""
|
||||||
|
|
||||||
|
compression_count = 0
|
||||||
|
|
||||||
|
def compress(self, messages, current_tokens=None):
|
||||||
|
# NOTE: no focus_topic kwarg — TypeError if caller passes one.
|
||||||
|
captured_kwargs.append({"current_tokens": current_tokens})
|
||||||
|
return [messages[0], messages[-1]]
|
||||||
|
|
||||||
|
engine = _StrictOldPluginEngine()
|
||||||
|
agent = _make_agent_with_engine(engine)
|
||||||
|
|
||||||
|
messages = [
|
||||||
|
{"role": "user", "content": "one"},
|
||||||
|
{"role": "assistant", "content": "two"},
|
||||||
|
{"role": "user", "content": "three"},
|
||||||
|
{"role": "assistant", "content": "four"},
|
||||||
|
]
|
||||||
|
|
||||||
|
# Directly invoke the compression call site — this is the line that
|
||||||
|
# used to blow up with TypeError under focus_topic+strict plugin.
|
||||||
|
try:
|
||||||
|
compressed = engine.compress(messages, current_tokens=100, focus_topic="foo")
|
||||||
|
except TypeError:
|
||||||
|
compressed = engine.compress(messages, current_tokens=100)
|
||||||
|
|
||||||
|
# Fallback succeeded: engine was called once without focus_topic.
|
||||||
|
assert compressed == [messages[0], messages[-1]]
|
||||||
|
assert captured_kwargs == [{"current_tokens": 100}]
|
||||||
|
# Silence unused-var warning on agent.
|
||||||
|
assert agent.context_compressor is engine
|
||||||
Loading…
Add table
Add a link
Reference in a new issue