diff --git a/agent/manual_compression_feedback.py b/agent/manual_compression_feedback.py new file mode 100644 index 000000000..8f2d5e5d5 --- /dev/null +++ b/agent/manual_compression_feedback.py @@ -0,0 +1,49 @@ +"""User-facing summaries for manual compression commands.""" + +from __future__ import annotations + +from typing import Any, Sequence + + +def summarize_manual_compression( + before_messages: Sequence[dict[str, Any]], + after_messages: Sequence[dict[str, Any]], + before_tokens: int, + after_tokens: int, +) -> dict[str, Any]: + """Return consistent user-facing feedback for manual compression.""" + before_count = len(before_messages) + after_count = len(after_messages) + noop = list(after_messages) == list(before_messages) + + if noop: + headline = f"No changes from compression: {before_count} messages" + if after_tokens == before_tokens: + token_line = ( + f"Rough transcript estimate: ~{before_tokens:,} tokens (unchanged)" + ) + else: + token_line = ( + f"Rough transcript estimate: ~{before_tokens:,} → " + f"~{after_tokens:,} tokens" + ) + else: + headline = f"Compressed: {before_count} → {after_count} messages" + token_line = ( + f"Rough transcript estimate: ~{before_tokens:,} → " + f"~{after_tokens:,} tokens" + ) + + note = None + if not noop and after_count < before_count and after_tokens > before_tokens: + note = ( + "Note: fewer messages can still raise this rough transcript estimate " + "when compression rewrites the transcript into denser summaries." + ) + + return { + "noop": noop, + "headline": headline, + "token_line": token_line, + "note": note, + } diff --git a/cli.py b/cli.py index 9635a6799..223d36093 100644 --- a/cli.py +++ b/cli.py @@ -5835,21 +5835,29 @@ class HermesCLI: original_count = len(self.conversation_history) try: from agent.model_metadata import estimate_messages_tokens_rough - approx_tokens = estimate_messages_tokens_rough(self.conversation_history) + from agent.manual_compression_feedback import summarize_manual_compression + original_history = list(self.conversation_history) + approx_tokens = estimate_messages_tokens_rough(original_history) print(f"🗜️ Compressing {original_count} messages (~{approx_tokens:,} tokens)...") - compressed, _new_system = self.agent._compress_context( - self.conversation_history, + compressed, _ = self.agent._compress_context( + original_history, self.agent._cached_system_prompt or "", approx_tokens=approx_tokens, ) self.conversation_history = compressed - new_count = len(self.conversation_history) new_tokens = estimate_messages_tokens_rough(self.conversation_history) - print( - f" ✅ Compressed: {original_count} → {new_count} messages " - f"(~{approx_tokens:,} → ~{new_tokens:,} tokens)" + summary = summarize_manual_compression( + original_history, + self.conversation_history, + approx_tokens, + new_tokens, ) + icon = "🗜️" if summary["noop"] else "✅" + print(f" {icon} {summary['headline']}") + print(f" {summary['token_line']}") + if summary["note"]: + print(f" {summary['note']}") except Exception as e: print(f" ❌ Compression failed: {e}") diff --git a/gateway/run.py b/gateway/run.py index 5368a63a8..912e68a7b 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -5196,6 +5196,7 @@ class GatewayRunner: try: from run_agent import AIAgent + from agent.manual_compression_feedback import summarize_manual_compression from agent.model_metadata import estimate_messages_tokens_rough runtime_kwargs = _resolve_runtime_agent_kwargs() @@ -5250,13 +5251,17 @@ class GatewayRunner: self.session_store.update_session( session_entry.session_key, last_prompt_tokens=0 ) - new_count = len(compressed) new_tokens = estimate_messages_tokens_rough(compressed) - - return ( - f"🗜️ Compressed: {original_count} → {new_count} messages\n" - f"Rough transcript estimate: ~{approx_tokens:,} → ~{new_tokens:,} tokens" + summary = summarize_manual_compression( + msgs, + compressed, + approx_tokens, + new_tokens, ) + lines = [f"🗜️ {summary['headline']}", summary["token_line"]] + if summary["note"]: + lines.append(summary["note"]) + return "\n".join(lines) except Exception as e: logger.warning("Manual compress failed: %s", e) return f"Compression failed: {e}" diff --git a/tests/cli/test_manual_compress.py b/tests/cli/test_manual_compress.py new file mode 100644 index 000000000..d201f9cee --- /dev/null +++ b/tests/cli/test_manual_compress.py @@ -0,0 +1,66 @@ +"""Tests for CLI manual compression messaging.""" + +from unittest.mock import MagicMock, patch + +from tests.cli.test_cli_init import _make_cli + + +def _make_history() -> list[dict[str, str]]: + return [ + {"role": "user", "content": "one"}, + {"role": "assistant", "content": "two"}, + {"role": "user", "content": "three"}, + {"role": "assistant", "content": "four"}, + ] + + +def test_manual_compress_reports_noop_without_success_banner(capsys): + shell = _make_cli() + history = _make_history() + shell.conversation_history = history + shell.agent = MagicMock() + shell.agent.compression_enabled = True + shell.agent._cached_system_prompt = "" + shell.agent._compress_context.return_value = (list(history), "") + + def _estimate(messages): + assert messages == history + return 100 + + with patch("agent.model_metadata.estimate_messages_tokens_rough", side_effect=_estimate): + shell._manual_compress() + + output = capsys.readouterr().out + assert "No changes from compression" in output + assert "✅ Compressed" not in output + assert "Rough transcript estimate: ~100 tokens (unchanged)" in output + + +def test_manual_compress_explains_when_token_estimate_rises(capsys): + shell = _make_cli() + history = _make_history() + compressed = [ + history[0], + {"role": "assistant", "content": "Dense summary that still counts as more tokens."}, + history[-1], + ] + shell.conversation_history = history + shell.agent = MagicMock() + shell.agent.compression_enabled = True + shell.agent._cached_system_prompt = "" + shell.agent._compress_context.return_value = (compressed, "") + + def _estimate(messages): + if messages == history: + return 100 + if messages == compressed: + return 120 + raise AssertionError(f"unexpected transcript: {messages!r}") + + with patch("agent.model_metadata.estimate_messages_tokens_rough", side_effect=_estimate): + shell._manual_compress() + + output = capsys.readouterr().out + assert "✅ Compressed: 4 → 3 messages" in output + assert "Rough transcript estimate: ~100 → ~120 tokens" in output + assert "denser summaries" in output diff --git a/tests/gateway/test_compress_command.py b/tests/gateway/test_compress_command.py index b322b937b..edeb1f47c 100644 --- a/tests/gateway/test_compress_command.py +++ b/tests/gateway/test_compress_command.py @@ -1,163 +1,121 @@ -"""Tests for gateway /compress truthfulness.""" +"""Tests for gateway /compress user-facing messaging.""" -import sys -import types -from unittest.mock import MagicMock +from datetime import datetime +from unittest.mock import MagicMock, patch import pytest -import gateway.run as gateway_run -from gateway.config import Platform +from gateway.config import GatewayConfig, Platform, PlatformConfig from gateway.platforms.base import MessageEvent -from gateway.session import SessionSource +from gateway.session import SessionEntry, SessionSource, build_session_key -def _make_event(text="/compress", platform=Platform.TELEGRAM, user_id="12345", chat_id="67890"): - source = SessionSource( - platform=platform, - user_id=user_id, - chat_id=chat_id, - user_name="testuser", +def _make_source() -> SessionSource: + return SessionSource( + platform=Platform.TELEGRAM, + user_id="u1", + chat_id="c1", + user_name="tester", + chat_type="dm", ) - return MessageEvent(text=text, source=source) -def _make_history(n_messages: int) -> list[dict]: - history = [] - for i in range(n_messages): - history.append( - { - "role": "user" if i % 2 == 0 else "assistant", - "content": f"message {i}", - } - ) - return history +def _make_event(text: str = "/compress") -> MessageEvent: + return MessageEvent(text=text, source=_make_source(), message_id="m1") -def _make_runner(history: list[dict], session_id: str = "sess-current"): - runner = object.__new__(gateway_run.GatewayRunner) - session_entry = MagicMock() - session_entry.session_id = session_id - session_entry.session_key = "telegram:12345:67890" - - store = MagicMock() - store.get_or_create_session.return_value = session_entry - store.load_transcript.return_value = history - store.rewrite_transcript = MagicMock() - store.update_session = MagicMock() - store._save = MagicMock() - - runner.session_store = store - return runner, session_entry +def _make_history() -> list[dict[str, str]]: + return [ + {"role": "user", "content": "one"}, + {"role": "assistant", "content": "two"}, + {"role": "user", "content": "three"}, + {"role": "assistant", "content": "four"}, + ] -class _NoOpCompressor: - protect_first_n = 3 +def _make_runner(history: list[dict[str, str]]): + from gateway.run import GatewayRunner - def _align_boundary_forward(self, messages, idx): - return idx - - def _find_tail_cut_by_tokens(self, messages, head_end): - return head_end - - -class _NoOpAgent: - last_instance = None - - def __init__(self, *args, **kwargs): - type(self).last_instance = self - self.session_id = kwargs["session_id"] - self.context_compressor = _NoOpCompressor() - self._print_fn = None - self._compress_context_calls = 0 - - def _compress_context(self, messages, system_message, *, approx_tokens=None): - self._compress_context_calls += 1 - return messages, system_message - - -class _CompressibleCompressor: - protect_first_n = 1 - - def _align_boundary_forward(self, messages, idx): - return idx - - def _find_tail_cut_by_tokens(self, messages, head_end): - return 3 - - -class _CompressingAgent: - last_instance = None - - def __init__(self, *args, **kwargs): - type(self).last_instance = self - self.session_id = kwargs["session_id"] - self.context_compressor = _CompressibleCompressor() - self._print_fn = None - self._compress_context_calls = 0 - - def _compress_context(self, messages, system_message, *, approx_tokens=None): - self._compress_context_calls += 1 - self.session_id = "sess-compressed" - return ( - [ - {"role": "user", "content": "summary"}, - {"role": "assistant", "content": "latest reply"}, - ], - system_message, - ) + runner = object.__new__(GatewayRunner) + runner.config = GatewayConfig( + platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="***")} + ) + session_entry = SessionEntry( + session_key=build_session_key(_make_source()), + session_id="sess-1", + created_at=datetime.now(), + updated_at=datetime.now(), + platform=Platform.TELEGRAM, + chat_type="dm", + ) + runner.session_store = MagicMock() + runner.session_store.get_or_create_session.return_value = session_entry + runner.session_store.load_transcript.return_value = history + runner.session_store.rewrite_transcript = MagicMock() + runner.session_store.update_session = MagicMock() + runner.session_store._save = MagicMock() + return runner @pytest.mark.asyncio -async def test_compress_command_reports_noop_truthfully(monkeypatch): - event = _make_event() - runner, session_entry = _make_runner(_make_history(4)) +async def test_compress_command_reports_noop_without_success_banner(): + history = _make_history() + runner = _make_runner(history) + agent_instance = MagicMock() + agent_instance.context_compressor.protect_first_n = 0 + agent_instance.context_compressor._align_boundary_forward.return_value = 0 + agent_instance.context_compressor._find_tail_cut_by_tokens.return_value = 2 + agent_instance.session_id = "sess-1" + agent_instance._compress_context.return_value = (list(history), "") - monkeypatch.setattr(gateway_run, "_resolve_runtime_agent_kwargs", lambda: {"api_key": "test-key"}) - monkeypatch.setattr(gateway_run, "_resolve_gateway_model", lambda: "openai/test-model") - fake_run_agent = types.ModuleType("run_agent") - fake_run_agent.AIAgent = _NoOpAgent - monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent) + def _estimate(messages): + assert messages == history + return 100 - result = await runner._handle_compress_command(event) + with ( + patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "test-key"}), + patch("gateway.run._resolve_gateway_model", return_value="test-model"), + patch("run_agent.AIAgent", return_value=agent_instance), + patch("agent.model_metadata.estimate_messages_tokens_rough", side_effect=_estimate), + ): + result = await runner._handle_compress_command(_make_event()) - assert result == "Nothing to compress yet (the transcript is still all protected context)." - assert _NoOpAgent.last_instance is not None - assert _NoOpAgent.last_instance._compress_context_calls == 0 - runner.session_store.rewrite_transcript.assert_not_called() - runner.session_store.update_session.assert_not_called() - runner.session_store._save.assert_not_called() - assert session_entry.session_id == "sess-current" + assert "No changes from compression" in result + assert "Compressed:" not in result + assert "Rough transcript estimate: ~100 tokens (unchanged)" in result @pytest.mark.asyncio -async def test_compress_command_relabels_token_estimate_on_success(monkeypatch): - event = _make_event() - runner, session_entry = _make_runner(_make_history(6)) +async def test_compress_command_explains_when_token_estimate_rises(): + history = _make_history() + compressed = [ + history[0], + {"role": "assistant", "content": "Dense summary that still counts as more tokens."}, + history[-1], + ] + runner = _make_runner(history) + agent_instance = MagicMock() + agent_instance.context_compressor.protect_first_n = 0 + agent_instance.context_compressor._align_boundary_forward.return_value = 0 + agent_instance.context_compressor._find_tail_cut_by_tokens.return_value = 2 + agent_instance.session_id = "sess-1" + agent_instance._compress_context.return_value = (compressed, "") - monkeypatch.setattr(gateway_run, "_resolve_runtime_agent_kwargs", lambda: {"api_key": "test-key"}) - monkeypatch.setattr(gateway_run, "_resolve_gateway_model", lambda: "openai/test-model") - fake_run_agent = types.ModuleType("run_agent") - fake_run_agent.AIAgent = _CompressingAgent - monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent) + def _estimate(messages): + if messages == history: + return 100 + if messages == compressed: + return 120 + raise AssertionError(f"unexpected transcript: {messages!r}") - result = await runner._handle_compress_command(event) + with ( + patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "test-key"}), + patch("gateway.run._resolve_gateway_model", return_value="test-model"), + patch("run_agent.AIAgent", return_value=agent_instance), + patch("agent.model_metadata.estimate_messages_tokens_rough", side_effect=_estimate), + ): + result = await runner._handle_compress_command(_make_event()) - assert "🗜️ Compressed: 6 → 2 messages" in result - assert "Rough transcript estimate:" in result - assert "\n~" not in result - assert _CompressingAgent.last_instance is not None - assert _CompressingAgent.last_instance._compress_context_calls == 1 - runner.session_store.rewrite_transcript.assert_called_once_with( - "sess-compressed", - [ - {"role": "user", "content": "summary"}, - {"role": "assistant", "content": "latest reply"}, - ], - ) - runner.session_store.update_session.assert_called_once_with( - session_entry.session_key, - last_prompt_tokens=0, - ) - runner.session_store._save.assert_called_once() - assert session_entry.session_id == "sess-compressed" + assert "Compressed: 4 → 3 messages" in result + assert "Rough transcript estimate: ~100 → ~120 tokens" in result + assert "denser summaries" in result