fix(gateway): make manual compression feedback truthful

This commit is contained in:
Kenny Xie 2026-04-09 21:23:35 -07:00 committed by Teknium
parent d6c2ad7e41
commit 1ffd92cc94
5 changed files with 234 additions and 148 deletions

View file

@ -0,0 +1,49 @@
"""User-facing summaries for manual compression commands."""
from __future__ import annotations
from typing import Any, Sequence
def summarize_manual_compression(
    before_messages: Sequence[dict[str, Any]],
    after_messages: Sequence[dict[str, Any]],
    before_tokens: int,
    after_tokens: int,
) -> dict[str, Any]:
    """Return consistent user-facing feedback for manual compression.

    Args:
        before_messages: Transcript before compression ran.
        after_messages: Transcript after compression ran.
        before_tokens: Rough token estimate for ``before_messages``.
        after_tokens: Rough token estimate for ``after_messages``.

    Returns:
        Dict with keys ``noop`` (bool), ``headline`` (str),
        ``token_line`` (str), and ``note`` (str or ``None``).
    """
    before_count = len(before_messages)
    after_count = len(after_messages)
    # Normalize to lists so any Sequence types compare by content.
    noop = list(after_messages) == list(before_messages)
    if noop:
        headline = f"No changes from compression: {before_count} messages"
        if after_tokens == before_tokens:
            token_line = (
                f"Rough transcript estimate: ~{before_tokens:,} tokens (unchanged)"
            )
        else:
            # Same messages but a shifted estimate (estimator drift).
            token_line = (
                f"Rough transcript estimate: ~{before_tokens:,}"
                f" → ~{after_tokens:,} tokens"
            )
    else:
        headline = f"Compressed: {before_count} → {after_count} messages"
        token_line = (
            f"Rough transcript estimate: ~{before_tokens:,}"
            f" → ~{after_tokens:,} tokens"
        )
    note = None
    # Explain the counterintuitive case: fewer messages yet a higher estimate.
    if not noop and after_count < before_count and after_tokens > before_tokens:
        note = (
            "Note: fewer messages can still raise this rough transcript estimate "
            "when compression rewrites the transcript into denser summaries."
        )
    return {
        "noop": noop,
        "headline": headline,
        "token_line": token_line,
        "note": note,
    }

22
cli.py
View file

@ -5835,21 +5835,29 @@ class HermesCLI:
original_count = len(self.conversation_history) original_count = len(self.conversation_history)
try: try:
from agent.model_metadata import estimate_messages_tokens_rough from agent.model_metadata import estimate_messages_tokens_rough
approx_tokens = estimate_messages_tokens_rough(self.conversation_history) from agent.manual_compression_feedback import summarize_manual_compression
original_history = list(self.conversation_history)
approx_tokens = estimate_messages_tokens_rough(original_history)
print(f"🗜️ Compressing {original_count} messages (~{approx_tokens:,} tokens)...") print(f"🗜️ Compressing {original_count} messages (~{approx_tokens:,} tokens)...")
compressed, _new_system = self.agent._compress_context( compressed, _ = self.agent._compress_context(
self.conversation_history, original_history,
self.agent._cached_system_prompt or "", self.agent._cached_system_prompt or "",
approx_tokens=approx_tokens, approx_tokens=approx_tokens,
) )
self.conversation_history = compressed self.conversation_history = compressed
new_count = len(self.conversation_history)
new_tokens = estimate_messages_tokens_rough(self.conversation_history) new_tokens = estimate_messages_tokens_rough(self.conversation_history)
print( summary = summarize_manual_compression(
f" ✅ Compressed: {original_count}{new_count} messages " original_history,
f"(~{approx_tokens:,} → ~{new_tokens:,} tokens)" self.conversation_history,
approx_tokens,
new_tokens,
) )
icon = "🗜️" if summary["noop"] else ""
print(f" {icon} {summary['headline']}")
print(f" {summary['token_line']}")
if summary["note"]:
print(f" {summary['note']}")
except Exception as e: except Exception as e:
print(f" ❌ Compression failed: {e}") print(f" ❌ Compression failed: {e}")

View file

@ -5196,6 +5196,7 @@ class GatewayRunner:
try: try:
from run_agent import AIAgent from run_agent import AIAgent
from agent.manual_compression_feedback import summarize_manual_compression
from agent.model_metadata import estimate_messages_tokens_rough from agent.model_metadata import estimate_messages_tokens_rough
runtime_kwargs = _resolve_runtime_agent_kwargs() runtime_kwargs = _resolve_runtime_agent_kwargs()
@ -5250,13 +5251,17 @@ class GatewayRunner:
self.session_store.update_session( self.session_store.update_session(
session_entry.session_key, last_prompt_tokens=0 session_entry.session_key, last_prompt_tokens=0
) )
new_count = len(compressed)
new_tokens = estimate_messages_tokens_rough(compressed) new_tokens = estimate_messages_tokens_rough(compressed)
summary = summarize_manual_compression(
return ( msgs,
f"🗜️ Compressed: {original_count}{new_count} messages\n" compressed,
f"Rough transcript estimate: ~{approx_tokens:,} → ~{new_tokens:,} tokens" approx_tokens,
new_tokens,
) )
lines = [f"🗜️ {summary['headline']}", summary["token_line"]]
if summary["note"]:
lines.append(summary["note"])
return "\n".join(lines)
except Exception as e: except Exception as e:
logger.warning("Manual compress failed: %s", e) logger.warning("Manual compress failed: %s", e)
return f"Compression failed: {e}" return f"Compression failed: {e}"

View file

@ -0,0 +1,66 @@
"""Tests for CLI manual compression messaging."""
from unittest.mock import MagicMock, patch
from tests.cli.test_cli_init import _make_cli
def _make_history() -> list[dict[str, str]]:
return [
{"role": "user", "content": "one"},
{"role": "assistant", "content": "two"},
{"role": "user", "content": "three"},
{"role": "assistant", "content": "four"},
]
def test_manual_compress_reports_noop_without_success_banner(capsys):
    """A no-op compression must report 'no changes', not the success banner."""
    cli = _make_cli()
    transcript = _make_history()
    cli.conversation_history = transcript

    agent = MagicMock()
    agent.compression_enabled = True
    agent._cached_system_prompt = ""
    # The compressor hands back an equal copy, so the run is a no-op.
    agent._compress_context.return_value = (list(transcript), "")
    cli.agent = agent

    def fake_estimate(messages):
        assert messages == transcript
        return 100

    with patch(
        "agent.model_metadata.estimate_messages_tokens_rough",
        side_effect=fake_estimate,
    ):
        cli._manual_compress()

    printed = capsys.readouterr().out
    assert "No changes from compression" in printed
    assert "✅ Compressed" not in printed
    assert "Rough transcript estimate: ~100 tokens (unchanged)" in printed
def test_manual_compress_explains_when_token_estimate_rises(capsys):
    """Fewer messages with a higher estimate should print the explanatory note."""
    cli = _make_cli()
    transcript = _make_history()
    condensed = [
        transcript[0],
        {"role": "assistant", "content": "Dense summary that still counts as more tokens."},
        transcript[-1],
    ]
    cli.conversation_history = transcript

    agent = MagicMock()
    agent.compression_enabled = True
    agent._cached_system_prompt = ""
    agent._compress_context.return_value = (condensed, "")
    cli.agent = agent

    def fake_estimate(messages):
        # Before-pass sees the full transcript; after-pass sees the condensed one.
        if messages == transcript:
            return 100
        if messages == condensed:
            return 120
        raise AssertionError(f"unexpected transcript: {messages!r}")

    with patch(
        "agent.model_metadata.estimate_messages_tokens_rough",
        side_effect=fake_estimate,
    ):
        cli._manual_compress()

    printed = capsys.readouterr().out
    assert "✅ Compressed: 4 → 3 messages" in printed
    assert "Rough transcript estimate: ~100 → ~120 tokens" in printed
    assert "denser summaries" in printed

View file

@ -1,163 +1,121 @@
"""Tests for gateway /compress truthfulness.""" """Tests for gateway /compress user-facing messaging."""
import sys from datetime import datetime
import types from unittest.mock import MagicMock, patch
from unittest.mock import MagicMock
import pytest import pytest
import gateway.run as gateway_run from gateway.config import GatewayConfig, Platform, PlatformConfig
from gateway.config import Platform
from gateway.platforms.base import MessageEvent from gateway.platforms.base import MessageEvent
from gateway.session import SessionSource from gateway.session import SessionEntry, SessionSource, build_session_key
def _make_event(text="/compress", platform=Platform.TELEGRAM, user_id="12345", chat_id="67890"): def _make_source() -> SessionSource:
source = SessionSource( return SessionSource(
platform=platform, platform=Platform.TELEGRAM,
user_id=user_id, user_id="u1",
chat_id=chat_id, chat_id="c1",
user_name="testuser", user_name="tester",
chat_type="dm",
) )
return MessageEvent(text=text, source=source)
def _make_history(n_messages: int) -> list[dict]: def _make_event(text: str = "/compress") -> MessageEvent:
history = [] return MessageEvent(text=text, source=_make_source(), message_id="m1")
for i in range(n_messages):
history.append(
{
"role": "user" if i % 2 == 0 else "assistant",
"content": f"message {i}",
}
)
return history
def _make_runner(history: list[dict], session_id: str = "sess-current"): def _make_history() -> list[dict[str, str]]:
runner = object.__new__(gateway_run.GatewayRunner) return [
session_entry = MagicMock() {"role": "user", "content": "one"},
session_entry.session_id = session_id {"role": "assistant", "content": "two"},
session_entry.session_key = "telegram:12345:67890" {"role": "user", "content": "three"},
{"role": "assistant", "content": "four"},
store = MagicMock() ]
store.get_or_create_session.return_value = session_entry
store.load_transcript.return_value = history
store.rewrite_transcript = MagicMock()
store.update_session = MagicMock()
store._save = MagicMock()
runner.session_store = store
return runner, session_entry
class _NoOpCompressor: def _make_runner(history: list[dict[str, str]]):
protect_first_n = 3 from gateway.run import GatewayRunner
def _align_boundary_forward(self, messages, idx): runner = object.__new__(GatewayRunner)
return idx runner.config = GatewayConfig(
platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="***")}
def _find_tail_cut_by_tokens(self, messages, head_end): )
return head_end session_entry = SessionEntry(
session_key=build_session_key(_make_source()),
session_id="sess-1",
class _NoOpAgent: created_at=datetime.now(),
last_instance = None updated_at=datetime.now(),
platform=Platform.TELEGRAM,
def __init__(self, *args, **kwargs): chat_type="dm",
type(self).last_instance = self )
self.session_id = kwargs["session_id"] runner.session_store = MagicMock()
self.context_compressor = _NoOpCompressor() runner.session_store.get_or_create_session.return_value = session_entry
self._print_fn = None runner.session_store.load_transcript.return_value = history
self._compress_context_calls = 0 runner.session_store.rewrite_transcript = MagicMock()
runner.session_store.update_session = MagicMock()
def _compress_context(self, messages, system_message, *, approx_tokens=None): runner.session_store._save = MagicMock()
self._compress_context_calls += 1 return runner
return messages, system_message
class _CompressibleCompressor:
protect_first_n = 1
def _align_boundary_forward(self, messages, idx):
return idx
def _find_tail_cut_by_tokens(self, messages, head_end):
return 3
class _CompressingAgent:
last_instance = None
def __init__(self, *args, **kwargs):
type(self).last_instance = self
self.session_id = kwargs["session_id"]
self.context_compressor = _CompressibleCompressor()
self._print_fn = None
self._compress_context_calls = 0
def _compress_context(self, messages, system_message, *, approx_tokens=None):
self._compress_context_calls += 1
self.session_id = "sess-compressed"
return (
[
{"role": "user", "content": "summary"},
{"role": "assistant", "content": "latest reply"},
],
system_message,
)
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_compress_command_reports_noop_truthfully(monkeypatch): async def test_compress_command_reports_noop_without_success_banner():
event = _make_event() history = _make_history()
runner, session_entry = _make_runner(_make_history(4)) runner = _make_runner(history)
agent_instance = MagicMock()
agent_instance.context_compressor.protect_first_n = 0
agent_instance.context_compressor._align_boundary_forward.return_value = 0
agent_instance.context_compressor._find_tail_cut_by_tokens.return_value = 2
agent_instance.session_id = "sess-1"
agent_instance._compress_context.return_value = (list(history), "")
monkeypatch.setattr(gateway_run, "_resolve_runtime_agent_kwargs", lambda: {"api_key": "test-key"}) def _estimate(messages):
monkeypatch.setattr(gateway_run, "_resolve_gateway_model", lambda: "openai/test-model") assert messages == history
fake_run_agent = types.ModuleType("run_agent") return 100
fake_run_agent.AIAgent = _NoOpAgent
monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent)
result = await runner._handle_compress_command(event) with (
patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "test-key"}),
patch("gateway.run._resolve_gateway_model", return_value="test-model"),
patch("run_agent.AIAgent", return_value=agent_instance),
patch("agent.model_metadata.estimate_messages_tokens_rough", side_effect=_estimate),
):
result = await runner._handle_compress_command(_make_event())
assert result == "Nothing to compress yet (the transcript is still all protected context)." assert "No changes from compression" in result
assert _NoOpAgent.last_instance is not None assert "Compressed:" not in result
assert _NoOpAgent.last_instance._compress_context_calls == 0 assert "Rough transcript estimate: ~100 tokens (unchanged)" in result
runner.session_store.rewrite_transcript.assert_not_called()
runner.session_store.update_session.assert_not_called()
runner.session_store._save.assert_not_called()
assert session_entry.session_id == "sess-current"
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_compress_command_relabels_token_estimate_on_success(monkeypatch): async def test_compress_command_explains_when_token_estimate_rises():
event = _make_event() history = _make_history()
runner, session_entry = _make_runner(_make_history(6)) compressed = [
history[0],
{"role": "assistant", "content": "Dense summary that still counts as more tokens."},
history[-1],
]
runner = _make_runner(history)
agent_instance = MagicMock()
agent_instance.context_compressor.protect_first_n = 0
agent_instance.context_compressor._align_boundary_forward.return_value = 0
agent_instance.context_compressor._find_tail_cut_by_tokens.return_value = 2
agent_instance.session_id = "sess-1"
agent_instance._compress_context.return_value = (compressed, "")
monkeypatch.setattr(gateway_run, "_resolve_runtime_agent_kwargs", lambda: {"api_key": "test-key"}) def _estimate(messages):
monkeypatch.setattr(gateway_run, "_resolve_gateway_model", lambda: "openai/test-model") if messages == history:
fake_run_agent = types.ModuleType("run_agent") return 100
fake_run_agent.AIAgent = _CompressingAgent if messages == compressed:
monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent) return 120
raise AssertionError(f"unexpected transcript: {messages!r}")
result = await runner._handle_compress_command(event) with (
patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "test-key"}),
patch("gateway.run._resolve_gateway_model", return_value="test-model"),
patch("run_agent.AIAgent", return_value=agent_instance),
patch("agent.model_metadata.estimate_messages_tokens_rough", side_effect=_estimate),
):
result = await runner._handle_compress_command(_make_event())
assert "🗜️ Compressed: 6 → 2 messages" in result assert "Compressed: 4 → 3 messages" in result
assert "Rough transcript estimate:" in result assert "Rough transcript estimate: ~100 → ~120 tokens" in result
assert "\n~" not in result assert "denser summaries" in result
assert _CompressingAgent.last_instance is not None
assert _CompressingAgent.last_instance._compress_context_calls == 1
runner.session_store.rewrite_transcript.assert_called_once_with(
"sess-compressed",
[
{"role": "user", "content": "summary"},
{"role": "assistant", "content": "latest reply"},
],
)
runner.session_store.update_session.assert_called_once_with(
session_entry.session_key,
last_prompt_tokens=0,
)
runner.session_store._save.assert_called_once()
assert session_entry.session_id == "sess-compressed"