mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix(gateway): make manual compression feedback truthful
This commit is contained in:
parent
d6c2ad7e41
commit
1ffd92cc94
5 changed files with 234 additions and 148 deletions
49
agent/manual_compression_feedback.py
Normal file
49
agent/manual_compression_feedback.py
Normal file
|
|
@ -0,0 +1,49 @@
|
||||||
|
"""User-facing summaries for manual compression commands."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any, Sequence
|
||||||
|
|
||||||
|
|
||||||
|
def summarize_manual_compression(
    before_messages: Sequence[dict[str, Any]],
    after_messages: Sequence[dict[str, Any]],
    before_tokens: int,
    after_tokens: int,
) -> dict[str, Any]:
    """Return consistent user-facing feedback for manual compression.

    Compares the transcript before and after compression and produces a
    dict with four keys: ``noop`` (whether the transcript is unchanged),
    ``headline`` (message-count summary), ``token_line`` (rough token
    estimate summary), and ``note`` (an optional explanation, ``None``
    when not applicable).
    """
    msg_count_before = len(before_messages)
    msg_count_after = len(after_messages)
    # The run is a no-op when the transcript content is exactly unchanged.
    unchanged = list(before_messages) == list(after_messages)

    # Shared before→after token line used by both branches below.
    delta_line = (
        f"Rough transcript estimate: ~{before_tokens:,} → "
        f"~{after_tokens:,} tokens"
    )

    if unchanged:
        headline = f"No changes from compression: {msg_count_before} messages"
        token_line = (
            f"Rough transcript estimate: ~{before_tokens:,} tokens (unchanged)"
            if after_tokens == before_tokens
            else delta_line
        )
    else:
        headline = f"Compressed: {msg_count_before} → {msg_count_after} messages"
        token_line = delta_line

    # Explain the counter-intuitive case: fewer messages yet a higher
    # rough token estimate (denser summaries replace many short turns).
    note = None
    if (
        not unchanged
        and msg_count_after < msg_count_before
        and after_tokens > before_tokens
    ):
        note = (
            "Note: fewer messages can still raise this rough transcript estimate "
            "when compression rewrites the transcript into denser summaries."
        )

    return {
        "noop": unchanged,
        "headline": headline,
        "token_line": token_line,
        "note": note,
    }
|
||||||
22
cli.py
22
cli.py
|
|
@ -5835,21 +5835,29 @@ class HermesCLI:
|
||||||
original_count = len(self.conversation_history)
|
original_count = len(self.conversation_history)
|
||||||
try:
|
try:
|
||||||
from agent.model_metadata import estimate_messages_tokens_rough
|
from agent.model_metadata import estimate_messages_tokens_rough
|
||||||
approx_tokens = estimate_messages_tokens_rough(self.conversation_history)
|
from agent.manual_compression_feedback import summarize_manual_compression
|
||||||
|
original_history = list(self.conversation_history)
|
||||||
|
approx_tokens = estimate_messages_tokens_rough(original_history)
|
||||||
print(f"🗜️ Compressing {original_count} messages (~{approx_tokens:,} tokens)...")
|
print(f"🗜️ Compressing {original_count} messages (~{approx_tokens:,} tokens)...")
|
||||||
|
|
||||||
compressed, _new_system = self.agent._compress_context(
|
compressed, _ = self.agent._compress_context(
|
||||||
self.conversation_history,
|
original_history,
|
||||||
self.agent._cached_system_prompt or "",
|
self.agent._cached_system_prompt or "",
|
||||||
approx_tokens=approx_tokens,
|
approx_tokens=approx_tokens,
|
||||||
)
|
)
|
||||||
self.conversation_history = compressed
|
self.conversation_history = compressed
|
||||||
new_count = len(self.conversation_history)
|
|
||||||
new_tokens = estimate_messages_tokens_rough(self.conversation_history)
|
new_tokens = estimate_messages_tokens_rough(self.conversation_history)
|
||||||
print(
|
summary = summarize_manual_compression(
|
||||||
f" ✅ Compressed: {original_count} → {new_count} messages "
|
original_history,
|
||||||
f"(~{approx_tokens:,} → ~{new_tokens:,} tokens)"
|
self.conversation_history,
|
||||||
|
approx_tokens,
|
||||||
|
new_tokens,
|
||||||
)
|
)
|
||||||
|
icon = "🗜️" if summary["noop"] else "✅"
|
||||||
|
print(f" {icon} {summary['headline']}")
|
||||||
|
print(f" {summary['token_line']}")
|
||||||
|
if summary["note"]:
|
||||||
|
print(f" {summary['note']}")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f" ❌ Compression failed: {e}")
|
print(f" ❌ Compression failed: {e}")
|
||||||
|
|
|
||||||
|
|
@ -5196,6 +5196,7 @@ class GatewayRunner:
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from run_agent import AIAgent
|
from run_agent import AIAgent
|
||||||
|
from agent.manual_compression_feedback import summarize_manual_compression
|
||||||
from agent.model_metadata import estimate_messages_tokens_rough
|
from agent.model_metadata import estimate_messages_tokens_rough
|
||||||
|
|
||||||
runtime_kwargs = _resolve_runtime_agent_kwargs()
|
runtime_kwargs = _resolve_runtime_agent_kwargs()
|
||||||
|
|
@ -5250,13 +5251,17 @@ class GatewayRunner:
|
||||||
self.session_store.update_session(
|
self.session_store.update_session(
|
||||||
session_entry.session_key, last_prompt_tokens=0
|
session_entry.session_key, last_prompt_tokens=0
|
||||||
)
|
)
|
||||||
new_count = len(compressed)
|
|
||||||
new_tokens = estimate_messages_tokens_rough(compressed)
|
new_tokens = estimate_messages_tokens_rough(compressed)
|
||||||
|
summary = summarize_manual_compression(
|
||||||
return (
|
msgs,
|
||||||
f"🗜️ Compressed: {original_count} → {new_count} messages\n"
|
compressed,
|
||||||
f"Rough transcript estimate: ~{approx_tokens:,} → ~{new_tokens:,} tokens"
|
approx_tokens,
|
||||||
|
new_tokens,
|
||||||
)
|
)
|
||||||
|
lines = [f"🗜️ {summary['headline']}", summary["token_line"]]
|
||||||
|
if summary["note"]:
|
||||||
|
lines.append(summary["note"])
|
||||||
|
return "\n".join(lines)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning("Manual compress failed: %s", e)
|
logger.warning("Manual compress failed: %s", e)
|
||||||
return f"Compression failed: {e}"
|
return f"Compression failed: {e}"
|
||||||
|
|
|
||||||
66
tests/cli/test_manual_compress.py
Normal file
66
tests/cli/test_manual_compress.py
Normal file
|
|
@ -0,0 +1,66 @@
|
||||||
|
"""Tests for CLI manual compression messaging."""
|
||||||
|
|
||||||
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
|
from tests.cli.test_cli_init import _make_cli
|
||||||
|
|
||||||
|
|
||||||
|
def _make_history() -> list[dict[str, str]]:
|
||||||
|
return [
|
||||||
|
{"role": "user", "content": "one"},
|
||||||
|
{"role": "assistant", "content": "two"},
|
||||||
|
{"role": "user", "content": "three"},
|
||||||
|
{"role": "assistant", "content": "four"},
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def test_manual_compress_reports_noop_without_success_banner(capsys):
    """A compression that changes nothing must not print the success banner."""
    cli = _make_cli()
    transcript = _make_history()
    cli.conversation_history = transcript

    agent = MagicMock()
    agent.compression_enabled = True
    agent._cached_system_prompt = ""
    # The compressor hands back an identical copy of the transcript → no-op.
    agent._compress_context.return_value = (list(transcript), "")
    cli.agent = agent

    def fake_estimate(messages):
        assert messages == transcript
        return 100

    with patch(
        "agent.model_metadata.estimate_messages_tokens_rough",
        side_effect=fake_estimate,
    ):
        cli._manual_compress()

    printed = capsys.readouterr().out
    assert "No changes from compression" in printed
    assert "✅ Compressed" not in printed
    assert "Rough transcript estimate: ~100 tokens (unchanged)" in printed
|
||||||
|
|
||||||
|
|
||||||
|
def test_manual_compress_explains_when_token_estimate_rises(capsys):
    """Fewer messages with a higher token estimate must print the explanatory note."""
    cli = _make_cli()
    transcript = _make_history()
    # Three messages replacing four, but the middle one is a dense summary.
    summarized = [
        transcript[0],
        {"role": "assistant", "content": "Dense summary that still counts as more tokens."},
        transcript[-1],
    ]
    cli.conversation_history = transcript

    agent = MagicMock()
    agent.compression_enabled = True
    agent._cached_system_prompt = ""
    agent._compress_context.return_value = (summarized, "")
    cli.agent = agent

    def fake_estimate(messages):
        # Before-estimate is 100, after-estimate is 120 (estimate rises).
        if messages == transcript:
            return 100
        if messages == summarized:
            return 120
        raise AssertionError(f"unexpected transcript: {messages!r}")

    with patch(
        "agent.model_metadata.estimate_messages_tokens_rough",
        side_effect=fake_estimate,
    ):
        cli._manual_compress()

    printed = capsys.readouterr().out
    assert "✅ Compressed: 4 → 3 messages" in printed
    assert "Rough transcript estimate: ~100 → ~120 tokens" in printed
    assert "denser summaries" in printed
|
||||||
|
|
@ -1,163 +1,121 @@
|
||||||
"""Tests for gateway /compress truthfulness."""
|
"""Tests for gateway /compress user-facing messaging."""
|
||||||
|
|
||||||
import sys
|
from datetime import datetime
|
||||||
import types
|
from unittest.mock import MagicMock, patch
|
||||||
from unittest.mock import MagicMock
|
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
import gateway.run as gateway_run
|
from gateway.config import GatewayConfig, Platform, PlatformConfig
|
||||||
from gateway.config import Platform
|
|
||||||
from gateway.platforms.base import MessageEvent
|
from gateway.platforms.base import MessageEvent
|
||||||
from gateway.session import SessionSource
|
from gateway.session import SessionEntry, SessionSource, build_session_key
|
||||||
|
|
||||||
|
|
||||||
def _make_event(text="/compress", platform=Platform.TELEGRAM, user_id="12345", chat_id="67890"):
|
def _make_source() -> SessionSource:
|
||||||
source = SessionSource(
|
return SessionSource(
|
||||||
platform=platform,
|
platform=Platform.TELEGRAM,
|
||||||
user_id=user_id,
|
user_id="u1",
|
||||||
chat_id=chat_id,
|
chat_id="c1",
|
||||||
user_name="testuser",
|
user_name="tester",
|
||||||
|
chat_type="dm",
|
||||||
)
|
)
|
||||||
return MessageEvent(text=text, source=source)
|
|
||||||
|
|
||||||
|
|
||||||
def _make_history(n_messages: int) -> list[dict]:
|
def _make_event(text: str = "/compress") -> MessageEvent:
|
||||||
history = []
|
return MessageEvent(text=text, source=_make_source(), message_id="m1")
|
||||||
for i in range(n_messages):
|
|
||||||
history.append(
|
|
||||||
{
|
|
||||||
"role": "user" if i % 2 == 0 else "assistant",
|
|
||||||
"content": f"message {i}",
|
|
||||||
}
|
|
||||||
)
|
|
||||||
return history
|
|
||||||
|
|
||||||
|
|
||||||
def _make_runner(history: list[dict], session_id: str = "sess-current"):
|
def _make_history() -> list[dict[str, str]]:
|
||||||
runner = object.__new__(gateway_run.GatewayRunner)
|
return [
|
||||||
session_entry = MagicMock()
|
{"role": "user", "content": "one"},
|
||||||
session_entry.session_id = session_id
|
{"role": "assistant", "content": "two"},
|
||||||
session_entry.session_key = "telegram:12345:67890"
|
{"role": "user", "content": "three"},
|
||||||
|
{"role": "assistant", "content": "four"},
|
||||||
store = MagicMock()
|
]
|
||||||
store.get_or_create_session.return_value = session_entry
|
|
||||||
store.load_transcript.return_value = history
|
|
||||||
store.rewrite_transcript = MagicMock()
|
|
||||||
store.update_session = MagicMock()
|
|
||||||
store._save = MagicMock()
|
|
||||||
|
|
||||||
runner.session_store = store
|
|
||||||
return runner, session_entry
|
|
||||||
|
|
||||||
|
|
||||||
class _NoOpCompressor:
|
def _make_runner(history: list[dict[str, str]]):
|
||||||
protect_first_n = 3
|
from gateway.run import GatewayRunner
|
||||||
|
|
||||||
def _align_boundary_forward(self, messages, idx):
|
runner = object.__new__(GatewayRunner)
|
||||||
return idx
|
runner.config = GatewayConfig(
|
||||||
|
platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="***")}
|
||||||
def _find_tail_cut_by_tokens(self, messages, head_end):
|
)
|
||||||
return head_end
|
session_entry = SessionEntry(
|
||||||
|
session_key=build_session_key(_make_source()),
|
||||||
|
session_id="sess-1",
|
||||||
class _NoOpAgent:
|
created_at=datetime.now(),
|
||||||
last_instance = None
|
updated_at=datetime.now(),
|
||||||
|
platform=Platform.TELEGRAM,
|
||||||
def __init__(self, *args, **kwargs):
|
chat_type="dm",
|
||||||
type(self).last_instance = self
|
)
|
||||||
self.session_id = kwargs["session_id"]
|
runner.session_store = MagicMock()
|
||||||
self.context_compressor = _NoOpCompressor()
|
runner.session_store.get_or_create_session.return_value = session_entry
|
||||||
self._print_fn = None
|
runner.session_store.load_transcript.return_value = history
|
||||||
self._compress_context_calls = 0
|
runner.session_store.rewrite_transcript = MagicMock()
|
||||||
|
runner.session_store.update_session = MagicMock()
|
||||||
def _compress_context(self, messages, system_message, *, approx_tokens=None):
|
runner.session_store._save = MagicMock()
|
||||||
self._compress_context_calls += 1
|
return runner
|
||||||
return messages, system_message
|
|
||||||
|
|
||||||
|
|
||||||
class _CompressibleCompressor:
|
|
||||||
protect_first_n = 1
|
|
||||||
|
|
||||||
def _align_boundary_forward(self, messages, idx):
|
|
||||||
return idx
|
|
||||||
|
|
||||||
def _find_tail_cut_by_tokens(self, messages, head_end):
|
|
||||||
return 3
|
|
||||||
|
|
||||||
|
|
||||||
class _CompressingAgent:
|
|
||||||
last_instance = None
|
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
|
||||||
type(self).last_instance = self
|
|
||||||
self.session_id = kwargs["session_id"]
|
|
||||||
self.context_compressor = _CompressibleCompressor()
|
|
||||||
self._print_fn = None
|
|
||||||
self._compress_context_calls = 0
|
|
||||||
|
|
||||||
def _compress_context(self, messages, system_message, *, approx_tokens=None):
|
|
||||||
self._compress_context_calls += 1
|
|
||||||
self.session_id = "sess-compressed"
|
|
||||||
return (
|
|
||||||
[
|
|
||||||
{"role": "user", "content": "summary"},
|
|
||||||
{"role": "assistant", "content": "latest reply"},
|
|
||||||
],
|
|
||||||
system_message,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_compress_command_reports_noop_truthfully(monkeypatch):
|
async def test_compress_command_reports_noop_without_success_banner():
|
||||||
event = _make_event()
|
history = _make_history()
|
||||||
runner, session_entry = _make_runner(_make_history(4))
|
runner = _make_runner(history)
|
||||||
|
agent_instance = MagicMock()
|
||||||
|
agent_instance.context_compressor.protect_first_n = 0
|
||||||
|
agent_instance.context_compressor._align_boundary_forward.return_value = 0
|
||||||
|
agent_instance.context_compressor._find_tail_cut_by_tokens.return_value = 2
|
||||||
|
agent_instance.session_id = "sess-1"
|
||||||
|
agent_instance._compress_context.return_value = (list(history), "")
|
||||||
|
|
||||||
monkeypatch.setattr(gateway_run, "_resolve_runtime_agent_kwargs", lambda: {"api_key": "test-key"})
|
def _estimate(messages):
|
||||||
monkeypatch.setattr(gateway_run, "_resolve_gateway_model", lambda: "openai/test-model")
|
assert messages == history
|
||||||
fake_run_agent = types.ModuleType("run_agent")
|
return 100
|
||||||
fake_run_agent.AIAgent = _NoOpAgent
|
|
||||||
monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent)
|
|
||||||
|
|
||||||
result = await runner._handle_compress_command(event)
|
with (
|
||||||
|
patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "test-key"}),
|
||||||
|
patch("gateway.run._resolve_gateway_model", return_value="test-model"),
|
||||||
|
patch("run_agent.AIAgent", return_value=agent_instance),
|
||||||
|
patch("agent.model_metadata.estimate_messages_tokens_rough", side_effect=_estimate),
|
||||||
|
):
|
||||||
|
result = await runner._handle_compress_command(_make_event())
|
||||||
|
|
||||||
assert result == "Nothing to compress yet (the transcript is still all protected context)."
|
assert "No changes from compression" in result
|
||||||
assert _NoOpAgent.last_instance is not None
|
assert "Compressed:" not in result
|
||||||
assert _NoOpAgent.last_instance._compress_context_calls == 0
|
assert "Rough transcript estimate: ~100 tokens (unchanged)" in result
|
||||||
runner.session_store.rewrite_transcript.assert_not_called()
|
|
||||||
runner.session_store.update_session.assert_not_called()
|
|
||||||
runner.session_store._save.assert_not_called()
|
|
||||||
assert session_entry.session_id == "sess-current"
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_compress_command_relabels_token_estimate_on_success(monkeypatch):
|
async def test_compress_command_explains_when_token_estimate_rises():
|
||||||
event = _make_event()
|
history = _make_history()
|
||||||
runner, session_entry = _make_runner(_make_history(6))
|
compressed = [
|
||||||
|
history[0],
|
||||||
|
{"role": "assistant", "content": "Dense summary that still counts as more tokens."},
|
||||||
|
history[-1],
|
||||||
|
]
|
||||||
|
runner = _make_runner(history)
|
||||||
|
agent_instance = MagicMock()
|
||||||
|
agent_instance.context_compressor.protect_first_n = 0
|
||||||
|
agent_instance.context_compressor._align_boundary_forward.return_value = 0
|
||||||
|
agent_instance.context_compressor._find_tail_cut_by_tokens.return_value = 2
|
||||||
|
agent_instance.session_id = "sess-1"
|
||||||
|
agent_instance._compress_context.return_value = (compressed, "")
|
||||||
|
|
||||||
monkeypatch.setattr(gateway_run, "_resolve_runtime_agent_kwargs", lambda: {"api_key": "test-key"})
|
def _estimate(messages):
|
||||||
monkeypatch.setattr(gateway_run, "_resolve_gateway_model", lambda: "openai/test-model")
|
if messages == history:
|
||||||
fake_run_agent = types.ModuleType("run_agent")
|
return 100
|
||||||
fake_run_agent.AIAgent = _CompressingAgent
|
if messages == compressed:
|
||||||
monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent)
|
return 120
|
||||||
|
raise AssertionError(f"unexpected transcript: {messages!r}")
|
||||||
|
|
||||||
result = await runner._handle_compress_command(event)
|
with (
|
||||||
|
patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "test-key"}),
|
||||||
|
patch("gateway.run._resolve_gateway_model", return_value="test-model"),
|
||||||
|
patch("run_agent.AIAgent", return_value=agent_instance),
|
||||||
|
patch("agent.model_metadata.estimate_messages_tokens_rough", side_effect=_estimate),
|
||||||
|
):
|
||||||
|
result = await runner._handle_compress_command(_make_event())
|
||||||
|
|
||||||
assert "🗜️ Compressed: 6 → 2 messages" in result
|
assert "Compressed: 4 → 3 messages" in result
|
||||||
assert "Rough transcript estimate:" in result
|
assert "Rough transcript estimate: ~100 → ~120 tokens" in result
|
||||||
assert "\n~" not in result
|
assert "denser summaries" in result
|
||||||
assert _CompressingAgent.last_instance is not None
|
|
||||||
assert _CompressingAgent.last_instance._compress_context_calls == 1
|
|
||||||
runner.session_store.rewrite_transcript.assert_called_once_with(
|
|
||||||
"sess-compressed",
|
|
||||||
[
|
|
||||||
{"role": "user", "content": "summary"},
|
|
||||||
{"role": "assistant", "content": "latest reply"},
|
|
||||||
],
|
|
||||||
)
|
|
||||||
runner.session_store.update_session.assert_called_once_with(
|
|
||||||
session_entry.session_key,
|
|
||||||
last_prompt_tokens=0,
|
|
||||||
)
|
|
||||||
runner.session_store._save.assert_called_once()
|
|
||||||
assert session_entry.session_id == "sess-compressed"
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue