mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
feat: /compress <focus> — guided compression with focus topic (#8017)
Adds an optional focus topic to /compress: `/compress database schema` guides the summariser to preserve information related to the focus topic (60-70% of summary budget) while compressing everything else more aggressively. Inspired by Claude Code's /compact <focus>. Changes: - context_compressor.py: focus_topic parameter on _generate_summary() and compress(); appends FOCUS TOPIC guidance block to the LLM prompt - run_agent.py: focus_topic parameter on _compress_context(), passed through to the compressor - cli.py: _manual_compress() extracts focus topic from command string, preserves existing manual_compression_feedback integration (no regression) - gateway/run.py: _handle_compress_command() extracts focus from event args and passes through — full gateway parity - commands.py: args_hint="[focus topic]" on /compress CommandDef Salvaged from PR #7459 (CLI /compress focus only — /context command deferred). 15 new tests across CLI, compressor, and gateway.
This commit is contained in:
parent
cfbfc4c3f1
commit
a0a02c1bc0
8 changed files with 445 additions and 14 deletions
|
|
@ -306,13 +306,19 @@ class ContextCompressor(ContextEngine):
|
|||
|
||||
return "\n\n".join(parts)
|
||||
|
||||
def _generate_summary(self, turns_to_summarize: List[Dict[str, Any]]) -> Optional[str]:
|
||||
def _generate_summary(self, turns_to_summarize: List[Dict[str, Any]], focus_topic: str = None) -> Optional[str]:
|
||||
"""Generate a structured summary of conversation turns.
|
||||
|
||||
Uses a structured template (Goal, Progress, Decisions, Files, Next Steps)
|
||||
inspired by Pi-mono and OpenCode. When a previous summary exists,
|
||||
generates an iterative update instead of summarizing from scratch.
|
||||
|
||||
Args:
|
||||
focus_topic: Optional focus string for guided compression. When
|
||||
provided, the summariser prioritises preserving information
|
||||
related to this topic and is more aggressive about compressing
|
||||
everything else. Inspired by Claude Code's ``/compact``.
|
||||
|
||||
Returns None if all attempts fail — the caller should drop
|
||||
the middle turns without a summary rather than inject a useless
|
||||
placeholder.
|
||||
|
|
@ -414,6 +420,14 @@ Target ~{summary_budget} tokens. Be specific — include file paths, command out
|
|||
|
||||
Write only the summary body. Do not include any preamble or prefix."""
|
||||
|
||||
# Inject focus topic guidance when the user provides one via /compress <focus>.
|
||||
# This goes at the end of the prompt so it takes precedence.
|
||||
if focus_topic:
|
||||
prompt += f"""
|
||||
|
||||
FOCUS TOPIC: "{focus_topic}"
|
||||
The user has requested that this compaction PRIORITISE preserving all information related to the focus topic above. For content related to "{focus_topic}", include full detail — exact values, file paths, command outputs, error messages, and decisions. For content NOT related to the focus topic, summarise more aggressively (brief one-liners or omit if truly irrelevant). The focus topic sections should receive roughly 60-70% of the summary token budget."""
|
||||
|
||||
try:
|
||||
call_kwargs = {
|
||||
"task": "compression",
|
||||
|
|
@ -631,7 +645,7 @@ Write only the summary body. Do not include any preamble or prefix."""
|
|||
# Main compression entry point
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def compress(self, messages: List[Dict[str, Any]], current_tokens: int = None) -> List[Dict[str, Any]]:
|
||||
def compress(self, messages: List[Dict[str, Any]], current_tokens: int = None, focus_topic: str = None) -> List[Dict[str, Any]]:
|
||||
"""Compress conversation messages by summarizing middle turns.
|
||||
|
||||
Algorithm:
|
||||
|
|
@ -643,6 +657,12 @@ Write only the summary body. Do not include any preamble or prefix."""
|
|||
|
||||
After compression, orphaned tool_call / tool_result pairs are cleaned
|
||||
up so the API never receives mismatched IDs.
|
||||
|
||||
Args:
|
||||
focus_topic: Optional focus string for guided compression. When
|
||||
provided, the summariser will prioritise preserving information
|
||||
related to this topic and be more aggressive about compressing
|
||||
everything else. Inspired by Claude Code's ``/compact``.
|
||||
"""
|
||||
n_messages = len(messages)
|
||||
# Only need head + 3 tail messages minimum (token budget decides the real tail size)
|
||||
|
|
@ -700,7 +720,7 @@ Write only the summary body. Do not include any preamble or prefix."""
|
|||
)
|
||||
|
||||
# Phase 3: Generate structured summary
|
||||
summary = self._generate_summary(turns_to_summarize)
|
||||
summary = self._generate_summary(turns_to_summarize, focus_topic=focus_topic)
|
||||
|
||||
# Phase 4: Assemble compressed message list
|
||||
compressed = []
|
||||
|
|
|
|||
26
cli.py
26
cli.py
|
|
@ -5344,7 +5344,7 @@ class HermesCLI:
|
|||
elif canonical == "fast":
|
||||
self._handle_fast_command(cmd_original)
|
||||
elif canonical == "compress":
|
||||
self._manual_compress()
|
||||
self._manual_compress(cmd_original)
|
||||
elif canonical == "usage":
|
||||
self._show_usage()
|
||||
elif canonical == "insights":
|
||||
|
|
@ -6201,8 +6201,14 @@ class HermesCLI:
|
|||
self._reasoning_preview_buf = getattr(self, "_reasoning_preview_buf", "") + reasoning_text
|
||||
self._flush_reasoning_preview(force=False)
|
||||
|
||||
def _manual_compress(self):
|
||||
"""Manually trigger context compression on the current conversation."""
|
||||
def _manual_compress(self, cmd_original: str = ""):
|
||||
"""Manually trigger context compression on the current conversation.
|
||||
|
||||
Accepts an optional focus topic: ``/compress <focus>`` guides the
|
||||
summariser to preserve information related to *focus* while being
|
||||
more aggressive about discarding everything else. Inspired by
|
||||
Claude Code's ``/compact <focus>`` feature.
|
||||
"""
|
||||
if not self.conversation_history or len(self.conversation_history) < 4:
|
||||
print("(._.) Not enough conversation to compress (need at least 4 messages).")
|
||||
return
|
||||
|
|
@ -6215,18 +6221,30 @@ class HermesCLI:
|
|||
print("(._.) Compression is disabled in config.")
|
||||
return
|
||||
|
||||
# Extract optional focus topic from the command (e.g. "/compress database schema")
|
||||
focus_topic = ""
|
||||
if cmd_original:
|
||||
parts = cmd_original.strip().split(None, 1)
|
||||
if len(parts) > 1:
|
||||
focus_topic = parts[1].strip()
|
||||
|
||||
original_count = len(self.conversation_history)
|
||||
try:
|
||||
from agent.model_metadata import estimate_messages_tokens_rough
|
||||
from agent.manual_compression_feedback import summarize_manual_compression
|
||||
original_history = list(self.conversation_history)
|
||||
approx_tokens = estimate_messages_tokens_rough(original_history)
|
||||
print(f"🗜️ Compressing {original_count} messages (~{approx_tokens:,} tokens)...")
|
||||
if focus_topic:
|
||||
print(f"🗜️ Compressing {original_count} messages (~{approx_tokens:,} tokens), "
|
||||
f"focus: \"{focus_topic}\"...")
|
||||
else:
|
||||
print(f"🗜️ Compressing {original_count} messages (~{approx_tokens:,} tokens)...")
|
||||
|
||||
compressed, _ = self.agent._compress_context(
|
||||
original_history,
|
||||
self.agent._cached_system_prompt or "",
|
||||
approx_tokens=approx_tokens,
|
||||
focus_topic=focus_topic or None,
|
||||
)
|
||||
self.conversation_history = compressed
|
||||
new_tokens = estimate_messages_tokens_rough(self.conversation_history)
|
||||
|
|
|
|||
|
|
@ -5716,7 +5716,12 @@ class GatewayRunner:
|
|||
return f"{descriptions[new_mode]}\n_(could not save to config: {e})_"
|
||||
|
||||
async def _handle_compress_command(self, event: MessageEvent) -> str:
|
||||
"""Handle /compress command -- manually compress conversation context."""
|
||||
"""Handle /compress command -- manually compress conversation context.
|
||||
|
||||
Accepts an optional focus topic: ``/compress <focus>`` guides the
|
||||
summariser to preserve information related to *focus* while being
|
||||
more aggressive about discarding everything else.
|
||||
"""
|
||||
source = event.source
|
||||
session_entry = self.session_store.get_or_create_session(source)
|
||||
history = self.session_store.load_transcript(session_entry.session_id)
|
||||
|
|
@ -5724,6 +5729,9 @@ class GatewayRunner:
|
|||
if not history or len(history) < 4:
|
||||
return "Not enough conversation to compress (need at least 4 messages)."
|
||||
|
||||
# Extract optional focus topic from command args
|
||||
focus_topic = (event.get_command_args() or "").strip() or None
|
||||
|
||||
try:
|
||||
from run_agent import AIAgent
|
||||
from agent.manual_compression_feedback import summarize_manual_compression
|
||||
|
|
@ -5765,7 +5773,7 @@ class GatewayRunner:
|
|||
loop = asyncio.get_event_loop()
|
||||
compressed, _ = await loop.run_in_executor(
|
||||
None,
|
||||
lambda: tmp_agent._compress_context(msgs, "", approx_tokens=approx_tokens)
|
||||
lambda: tmp_agent._compress_context(msgs, "", approx_tokens=approx_tokens, focus_topic=focus_topic)
|
||||
)
|
||||
|
||||
# _compress_context already calls end_session() on the old session
|
||||
|
|
@ -5789,7 +5797,10 @@ class GatewayRunner:
|
|||
approx_tokens,
|
||||
new_tokens,
|
||||
)
|
||||
lines = [f"🗜️ {summary['headline']}", summary["token_line"]]
|
||||
lines = [f"🗜️ {summary['headline']}"]
|
||||
if focus_topic:
|
||||
lines.append(f"Focus: \"{focus_topic}\"")
|
||||
lines.append(summary["token_line"])
|
||||
if summary["note"]:
|
||||
lines.append(summary["note"])
|
||||
return "\n".join(lines)
|
||||
|
|
|
|||
|
|
@ -69,7 +69,8 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
|||
args_hint="[name]"),
|
||||
CommandDef("branch", "Branch the current session (explore a different path)", "Session",
|
||||
aliases=("fork",), args_hint="[name]"),
|
||||
CommandDef("compress", "Manually compress conversation context", "Session"),
|
||||
CommandDef("compress", "Manually compress conversation context", "Session",
|
||||
args_hint="[focus topic]"),
|
||||
CommandDef("rollback", "List or restore filesystem checkpoints", "Session",
|
||||
args_hint="[number]"),
|
||||
CommandDef("stop", "Kill all running background processes", "Session"),
|
||||
|
|
|
|||
12
run_agent.py
12
run_agent.py
|
|
@ -6548,17 +6548,23 @@ class AIAgent:
|
|||
if messages and messages[-1].get("_flush_sentinel") == _sentinel:
|
||||
messages.pop()
|
||||
|
||||
def _compress_context(self, messages: list, system_message: str, *, approx_tokens: int = None, task_id: str = "default") -> tuple:
|
||||
def _compress_context(self, messages: list, system_message: str, *, approx_tokens: int = None, task_id: str = "default", focus_topic: str = None) -> tuple:
|
||||
"""Compress conversation context and split the session in SQLite.
|
||||
|
||||
Args:
|
||||
focus_topic: Optional focus string for guided compression — the
|
||||
summariser will prioritise preserving information related to
|
||||
this topic. Inspired by Claude Code's ``/compact <focus>``.
|
||||
|
||||
Returns:
|
||||
(compressed_messages, new_system_prompt) tuple
|
||||
"""
|
||||
_pre_msg_count = len(messages)
|
||||
logger.info(
|
||||
"context compression started: session=%s messages=%d tokens=~%s model=%s",
|
||||
"context compression started: session=%s messages=%d tokens=~%s model=%s focus=%r",
|
||||
self.session_id or "none", _pre_msg_count,
|
||||
f"{approx_tokens:,}" if approx_tokens else "unknown", self.model,
|
||||
focus_topic,
|
||||
)
|
||||
# Pre-compression memory flush: let the model save memories before they're lost
|
||||
self.flush_memories(messages, min_turns=0)
|
||||
|
|
@ -6570,7 +6576,7 @@ class AIAgent:
|
|||
except Exception:
|
||||
pass
|
||||
|
||||
compressed = self.context_compressor.compress(messages, current_tokens=approx_tokens)
|
||||
compressed = self.context_compressor.compress(messages, current_tokens=approx_tokens, focus_topic=focus_topic)
|
||||
|
||||
todo_snapshot = self._todo_store.format_for_injection()
|
||||
if todo_snapshot:
|
||||
|
|
|
|||
139
tests/agent/test_compress_focus.py
Normal file
139
tests/agent/test_compress_focus.py
Normal file
|
|
@ -0,0 +1,139 @@
|
|||
"""Tests for focus_topic flowing through the compressor.
|
||||
|
||||
Verifies that _generate_summary and compress accept and use the focus_topic
|
||||
parameter correctly. Inspired by Claude Code's /compact <focus>.
|
||||
"""
|
||||
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from agent.context_compressor import ContextCompressor
|
||||
|
||||
|
||||
def _make_compressor():
|
||||
"""Create a ContextCompressor with minimal state for testing."""
|
||||
compressor = ContextCompressor.__new__(ContextCompressor)
|
||||
compressor.protect_first_n = 2
|
||||
compressor.protect_last_n = 5
|
||||
compressor.tail_token_budget = 20000
|
||||
compressor.context_length = 200000
|
||||
compressor.threshold_percent = 0.80
|
||||
compressor.threshold_tokens = 160000
|
||||
compressor.max_summary_tokens = 10000
|
||||
compressor.quiet_mode = True
|
||||
compressor.compression_count = 0
|
||||
compressor.last_prompt_tokens = 0
|
||||
compressor._previous_summary = None
|
||||
compressor._summary_failure_cooldown_until = 0.0
|
||||
compressor.summary_model = None
|
||||
return compressor
|
||||
|
||||
|
||||
def test_focus_topic_injected_into_summary_prompt():
|
||||
"""When focus_topic is provided, the LLM prompt includes focus guidance."""
|
||||
compressor = _make_compressor()
|
||||
turns = [
|
||||
{"role": "user", "content": "Tell me about the database schema"},
|
||||
{"role": "assistant", "content": "The schema has tables: users, orders, products."},
|
||||
]
|
||||
|
||||
captured_prompt = {}
|
||||
|
||||
def mock_call_llm(**kwargs):
|
||||
captured_prompt["messages"] = kwargs["messages"]
|
||||
resp = MagicMock()
|
||||
resp.choices = [MagicMock()]
|
||||
resp.choices[0].message.content = "## Goal\nUnderstand DB schema."
|
||||
return resp
|
||||
|
||||
with patch("agent.context_compressor.call_llm", mock_call_llm):
|
||||
result = compressor._generate_summary(turns, focus_topic="database schema")
|
||||
|
||||
assert result is not None
|
||||
prompt_text = captured_prompt["messages"][0]["content"]
|
||||
assert 'FOCUS TOPIC: "database schema"' in prompt_text
|
||||
assert "PRIORITISE" in prompt_text
|
||||
assert "60-70%" in prompt_text
|
||||
|
||||
|
||||
def test_no_focus_topic_no_injection():
|
||||
"""Without focus_topic, the prompt doesn't contain focus guidance."""
|
||||
compressor = _make_compressor()
|
||||
turns = [
|
||||
{"role": "user", "content": "Hello"},
|
||||
{"role": "assistant", "content": "Hi"},
|
||||
]
|
||||
|
||||
captured_prompt = {}
|
||||
|
||||
def mock_call_llm(**kwargs):
|
||||
captured_prompt["messages"] = kwargs["messages"]
|
||||
resp = MagicMock()
|
||||
resp.choices = [MagicMock()]
|
||||
resp.choices[0].message.content = "## Goal\nGreeting."
|
||||
return resp
|
||||
|
||||
with patch("agent.context_compressor.call_llm", mock_call_llm):
|
||||
result = compressor._generate_summary(turns)
|
||||
|
||||
prompt_text = captured_prompt["messages"][0]["content"]
|
||||
assert "FOCUS TOPIC" not in prompt_text
|
||||
|
||||
|
||||
def test_compress_passes_focus_to_generate_summary():
|
||||
"""compress() passes focus_topic through to _generate_summary."""
|
||||
compressor = _make_compressor()
|
||||
|
||||
# Track what _generate_summary receives
|
||||
received_kwargs = {}
|
||||
original_generate = compressor._generate_summary
|
||||
|
||||
def tracking_generate(turns, **kwargs):
|
||||
received_kwargs.update(kwargs)
|
||||
return "## Goal\nTest."
|
||||
|
||||
compressor._generate_summary = tracking_generate
|
||||
|
||||
messages = [
|
||||
{"role": "system", "content": "System prompt"},
|
||||
{"role": "user", "content": "first"},
|
||||
{"role": "assistant", "content": "reply1"},
|
||||
{"role": "user", "content": "second"},
|
||||
{"role": "assistant", "content": "reply2"},
|
||||
{"role": "user", "content": "third"},
|
||||
{"role": "assistant", "content": "reply3"},
|
||||
{"role": "user", "content": "fourth"},
|
||||
{"role": "assistant", "content": "reply4"},
|
||||
]
|
||||
|
||||
compressor.compress(messages, current_tokens=100000, focus_topic="authentication flow")
|
||||
|
||||
assert received_kwargs.get("focus_topic") == "authentication flow"
|
||||
|
||||
|
||||
def test_compress_none_focus_by_default():
|
||||
"""compress() passes None focus_topic by default."""
|
||||
compressor = _make_compressor()
|
||||
|
||||
received_kwargs = {}
|
||||
|
||||
def tracking_generate(turns, **kwargs):
|
||||
received_kwargs.update(kwargs)
|
||||
return "## Goal\nTest."
|
||||
|
||||
compressor._generate_summary = tracking_generate
|
||||
|
||||
messages = [
|
||||
{"role": "system", "content": "System prompt"},
|
||||
{"role": "user", "content": "first"},
|
||||
{"role": "assistant", "content": "reply1"},
|
||||
{"role": "user", "content": "second"},
|
||||
{"role": "assistant", "content": "reply2"},
|
||||
{"role": "user", "content": "third"},
|
||||
{"role": "assistant", "content": "reply3"},
|
||||
{"role": "user", "content": "fourth"},
|
||||
{"role": "assistant", "content": "reply4"},
|
||||
]
|
||||
|
||||
compressor.compress(messages, current_tokens=100000)
|
||||
|
||||
assert received_kwargs.get("focus_topic") is None
|
||||
118
tests/cli/test_compress_focus.py
Normal file
118
tests/cli/test_compress_focus.py
Normal file
|
|
@ -0,0 +1,118 @@
|
|||
"""Tests for /compress <focus> — guided compression with focus topic.
|
||||
|
||||
Inspired by Claude Code's /compact <focus> feature.
|
||||
"""
|
||||
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from tests.cli.test_cli_init import _make_cli
|
||||
|
||||
|
||||
def _make_history() -> list[dict[str, str]]:
|
||||
return [
|
||||
{"role": "user", "content": "one"},
|
||||
{"role": "assistant", "content": "two"},
|
||||
{"role": "user", "content": "three"},
|
||||
{"role": "assistant", "content": "four"},
|
||||
]
|
||||
|
||||
|
||||
def test_focus_topic_extracted_and_passed(capsys):
|
||||
"""Focus topic is extracted from the command and passed to _compress_context."""
|
||||
shell = _make_cli()
|
||||
history = _make_history()
|
||||
compressed = [history[0], history[-1]]
|
||||
shell.conversation_history = history
|
||||
shell.agent = MagicMock()
|
||||
shell.agent.compression_enabled = True
|
||||
shell.agent._cached_system_prompt = ""
|
||||
shell.agent._compress_context.return_value = (compressed, "")
|
||||
|
||||
def _estimate(messages):
|
||||
if messages is history:
|
||||
return 100
|
||||
return 50
|
||||
|
||||
with patch("agent.model_metadata.estimate_messages_tokens_rough", side_effect=_estimate):
|
||||
shell._manual_compress("/compress database schema")
|
||||
|
||||
output = capsys.readouterr().out
|
||||
assert 'focus: "database schema"' in output
|
||||
|
||||
# Verify focus_topic was passed through
|
||||
shell.agent._compress_context.assert_called_once()
|
||||
call_kwargs = shell.agent._compress_context.call_args
|
||||
assert call_kwargs.kwargs.get("focus_topic") == "database schema"
|
||||
|
||||
|
||||
def test_no_focus_topic_when_bare_command(capsys):
|
||||
"""When no focus topic is provided, None is passed."""
|
||||
shell = _make_cli()
|
||||
history = _make_history()
|
||||
shell.conversation_history = history
|
||||
shell.agent = MagicMock()
|
||||
shell.agent.compression_enabled = True
|
||||
shell.agent._cached_system_prompt = ""
|
||||
shell.agent._compress_context.return_value = (list(history), "")
|
||||
|
||||
with patch("agent.model_metadata.estimate_messages_tokens_rough", return_value=100):
|
||||
shell._manual_compress("/compress")
|
||||
|
||||
shell.agent._compress_context.assert_called_once()
|
||||
call_kwargs = shell.agent._compress_context.call_args
|
||||
assert call_kwargs.kwargs.get("focus_topic") is None
|
||||
|
||||
|
||||
def test_empty_focus_after_command_treated_as_none(capsys):
|
||||
"""Trailing whitespace after /compress does not produce a focus topic."""
|
||||
shell = _make_cli()
|
||||
history = _make_history()
|
||||
shell.conversation_history = history
|
||||
shell.agent = MagicMock()
|
||||
shell.agent.compression_enabled = True
|
||||
shell.agent._cached_system_prompt = ""
|
||||
shell.agent._compress_context.return_value = (list(history), "")
|
||||
|
||||
with patch("agent.model_metadata.estimate_messages_tokens_rough", return_value=100):
|
||||
shell._manual_compress("/compress ")
|
||||
|
||||
shell.agent._compress_context.assert_called_once()
|
||||
call_kwargs = shell.agent._compress_context.call_args
|
||||
assert call_kwargs.kwargs.get("focus_topic") is None
|
||||
|
||||
|
||||
def test_focus_topic_printed_in_compression_banner(capsys):
|
||||
"""The focus topic shows in the compression progress banner."""
|
||||
shell = _make_cli()
|
||||
history = _make_history()
|
||||
compressed = [history[0], history[-1]]
|
||||
shell.conversation_history = history
|
||||
shell.agent = MagicMock()
|
||||
shell.agent.compression_enabled = True
|
||||
shell.agent._cached_system_prompt = ""
|
||||
shell.agent._compress_context.return_value = (compressed, "")
|
||||
|
||||
with patch("agent.model_metadata.estimate_messages_tokens_rough", return_value=100):
|
||||
shell._manual_compress("/compress API endpoints")
|
||||
|
||||
output = capsys.readouterr().out
|
||||
assert 'focus: "API endpoints"' in output
|
||||
|
||||
|
||||
def test_no_focus_prints_standard_banner(capsys):
|
||||
"""Without focus, the standard banner (no focus: line) is printed."""
|
||||
shell = _make_cli()
|
||||
history = _make_history()
|
||||
compressed = [history[0], history[-1]]
|
||||
shell.conversation_history = history
|
||||
shell.agent = MagicMock()
|
||||
shell.agent.compression_enabled = True
|
||||
shell.agent._cached_system_prompt = ""
|
||||
shell.agent._compress_context.return_value = (compressed, "")
|
||||
|
||||
with patch("agent.model_metadata.estimate_messages_tokens_rough", return_value=100):
|
||||
shell._manual_compress("/compress")
|
||||
|
||||
output = capsys.readouterr().out
|
||||
assert "focus:" not in output
|
||||
assert "Compressing" in output
|
||||
118
tests/gateway/test_compress_focus.py
Normal file
118
tests/gateway/test_compress_focus.py
Normal file
|
|
@ -0,0 +1,118 @@
|
|||
"""Tests for gateway /compress <focus> — focus topic on the gateway side."""
|
||||
|
||||
from datetime import datetime
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from gateway.config import GatewayConfig, Platform, PlatformConfig
|
||||
from gateway.platforms.base import MessageEvent
|
||||
from gateway.session import SessionEntry, SessionSource, build_session_key
|
||||
|
||||
|
||||
def _make_source() -> SessionSource:
|
||||
return SessionSource(
|
||||
platform=Platform.TELEGRAM,
|
||||
user_id="u1",
|
||||
chat_id="c1",
|
||||
user_name="tester",
|
||||
chat_type="dm",
|
||||
)
|
||||
|
||||
|
||||
def _make_event(text: str = "/compress") -> MessageEvent:
|
||||
return MessageEvent(text=text, source=_make_source(), message_id="m1")
|
||||
|
||||
|
||||
def _make_history() -> list[dict[str, str]]:
|
||||
return [
|
||||
{"role": "user", "content": "one"},
|
||||
{"role": "assistant", "content": "two"},
|
||||
{"role": "user", "content": "three"},
|
||||
{"role": "assistant", "content": "four"},
|
||||
]
|
||||
|
||||
|
||||
def _make_runner(history: list[dict[str, str]]):
|
||||
from gateway.run import GatewayRunner
|
||||
|
||||
runner = object.__new__(GatewayRunner)
|
||||
runner.config = GatewayConfig(
|
||||
platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="***")}
|
||||
)
|
||||
session_entry = SessionEntry(
|
||||
session_key=build_session_key(_make_source()),
|
||||
session_id="sess-1",
|
||||
created_at=datetime.now(),
|
||||
updated_at=datetime.now(),
|
||||
platform=Platform.TELEGRAM,
|
||||
chat_type="dm",
|
||||
)
|
||||
runner.session_store = MagicMock()
|
||||
runner.session_store.get_or_create_session.return_value = session_entry
|
||||
runner.session_store.load_transcript.return_value = history
|
||||
runner.session_store.rewrite_transcript = MagicMock()
|
||||
runner.session_store.update_session = MagicMock()
|
||||
runner.session_store._save = MagicMock()
|
||||
return runner
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_compress_focus_topic_passed_to_agent():
|
||||
"""Focus topic from /compress <focus> is passed through to _compress_context."""
|
||||
history = _make_history()
|
||||
compressed = [history[0], history[-1]]
|
||||
runner = _make_runner(history)
|
||||
agent_instance = MagicMock()
|
||||
agent_instance.context_compressor.protect_first_n = 0
|
||||
agent_instance.context_compressor._align_boundary_forward.return_value = 0
|
||||
agent_instance.context_compressor._find_tail_cut_by_tokens.return_value = 2
|
||||
agent_instance.session_id = "sess-1"
|
||||
agent_instance._compress_context.return_value = (compressed, "")
|
||||
|
||||
def _estimate(messages):
|
||||
return 100
|
||||
|
||||
with (
|
||||
patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "***"}),
|
||||
patch("gateway.run._resolve_gateway_model", return_value="test-model"),
|
||||
patch("run_agent.AIAgent", return_value=agent_instance),
|
||||
patch("agent.model_metadata.estimate_messages_tokens_rough", side_effect=_estimate),
|
||||
):
|
||||
result = await runner._handle_compress_command(_make_event("/compress database schema"))
|
||||
|
||||
# Verify focus_topic was passed
|
||||
agent_instance._compress_context.assert_called_once()
|
||||
call_kwargs = agent_instance._compress_context.call_args
|
||||
assert call_kwargs.kwargs.get("focus_topic") == "database schema"
|
||||
|
||||
# Verify focus is mentioned in response
|
||||
assert 'Focus: "database schema"' in result
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_compress_no_focus_passes_none():
|
||||
"""Bare /compress passes focus_topic=None."""
|
||||
history = _make_history()
|
||||
runner = _make_runner(history)
|
||||
agent_instance = MagicMock()
|
||||
agent_instance.context_compressor.protect_first_n = 0
|
||||
agent_instance.context_compressor._align_boundary_forward.return_value = 0
|
||||
agent_instance.context_compressor._find_tail_cut_by_tokens.return_value = 2
|
||||
agent_instance.session_id = "sess-1"
|
||||
agent_instance._compress_context.return_value = (list(history), "")
|
||||
|
||||
with (
|
||||
patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "***"}),
|
||||
patch("gateway.run._resolve_gateway_model", return_value="test-model"),
|
||||
patch("run_agent.AIAgent", return_value=agent_instance),
|
||||
patch("agent.model_metadata.estimate_messages_tokens_rough", return_value=100),
|
||||
):
|
||||
result = await runner._handle_compress_command(_make_event("/compress"))
|
||||
|
||||
agent_instance._compress_context.assert_called_once()
|
||||
call_kwargs = agent_instance._compress_context.call_args
|
||||
assert call_kwargs.kwargs.get("focus_topic") is None
|
||||
|
||||
# No focus line in response
|
||||
assert "Focus:" not in result
|
||||
Loading…
Add table
Add a link
Reference in a new issue