diff --git a/agent/context_compressor.py b/agent/context_compressor.py
index 5aa95dc01b..0fa056b61c 100644
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@@ -306,13 +306,19 @@ class ContextCompressor(ContextEngine):
         return "\n\n".join(parts)

-    def _generate_summary(self, turns_to_summarize: List[Dict[str, Any]]) -> Optional[str]:
+    def _generate_summary(self, turns_to_summarize: List[Dict[str, Any]], focus_topic: str = None) -> Optional[str]:
         """Generate a structured summary of conversation turns.

         Uses a structured template (Goal, Progress, Decisions, Files, Next Steps)
         inspired by Pi-mono and OpenCode. When a previous summary exists,
         generates an iterative update instead of summarizing from scratch.

+        Args:
+            focus_topic: Optional focus string for guided compression. When
+                provided, the summariser prioritises preserving information
+                related to this topic and is more aggressive about compressing
+                everything else. Inspired by Claude Code's ``/compact``.
+
         Returns None if all attempts fail — the caller should drop the middle
         turns without a summary rather than inject a useless placeholder.
@@ -414,6 +420,14 @@ Target ~{summary_budget} tokens. Be specific — include file paths, command out

 Write only the summary body. Do not include any preamble or prefix."""

+        # Inject focus topic guidance when the user provides one via /compress <focus>.
+        # This goes at the end of the prompt so it takes precedence.
+        if focus_topic:
+            prompt += f"""
+
+FOCUS TOPIC: "{focus_topic}"
+The user has requested that this compaction PRIORITISE preserving all information related to the focus topic above. For content related to "{focus_topic}", include full detail — exact values, file paths, command outputs, error messages, and decisions. For content NOT related to the focus topic, summarise more aggressively (brief one-liners or omit if truly irrelevant). The focus topic sections should receive roughly 60-70% of the summary token budget."""
+
         try:
             call_kwargs = {
                 "task": "compression",
@@ -631,7 +645,7 @@ Write only the summary body. Do not include any preamble or prefix."""
     # Main compression entry point
     # ------------------------------------------------------------------

-    def compress(self, messages: List[Dict[str, Any]], current_tokens: int = None) -> List[Dict[str, Any]]:
+    def compress(self, messages: List[Dict[str, Any]], current_tokens: int = None, focus_topic: str = None) -> List[Dict[str, Any]]:
         """Compress conversation messages by summarizing middle turns.

         Algorithm:
@@ -643,6 +657,12 @@ Write only the summary body. Do not include any preamble or prefix."""

         After compression, orphaned tool_call / tool_result pairs are cleaned
         up so the API never receives mismatched IDs.
+
+        Args:
+            focus_topic: Optional focus string for guided compression. When
+                provided, the summariser will prioritise preserving information
+                related to this topic and be more aggressive about compressing
+                everything else. Inspired by Claude Code's ``/compact``.
         """
         n_messages = len(messages)
         # Only need head + 3 tail messages minimum (token budget decides the real tail size)
@@ -700,7 +720,7 @@ Write only the summary body. Do not include any preamble or prefix."""
         )

         # Phase 3: Generate structured summary
-        summary = self._generate_summary(turns_to_summarize)
+        summary = self._generate_summary(turns_to_summarize, focus_topic=focus_topic)

         # Phase 4: Assemble compressed message list
         compressed = []
diff --git a/cli.py b/cli.py
index 26a2233880..b3d51b1271 100644
--- a/cli.py
+++ b/cli.py
@@ -5344,7 +5344,7 @@ class HermesCLI:
             elif canonical == "fast":
                 self._handle_fast_command(cmd_original)
             elif canonical == "compress":
-                self._manual_compress()
+                self._manual_compress(cmd_original)
             elif canonical == "usage":
                 self._show_usage()
             elif canonical == "insights":
@@ -6201,8 +6201,14 @@ class HermesCLI:
         self._reasoning_preview_buf = getattr(self, "_reasoning_preview_buf", "") + reasoning_text
         self._flush_reasoning_preview(force=False)

-    def _manual_compress(self):
-        """Manually trigger context compression on the current conversation."""
+    def _manual_compress(self, cmd_original: str = ""):
+        """Manually trigger context compression on the current conversation.
+
+        Accepts an optional focus topic: ``/compress <focus>`` guides the
+        summariser to preserve information related to *focus* while being
+        more aggressive about discarding everything else. Inspired by
+        Claude Code's ``/compact <focus>`` feature.
+        """
         if not self.conversation_history or len(self.conversation_history) < 4:
             print("(._.) Not enough conversation to compress (need at least 4 messages).")
             return
@@ -6215,18 +6221,30 @@ class HermesCLI:
             print("(._.) Compression is disabled in config.")
             return

+        # Extract optional focus topic from the command (e.g. "/compress database schema")
+        focus_topic = ""
+        if cmd_original:
+            parts = cmd_original.strip().split(None, 1)
+            if len(parts) > 1:
+                focus_topic = parts[1].strip()
+
         original_count = len(self.conversation_history)
         try:
             from agent.model_metadata import estimate_messages_tokens_rough
             from agent.manual_compression_feedback import summarize_manual_compression
             original_history = list(self.conversation_history)
             approx_tokens = estimate_messages_tokens_rough(original_history)
-            print(f"🗜️ Compressing {original_count} messages (~{approx_tokens:,} tokens)...")
+            if focus_topic:
+                print(f"🗜️ Compressing {original_count} messages (~{approx_tokens:,} tokens), "
+                      f"focus: \"{focus_topic}\"...")
+            else:
+                print(f"🗜️ Compressing {original_count} messages (~{approx_tokens:,} tokens)...")
             compressed, _ = self.agent._compress_context(
                 original_history,
                 self.agent._cached_system_prompt or "",
                 approx_tokens=approx_tokens,
+                focus_topic=focus_topic or None,
             )
             self.conversation_history = compressed
             new_tokens = estimate_messages_tokens_rough(self.conversation_history)
diff --git a/gateway/run.py b/gateway/run.py
index 560ccee4a0..eacd0cea11 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -5716,7 +5716,12 @@ class GatewayRunner:
         return f"{descriptions[new_mode]}\n_(could not save to config: {e})_"

     async def _handle_compress_command(self, event: MessageEvent) -> str:
-        """Handle /compress command -- manually compress conversation context."""
+        """Handle /compress command -- manually compress conversation context.
+
+        Accepts an optional focus topic: ``/compress <focus>`` guides the
+        summariser to preserve information related to *focus* while being
+        more aggressive about discarding everything else.
+ """ source = event.source session_entry = self.session_store.get_or_create_session(source) history = self.session_store.load_transcript(session_entry.session_id) @@ -5724,6 +5729,9 @@ class GatewayRunner: if not history or len(history) < 4: return "Not enough conversation to compress (need at least 4 messages)." + # Extract optional focus topic from command args + focus_topic = (event.get_command_args() or "").strip() or None + try: from run_agent import AIAgent from agent.manual_compression_feedback import summarize_manual_compression @@ -5765,7 +5773,7 @@ class GatewayRunner: loop = asyncio.get_event_loop() compressed, _ = await loop.run_in_executor( None, - lambda: tmp_agent._compress_context(msgs, "", approx_tokens=approx_tokens) + lambda: tmp_agent._compress_context(msgs, "", approx_tokens=approx_tokens, focus_topic=focus_topic) ) # _compress_context already calls end_session() on the old session @@ -5789,7 +5797,10 @@ class GatewayRunner: approx_tokens, new_tokens, ) - lines = [f"🗜️ {summary['headline']}", summary["token_line"]] + lines = [f"🗜️ {summary['headline']}"] + if focus_topic: + lines.append(f"Focus: \"{focus_topic}\"") + lines.append(summary["token_line"]) if summary["note"]: lines.append(summary["note"]) return "\n".join(lines) diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py index 7abadca614..1c5a298d1e 100644 --- a/hermes_cli/commands.py +++ b/hermes_cli/commands.py @@ -69,7 +69,8 @@ COMMAND_REGISTRY: list[CommandDef] = [ args_hint="[name]"), CommandDef("branch", "Branch the current session (explore a different path)", "Session", aliases=("fork",), args_hint="[name]"), - CommandDef("compress", "Manually compress conversation context", "Session"), + CommandDef("compress", "Manually compress conversation context", "Session", + args_hint="[focus topic]"), CommandDef("rollback", "List or restore filesystem checkpoints", "Session", args_hint="[number]"), CommandDef("stop", "Kill all running background processes", "Session"), diff --git a/run_agent.py b/run_agent.py index 3956f89048..b230354542 100644 --- a/run_agent.py +++ b/run_agent.py @@ -6548,17 +6548,23 @@ class AIAgent: if messages and messages[-1].get("_flush_sentinel") == _sentinel: messages.pop() - def _compress_context(self, messages: list, system_message: str, *, approx_tokens: int = None, task_id: str = "default") -> tuple: + def _compress_context(self, messages: list, system_message: str, *, approx_tokens: int = None, task_id: str = "default", focus_topic: str = None) -> tuple: """Compress conversation context and split the session in SQLite. + Args: + focus_topic: Optional focus string for guided compression — the + summariser will prioritise preserving information related to + this topic. Inspired by Claude Code's ``/compact ``. 
+
         Returns:
             (compressed_messages, new_system_prompt) tuple
         """
         _pre_msg_count = len(messages)
         logger.info(
-            "context compression started: session=%s messages=%d tokens=~%s model=%s",
+            "context compression started: session=%s messages=%d tokens=~%s model=%s focus=%r",
             self.session_id or "none", _pre_msg_count,
             f"{approx_tokens:,}" if approx_tokens else "unknown", self.model,
+            focus_topic,
         )
         # Pre-compression memory flush: let the model save memories before they're lost
         self.flush_memories(messages, min_turns=0)
@@ -6570,7 +6576,7 @@ class AIAgent:
         except Exception:
             pass

-        compressed = self.context_compressor.compress(messages, current_tokens=approx_tokens)
+        compressed = self.context_compressor.compress(messages, current_tokens=approx_tokens, focus_topic=focus_topic)

         todo_snapshot = self._todo_store.format_for_injection()
         if todo_snapshot:
diff --git a/tests/agent/test_compress_focus.py b/tests/agent/test_compress_focus.py
new file mode 100644
index 0000000000..a569eb9e3d
--- /dev/null
+++ b/tests/agent/test_compress_focus.py
@@ -0,0 +1,139 @@
+"""Tests for focus_topic flowing through the compressor.
+
+Verifies that _generate_summary and compress accept and use the focus_topic
+parameter correctly. Inspired by Claude Code's /compact <focus>.
+"""
+
+from unittest.mock import MagicMock, patch
+
+from agent.context_compressor import ContextCompressor
+
+
+def _make_compressor():
+    """Create a ContextCompressor with minimal state for testing."""
+    compressor = ContextCompressor.__new__(ContextCompressor)
+    compressor.protect_first_n = 2
+    compressor.protect_last_n = 5
+    compressor.tail_token_budget = 20000
+    compressor.context_length = 200000
+    compressor.threshold_percent = 0.80
+    compressor.threshold_tokens = 160000
+    compressor.max_summary_tokens = 10000
+    compressor.quiet_mode = True
+    compressor.compression_count = 0
+    compressor.last_prompt_tokens = 0
+    compressor._previous_summary = None
+    compressor._summary_failure_cooldown_until = 0.0
+    compressor.summary_model = None
+    return compressor
+
+
+def test_focus_topic_injected_into_summary_prompt():
+    """When focus_topic is provided, the LLM prompt includes focus guidance."""
+    compressor = _make_compressor()
+    turns = [
+        {"role": "user", "content": "Tell me about the database schema"},
+        {"role": "assistant", "content": "The schema has tables: users, orders, products."},
+    ]
+
+    captured_prompt = {}
+
+    def mock_call_llm(**kwargs):
+        captured_prompt["messages"] = kwargs["messages"]
+        resp = MagicMock()
+        resp.choices = [MagicMock()]
+        resp.choices[0].message.content = "## Goal\nUnderstand DB schema."
+        return resp
+
+    with patch("agent.context_compressor.call_llm", mock_call_llm):
+        result = compressor._generate_summary(turns, focus_topic="database schema")
+
+    assert result is not None
+    prompt_text = captured_prompt["messages"][0]["content"]
+    assert 'FOCUS TOPIC: "database schema"' in prompt_text
+    assert "PRIORITISE" in prompt_text
+    assert "60-70%" in prompt_text
+
+
+def test_no_focus_topic_no_injection():
+    """Without focus_topic, the prompt doesn't contain focus guidance."""
+    compressor = _make_compressor()
+    turns = [
+        {"role": "user", "content": "Hello"},
+        {"role": "assistant", "content": "Hi"},
+    ]
+
+    captured_prompt = {}
+
+    def mock_call_llm(**kwargs):
+        captured_prompt["messages"] = kwargs["messages"]
+        resp = MagicMock()
+        resp.choices = [MagicMock()]
+        resp.choices[0].message.content = "## Goal\nGreeting."
+        return resp
+
+    with patch("agent.context_compressor.call_llm", mock_call_llm):
+        result = compressor._generate_summary(turns)
+
+    prompt_text = captured_prompt["messages"][0]["content"]
+    assert "FOCUS TOPIC" not in prompt_text
+
+
+def test_compress_passes_focus_to_generate_summary():
+    """compress() passes focus_topic through to _generate_summary."""
+    compressor = _make_compressor()
+
+    # Track what _generate_summary receives
+    received_kwargs = {}
+    original_generate = compressor._generate_summary
+
+    def tracking_generate(turns, **kwargs):
+        received_kwargs.update(kwargs)
+        return "## Goal\nTest."
+
+    compressor._generate_summary = tracking_generate
+
+    messages = [
+        {"role": "system", "content": "System prompt"},
+        {"role": "user", "content": "first"},
+        {"role": "assistant", "content": "reply1"},
+        {"role": "user", "content": "second"},
+        {"role": "assistant", "content": "reply2"},
+        {"role": "user", "content": "third"},
+        {"role": "assistant", "content": "reply3"},
+        {"role": "user", "content": "fourth"},
+        {"role": "assistant", "content": "reply4"},
+    ]
+
+    compressor.compress(messages, current_tokens=100000, focus_topic="authentication flow")
+
+    assert received_kwargs.get("focus_topic") == "authentication flow"
+
+
+def test_compress_none_focus_by_default():
+    """compress() passes None focus_topic by default."""
+    compressor = _make_compressor()
+
+    received_kwargs = {}
+
+    def tracking_generate(turns, **kwargs):
+        received_kwargs.update(kwargs)
+        return "## Goal\nTest."
+
+    compressor._generate_summary = tracking_generate
+
+    messages = [
+        {"role": "system", "content": "System prompt"},
+        {"role": "user", "content": "first"},
+        {"role": "assistant", "content": "reply1"},
+        {"role": "user", "content": "second"},
+        {"role": "assistant", "content": "reply2"},
+        {"role": "user", "content": "third"},
+        {"role": "assistant", "content": "reply3"},
+        {"role": "user", "content": "fourth"},
+        {"role": "assistant", "content": "reply4"},
+    ]
+
+    compressor.compress(messages, current_tokens=100000)
+
+    assert received_kwargs.get("focus_topic") is None
diff --git a/tests/cli/test_compress_focus.py b/tests/cli/test_compress_focus.py
new file mode 100644
index 0000000000..d5f6c15656
--- /dev/null
+++ b/tests/cli/test_compress_focus.py
@@ -0,0 +1,118 @@
+"""Tests for /compress — guided compression with focus topic.
+
+Inspired by Claude Code's /compact feature.
+""" + +from unittest.mock import MagicMock, patch + +from tests.cli.test_cli_init import _make_cli + + +def _make_history() -> list[dict[str, str]]: + return [ + {"role": "user", "content": "one"}, + {"role": "assistant", "content": "two"}, + {"role": "user", "content": "three"}, + {"role": "assistant", "content": "four"}, + ] + + +def test_focus_topic_extracted_and_passed(capsys): + """Focus topic is extracted from the command and passed to _compress_context.""" + shell = _make_cli() + history = _make_history() + compressed = [history[0], history[-1]] + shell.conversation_history = history + shell.agent = MagicMock() + shell.agent.compression_enabled = True + shell.agent._cached_system_prompt = "" + shell.agent._compress_context.return_value = (compressed, "") + + def _estimate(messages): + if messages is history: + return 100 + return 50 + + with patch("agent.model_metadata.estimate_messages_tokens_rough", side_effect=_estimate): + shell._manual_compress("/compress database schema") + + output = capsys.readouterr().out + assert 'focus: "database schema"' in output + + # Verify focus_topic was passed through + shell.agent._compress_context.assert_called_once() + call_kwargs = shell.agent._compress_context.call_args + assert call_kwargs.kwargs.get("focus_topic") == "database schema" + + +def test_no_focus_topic_when_bare_command(capsys): + """When no focus topic is provided, None is passed.""" + shell = _make_cli() + history = _make_history() + shell.conversation_history = history + shell.agent = MagicMock() + shell.agent.compression_enabled = True + shell.agent._cached_system_prompt = "" + shell.agent._compress_context.return_value = (list(history), "") + + with patch("agent.model_metadata.estimate_messages_tokens_rough", return_value=100): + shell._manual_compress("/compress") + + shell.agent._compress_context.assert_called_once() + call_kwargs = shell.agent._compress_context.call_args + assert call_kwargs.kwargs.get("focus_topic") is None + + +def test_empty_focus_after_command_treated_as_none(capsys): + """Trailing whitespace after /compress does not produce a focus topic.""" + shell = _make_cli() + history = _make_history() + shell.conversation_history = history + shell.agent = MagicMock() + shell.agent.compression_enabled = True + shell.agent._cached_system_prompt = "" + shell.agent._compress_context.return_value = (list(history), "") + + with patch("agent.model_metadata.estimate_messages_tokens_rough", return_value=100): + shell._manual_compress("/compress ") + + shell.agent._compress_context.assert_called_once() + call_kwargs = shell.agent._compress_context.call_args + assert call_kwargs.kwargs.get("focus_topic") is None + + +def test_focus_topic_printed_in_compression_banner(capsys): + """The focus topic shows in the compression progress banner.""" + shell = _make_cli() + history = _make_history() + compressed = [history[0], history[-1]] + shell.conversation_history = history + shell.agent = MagicMock() + shell.agent.compression_enabled = True + shell.agent._cached_system_prompt = "" + shell.agent._compress_context.return_value = (compressed, "") + + with patch("agent.model_metadata.estimate_messages_tokens_rough", return_value=100): + shell._manual_compress("/compress API endpoints") + + output = capsys.readouterr().out + assert 'focus: "API endpoints"' in output + + +def test_no_focus_prints_standard_banner(capsys): + """Without focus, the standard banner (no focus: line) is printed.""" + shell = _make_cli() + history = _make_history() + compressed = [history[0], history[-1]] + 
+    shell.conversation_history = history
+    shell.agent = MagicMock()
+    shell.agent.compression_enabled = True
+    shell.agent._cached_system_prompt = ""
+    shell.agent._compress_context.return_value = (compressed, "")
+
+    with patch("agent.model_metadata.estimate_messages_tokens_rough", return_value=100):
+        shell._manual_compress("/compress")
+
+    output = capsys.readouterr().out
+    assert "focus:" not in output
+    assert "Compressing" in output
diff --git a/tests/gateway/test_compress_focus.py b/tests/gateway/test_compress_focus.py
new file mode 100644
index 0000000000..8a1ee060f5
--- /dev/null
+++ b/tests/gateway/test_compress_focus.py
@@ -0,0 +1,118 @@
+"""Tests for gateway /compress — focus topic on the gateway side."""
+
+from datetime import datetime
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from gateway.config import GatewayConfig, Platform, PlatformConfig
+from gateway.platforms.base import MessageEvent
+from gateway.session import SessionEntry, SessionSource, build_session_key
+
+
+def _make_source() -> SessionSource:
+    return SessionSource(
+        platform=Platform.TELEGRAM,
+        user_id="u1",
+        chat_id="c1",
+        user_name="tester",
+        chat_type="dm",
+    )
+
+
+def _make_event(text: str = "/compress") -> MessageEvent:
+    return MessageEvent(text=text, source=_make_source(), message_id="m1")
+
+
+def _make_history() -> list[dict[str, str]]:
+    return [
+        {"role": "user", "content": "one"},
+        {"role": "assistant", "content": "two"},
+        {"role": "user", "content": "three"},
+        {"role": "assistant", "content": "four"},
+    ]
+
+
+def _make_runner(history: list[dict[str, str]]):
+    from gateway.run import GatewayRunner
+
+    runner = object.__new__(GatewayRunner)
+    runner.config = GatewayConfig(
+        platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="***")}
+    )
+    session_entry = SessionEntry(
+        session_key=build_session_key(_make_source()),
+        session_id="sess-1",
+        created_at=datetime.now(),
+        updated_at=datetime.now(),
+        platform=Platform.TELEGRAM,
+        chat_type="dm",
+    )
+    runner.session_store = MagicMock()
+    runner.session_store.get_or_create_session.return_value = session_entry
+    runner.session_store.load_transcript.return_value = history
+    runner.session_store.rewrite_transcript = MagicMock()
+    runner.session_store.update_session = MagicMock()
+    runner.session_store._save = MagicMock()
+    return runner
+
+
+@pytest.mark.asyncio
+async def test_compress_focus_topic_passed_to_agent():
+    """Focus topic from /compress is passed through to _compress_context."""
+    history = _make_history()
+    compressed = [history[0], history[-1]]
+    runner = _make_runner(history)
+    agent_instance = MagicMock()
+    agent_instance.context_compressor.protect_first_n = 0
+    agent_instance.context_compressor._align_boundary_forward.return_value = 0
+    agent_instance.context_compressor._find_tail_cut_by_tokens.return_value = 2
+    agent_instance.session_id = "sess-1"
+    agent_instance._compress_context.return_value = (compressed, "")
+
+    def _estimate(messages):
+        return 100
+
+    with (
+        patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "***"}),
+        patch("gateway.run._resolve_gateway_model", return_value="test-model"),
+        patch("run_agent.AIAgent", return_value=agent_instance),
+        patch("agent.model_metadata.estimate_messages_tokens_rough", side_effect=_estimate),
+    ):
+        result = await runner._handle_compress_command(_make_event("/compress database schema"))
+
+    # Verify focus_topic was passed
+    agent_instance._compress_context.assert_called_once()
+    call_kwargs = agent_instance._compress_context.call_args
+    assert call_kwargs.kwargs.get("focus_topic") == "database schema"
+
+    # Verify focus is mentioned in response
+    assert 'Focus: "database schema"' in result
+
+
+@pytest.mark.asyncio
+async def test_compress_no_focus_passes_none():
+    """Bare /compress passes focus_topic=None."""
+    history = _make_history()
+    runner = _make_runner(history)
+    agent_instance = MagicMock()
+    agent_instance.context_compressor.protect_first_n = 0
+    agent_instance.context_compressor._align_boundary_forward.return_value = 0
+    agent_instance.context_compressor._find_tail_cut_by_tokens.return_value = 2
+    agent_instance.session_id = "sess-1"
+    agent_instance._compress_context.return_value = (list(history), "")
+
+    with (
+        patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "***"}),
+        patch("gateway.run._resolve_gateway_model", return_value="test-model"),
+        patch("run_agent.AIAgent", return_value=agent_instance),
+        patch("agent.model_metadata.estimate_messages_tokens_rough", return_value=100),
+    ):
+        result = await runner._handle_compress_command(_make_event("/compress"))
+
+    agent_instance._compress_context.assert_called_once()
+    call_kwargs = agent_instance._compress_context.call_args
+    assert call_kwargs.kwargs.get("focus_topic") is None
+
+    # No focus line in response
+    assert "Focus:" not in result
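For reference, a minimal sketch (not part of the diff) of how the focus topic threads through the parameters introduced above. The helper name compact_with_focus is illustrative only; ContextCompressor.compress and focus_topic are the names added by the diff, and an already-constructed compressor is assumed.

# Illustrative sketch, assuming the agent has already built and configured
# a ContextCompressor instance at startup.
from agent.context_compressor import ContextCompressor

def compact_with_focus(compressor: ContextCompressor, messages: list, tokens: int, focus: str = None):
    # "/compress"                  -> focus is None, behaviour unchanged.
    # "/compress database schema"  -> focus="database schema"; the summary prompt
    #   gains the FOCUS TOPIC block and _generate_summary() keeps schema detail
    #   while compressing unrelated turns more aggressively.
    return compressor.compress(messages, current_tokens=tokens, focus_topic=focus)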