diff --git a/cli-config.yaml.example b/cli-config.yaml.example index e8e3d30af..836b6da22 100644 --- a/cli-config.yaml.example +++ b/cli-config.yaml.example @@ -323,6 +323,16 @@ compression: # compression of older turns. protect_last_n: 20 + # Number of earliest messages to always preserve verbatim as head (default: 3 = + # system prompt + first user/assistant exchange). Head messages are NEVER + # summarized — they survive every compression indefinitely. This gives stable + # early context for short/medium sessions, but in long-running sessions that + # rely on rolling compaction the opening user turn gets pinned forever, which + # may not match how you want the session framed. Lower to 1 to preserve only + # the system prompt and let the first exchange age out naturally through the + # summary. Minimum: 1 (system prompt must survive); lower values are raised to 1. + protect_first_n: 3 + # To pin a specific model/provider for compression summaries, use the # auxiliary section below (auxiliary.compression.provider / model). diff --git a/hermes_cli/config.py b/hermes_cli/config.py index c87b9f5a9..bdbb73e65 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -473,6 +473,11 @@ DEFAULT_CONFIG = { "threshold": 0.50, # compress when context usage exceeds this ratio "target_ratio": 0.20, # fraction of threshold to preserve as recent tail "protect_last_n": 20, # minimum recent messages to keep uncompressed + "protect_first_n": 3, # head messages always preserved verbatim + # (system prompt + first exchange). Lower this + # to 1 if you rely on rolling compaction for + # long-running sessions and don't want the + # opening user/assistant turn pinned forever. 
}, @@ -3599,6 +3604,7 @@ def show_config(): print(f" Threshold: {compression.get('threshold', 0.50) * 100:.0f}%") print(f" Target ratio: {compression.get('target_ratio', 0.20) * 100:.0f}% of threshold preserved") print(f" Protect last: {compression.get('protect_last_n', 20)} messages") + print(f" Protect first: {compression.get('protect_first_n', 3)} messages") _aux_comp = config.get('auxiliary', {}).get('compression', {}) _sm = _aux_comp.get('model', '') or '(auto)' print(f" Model: {_sm}") diff --git a/run_agent.py b/run_agent.py index c5966a173..f293a0b3b 100644 --- a/run_agent.py +++ b/run_agent.py @@ -1538,6 +1538,12 @@ class AIAgent: compression_enabled = str(_compression_cfg.get("enabled", True)).lower() in ("true", "1", "yes") compression_target_ratio = float(_compression_cfg.get("target_ratio", 0.20)) compression_protect_last = int(_compression_cfg.get("protect_last_n", 20)) + # Floor protect_first_n at 1 — the system prompt must always be + # preserved as head, otherwise compression would strip it on the + # first compaction pass and the model would lose its identity. + compression_protect_first = max( + 1, int(_compression_cfg.get("protect_first_n", 3)) + ) # Read optional explicit context_length override for the auxiliary # compression model. 
Custom endpoints often cannot report this via @@ -1684,7 +1690,7 @@ class AIAgent: self.context_compressor = ContextCompressor( model=self.model, threshold_percent=compression_threshold, - protect_first_n=3, + protect_first_n=compression_protect_first, protect_last_n=compression_protect_last, summary_target_ratio=compression_target_ratio, summary_model_override=None, diff --git a/scripts/release.py b/scripts/release.py index f2a72ea00..a6c11f810 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -183,6 +183,7 @@ AUTHOR_MAP = { "m@statecraft.systems": "mbierling", "balyan.sid@gmail.com": "alt-glitch", "oluwadareab12@gmail.com": "bennytimz", + "simpolism@gmail.com": "simpolism", "simon@simonmarcus.org": "simon-marcus", "xowiekk@gmail.com": "Xowiek", "1243352777@qq.com": "zons-zhaozhy", diff --git a/tests/agent/test_context_compressor.py b/tests/agent/test_context_compressor.py index 0c20dddcd..af72b79f6 100644 --- a/tests/agent/test_context_compressor.py +++ b/tests/agent/test_context_compressor.py @@ -627,6 +627,49 @@ class TestSummaryTargetRatio: c = ContextCompressor(model="test", quiet_mode=True) assert c.protect_last_n == 20 + def test_default_protect_first_n_is_3(self): + """Default protect_first_n should be 3 (system prompt + first exchange).""" + with patch("agent.context_compressor.get_model_context_length", return_value=100_000): + c = ContextCompressor(model="test", quiet_mode=True) + assert c.protect_first_n == 3 + + def test_protect_first_n_override(self): + """protect_first_n=1 should be honoured — for users who rely on rolling + compaction and don't want the opening user/assistant turn pinned as head + indefinitely. 
Only the system prompt survives head-protection.""" + with patch("agent.context_compressor.get_model_context_length", return_value=100_000): + c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=1) + assert c.protect_first_n == 1 + + def test_protect_first_n_1_preserves_only_system_prompt(self): + """End-to-end: when protect_first_n=1, compression should treat only + the first message (system prompt) as head. Messages 1..n-protect_last-1 + become summarization candidates, unlike the default where messages 0-2 + would all be pinned as head.""" + with patch("agent.context_compressor.get_model_context_length", return_value=100_000): + c = ContextCompressor( + model="test", + quiet_mode=True, + protect_first_n=1, + protect_last_n=2, + ) + msgs = ( + [{"role": "system", "content": "System prompt"}] + + [{"role": "user" if i % 2 == 0 else "assistant", "content": f"msg {i}"} + for i in range(8)] + ) + result = c.compress(msgs) + # System prompt (msg[0]) survives as head + assert result[0]["role"] == "system" + assert result[0]["content"].startswith("System prompt") + # The first user/assistant exchange (msg 0, msg 1) should NOT be pinned + # as head verbatim — those would have been summarized or absorbed. + # Under default protect_first_n=3, result[1] and result[2] would be + # the literal "msg 0" / "msg 1"; with protect_first_n=1 they aren't. + assert result[1].get("content") != "msg 0" + # Last 2 messages are tail-protected under protect_last_n=2 + assert result[-1]["content"] == msgs[-1]["content"] + class TestTokenBudgetTailProtection: """Tests for token-budget-based tail protection (PR #6240).