From 4ceab16893e3d77b2388bf5d1db8d9cc26a1307e Mon Sep 17 00:00:00 2001 From: teknium1 <127238744+teknium1@users.noreply.github.com> Date: Wed, 13 May 2026 22:22:21 -0700 Subject: [PATCH] fix(compression): keep default protect_first_n at 3 + align ABC Follow-up on the salvaged feat commit: - Keep the constructor / config / yaml-example default at 3 so existing gateway and CLI users see no behavioural change. PR #13754 (which this builds on) had lowered the default to 2 to chase pre-feature parity in the system-prompt-present case, at the cost of quietly halving the protected head for the gateway path (which strips the system prompt before calling compress()). With the new "system prompt is implicit" semantics, default 3 gives every caller a stable head shape. - agent/context_engine.py: bring the ABC's protect_first_n docstring in line with the new semantics so plugin context engines interpret the config key the same way the built-in compressor does. - tests: adjust the default-value test (3, not 2) and a stale comment; per-test protect_first_n=2/3/1 values added in PR #13754 stay as-is since those tests fix concrete head shapes. 
--- agent/context_compressor.py | 2 +- agent/context_engine.py | 5 +++++ cli-config.yaml.example | 6 +++--- hermes_cli/config.py | 5 +++-- run_agent.py | 2 +- tests/agent/test_context_compressor.py | 16 +++++++++------- 6 files changed, 22 insertions(+), 14 deletions(-) diff --git a/agent/context_compressor.py b/agent/context_compressor.py index 99012c73c1b..df75b8b88ce 100644 --- a/agent/context_compressor.py +++ b/agent/context_compressor.py @@ -405,7 +405,7 @@ class ContextCompressor(ContextEngine): self, model: str, threshold_percent: float = 0.50, - protect_first_n: int = 2, + protect_first_n: int = 3, protect_last_n: int = 20, summary_target_ratio: float = 0.20, quiet_mode: bool = False, diff --git a/agent/context_engine.py b/agent/context_engine.py index bbafcd29c01..2947da54d8c 100644 --- a/agent/context_engine.py +++ b/agent/context_engine.py @@ -55,6 +55,11 @@ class ContextEngine(ABC): # These control the preflight compression check. Subclasses may # override via __init__ or property; defaults are sensible for most # engines. + # + # protect_first_n semantics (since PR #13754): count of non-system head + # messages always preserved verbatim, IN ADDITION to the system prompt + # which is always implicitly protected. Default 3 keeps the + # historical "system + first 3 non-system messages" head shape. threshold_percent: float = 0.75 protect_first_n: int = 3 diff --git a/cli-config.yaml.example b/cli-config.yaml.example index 1bfec39698a..13d9ad9c420 100644 --- a/cli-config.yaml.example +++ b/cli-config.yaml.example @@ -372,9 +372,9 @@ compression: # opening turns may not match how you want the session framed over time. # Set to 0 to preserve ONLY the system prompt (plus the rolling summary # and recent tail) — the cleanest configuration for long-running sessions. - # Default 2 preserves the system prompt plus the first user/assistant - # exchange (≈ 3 messages total when a system prompt is present). 
- protect_first_n: 2 + # Default 3 preserves the system prompt plus the first three non-system + # head messages (pre-feature parity when no system prompt is passed in). + protect_first_n: 3 # To pin a specific model/provider for compression summaries, use the # auxiliary section below (auxiliary.compression.provider / model). diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 3feb2cbddbb..685de3d7341 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -731,13 +731,14 @@ DEFAULT_CONFIG = { "target_ratio": 0.20, # fraction of threshold to preserve as recent tail "protect_last_n": 20, # minimum recent messages to keep uncompressed "hygiene_hard_message_limit": 400, # gateway session-hygiene force-compress threshold by message count - "protect_first_n": 2, # non-system head messages always preserved beyond the system prompt + "protect_first_n": 3, # non-system head messages always preserved # verbatim, in ADDITION to the system prompt # (which is always implicitly protected). Set to # 0 for long-running rolling-compaction sessions # where you want nothing pinned except the # system prompt + rolling summary + recent tail. }, + # Anthropic prompt caching (Claude via OpenRouter or native Anthropic API). # cache_ttl must be "5m" or "1h" (Anthropic-supported tiers); other values are ignored. 
"prompt_caching": { @@ -4867,7 +4868,7 @@ def show_config(): print(f" Threshold: {compression.get('threshold', 0.50) * 100:.0f}%") print(f" Target ratio: {compression.get('target_ratio', 0.20) * 100:.0f}% of threshold preserved") print(f" Protect last: {compression.get('protect_last_n', 20)} messages") - print(f" Protect first: {compression.get('protect_first_n', 2)} non-system head messages") + print(f" Protect first: {compression.get('protect_first_n', 3)} non-system head messages") _aux_comp = config.get('auxiliary', {}).get('compression', {}) _sm = _aux_comp.get('model', '') or '(auto)' print(f" Model: {_sm}") diff --git a/run_agent.py b/run_agent.py index 8c7dfe2b061..4f50cb06e4d 100644 --- a/run_agent.py +++ b/run_agent.py @@ -2122,7 +2122,7 @@ class AIAgent: # is a legitimate (and common) configuration for long-running # rolling-compaction sessions. compression_protect_first = max( - 0, int(_compression_cfg.get("protect_first_n", 2)) + 0, int(_compression_cfg.get("protect_first_n", 3)) ) # Read optional explicit context_length override for the auxiliary diff --git a/tests/agent/test_context_compressor.py b/tests/agent/test_context_compressor.py index 821d3c4c4b7..559cf2237a2 100644 --- a/tests/agent/test_context_compressor.py +++ b/tests/agent/test_context_compressor.py @@ -1299,14 +1299,16 @@ class TestSummaryTargetRatio: c = ContextCompressor(model="test", quiet_mode=True) assert c.protect_last_n == 20 - def test_default_protect_first_n_is_2(self): - """Default protect_first_n is 2 (system + 2 extra non-system messages = - 3 protected messages total, preserving the pre-feature behaviour where - protect_first_n was hardcoded to protect 3 head messages total). + def test_default_protect_first_n_is_3(self): + """Default protect_first_n is 3 (system + 3 extra non-system messages = + 4 protected messages total when a system prompt is present). 
With the + new semantics, the constructor default is 3 — the system prompt is + always implicitly protected ON TOP OF protect_first_n non-system + messages. """ with patch("agent.context_compressor.get_model_context_length", return_value=100_000): c = ContextCompressor(model="test", quiet_mode=True) - assert c.protect_first_n == 2 + assert c.protect_first_n == 3 def test_protect_first_n_override(self): """protect_first_n=0 should be honoured — for users who rely on rolling @@ -1342,8 +1344,8 @@ class TestSummaryTargetRatio: assert result[0]["content"].startswith("System prompt") # The first user/assistant exchange (msg 0, msg 1) should NOT be pinned # as head verbatim — those would have been summarized or absorbed. - # Under default protect_first_n=2, result[1] and result[2] would be - # the literal "msg 0" / "msg 1"; with protect_first_n=0 they aren't. + # Under default protect_first_n=3, result[1..3] would be the literal + # "msg 0" / "msg 1" / "msg 2"; with protect_first_n=0 they aren't. assert result[1].get("content") != "msg 0" # Last 2 messages are tail-protected under protect_last_n=2 assert result[-1]["content"] == msgs[-1]["content"]