diff --git a/agent/context_compressor.py b/agent/context_compressor.py index 99012c73c1b..df75b8b88ce 100644 --- a/agent/context_compressor.py +++ b/agent/context_compressor.py @@ -405,7 +405,7 @@ class ContextCompressor(ContextEngine): self, model: str, threshold_percent: float = 0.50, - protect_first_n: int = 2, + protect_first_n: int = 3, protect_last_n: int = 20, summary_target_ratio: float = 0.20, quiet_mode: bool = False, diff --git a/agent/context_engine.py b/agent/context_engine.py index bbafcd29c01..2947da54d8c 100644 --- a/agent/context_engine.py +++ b/agent/context_engine.py @@ -55,6 +55,11 @@ class ContextEngine(ABC): # These control the preflight compression check. Subclasses may # override via __init__ or property; defaults are sensible for most # engines. + # + # protect_first_n semantics (since PR #13754): count of non-system head + # messages always preserved verbatim, IN ADDITION to the system prompt + # which is always implicitly protected. Default 3 keeps the + # historical "system + first 3 non-system messages" head shape. threshold_percent: float = 0.75 protect_first_n: int = 3 diff --git a/cli-config.yaml.example b/cli-config.yaml.example index 1bfec39698a..13d9ad9c420 100644 --- a/cli-config.yaml.example +++ b/cli-config.yaml.example @@ -372,9 +372,9 @@ compression: # opening turns may not match how you want the session framed over time. # Set to 0 to preserve ONLY the system prompt (plus the rolling summary # and recent tail) — the cleanest configuration for long-running sessions. - # Default 2 preserves the system prompt plus the first user/assistant - # exchange (≈ 3 messages total when a system prompt is present). - protect_first_n: 2 + # Default 3 preserves the system prompt plus the first three non-system + # head messages, matching the pre-feature behaviour. + protect_first_n: 3 # To pin a specific model/provider for compression summaries, use the # auxiliary section below (auxiliary.compression.provider / model). diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 3feb2cbddbb..685de3d7341 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -731,13 +731,14 @@ DEFAULT_CONFIG = { "target_ratio": 0.20, # fraction of threshold to preserve as recent tail "protect_last_n": 20, # minimum recent messages to keep uncompressed "hygiene_hard_message_limit": 400, # gateway session-hygiene force-compress threshold by message count - "protect_first_n": 2, # non-system head messages always preserved beyond the system prompt + "protect_first_n": 3, # non-system head messages always preserved # verbatim, in ADDITION to the system prompt # (which is always implicitly protected). Set to # 0 for long-running rolling-compaction sessions # where you want nothing pinned except the # system prompt + rolling summary + recent tail. }, + # Anthropic prompt caching (Claude via OpenRouter or native Anthropic API). # cache_ttl must be "5m" or "1h" (Anthropic-supported tiers); other values are ignored. "prompt_caching": { @@ -4867,7 +4868,7 @@ def show_config(): print(f" Threshold: {compression.get('threshold', 0.50) * 100:.0f}%") print(f" Target ratio: {compression.get('target_ratio', 0.20) * 100:.0f}% of threshold preserved") print(f" Protect last: {compression.get('protect_last_n', 20)} messages") - print(f" Protect first: {compression.get('protect_first_n', 2)} non-system head messages") + print(f" Protect first: {compression.get('protect_first_n', 3)} non-system head messages") _aux_comp = config.get('auxiliary', {}).get('compression', {}) _sm = _aux_comp.get('model', '') or '(auto)' print(f" Model: {_sm}") diff --git a/run_agent.py b/run_agent.py index 8c7dfe2b061..4f50cb06e4d 100644 --- a/run_agent.py +++ b/run_agent.py @@ -2122,7 +2122,7 @@ class AIAgent: # is a legitimate (and common) configuration for long-running # rolling-compaction sessions. compression_protect_first = max( - 0, int(_compression_cfg.get("protect_first_n", 2)) + 0, int(_compression_cfg.get("protect_first_n", 3)) ) # Read optional explicit context_length override for the auxiliary diff --git a/tests/agent/test_context_compressor.py b/tests/agent/test_context_compressor.py index 821d3c4c4b7..559cf2237a2 100644 --- a/tests/agent/test_context_compressor.py +++ b/tests/agent/test_context_compressor.py @@ -1299,14 +1299,16 @@ class TestSummaryTargetRatio: c = ContextCompressor(model="test", quiet_mode=True) assert c.protect_last_n == 20 - def test_default_protect_first_n_is_2(self): - """Default protect_first_n is 2 (system + 2 extra non-system messages = - 3 protected messages total, preserving the pre-feature behaviour where - protect_first_n was hardcoded to protect 3 head messages total). + def test_default_protect_first_n_is_3(self): + """Default protect_first_n is 3 (system + 3 extra non-system messages = + 4 protected messages total when a system prompt is present). With the + new semantics, the constructor default is 3 — the system prompt is + always implicitly protected ON TOP OF protect_first_n non-system + messages. """ with patch("agent.context_compressor.get_model_context_length", return_value=100_000): c = ContextCompressor(model="test", quiet_mode=True) - assert c.protect_first_n == 2 + assert c.protect_first_n == 3 def test_protect_first_n_override(self): """protect_first_n=0 should be honoured — for users who rely on rolling @@ -1342,8 +1344,8 @@ class TestSummaryTargetRatio: assert result[0]["content"].startswith("System prompt") # The first user/assistant exchange (msg 0, msg 1) should NOT be pinned # as head verbatim — those would have been summarized or absorbed. - # Under default protect_first_n=2, result[1] and result[2] would be - # the literal "msg 0" / "msg 1"; with protect_first_n=0 they aren't. + # Under default protect_first_n=3, result[1..3] would be the literal + # "msg 0" / "msg 1" / "msg 2"; with protect_first_n=0 they aren't. assert result[1].get("content") != "msg 0" # Last 2 messages are tail-protected under protect_last_n=2 assert result[-1]["content"] == msgs[-1]["content"]