fix(compression): keep default protect_first_n at 3 + align ABC

Follow-up on the salvaged feat commit:

- Keep the constructor / config / yaml-example default at 3 so existing gateway and CLI users see no behavioural change. PR #13754 (which this builds on) had lowered the default to 2 to chase pre-feature parity in the system-prompt-present case, at the cost of quietly halving the protected head for the gateway path (which strips the system prompt before calling compress()). With the new "system prompt is implicit" semantics, default 3 gives every caller a stable head shape.
- agent/context_engine.py: bring the ABC's protect_first_n docstring in line with the new semantics so plugin context engines interpret the config key the same way the built-in compressor does.
- tests: adjust the default-value test (3, not 2) and a stale comment; per-test protect_first_n=2/3/1 values added in PR #13754 stay as-is since those tests fix concrete head shapes.
2026-07-04 12:33:08 +00:00 · 2026-05-13 22:22:21 -07:00 · 2026-05-13 22:22:21 -07:00 · 4ceab16893
commit 4ceab16893
parent dee71a31e5
6 changed files with 22 additions and 14 deletions
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@ -405,7 +405,7 @@ class ContextCompressor(ContextEngine):
        self,
        model: str,
        threshold_percent: float = 0.50,
-        protect_first_n: int = 2,
+        protect_first_n: int = 3,
        protect_last_n: int = 20,
        summary_target_ratio: float = 0.20,
        quiet_mode: bool = False,
--- a/agent/context_engine.py
+++ b/agent/context_engine.py
@ -55,6 +55,11 @@ class ContextEngine(ABC):
    # These control the preflight compression check.  Subclasses may
    # override via __init__ or property; defaults are sensible for most
    # engines.
+    #
+    # protect_first_n semantics (since PR #13754): count of non-system head
+    # messages always preserved verbatim, IN ADDITION to the system prompt,
+    # which is always implicitly protected.  Default 3 pins the first 3
+    # non-system messages whether or not a system prompt is present.

    threshold_percent: float = 0.75
    protect_first_n: int = 3
--- a/cli-config.yaml.example
+++ b/cli-config.yaml.example
@ -372,9 +372,9 @@ compression:
  # opening turns may not match how you want the session framed over time.
  # Set to 0 to preserve ONLY the system prompt (plus the rolling summary
  # and recent tail) — the cleanest configuration for long-running sessions.
-  # Default 2 preserves the system prompt plus the first user/assistant
-  # exchange (≈ 3 messages total when a system prompt is present).
-  protect_first_n: 2
+  # Default 3 preserves the system prompt plus the first three non-system
+  # head messages (4 messages total when a system prompt is present).
+  protect_first_n: 3

  # To pin a specific model/provider for compression summaries, use the
  # auxiliary section below (auxiliary.compression.provider / model).
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@ -731,13 +731,14 @@ DEFAULT_CONFIG = {
        "target_ratio": 0.20,         # fraction of threshold to preserve as recent tail
        "protect_last_n": 20,         # minimum recent messages to keep uncompressed
        "hygiene_hard_message_limit": 400,  # gateway session-hygiene force-compress threshold by message count
-        "protect_first_n": 2,         # non-system head messages always preserved beyond the system prompt
+        "protect_first_n": 3,         # non-system head messages always preserved
                                      # verbatim, in ADDITION to the system prompt
                                      # (which is always implicitly protected). Set to
                                      # 0 for long-running rolling-compaction sessions
                                      # where you want nothing pinned except the
                                      # system prompt + rolling summary + recent tail.
    },
+
    # Anthropic prompt caching (Claude via OpenRouter or native Anthropic API).
    # cache_ttl must be "5m" or "1h" (Anthropic-supported tiers); other values are ignored.
    "prompt_caching": {
@ -4867,7 +4868,7 @@ def show_config():
        print(f"  Threshold:    {compression.get('threshold', 0.50) * 100:.0f}%")
        print(f"  Target ratio: {compression.get('target_ratio', 0.20) * 100:.0f}% of threshold preserved")
        print(f"  Protect last: {compression.get('protect_last_n', 20)} messages")
-        print(f"  Protect first: {compression.get('protect_first_n', 2)} non-system head messages")
+        print(f"  Protect first: {compression.get('protect_first_n', 3)} non-system head messages")
        _aux_comp = config.get('auxiliary', {}).get('compression', {})
        _sm = _aux_comp.get('model', '') or '(auto)'
        print(f"  Model:        {_sm}")
--- a/run_agent.py
+++ b/run_agent.py
@ -2122,7 +2122,7 @@ class AIAgent:
        # is a legitimate (and common) configuration for long-running
        # rolling-compaction sessions.
        compression_protect_first = max(
-            0, int(_compression_cfg.get("protect_first_n", 2))
+            0, int(_compression_cfg.get("protect_first_n", 3))
        )

        # Read optional explicit context_length override for the auxiliary
--- a/tests/agent/test_context_compressor.py
+++ b/tests/agent/test_context_compressor.py
@ -1299,14 +1299,16 @@ class TestSummaryTargetRatio:
            c = ContextCompressor(model="test", quiet_mode=True)
        assert c.protect_last_n == 20

-    def test_default_protect_first_n_is_2(self):
-        """Default protect_first_n is 2 (system + 2 extra non-system messages =
-        3 protected messages total, preserving the pre-feature behaviour where
-        protect_first_n was hardcoded to protect 3 head messages total).
+    def test_default_protect_first_n_is_3(self):
+        """Constructor default for protect_first_n is 3.
+
+        Under the new semantics the system prompt is always implicitly
+        protected ON TOP OF protect_first_n non-system messages, so the head
+        is 4 protected messages total when a system prompt is present.
        """
        with patch("agent.context_compressor.get_model_context_length", return_value=100_000):
            c = ContextCompressor(model="test", quiet_mode=True)
-        assert c.protect_first_n == 2
+        assert c.protect_first_n == 3

    def test_protect_first_n_override(self):
        """protect_first_n=0 should be honoured — for users who rely on rolling
@ -1342,8 +1344,8 @@ class TestSummaryTargetRatio:
        assert result[0]["content"].startswith("System prompt")
        # The first user/assistant exchange (msg 0, msg 1) should NOT be pinned
        # as head verbatim — those would have been summarized or absorbed.
-        # Under default protect_first_n=2, result[1] and result[2] would be
-        # the literal "msg 0" / "msg 1"; with protect_first_n=0 they aren't.
+        # Under default protect_first_n=3, result[1..3] would be the literal
+        # "msg 0" / "msg 1" / "msg 2"; with protect_first_n=0 they aren't.
        assert result[1].get("content") != "msg 0"
        # Last 2 messages are tail-protected under protect_last_n=2
        assert result[-1]["content"] == msgs[-1]["content"]