fix(compression): keep default protect_first_n at 3 + align ABC

Follow-up on the salvaged feat commit:

- Keep the constructor / config / yaml-example default at 3 so existing gateway and CLI users see no behavioural change. PR #13754 (which this builds on) had lowered the default to 2 to chase pre-feature parity in the system-prompt-present case, at the cost of quietly halving the protected head for the gateway path (which strips the system prompt before calling compress()). With the new "system prompt is implicit" semantics, default 3 gives every caller a stable head shape.
- agent/context_engine.py: bring the ABC's protect_first_n docstring in line with the new semantics so plugin context engines interpret the config key the same way the built-in compressor does.
- tests: adjust the default-value test (3, not 2) and a stale comment; per-test protect_first_n=2/3/1 values added in PR #13754 stay as-is since those tests fix concrete head shapes.
2026-05-18 04:41:56 +00:00 · 2026-05-13 22:22:21 -07:00 · 2026-05-13 22:22:21 -07:00 · 4ceab16893
commit 4ceab16893
parent dee71a31e5
6 changed files with 22 additions and 14 deletions
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -731,13 +731,14 @@ DEFAULT_CONFIG = {
        "target_ratio": 0.20,         # fraction of threshold to preserve as recent tail
        "protect_last_n": 20,         # minimum recent messages to keep uncompressed
        "hygiene_hard_message_limit": 400,  # gateway session-hygiene force-compress threshold by message count
-        "protect_first_n": 2,         # non-system head messages always preserved beyond the system prompt
+        "protect_first_n": 3,         # non-system head messages always preserved
                                      # verbatim, in ADDITION to the system prompt
                                      # (which is always implicitly protected). Set to
                                      # 0 for long-running rolling-compaction sessions
                                      # where you want nothing pinned except the
                                      # system prompt + rolling summary + recent tail.
    },
+
    # Anthropic prompt caching (Claude via OpenRouter or native Anthropic API).
    # cache_ttl must be "5m" or "1h" (Anthropic-supported tiers); other values are ignored.
    "prompt_caching": {
@@ -4867,7 +4868,7 @@ def show_config():
        print(f"  Threshold:    {compression.get('threshold', 0.50) * 100:.0f}%")
        print(f"  Target ratio: {compression.get('target_ratio', 0.20) * 100:.0f}% of threshold preserved")
        print(f"  Protect last: {compression.get('protect_last_n', 20)} messages")
-        print(f"  Protect first: {compression.get('protect_first_n', 2)} non-system head messages")
+        print(f"  Protect first: {compression.get('protect_first_n', 3)} non-system head messages")
        _aux_comp = config.get('auxiliary', {}).get('compression', {})
        _sm = _aux_comp.get('model', '') or '(auto)'
        print(f"  Model:        {_sm}")