fix(compression): keep default protect_first_n at 3 + align ABC

Follow-up on the salvaged feat commit:

- Keep the constructor / config / yaml-example default at 3 so existing
  gateway and CLI users see no behavioural change. PR #13754 (which this
  builds on) had lowered the default to 2 to chase pre-feature parity in
  the system-prompt-present case, at the cost of quietly halving the
  protected head for the gateway path (which strips the system prompt
  before calling compress()). With the new "system prompt is implicit"
  semantics, default 3 gives every caller a stable head shape.
- agent/context_engine.py: bring the ABC's protect_first_n docstring in
  line with the new semantics so plugin context engines interpret the
  config key the same way the built-in compressor does.
- tests: adjust the default-value test (3, not 2) and a stale comment;
  the per-test protect_first_n=2/3/1 values added in PR #13754 stay as-is
  since those tests pin concrete head shapes.
This commit is contained in:
teknium1 2026-05-13 22:22:21 -07:00 committed by Teknium
parent dee71a31e5
commit 4ceab16893
6 changed files with 22 additions and 14 deletions

View file

@ -405,7 +405,7 @@ class ContextCompressor(ContextEngine):
self, self,
model: str, model: str,
threshold_percent: float = 0.50, threshold_percent: float = 0.50,
protect_first_n: int = 2, protect_first_n: int = 3,
protect_last_n: int = 20, protect_last_n: int = 20,
summary_target_ratio: float = 0.20, summary_target_ratio: float = 0.20,
quiet_mode: bool = False, quiet_mode: bool = False,

View file

@ -55,6 +55,11 @@ class ContextEngine(ABC):
# These control the preflight compression check. Subclasses may # These control the preflight compression check. Subclasses may
# override via __init__ or property; defaults are sensible for most # override via __init__ or property; defaults are sensible for most
# engines. # engines.
#
# protect_first_n semantics (since PR #13754): the number of non-system
# head messages always preserved verbatim, IN ADDITION to the system
# prompt, which is always implicitly protected. Default 3 keeps the
# historical "system + first 3 non-system messages" head shape.
threshold_percent: float = 0.75 threshold_percent: float = 0.75
protect_first_n: int = 3 protect_first_n: int = 3

View file

@ -372,9 +372,9 @@ compression:
# opening turns may not match how you want the session framed over time. # opening turns may not match how you want the session framed over time.
# Set to 0 to preserve ONLY the system prompt (plus the rolling summary # Set to 0 to preserve ONLY the system prompt (plus the rolling summary
# and recent tail) — the cleanest configuration for long-running sessions. # and recent tail) — the cleanest configuration for long-running sessions.
# Default 2 preserves the system prompt plus the first user/assistant # Default 3 preserves the system prompt plus the first three non-system
# exchange (≈ 3 messages total when a system prompt is present). # head messages, matching the pre-feature behaviour.
protect_first_n: 2 protect_first_n: 3
# To pin a specific model/provider for compression summaries, use the # To pin a specific model/provider for compression summaries, use the
# auxiliary section below (auxiliary.compression.provider / model). # auxiliary section below (auxiliary.compression.provider / model).

View file

@ -731,13 +731,14 @@ DEFAULT_CONFIG = {
"target_ratio": 0.20, # fraction of threshold to preserve as recent tail "target_ratio": 0.20, # fraction of threshold to preserve as recent tail
"protect_last_n": 20, # minimum recent messages to keep uncompressed "protect_last_n": 20, # minimum recent messages to keep uncompressed
"hygiene_hard_message_limit": 400, # gateway session-hygiene force-compress threshold by message count "hygiene_hard_message_limit": 400, # gateway session-hygiene force-compress threshold by message count
"protect_first_n": 2, # non-system head messages always preserved beyond the system prompt "protect_first_n": 3, # non-system head messages always preserved
# verbatim, in ADDITION to the system prompt # verbatim, in ADDITION to the system prompt
# (which is always implicitly protected). Set to # (which is always implicitly protected). Set to
# 0 for long-running rolling-compaction sessions # 0 for long-running rolling-compaction sessions
# where you want nothing pinned except the # where you want nothing pinned except the
# system prompt + rolling summary + recent tail. # system prompt + rolling summary + recent tail.
}, },
# Anthropic prompt caching (Claude via OpenRouter or native Anthropic API). # Anthropic prompt caching (Claude via OpenRouter or native Anthropic API).
# cache_ttl must be "5m" or "1h" (Anthropic-supported tiers); other values are ignored. # cache_ttl must be "5m" or "1h" (Anthropic-supported tiers); other values are ignored.
"prompt_caching": { "prompt_caching": {
@ -4867,7 +4868,7 @@ def show_config():
print(f" Threshold: {compression.get('threshold', 0.50) * 100:.0f}%") print(f" Threshold: {compression.get('threshold', 0.50) * 100:.0f}%")
print(f" Target ratio: {compression.get('target_ratio', 0.20) * 100:.0f}% of threshold preserved") print(f" Target ratio: {compression.get('target_ratio', 0.20) * 100:.0f}% of threshold preserved")
print(f" Protect last: {compression.get('protect_last_n', 20)} messages") print(f" Protect last: {compression.get('protect_last_n', 20)} messages")
print(f" Protect first: {compression.get('protect_first_n', 2)} non-system head messages") print(f" Protect first: {compression.get('protect_first_n', 3)} non-system head messages")
_aux_comp = config.get('auxiliary', {}).get('compression', {}) _aux_comp = config.get('auxiliary', {}).get('compression', {})
_sm = _aux_comp.get('model', '') or '(auto)' _sm = _aux_comp.get('model', '') or '(auto)'
print(f" Model: {_sm}") print(f" Model: {_sm}")

View file

@ -2122,7 +2122,7 @@ class AIAgent:
# is a legitimate (and common) configuration for long-running # is a legitimate (and common) configuration for long-running
# rolling-compaction sessions. # rolling-compaction sessions.
compression_protect_first = max( compression_protect_first = max(
0, int(_compression_cfg.get("protect_first_n", 2)) 0, int(_compression_cfg.get("protect_first_n", 3))
) )
# Read optional explicit context_length override for the auxiliary # Read optional explicit context_length override for the auxiliary

View file

@ -1299,14 +1299,16 @@ class TestSummaryTargetRatio:
c = ContextCompressor(model="test", quiet_mode=True) c = ContextCompressor(model="test", quiet_mode=True)
assert c.protect_last_n == 20 assert c.protect_last_n == 20
def test_default_protect_first_n_is_2(self): def test_default_protect_first_n_is_3(self):
"""Default protect_first_n is 2 (system + 2 extra non-system messages = """Default protect_first_n is 3 (system + 3 extra non-system messages =
3 protected messages total, preserving the pre-feature behaviour where 4 protected messages total when a system prompt is present). With the
protect_first_n was hardcoded to protect 3 head messages total). new semantics, the constructor default is 3; the system prompt is
always implicitly protected ON TOP OF protect_first_n non-system
messages.
""" """
with patch("agent.context_compressor.get_model_context_length", return_value=100_000): with patch("agent.context_compressor.get_model_context_length", return_value=100_000):
c = ContextCompressor(model="test", quiet_mode=True) c = ContextCompressor(model="test", quiet_mode=True)
assert c.protect_first_n == 2 assert c.protect_first_n == 3
def test_protect_first_n_override(self): def test_protect_first_n_override(self):
"""protect_first_n=0 should be honoured — for users who rely on rolling """protect_first_n=0 should be honoured — for users who rely on rolling
@ -1342,8 +1344,8 @@ class TestSummaryTargetRatio:
assert result[0]["content"].startswith("System prompt") assert result[0]["content"].startswith("System prompt")
# The first user/assistant exchange (msg 0, msg 1) should NOT be pinned # The first user/assistant exchange (msg 0, msg 1) should NOT be pinned
# as head verbatim — those would have been summarized or absorbed. # as head verbatim — those would have been summarized or absorbed.
# Under default protect_first_n=2, result[1] and result[2] would be # Under default protect_first_n=3, result[1..3] would be the literal
# the literal "msg 0" / "msg 1"; with protect_first_n=0 they aren't. # "msg 0" / "msg 1" / "msg 2"; with protect_first_n=0 they aren't.
assert result[1].get("content") != "msg 0" assert result[1].get("content") != "msg 0"
# Last 2 messages are tail-protected under protect_last_n=2 # Last 2 messages are tail-protected under protect_last_n=2
assert result[-1]["content"] == msgs[-1]["content"] assert result[-1]["content"] == msgs[-1]["content"]