mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-18 04:41:56 +00:00
fix(compression): keep default protect_first_n at 3 + align ABC
Follow-up on the salvaged feat commit:
- Keep the constructor / config / yaml-example default at 3 so existing gateway and CLI users see no behavioural change. PR #13754 (which this builds on) had lowered the default to 2 to chase pre-feature parity in the system-prompt-present case, at the cost of quietly halving the protected head for the gateway path (which strips the system prompt before calling compress()). With the new "system prompt is implicit" semantics, default 3 gives every caller a stable head shape.
- agent/context_engine.py: bring the ABC's protect_first_n docstring in line with the new semantics so plugin context engines interpret the config key the same way the built-in compressor does.
- tests: adjust the default-value test (3, not 2) and a stale comment; per-test protect_first_n=2/3/1 values added in PR #13754 stay as-is since those tests fix concrete head shapes.
This commit is contained in:
parent
dee71a31e5
commit
4ceab16893
6 changed files with 22 additions and 14 deletions
|
|
@ -405,7 +405,7 @@ class ContextCompressor(ContextEngine):
|
||||||
self,
|
self,
|
||||||
model: str,
|
model: str,
|
||||||
threshold_percent: float = 0.50,
|
threshold_percent: float = 0.50,
|
||||||
protect_first_n: int = 2,
|
protect_first_n: int = 3,
|
||||||
protect_last_n: int = 20,
|
protect_last_n: int = 20,
|
||||||
summary_target_ratio: float = 0.20,
|
summary_target_ratio: float = 0.20,
|
||||||
quiet_mode: bool = False,
|
quiet_mode: bool = False,
|
||||||
|
|
|
||||||
|
|
@ -55,6 +55,11 @@ class ContextEngine(ABC):
|
||||||
# These control the preflight compression check. Subclasses may
|
# These control the preflight compression check. Subclasses may
|
||||||
# override via __init__ or property; defaults are sensible for most
|
# override via __init__ or property; defaults are sensible for most
|
||||||
# engines.
|
# engines.
|
||||||
|
#
|
||||||
|
# protect_first_n semantics (since PR #13754): count of non-system head
|
||||||
|
# messages always preserved verbatim, IN ADDITION to the system prompt
|
||||||
|
# which is always implicitly protected. Default 3 keeps the
|
||||||
|
# historical "system + first 3 non-system messages" head shape.
|
||||||
|
|
||||||
threshold_percent: float = 0.75
|
threshold_percent: float = 0.75
|
||||||
protect_first_n: int = 3
|
protect_first_n: int = 3
|
||||||
|
|
|
||||||
|
|
@ -372,9 +372,9 @@ compression:
|
||||||
# opening turns may not match how you want the session framed over time.
|
# opening turns may not match how you want the session framed over time.
|
||||||
# Set to 0 to preserve ONLY the system prompt (plus the rolling summary
|
# Set to 0 to preserve ONLY the system prompt (plus the rolling summary
|
||||||
# and recent tail) — the cleanest configuration for long-running sessions.
|
# and recent tail) — the cleanest configuration for long-running sessions.
|
||||||
# Default 2 preserves the system prompt plus the first user/assistant
|
# Default 3 preserves the system prompt plus the first three non-system
|
||||||
# exchange (≈ 3 messages total when a system prompt is present).
|
# head messages, matching the pre-feature behaviour.
|
||||||
protect_first_n: 2
|
protect_first_n: 3
|
||||||
|
|
||||||
# To pin a specific model/provider for compression summaries, use the
|
# To pin a specific model/provider for compression summaries, use the
|
||||||
# auxiliary section below (auxiliary.compression.provider / model).
|
# auxiliary section below (auxiliary.compression.provider / model).
|
||||||
|
|
|
||||||
|
|
@ -731,13 +731,14 @@ DEFAULT_CONFIG = {
|
||||||
"target_ratio": 0.20, # fraction of threshold to preserve as recent tail
|
"target_ratio": 0.20, # fraction of threshold to preserve as recent tail
|
||||||
"protect_last_n": 20, # minimum recent messages to keep uncompressed
|
"protect_last_n": 20, # minimum recent messages to keep uncompressed
|
||||||
"hygiene_hard_message_limit": 400, # gateway session-hygiene force-compress threshold by message count
|
"hygiene_hard_message_limit": 400, # gateway session-hygiene force-compress threshold by message count
|
||||||
"protect_first_n": 2, # non-system head messages always preserved beyond the system prompt
|
"protect_first_n": 3, # non-system head messages always preserved
|
||||||
# verbatim, in ADDITION to the system prompt
|
# verbatim, in ADDITION to the system prompt
|
||||||
# (which is always implicitly protected). Set to
|
# (which is always implicitly protected). Set to
|
||||||
# 0 for long-running rolling-compaction sessions
|
# 0 for long-running rolling-compaction sessions
|
||||||
# where you want nothing pinned except the
|
# where you want nothing pinned except the
|
||||||
# system prompt + rolling summary + recent tail.
|
# system prompt + rolling summary + recent tail.
|
||||||
},
|
},
|
||||||
|
|
||||||
# Anthropic prompt caching (Claude via OpenRouter or native Anthropic API).
|
# Anthropic prompt caching (Claude via OpenRouter or native Anthropic API).
|
||||||
# cache_ttl must be "5m" or "1h" (Anthropic-supported tiers); other values are ignored.
|
# cache_ttl must be "5m" or "1h" (Anthropic-supported tiers); other values are ignored.
|
||||||
"prompt_caching": {
|
"prompt_caching": {
|
||||||
|
|
@ -4867,7 +4868,7 @@ def show_config():
|
||||||
print(f" Threshold: {compression.get('threshold', 0.50) * 100:.0f}%")
|
print(f" Threshold: {compression.get('threshold', 0.50) * 100:.0f}%")
|
||||||
print(f" Target ratio: {compression.get('target_ratio', 0.20) * 100:.0f}% of threshold preserved")
|
print(f" Target ratio: {compression.get('target_ratio', 0.20) * 100:.0f}% of threshold preserved")
|
||||||
print(f" Protect last: {compression.get('protect_last_n', 20)} messages")
|
print(f" Protect last: {compression.get('protect_last_n', 20)} messages")
|
||||||
print(f" Protect first: {compression.get('protect_first_n', 2)} non-system head messages")
|
print(f" Protect first: {compression.get('protect_first_n', 3)} non-system head messages")
|
||||||
_aux_comp = config.get('auxiliary', {}).get('compression', {})
|
_aux_comp = config.get('auxiliary', {}).get('compression', {})
|
||||||
_sm = _aux_comp.get('model', '') or '(auto)'
|
_sm = _aux_comp.get('model', '') or '(auto)'
|
||||||
print(f" Model: {_sm}")
|
print(f" Model: {_sm}")
|
||||||
|
|
|
||||||
|
|
@ -2122,7 +2122,7 @@ class AIAgent:
|
||||||
# is a legitimate (and common) configuration for long-running
|
# is a legitimate (and common) configuration for long-running
|
||||||
# rolling-compaction sessions.
|
# rolling-compaction sessions.
|
||||||
compression_protect_first = max(
|
compression_protect_first = max(
|
||||||
0, int(_compression_cfg.get("protect_first_n", 2))
|
0, int(_compression_cfg.get("protect_first_n", 3))
|
||||||
)
|
)
|
||||||
|
|
||||||
# Read optional explicit context_length override for the auxiliary
|
# Read optional explicit context_length override for the auxiliary
|
||||||
|
|
|
||||||
|
|
@ -1299,14 +1299,16 @@ class TestSummaryTargetRatio:
|
||||||
c = ContextCompressor(model="test", quiet_mode=True)
|
c = ContextCompressor(model="test", quiet_mode=True)
|
||||||
assert c.protect_last_n == 20
|
assert c.protect_last_n == 20
|
||||||
|
|
||||||
def test_default_protect_first_n_is_2(self):
|
def test_default_protect_first_n_is_3(self):
|
||||||
"""Default protect_first_n is 2 (system + 2 extra non-system messages =
|
"""Default protect_first_n is 3 (system + 3 extra non-system messages =
|
||||||
3 protected messages total, preserving the pre-feature behaviour where
|
4 protected messages total when a system prompt is present). With the
|
||||||
protect_first_n was hardcoded to protect 3 head messages total).
|
new semantics, the constructor default is 3 — the system prompt is
|
||||||
|
always implicitly protected ON TOP OF protect_first_n non-system
|
||||||
|
messages.
|
||||||
"""
|
"""
|
||||||
with patch("agent.context_compressor.get_model_context_length", return_value=100_000):
|
with patch("agent.context_compressor.get_model_context_length", return_value=100_000):
|
||||||
c = ContextCompressor(model="test", quiet_mode=True)
|
c = ContextCompressor(model="test", quiet_mode=True)
|
||||||
assert c.protect_first_n == 2
|
assert c.protect_first_n == 3
|
||||||
|
|
||||||
def test_protect_first_n_override(self):
|
def test_protect_first_n_override(self):
|
||||||
"""protect_first_n=0 should be honoured — for users who rely on rolling
|
"""protect_first_n=0 should be honoured — for users who rely on rolling
|
||||||
|
|
@ -1342,8 +1344,8 @@ class TestSummaryTargetRatio:
|
||||||
assert result[0]["content"].startswith("System prompt")
|
assert result[0]["content"].startswith("System prompt")
|
||||||
# The first user/assistant exchange (msg 0, msg 1) should NOT be pinned
|
# The first user/assistant exchange (msg 0, msg 1) should NOT be pinned
|
||||||
# as head verbatim — those would have been summarized or absorbed.
|
# as head verbatim — those would have been summarized or absorbed.
|
||||||
# Under default protect_first_n=2, result[1] and result[2] would be
|
# Under default protect_first_n=3, result[1..3] would be the literal
|
||||||
# the literal "msg 0" / "msg 1"; with protect_first_n=0 they aren't.
|
# "msg 0" / "msg 1" / "msg 2"; with protect_first_n=0 they aren't.
|
||||||
assert result[1].get("content") != "msg 0"
|
assert result[1].get("content") != "msg 0"
|
||||||
# Last 2 messages are tail-protected under protect_last_n=2
|
# Last 2 messages are tail-protected under protect_last_n=2
|
||||||
assert result[-1]["content"] == msgs[-1]["content"]
|
assert result[-1]["content"] == msgs[-1]["content"]
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue