mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
Merge 46b75960ed into 05d8f11085
This commit is contained in:
commit
efbef35d3b
5 changed files with 67 additions and 1 deletions
|
|
@ -323,6 +323,16 @@ compression:
|
||||||
# compression of older turns.
|
# compression of older turns.
|
||||||
protect_last_n: 20
|
protect_last_n: 20
|
||||||
|
|
||||||
|
# Number of earliest messages to always preserve verbatim as head (default: 3 =
|
||||||
|
# system prompt + first user/assistant exchange). Head messages are NEVER
|
||||||
|
# summarized — they survive every compression indefinitely. This gives stable
|
||||||
|
# early context for short/medium sessions, but in long-running sessions that
|
||||||
|
# rely on rolling compaction the opening user turn gets pinned forever, which
|
||||||
|
# may not match how you want the session framed. Lower to 1 to preserve only
|
||||||
|
# the system prompt and let the first exchange age out naturally through the
|
||||||
|
# summary. Minimum: 1 (system prompt must survive).
|
||||||
|
protect_first_n: 3
|
||||||
|
|
||||||
# To pin a specific model/provider for compression summaries, use the
|
# To pin a specific model/provider for compression summaries, use the
|
||||||
# auxiliary section below (auxiliary.compression.provider / model).
|
# auxiliary section below (auxiliary.compression.provider / model).
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -518,6 +518,11 @@ DEFAULT_CONFIG = {
|
||||||
"threshold": 0.50, # compress when context usage exceeds this ratio
|
"threshold": 0.50, # compress when context usage exceeds this ratio
|
||||||
"target_ratio": 0.20, # fraction of threshold to preserve as recent tail
|
"target_ratio": 0.20, # fraction of threshold to preserve as recent tail
|
||||||
"protect_last_n": 20, # minimum recent messages to keep uncompressed
|
"protect_last_n": 20, # minimum recent messages to keep uncompressed
|
||||||
|
"protect_first_n": 3, # head messages always preserved verbatim
|
||||||
|
# (system prompt + first exchange). Lower this
|
||||||
|
# to 1 if you rely on rolling compaction for
|
||||||
|
# long-running sessions and don't want the
|
||||||
|
# opening user/assistant turn pinned forever.
|
||||||
|
|
||||||
},
|
},
|
||||||
|
|
||||||
|
|
@ -3725,6 +3730,7 @@ def show_config():
|
||||||
print(f" Threshold: {compression.get('threshold', 0.50) * 100:.0f}%")
|
print(f" Threshold: {compression.get('threshold', 0.50) * 100:.0f}%")
|
||||||
print(f" Target ratio: {compression.get('target_ratio', 0.20) * 100:.0f}% of threshold preserved")
|
print(f" Target ratio: {compression.get('target_ratio', 0.20) * 100:.0f}% of threshold preserved")
|
||||||
print(f" Protect last: {compression.get('protect_last_n', 20)} messages")
|
print(f" Protect last: {compression.get('protect_last_n', 20)} messages")
|
||||||
|
print(f" Protect first: {compression.get('protect_first_n', 3)} messages")
|
||||||
_aux_comp = config.get('auxiliary', {}).get('compression', {})
|
_aux_comp = config.get('auxiliary', {}).get('compression', {})
|
||||||
_sm = _aux_comp.get('model', '') or '(auto)'
|
_sm = _aux_comp.get('model', '') or '(auto)'
|
||||||
print(f" Model: {_sm}")
|
print(f" Model: {_sm}")
|
||||||
|
|
|
||||||
|
|
@ -1721,6 +1721,12 @@ class AIAgent:
|
||||||
compression_enabled = str(_compression_cfg.get("enabled", True)).lower() in ("true", "1", "yes")
|
compression_enabled = str(_compression_cfg.get("enabled", True)).lower() in ("true", "1", "yes")
|
||||||
compression_target_ratio = float(_compression_cfg.get("target_ratio", 0.20))
|
compression_target_ratio = float(_compression_cfg.get("target_ratio", 0.20))
|
||||||
compression_protect_last = int(_compression_cfg.get("protect_last_n", 20))
|
compression_protect_last = int(_compression_cfg.get("protect_last_n", 20))
|
||||||
|
# Floor protect_first_n at 1 — the system prompt must always be
|
||||||
|
# preserved as head, otherwise compression would strip it on the
|
||||||
|
# first compaction pass and the model would lose its identity.
|
||||||
|
compression_protect_first = max(
|
||||||
|
1, int(_compression_cfg.get("protect_first_n", 3))
|
||||||
|
)
|
||||||
|
|
||||||
# Read optional explicit context_length override for the auxiliary
|
# Read optional explicit context_length override for the auxiliary
|
||||||
# compression model. Custom endpoints often cannot report this via
|
# compression model. Custom endpoints often cannot report this via
|
||||||
|
|
@ -1867,7 +1873,7 @@ class AIAgent:
|
||||||
self.context_compressor = ContextCompressor(
|
self.context_compressor = ContextCompressor(
|
||||||
model=self.model,
|
model=self.model,
|
||||||
threshold_percent=compression_threshold,
|
threshold_percent=compression_threshold,
|
||||||
protect_first_n=3,
|
protect_first_n=compression_protect_first,
|
||||||
protect_last_n=compression_protect_last,
|
protect_last_n=compression_protect_last,
|
||||||
summary_target_ratio=compression_target_ratio,
|
summary_target_ratio=compression_target_ratio,
|
||||||
summary_model_override=None,
|
summary_model_override=None,
|
||||||
|
|
|
||||||
|
|
@ -253,6 +253,7 @@ AUTHOR_MAP = {
|
||||||
"m@statecraft.systems": "mbierling",
|
"m@statecraft.systems": "mbierling",
|
||||||
"balyan.sid@gmail.com": "alt-glitch",
|
"balyan.sid@gmail.com": "alt-glitch",
|
||||||
"oluwadareab12@gmail.com": "bennytimz",
|
"oluwadareab12@gmail.com": "bennytimz",
|
||||||
|
"simpolism@gmail.com": "simpolism",
|
||||||
"simon@simonmarcus.org": "simon-marcus",
|
"simon@simonmarcus.org": "simon-marcus",
|
||||||
"xowiekk@gmail.com": "Xowiek",
|
"xowiekk@gmail.com": "Xowiek",
|
||||||
"1243352777@qq.com": "zons-zhaozhy",
|
"1243352777@qq.com": "zons-zhaozhy",
|
||||||
|
|
|
||||||
|
|
@ -691,6 +691,49 @@ class TestSummaryTargetRatio:
|
||||||
c = ContextCompressor(model="test", quiet_mode=True)
|
c = ContextCompressor(model="test", quiet_mode=True)
|
||||||
assert c.protect_last_n == 20
|
assert c.protect_last_n == 20
|
||||||
|
|
||||||
|
def test_default_protect_first_n_is_3(self):
    """Without an explicit argument, ``protect_first_n`` defaults to 3.

    Three head messages correspond to the system prompt plus the first
    user/assistant exchange.
    """
    # Pin the reported context length so construction does not depend on
    # a real model lookup.
    context_length_patch = patch(
        "agent.context_compressor.get_model_context_length",
        return_value=100_000,
    )
    with context_length_patch:
        compressor = ContextCompressor(model="test", quiet_mode=True)
    assert compressor.protect_first_n == 3
|
||||||
|
|
||||||
|
def test_protect_first_n_override(self):
    """An explicit ``protect_first_n=1`` is stored as given.

    This serves users who rely on rolling compaction and do not want the
    opening user/assistant turn pinned as head indefinitely; only the
    system prompt then survives head-protection.
    """
    # Pin the reported context length so construction does not depend on
    # a real model lookup.
    context_length_patch = patch(
        "agent.context_compressor.get_model_context_length",
        return_value=100_000,
    )
    with context_length_patch:
        compressor = ContextCompressor(
            model="test",
            quiet_mode=True,
            protect_first_n=1,
        )
    assert compressor.protect_first_n == 1
|
||||||
|
|
||||||
|
def test_protect_first_n_1_preserves_only_system_prompt(self):
    """End-to-end check of head protection with ``protect_first_n=1``.

    Only the first message (the system prompt) is treated as head, so the
    messages between it and the protected tail become summarization
    candidates. Under the default of 3, messages 0-2 would all be pinned
    as head instead.
    """
    # One system prompt followed by eight alternating user/assistant turns.
    conversation = [{"role": "system", "content": "System prompt"}]
    for i in range(8):
        speaker = "user" if i % 2 == 0 else "assistant"
        conversation.append({"role": speaker, "content": f"msg {i}"})

    with patch(
        "agent.context_compressor.get_model_context_length",
        return_value=100_000,
    ):
        compressor = ContextCompressor(
            model="test",
            quiet_mode=True,
            protect_first_n=1,
            protect_last_n=2,
        )
        compressed = compressor.compress(conversation)

    # The system prompt survives as the head message.
    head = compressed[0]
    assert head["role"] == "system"
    assert head["content"].startswith("System prompt")

    # The first user turn must not be pinned verbatim right after the
    # system prompt; with the default protect_first_n=3 the next two
    # entries would be the literal "msg 0" / "msg 1".
    assert compressed[1].get("content") != "msg 0"

    # The final message is tail-protected under protect_last_n=2.
    assert compressed[-1]["content"] == conversation[-1]["content"]
|
||||||
|
|
||||||
|
|
||||||
class TestTokenBudgetTailProtection:
|
class TestTokenBudgetTailProtection:
|
||||||
"""Tests for token-budget-based tail protection (PR #6240).
|
"""Tests for token-budget-based tail protection (PR #6240).
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue