mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-01 01:51:44 +00:00
fix: improve context compression quality — named constants, tool tracking, degradation warning
Three targeted improvements to the compression system:

1. Replace hardcoded truncation limits with named class constants (_CONTENT_MAX=6000, _CONTENT_HEAD=4000, _CONTENT_TAIL=1500, _TOOL_ARGS_MAX=1500, _TOOL_ARGS_HEAD=1200). The previous limits (3000 chars for message content, 500 for tool-call arguments) heavily truncated the summarizer's input — a 200-line edit was cut to 3000 chars before the summarizer ever saw it.
2. Add a '## Tools & Patterns' section to both compression prompt templates (first-pass and iterative). It preserves working tool invocations, preferred flags, and tool-specific discoveries across compaction boundaries.
3. Warn users on the 2nd and any later compression: 'Session compressed N times — accuracy may degrade. Consider /new to start fresh.'

Ref #499
This commit is contained in:
parent
af4abd2f22
commit
8567031433
2 changed files with 36 additions and 12 deletions
|
|
@ -199,30 +199,39 @@ class ContextCompressor:
|
||||||
budget = int(content_tokens * _SUMMARY_RATIO)
|
budget = int(content_tokens * _SUMMARY_RATIO)
|
||||||
return max(_MIN_SUMMARY_TOKENS, min(budget, self.max_summary_tokens))
|
return max(_MIN_SUMMARY_TOKENS, min(budget, self.max_summary_tokens))
|
||||||
|
|
||||||
|
# Truncation limits for the summarizer input. These bound how much of
|
||||||
|
# each message the summary model sees — the budget is the *summary*
|
||||||
|
# model's context window, not the main model's.
|
||||||
|
_CONTENT_MAX = 6000 # total chars per message body
|
||||||
|
_CONTENT_HEAD = 4000 # chars kept from the start
|
||||||
|
_CONTENT_TAIL = 1500 # chars kept from the end
|
||||||
|
_TOOL_ARGS_MAX = 1500 # tool call argument chars
|
||||||
|
_TOOL_ARGS_HEAD = 1200 # kept from the start of tool args
|
||||||
|
|
||||||
def _serialize_for_summary(self, turns: List[Dict[str, Any]]) -> str:
|
def _serialize_for_summary(self, turns: List[Dict[str, Any]]) -> str:
|
||||||
"""Serialize conversation turns into labeled text for the summarizer.
|
"""Serialize conversation turns into labeled text for the summarizer.
|
||||||
|
|
||||||
Includes tool call arguments and result content (up to 3000 chars
|
Includes tool call arguments and result content (up to
|
||||||
per message) so the summarizer can preserve specific details like
|
``_CONTENT_MAX`` chars per message) so the summarizer can preserve
|
||||||
file paths, commands, and outputs.
|
specific details like file paths, commands, and outputs.
|
||||||
"""
|
"""
|
||||||
parts = []
|
parts = []
|
||||||
for msg in turns:
|
for msg in turns:
|
||||||
role = msg.get("role", "unknown")
|
role = msg.get("role", "unknown")
|
||||||
content = msg.get("content") or ""
|
content = msg.get("content") or ""
|
||||||
|
|
||||||
# Tool results: keep more content than before (3000 chars)
|
# Tool results: keep enough content for the summarizer
|
||||||
if role == "tool":
|
if role == "tool":
|
||||||
tool_id = msg.get("tool_call_id", "")
|
tool_id = msg.get("tool_call_id", "")
|
||||||
if len(content) > 3000:
|
if len(content) > self._CONTENT_MAX:
|
||||||
content = content[:2000] + "\n...[truncated]...\n" + content[-800:]
|
content = content[:self._CONTENT_HEAD] + "\n...[truncated]...\n" + content[-self._CONTENT_TAIL:]
|
||||||
parts.append(f"[TOOL RESULT {tool_id}]: {content}")
|
parts.append(f"[TOOL RESULT {tool_id}]: {content}")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Assistant messages: include tool call names AND arguments
|
# Assistant messages: include tool call names AND arguments
|
||||||
if role == "assistant":
|
if role == "assistant":
|
||||||
if len(content) > 3000:
|
if len(content) > self._CONTENT_MAX:
|
||||||
content = content[:2000] + "\n...[truncated]...\n" + content[-800:]
|
content = content[:self._CONTENT_HEAD] + "\n...[truncated]...\n" + content[-self._CONTENT_TAIL:]
|
||||||
tool_calls = msg.get("tool_calls", [])
|
tool_calls = msg.get("tool_calls", [])
|
||||||
if tool_calls:
|
if tool_calls:
|
||||||
tc_parts = []
|
tc_parts = []
|
||||||
|
|
@ -232,8 +241,8 @@ class ContextCompressor:
|
||||||
name = fn.get("name", "?")
|
name = fn.get("name", "?")
|
||||||
args = fn.get("arguments", "")
|
args = fn.get("arguments", "")
|
||||||
# Truncate long arguments but keep enough for context
|
# Truncate long arguments but keep enough for context
|
||||||
if len(args) > 500:
|
if len(args) > self._TOOL_ARGS_MAX:
|
||||||
args = args[:400] + "..."
|
args = args[:self._TOOL_ARGS_HEAD] + "..."
|
||||||
tc_parts.append(f" {name}({args})")
|
tc_parts.append(f" {name}({args})")
|
||||||
else:
|
else:
|
||||||
fn = getattr(tc, "function", None)
|
fn = getattr(tc, "function", None)
|
||||||
|
|
@ -244,8 +253,8 @@ class ContextCompressor:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# User and other roles
|
# User and other roles
|
||||||
if len(content) > 3000:
|
if len(content) > self._CONTENT_MAX:
|
||||||
content = content[:2000] + "\n...[truncated]...\n" + content[-800:]
|
content = content[:self._CONTENT_HEAD] + "\n...[truncated]...\n" + content[-self._CONTENT_TAIL:]
|
||||||
parts.append(f"[{role.upper()}]: {content}")
|
parts.append(f"[{role.upper()}]: {content}")
|
||||||
|
|
||||||
return "\n\n".join(parts)
|
return "\n\n".join(parts)
|
||||||
|
|
@ -310,6 +319,9 @@ Update the summary using this exact structure. PRESERVE all existing information
|
||||||
## Critical Context
|
## Critical Context
|
||||||
[Any specific values, error messages, configuration details, or data that would be lost without explicit preservation]
|
[Any specific values, error messages, configuration details, or data that would be lost without explicit preservation]
|
||||||
|
|
||||||
|
## Tools & Patterns
|
||||||
|
[Which tools were used, how they were used effectively, and any tool-specific discoveries. Accumulate across compactions.]
|
||||||
|
|
||||||
Target ~{summary_budget} tokens. Be specific — include file paths, command outputs, error messages, and concrete values rather than vague descriptions.
|
Target ~{summary_budget} tokens. Be specific — include file paths, command outputs, error messages, and concrete values rather than vague descriptions.
|
||||||
|
|
||||||
Write only the summary body. Do not include any preamble or prefix."""
|
Write only the summary body. Do not include any preamble or prefix."""
|
||||||
|
|
@ -348,6 +360,9 @@ Use this exact structure:
|
||||||
## Critical Context
|
## Critical Context
|
||||||
[Any specific values, error messages, configuration details, or data that would be lost without explicit preservation]
|
[Any specific values, error messages, configuration details, or data that would be lost without explicit preservation]
|
||||||
|
|
||||||
|
## Tools & Patterns
|
||||||
|
[Which tools were used, how they were used effectively, and any tool-specific discoveries (e.g., preferred flags, working invocations, successful command patterns)]
|
||||||
|
|
||||||
Target ~{summary_budget} tokens. Be specific — include file paths, command outputs, error messages, and concrete values rather than vague descriptions. The goal is to prevent the next assistant from repeating work or losing important details.
|
Target ~{summary_budget} tokens. Be specific — include file paths, command outputs, error messages, and concrete values rather than vague descriptions. The goal is to prevent the next assistant from repeating work or losing important details.
|
||||||
|
|
||||||
Write only the summary body. Do not include any preamble or prefix."""
|
Write only the summary body. Do not include any preamble or prefix."""
|
||||||
|
|
|
||||||
|
|
@ -6013,6 +6013,15 @@ class AIAgent:
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning("Session DB compression split failed — new session will NOT be indexed: %s", e)
|
logger.warning("Session DB compression split failed — new session will NOT be indexed: %s", e)
|
||||||
|
|
||||||
|
# Warn on repeated compressions (quality degrades with each pass)
|
||||||
|
_cc = self.context_compressor.compression_count
|
||||||
|
if _cc >= 2:
|
||||||
|
self._vprint(
|
||||||
|
f"{self.log_prefix}⚠️ Session compressed {_cc} times — "
|
||||||
|
f"accuracy may degrade. Consider /new to start fresh.",
|
||||||
|
force=True,
|
||||||
|
)
|
||||||
|
|
||||||
# Update token estimate after compaction so pressure calculations
|
# Update token estimate after compaction so pressure calculations
|
||||||
# use the post-compression count, not the stale pre-compression one.
|
# use the post-compression count, not the stale pre-compression one.
|
||||||
_compressed_est = (
|
_compressed_est = (
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue