mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
feat: configurable custom compaction prompt for context compression
Add a compression.prompt config option that lets users override the
default summarization prompt used during context compression.
What changes:
1. ContextCompressor.__init__() accepts a new compaction_prompt_override
parameter. When set to a string that is non-empty after stripping
whitespace, it replaces the default summarization instructions in
_generate_summary(); otherwise the default instructions are used. The
framing (token target, turns to summarize, [CONTEXT SUMMARY]: prefix
instruction) stays the same.
2. run_agent.py reads the CONTEXT_COMPRESSION_PROMPT env var and passes it
to ContextCompressor. The same call site also changes
summary_target_tokens from 500 to 2500.
3. Config wiring — the new 'prompt' key under the 'compression' section is
mapped to the CONTEXT_COMPRESSION_PROMPT env var in:
- cli.py (load_cli_config defaults + env mapping)
- hermes_cli/config.py (DEFAULT_CONFIG + show_config display)
- gateway/run.py (gateway env mapping)
Usage in config.yaml:
compression:
  prompt: 'Your custom summarization instructions here'
Or via environment variable:
CONTEXT_COMPRESSION_PROMPT='Your custom instructions'
When empty (default), the built-in summarization prompt is used
unchanged. This gives power users control over how context is
compressed without modifying source code.
Inspired by PR #776 by @kshitijk4poor and the research in #499.
This commit is contained in:
parent
9149c34a26
commit
32c89fed18
5 changed files with 37 additions and 17 deletions
|
|
@ -34,6 +34,7 @@ class ContextCompressor:
|
|||
summary_target_tokens: int = 2500,
|
||||
quiet_mode: bool = False,
|
||||
summary_model_override: str = None,
|
||||
compaction_prompt_override: str = None,
|
||||
base_url: str = "",
|
||||
):
|
||||
self.model = model
|
||||
|
|
@ -55,6 +56,11 @@ class ContextCompressor:
|
|||
|
||||
self.client, default_model = get_text_auxiliary_client("compression")
|
||||
self.summary_model = summary_model_override or default_model
|
||||
self.compaction_prompt = (
|
||||
compaction_prompt_override.strip()
|
||||
if compaction_prompt_override and compaction_prompt_override.strip()
|
||||
else None
|
||||
)
|
||||
|
||||
def update_from_response(self, usage: Dict[str, Any]):
|
||||
"""Update tracked token usage from API response."""
|
||||
|
|
@ -103,22 +109,25 @@ class ContextCompressor:
|
|||
parts.append(f"[{role.upper()}]: {content}")
|
||||
|
||||
content_to_summarize = "\n\n".join(parts)
|
||||
prompt = f"""Summarize these conversation turns concisely. This summary will replace these turns in the conversation history.
|
||||
|
||||
Write from a neutral perspective describing:
|
||||
1. What actions were taken (tool calls, searches, file operations)
|
||||
2. Key information or results obtained
|
||||
3. Important decisions or findings
|
||||
4. Relevant data, file names, or outputs
|
||||
|
||||
Keep factual and informative. Target ~{self.summary_target_tokens} tokens.
|
||||
|
||||
---
|
||||
TURNS TO SUMMARIZE:
|
||||
{content_to_summarize}
|
||||
---
|
||||
|
||||
Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
|
||||
default_instructions = (
|
||||
"Summarize these conversation turns concisely. This summary will "
|
||||
"replace these turns in the conversation history.\n\n"
|
||||
"Write from a neutral perspective describing:\n"
|
||||
"1. What actions were taken (tool calls, searches, file operations)\n"
|
||||
"2. Key information or results obtained\n"
|
||||
"3. Important decisions or findings\n"
|
||||
"4. Relevant data, file names, or outputs\n\n"
|
||||
"Keep factual and informative."
|
||||
)
|
||||
instructions = self.compaction_prompt or default_instructions
|
||||
prompt = (
|
||||
f"{instructions}\n\n"
|
||||
f"Target ~{self.summary_target_tokens} tokens.\n\n"
|
||||
"---\n"
|
||||
f"TURNS TO SUMMARIZE:\n{content_to_summarize}\n"
|
||||
"---\n\n"
|
||||
'Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix.'
|
||||
)
|
||||
|
||||
# 1. Try the auxiliary model (cheap/fast)
|
||||
if self.client:
|
||||
|
|
|
|||
2
cli.py
2
cli.py
|
|
@ -177,6 +177,7 @@ def load_cli_config() -> Dict[str, Any]:
|
|||
"enabled": True, # Auto-compress when approaching context limit
|
||||
"threshold": 0.85, # Compress at 85% of model's context limit
|
||||
"summary_model": "google/gemini-3-flash-preview", # Fast/cheap model for summaries
|
||||
"prompt": "", # Custom compaction prompt (empty = use default)
|
||||
},
|
||||
"agent": {
|
||||
"max_turns": 90, # Default max tool-calling iterations (shared with subagents)
|
||||
|
|
@ -350,6 +351,7 @@ def load_cli_config() -> Dict[str, Any]:
|
|||
"enabled": "CONTEXT_COMPRESSION_ENABLED",
|
||||
"threshold": "CONTEXT_COMPRESSION_THRESHOLD",
|
||||
"summary_model": "CONTEXT_COMPRESSION_MODEL",
|
||||
"prompt": "CONTEXT_COMPRESSION_PROMPT",
|
||||
"summary_provider": "CONTEXT_COMPRESSION_PROVIDER",
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -91,6 +91,7 @@ if _config_path.exists():
|
|||
"enabled": "CONTEXT_COMPRESSION_ENABLED",
|
||||
"threshold": "CONTEXT_COMPRESSION_THRESHOLD",
|
||||
"summary_model": "CONTEXT_COMPRESSION_MODEL",
|
||||
"prompt": "CONTEXT_COMPRESSION_PROMPT",
|
||||
"summary_provider": "CONTEXT_COMPRESSION_PROVIDER",
|
||||
}
|
||||
for _cfg_key, _env_var in _compression_env_map.items():
|
||||
|
|
|
|||
|
|
@ -122,6 +122,7 @@ DEFAULT_CONFIG = {
|
|||
"enabled": True,
|
||||
"threshold": 0.85,
|
||||
"summary_model": "google/gemini-3-flash-preview",
|
||||
"prompt": "",
|
||||
"summary_provider": "auto",
|
||||
},
|
||||
|
||||
|
|
@ -1069,6 +1070,11 @@ def show_config():
|
|||
if enabled:
|
||||
print(f" Threshold: {compression.get('threshold', 0.85) * 100:.0f}%")
|
||||
print(f" Model: {compression.get('summary_model', 'google/gemini-3-flash-preview')}")
|
||||
custom_prompt = compression.get("prompt", "")
|
||||
if custom_prompt:
|
||||
# Show first 60 chars of custom prompt
|
||||
display_prompt = custom_prompt[:60] + ("..." if len(custom_prompt) > 60 else "")
|
||||
print(f" Prompt: {display_prompt}")
|
||||
comp_provider = compression.get('summary_provider', 'auto')
|
||||
if comp_provider != 'auto':
|
||||
print(f" Provider: {comp_provider}")
|
||||
|
|
|
|||
|
|
@ -604,14 +604,16 @@ class AIAgent:
|
|||
compression_threshold = float(os.getenv("CONTEXT_COMPRESSION_THRESHOLD", "0.85"))
|
||||
compression_enabled = os.getenv("CONTEXT_COMPRESSION_ENABLED", "true").lower() in ("true", "1", "yes")
|
||||
compression_summary_model = os.getenv("CONTEXT_COMPRESSION_MODEL") or None
|
||||
compression_prompt = os.getenv("CONTEXT_COMPRESSION_PROMPT") or None
|
||||
|
||||
self.context_compressor = ContextCompressor(
|
||||
model=self.model,
|
||||
threshold_percent=compression_threshold,
|
||||
protect_first_n=3,
|
||||
protect_last_n=4,
|
||||
summary_target_tokens=500,
|
||||
summary_target_tokens=2500,
|
||||
summary_model_override=compression_summary_model,
|
||||
compaction_prompt_override=compression_prompt,
|
||||
quiet_mode=self.quiet_mode,
|
||||
base_url=self.base_url,
|
||||
)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue