feat: configurable custom compaction prompt for context compression

Add a compression.prompt config option that lets users override the
default summarization prompt used during context compression.

What changes:

1. ContextCompressor.__init__() accepts compaction_prompt_override param.
   When set (non-empty string), it replaces the default summarization
   instructions in _generate_summary(). The framing (token target, turns
   to summarize, [CONTEXT SUMMARY]: prefix instruction) stays the same.

2. run_agent.py reads CONTEXT_COMPRESSION_PROMPT env var and passes it
   to ContextCompressor. It also bumps summary_target_tokens from 500
   to 2500 to match the compressor's own default (see diff below).

3. Config wiring — the new 'prompt' key under 'compression' section is
   mapped to CONTEXT_COMPRESSION_PROMPT env var in:
   - cli.py (load_cli_config defaults + env mapping)
   - hermes_cli/config.py (DEFAULT_CONFIG + show_config display)
   - gateway/run.py (gateway env mapping)

Usage in config.yaml:
  compression:
    prompt: 'Your custom summarization instructions here'

Or via environment variable:
  CONTEXT_COMPRESSION_PROMPT='Your custom instructions'

When empty (default), the built-in summarization prompt is used
unchanged. This gives power users control over how context is
compressed without modifying source code.

Inspired by PR #776 by @kshitijk4poor and the research in #499.
This commit is contained in:
teknium1 2026-03-11 05:45:24 -07:00
parent 9149c34a26
commit 32c89fed18
5 changed files with 37 additions and 17 deletions

View file

@@ -34,6 +34,7 @@ class ContextCompressor:
summary_target_tokens: int = 2500,
quiet_mode: bool = False,
summary_model_override: str = None,
compaction_prompt_override: str = None,
base_url: str = "",
):
self.model = model
@@ -55,6 +56,11 @@
self.client, default_model = get_text_auxiliary_client("compression")
self.summary_model = summary_model_override or default_model
self.compaction_prompt = (
compaction_prompt_override.strip()
if compaction_prompt_override and compaction_prompt_override.strip()
else None
)
def update_from_response(self, usage: Dict[str, Any]):
"""Update tracked token usage from API response."""
@@ -103,22 +109,25 @@
parts.append(f"[{role.upper()}]: {content}")
content_to_summarize = "\n\n".join(parts)
prompt = f"""Summarize these conversation turns concisely. This summary will replace these turns in the conversation history.
Write from a neutral perspective describing:
1. What actions were taken (tool calls, searches, file operations)
2. Key information or results obtained
3. Important decisions or findings
4. Relevant data, file names, or outputs
Keep factual and informative. Target ~{self.summary_target_tokens} tokens.
---
TURNS TO SUMMARIZE:
{content_to_summarize}
---
Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
default_instructions = (
"Summarize these conversation turns concisely. This summary will "
"replace these turns in the conversation history.\n\n"
"Write from a neutral perspective describing:\n"
"1. What actions were taken (tool calls, searches, file operations)\n"
"2. Key information or results obtained\n"
"3. Important decisions or findings\n"
"4. Relevant data, file names, or outputs\n\n"
"Keep factual and informative."
)
instructions = self.compaction_prompt or default_instructions
prompt = (
f"{instructions}\n\n"
f"Target ~{self.summary_target_tokens} tokens.\n\n"
"---\n"
f"TURNS TO SUMMARIZE:\n{content_to_summarize}\n"
"---\n\n"
'Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix.'
)
# 1. Try the auxiliary model (cheap/fast)
if self.client:

2
cli.py
View file

@@ -177,6 +177,7 @@ def load_cli_config() -> Dict[str, Any]:
"enabled": True, # Auto-compress when approaching context limit
"threshold": 0.85, # Compress at 85% of model's context limit
"summary_model": "google/gemini-3-flash-preview", # Fast/cheap model for summaries
"prompt": "", # Custom compaction prompt (empty = use default)
},
"agent": {
"max_turns": 90, # Default max tool-calling iterations (shared with subagents)
@@ -350,6 +351,7 @@ def load_cli_config() -> Dict[str, Any]:
"enabled": "CONTEXT_COMPRESSION_ENABLED",
"threshold": "CONTEXT_COMPRESSION_THRESHOLD",
"summary_model": "CONTEXT_COMPRESSION_MODEL",
"prompt": "CONTEXT_COMPRESSION_PROMPT",
"summary_provider": "CONTEXT_COMPRESSION_PROVIDER",
}

View file

@@ -91,6 +91,7 @@ if _config_path.exists():
"enabled": "CONTEXT_COMPRESSION_ENABLED",
"threshold": "CONTEXT_COMPRESSION_THRESHOLD",
"summary_model": "CONTEXT_COMPRESSION_MODEL",
"prompt": "CONTEXT_COMPRESSION_PROMPT",
"summary_provider": "CONTEXT_COMPRESSION_PROVIDER",
}
for _cfg_key, _env_var in _compression_env_map.items():

View file

@@ -122,6 +122,7 @@ DEFAULT_CONFIG = {
"enabled": True,
"threshold": 0.85,
"summary_model": "google/gemini-3-flash-preview",
"prompt": "",
"summary_provider": "auto",
},
@@ -1069,6 +1070,11 @@ def show_config():
if enabled:
print(f" Threshold: {compression.get('threshold', 0.85) * 100:.0f}%")
print(f" Model: {compression.get('summary_model', 'google/gemini-3-flash-preview')}")
custom_prompt = compression.get("prompt", "")
if custom_prompt:
# Show first 60 chars of custom prompt
display_prompt = custom_prompt[:60] + ("..." if len(custom_prompt) > 60 else "")
print(f" Prompt: {display_prompt}")
comp_provider = compression.get('summary_provider', 'auto')
if comp_provider != 'auto':
print(f" Provider: {comp_provider}")

View file

@@ -604,14 +604,16 @@ class AIAgent:
compression_threshold = float(os.getenv("CONTEXT_COMPRESSION_THRESHOLD", "0.85"))
compression_enabled = os.getenv("CONTEXT_COMPRESSION_ENABLED", "true").lower() in ("true", "1", "yes")
compression_summary_model = os.getenv("CONTEXT_COMPRESSION_MODEL") or None
compression_prompt = os.getenv("CONTEXT_COMPRESSION_PROMPT") or None
self.context_compressor = ContextCompressor(
model=self.model,
threshold_percent=compression_threshold,
protect_first_n=3,
protect_last_n=4,
summary_target_tokens=500,
summary_target_tokens=2500,
summary_model_override=compression_summary_model,
compaction_prompt_override=compression_prompt,
quiet_mode=self.quiet_mode,
base_url=self.base_url,
)