fix(compression): restore sane defaults and cap summary at 12K tokens

- threshold: 0.80 → 0.50 (compress at 50% context usage, not 80%)
- target_ratio: 0.40 → 0.20, now relative to the threshold rather than the total context (20% of 50% = 10% of context as tail budget)
- summary ceiling: 32K → 12K tokens (Gemini can't output more than ~12K)
- Updated DEFAULT_CONFIG, config display, example config, and tests
2026-04-25 00:51:20 +00:00 · 2026-03-24 18:48:04 -07:00 · 2026-03-24 18:48:04 -07:00 · 7ca22ea11b
commit 7ca22ea11b
parent ef47531617
5 changed files with 32 additions and 29 deletions
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -163,8 +163,8 @@ DEFAULT_CONFIG = {
    
    "compression": {
        "enabled": True,
-        "threshold": 0.80,            # compress when context usage exceeds this ratio
-        "target_ratio": 0.40,         # fraction of context to preserve as recent tail
+        "threshold": 0.50,            # compress when context usage exceeds this ratio
+        "target_ratio": 0.20,         # fraction of threshold to preserve as recent tail
        "protect_last_n": 20,         # minimum recent messages to keep uncompressed
        "summary_model": "",          # empty = use main configured model
        "summary_provider": "auto",
@@ -1686,8 +1686,8 @@ def show_config():
    enabled = compression.get('enabled', True)
    print(f"  Enabled:      {'yes' if enabled else 'no'}")
    if enabled:
-        print(f"  Threshold:    {compression.get('threshold', 0.80) * 100:.0f}%")
-        print(f"  Target ratio: {compression.get('target_ratio', 0.40) * 100:.0f}% of context preserved")
+        print(f"  Threshold:    {compression.get('threshold', 0.50) * 100:.0f}%")
+        print(f"  Target ratio: {compression.get('target_ratio', 0.20) * 100:.0f}% of threshold preserved")
        print(f"  Protect last: {compression.get('protect_last_n', 20)} messages")
        _sm = compression.get('summary_model', '') or '(main model)'
        print(f"  Model:        {_sm}")