feat: context pressure warnings for CLI and gateway (#2159)

* feat: context pressure warnings for CLI and gateway

User-facing notifications as context approaches the compaction threshold.
Warnings fire at 60% and 85% of the way to compaction — relative to
the configured compression threshold, not the raw context window.

CLI: Formatted line with a progress bar showing distance to compaction.
Cyan at 60% (approaching), bold yellow at 85% (imminent).

  ◐ context ▰▰▰▰▰▰▰▰▰▰▰▰▱▱▱▱▱▱▱▱ 60% to compaction  100k threshold (50%) · approaching compaction
  ⚠ context ▰▰▰▰▰▰▰▰▰▰▰▰▰▰▰▰▰▱▱▱ 85% to compaction  100k threshold (50%) · compaction imminent

Gateway: Plain-text notification sent to the user's chat via the new
status_callback mechanism (asyncio.run_coroutine_threadsafe bridge,
same pattern as step_callback).

Does NOT inject into the message stream. The LLM never sees these
warnings. Flags reset after each compaction cycle.

Files changed:
- agent/display.py — format_context_pressure(), format_context_pressure_gateway()
- run_agent.py — status_callback param, _context_50/70_warned flags,
  _emit_context_pressure(), flag reset in _compress_context()
- gateway/run.py — _status_callback_sync bridge, wired to AIAgent
- tests/test_context_pressure.py — 23 tests

* Merge remote-tracking branch 'origin/main' into hermes/hermes-7ea545bf

---------

Co-authored-by: Test <test@test.com>
This commit is contained in:
Teknium 2026-03-20 08:37:36 -07:00 committed by GitHub
parent d76ebf0ec3
commit c52353cf8a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 430 additions and 0 deletions

View file

@ -612,3 +612,95 @@ def write_tty(text: str) -> None:
except OSError:
sys.stdout.write(text)
sys.stdout.flush()
# =========================================================================
# Context pressure display (CLI user-facing warnings)
# =========================================================================
# ANSI color codes for context pressure tiers
_CYAN = "\033[36m"
_YELLOW = "\033[33m"
_BOLD = "\033[1m"
_DIM_ANSI = "\033[2m"
# Bar characters
_BAR_FILLED = ""
_BAR_EMPTY = ""
_BAR_WIDTH = 20
def format_context_pressure(
compaction_progress: float,
threshold_tokens: int,
threshold_percent: float,
compression_enabled: bool = True,
) -> str:
"""Build a formatted context pressure line for CLI display.
The bar and percentage show progress toward the compaction threshold,
NOT the raw context window. 100% = compaction fires.
Uses ANSI colors:
- cyan at ~60% to compaction = informational
- bold yellow at ~85% to compaction = warning
Args:
compaction_progress: How close to compaction (0.01.0, 1.0 = fires).
threshold_tokens: Compaction threshold in tokens.
threshold_percent: Compaction threshold as a fraction of context window.
compression_enabled: Whether auto-compression is active.
"""
pct_int = int(compaction_progress * 100)
filled = min(int(compaction_progress * _BAR_WIDTH), _BAR_WIDTH)
bar = _BAR_FILLED * filled + _BAR_EMPTY * (_BAR_WIDTH - filled)
threshold_k = f"{threshold_tokens // 1000}k" if threshold_tokens >= 1000 else str(threshold_tokens)
threshold_pct_int = int(threshold_percent * 100)
# Tier styling
if compaction_progress >= 0.85:
color = f"{_BOLD}{_YELLOW}"
icon = ""
if compression_enabled:
hint = "compaction imminent"
else:
hint = "no auto-compaction"
else:
color = _CYAN
icon = ""
hint = "approaching compaction"
return (
f" {color}{icon} context {bar} {pct_int}% to compaction{_ANSI_RESET}"
f" {_DIM_ANSI}{threshold_k} threshold ({threshold_pct_int}%) · {hint}{_ANSI_RESET}"
)
def format_context_pressure_gateway(
compaction_progress: float,
threshold_percent: float,
compression_enabled: bool = True,
) -> str:
"""Build a plain-text context pressure notification for messaging platforms.
No ANSI just Unicode and plain text suitable for Telegram/Discord/etc.
The percentage shows progress toward the compaction threshold.
"""
pct_int = int(compaction_progress * 100)
filled = min(int(compaction_progress * _BAR_WIDTH), _BAR_WIDTH)
bar = _BAR_FILLED * filled + _BAR_EMPTY * (_BAR_WIDTH - filled)
threshold_pct_int = int(threshold_percent * 100)
if compaction_progress >= 0.85:
icon = "⚠️"
if compression_enabled:
hint = f"Context compaction is imminent (threshold: {threshold_pct_int}% of window)."
else:
hint = "Auto-compaction is disabled — context may be truncated."
else:
icon = ""
hint = f"Compaction threshold is at {threshold_pct_int}% of context window."
return f"{icon} Context: {bar} {pct_int}% to compaction\n{hint}"

View file

@ -4539,6 +4539,26 @@ class GatewayRunner:
except Exception as _e:
logger.debug("agent:step hook error: %s", _e)
# Bridge sync status_callback → async adapter.send for context pressure
_status_adapter = self.adapters.get(source.platform)
_status_chat_id = source.chat_id
_status_thread_metadata = {"thread_id": source.thread_id} if source.thread_id else None
def _status_callback_sync(event_type: str, message: str) -> None:
if not _status_adapter:
return
try:
asyncio.run_coroutine_threadsafe(
_status_adapter.send(
_status_chat_id,
message,
metadata=_status_thread_metadata,
),
_loop_for_step,
)
except Exception as _e:
logger.debug("status_callback error (%s): %s", event_type, _e)
def run_sync():
# Pass session_key to process registry via env var so background
# processes can be mapped back to this gateway session
@ -4631,6 +4651,7 @@ class GatewayRunner:
tool_progress_callback=progress_callback if tool_progress_enabled else None,
step_callback=_step_callback_sync if _hooks_ref.loaded_hooks else None,
stream_delta_callback=_stream_delta_cb,
status_callback=_status_callback_sync,
platform=platform_key,
honcho_session_key=session_key,
honcho_manager=honcho_manager,

View file

@ -400,6 +400,7 @@ class AIAgent:
clarify_callback: callable = None,
step_callback: callable = None,
stream_delta_callback: callable = None,
status_callback: callable = None,
max_tokens: int = None,
reasoning_config: Dict[str, Any] = None,
prefill_messages: List[Dict[str, Any]] = None,
@ -522,6 +523,7 @@ class AIAgent:
self.clarify_callback = clarify_callback
self.step_callback = step_callback
self.stream_delta_callback = stream_delta_callback
self.status_callback = status_callback
self._last_reported_tool = None # Track for "new tool" mode
# Tool execution state — allows _vprint during tool execution
@ -571,6 +573,12 @@ class AIAgent:
self._budget_warning_threshold = 0.9 # 90% — urgent, respond now
self._budget_pressure_enabled = True
# Context pressure warnings: notify the USER (not the LLM) as context
# fills up. Purely informational — displayed in CLI output and sent via
# status_callback for gateway platforms. Does NOT inject into messages.
self._context_50_warned = False
self._context_70_warned = False
# Persistent error log -- always writes WARNING+ to ~/.hermes/logs/errors.log
# so tool failures, API errors, etc. are inspectable after the fact.
# In gateway mode, each incoming message creates a new AIAgent instance,
@ -4385,6 +4393,10 @@ class AIAgent:
except Exception as e:
logger.debug("Session DB compression split failed: %s", e)
# Reset context pressure warnings — usage drops after compaction
self._context_50_warned = False
self._context_70_warned = False
return compressed, new_system_prompt
def _execute_tool_calls(self, assistant_message, messages: list, effective_task_id: str, api_call_count: int = 0) -> None:
@ -4965,6 +4977,45 @@ class AIAgent:
)
return None
def _emit_context_pressure(self, compaction_progress: float, compressor) -> None:
"""Notify the user that context is approaching the compaction threshold.
Args:
compaction_progress: How close to compaction (0.01.0, where 1.0 = fires).
compressor: The ContextCompressor instance (for threshold/context info).
Purely user-facing does NOT modify the message stream.
For CLI: prints a formatted line with a progress bar.
For gateway: fires status_callback so the platform can send a chat message.
"""
from agent.display import format_context_pressure, format_context_pressure_gateway
threshold_pct = compressor.threshold_tokens / compressor.context_length if compressor.context_length else 0.5
# CLI output — always shown (these are user-facing status notifications,
# not verbose debug output, so they bypass quiet_mode).
# Gateway users also get the callback below.
if self.platform in (None, "cli"):
line = format_context_pressure(
compaction_progress=compaction_progress,
threshold_tokens=compressor.threshold_tokens,
threshold_percent=threshold_pct,
compression_enabled=self.compression_enabled,
)
self._safe_print(line)
# Gateway / external consumers
if self.status_callback:
try:
msg = format_context_pressure_gateway(
compaction_progress=compaction_progress,
threshold_percent=threshold_pct,
compression_enabled=self.compression_enabled,
)
self.status_callback("context_pressure", msg)
except Exception:
logger.debug("status_callback error in context pressure", exc_info=True)
def _handle_max_iterations(self, messages: list, api_call_count: int) -> str:
"""Request a summary when max iterations are reached. Returns the final response text."""
print(f"⚠️ Reached maximum iterations ({self.max_iterations}). Requesting summary...")
@ -6540,6 +6591,23 @@ class AIAgent:
+ _compressor.last_completion_tokens
+ _new_chars // 3 # conservative: JSON-heavy tool results ≈ 3 chars/token
)
# ── Context pressure warnings (user-facing only) ──────────
# Notify the user (NOT the LLM) as context approaches the
# compaction threshold. Thresholds are relative to where
# compaction fires, not the raw context window.
# Does not inject into messages — just prints to CLI output
# and fires status_callback for gateway platforms.
if _compressor.threshold_tokens > 0:
_compaction_progress = _estimated_next_prompt / _compressor.threshold_tokens
if _compaction_progress >= 0.85 and not self._context_70_warned:
self._context_70_warned = True
self._context_50_warned = True # skip first tier if we jumped past it
self._emit_context_pressure(_compaction_progress, _compressor)
elif _compaction_progress >= 0.60 and not self._context_50_warned:
self._context_50_warned = True
self._emit_context_pressure(_compaction_progress, _compressor)
if self.compression_enabled and _compressor.should_compress(_estimated_next_prompt):
messages, active_system_prompt = self._compress_context(
messages, system_message,

View file

@ -0,0 +1,249 @@
"""Tests for context pressure warnings (user-facing, not injected into messages).
Covers:
- Display formatting (CLI and gateway variants)
- Flag tracking and threshold logic on AIAgent
- Flag reset after compression
- status_callback invocation
"""
import json
from types import SimpleNamespace
from unittest.mock import MagicMock, patch
import pytest
from agent.display import format_context_pressure, format_context_pressure_gateway
from run_agent import AIAgent
# ---------------------------------------------------------------------------
# Display formatting tests
# ---------------------------------------------------------------------------
class TestFormatContextPressure:
"""CLI context pressure display (agent/display.py).
The bar shows progress toward the compaction threshold, not the
raw context window. 60% = 60% of the way to compaction.
"""
def test_60_percent_uses_info_icon(self):
line = format_context_pressure(0.60, 100_000, 0.50)
assert "" in line
assert "60% to compaction" in line
def test_85_percent_uses_warning_icon(self):
line = format_context_pressure(0.85, 100_000, 0.50)
assert "" in line
assert "85% to compaction" in line
def test_bar_length_scales_with_progress(self):
line_60 = format_context_pressure(0.60, 100_000, 0.50)
line_85 = format_context_pressure(0.85, 100_000, 0.50)
assert line_85.count("") > line_60.count("")
def test_shows_threshold_tokens(self):
line = format_context_pressure(0.60, 100_000, 0.50)
assert "100k" in line
def test_small_threshold(self):
line = format_context_pressure(0.60, 500, 0.50)
assert "500" in line
def test_shows_threshold_percent(self):
line = format_context_pressure(0.85, 100_000, 0.50)
assert "50%" in line # threshold percent shown
def test_imminent_hint_at_85(self):
line = format_context_pressure(0.85, 100_000, 0.50)
assert "compaction imminent" in line
def test_approaching_hint_below_85(self):
line = format_context_pressure(0.60, 100_000, 0.80)
assert "approaching compaction" in line
def test_no_compaction_when_disabled(self):
line = format_context_pressure(0.85, 100_000, 0.50, compression_enabled=False)
assert "no auto-compaction" in line
def test_returns_string(self):
result = format_context_pressure(0.65, 128_000, 0.50)
assert isinstance(result, str)
def test_over_100_percent_capped(self):
"""Progress > 1.0 should not break the bar."""
line = format_context_pressure(1.05, 100_000, 0.50)
assert "" in line
assert line.count("") == 20
class TestFormatContextPressureGateway:
"""Gateway (plain text) context pressure display."""
def test_60_percent_informational(self):
msg = format_context_pressure_gateway(0.60, 0.50)
assert "60% to compaction" in msg
assert "50%" in msg # threshold shown
def test_85_percent_warning(self):
msg = format_context_pressure_gateway(0.85, 0.50)
assert "85% to compaction" in msg
assert "imminent" in msg
def test_no_compaction_warning(self):
msg = format_context_pressure_gateway(0.85, 0.50, compression_enabled=False)
assert "disabled" in msg
def test_no_ansi_codes(self):
msg = format_context_pressure_gateway(0.85, 0.50)
assert "\033[" not in msg
def test_has_progress_bar(self):
msg = format_context_pressure_gateway(0.85, 0.50)
assert "" in msg
# ---------------------------------------------------------------------------
# AIAgent context pressure flag tests
# ---------------------------------------------------------------------------
def _make_tool_defs(*names):
return [
{
"type": "function",
"function": {
"name": n,
"description": f"{n} tool",
"parameters": {"type": "object", "properties": {}},
},
}
for n in names
]
@pytest.fixture()
def agent():
"""Minimal AIAgent with mocked internals."""
with (
patch("run_agent.get_tool_definitions", return_value=_make_tool_defs("web_search")),
patch("run_agent.check_toolset_requirements", return_value={}),
patch("run_agent.OpenAI"),
):
a = AIAgent(
api_key="test-key-1234567890",
quiet_mode=True,
skip_context_files=True,
skip_memory=True,
)
a.client = MagicMock()
return a
class TestContextPressureFlags:
"""Context pressure warning flag tracking on AIAgent."""
def test_flags_initialized_false(self, agent):
assert agent._context_50_warned is False
assert agent._context_70_warned is False
def test_emit_calls_status_callback(self, agent):
"""status_callback should be invoked with event type and message."""
cb = MagicMock()
agent.status_callback = cb
compressor = MagicMock()
compressor.context_length = 200_000
compressor.threshold_tokens = 100_000 # 50%
agent._emit_context_pressure(0.85, compressor)
cb.assert_called_once()
args = cb.call_args[0]
assert args[0] == "context_pressure"
assert "85% to compaction" in args[1]
def test_emit_no_callback_no_crash(self, agent):
"""No status_callback set — should not crash."""
agent.status_callback = None
compressor = MagicMock()
compressor.context_length = 200_000
compressor.threshold_tokens = 100_000
# Should not raise
agent._emit_context_pressure(0.60, compressor)
def test_emit_prints_for_cli_platform(self, agent, capsys):
"""CLI platform should always print context pressure, even in quiet_mode."""
agent.quiet_mode = True
agent.platform = "cli"
agent.status_callback = None
compressor = MagicMock()
compressor.context_length = 200_000
compressor.threshold_tokens = 100_000
agent._emit_context_pressure(0.85, compressor)
captured = capsys.readouterr()
assert "" in captured.out
assert "to compaction" in captured.out
def test_emit_skips_print_for_gateway_platform(self, agent, capsys):
"""Gateway platforms get the callback, not CLI print."""
agent.platform = "telegram"
agent.status_callback = None
compressor = MagicMock()
compressor.context_length = 200_000
compressor.threshold_tokens = 100_000
agent._emit_context_pressure(0.85, compressor)
captured = capsys.readouterr()
assert "" not in captured.out
def test_flags_reset_on_compression(self, agent):
"""After _compress_context, context pressure flags should reset."""
agent._context_50_warned = True
agent._context_70_warned = True
agent.compression_enabled = True
# Mock the compressor's compress method to return minimal valid output
agent.context_compressor = MagicMock()
agent.context_compressor.compress.return_value = [
{"role": "user", "content": "Summary of conversation so far."}
]
agent.context_compressor.context_length = 200_000
agent.context_compressor.threshold_tokens = 100_000
# Mock _todo_store
agent._todo_store = MagicMock()
agent._todo_store.format_for_injection.return_value = None
# Mock _build_system_prompt
agent._build_system_prompt = MagicMock(return_value="system prompt")
agent._cached_system_prompt = "old system prompt"
agent._session_db = None
messages = [
{"role": "user", "content": "hello"},
{"role": "assistant", "content": "hi there"},
]
agent._compress_context(messages, "system prompt")
assert agent._context_50_warned is False
assert agent._context_70_warned is False
def test_emit_callback_error_handled(self, agent):
"""If status_callback raises, it should be caught gracefully."""
cb = MagicMock(side_effect=RuntimeError("callback boom"))
agent.status_callback = cb
compressor = MagicMock()
compressor.context_length = 200_000
compressor.threshold_tokens = 100_000
# Should not raise
agent._emit_context_pressure(0.85, compressor)