mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
feat: context pressure warnings for CLI and gateway (#2159)
* feat: context pressure warnings for CLI and gateway User-facing notifications as context approaches the compaction threshold. Warnings fire at 60% and 85% of the way to compaction — relative to the configured compression threshold, not the raw context window. CLI: Formatted line with a progress bar showing distance to compaction. Cyan at 60% (approaching), bold yellow at 85% (imminent). ◐ context ▰▰▰▰▰▰▰▰▰▰▰▰▱▱▱▱▱▱▱▱ 60% to compaction 100k threshold (50%) · approaching compaction ⚠ context ▰▰▰▰▰▰▰▰▰▰▰▰▰▰▰▰▰▱▱▱ 85% to compaction 100k threshold (50%) · compaction imminent Gateway: Plain-text notification sent to the user's chat via the new status_callback mechanism (asyncio.run_coroutine_threadsafe bridge, same pattern as step_callback). Does NOT inject into the message stream. The LLM never sees these warnings. Flags reset after each compaction cycle. Files changed: - agent/display.py — format_context_pressure(), format_context_pressure_gateway() - run_agent.py — status_callback param, _context_50/70_warned flags, _emit_context_pressure(), flag reset in _compress_context() - gateway/run.py — _status_callback_sync bridge, wired to AIAgent - tests/test_context_pressure.py — 23 tests * Merge remote-tracking branch 'origin/main' into hermes/hermes-7ea545bf --------- Co-authored-by: Test <test@test.com>
This commit is contained in:
parent
d76ebf0ec3
commit
c52353cf8a
4 changed files with 430 additions and 0 deletions
68
run_agent.py
68
run_agent.py
|
|
@ -400,6 +400,7 @@ class AIAgent:
|
|||
clarify_callback: callable = None,
|
||||
step_callback: callable = None,
|
||||
stream_delta_callback: callable = None,
|
||||
status_callback: callable = None,
|
||||
max_tokens: int = None,
|
||||
reasoning_config: Dict[str, Any] = None,
|
||||
prefill_messages: List[Dict[str, Any]] = None,
|
||||
|
|
@ -522,6 +523,7 @@ class AIAgent:
|
|||
self.clarify_callback = clarify_callback
|
||||
self.step_callback = step_callback
|
||||
self.stream_delta_callback = stream_delta_callback
|
||||
self.status_callback = status_callback
|
||||
self._last_reported_tool = None # Track for "new tool" mode
|
||||
|
||||
# Tool execution state — allows _vprint during tool execution
|
||||
|
|
@ -571,6 +573,12 @@ class AIAgent:
|
|||
self._budget_warning_threshold = 0.9 # 90% — urgent, respond now
|
||||
self._budget_pressure_enabled = True
|
||||
|
||||
# Context pressure warnings: notify the USER (not the LLM) as context
|
||||
# fills up. Purely informational — displayed in CLI output and sent via
|
||||
# status_callback for gateway platforms. Does NOT inject into messages.
|
||||
self._context_50_warned = False
|
||||
self._context_70_warned = False
|
||||
|
||||
# Persistent error log -- always writes WARNING+ to ~/.hermes/logs/errors.log
|
||||
# so tool failures, API errors, etc. are inspectable after the fact.
|
||||
# In gateway mode, each incoming message creates a new AIAgent instance,
|
||||
|
|
@ -4385,6 +4393,10 @@ class AIAgent:
|
|||
except Exception as e:
|
||||
logger.debug("Session DB compression split failed: %s", e)
|
||||
|
||||
# Reset context pressure warnings — usage drops after compaction
|
||||
self._context_50_warned = False
|
||||
self._context_70_warned = False
|
||||
|
||||
return compressed, new_system_prompt
|
||||
|
||||
def _execute_tool_calls(self, assistant_message, messages: list, effective_task_id: str, api_call_count: int = 0) -> None:
|
||||
|
|
@ -4965,6 +4977,45 @@ class AIAgent:
|
|||
)
|
||||
return None
|
||||
|
||||
def _emit_context_pressure(self, compaction_progress: float, compressor) -> None:
|
||||
"""Notify the user that context is approaching the compaction threshold.
|
||||
|
||||
Args:
|
||||
compaction_progress: How close to compaction (0.0–1.0, where 1.0 = fires).
|
||||
compressor: The ContextCompressor instance (for threshold/context info).
|
||||
|
||||
Purely user-facing — does NOT modify the message stream.
|
||||
For CLI: prints a formatted line with a progress bar.
|
||||
For gateway: fires status_callback so the platform can send a chat message.
|
||||
"""
|
||||
from agent.display import format_context_pressure, format_context_pressure_gateway
|
||||
|
||||
threshold_pct = compressor.threshold_tokens / compressor.context_length if compressor.context_length else 0.5
|
||||
|
||||
# CLI output — always shown (these are user-facing status notifications,
|
||||
# not verbose debug output, so they bypass quiet_mode).
|
||||
# Gateway users also get the callback below.
|
||||
if self.platform in (None, "cli"):
|
||||
line = format_context_pressure(
|
||||
compaction_progress=compaction_progress,
|
||||
threshold_tokens=compressor.threshold_tokens,
|
||||
threshold_percent=threshold_pct,
|
||||
compression_enabled=self.compression_enabled,
|
||||
)
|
||||
self._safe_print(line)
|
||||
|
||||
# Gateway / external consumers
|
||||
if self.status_callback:
|
||||
try:
|
||||
msg = format_context_pressure_gateway(
|
||||
compaction_progress=compaction_progress,
|
||||
threshold_percent=threshold_pct,
|
||||
compression_enabled=self.compression_enabled,
|
||||
)
|
||||
self.status_callback("context_pressure", msg)
|
||||
except Exception:
|
||||
logger.debug("status_callback error in context pressure", exc_info=True)
|
||||
|
||||
def _handle_max_iterations(self, messages: list, api_call_count: int) -> str:
|
||||
"""Request a summary when max iterations are reached. Returns the final response text."""
|
||||
print(f"⚠️ Reached maximum iterations ({self.max_iterations}). Requesting summary...")
|
||||
|
|
@ -6540,6 +6591,23 @@ class AIAgent:
|
|||
+ _compressor.last_completion_tokens
|
||||
+ _new_chars // 3 # conservative: JSON-heavy tool results ≈ 3 chars/token
|
||||
)
|
||||
|
||||
# ── Context pressure warnings (user-facing only) ──────────
|
||||
# Notify the user (NOT the LLM) as context approaches the
|
||||
# compaction threshold. Thresholds are relative to where
|
||||
# compaction fires, not the raw context window.
|
||||
# Does not inject into messages — just prints to CLI output
|
||||
# and fires status_callback for gateway platforms.
|
||||
if _compressor.threshold_tokens > 0:
|
||||
_compaction_progress = _estimated_next_prompt / _compressor.threshold_tokens
|
||||
if _compaction_progress >= 0.85 and not self._context_70_warned:
|
||||
self._context_70_warned = True
|
||||
self._context_50_warned = True # skip first tier if we jumped past it
|
||||
self._emit_context_pressure(_compaction_progress, _compressor)
|
||||
elif _compaction_progress >= 0.60 and not self._context_50_warned:
|
||||
self._context_50_warned = True
|
||||
self._emit_context_pressure(_compaction_progress, _compressor)
|
||||
|
||||
if self.compression_enabled and _compressor.should_compress(_estimated_next_prompt):
|
||||
messages, active_system_prompt = self._compress_context(
|
||||
messages, system_message,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue