mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix(gateway): remove user-facing compression warnings (#4139)
Auto-compression still runs silently in the background with server-side logging, but no longer sends messages to the user's chat about it. Removed: - 'Session is large... Auto-compressing' pre-compression notification - 'Compressed: N → M messages' post-compression notification - 'Session is still very large after compression' warning - 'Auto-compression failed' warning - Rate-limit tracking (only existed for these warnings)
This commit is contained in:
parent
45396aaa92
commit
cc63b2d1cd
2 changed files with 5 additions and 118 deletions
|
|
@ -476,12 +476,7 @@ class GatewayRunner:
|
|||
self._honcho_managers: Dict[str, Any] = {}
|
||||
self._honcho_configs: Dict[str, Any] = {}
|
||||
|
||||
# Rate-limit compression warning messages sent to users.
|
||||
# Keyed by chat_id — value is the timestamp of the last warning sent.
|
||||
# Prevents the warning from firing on every message when a session
|
||||
# remains above the threshold after compression.
|
||||
self._compression_warn_sent: Dict[str, float] = {}
|
||||
self._compression_warn_cooldown: int = 3600 # seconds (1 hour)
|
||||
|
||||
|
||||
# Ensure tirith security scanner is available (downloads if needed)
|
||||
try:
|
||||
|
|
@ -2354,18 +2349,7 @@ class GatewayRunner:
|
|||
f"{_compress_token_threshold:,}",
|
||||
)
|
||||
|
||||
_hyg_adapter = self.adapters.get(source.platform)
|
||||
_hyg_meta = {"thread_id": source.thread_id} if source.thread_id else None
|
||||
if _hyg_adapter:
|
||||
try:
|
||||
await _hyg_adapter.send(
|
||||
source.chat_id,
|
||||
f"🗜️ Session is large ({_msg_count} messages, "
|
||||
f"~{_approx_tokens:,} tokens). Auto-compressing...",
|
||||
metadata=_hyg_meta,
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
from run_agent import AIAgent
|
||||
|
|
@ -2426,70 +2410,17 @@ class GatewayRunner:
|
|||
f"{_approx_tokens:,}", f"{_new_tokens:,}",
|
||||
)
|
||||
|
||||
if _hyg_adapter:
|
||||
try:
|
||||
await _hyg_adapter.send(
|
||||
source.chat_id,
|
||||
f"🗜️ Compressed: {_msg_count} → "
|
||||
f"{_new_count} messages, "
|
||||
f"~{_approx_tokens:,} → "
|
||||
f"~{_new_tokens:,} tokens",
|
||||
metadata=_hyg_meta,
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Still too large after compression — warn user
|
||||
# Rate-limited to once per cooldown period per
|
||||
# chat to avoid spamming on every message.
|
||||
if _new_tokens >= _warn_token_threshold:
|
||||
logger.warning(
|
||||
"Session hygiene: still ~%s tokens after "
|
||||
"compression — suggesting /reset",
|
||||
"compression",
|
||||
f"{_new_tokens:,}",
|
||||
)
|
||||
_now = time.time()
|
||||
_last_warn = self._compression_warn_sent.get(source.chat_id, 0)
|
||||
if _hyg_adapter and _now - _last_warn >= self._compression_warn_cooldown:
|
||||
self._compression_warn_sent[source.chat_id] = _now
|
||||
try:
|
||||
await _hyg_adapter.send(
|
||||
source.chat_id,
|
||||
"⚠️ Session is still very large "
|
||||
"after compression "
|
||||
f"(~{_new_tokens:,} tokens). "
|
||||
"Consider using /reset to start "
|
||||
"fresh if you experience issues.",
|
||||
metadata=_hyg_meta,
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"Session hygiene auto-compress failed: %s", e
|
||||
)
|
||||
# Compression failed and session is dangerously large
|
||||
if _approx_tokens >= _warn_token_threshold:
|
||||
_hyg_adapter = self.adapters.get(source.platform)
|
||||
_hyg_meta = {"thread_id": source.thread_id} if source.thread_id else None
|
||||
_now = time.time()
|
||||
_last_warn = self._compression_warn_sent.get(source.chat_id, 0)
|
||||
if _hyg_adapter and _now - _last_warn >= self._compression_warn_cooldown:
|
||||
self._compression_warn_sent[source.chat_id] = _now
|
||||
try:
|
||||
await _hyg_adapter.send(
|
||||
source.chat_id,
|
||||
f"⚠️ Session is very large "
|
||||
f"({_msg_count} messages, "
|
||||
f"~{_approx_tokens:,} tokens) and "
|
||||
"auto-compression failed. Consider "
|
||||
"using /compress or /reset to avoid "
|
||||
"issues.",
|
||||
metadata=_hyg_meta,
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# First-message onboarding -- only on the very first interaction ever
|
||||
if not history and not self.session_store.has_any_sessions():
|
||||
|
|
|
|||
|
|
@ -212,47 +212,7 @@ class TestSessionHygieneWarnThreshold:
|
|||
assert post_compress_tokens < warn_threshold
|
||||
|
||||
|
||||
class TestCompressionWarnRateLimit:
|
||||
"""Compression warning messages must be rate-limited per chat_id."""
|
||||
|
||||
def _make_runner(self):
|
||||
from unittest.mock import MagicMock, patch
|
||||
with patch("gateway.run.load_gateway_config"), \
|
||||
patch("gateway.run.SessionStore"), \
|
||||
patch("gateway.run.DeliveryRouter"):
|
||||
from gateway.run import GatewayRunner
|
||||
runner = GatewayRunner.__new__(GatewayRunner)
|
||||
runner._compression_warn_sent = {}
|
||||
runner._compression_warn_cooldown = 3600
|
||||
return runner
|
||||
|
||||
def test_first_warn_is_sent(self):
|
||||
runner = self._make_runner()
|
||||
now = 1_000_000.0
|
||||
last = runner._compression_warn_sent.get("chat:1", 0)
|
||||
assert now - last >= runner._compression_warn_cooldown
|
||||
|
||||
def test_second_warn_suppressed_within_cooldown(self):
|
||||
runner = self._make_runner()
|
||||
now = 1_000_000.0
|
||||
runner._compression_warn_sent["chat:1"] = now - 60 # 1 minute ago
|
||||
last = runner._compression_warn_sent.get("chat:1", 0)
|
||||
assert now - last < runner._compression_warn_cooldown
|
||||
|
||||
def test_warn_allowed_after_cooldown(self):
|
||||
runner = self._make_runner()
|
||||
now = 1_000_000.0
|
||||
runner._compression_warn_sent["chat:1"] = now - 3601 # just past cooldown
|
||||
last = runner._compression_warn_sent.get("chat:1", 0)
|
||||
assert now - last >= runner._compression_warn_cooldown
|
||||
|
||||
def test_rate_limit_is_per_chat(self):
|
||||
"""Rate-limiting one chat must not suppress warnings for another."""
|
||||
runner = self._make_runner()
|
||||
now = 1_000_000.0
|
||||
runner._compression_warn_sent["chat:1"] = now - 60 # suppressed
|
||||
last_other = runner._compression_warn_sent.get("chat:2", 0)
|
||||
assert now - last_other >= runner._compression_warn_cooldown
|
||||
|
||||
|
||||
class TestEstimatedTokenThreshold:
|
||||
|
|
@ -421,10 +381,6 @@ async def test_session_hygiene_messages_stay_in_originating_topic(monkeypatch, t
|
|||
result = await runner._handle_message(event)
|
||||
|
||||
assert result == "ok"
|
||||
assert len(adapter.sent) == 2
|
||||
assert adapter.sent[0]["chat_id"] == "-1001"
|
||||
assert "Session is large" in adapter.sent[0]["content"]
|
||||
assert adapter.sent[0]["metadata"] == {"thread_id": "17585"}
|
||||
assert adapter.sent[1]["chat_id"] == "-1001"
|
||||
assert "Compressed:" in adapter.sent[1]["content"]
|
||||
assert adapter.sent[1]["metadata"] == {"thread_id": "17585"}
|
||||
# Compression warnings are no longer sent to users — compression
|
||||
# happens silently with server-side logging only.
|
||||
assert len(adapter.sent) == 0
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue