diff --git a/gateway/run.py b/gateway/run.py index c094fddd6..3428c59f7 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -476,12 +476,7 @@ class GatewayRunner: self._honcho_managers: Dict[str, Any] = {} self._honcho_configs: Dict[str, Any] = {} - # Rate-limit compression warning messages sent to users. - # Keyed by chat_id — value is the timestamp of the last warning sent. - # Prevents the warning from firing on every message when a session - # remains above the threshold after compression. - self._compression_warn_sent: Dict[str, float] = {} - self._compression_warn_cooldown: int = 3600 # seconds (1 hour) + # Ensure tirith security scanner is available (downloads if needed) try: @@ -2354,18 +2349,7 @@ class GatewayRunner: f"{_compress_token_threshold:,}", ) - _hyg_adapter = self.adapters.get(source.platform) _hyg_meta = {"thread_id": source.thread_id} if source.thread_id else None - if _hyg_adapter: - try: - await _hyg_adapter.send( - source.chat_id, - f"🗜️ Session is large ({_msg_count} messages, " - f"~{_approx_tokens:,} tokens). Auto-compressing...", - metadata=_hyg_meta, - ) - except Exception: - pass try: from run_agent import AIAgent @@ -2426,70 +2410,17 @@ class GatewayRunner: f"{_approx_tokens:,}", f"{_new_tokens:,}", ) - if _hyg_adapter: - try: - await _hyg_adapter.send( - source.chat_id, - f"🗜️ Compressed: {_msg_count} → " - f"{_new_count} messages, " - f"~{_approx_tokens:,} → " - f"~{_new_tokens:,} tokens", - metadata=_hyg_meta, - ) - except Exception: - pass - - # Still too large after compression — warn user - # Rate-limited to once per cooldown period per - # chat to avoid spamming on every message. if _new_tokens >= _warn_token_threshold: logger.warning( "Session hygiene: still ~%s tokens after " - "compression — suggesting /reset", + "compression", f"{_new_tokens:,}", ) - _now = time.time() - _last_warn = self._compression_warn_sent.get(source.chat_id, 0) - if _hyg_adapter and _now - _last_warn >= self._compression_warn_cooldown: - self._compression_warn_sent[source.chat_id] = _now - try: - await _hyg_adapter.send( - source.chat_id, - "⚠️ Session is still very large " - "after compression " - f"(~{_new_tokens:,} tokens). " - "Consider using /reset to start " - "fresh if you experience issues.", - metadata=_hyg_meta, - ) - except Exception: - pass except Exception as e: logger.warning( "Session hygiene auto-compress failed: %s", e ) - # Compression failed and session is dangerously large - if _approx_tokens >= _warn_token_threshold: - _hyg_adapter = self.adapters.get(source.platform) - _hyg_meta = {"thread_id": source.thread_id} if source.thread_id else None - _now = time.time() - _last_warn = self._compression_warn_sent.get(source.chat_id, 0) - if _hyg_adapter and _now - _last_warn >= self._compression_warn_cooldown: - self._compression_warn_sent[source.chat_id] = _now - try: - await _hyg_adapter.send( - source.chat_id, - f"⚠️ Session is very large " - f"({_msg_count} messages, " - f"~{_approx_tokens:,} tokens) and " - "auto-compression failed. Consider " - "using /compress or /reset to avoid " - "issues.", - metadata=_hyg_meta, - ) - except Exception: - pass # First-message onboarding -- only on the very first interaction ever if not history and not self.session_store.has_any_sessions(): diff --git a/tests/gateway/test_session_hygiene.py b/tests/gateway/test_session_hygiene.py index 843c0d416..5488296f6 100644 --- a/tests/gateway/test_session_hygiene.py +++ b/tests/gateway/test_session_hygiene.py @@ -212,47 +212,7 @@ class TestSessionHygieneWarnThreshold: assert post_compress_tokens < warn_threshold -class TestCompressionWarnRateLimit: - """Compression warning messages must be rate-limited per chat_id.""" - def _make_runner(self): - from unittest.mock import MagicMock, patch - with patch("gateway.run.load_gateway_config"), \ - patch("gateway.run.SessionStore"), \ - patch("gateway.run.DeliveryRouter"): - from gateway.run import GatewayRunner - runner = GatewayRunner.__new__(GatewayRunner) - runner._compression_warn_sent = {} - runner._compression_warn_cooldown = 3600 - return runner - - def test_first_warn_is_sent(self): - runner = self._make_runner() - now = 1_000_000.0 - last = runner._compression_warn_sent.get("chat:1", 0) - assert now - last >= runner._compression_warn_cooldown - - def test_second_warn_suppressed_within_cooldown(self): - runner = self._make_runner() - now = 1_000_000.0 - runner._compression_warn_sent["chat:1"] = now - 60 # 1 minute ago - last = runner._compression_warn_sent.get("chat:1", 0) - assert now - last < runner._compression_warn_cooldown - - def test_warn_allowed_after_cooldown(self): - runner = self._make_runner() - now = 1_000_000.0 - runner._compression_warn_sent["chat:1"] = now - 3601 # just past cooldown - last = runner._compression_warn_sent.get("chat:1", 0) - assert now - last >= runner._compression_warn_cooldown - - def test_rate_limit_is_per_chat(self): - """Rate-limiting one chat must not suppress warnings for another.""" - runner = self._make_runner() - now = 1_000_000.0 - runner._compression_warn_sent["chat:1"] = now - 60 # suppressed - last_other = runner._compression_warn_sent.get("chat:2", 0) - assert now - last_other >= runner._compression_warn_cooldown class TestEstimatedTokenThreshold: @@ -421,10 +381,6 @@ async def test_session_hygiene_messages_stay_in_originating_topic(monkeypatch, t result = await runner._handle_message(event) assert result == "ok" - assert len(adapter.sent) == 2 - assert adapter.sent[0]["chat_id"] == "-1001" - assert "Session is large" in adapter.sent[0]["content"] - assert adapter.sent[0]["metadata"] == {"thread_id": "17585"} - assert adapter.sent[1]["chat_id"] == "-1001" - assert "Compressed:" in adapter.sent[1]["content"] - assert adapter.sent[1]["metadata"] == {"thread_id": "17585"} + # Compression warnings are no longer sent to users — compression + # happens silently with server-side logging only. + assert len(adapter.sent) == 0