fix(gateway): suppress exact silence tokens without mutating history

2026-07-30 19:09:28 +00:00 · 2026-06-14 02:58:54 -07:00 · 2026-06-14 02:58:54 -07:00 · 293c04fef6
commit 293c04fef6
parent 10bad2faf1
5 changed files with 266 additions and 7 deletions
--- a/gateway/response_filters.py
+++ b/gateway/response_filters.py
@ -0,0 +1,53 @@
+"""Gateway response filtering helpers.
+
+These helpers operate at the gateway boundary: they decide whether a completed
+agent turn should be delivered to the chat, not what should be persisted in the
+conversation history.
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+# Canonical model-emitted control token for intentional silence.
+SILENT_REPLY_TOKEN = "NO_REPLY"
+
+# Whole-response markers, in the upper-case single-spaced form candidates are
+# normalized to, that mean "the agent intentionally chose not to reply".  Keep
+# this set small and explicit; empty output is an error path, not silence.
+LIVE_GATEWAY_SILENT_MARKERS = frozenset({
+    "[SILENT]",
+    "SILENT",
+    "NO_REPLY",
+    "NO REPLY",
+})
+
+
+def _canonical_silence_candidate(text: str) -> str:
+    return " ".join(text.strip().upper().split())
+
+
+def is_intentional_silence_response(response: Any) -> bool:
+    """Return True only when ``response`` is exactly a silence marker.
+
+    Matching ignores case and extra whitespace.  Prose that merely mentions
+    ``NO_REPLY`` or ``[SILENT]`` must be delivered normally.  A blank response
+    is not silence either; it is handled by the empty-response failure path.
+    """
+    if not isinstance(response, str):
+        return False
+    stripped = response.strip()
+    if not stripped:
+        return False
+    if len(stripped) > 64:
+        return False
+    return _canonical_silence_candidate(stripped) in LIVE_GATEWAY_SILENT_MARKERS
+
+
+def is_intentional_silence_agent_result(agent_result: dict | None, response: Any) -> bool:
+    """Silence markers suppress delivery only for successful agent turns."""
+    if not isinstance(agent_result, dict):
+        return False
+    if agent_result.get("failed"):
+        return False
+    return is_intentional_silence_response(response)
--- a/gateway/run.py
+++ b/gateway/run.py
@ -8740,13 +8740,20 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
                return None

            response = agent_result.get("final_response") or ""
+            try:
+                from gateway.response_filters import is_intentional_silence_agent_result
+                _intentional_silence = is_intentional_silence_agent_result(
+                    agent_result, response,
+                )
+            except Exception:
+                _intentional_silence = False

            # Convert the agent's internal "(empty)" sentinel into a
            # user-friendly message.  "(empty)" means the model failed to
            # produce visible content after exhausting all retries (nudge,
            # prefill, empty-retry, fallback).  Sending the raw sentinel
            # looks like a bug; a short explanation is more helpful.
-            if response == "(empty)":
+            if response == "(empty)" and not _intentional_silence:
                response = (
                    "⚠️ The model returned no response after processing tool "
                    "results. This can happen with some models — try again or "
@ -8782,10 +8789,11 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew

            # Normalize empty responses: surface errors, partial failures, and
            # the case where agent did work but returned no text. Fix for #18765.
-            response = _normalize_empty_agent_response(
-                agent_result, response, history_len=len(history),
-            )
-            response = _sanitize_gateway_final_response(source.platform, response)
+            if not _intentional_silence:
+                response = _normalize_empty_agent_response(
+                    agent_result, response, history_len=len(history),
+                )
+                response = _sanitize_gateway_final_response(source.platform, response)

            # Ordering contract: the agent thread already updated the contextvar
            # in conversation_compression.py; propagate to SessionEntry + _save().
@ -8809,7 +8817,7 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
                )
            except Exception:
                _show_reasoning_effective = getattr(self, "_show_reasoning", False)
-            if _show_reasoning_effective and response:
+            if _show_reasoning_effective and response and not _intentional_silence:
                last_reasoning = agent_result.get("last_reasoning")
                if last_reasoning:
                    # Collapse long reasoning to keep messages readable
@ -8839,7 +8847,7 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
            except Exception as _footer_err:
                logger.debug("runtime_footer build failed: %s", _footer_err)
                _footer_line = ""
-            if _footer_line and response and not agent_result.get("already_sent"):
+            if _footer_line and response and not agent_result.get("already_sent") and not _intentional_silence:
                response = f"{response}\n\n{_footer_line}"

            # Emit agent:end hook
@ -9073,6 +9081,18 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
                last_prompt_tokens=agent_result.get("last_prompt_tokens", 0),
            )

+            # Intentional silence is a delivery decision, not a transcript
+            # mutation.  The agent's [SILENT]/NO_REPLY assistant turn above is
+            # still persisted in session history so later turns keep normal
+            # user/assistant alternation; only the outbound chat delivery is
+            # suppressed.
+            if _intentional_silence:
+                logger.info(
+                    "Suppressing intentional silence marker for session %s",
+                    session_entry.session_id,
+                )
+                response = ""
+
            # Auto voice reply: send TTS audio before the text response
            _already_sent = bool(agent_result.get("already_sent"))
            if self._should_send_voice_reply(event, response, agent_messages, already_sent=_already_sent):
--- a/scripts/release.py
+++ b/scripts/release.py
@ -82,6 +82,7 @@ AUTHOR_MAP = {
    "290859878+synapsesx@users.noreply.github.com": "synapsesx",
    "157689911+itsflownium@users.noreply.github.com": "itsflownium",
    "dirtyren@users.noreply.github.com": "dirtyren",
+    "github@aldo.pw": "aldoeliacim",
    "max@c60spaceship.com": "MaxFreedomPollard",
    "achaljhawar03@gmail.com": "achaljhawar",
    "claytonchew@ClaytonMacMiniM4.local": "claytonchew",
--- a/tests/gateway/test_gateway_silence_tokens.py
+++ b/tests/gateway/test_gateway_silence_tokens.py
@ -0,0 +1,165 @@
+"""Gateway intentional-silence token behavior."""
+
+from datetime import datetime
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+import gateway.run as gateway_run
+from gateway.config import GatewayConfig, Platform
+from gateway.platforms.base import MessageEvent
+from gateway.session import SessionEntry, SessionSource
+from gateway.response_filters import (
+    is_intentional_silence_agent_result,
+    is_intentional_silence_response,
+)
+
+
+def _source():
+    return SessionSource(
+        platform=Platform.TELEGRAM,
+        chat_id="-1001",
+        chat_type="group",
+        user_id="12345",
+    )
+
+
+def _event():
+    return MessageEvent(
+        text="side chatter",
+        source=_source(),
+        message_id="msg-42",
+    )
+
+
+def _runner(monkeypatch, tmp_path):
+    runner = gateway_run.GatewayRunner(GatewayConfig())
+    runner.adapters = {}
+    runner._running_agents = {}
+    runner._running_agents_ts = {}
+    runner._pending_messages = {}
+    runner._pending_approvals = {}
+    runner._is_user_authorized = lambda _source: True
+    runner._set_session_env = lambda _context: None
+    runner._handle_active_session_busy_message = AsyncMock(return_value=False)
+    runner._session_db = MagicMock()
+    runner._recover_telegram_topic_thread_id = lambda _source: None
+    runner._cache_session_source = lambda _key, _source: None
+    runner._is_session_run_current = lambda _key, _gen: True
+    runner._reply_anchor_for_event = lambda _event: None
+    runner._get_guild_id = lambda _event: None
+    runner._should_send_voice_reply = lambda *_a, **_kw: False
+    runner.hooks = MagicMock()
+    runner.hooks.emit = AsyncMock()
+
+    runner.session_store = MagicMock()
+    runner.session_store.get_or_create_session.return_value = SessionEntry(
+        session_key="agent:main:telegram:group:-1001:12345",
+        session_id="sess-silent",
+        created_at=datetime.now(),
+        updated_at=datetime.now(),
+        platform=Platform.TELEGRAM,
+        chat_type="group",
+    )
+    runner.session_store.load_transcript.return_value = []
+    runner.session_store.append_to_transcript = MagicMock()
+    runner.session_store.update_session = MagicMock()
+
+    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
+    monkeypatch.setattr(
+        gateway_run, "_resolve_runtime_agent_kwargs", lambda: {"api_key": "fake"}
+    )
+    monkeypatch.setattr(
+        "agent.model_metadata.get_model_context_length",
+        lambda *_args, **_kwargs: 100_000,
+    )
+    return runner
+
+
+def test_exact_silence_tokens_are_intentional_silence():
+    for token in ("[SILENT]", " SILENT ", "NO_REPLY", "no reply"):
+        assert is_intentional_silence_response(token)
+
+
+def test_blank_and_prose_mentions_are_not_silence():
+    assert not is_intentional_silence_response("")
+    assert not is_intentional_silence_response("Use NO_REPLY when no answer is needed.")
+    assert not is_intentional_silence_response("The reply was [SILENT], intentionally.")
+
+
+def test_failed_agent_result_never_counts_as_intentional_silence():
+    assert is_intentional_silence_agent_result({"failed": False}, "NO_REPLY")
+    assert not is_intentional_silence_agent_result({"failed": True}, "NO_REPLY")
+
+
+@pytest.mark.asyncio
+async def test_silence_token_suppresses_delivery_but_preserves_transcript(monkeypatch, tmp_path):
+    runner = _runner(monkeypatch, tmp_path)
+    runner._run_agent = AsyncMock(return_value={
+        "final_response": "[SILENT]",
+        "messages": [
+            {"role": "user", "content": "side chatter"},
+            {"role": "assistant", "content": "[SILENT]"},
+        ],
+        "tools": [],
+        "history_offset": 0,
+        "last_prompt_tokens": 0,
+        "api_calls": 1,
+        "failed": False,
+    })
+
+    response = await runner._handle_message_with_agent(
+        _event(), _source(), "agent:main:telegram:group:-1001:12345", 1
+    )
+
+    assert response == ""
+    appended = [call.args[1] for call in runner.session_store.append_to_transcript.call_args_list]
+    assert {"role": "assistant", "content": "[SILENT]"}.items() <= appended[-1].items()
+    assert [msg["role"] for msg in appended if msg.get("role") in {"user", "assistant"}] == ["user", "assistant"]
+
+
+@pytest.mark.asyncio
+async def test_empty_success_still_gets_empty_response_warning(monkeypatch, tmp_path):
+    runner = _runner(monkeypatch, tmp_path)
+    runner._run_agent = AsyncMock(return_value={
+        "final_response": "",
+        "messages": [
+            {"role": "user", "content": "question"},
+            {"role": "assistant", "content": ""},
+        ],
+        "tools": [],
+        "history_offset": 0,
+        "last_prompt_tokens": 0,
+        "api_calls": 1,
+        "failed": False,
+    })
+
+    response = await runner._handle_message_with_agent(
+        _event(), _source(), "agent:main:telegram:group:-1001:12345", 1
+    )
+
+    assert "no response was generated" in response
+
+
+@pytest.mark.asyncio
+async def test_prose_mentioning_silence_token_is_delivered(monkeypatch, tmp_path):
+    runner = _runner(monkeypatch, tmp_path)
+    text = "Use [SILENT] when no answer is needed."
+    runner._run_agent = AsyncMock(return_value={
+        "final_response": text,
+        "messages": [
+            {"role": "user", "content": "question"},
+            {"role": "assistant", "content": text},
+        ],
+        "tools": [],
+        "history_offset": 0,
+        "last_prompt_tokens": 0,
+        "api_calls": 1,
+        "failed": False,
+    })
+
+    response = await runner._handle_message_with_agent(
+        _event(), _source(), "agent:main:telegram:group:-1001:12345", 1
+    )
+
+    assert response == text
--- a/tests/gateway/test_response_filters.py
+++ b/tests/gateway/test_response_filters.py
@ -0,0 +1,20 @@
+from gateway.response_filters import (
+    is_intentional_silence_agent_result,
+    is_intentional_silence_response,
+)
+
+
+def test_exact_silence_tokens_are_intentional_silence():
+    for token in ("[SILENT]", " SILENT ", "NO_REPLY", "no reply"):
+        assert is_intentional_silence_response(token)
+
+
+def test_blank_and_prose_mentions_are_not_silence():
+    assert not is_intentional_silence_response("")
+    assert not is_intentional_silence_response("Use NO_REPLY when no answer is needed.")
+    assert not is_intentional_silence_response("The reply was [SILENT], intentionally.")
+
+
+def test_failed_agent_result_never_counts_as_intentional_silence():
+    assert is_intentional_silence_agent_result({"failed": False}, "NO_REPLY")
+    assert not is_intentional_silence_agent_result({"failed": True}, "NO_REPLY")