Quiet noisy Telegram gateway errors

This commit is contained in:
Bob Yang 2026-05-11 22:58:42 +02:00 committed by Teknium
parent f1cefad8c2
commit 8a80eee02d
2 changed files with 235 additions and 1 deletions

View file

@ -66,6 +66,144 @@ _PLATFORM_CONNECT_TIMEOUT_SECS_DEFAULT = 30.0
_ADAPTER_DISCONNECT_TIMEOUT_SECS_DEFAULT = 5.0
_TELEGRAM_COMMAND_MENTION_RE = re.compile(r"(?<![\w:/])/([A-Za-z0-9][A-Za-z0-9_-]*)")
_TELEGRAM_NOISY_STATUS_RE = re.compile(
r"(" # transient/auxiliary status that should stay in logs, not Telegram chat
r"auxiliary\s+.+\s+failed"
r"|compression\s+summary\s+failed"
r"|fallback\s+context\s+marker"
r"|configured\s+compression\s+model\s+.+\s+failed"
r"|no\s+auxiliary\s+llm\s+provider\s+configured"
r"|auto-lowered\s+compression\s+threshold"
r"|preflight\s+compression"
r"|rate\s+limited\.\s+waiting\s+\d"
r"|retrying\s+in\s+\d"
r"|max\s+retries\s+\(\d+\).*(?:trying\s+fallback|exhausted|invalid\s+responses)"
r"|stream\s+(?:drop|drop\s+mid\s+tool-call).+retry\s+\d"
r"|stale\s+connections\s+from\s+a\s+previous\s+provider\s+issue"
r")",
re.IGNORECASE | re.DOTALL,
)
_GATEWAY_PROVIDER_ERROR_RE = re.compile(
r"(" # infrastructure/provider error preambles, not ordinary assistant prose
r"api\s+(?:call\s+)?failed"
r"|provider\s+authentication\s+failed"
r"|non-retryable\s+error"
r"|rate\s+limited\s+after\s+\d+\s+retries"
r"|error\s+code\s*:"
r"|\bhttp\s*\d{3}\b"
r"|incorrect\s+api\s+key"
r"|invalid\s+api\s+key"
r")",
re.IGNORECASE,
)
_GATEWAY_PROVIDER_POLICY_RE = re.compile(
r"(" # raw provider policy/safety bodies are noisy and may be sensitive
r"cybersecurity\s+risk"
r"|security\s+policy"
r"|safety\s+policy"
r"|policy\s+violation"
r"|violat(?:e|es|ed|ion)"
r"|blocked\s+(?:because|by|under)"
r"|request\s+(?:was\s+)?(?:blocked|rejected)"
r"|disallowed"
r"|moderation"
r")",
re.IGNORECASE,
)
_GATEWAY_AUTH_ERROR_RE = re.compile(
r"(provider\s+authentication\s+failed|incorrect\s+api\s+key|invalid\s+api\s+key|\b401\b)",
re.IGNORECASE,
)
_GATEWAY_RATE_LIMIT_RE = re.compile(
r"(rate\s+limit|rate-limited|\b429\b|quota|usage\s+limit)",
re.IGNORECASE,
)
_GATEWAY_SECRET_PATTERNS = (
re.compile(r"\bsk-[A-Za-z0-9][A-Za-z0-9_\-]{12,}\b"),
re.compile(r"\bgh[pousr]_[A-Za-z0-9_]{20,}\b"),
re.compile(r"\bxox[baprs]-[A-Za-z0-9\-]{20,}\b"),
re.compile(r"\bhf_[A-Za-z0-9]{20,}\b"),
re.compile(r"\bglpat-[A-Za-z0-9_\-]{20,}\b"),
re.compile(r"(?i)\b(Bearer\s+)[A-Za-z0-9._\-]{20,}\b"),
)
def _gateway_platform_value(platform: Any) -> str:
"""Return a normalized gateway platform value for enums or raw strings."""
return str(getattr(platform, "value", platform) or "").strip().lower()
def _redact_gateway_user_facing_secrets(text: str) -> str:
"""Best-effort secret redaction before text can leave the gateway."""
redacted = str(text or "")
for pattern in _GATEWAY_SECRET_PATTERNS:
redacted = pattern.sub(lambda m: (m.group(1) if m.lastindex else "") + "[REDACTED]", redacted)
return redacted
def _gateway_provider_error_reply(text: str) -> str:
"""Map raw provider/API errors to a short user-safe Telegram reply."""
if _GATEWAY_AUTH_ERROR_RE.search(text):
return (
"⚠️ Provider authentication failed. Check the configured credentials; "
"raw provider details are in the gateway logs."
)
if _GATEWAY_PROVIDER_POLICY_RE.search(text):
return (
"⚠️ The model provider rejected the request. I kept the raw provider "
"error out of chat; check gateway logs for details or try rephrasing."
)
if _GATEWAY_RATE_LIMIT_RE.search(text):
return "⏱️ The model provider is rate-limiting requests. Please wait a moment and try again."
return (
"⚠️ The model provider failed after retries. I kept raw provider details "
"out of chat; check gateway logs for diagnostics."
)
def _looks_like_gateway_provider_error(text: str) -> bool:
"""True when text is infrastructure/provider failure, not normal content."""
return bool(_GATEWAY_PROVIDER_ERROR_RE.search(text))
def _sanitize_gateway_final_response(platform: Any, text: str) -> str:
"""Sanitize final gateway replies before sending them to high-noise chats.
Telegram is Bob's mobile inbox, so it should receive concise, safe provider
failure categories instead of raw HTTP bodies, request IDs, or policy text.
Other platforms keep the existing behaviour for now.
"""
if not text:
return text
if _gateway_platform_value(platform) != "telegram":
return text
redacted = _redact_gateway_user_facing_secrets(str(text))
if _looks_like_gateway_provider_error(redacted):
return _gateway_provider_error_reply(redacted)
return redacted
def _prepare_gateway_status_message(platform: Any, event_type: str, message: str) -> Optional[str]:
"""Filter/sanitize agent status callbacks before platform delivery."""
text = str(message or "").strip()
if not text:
return None
if _gateway_platform_value(platform) != "telegram":
return text
text = _redact_gateway_user_facing_secrets(text)
if _TELEGRAM_NOISY_STATUS_RE.search(text):
return None
if _looks_like_gateway_provider_error(text):
return _gateway_provider_error_reply(text)
return text
def _telegramize_command_mentions(text: str, platform: Any) -> str:
"""Rewrite slash-command mentions to Telegram-valid command names.
@ -8248,6 +8386,7 @@ class GatewayRunner:
response = _normalize_empty_agent_response(
agent_result, response, history_len=len(history),
)
response = _sanitize_gateway_final_response(source.platform, response)
# If the agent's session_id changed during compression, update
# session_entry so transcript writes below go to the right session.
@ -15782,10 +15921,23 @@ class GatewayRunner:
def _status_callback_sync(event_type: str, message: str) -> None:
if not _status_adapter or not _run_still_current():
return
prepared_message = _prepare_gateway_status_message(
source.platform,
event_type,
message,
)
if prepared_message is None:
logger.debug(
"status_callback suppressed for %s/%s: %s",
source.platform.value if source.platform else "unknown",
event_type,
_redact_gateway_user_facing_secrets(str(message or ""))[:160],
)
return
_fut = safe_schedule_threadsafe(
_status_adapter.send(
_status_chat_id,
message,
prepared_message,
metadata=_status_thread_metadata,
),
_loop_for_step,

View file

@ -0,0 +1,82 @@
"""Telegram-specific gateway filtering for noisy status/error output."""
from gateway.config import Platform
from gateway.run import (
_prepare_gateway_status_message,
_sanitize_gateway_final_response,
)
def test_telegram_status_suppresses_auxiliary_and_retry_noise():
"""Auxiliary failures and retry backoff chatter should not hit Telegram."""
noisy_messages = [
"⚠ Auxiliary title generation failed: HTTP 400: Operation contains cybersecurity risk",
"⚠ Compression summary failed: upstream error. Inserted a fallback context marker.",
" Configured compression model 'small-model' failed (timeout). Recovered using main model — check auxiliary.compression.model in config.yaml.",
"⏳ Retrying in 4.2s (attempt 1/3)...",
"⏱️ Rate limited. Waiting 30.0s (attempt 2/3)...",
"⚠️ Max retries (3) exhausted — trying fallback...",
]
for message in noisy_messages:
assert _prepare_gateway_status_message(Platform.TELEGRAM, "warn", message) is None
def test_non_telegram_status_is_unchanged():
"""The Telegram quieting policy must not hide CLI/Discord diagnostics."""
message = "⏳ Retrying in 4.2s (attempt 1/3)..."
assert _prepare_gateway_status_message(Platform.DISCORD, "lifecycle", message) == message
assert _prepare_gateway_status_message("local", "lifecycle", message) == message
def test_telegram_status_sanitizes_raw_provider_security_errors():
"""Provider policy/security bodies should be replaced before chat delivery."""
raw = (
"❌ API failed after 3 retries — HTTP 400: request blocked because "
"Operation contains cybersecurity risk. request_id=req_123"
)
sanitized = _prepare_gateway_status_message(Platform.TELEGRAM, "lifecycle", raw)
assert sanitized is not None
assert "provider rejected" in sanitized.lower()
assert "cybersecurity risk" not in sanitized.lower()
assert "HTTP 400" not in sanitized
assert "req_123" not in sanitized
def test_telegram_final_response_sanitizes_raw_provider_errors():
"""Final Telegram replies should not expose raw provider/security details."""
raw = (
"API call failed after 3 retries: HTTP 400: This request was blocked "
"under the provider cybersecurity risk policy. request_id=req_abc"
)
sanitized = _sanitize_gateway_final_response(Platform.TELEGRAM, raw)
assert "provider rejected" in sanitized.lower()
assert "cybersecurity risk" not in sanitized.lower()
assert "HTTP 400" not in sanitized
assert "req_abc" not in sanitized
def test_telegram_final_response_redacts_auth_secrets():
"""Authentication errors should be useful without leaking key material."""
raw = (
"⚠️ Provider authentication failed: Incorrect API key provided: "
"sk-live_abcdefghijklmnopqrstuvwxyz1234567890"
)
sanitized = _sanitize_gateway_final_response(Platform.TELEGRAM, raw)
assert "authentication failed" in sanitized.lower()
assert "check the configured credentials" in sanitized.lower()
assert "sk-live" not in sanitized
def test_telegram_final_response_keeps_normal_answers():
"""Normal assistant content should not be rewritten."""
answer = "Here is the clean summary you asked for."
assert _sanitize_gateway_final_response(Platform.TELEGRAM, answer) == answer