From 2a4542333ee107bbb5b9e434574347334f239258 Mon Sep 17 00:00:00 2001 From: joaomarcos Date: Sun, 21 Jun 2026 13:53:26 -0300 Subject: [PATCH] fix(photon): classify Envoy overflow errors as retryable; add typing cooldown MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes #50185 Two independent gaps let a transient Photon/Spectrum upstream overflow degrade message delivery and amplify gRPC pressure: 1. _is_retryable_error did not recognise Photon- or Envoy-specific error strings ("internal sidecar error", "upstream connect error", "reset reason: overflow"), so _send_with_retry fell through to the plain-text fallback immediately instead of backing off and retrying. 2. send_typing had no rate gate, so a burst of typing-indicator calls during an overflow event kept hitting the upstream gRPC connection and widened the failure window. Fix: - Add _PHOTON_RETRYABLE_PATTERNS with the three high-specificity Envoy / sidecar substrings and override _is_retryable_error on PhotonAdapter to check them after delegating to the base-class patterns. base.py and all other adapters are untouched. - Add a 5 s per-chat cooldown in send_typing backed by _typing_last_sent. stop_typing clears the entry so the next start after a completed turn fires immediately — only rapid consecutive starts without a stop are suppressed. - Reduce PhotonAdapter._send_with_retry default max_retries from 2 to 1 (single 2 s back-off check) — enough to confirm whether the Envoy circuit-breaker has opened, without adding unnecessary latency. All changes are scoped to plugins/platforms/photon/adapter.py. 
--- plugins/platforms/photon/adapter.py | 32 ++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/plugins/platforms/photon/adapter.py b/plugins/platforms/photon/adapter.py index 01c1cabbc01..7560adc0465 100644 --- a/plugins/platforms/photon/adapter.py +++ b/plugins/platforms/photon/adapter.py @@ -85,6 +85,20 @@ _DEDUP_WINDOW_SECONDS = 48 * 3600 _SIDECAR_DIR = Path(__file__).parent / "sidecar" +# Photon / Envoy / spectrum-ts error substrings that indicate a transient +# upstream overload rather than a permanent failure. These are not in the +# core _RETRYABLE_ERROR_PATTERNS because they are specific to this adapter. +_PHOTON_RETRYABLE_PATTERNS = ( + "internal sidecar error", + "upstream connect error", + "reset reason: overflow", +) + +# Minimum seconds between typing "start" calls for the same chat; a stop +# resets the gate. iMessage is a personal channel — suppressing rapid +# repeats reduces upstream gRPC pressure during Photon overflow events. +_TYPING_COOLDOWN_SECONDS = 5.0 + # Group-chat mention wake words. When ``require_mention`` is enabled, group # messages are ignored unless they match one of these patterns — same # behavior and defaults as the BlueBubbles iMessage channel so the two @@ -234,6 +248,8 @@ class PhotonAdapter(BasePlatformAdapter): # react action default to "the message that triggered me" without # requiring the model to thread message ids through tool calls. self._last_inbound_by_chat: Dict[str, str] = {} + # Last time we sent a typing indicator per chat, for cooldown gating. + self._typing_last_sent: Dict[str, float] = {} # Group-chat mention gating (parity with BlueBubbles). 
When enabled, # group messages are ignored unless they match a wake word; DMs are @@ -988,6 +1004,10 @@ class PhotonAdapter(BasePlatformAdapter): ) async def send_typing(self, chat_id: str, metadata=None) -> None: + now = time.time() + if now - self._typing_last_sent.get(chat_id, 0.0) < _TYPING_COOLDOWN_SECONDS: + return + self._typing_last_sent[chat_id] = now try: await self._sidecar_call( "/typing", {"spaceId": chat_id, "state": "start"} @@ -996,6 +1016,7 @@ class PhotonAdapter(BasePlatformAdapter): logger.debug("[photon] send_typing failed: %s", e) async def stop_typing(self, chat_id: str) -> None: + self._typing_last_sent.pop(chat_id, None) try: await self._sidecar_call( "/typing", {"spaceId": chat_id, "state": "stop"} @@ -1189,13 +1210,22 @@ class PhotonAdapter(BasePlatformAdapter): return content return strip_markdown(content) + @staticmethod + def _is_retryable_error(error: Optional[str]) -> bool: + if BasePlatformAdapter._is_retryable_error(error): + return True + if not error: + return False + lowered = error.lower() + return any(pat in lowered for pat in _PHOTON_RETRYABLE_PATTERNS) + async def _send_with_retry( self, chat_id: str, content: str, reply_to: Optional[str] = None, metadata: Any = None, - max_retries: int = 2, + max_retries: int = 1, base_delay: float = 2.0, ) -> SendResult: """Retry sends without the generic Markdown banner.