fix(photon): classify Envoy overflow errors as retryable; add typing cooldown

Closes #50185

Two independent gaps let a transient Photon/Spectrum upstream overflow degrade message delivery and amplify gRPC pressure:

1. _is_retryable_error did not recognise Photon- or Envoy-specific error strings ("internal sidecar error", "upstream connect error", "reset reason: overflow"), so _send_with_retry fell through to the plain-text fallback immediately instead of backing off and retrying.
2. send_typing had no rate gate, so a burst of typing-indicator calls during an overflow event kept hitting the upstream gRPC connection and widened the failure window.

Fix:

- Add _PHOTON_RETRYABLE_PATTERNS with the three high-specificity Envoy / sidecar substrings and override _is_retryable_error on PhotonAdapter to check them after delegating to the base-class patterns. base.py and all other adapters are untouched.
- Add a 5 s per-chat cooldown in send_typing backed by _typing_last_sent. stop_typing clears the entry so the next start after a completed turn fires immediately — only rapid consecutive starts without a stop are suppressed.
- Reduce PhotonAdapter._send_with_retry default max_retries from 2 to 1 (single 2 s back-off check) — enough to confirm whether the Envoy circuit-breaker has opened, without adding unnecessary latency.

All changes are scoped to plugins/platforms/photon/adapter.py.
2026-06-23 10:42:00 +00:00 · 2026-06-21 13:53:26 -03:00 · 2026-06-21 13:53:26 -03:00 · 2a4542333e
commit 2a4542333e
parent 7a131f7f40
1 changed files with 31 additions and 1 deletions
--- a/plugins/platforms/photon/adapter.py
+++ b/plugins/platforms/photon/adapter.py
@ -85,6 +85,20 @@ _DEDUP_WINDOW_SECONDS = 48 * 3600

 _SIDECAR_DIR = Path(__file__).parent / "sidecar"

+# Photon / Envoy / spectrum-ts error substrings that indicate a transient
+# upstream overload rather than a permanent failure.  These are not in the
+# core _RETRYABLE_ERROR_PATTERNS because they are specific to this adapter.
+_PHOTON_RETRYABLE_PATTERNS = (
+    "internal sidecar error",
+    "upstream connect error",
+    "reset reason: overflow",
+)
+
+# Minimum seconds between typing-indicator calls for the same chat.
+# iMessage is a personal channel — suppressing rapid repeats reduces
+# upstream gRPC pressure during Photon overflow events.
+_TYPING_COOLDOWN_SECONDS = 5.0
+
 # Group-chat mention wake words. When ``require_mention`` is enabled, group
 # messages are ignored unless they match one of these patterns — same
 # behavior and defaults as the BlueBubbles iMessage channel so the two
@ -234,6 +248,8 @@ class PhotonAdapter(BasePlatformAdapter):
        # react action default to "the message that triggered me" without
        # requiring the model to thread message ids through tool calls.
        self._last_inbound_by_chat: Dict[str, str] = {}
+        # Last time we sent a typing indicator per chat, for cooldown gating.
+        self._typing_last_sent: Dict[str, float] = {}

        # Group-chat mention gating (parity with BlueBubbles). When enabled,
        # group messages are ignored unless they match a wake word; DMs are
@ -988,6 +1004,10 @@ class PhotonAdapter(BasePlatformAdapter):
        )

    async def send_typing(self, chat_id: str, metadata=None) -> None:
+        now = time.time()
+        if now - self._typing_last_sent.get(chat_id, 0.0) < _TYPING_COOLDOWN_SECONDS:
+            return
+        self._typing_last_sent[chat_id] = now
        try:
            await self._sidecar_call(
                "/typing", {"spaceId": chat_id, "state": "start"}
@ -996,6 +1016,7 @@ class PhotonAdapter(BasePlatformAdapter):
            logger.debug("[photon] send_typing failed: %s", e)

    async def stop_typing(self, chat_id: str) -> None:
+        self._typing_last_sent.pop(chat_id, None)
        try:
            await self._sidecar_call(
                "/typing", {"spaceId": chat_id, "state": "stop"}
@ -1189,13 +1210,22 @@ class PhotonAdapter(BasePlatformAdapter):
            return content
        return strip_markdown(content)

+    @staticmethod
+    def _is_retryable_error(error: Optional[str]) -> bool:
+        if BasePlatformAdapter._is_retryable_error(error):
+            return True
+        if not error:
+            return False
+        lowered = error.lower()
+        return any(pat in lowered for pat in _PHOTON_RETRYABLE_PATTERNS)
+
    async def _send_with_retry(
        self,
        chat_id: str,
        content: str,
        reply_to: Optional[str] = None,
        metadata: Any = None,
-        max_retries: int = 2,
+        max_retries: int = 1,
        base_delay: float = 2.0,
    ) -> SendResult:
        """Retry sends without the generic Markdown banner.