From 2a4542333ee107bbb5b9e434574347334f239258 Mon Sep 17 00:00:00 2001
From: joaomarcos <joaomarcosdias444@gmail.com>
Date: Sun, 21 Jun 2026 13:53:26 -0300
Subject: [PATCH] fix(photon): classify Envoy overflow errors as retryable; add
 typing cooldown
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #50185

Two independent gaps let a transient Photon/Spectrum upstream overflow
degrade message delivery and amplify gRPC pressure:

1. _is_retryable_error did not recognise Photon- or Envoy-specific error
   strings ("internal sidecar error", "upstream connect error",
   "reset reason: overflow"), so _send_with_retry fell through to the
   plain-text fallback immediately instead of backing off and retrying.

2. send_typing had no rate gate, so a burst of typing-indicator calls
   during an overflow event kept hitting the upstream gRPC connection and
   widened the failure window.

Fix:
- Add _PHOTON_RETRYABLE_PATTERNS with the three high-specificity Envoy /
  sidecar substrings and override _is_retryable_error on PhotonAdapter to
  check them after delegating to the base-class patterns.  base.py and all
  other adapters are untouched.
- Add a 5 s per-chat cooldown in send_typing backed by _typing_last_sent.
  stop_typing clears the entry so the next start after a completed turn
  fires immediately — only rapid consecutive starts without a stop are
  suppressed.
- Reduce PhotonAdapter._send_with_retry default max_retries from 2 to 1
  (single 2 s back-off check) — enough to confirm whether the Envoy
  circuit-breaker has opened, without adding unnecessary latency.

All changes are scoped to plugins/platforms/photon/adapter.py.
---
 plugins/platforms/photon/adapter.py | 32 ++++++++++++++++++++++++++++-
 1 file changed, 31 insertions(+), 1 deletion(-)

diff --git a/plugins/platforms/photon/adapter.py b/plugins/platforms/photon/adapter.py
index 01c1cabbc01..7560adc0465 100644
--- a/plugins/platforms/photon/adapter.py
+++ b/plugins/platforms/photon/adapter.py
@@ -85,6 +85,20 @@ _DEDUP_WINDOW_SECONDS = 48 * 3600
 
 _SIDECAR_DIR = Path(__file__).parent / "sidecar"
 
+# Photon / Envoy / spectrum-ts error substrings that indicate a transient
+# upstream overload, not a permanent failure.  Adapter-specific, so kept out of
+# core _RETRYABLE_ERROR_PATTERNS.  Must be lowercase: matched on error.lower().
+_PHOTON_RETRYABLE_PATTERNS = (
+    "internal sidecar error",
+    "upstream connect error",
+    "reset reason: overflow",
+)
+
+# Minimum seconds between typing "start" calls for the same chat (stop_typing
+# resets it).  iMessage is a personal channel — suppressing rapid repeats
+# reduces upstream gRPC pressure during Photon overflow events.
+_TYPING_COOLDOWN_SECONDS = 5.0
+
 # Group-chat mention wake words. When ``require_mention`` is enabled, group
 # messages are ignored unless they match one of these patterns — same
 # behavior and defaults as the BlueBubbles iMessage channel so the two
@@ -234,6 +248,8 @@ class PhotonAdapter(BasePlatformAdapter):
         # react action default to "the message that triggered me" without
         # requiring the model to thread message ids through tool calls.
         self._last_inbound_by_chat: Dict[str, str] = {}
+        # Last time we sent a typing indicator per chat, for cooldown gating.
+        self._typing_last_sent: Dict[str, float] = {}
 
         # Group-chat mention gating (parity with BlueBubbles). When enabled,
         # group messages are ignored unless they match a wake word; DMs are
@@ -988,6 +1004,10 @@ class PhotonAdapter(BasePlatformAdapter):
         )
 
     async def send_typing(self, chat_id: str, metadata=None) -> None:
+        now = time.time()
+        if now - self._typing_last_sent.get(chat_id, 0.0) < _TYPING_COOLDOWN_SECONDS:
+            return
+        self._typing_last_sent[chat_id] = now
         try:
             await self._sidecar_call(
                 "/typing", {"spaceId": chat_id, "state": "start"}
@@ -996,6 +1016,7 @@ class PhotonAdapter(BasePlatformAdapter):
             logger.debug("[photon] send_typing failed: %s", e)
 
     async def stop_typing(self, chat_id: str) -> None:
+        self._typing_last_sent.pop(chat_id, None)
         try:
             await self._sidecar_call(
                 "/typing", {"spaceId": chat_id, "state": "stop"}
@@ -1189,13 +1210,22 @@ class PhotonAdapter(BasePlatformAdapter):
             return content
         return strip_markdown(content)
 
+    @staticmethod
+    def _is_retryable_error(error: Optional[str]) -> bool:
+        """True if the base check or a Photon-specific pattern matches."""
+        if BasePlatformAdapter._is_retryable_error(error):
+            return True
+        # None / empty error text lower-cases to "" and matches no pattern.
+        text = (error or "").lower()
+        return any(marker in text for marker in _PHOTON_RETRYABLE_PATTERNS)
+
     async def _send_with_retry(
         self,
         chat_id: str,
         content: str,
         reply_to: Optional[str] = None,
         metadata: Any = None,
-        max_retries: int = 2,
+        max_retries: int = 1,
         base_delay: float = 2.0,
     ) -> SendResult:
         """Retry sends without the generic Markdown banner.