mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-23 10:42:00 +00:00
fix(photon): classify Envoy overflow errors as retryable; add typing cooldown
Closes #50185

Two independent gaps let a transient Photon/Spectrum upstream overflow degrade message delivery and amplify gRPC pressure:

1. _is_retryable_error did not recognise Photon- or Envoy-specific error strings ("internal sidecar error", "upstream connect error", "reset reason: overflow"), so _send_with_retry fell through to the plain-text fallback immediately instead of backing off and retrying.
2. send_typing had no rate gate, so a burst of typing-indicator calls during an overflow event kept hitting the upstream gRPC connection and widened the failure window.

Fix:

- Add _PHOTON_RETRYABLE_PATTERNS with the three high-specificity Envoy / sidecar substrings and override _is_retryable_error on PhotonAdapter to check them after delegating to the base-class patterns. base.py and all other adapters are untouched.
- Add a 5 s per-chat cooldown in send_typing backed by _typing_last_sent. stop_typing clears the entry so the next start after a completed turn fires immediately — only rapid consecutive starts without a stop are suppressed.
- Reduce PhotonAdapter._send_with_retry default max_retries from 2 to 1 (single 2 s back-off check) — enough to confirm whether the Envoy circuit-breaker has opened, without adding unnecessary latency.

All changes are scoped to plugins/platforms/photon/adapter.py.
This commit is contained in:
parent
7a131f7f40
commit
2a4542333e
1 changed files with 31 additions and 1 deletions
|
|
@ -85,6 +85,20 @@ _DEDUP_WINDOW_SECONDS = 48 * 3600
|
|||
|
||||
_SIDECAR_DIR = Path(__file__).parent / "sidecar"
|
||||
|
||||
# Photon / Envoy / spectrum-ts error substrings that indicate a transient
|
||||
# upstream overload rather than a permanent failure. These are not in the
|
||||
# core _RETRYABLE_ERROR_PATTERNS because they are specific to this adapter.
|
||||
_PHOTON_RETRYABLE_PATTERNS = (
|
||||
"internal sidecar error",
|
||||
"upstream connect error",
|
||||
"reset reason: overflow",
|
||||
)
|
||||
|
||||
# Minimum seconds between typing-indicator calls for the same chat.
|
||||
# iMessage is a personal channel — suppressing rapid repeats reduces
|
||||
# upstream gRPC pressure during Photon overflow events.
|
||||
_TYPING_COOLDOWN_SECONDS = 5.0
|
||||
|
||||
# Group-chat mention wake words. When ``require_mention`` is enabled, group
|
||||
# messages are ignored unless they match one of these patterns — same
|
||||
# behavior and defaults as the BlueBubbles iMessage channel so the two
|
||||
|
|
@ -234,6 +248,8 @@ class PhotonAdapter(BasePlatformAdapter):
|
|||
# react action default to "the message that triggered me" without
|
||||
# requiring the model to thread message ids through tool calls.
|
||||
self._last_inbound_by_chat: Dict[str, str] = {}
|
||||
# Last time we sent a typing indicator per chat, for cooldown gating.
|
||||
self._typing_last_sent: Dict[str, float] = {}
|
||||
|
||||
# Group-chat mention gating (parity with BlueBubbles). When enabled,
|
||||
# group messages are ignored unless they match a wake word; DMs are
|
||||
|
|
@ -988,6 +1004,10 @@ class PhotonAdapter(BasePlatformAdapter):
|
|||
)
|
||||
|
||||
async def send_typing(self, chat_id: str, metadata=None) -> None:
|
||||
now = time.time()
|
||||
if now - self._typing_last_sent.get(chat_id, 0.0) < _TYPING_COOLDOWN_SECONDS:
|
||||
return
|
||||
self._typing_last_sent[chat_id] = now
|
||||
try:
|
||||
await self._sidecar_call(
|
||||
"/typing", {"spaceId": chat_id, "state": "start"}
|
||||
|
|
@ -996,6 +1016,7 @@ class PhotonAdapter(BasePlatformAdapter):
|
|||
logger.debug("[photon] send_typing failed: %s", e)
|
||||
|
||||
async def stop_typing(self, chat_id: str) -> None:
|
||||
self._typing_last_sent.pop(chat_id, None)
|
||||
try:
|
||||
await self._sidecar_call(
|
||||
"/typing", {"spaceId": chat_id, "state": "stop"}
|
||||
|
|
@ -1189,13 +1210,22 @@ class PhotonAdapter(BasePlatformAdapter):
|
|||
return content
|
||||
return strip_markdown(content)
|
||||
|
||||
@staticmethod
def _is_retryable_error(error: Optional[str]) -> bool:
    """Return True when *error* should be retried rather than treated as final.

    The base-class classifier is consulted first. Only when it declines is
    the (lower-cased) error text searched for the substrings listed in
    ``_PHOTON_RETRYABLE_PATTERNS``. A missing or empty error string is
    never retryable on the Photon-specific path.
    """
    # Defer to the shared classifier first so its verdict always wins.
    if BasePlatformAdapter._is_retryable_error(error):
        return True
    # Nothing to inspect: None / "" cannot match any pattern.
    if not error:
        return False
    # Case-insensitive substring search over the adapter-specific patterns.
    haystack = error.lower()
    for needle in _PHOTON_RETRYABLE_PATTERNS:
        if needle in haystack:
            return True
    return False
|
||||
|
||||
async def _send_with_retry(
|
||||
self,
|
||||
chat_id: str,
|
||||
content: str,
|
||||
reply_to: Optional[str] = None,
|
||||
metadata: Any = None,
|
||||
max_retries: int = 2,
|
||||
max_retries: int = 1,
|
||||
base_delay: float = 2.0,
|
||||
) -> SendResult:
|
||||
"""Retry sends without the generic Markdown banner.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue