mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-24 10:52:21 +00:00
Follow-up for salvaged PR #50256. Unit tests for the three behaviors: retryable classification of Envoy/sidecar overflow strings, per-chat typing cooldown with stop_typing reset, and the _supervise_sidecar crash-detection path that raises a retryable fatal (and the clean-shutdown no-op).
197 lines
6.3 KiB
Python
197 lines
6.3 KiB
Python
"""Photon adapter resilience to transient Spectrum/Envoy upstream overflow.
|
|
|
|
Covers the three behaviors that let the adapter ride through a Photon
|
|
"reset reason: overflow" event instead of degrading delivery and silently
|
|
dying (issue #50185):
|
|
|
|
1. ``_is_retryable_error`` classifies the Envoy/sidecar overflow strings as
|
|
retryable so ``_send_with_retry`` actually engages its backoff loop.
|
|
2. ``send_typing`` is rate-gated per chat, and ``stop_typing`` resets the
|
|
gate so the next turn's typing indicator fires immediately.
|
|
3. ``_supervise_sidecar`` detects an unexpected sidecar exit and raises a
|
|
``retryable=True`` fatal so the gateway reconnect watcher revives the
|
|
platform — instead of returning silently and leaving ``_inbound_loop``
|
|
spinning against a dead port.
|
|
|
|
No Node sidecar is spawned and no ports are bound.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
from typing import Any, Dict
|
|
|
|
import pytest
|
|
|
|
from gateway.config import PlatformConfig
|
|
from plugins.platforms.photon.adapter import PhotonAdapter
|
|
|
|
|
|
def _make_adapter(monkeypatch: pytest.MonkeyPatch) -> PhotonAdapter:
|
|
monkeypatch.setenv("PHOTON_PROJECT_ID", "test-project-id")
|
|
monkeypatch.setenv("PHOTON_PROJECT_SECRET", "test-project-secret")
|
|
cfg = PlatformConfig(enabled=True, token="", extra={})
|
|
return PhotonAdapter(cfg)
|
|
|
|
|
|
# -- Gap 1: retryable classification of overflow errors ---------------------
|
|
|
|
@pytest.mark.parametrize(
|
|
"error",
|
|
[
|
|
"UNAVAILABLE: internal sidecar error",
|
|
"upstream connect error or disconnect/reset before headers",
|
|
"reset reason: overflow",
|
|
# Case-insensitive: real strings arrive with mixed case.
|
|
"Internal Sidecar Error",
|
|
],
|
|
)
|
|
def test_overflow_strings_classified_retryable(error: str) -> None:
|
|
assert PhotonAdapter._is_retryable_error(error) is True
|
|
|
|
|
|
def test_unrelated_error_not_retryable() -> None:
|
|
# A genuine permanent failure must NOT be retried.
|
|
assert PhotonAdapter._is_retryable_error("400 bad request: invalid spaceId") is False
|
|
assert PhotonAdapter._is_retryable_error(None) is False
|
|
|
|
|
|
def test_base_network_patterns_still_match() -> None:
|
|
# The override delegates to the base classifier first, so generic
|
|
# network strings keep working.
|
|
assert PhotonAdapter._is_retryable_error("ConnectError: connection refused") is True
|
|
|
|
|
|
# -- Gap 2: typing-indicator cooldown ---------------------------------------
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_typing_cooldown_suppresses_rapid_repeats(
|
|
monkeypatch: pytest.MonkeyPatch,
|
|
) -> None:
|
|
adapter = _make_adapter(monkeypatch)
|
|
calls: list[Dict[str, Any]] = []
|
|
|
|
async def _fake_call(path: str, payload: Dict[str, Any]) -> Any:
|
|
calls.append(payload)
|
|
return {"ok": True}
|
|
|
|
monkeypatch.setattr(adapter, "_sidecar_call", _fake_call)
|
|
|
|
# First call fires; immediate repeats are suppressed by the cooldown.
|
|
await adapter.send_typing("chat-1")
|
|
await adapter.send_typing("chat-1")
|
|
await adapter.send_typing("chat-1")
|
|
|
|
assert len(calls) == 1
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_typing_cooldown_is_per_chat(
|
|
monkeypatch: pytest.MonkeyPatch,
|
|
) -> None:
|
|
adapter = _make_adapter(monkeypatch)
|
|
calls: list[str] = []
|
|
|
|
async def _fake_call(path: str, payload: Dict[str, Any]) -> Any:
|
|
calls.append(payload["spaceId"])
|
|
return {"ok": True}
|
|
|
|
monkeypatch.setattr(adapter, "_sidecar_call", _fake_call)
|
|
|
|
# Different chats have independent cooldowns.
|
|
await adapter.send_typing("chat-1")
|
|
await adapter.send_typing("chat-2")
|
|
|
|
assert calls == ["chat-1", "chat-2"]
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_stop_typing_resets_cooldown(
|
|
monkeypatch: pytest.MonkeyPatch,
|
|
) -> None:
|
|
adapter = _make_adapter(monkeypatch)
|
|
starts = 0
|
|
|
|
async def _fake_call(path: str, payload: Dict[str, Any]) -> Any:
|
|
nonlocal starts
|
|
if payload.get("state") == "start":
|
|
starts += 1
|
|
return {"ok": True}
|
|
|
|
monkeypatch.setattr(adapter, "_sidecar_call", _fake_call)
|
|
|
|
# A start, then a stop (end of turn), then a start for the next turn must
|
|
# fire immediately — the cooldown only suppresses rapid consecutive starts
|
|
# without an intervening stop.
|
|
await adapter.send_typing("chat-1")
|
|
await adapter.stop_typing("chat-1")
|
|
await adapter.send_typing("chat-1")
|
|
|
|
assert starts == 2
|
|
|
|
|
|
# -- Gap 3: sidecar crash detection -----------------------------------------
|
|
|
|
class _EofStdout:
|
|
"""A proc.stdout whose readline() reports immediate EOF (dead sidecar)."""
|
|
|
|
def readline(self) -> bytes:
|
|
return b""
|
|
|
|
|
|
class _DeadProc:
|
|
"""Minimal subprocess.Popen stand-in for a sidecar that has exited."""
|
|
|
|
def __init__(self, exit_code: int = 1) -> None:
|
|
self.stdout = _EofStdout()
|
|
self.stdin = None
|
|
self._exit_code = exit_code
|
|
|
|
def poll(self) -> int:
|
|
return self._exit_code
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_unexpected_sidecar_exit_raises_retryable_fatal(
|
|
monkeypatch: pytest.MonkeyPatch,
|
|
) -> None:
|
|
adapter = _make_adapter(monkeypatch)
|
|
# Simulate a live session whose sidecar then dies underneath it.
|
|
adapter._inbound_running = True
|
|
|
|
notified: list[bool] = []
|
|
|
|
async def _fake_notify() -> None:
|
|
notified.append(True)
|
|
|
|
monkeypatch.setattr(adapter, "_notify_fatal_error", _fake_notify)
|
|
|
|
await adapter._supervise_sidecar(_DeadProc(exit_code=137)) # type: ignore[arg-type]
|
|
|
|
assert adapter.has_fatal_error is True
|
|
assert adapter.fatal_error_code == "SIDECAR_CRASHED"
|
|
# retryable=True routes the platform into the reconnect watcher rather
|
|
# than crashing the whole gateway.
|
|
assert adapter.fatal_error_retryable is True
|
|
assert adapter._running is False
|
|
assert notified == [True]
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_clean_shutdown_does_not_raise_fatal(
|
|
monkeypatch: pytest.MonkeyPatch,
|
|
) -> None:
|
|
adapter = _make_adapter(monkeypatch)
|
|
# disconnect() sets _inbound_running = False before stopping the sidecar,
|
|
# so the detection block must NOT fire on a clean shutdown.
|
|
adapter._inbound_running = False
|
|
|
|
notified: list[bool] = []
|
|
|
|
async def _fake_notify() -> None:
|
|
notified.append(True)
|
|
|
|
monkeypatch.setattr(adapter, "_notify_fatal_error", _fake_notify)
|
|
|
|
await adapter._supervise_sidecar(_DeadProc(exit_code=0)) # type: ignore[arg-type]
|
|
|
|
assert adapter.has_fatal_error is False
|
|
assert notified == []
|