mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-08 03:01:47 +00:00
fix(discord): narrow rate-limit catch and move sync state under gateway/
Two follow-ups on top of helix4u's slash-command sync hardening: - Only suppress exceptions that are actually Discord 429 rate limits (discord.RateLimited, HTTPException with status 429, or a clearly rate-limit-named duck type). Arbitrary failures that happen to expose a retry_after attribute now re-raise to the outer handler instead of silently swallowing a cooldown. - Move the sync-state JSON under $HERMES_HOME/gateway/ so the home root stops collecting ad-hoc runtime files. Added a test verifying unrelated exceptions don't get misclassified as rate limits.
This commit is contained in:
parent
d797755a1c
commit
5a3cadf6eb
2 changed files with 103 additions and 12 deletions
|
|
@ -26,7 +26,8 @@ logger = logging.getLogger(__name__)
|
|||
|
||||
VALID_THREAD_AUTO_ARCHIVE_MINUTES = {60, 1440, 4320, 10080}
|
||||
_DISCORD_COMMAND_SYNC_POLICIES = {"safe", "bulk", "off"}
|
||||
_DISCORD_COMMAND_SYNC_STATE_FILE = "discord_command_sync_state.json"
|
||||
_DISCORD_COMMAND_SYNC_STATE_SUBDIR = "gateway"
|
||||
_DISCORD_COMMAND_SYNC_STATE_FILENAME = "discord_command_sync_state.json"
|
||||
_DISCORD_COMMAND_SYNC_MUTATION_INTERVAL_SECONDS = 4.5
|
||||
_DISCORD_COMMAND_SYNC_MAX_RATE_LIMIT_SLEEP_SECONDS = 30.0
|
||||
|
||||
|
|
@ -834,7 +835,12 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
def _command_sync_state_path(self) -> _Path:
|
||||
from hermes_constants import get_hermes_home
|
||||
|
||||
return get_hermes_home() / _DISCORD_COMMAND_SYNC_STATE_FILE
|
||||
directory = get_hermes_home() / _DISCORD_COMMAND_SYNC_STATE_SUBDIR
|
||||
try:
|
||||
directory.mkdir(parents=True, exist_ok=True)
|
||||
except Exception:
|
||||
pass
|
||||
return directory / _DISCORD_COMMAND_SYNC_STATE_FILENAME
|
||||
|
||||
def _read_command_sync_state(self) -> dict:
|
||||
try:
|
||||
|
|
@ -945,6 +951,40 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
continue
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _is_discord_rate_limit(exc: BaseException) -> bool:
|
||||
"""True only for exceptions that look like Discord 429 rate limits.
|
||||
|
||||
Narrower than ``hasattr(exc, 'retry_after')``: discord.py's own
|
||||
``RateLimited`` exception and any HTTPException with status 429
|
||||
qualify. This prevents suppressing unrelated failures that happen
|
||||
to expose a ``retry_after`` attribute."""
|
||||
# discord.py emits RateLimited / HTTPException subclasses for 429s.
|
||||
# Guard with isinstance-of-class so a mocked ``discord`` module
|
||||
# (where attrs are MagicMocks, not types) doesn't trip isinstance.
|
||||
if DISCORD_AVAILABLE and discord is not None:
|
||||
for attr_name in ("RateLimited", "HTTPException"):
|
||||
cls = getattr(discord, attr_name, None)
|
||||
if not isinstance(cls, type):
|
||||
continue
|
||||
if isinstance(exc, cls):
|
||||
if attr_name == "RateLimited":
|
||||
return True
|
||||
status = getattr(exc, "status", None)
|
||||
if status == 429:
|
||||
return True
|
||||
# Fallback duck-type: something named like a rate-limit with a
|
||||
# numeric retry_after. Covers mocked clients in tests and exotic
|
||||
# transports, without swallowing arbitrary exceptions.
|
||||
name = type(exc).__name__.lower()
|
||||
if ("ratelimit" in name or "rate_limit" in name) and getattr(exc, "retry_after", None) is not None:
|
||||
return True
|
||||
response = getattr(exc, "response", None)
|
||||
status = getattr(response, "status", None) or getattr(response, "status_code", None)
|
||||
if status == 429:
|
||||
return True
|
||||
return False
|
||||
|
||||
def _command_sync_mutation_interval_seconds(self) -> float:
|
||||
return _DISCORD_COMMAND_SYNC_MUTATION_INTERVAL_SECONDS
|
||||
|
||||
|
|
@ -989,16 +1029,20 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
# persist Discord's retry-after when it refuses the batch.
|
||||
summary = await asyncio.wait_for(self._safe_sync_slash_commands(), timeout=600)
|
||||
except Exception as e:
|
||||
if not self._is_discord_rate_limit(e):
|
||||
raise
|
||||
retry_after = self._extract_discord_retry_after(e)
|
||||
if retry_after is not None:
|
||||
self._record_command_sync_rate_limit(app_id, fingerprint, retry_after)
|
||||
logger.warning(
|
||||
"[%s] Discord rate-limited slash command sync; retrying after %.0fs",
|
||||
self.name,
|
||||
retry_after,
|
||||
)
|
||||
return
|
||||
raise
|
||||
if retry_after is None:
|
||||
# Rate-limited but no retry-after signal — back off for a
|
||||
# conservative default so we don't slam the bucket again.
|
||||
retry_after = _DISCORD_COMMAND_SYNC_MAX_RATE_LIMIT_SLEEP_SECONDS
|
||||
self._record_command_sync_rate_limit(app_id, fingerprint, retry_after)
|
||||
logger.warning(
|
||||
"[%s] Discord rate-limited slash command sync; retrying after %.0fs",
|
||||
self.name,
|
||||
retry_after,
|
||||
)
|
||||
return
|
||||
finally:
|
||||
if has_ratelimit_timeout:
|
||||
http.max_ratelimit_timeout = previous_ratelimit_timeout
|
||||
|
|
|
|||
|
|
@ -612,12 +612,59 @@ async def test_post_connect_initialization_respects_discord_retry_after(tmp_path
|
|||
await adapter._run_post_connect_initialization()
|
||||
|
||||
sync.assert_awaited_once()
|
||||
state = json.loads((tmp_path / discord_platform._DISCORD_COMMAND_SYNC_STATE_FILE).read_text())
|
||||
state_path = (
|
||||
tmp_path
|
||||
/ discord_platform._DISCORD_COMMAND_SYNC_STATE_SUBDIR
|
||||
/ discord_platform._DISCORD_COMMAND_SYNC_STATE_FILENAME
|
||||
)
|
||||
state = json.loads(state_path.read_text())
|
||||
entry = state["999"]
|
||||
assert entry["retry_after"] == 123.0
|
||||
assert entry["retry_after_until"] > entry["last_attempt_at"]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_post_connect_initialization_reraises_non_rate_limit_exceptions(tmp_path, monkeypatch):
|
||||
"""Arbitrary failures during sync must surface, not be swallowed as rate-limits."""
|
||||
adapter = DiscordAdapter(PlatformConfig(enabled=True, token="test-token"))
|
||||
monkeypatch.setattr("hermes_constants.get_hermes_home", lambda: tmp_path)
|
||||
|
||||
class _DesiredCommand:
|
||||
def to_dict(self, tree):
|
||||
return {"name": "status", "description": "Show Hermes status", "type": 1, "options": []}
|
||||
|
||||
adapter._client = SimpleNamespace(
|
||||
tree=SimpleNamespace(get_commands=lambda: [_DesiredCommand()]),
|
||||
application_id=4242,
|
||||
user=SimpleNamespace(id=4242),
|
||||
)
|
||||
|
||||
# Unrelated failure that happens to expose retry_after. Must NOT be
|
||||
# caught by the rate-limit handler — it has nothing to do with 429s.
|
||||
class _UnrelatedError(RuntimeError):
|
||||
retry_after = 999.0
|
||||
|
||||
sync = AsyncMock(side_effect=_UnrelatedError("database is down"))
|
||||
monkeypatch.setattr(adapter, "_safe_sync_slash_commands", sync)
|
||||
|
||||
# The outer _run_post_connect_initialization has a broad except Exception
|
||||
# that logs defensively — so we assert on state NOT being written.
|
||||
await adapter._run_post_connect_initialization()
|
||||
|
||||
sync.assert_awaited_once()
|
||||
state_path = (
|
||||
tmp_path
|
||||
/ discord_platform._DISCORD_COMMAND_SYNC_STATE_SUBDIR
|
||||
/ discord_platform._DISCORD_COMMAND_SYNC_STATE_FILENAME
|
||||
)
|
||||
state = json.loads(state_path.read_text()) if state_path.exists() else {}
|
||||
entry = state.get("4242", {})
|
||||
# Attempt was recorded before the sync call, but no rate-limit cooldown
|
||||
# should have been persisted from the unrelated exception.
|
||||
assert "retry_after_until" not in entry
|
||||
assert "retry_after" not in entry
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_safe_sync_slash_commands_paces_mutation_writes(monkeypatch):
|
||||
adapter = DiscordAdapter(PlatformConfig(enabled=True, token="test-token"))
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue