mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-09 08:21:50 +00:00
fix(telegram): use get_running_loop in polling-conflict retry reschedule (#41716)
The conflict-retry path called asyncio.get_event_loop() to reschedule itself when a retry's start_polling raised. On Python 3.11+ (our floor), that call raises "RuntimeError: There is no current event loop in thread 'MainThread'" when no loop is attached to the thread, which is what happens when PTB dispatches this error callback. As a result, the retry never gets scheduled, the adapter goes silent-but-alive, and gateway --replace keeps spawning fresh instances that hit the same wall — the crash loop reported in #19471 (worse under multi-profile, where two bots hold the same conflict open). We are inside a coroutine here, so asyncio.get_running_loop() is the correct, guaranteed-valid replacement. This was the only get_event_loop() call in any platform adapter, so there are no sibling sites to fix. Fixes #19471.
This commit is contained in:
parent
b5f7a1f299
commit
2e62862784
2 changed files with 96 additions and 1 deletions
|
|
@ -1143,7 +1143,13 @@ class TelegramAdapter(BasePlatformAdapter):
|
|||
# gateway process is alive and reports "connected" but
|
||||
# no messages are received or sent.
|
||||
if self._polling_conflict_count < MAX_CONFLICT_RETRIES:
|
||||
loop = asyncio.get_event_loop()
|
||||
# We are inside a running coroutine, so the running loop is
|
||||
# guaranteed to exist. asyncio.get_event_loop() is deprecated
|
||||
# and raises "RuntimeError: There is no current event loop in
|
||||
# thread 'MainThread'" on Python 3.10+ when invoked from a
|
||||
# context without an attached loop (which can happen when PTB
|
||||
# dispatches this error callback). Use get_running_loop().
|
||||
loop = asyncio.get_running_loop()
|
||||
self._polling_error_task = loop.create_task(
|
||||
self._handle_polling_conflict(retry_err)
|
||||
)
|
||||
|
|
|
|||
|
|
@ -309,3 +309,92 @@ async def test_disconnect_skips_inactive_updater_and_app(monkeypatch):
|
|||
app.stop.assert_not_awaited()
|
||||
app.shutdown.assert_awaited_once()
|
||||
warning.assert_not_called()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_polling_conflict_reschedule_uses_running_loop(monkeypatch):
    """Guard against the #19471 regression in the conflict-retry reschedule.

    Scenario: the adapter connects successfully, then a polling conflict is
    handled while still under the retry ceiling and the retry's
    ``start_polling`` call fails. The handler must then reschedule itself
    with ``loop.create_task``.

    ``asyncio.get_event_loop`` is patched to raise the RuntimeError users
    reported, so any code path that still obtains the loop that way fails
    loudly here instead of passing by accident. A handler that relies on the
    running loop is unaffected by the patch.
    """
    adapter = TelegramAdapter(PlatformConfig(enabled=True, token="***"))
    adapter.set_fatal_error_handler(AsyncMock())

    # Scoped-lock helpers are stubbed: acquisition always succeeds and
    # release is a no-op.
    monkeypatch.setattr(
        "gateway.status.acquire_scoped_lock",
        lambda scope, identity, metadata=None: (True, None),
    )
    monkeypatch.setattr(
        "gateway.status.release_scoped_lock",
        lambda scope, identity: None,
    )

    first_call_kwargs = {}
    attempts = 0

    async def failing_start_polling(**kwargs):
        nonlocal attempts
        attempts += 1
        if attempts > 1:
            # Every call after the first is a retry; failing it pushes the
            # handler into the reschedule branch.
            raise Exception("Connection refused")
        # First call (made during connect()) succeeds; remember the callback.
        first_call_kwargs["error_callback"] = kwargs["error_callback"]

    fake_updater = SimpleNamespace(
        start_polling=AsyncMock(side_effect=failing_start_polling),
        stop=AsyncMock(),
        running=True,
    )
    fake_bot = SimpleNamespace(
        set_my_commands=AsyncMock(),
        delete_webhook=AsyncMock(),
    )
    fake_app = SimpleNamespace(
        bot=fake_bot,
        updater=fake_updater,
        add_handler=MagicMock(),
        initialize=AsyncMock(),
        start=AsyncMock(),
    )

    # Fluent builder stub: each chained call hands back the same builder,
    # and build() yields the fake application.
    fake_builder = MagicMock()
    fake_builder.token.return_value = fake_builder
    fake_builder.request.return_value = fake_builder
    fake_builder.get_updates_request.return_value = fake_builder
    fake_builder.build.return_value = fake_app
    monkeypatch.setattr(
        "gateway.platforms.telegram.Application",
        SimpleNamespace(builder=MagicMock(return_value=fake_builder)),
    )
    monkeypatch.setattr("asyncio.sleep", AsyncMock())

    connected = await adapter.connect()
    assert connected is True

    # From here on, get_event_loop() reproduces the RuntimeError from #19471.
    def _boom():
        raise RuntimeError("There is no current event loop in thread 'MainThread'.")

    monkeypatch.setattr("asyncio.get_event_loop", _boom)

    conflict_cls = type("Conflict", (Exception,), {})

    # A single conflict keeps the count under the ceiling; the retry's
    # start_polling raises, so the handler has to reschedule itself through
    # loop.create_task -- the line that used to break.
    await adapter._handle_polling_conflict(
        conflict_cls("Conflict: terminated by other getUpdates request")
    )

    assert adapter.has_fatal_error is False
    rescheduled = adapter._polling_error_task
    assert rescheduled is not None

    # Cancel and drain the rescheduled task so nothing is left pending on the
    # running loop once the test returns.
    rescheduled.cancel()
    try:
        await rescheduled
    except (asyncio.CancelledError, Exception):
        pass
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue