mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
Two related race conditions in gateway/platforms/base.py that could produce duplicate agent runs or silently drop messages. Neither is specific to any one platform — all adapters inherit this logic. R5 (HIGH) — duplicate agent spawn on turn chain In _process_message_background, the pending-drain path deleted _active_sessions[session_key] before awaiting typing_task.cancel() and then recursively awaiting _process_message_background for the queued event. During the typing_task await, a fresh inbound message M3 could pass the Level-1 guard (entry now missing), set its own Event, and spawn a second _process_message_background for the same session_key — two agents running simultaneously, duplicate responses, duplicate tool calls. Fix: keep the _active_sessions entry populated and only clear() the Event. The guard stays live, so any concurrent inbound message takes the busy-handler path (queue + interrupt) as intended. R6 (MED-HIGH) — message dropped during finally cleanup The finally block has two await points (typing_task, stop_typing) before it deletes _active_sessions. A message arriving in that window passes the guard (entry still live), lands in _pending_messages via the busy-handler — and then the unconditional del removes the guard with that message still queued. Nothing drains it; the user never gets a reply. Fix: before deleting _active_sessions in finally, pop any late pending_messages entry and spawn a drain task for it. Only delete _active_sessions when no pending is waiting. Tests: tests/gateway/test_pending_drain_race.py — three regression cases. Validated: without the fix, two of the three fail exactly where the races manifest (duplicate-spawn guard loses identity, late-arrival 'LATE' message not in processed list).
212 lines
6.9 KiB
Python
212 lines
6.9 KiB
Python
"""Regression tests: pending-drain + finally-cleanup races must not spawn
|
|
duplicate agents OR silently drop messages that arrived during cleanup.
|
|
|
|
Two related races in gateway/platforms/base.py:_process_message_background:
|
|
|
|
1. Pending-drain path (previous line 1931):
|
|
``del self._active_sessions[session_key]`` opened a window where a
|
|
concurrent inbound message could pass the Level-1 guard, spawn its
|
|
own _process_message_background, and run simultaneously with the
|
|
recursive drain. Two agents on one session_key = duplicate responses.
|
|
|
|
2. Finally-cleanup path (previous line 1990-1991):
|
|
Between the awaits in finally (typing_task, stop_typing) and the
|
|
``del self._active_sessions[session_key]``, a new message could
|
|
land in _pending_messages. The del ran anyway, and the message was
|
|
silently dropped — user never got a reply.
|
|
|
|
Fix: keep the _active_sessions entry live across the turn chain and
|
|
clear the Event instead of deleting; in finally, drain any
|
|
late-arrival pending message by spawning a task instead of
|
|
dropping it.
|
|
"""
|
|
|
|
import asyncio
|
|
from unittest.mock import AsyncMock
|
|
|
|
import pytest
|
|
|
|
from gateway.config import Platform, PlatformConfig
|
|
from gateway.platforms.base import (
|
|
BasePlatformAdapter,
|
|
MessageEvent,
|
|
MessageType,
|
|
)
|
|
from gateway.session import SessionSource, build_session_key
|
|
|
|
|
|
class _StubAdapter(BasePlatformAdapter):
|
|
async def connect(self):
|
|
pass
|
|
|
|
async def disconnect(self):
|
|
pass
|
|
|
|
async def send(self, chat_id, text, **kwargs):
|
|
return None
|
|
|
|
async def get_chat_info(self, chat_id):
|
|
return {}
|
|
|
|
|
|
def _make_adapter():
|
|
adapter = _StubAdapter(PlatformConfig(enabled=True, token="t"), Platform.TELEGRAM)
|
|
adapter._send_with_retry = AsyncMock(return_value=None)
|
|
return adapter
|
|
|
|
|
|
def _make_event(text="hi", chat_id="42"):
|
|
return MessageEvent(
|
|
text=text,
|
|
message_type=MessageType.TEXT,
|
|
source=SessionSource(platform=Platform.TELEGRAM, chat_id=chat_id, chat_type="dm"),
|
|
)
|
|
|
|
|
|
def _sk(chat_id="42"):
|
|
return build_session_key(
|
|
SessionSource(platform=Platform.TELEGRAM, chat_id=chat_id, chat_type="dm")
|
|
)
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_pending_drain_keeps_active_session_guard_live():
|
|
"""Fix for R5: during pending-drain cleanup, _active_sessions must stay
|
|
populated so concurrent inbound messages can't spawn a duplicate
|
|
_process_message_background. We only CLEAR the Event, never delete."""
|
|
adapter = _make_adapter()
|
|
sk = _sk()
|
|
|
|
# Register a slow handler so the agent is "mid-processing" when the
|
|
# pending message arrives.
|
|
first_started = asyncio.Event()
|
|
release_first = asyncio.Event()
|
|
|
|
async def handler(event):
|
|
first_started.set()
|
|
await release_first.wait()
|
|
return "done"
|
|
|
|
adapter._message_handler = handler
|
|
|
|
# Spawn M1 through handle_message.
|
|
await adapter.handle_message(_make_event(text="M1"))
|
|
|
|
# Wait until M1 is actively running inside the handler.
|
|
await asyncio.wait_for(first_started.wait(), timeout=1.0)
|
|
|
|
# Assert: session is active.
|
|
assert sk in adapter._active_sessions
|
|
active_event = adapter._active_sessions[sk]
|
|
|
|
# Simulate pending message (M2) queued while M1 runs.
|
|
adapter._pending_messages[sk] = _make_event(text="M2")
|
|
|
|
# Release M1 — pending-drain block now runs. During its cleanup
|
|
# awaits, _active_sessions[sk] must remain populated (same object
|
|
# reference) so any M3 arriving in that window hits the busy-handler.
|
|
release_first.set()
|
|
|
|
# Give the drain a moment to execute its .clear() + await typing_task
|
|
# without letting it fully finish the recursive call.
|
|
await asyncio.sleep(0)
|
|
await asyncio.sleep(0)
|
|
|
|
# Across the drain transition, the Event object must be the SAME
|
|
# reference (not replaced, not deleted). If del happened, the key
|
|
# would be missing briefly; if a new Event was installed, the
|
|
# identity would differ.
|
|
assert sk in adapter._active_sessions, (
|
|
"_active_sessions[session_key] was deleted during pending-drain — "
|
|
"opens a window for duplicate-agent spawn"
|
|
)
|
|
assert adapter._active_sessions[sk] is active_event, (
|
|
"_active_sessions[session_key] was replaced during pending-drain — "
|
|
"the old Event may have waiters that now won't be signaled"
|
|
)
|
|
|
|
# Finish drain.
|
|
await asyncio.sleep(0.1)
|
|
await adapter.cancel_background_tasks()
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_finally_cleanup_drains_late_arrival_pending():
|
|
"""Fix for R6: if a message lands in _pending_messages during the
|
|
finally-block cleanup awaits, the finally must spawn a drain task
|
|
instead of deleting _active_sessions and dropping the message."""
|
|
adapter = _make_adapter()
|
|
sk = _sk()
|
|
|
|
processed = []
|
|
|
|
async def handler(event):
|
|
processed.append(event.text)
|
|
return "ok"
|
|
|
|
adapter._message_handler = handler
|
|
|
|
# Instrument stop_typing to inject a late-arrival pending message
|
|
# during the finally-block await window. This exactly simulates the
|
|
# R6 race: the message arrives after the response has been sent but
|
|
# before _active_sessions is deleted.
|
|
original_stop = adapter.stop_typing if hasattr(adapter, "stop_typing") else None
|
|
|
|
injected = {"done": False}
|
|
|
|
async def stop_typing_injects_pending(*args, **kwargs):
|
|
# Yield so the injection happens mid-await.
|
|
await asyncio.sleep(0)
|
|
if not injected["done"]:
|
|
adapter._pending_messages[sk] = _make_event(text="LATE")
|
|
injected["done"] = True
|
|
if original_stop:
|
|
return await original_stop(*args, **kwargs)
|
|
return None
|
|
|
|
adapter.stop_typing = stop_typing_injects_pending
|
|
|
|
# Send M1.
|
|
await adapter.handle_message(_make_event(text="M1"))
|
|
|
|
# Drain: wait for M1 to finish and the late-drain task to process LATE.
|
|
for _ in range(50): # up to ~0.5s
|
|
if "LATE" in processed:
|
|
break
|
|
await asyncio.sleep(0.01)
|
|
|
|
await adapter.cancel_background_tasks()
|
|
|
|
assert "M1" in processed, "M1 was not processed"
|
|
assert "LATE" in processed, (
|
|
"Late-arrival pending message was silently dropped — finally "
|
|
"cleanup should have spawned a drain task"
|
|
)
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_no_pending_cleans_up_normally():
|
|
"""Regression guard: when no pending message exists, the finally
|
|
block must still delete _active_sessions as before (no leak)."""
|
|
adapter = _make_adapter()
|
|
sk = _sk()
|
|
|
|
async def handler(event):
|
|
return "ok"
|
|
|
|
adapter._message_handler = handler
|
|
|
|
await adapter.handle_message(_make_event(text="solo"))
|
|
|
|
# Wait for background task to finish.
|
|
for _ in range(50):
|
|
if sk not in adapter._active_sessions:
|
|
break
|
|
await asyncio.sleep(0.01)
|
|
|
|
assert sk not in adapter._active_sessions, (
|
|
"_active_sessions was not cleaned up after a normal turn with no pending"
|
|
)
|
|
assert sk not in adapter._pending_messages
|
|
|
|
await adapter.cancel_background_tasks()
|