mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-03 02:11:48 +00:00
fix(gateway): snapshot callback generation after agent binds it, not before
_process_message_background snapshotted callback_generation from the
interrupt event at the TOP of the task — before the handler ran.
_hermes_run_generation is only set on the event by
GatewayRunner._bind_adapter_run_generation during
_handle_message_with_agent, which runs DURING the handler await. The
early snapshot always captured None, which then flowed into
pop_post_delivery_callback(..., generation=None) in the finally block.
In pop_post_delivery_callback, generation=None with a tuple-registered
entry (generation, callback) bypasses the ownership check — it pops and
fires the callback regardless of which run owns it. Result: a stale run
could fire a fresher run's post-delivery callback (e.g. a
background-review notification attributed to the wrong turn).
Fix: move the snapshot into the finally block, after the handler has
run and _hermes_run_generation has been bound to the current run.
Regression test added: simulates a stale handler at generation=1 and a
fresher callback registered at generation=2. Pre-fix: snapshot=None →
pop fires the generation=2 callback under generation=1's ownership
("newer" fires). Post-fix: snapshot=1 → pop skips the mismatched
entry, callback stays in the dict for the correct run to claim.
Verified: test FAILS on current main (captures "newer" in fired list),
PASSES with this fix.
Salvaged from PR #12565 (the callback-ownership portion only; the
/status totals portion was already fixed on main in 7abc9ce4d via #17158).
Co-authored-by: Oxidane-bot <1317078257maroon@gmail.com>
This commit is contained in:
parent
27ec74c68a
commit
8d7500d80d
3 changed files with 81 additions and 2 deletions
|
|
@@ -568,3 +568,68 @@ async def test_profile_command_reports_custom_root_profile(monkeypatch, tmp_path
|
|||
|
||||
assert "**Profile:** `coder`" in result
|
||||
assert f"**Home:** `{profile_home}`" in result
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_post_delivery_callback_generation_snapshot_happens_after_bind():
    """Regression: a stale run must not claim a newer run's post-delivery callback.

    _process_message_background has to read the run generation off the
    interrupt event AFTER the handler has run, not at task start.

    _hermes_run_generation is only attached to the interrupt event by
    GatewayRunner._bind_adapter_run_generation, which happens during
    _handle_message_with_agent (i.e. while the handler is being awaited).
    Snapshotting at task start therefore always yielded None, which skipped
    the generation-ownership check in pop_post_delivery_callback and allowed
    a stale run to fire a callback registered by a fresher run.
    """
    import asyncio

    from gateway.platforms.base import BasePlatformAdapter

    source = _make_source()
    session_key = build_session_key(source)
    fired_labels = []

    # Smallest concrete adapter that can be instantiated: every platform
    # hook is a no-op because only the base-class bookkeeping is under test.
    class _StubAdapter(BasePlatformAdapter):
        platform = Platform.TELEGRAM

        async def connect(self):
            pass

        async def disconnect(self):
            pass

        async def send(self, chat_id, content, **kwargs):
            pass

        async def get_chat_info(self, chat_id):
            return {}

    adapter = _StubAdapter(
        PlatformConfig(enabled=True, token="***"),
        Platform.TELEGRAM,
    )

    def _record_older():
        fired_labels.append("older")

    def _record_newer():
        fired_labels.append("newer")

    async def stale_handler(event):
        # Mimic _bind_adapter_run_generation: tag the session's interrupt
        # event with this run's generation while the handler is in flight.
        active_interrupt = adapter._active_sessions.get(session_key)
        setattr(active_interrupt, "_hermes_run_generation", 1)

        # The stale run registers its own callback under generation=1 ...
        adapter.register_post_delivery_callback(
            session_key,
            _record_older,
            generation=1,
        )
        # ... and then a fresher run registers for the same session under
        # generation=2; the final assertions expect the session's entry to
        # carry generation 2 afterwards.
        adapter.register_post_delivery_callback(
            session_key,
            _record_newer,
            generation=2,
        )

    adapter.set_message_handler(stale_handler)
    incoming = MessageEvent(text="hello", source=source, message_id="m1")

    await adapter.handle_message(incoming)

    # Drain whatever background work handle_message scheduled so that the
    # finally-block callback pop has run before we inspect state.
    pending = list(adapter._background_tasks)
    assert pending, "expected background task to be created"
    await asyncio.gather(*pending)

    # The generation=1 run must leave the generation=2 callback alone.
    # Pre-fix, the generation was captured as None before the handler ran,
    # so the ownership check was bypassed and "newer" fired anyway.
    assert fired_labels == []
    assert session_key in adapter._post_delivery_callbacks
    surviving_generation = adapter._post_delivery_callbacks[session_key][0]
    assert surviving_generation == 2
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue