fix: dedupe kanban notifier delivery claims

This commit is contained in:
Mike Nguyen 2026-05-09 12:41:31 +00:00 committed by Teknium
parent 373c4d6647
commit 861ce7c0b6
5 changed files with 411 additions and 7 deletions

View file

@ -188,6 +188,16 @@ _HERMES_BEHAVIORAL_VARS = frozenset({
"HERMES_BACKGROUND_NOTIFICATIONS",
"HERMES_EXEC_ASK",
"HERMES_HOME_MODE",
# Kanban path/board pins must never leak from a developer shell or
# dispatched worker into tests; otherwise tests can write fake tasks to
# the real ~/.hermes/kanban.db instead of the per-test HERMES_HOME.
"HERMES_KANBAN_DB",
"HERMES_KANBAN_BOARD",
"HERMES_KANBAN_WORKSPACES_ROOT",
"HERMES_KANBAN_LOGS_ROOT",
"HERMES_KANBAN_TASK",
"HERMES_KANBAN_WORKSPACE",
"HERMES_TENANT",
"TERMINAL_CWD",
"TERMINAL_ENV",
"TERMINAL_VERCEL_RUNTIME",
@ -223,6 +233,45 @@ _HERMES_BEHAVIORAL_VARS = frozenset({
"SIGNAL_ALLOW_ALL_USERS",
"EMAIL_ALLOW_ALL_USERS",
"SMS_ALLOW_ALL_USERS",
# Gateway home channels are set by /sethome in real profiles. Tests that
# exercise dashboard notification toggles must opt in explicitly or they
# can accidentally subscribe against a developer's real home channel.
"TELEGRAM_HOME_CHANNEL",
"TELEGRAM_HOME_CHANNEL_THREAD_ID",
"TELEGRAM_HOME_CHANNEL_NAME",
"DISCORD_HOME_CHANNEL",
"DISCORD_HOME_CHANNEL_THREAD_ID",
"DISCORD_HOME_CHANNEL_NAME",
"SLACK_HOME_CHANNEL",
"SLACK_HOME_CHANNEL_THREAD_ID",
"SLACK_HOME_CHANNEL_NAME",
"WHATSAPP_HOME_CHANNEL",
"WHATSAPP_HOME_CHANNEL_THREAD_ID",
"WHATSAPP_HOME_CHANNEL_NAME",
"SIGNAL_HOME_CHANNEL",
"SIGNAL_HOME_CHANNEL_THREAD_ID",
"SIGNAL_HOME_CHANNEL_NAME",
"EMAIL_HOME_CHANNEL",
"EMAIL_HOME_CHANNEL_THREAD_ID",
"EMAIL_HOME_CHANNEL_NAME",
"SMS_HOME_CHANNEL",
"SMS_HOME_CHANNEL_THREAD_ID",
"SMS_HOME_CHANNEL_NAME",
"MATTERMOST_HOME_CHANNEL",
"MATTERMOST_HOME_CHANNEL_THREAD_ID",
"MATTERMOST_HOME_CHANNEL_NAME",
"MATRIX_HOME_CHANNEL",
"MATRIX_HOME_CHANNEL_THREAD_ID",
"MATRIX_HOME_CHANNEL_NAME",
"DINGTALK_HOME_CHANNEL",
"DINGTALK_HOME_CHANNEL_THREAD_ID",
"DINGTALK_HOME_CHANNEL_NAME",
"FEISHU_HOME_CHANNEL",
"FEISHU_HOME_CHANNEL_THREAD_ID",
"FEISHU_HOME_CHANNEL_NAME",
"WECOM_HOME_CHANNEL",
"WECOM_HOME_CHANNEL_THREAD_ID",
"WECOM_HOME_CHANNEL_NAME",
# Platform gating — set by load_gateway_config() as a side effect when
# a config.yaml is present, so individual test bodies that call the
# loader leak these values into later tests on the same xdist worker.

View file

@ -0,0 +1,138 @@
import asyncio
from pathlib import Path
import pytest
from gateway.config import Platform
from gateway.run import GatewayRunner
from hermes_cli import kanban_db as kb
class RecordingAdapter:
    """Test double for a gateway adapter that records every message sent.

    Each ``send`` call appends one dict with the keys ``chat_id``, ``text``
    and ``metadata`` to ``self.sent`` instead of delivering anything.
    """

    def __init__(self):
        # Messages in the order they were "sent".
        self.sent = []

    async def send(self, chat_id, text, metadata=None):
        """Record one outgoing message; a falsy *metadata* is stored as ``{}``."""
        record = {
            "chat_id": chat_id,
            "text": text,
            "metadata": metadata if metadata else {},
        }
        self.sent.append(record)
class DisconnectedAdapters(dict):
    """Adapter mapping whose ``get()`` always misses.

    Iteration, membership tests and ``[]`` lookups behave like a normal dict,
    so a platform is still visible while entries are being collected. Only
    ``get()`` is overridden, to simulate the adapter having disconnected by
    the time it is looked up.
    """

    def get(self, key, default=None):
        """Return ``None`` no matter what *key* or *default* is supplied."""
        # Deliberately ignores both arguments: the lookup must always fail.
        return None
async def _run_one_notifier_tick(monkeypatch, runner):
    """Run ``runner._kanban_notifier_watcher`` with ``asyncio.sleep`` stubbed.

    The stub returns immediately for a sleep of exactly 5. For any other
    delay it clears ``runner._running`` and yields once to the event loop.
    Presumably the 5 is the watcher's startup delay and the other sleep is
    its per-iteration wait -- confirm against ``GatewayRunner``.

    NOTE: ``asyncio.sleep`` is captured when this helper is called, so a
    second call under the same ``monkeypatch`` wraps the previous stub rather
    than the real function.
    """
    original_sleep = asyncio.sleep

    async def _sleep_stub(delay):
        if delay != 5:
            # Any sleep other than 5 stops the runner, then yields control.
            runner._running = False
            await original_sleep(0)
        return None

    monkeypatch.setattr(asyncio, "sleep", _sleep_stub)
    await runner._kanban_notifier_watcher(interval=1)
def _make_runner(adapter):
    """Return a ``GatewayRunner`` built without ``__init__``, wired to *adapter*.

    Only the three attributes assigned here are set: the running flag, a
    single Telegram adapter mapping, and an empty per-subscription failure
    counter dict.
    """
    bare_runner = GatewayRunner.__new__(GatewayRunner)
    initial_state = (
        ("_running", True),
        ("adapters", {Platform.TELEGRAM: adapter}),
        ("_kanban_sub_fail_counts", {}),
    )
    for attr_name, attr_value in initial_state:
        setattr(bare_runner, attr_name, attr_value)
    return bare_runner
def _create_completed_subscription(summary="done once"):
    """Create a task, subscribe telegram/chat-1 to it, complete it, return its id.

    The connection opened here is always closed, even if a step raises.
    """
    db = kb.connect()
    try:
        task_id = kb.create_task(db, title="notify once", assignee="worker")
        kb.add_notify_sub(
            db,
            task_id=task_id,
            platform="telegram",
            chat_id="chat-1",
        )
        kb.complete_task(db, task_id, summary=summary)
    finally:
        db.close()
    return task_id
def _unseen_terminal_events(tid):
    """Return the events ``kb.unseen_events_for_sub`` reports for telegram/chat-1.

    Only the five terminal kinds listed below are requested. The first
    element of the helper's return value is discarded.
    """
    db = kb.connect()
    try:
        result = kb.unseen_events_for_sub(
            db,
            task_id=tid,
            platform="telegram",
            chat_id="chat-1",
            kinds=["completed", "blocked", "gave_up", "crashed", "timed_out"],
        )
        _, pending = result
    finally:
        db.close()
    return pending
def test_kanban_notifier_dedupes_board_slugs_pointing_to_same_db(tmp_path, monkeypatch):
    """One notifier tick must send exactly one message for a completed task.

    ``HERMES_KANBAN_DB`` is pinned to a single file and metadata is written
    for two board slugs. Per the test's name, both slugs then point at the
    same database.
    """
    monkeypatch.setenv("HERMES_KANBAN_DB", str(tmp_path / "shared-kanban.db"))
    kb.init_db()
    for slug, label in (("alias-a", "Alias A"), ("alias-b", "Alias B")):
        kb.write_board_metadata(slug, name=label)

    task_id = _create_completed_subscription()
    recorder = RecordingAdapter()
    gateway = _make_runner(recorder)

    asyncio.run(_run_one_notifier_tick(monkeypatch, gateway))

    assert len(recorder.sent) == 1
    delivered_text = recorder.sent[0]["text"]
    assert "Kanban" in delivered_text
    assert task_id in delivered_text
def test_kanban_notifier_claim_prevents_second_watcher_send(tmp_path, monkeypatch):
    """A second notifier tick against the same DB must not re-send the event.

    Two independent runners, each with its own recording adapter, tick once
    in sequence. The first must deliver the completion message and the
    second must deliver nothing.
    """
    db_path = tmp_path / "single-owner.db"
    monkeypatch.setenv("HERMES_KANBAN_DB", str(db_path))
    kb.init_db()
    # Called for its side effect only; this test never reads the task id.
    _create_completed_subscription()

    adapter1 = RecordingAdapter()
    adapter2 = RecordingAdapter()

    asyncio.run(_run_one_notifier_tick(monkeypatch, _make_runner(adapter1)))
    asyncio.run(_run_one_notifier_tick(monkeypatch, _make_runner(adapter2)))

    assert len(adapter1.sent) == 1
    assert adapter2.sent == []
def test_kanban_notifier_rewinds_claim_if_adapter_disconnects(tmp_path, monkeypatch):
    """The completed event must still be unseen when the adapter lookup fails.

    The runner's adapter mapping lists Telegram, but its ``get()`` always
    returns ``None``. After one notifier tick the event must still be
    reported as unseen for the subscription.
    """
    monkeypatch.setenv("HERMES_KANBAN_DB", str(tmp_path / "adapter-disconnect.db"))
    kb.init_db()
    task_id = _create_completed_subscription()

    # Build the standard runner, then swap in the always-missing mapping.
    gateway = _make_runner(RecordingAdapter())
    gateway.adapters = DisconnectedAdapters(gateway.adapters)

    asyncio.run(_run_one_notifier_tick(monkeypatch, gateway))

    remaining_kinds = [event.kind for event in _unseen_terminal_events(task_id)]
    assert remaining_kinds == ["completed"]
def test_kanban_db_path_is_test_isolated_from_real_home():
    """The kanban DB path must never be ``~/.hermes/kanban.db`` under test.

    The path is checked before and after a connection has been opened and
    written to. After the write it must also sit under ``kb.kanban_home()``.
    """
    kanban_root = Path(kb.kanban_home())
    real_db = (Path.home() / ".hermes" / "kanban.db").resolve()

    assert kb.kanban_db_path().resolve() != real_db

    db = kb.connect()
    try:
        task_id = kb.create_task(db, title="x", assignee="worker")
        kb.add_notify_sub(
            db,
            task_id=task_id,
            platform="telegram",
            chat_id="chat-1",
        )
    finally:
        db.close()

    # Re-query the path after the write rather than reusing the earlier value.
    assert kb.kanban_db_path().resolve().is_relative_to(kanban_root.resolve())
    assert kb.kanban_db_path().resolve() != real_db

View file

@ -568,6 +568,57 @@ def test_notify_cursor_advances(kanban_home):
conn.close()
def test_notify_claim_is_single_owner_and_rewindable(kanban_home):
    """A claim hides events from a second connection until it is rewound.

    Steps: the first connection claims the completed event, a claim from the
    second connection then comes back empty, and after
    ``rewind_notify_cursor`` the event is reported as unseen again.
    """
    first = kb.connect()
    second = kb.connect()
    try:
        tid = kb.create_task(first, title="x", assignee="w")
        # Identifies the subscription in every call below.
        sub_key = {"task_id": tid, "platform": "telegram", "chat_id": "123"}
        kb.add_notify_sub(first, **sub_key)
        kb.complete_task(first, tid, result="ok")

        claim = kb.claim_unseen_events_for_sub(
            first,
            kinds=["completed", "blocked"],
            **sub_key,
        )
        old_cursor, claimed_cursor, events = claim
        assert old_cursor == 0
        assert claimed_cursor > old_cursor
        assert [ev.kind for ev in events] == ["completed"]

        # A concurrent notifier instance sees the advanced cursor and cannot
        # claim/send the same event range.
        second_claim = kb.claim_unseen_events_for_sub(
            second,
            kinds=["completed", "blocked"],
            **sub_key,
        )
        _, _, duplicate_events = second_claim
        assert duplicate_events == []

        rewound = kb.rewind_notify_cursor(
            first,
            claimed_cursor=claimed_cursor,
            old_cursor=old_cursor,
            **sub_key,
        )
        assert rewound is True

        _, retried_events = kb.unseen_events_for_sub(
            second,
            kinds=["completed", "blocked"],
            **sub_key,
        )
        assert [ev.kind for ev in retried_events] == ["completed"]
    finally:
        first.close()
        second.close()
# ---------------------------------------------------------------------------
# GC + retention
# ---------------------------------------------------------------------------