mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-18 04:41:56 +00:00
Follow-up to the previous commit's notifier behavior change. Two test fixes: 1. `tests/gateway/test_kanban_notifier.py` gains `test_notifier_redelivers_same_kind_on_dispatch_cycle` — pins the new contract directly: a task that crashes, gets reclaimed, and crashes again notifies the user BOTH times. Before #21398 the second crash silently dropped because the subscription was already deleted. 2. `tests/hermes_cli/test_kanban_notify.py::test_notifier_unsubs_after_abnormal_events[gave_up|crashed|timed_out]` is flipped. Those tests were added in the salvage of #22941 and asserted the OLD behavior (subscription deleted after gave_up / crashed / timed_out). They're now obsolete — the new contract is "subscription survives a non-final terminal event so retries reach the user." Updated docstring + asserts; the cursor-advance check is added to confirm the dedup mechanism still works. The `test_notifier_unsubs_after_completed_event` test stays untouched because `completed` IS still a terminal event that triggers unsub (the task hits `done` status, which is handled by the `task_terminal` branch in the notifier loop).
236 lines
7.8 KiB
Python
236 lines
7.8 KiB
Python
import asyncio
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
from gateway.config import Platform
|
|
from gateway.run import GatewayRunner
|
|
from hermes_cli import kanban_db as kb
|
|
|
|
|
|
class RecordingAdapter:
    """Test double that records every ``send()`` call instead of delivering it.

    Each call appends one dict with the keys ``chat_id``, ``text`` and
    ``metadata`` to ``self.sent``, in call order.
    """

    def __init__(self):
        # Chronological list of recorded send() payloads.
        self.sent = []

    async def send(self, chat_id, text, metadata=None):
        """Record one outgoing message.

        Args:
            chat_id: Destination chat identifier, stored as given.
            text: Message body, stored as given.
            metadata: Optional mapping of extra send options. A missing or
                empty value is recorded as ``{}``.

        Returns:
            None.
        """
        # Store a copy so a caller that mutates its metadata dict after the
        # send cannot retroactively change what was recorded.
        snapshot = dict(metadata) if metadata else {}
        self.sent.append({"chat_id": chat_id, "text": text, "metadata": snapshot})
|
|
|
|
|
|
class DisconnectedAdapters(dict):
    """Adapter mapping whose ``get()`` lookups always come back empty.

    As a ``dict`` subclass, the keys supplied at construction stay visible to
    iteration and membership tests; only ``get()`` is overridden, which lets a
    platform appear present while every lookup behaves as if it disconnected.
    """

    def get(self, key, default=None):
        # Both arguments are deliberately ignored: not even ``default`` is
        # handed back, so the caller always sees a missing adapter.
        return None
|
|
|
|
|
|
async def _run_one_notifier_tick(monkeypatch, runner):
    """Drive ``runner._kanban_notifier_watcher`` until its first non-5 sleep.

    ``asyncio.sleep`` is swapped for a fake only while the watcher runs. The
    patch is undone as soon as the watcher returns, so a later call in the
    same test captures the genuine ``asyncio.sleep`` again rather than the
    previous call's fake.

    Args:
        monkeypatch: pytest's ``monkeypatch`` fixture.
        runner: Object exposing ``_running`` and the coroutine method
            ``_kanban_notifier_watcher(interval=...)``.
    """
    real_sleep = asyncio.sleep

    async def fake_sleep(delay):
        if delay == 5:
            # NOTE(review): presumably the watcher's start-up delay; it is
            # skipped without stopping the runner — confirm against
            # GatewayRunner.
            return None
        # Any other sleep is treated as the end of a polling pass: ask the
        # watcher to stop, then yield once to the event loop.
        runner._running = False
        await real_sleep(0)

    # Scope the patch to the watcher call so it cannot leak into later ticks.
    with monkeypatch.context() as scoped:
        scoped.setattr(asyncio, "sleep", fake_sleep)
        await runner._kanban_notifier_watcher(interval=1)
|
|
|
|
|
|
def _make_runner(adapter):
    """Build a ``GatewayRunner`` without running its ``__init__``.

    Only the three attributes set here are populated: the running flag, a
    single-entry adapter mapping keyed by ``Platform.TELEGRAM``, and an empty
    ``_kanban_sub_fail_counts`` dict.

    Args:
        adapter: Object registered as the Telegram adapter.

    Returns:
        The partially initialised runner.
    """
    bare_runner = GatewayRunner.__new__(GatewayRunner)
    initial_state = (
        ("_running", True),
        ("adapters", {Platform.TELEGRAM: adapter}),
        ("_kanban_sub_fail_counts", {}),
    )
    for attr_name, attr_value in initial_state:
        setattr(bare_runner, attr_name, attr_value)
    return bare_runner
|
|
|
|
|
|
def _create_completed_subscription(summary="done once"):
    """Create a task, subscribe ``telegram``/``chat-1`` to it, then complete it.

    Args:
        summary: Text passed to ``kb.complete_task`` as the completion summary.

    Returns:
        The id returned by ``kb.create_task``.
    """
    db = kb.connect()
    try:
        task_id = kb.create_task(db, title="notify once", assignee="worker")
        kb.add_notify_sub(
            db, task_id=task_id, platform="telegram", chat_id="chat-1"
        )
        kb.complete_task(db, task_id, summary=summary)
    finally:
        # Always release the connection, even if one of the kb calls raised.
        db.close()
    return task_id
|
|
|
|
|
|
def _unseen_terminal_events(tid):
    """Return the events ``kb.unseen_events_for_sub`` still reports for a task.

    The query targets the ``telegram``/``chat-1`` subscription and is limited
    to the kinds listed below. The first element of the helper's two-item
    result is discarded.

    Args:
        tid: Task id whose unseen events are requested.

    Returns:
        The second element of the ``kb.unseen_events_for_sub`` result.
    """
    terminal_kinds = ["completed", "blocked", "gave_up", "crashed", "timed_out"]
    db = kb.connect()
    try:
        _ignored, pending = kb.unseen_events_for_sub(
            db,
            task_id=tid,
            platform="telegram",
            chat_id="chat-1",
            kinds=terminal_kinds,
        )
    finally:
        db.close()
    return pending
|
|
|
|
|
|
def test_kanban_notifier_dedupes_board_slugs_pointing_to_same_db(tmp_path, monkeypatch):
    """One completed task yields one message even with two board slugs.

    ``HERMES_KANBAN_DB`` pins a single database file, two board slugs are
    registered, and one notifier tick must send exactly one message that
    mentions "Kanban" and the task id.
    """
    shared_db = tmp_path / "shared-kanban.db"
    monkeypatch.setenv("HERMES_KANBAN_DB", str(shared_db))
    kb.init_db()
    for slug, label in (("alias-a", "Alias A"), ("alias-b", "Alias B")):
        kb.write_board_metadata(slug, name=label)

    task_id = _create_completed_subscription()

    recorder = RecordingAdapter()
    asyncio.run(_run_one_notifier_tick(monkeypatch, _make_runner(recorder)))

    assert len(recorder.sent) == 1
    message = recorder.sent[0]["text"]
    assert "Kanban" in message
    assert task_id in message
|
|
|
|
|
|
def test_kanban_notifier_claim_prevents_second_watcher_send(tmp_path, monkeypatch):
    """A second watcher tick over the same DB must not resend the event.

    Two independent runners, each with its own recording adapter, tick one
    after the other; only the first adapter may receive the notification.
    """
    monkeypatch.setenv("HERMES_KANBAN_DB", str(tmp_path / "single-owner.db"))
    kb.init_db()

    _create_completed_subscription()

    first, second = RecordingAdapter(), RecordingAdapter()
    for recorder in (first, second):
        asyncio.run(_run_one_notifier_tick(monkeypatch, _make_runner(recorder)))

    assert len(first.sent) == 1
    assert second.sent == []
|
|
|
|
|
|
def test_kanban_notifier_rewinds_claim_if_adapter_disconnects(tmp_path, monkeypatch):
    """The completed event stays unseen when the adapter lookup returns None.

    The runner's adapter mapping lists Telegram as a key but its ``get()``
    always yields ``None``; after one tick the ``completed`` event must still
    be reported as unseen for the subscription.
    """
    monkeypatch.setenv("HERMES_KANBAN_DB", str(tmp_path / "adapter-disconnect.db"))
    kb.init_db()
    task_id = _create_completed_subscription()

    # Start from the standard runner, then swap in the mapping whose get()
    # simulates a disconnect.
    runner = _make_runner(RecordingAdapter())
    runner.adapters = DisconnectedAdapters(runner.adapters)

    asyncio.run(_run_one_notifier_tick(monkeypatch, runner))

    unseen_kinds = [event.kind for event in _unseen_terminal_events(task_id)]
    assert unseen_kinds == ["completed"]
|
|
|
|
|
|
def test_kanban_db_path_is_test_isolated_from_real_home():
    """Guard against the suite touching ``~/.hermes/kanban.db``.

    The resolved kanban DB path must differ from the production location both
    before and after a connection writes a task and a subscription, and it
    must sit beneath ``kb.kanban_home()``.
    """
    home_dir = Path(kb.kanban_home())
    real_db = Path.home() / ".hermes" / "kanban.db"
    assert kb.kanban_db_path().resolve() != real_db.resolve()

    db = kb.connect()
    try:
        task_id = kb.create_task(db, title="x", assignee="worker")
        kb.add_notify_sub(db, task_id=task_id, platform="telegram", chat_id="chat-1")
    finally:
        db.close()

    # Re-check after real writes went through the connection.
    assert kb.kanban_db_path().resolve().is_relative_to(home_dir.resolve())
    assert kb.kanban_db_path().resolve() != real_db.resolve()
|
|
|
|
|
|
class FailingAdapter:
    """Adapter stand-in whose ``send()`` raises on every call.

    ``attempts`` counts how many times ``send()`` was entered, so a test can
    prove the send path ran before the failure.
    """

    def __init__(self):
        self.attempts = 0

    async def send(self, chat_id, text, metadata=None):
        # Count the attempt first so it is visible even though we raise.
        self.attempts = self.attempts + 1
        raise RuntimeError("simulated send failure")
|
|
|
|
|
|
def test_kanban_notifier_rewinds_claim_on_send_exception(tmp_path, monkeypatch):
    """An adapter that raises from send() must not cause the event to be lost.

    Unlike the disconnect test, the adapter here is reachable and ``send()``
    really runs before raising. After one tick the ``completed`` event must
    still be reported as unseen so a later tick can retry it.
    """
    monkeypatch.setenv("HERMES_KANBAN_DB", str(tmp_path / "send-failure.db"))
    kb.init_db()
    task_id = _create_completed_subscription()

    failing = FailingAdapter()
    asyncio.run(_run_one_notifier_tick(monkeypatch, _make_runner(failing)))

    # A non-zero attempt count proves the failure path ran, not merely the
    # missing-adapter path.
    assert failing.attempts >= 1, "send should have been attempted at least once"
    unseen_kinds = [event.kind for event in _unseen_terminal_events(task_id)]
    assert unseen_kinds == ["completed"]
|
|
|
|
|
|
def test_notifier_redelivers_same_kind_on_dispatch_cycle(tmp_path, monkeypatch):
    """Two ``crashed`` events on one task produce two notifications.

    Pins the contract referenced as issue #21398: the subscription row must
    survive a ``crashed`` event so that a second ``crashed`` event on the same
    task also reaches the adapter.

    Sequence: append a ``crashed`` event, tick, check one delivery and that
    the subscription is still listed; append a second ``crashed`` event, tick
    with a fresh runner, check two deliveries.
    """
    monkeypatch.setenv("HERMES_KANBAN_DB", str(tmp_path / "redeliver-cycle.db"))
    kb.init_db()

    db = kb.connect()
    try:
        task_id = kb.create_task(db, title="cycle test", assignee="worker")
        kb.add_notify_sub(db, task_id=task_id, platform="telegram", chat_id="chat-1")
        # First crash event for the task.
        kb._append_event(db, task_id, kind="crashed")
    finally:
        db.close()

    recorder = RecordingAdapter()
    asyncio.run(_run_one_notifier_tick(monkeypatch, _make_runner(recorder)))

    # The first crash reached the adapter.
    assert len(recorder.sent) == 1
    assert "crashed" in recorder.sent[0]["text"].lower()

    db = kb.connect()
    try:
        # The subscription row must still exist after the first delivery.
        remaining = kb.list_notify_subs(db, task_id)
        assert len(remaining) == 1, (
            "Subscription must survive a crashed event so a respawn-cycle "
            "second crash also notifies the user (issue #21398)."
        )
        # Second crash event on the same task, simulating a retry that
        # crashed again.
        kb._append_event(db, task_id, kind="crashed")
    finally:
        db.close()

    # Fresh runner, same recording adapter: the new event should be delivered.
    asyncio.run(_run_one_notifier_tick(monkeypatch, _make_runner(recorder)))

    delivered = len(recorder.sent)
    assert delivered == 2, (
        f"Second crashed event should also notify; got {delivered} "
        f"deliveries (texts: {[d['text'] for d in recorder.sent]})"
    )
    assert "crashed" in recorder.sent[1]["text"].lower()
|