mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-18 04:41:56 +00:00
fix: dedupe kanban notifier delivery claims
This commit is contained in:
parent
373c4d6647
commit
861ce7c0b6
5 changed files with 411 additions and 7 deletions
100
gateway/run.py
100
gateway/run.py
|
|
@ -4119,18 +4119,42 @@ class GatewayRunner:
|
|||
try:
|
||||
def _collect():
|
||||
deliveries: list[dict] = []
|
||||
# Enumerate every board on disk. Cheap: a few
|
||||
# directory stat calls per tick. Missing/empty
|
||||
# boards are silently skipped.
|
||||
active_platforms = {
|
||||
getattr(platform, "value", str(platform)).lower()
|
||||
for platform in self.adapters.keys()
|
||||
}
|
||||
if not active_platforms:
|
||||
logger.debug("kanban notifier: no connected adapters; skipping tick")
|
||||
return deliveries
|
||||
|
||||
# Enumerate every board on disk, but poll each resolved DB
|
||||
# path once. Multiple slugs can point at the same DB when
|
||||
# HERMES_KANBAN_DB pins the board path; without this guard
|
||||
# one gateway could collect the same subscription/event
|
||||
# more than once before advancing the cursor.
|
||||
try:
|
||||
boards = _kb.list_boards(include_archived=False)
|
||||
except Exception:
|
||||
boards = [_kb.read_board_metadata(_kb.DEFAULT_BOARD)]
|
||||
seen_db_paths: set[str] = set()
|
||||
for board_meta in boards:
|
||||
slug = board_meta.get("slug") or _kb.DEFAULT_BOARD
|
||||
db_path = board_meta.get("db_path")
|
||||
try:
|
||||
resolved_db_path = str(Path(db_path).expanduser().resolve()) if db_path else str(_kb.kanban_db_path(slug).resolve())
|
||||
except Exception:
|
||||
resolved_db_path = f"slug:{slug}"
|
||||
if resolved_db_path in seen_db_paths:
|
||||
logger.debug(
|
||||
"kanban notifier: skipping duplicate board slug %s for DB %s",
|
||||
slug, resolved_db_path,
|
||||
)
|
||||
continue
|
||||
seen_db_paths.add(resolved_db_path)
|
||||
try:
|
||||
conn = _kb.connect(board=slug)
|
||||
except Exception:
|
||||
except Exception as exc:
|
||||
logger.debug("kanban notifier: cannot open board %s: %s", slug, exc)
|
||||
continue
|
||||
try:
|
||||
# `connect()` runs the schema + idempotent migration
|
||||
|
|
@ -4146,8 +4170,17 @@ class GatewayRunner:
|
|||
# tolerates that race, but we still skip the
|
||||
# redundant call to avoid the wasted work.
|
||||
subs = _kb.list_notify_subs(conn)
|
||||
if not subs:
|
||||
logger.debug("kanban notifier: board %s has no subscriptions", slug)
|
||||
for sub in subs:
|
||||
cursor, events = _kb.unseen_events_for_sub(
|
||||
platform = (sub.get("platform") or "").lower()
|
||||
if platform not in active_platforms:
|
||||
logger.debug(
|
||||
"kanban notifier: subscription for %s on %s skipped; adapter not connected",
|
||||
sub.get("task_id"), platform or "<missing>",
|
||||
)
|
||||
continue
|
||||
old_cursor, cursor, events = _kb.claim_unseen_events_for_sub(
|
||||
conn,
|
||||
task_id=sub["task_id"],
|
||||
platform=sub["platform"],
|
||||
|
|
@ -4158,8 +4191,13 @@ class GatewayRunner:
|
|||
if not events:
|
||||
continue
|
||||
task = _kb.get_task(conn, sub["task_id"])
|
||||
logger.debug(
|
||||
"kanban notifier: claimed %d event(s) for %s on board %s cursor %s→%s",
|
||||
len(events), sub["task_id"], slug, old_cursor, cursor,
|
||||
)
|
||||
deliveries.append({
|
||||
"sub": sub,
|
||||
"old_cursor": old_cursor,
|
||||
"cursor": cursor,
|
||||
"events": events,
|
||||
"task": task,
|
||||
|
|
@ -4186,7 +4224,18 @@ class GatewayRunner:
|
|||
continue
|
||||
adapter = self.adapters.get(plat)
|
||||
if adapter is None:
|
||||
continue # platform not currently connected
|
||||
logger.debug(
|
||||
"kanban notifier: adapter %s disconnected before delivery for %s; rewinding claim",
|
||||
platform_str, sub["task_id"],
|
||||
)
|
||||
await asyncio.to_thread(
|
||||
self._kanban_rewind,
|
||||
sub,
|
||||
d["cursor"],
|
||||
d.get("old_cursor", 0),
|
||||
board_slug,
|
||||
)
|
||||
continue
|
||||
title = (task.title if task else sub["task_id"])[:120]
|
||||
for ev in d["events"]:
|
||||
kind = ev.kind
|
||||
|
|
@ -4254,6 +4303,10 @@ class GatewayRunner:
|
|||
await adapter.send(
|
||||
sub["chat_id"], msg, metadata=metadata,
|
||||
)
|
||||
logger.info(
|
||||
"kanban notifier: delivered %s event for %s to %s/%s on board %s",
|
||||
kind, sub["task_id"], platform_str, sub["chat_id"], board_slug,
|
||||
)
|
||||
# Reset the failure counter on success.
|
||||
sub_fail_counts.pop(sub_key, None)
|
||||
except Exception as exc:
|
||||
|
|
@ -4273,7 +4326,17 @@ class GatewayRunner:
|
|||
)
|
||||
await asyncio.to_thread(self._kanban_unsub, sub, board_slug)
|
||||
sub_fail_counts.pop(sub_key, None)
|
||||
# Don't advance cursor on send failure — retry next tick.
|
||||
else:
|
||||
await asyncio.to_thread(
|
||||
self._kanban_rewind,
|
||||
sub,
|
||||
d["cursor"],
|
||||
d.get("old_cursor", 0),
|
||||
board_slug,
|
||||
)
|
||||
# Rewind the pre-send claim on transient failure so
|
||||
# a later tick can retry. After too many failures,
|
||||
# dropping the subscription is the terminal action.
|
||||
break
|
||||
else:
|
||||
# All events delivered; advance cursor + maybe unsub.
|
||||
|
|
@ -4336,6 +4399,29 @@ class GatewayRunner:
|
|||
finally:
|
||||
conn.close()
|
||||
|
||||
def _kanban_rewind(
    self,
    sub: dict,
    claimed_cursor: int,
    old_cursor: int,
    board: Optional[str] = None,
) -> None:
    """Sync helper: roll back a previously claimed notification cursor.

    Opens its own kanban DB connection for ``board`` and asks
    ``rewind_notify_cursor`` to undo the claim identified by
    ``claimed_cursor``, passing ``old_cursor`` as the value to restore.
    The connection is always closed, even when the rewind raises.

    Args:
        sub: Subscription row. ``task_id``, ``platform`` and ``chat_id``
            are required keys; ``thread_id`` is optional and a missing
            or falsy value is passed on as ``""``.
        claimed_cursor: Cursor value recorded when the events were claimed.
        old_cursor: Cursor value from before the claim.
        board: Board slug forwarded to ``connect``; ``None`` is passed
            through unchanged.
    """
    from hermes_cli import kanban_db as _kb

    db = _kb.connect(board=board)
    try:
        # Build the subscription identity after connecting so a malformed
        # ``sub`` still goes through the ``finally`` and closes the handle.
        subscription_key = {
            "task_id": sub["task_id"],
            "platform": sub["platform"],
            "chat_id": sub["chat_id"],
            "thread_id": sub.get("thread_id") or "",
        }
        _kb.rewind_notify_cursor(
            db,
            **subscription_key,
            claimed_cursor=claimed_cursor,
            old_cursor=old_cursor,
        )
    finally:
        db.close()
|
||||
|
||||
async def _kanban_dispatcher_watcher(self) -> None:
|
||||
"""Embedded kanban dispatcher — one tick every `dispatch_interval_seconds`.
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue