mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-25 05:52:34 +00:00
fix(kanban): drop redundant init_db() in gateway watchers (#21378)
Both `_kanban_notifier_watcher` and `_kanban_dispatcher_watcher`'s
`_tick_once_for_board` called `_kb.connect(board=slug)` immediately
followed by `_kb.init_db(board=slug)`. Since `connect()` already runs
the schema + idempotent migration on first open per process, the
explicit `init_db()` was redundant — and worse, `init_db()` deliberately
busts the per-process `_INITIALIZED_PATHS` cache and re-runs the migration
on a *second* connection that races the first.
On every cold gateway start against a legacy DB this surfaced as either
`sqlite3.OperationalError: duplicate column name: <col>` or intermittent
`database is locked` errors logged at the first tick. The duplicate-column
case is now tolerated by `_add_column_if_missing` (commit 78698381a), but
the wasted second migration plus the database-is-locked race remain
fixable by skipping the redundant call entirely.
Drops `_kb.init_db(board=slug)` at both call sites and adds a regression
test in `tests/hermes_cli/test_kanban_notify.py` that pins the absence
via source inspection plus a runtime spy.
Co-authored-by: Teknium <127238744+teknium1@users.noreply.github.com>
This commit is contained in:
parent
68e44642c8
commit
6f2d60559e
2 changed files with 122 additions and 8 deletions
|
|
@ -3887,10 +3887,18 @@ class GatewayRunner:
|
||||||
except Exception:
|
except Exception:
|
||||||
continue
|
continue
|
||||||
try:
|
try:
|
||||||
try:
|
# `connect()` runs the schema + idempotent migration
|
||||||
_kb.init_db(board=slug) # idempotent; handles first-run
|
# on first open per process, so an explicit
|
||||||
except Exception:
|
# `init_db()` here would be redundant. Worse:
|
||||||
pass
|
# `init_db()` deliberately busts the per-process
|
||||||
|
# cache and re-runs the migration on a *second*
|
||||||
|
# connection, which races the first and used to
|
||||||
|
# log a benign but noisy `duplicate column name`
|
||||||
|
# traceback (and intermittent "database is locked"
|
||||||
|
# — issue #21378) on every gateway start against
|
||||||
|
# a legacy DB. `_add_column_if_missing` now
|
||||||
|
# tolerates that race, but we still skip the
|
||||||
|
# redundant call to avoid the wasted work.
|
||||||
subs = _kb.list_notify_subs(conn)
|
subs = _kb.list_notify_subs(conn)
|
||||||
for sub in subs:
|
for sub in subs:
|
||||||
cursor, events = _kb.unseen_events_for_sub(
|
cursor, events = _kb.unseen_events_for_sub(
|
||||||
|
|
@ -4183,10 +4191,12 @@ class GatewayRunner:
|
||||||
conn = None
|
conn = None
|
||||||
try:
|
try:
|
||||||
conn = _kb.connect(board=slug)
|
conn = _kb.connect(board=slug)
|
||||||
try:
|
# `connect()` runs the schema + idempotent migration on
|
||||||
_kb.init_db(board=slug) # idempotent, handles first-run
|
# first open per process; the previous explicit
|
||||||
except Exception:
|
# `init_db()` call here busted the per-process cache and
|
||||||
pass
|
# re-ran the migration on a second connection, racing
|
||||||
|
# the first. See the matching comment in
|
||||||
|
# `_kanban_notifier_watcher` and issue #21378.
|
||||||
return _kb.dispatch_once(
|
return _kb.dispatch_once(
|
||||||
conn,
|
conn,
|
||||||
board=slug,
|
board=slug,
|
||||||
|
|
|
||||||
|
|
@ -197,3 +197,107 @@ async def test_notifier_second_blocked_delivers(kanban_home):
|
||||||
f"Should receive 2 blocked notification, but only get {len(blocked_deliveries)} count\n"
|
f"Should receive 2 blocked notification, but only get {len(blocked_deliveries)} count\n"
|
||||||
f"Message {delivered_msgs}"
|
f"Message {delivered_msgs}"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Regression: gateway watchers must not double-init the kanban DB.
|
||||||
|
#
|
||||||
|
# Both the notifier watcher (`_kanban_notifier_watcher`) and the dispatcher
|
||||||
|
# tick (`_tick_once_for_board`) used to call `_kb.connect(board=slug)`
|
||||||
|
# immediately followed by `_kb.init_db(board=slug)`. Since `connect()`
|
||||||
|
# already runs the schema + idempotent migration on first open per process,
|
||||||
|
# the explicit `init_db()` was redundant — and worse, `init_db()`
|
||||||
|
# deliberately busts the per-process cache and re-runs the migration on a
|
||||||
|
# *second* connection, which races the first. On legacy DBs this surfaced
|
||||||
|
# as `duplicate column name: <col>` (now tolerated by
|
||||||
|
# `_add_column_if_missing`) and intermittent `database is locked` errors
|
||||||
|
# (issue #21378).
|
||||||
|
#
|
||||||
|
# The fix removes the `init_db()` calls in both watchers; this regression
|
||||||
|
# test pins that behaviour so we don't reintroduce them.
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_notifier_does_not_call_init_db(kanban_home):
|
||||||
|
"""Notifier watcher path must not invoke `_kb.init_db` (issue #21378)."""
|
||||||
|
import hermes_cli.kanban_db as kb
|
||||||
|
from gateway.run import GatewayRunner
|
||||||
|
from gateway.config import Platform
|
||||||
|
|
||||||
|
runner = object.__new__(GatewayRunner)
|
||||||
|
runner._running = True
|
||||||
|
runner._kanban_sub_fail_counts = {}
|
||||||
|
|
||||||
|
fake_adapter = MagicMock()
|
||||||
|
fake_adapter.send = AsyncMock()
|
||||||
|
runner.adapters = {Platform.TELEGRAM: fake_adapter}
|
||||||
|
|
||||||
|
_orig_sleep = asyncio.sleep
|
||||||
|
tick_count = 0
|
||||||
|
|
||||||
|
async def _fast_sleep(_):
|
||||||
|
nonlocal tick_count
|
||||||
|
await _orig_sleep(0)
|
||||||
|
tick_count += 1
|
||||||
|
if tick_count >= 3:
|
||||||
|
runner._running = False
|
||||||
|
|
||||||
|
init_db_calls: list[object] = []
|
||||||
|
real_init_db = kb.init_db
|
||||||
|
|
||||||
|
def _spy_init_db(*args, **kwargs):
|
||||||
|
init_db_calls.append((args, kwargs))
|
||||||
|
return real_init_db(*args, **kwargs)
|
||||||
|
|
||||||
|
with patch("gateway.run.asyncio.sleep", side_effect=_fast_sleep), \
|
||||||
|
patch("hermes_cli.kanban_db.init_db", side_effect=_spy_init_db):
|
||||||
|
await asyncio.wait_for(
|
||||||
|
runner._kanban_notifier_watcher(interval=1),
|
||||||
|
timeout=10.0,
|
||||||
|
)
|
||||||
|
|
||||||
|
assert init_db_calls == [], (
|
||||||
|
"_kanban_notifier_watcher must not call init_db on every tick — "
|
||||||
|
"connect() handles first-run schema init. "
|
||||||
|
"Reintroducing init_db revives issue #21378. "
|
||||||
|
f"Got {len(init_db_calls)} call(s): {init_db_calls}"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_dispatcher_tick_does_not_call_init_db(kanban_home, monkeypatch):
|
||||||
|
"""`_tick_once_for_board` must not invoke `_kb.init_db` (issue #21378).
|
||||||
|
|
||||||
|
`connect()` already runs the schema + idempotent migration on first open
|
||||||
|
per process. The explicit `init_db()` call was redundant and triggered a
|
||||||
|
second migration on a second connection that raced the first.
|
||||||
|
"""
|
||||||
|
import hermes_cli.kanban_db as kb
|
||||||
|
from gateway.run import GatewayRunner
|
||||||
|
from unittest.mock import patch
|
||||||
|
|
||||||
|
runner = object.__new__(GatewayRunner)
|
||||||
|
|
||||||
|
init_db_calls: list[object] = []
|
||||||
|
real_init_db = kb.init_db
|
||||||
|
|
||||||
|
def _spy_init_db(*args, **kwargs):
|
||||||
|
init_db_calls.append((args, kwargs))
|
||||||
|
return real_init_db(*args, **kwargs)
|
||||||
|
|
||||||
|
# The dispatcher watcher's tick lives as a local closure inside
|
||||||
|
# `_kanban_dispatcher_watcher`. Read the source and assert the
|
||||||
|
# specific patterns that would reintroduce the bug are absent.
|
||||||
|
import inspect
|
||||||
|
src = inspect.getsource(GatewayRunner._kanban_dispatcher_watcher)
|
||||||
|
assert "_kb.init_db(board=slug)" not in src, (
|
||||||
|
"_kanban_dispatcher_watcher must not call _kb.init_db(board=slug) — "
|
||||||
|
"see issue #21378. Use connect() alone; it runs migrations on first "
|
||||||
|
"open per process."
|
||||||
|
)
|
||||||
|
|
||||||
|
notifier_src = inspect.getsource(GatewayRunner._kanban_notifier_watcher)
|
||||||
|
assert "_kb.init_db(board=slug)" not in notifier_src, (
|
||||||
|
"_kanban_notifier_watcher must not call _kb.init_db(board=slug) — "
|
||||||
|
"see issue #21378."
|
||||||
|
)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue