mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-29 06:31:32 +00:00
Six regression tests pinning the dispatcher contract that was broken in #28712: * test_worker_block_is_not_auto_promoted_by_recompute_ready — kanban_block survives five back-to-back ticks (compressed dispatcher loop). * test_worker_block_on_child_with_done_parents_is_still_sticky — the parent-completion code path was the worst false-positive; even when every parent is done, an explicit worker block stays blocked. * test_circuit_breaker_block_still_auto_promotes — preserves the pre-#28712 recovery semantics for circuit-breaker blocks (direct UPDATE + no "blocked" event). * test_gave_up_event_alone_does_not_make_block_sticky — explicit guard so the gave_up event is never accidentally treated as sticky; covers the second leg of the protocol_violation loop. * test_unblock_clears_sticky_state_and_lets_block_recover — only unblock_task resolves the sticky state; subsequent circuit-breaker blocks recover normally. * test_protocol_violation_loop_is_broken — full bug-shaped reproduction: block → tick → (would-be) crash + gave_up → next tick still blocked. Without the fix this would loop indefinitely. The seventh test from the original PR (legacy-DB init recovery) was dropped during salvage — the schema-init half of #28712 is already fixed on main by #28754 and #28781, and the contract is covered by test_kanban_db.py::test_connect_migrates_legacy_db_before_optional_column_indexes.
268 lines
11 KiB
Python
268 lines
11 KiB
Python
"""Regression tests for #28712 — kanban dispatcher must not auto-promote
|
|
worker-initiated ``kanban_block`` (sticky blocks), but must keep
|
|
auto-recovering circuit-breaker blocks.
|
|
|
|
The bug: when a worker called ``kanban_block(reason="review-required:
|
|
...")`` to hand off to a human, the dispatcher's ``recompute_ready``
|
|
would promote the task back to ``ready`` on the next tick. The fresh
|
|
worker found nothing to do (work already applied), exited cleanly, and
|
|
got recorded as a ``protocol_violation`` → ``gave_up`` → promote → loop
|
|
until manual intervention.
|
|
|
|
These tests pin down:
|
|
|
|
* Worker / operator-initiated blocks are sticky and survive
|
|
``recompute_ready``.
|
|
* Circuit-breaker blocks (``gave_up`` event, status flipped via
|
|
``_record_task_failure``) still auto-recover — the original intent
|
|
of #40c1decb3 is preserved.
|
|
* An explicit ``kanban_unblock`` clears the sticky state.
|
|
* The full block → promote → crash → ``gave_up`` loop is broken after
|
|
this fix: subsequent ticks leave the task blocked.
|
|
|
|
The tangentially related schema-init ordering bug originally reported
|
|
in #28712 (``init_db`` crashing on legacy DBs that pre-dated the
|
|
``session_id`` migration) is covered separately by
|
|
``test_kanban_db.py::test_connect_migrates_legacy_db_before_optional_column_indexes``,
|
|
landed via #28754 / #28781 ahead of this fix.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import time
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
from hermes_cli import kanban_db as kb
|
|
|
|
|
|
@pytest.fixture
|
|
def kanban_home(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path:
|
|
"""Isolated HERMES_HOME with an empty kanban DB."""
|
|
home = tmp_path / ".hermes"
|
|
home.mkdir()
|
|
monkeypatch.setenv("HERMES_HOME", str(home))
|
|
monkeypatch.setattr(Path, "home", lambda: tmp_path)
|
|
kb.init_db()
|
|
return home
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Worker-initiated kanban_block must be sticky
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_worker_block_is_not_auto_promoted_by_recompute_ready(kanban_home: Path) -> None:
|
|
"""A standalone task that a worker explicitly blocks for review
|
|
must stay blocked across an arbitrary number of dispatcher ticks.
|
|
Before #28712's fix, ``recompute_ready`` would silently flip it
|
|
back to ``ready`` on the very next tick."""
|
|
with kb.connect() as conn:
|
|
tid = kb.create_task(conn, title="needs human review")
|
|
kb.claim_task(conn, tid)
|
|
assert kb.block_task(
|
|
conn, tid,
|
|
reason="review-required: please verify ACL change",
|
|
expected_run_id=kb.get_task(conn, tid).current_run_id,
|
|
)
|
|
assert kb.get_task(conn, tid).status == "blocked"
|
|
|
|
# Hammer the promotion code — exactly the dispatcher loop's
|
|
# behaviour, just compressed in time.
|
|
for _ in range(5):
|
|
promoted = kb.recompute_ready(conn)
|
|
assert promoted == 0, "worker-blocked task must not auto-promote"
|
|
assert kb.get_task(conn, tid).status == "blocked"
|
|
|
|
|
|
def test_worker_block_on_child_with_done_parents_is_still_sticky(kanban_home: Path) -> None:
|
|
"""The parent-completion path is the one ``recompute_ready`` was
|
|
designed for, so it's the most dangerous false-positive: even when
|
|
every parent is done, a worker-initiated block on the child must
|
|
stay blocked."""
|
|
with kb.connect() as conn:
|
|
parent = kb.create_task(conn, title="parent")
|
|
child = kb.create_task(conn, title="child", parents=[parent])
|
|
kb.complete_task(conn, parent, result="parent ok")
|
|
|
|
kb.claim_task(conn, child)
|
|
kb.block_task(
|
|
conn, child,
|
|
reason="review-required: child needs sign-off",
|
|
expected_run_id=kb.get_task(conn, child).current_run_id,
|
|
)
|
|
assert kb.get_task(conn, child).status == "blocked"
|
|
|
|
promoted = kb.recompute_ready(conn)
|
|
assert promoted == 0
|
|
assert kb.get_task(conn, child).status == "blocked"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Circuit-breaker blocks still auto-recover (preserve #40c1decb3 intent)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_circuit_breaker_block_still_auto_promotes(kanban_home: Path) -> None:
|
|
"""A child that was put into ``blocked`` *without* a worker-issued
|
|
``kanban_block`` (e.g. circuit-breaker after repeated spawn
|
|
failures, manual DB triage) must still get auto-promoted when its
|
|
parents complete — preserves the pre-#28712 recovery semantics."""
|
|
with kb.connect() as conn:
|
|
parent = kb.create_task(conn, title="parent")
|
|
child = kb.create_task(conn, title="child", parents=[parent])
|
|
kb.complete_task(conn, parent, result="ok")
|
|
|
|
# Simulate a circuit-breaker / direct triage that flips status
|
|
# without emitting a ``blocked`` event — exactly what
|
|
# ``_record_task_failure`` does after a ``gave_up``.
|
|
conn.execute(
|
|
"UPDATE tasks SET status='blocked', consecutive_failures=5, "
|
|
"last_failure_error='persistent error' WHERE id=?",
|
|
(child,),
|
|
)
|
|
conn.commit()
|
|
|
|
promoted = kb.recompute_ready(conn)
|
|
assert promoted == 1
|
|
task = kb.get_task(conn, child)
|
|
assert task.status == "ready"
|
|
assert task.consecutive_failures == 0
|
|
assert task.last_failure_error is None
|
|
|
|
|
|
def test_gave_up_event_alone_does_not_make_block_sticky(kanban_home: Path) -> None:
|
|
"""The circuit-breaker emits ``gave_up`` (not ``blocked``). Make
|
|
sure ``_has_sticky_block`` doesn't accidentally treat ``gave_up``
|
|
as sticky — otherwise we'd regress the safety net for genuinely
|
|
transient crashes."""
|
|
with kb.connect() as conn:
|
|
parent = kb.create_task(conn, title="parent")
|
|
child = kb.create_task(conn, title="child", parents=[parent])
|
|
kb.complete_task(conn, parent, result="ok")
|
|
|
|
# Status + event match what _record_task_failure writes when
|
|
# the breaker trips.
|
|
conn.execute(
|
|
"UPDATE tasks SET status='blocked' WHERE id=?", (child,),
|
|
)
|
|
conn.execute(
|
|
"INSERT INTO task_events (task_id, kind, payload, created_at) "
|
|
"VALUES (?, 'gave_up', NULL, ?)",
|
|
(child, int(time.time())),
|
|
)
|
|
conn.commit()
|
|
|
|
promoted = kb.recompute_ready(conn)
|
|
assert promoted == 1
|
|
assert kb.get_task(conn, child).status == "ready"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# unblock_task clears the sticky state
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_unblock_clears_sticky_state_and_lets_block_recover(kanban_home: Path) -> None:
|
|
"""``hermes kanban unblock`` (or the ``kanban_unblock`` tool) is
|
|
the only legitimate way out of a worker-initiated block. After
|
|
unblock, a *subsequent* circuit-breaker block on the same task
|
|
must again be eligible for auto-recovery."""
|
|
with kb.connect() as conn:
|
|
tid = kb.create_task(conn, title="t")
|
|
kb.claim_task(conn, tid)
|
|
kb.block_task(
|
|
conn, tid,
|
|
reason="review-required: ...",
|
|
expected_run_id=kb.get_task(conn, tid).current_run_id,
|
|
)
|
|
assert kb.unblock_task(conn, tid)
|
|
# After unblock the task is no longer blocked at all.
|
|
assert kb.get_task(conn, tid).status == "ready"
|
|
|
|
# Now simulate a *later* circuit-breaker block (no new
|
|
# ``blocked`` event, just status flip). The most recent
|
|
# block/unblock event is ``unblocked`` → guard does not fire
|
|
# → recompute can recover.
|
|
conn.execute(
|
|
"UPDATE tasks SET status='blocked' WHERE id=?", (tid,),
|
|
)
|
|
conn.commit()
|
|
|
|
promoted = kb.recompute_ready(conn)
|
|
assert promoted == 1
|
|
assert kb.get_task(conn, tid).status == "ready"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Full bug-shaped loop: block → promote → crash → gave_up → next tick
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_protocol_violation_loop_is_broken(kanban_home: Path) -> None:
|
|
"""Reproduces the exact #28712 loop and asserts the dispatcher
|
|
leaves the task blocked instead of cycling.
|
|
|
|
Loop shape from the issue:
|
|
|
|
1. Worker calls ``kanban_block`` → status='blocked',
|
|
``task_runs.outcome='blocked'``, ``blocked`` event.
|
|
2. (Bug) Dispatcher promotes back to ``ready``.
|
|
3. Fresh worker exits cleanly without terminal tool call →
|
|
``protocol_violation`` event.
|
|
4. ``_record_task_failure(failure_limit=1)`` → ``gave_up`` event,
|
|
status='blocked' again.
|
|
5. (Bug) Dispatcher promotes again → infinite loop.
|
|
|
|
With the fix in place, step 2 never happens — the test simulates
|
|
one would-be loop cycle by faking the crash-then-gave_up entries
|
|
that *would* have been written and asserts the *next* tick still
|
|
leaves the task blocked.
|
|
"""
|
|
with kb.connect() as conn:
|
|
tid = kb.create_task(conn, title="loop reproducer")
|
|
kb.claim_task(conn, tid)
|
|
kb.block_task(
|
|
conn, tid,
|
|
reason="review-required: human eyes please",
|
|
expected_run_id=kb.get_task(conn, tid).current_run_id,
|
|
)
|
|
assert kb.get_task(conn, tid).status == "blocked"
|
|
|
|
# First dispatcher tick — must NOT promote.
|
|
assert kb.recompute_ready(conn) == 0
|
|
assert kb.get_task(conn, tid).status == "blocked"
|
|
|
|
# Simulate the (hypothetical) protocol_violation + gave_up
|
|
# entries that the dispatcher would have written if the bug
|
|
# were still present. Even with those event rows in place,
|
|
# the worker-initiated ``blocked`` event is the most recent
|
|
# of the ``{blocked, unblocked}`` pair, so the sticky guard
|
|
# still fires.
|
|
now = int(time.time())
|
|
conn.execute(
|
|
"INSERT INTO task_events (task_id, kind, payload, created_at) "
|
|
"VALUES (?, 'protocol_violation', NULL, ?)",
|
|
(tid, now),
|
|
)
|
|
conn.execute(
|
|
"INSERT INTO task_events (task_id, kind, payload, created_at) "
|
|
"VALUES (?, 'gave_up', NULL, ?)",
|
|
(tid, now + 1),
|
|
)
|
|
conn.commit()
|
|
|
|
# Subsequent ticks must still leave it blocked.
|
|
for _ in range(3):
|
|
promoted = kb.recompute_ready(conn)
|
|
assert promoted == 0
|
|
assert kb.get_task(conn, tid).status == "blocked"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Schema-init recovery on legacy DBs is covered by
|
|
# tests/hermes_cli/test_kanban_db.py::test_connect_migrates_legacy_db_before_optional_column_indexes
|
|
# (landed via #28754 / #28781). The original PR shipped a duplicate test
|
|
# here; dropped during salvage to avoid two assertions of the same contract.
|
|
# ---------------------------------------------------------------------------
|