feat(kanban): hallucination gate + recovery UX for worker-created-card claims (#20232)

Workers completing a kanban task can now claim the ids of cards they
created via an optional ``created_cards`` field on ``kanban_complete``.
The kernel verifies each id exists and was created by the completing
worker's profile; any phantom id blocks the completion with a
``HallucinatedCardsError`` and records a
``completion_blocked_hallucination`` event on the task so the rejected
attempt is auditable. Successful completions also get a non-blocking
prose-scan pass over their ``summary`` + ``result`` that emits a
``suspected_hallucinated_references`` event for any ``t_<hex>``
reference that doesn't resolve.

Closes #20017.

Recovery UX (kernel + CLI + dashboard)
--------------------------------------

A structural gate alone isn't enough — operators also need to see and
act on stuck workers, especially when a profile's model is the root
cause. This PR ships the full loop:

* ``kanban_db.reclaim_task(task_id)`` — operator-driven reclaim that
  releases an active worker claim immediately (unlike
  ``release_stale_claims`` which only acts after claim_expires has
  passed). Emits a ``reclaimed`` event with ``manual: True`` payload.
* ``kanban_db.reassign_task(task_id, profile, reclaim_first=...)`` —
  switch a task to a different profile, optionally reclaiming a stuck
  running worker in the same call.
* ``hermes kanban reclaim <id> [--reason ...]`` and
  ``hermes kanban reassign <id> <profile> [--reclaim] [--reason ...]``
  CLI subcommands wired through to the same helpers.
* ``POST /api/plugins/kanban/tasks/{id}/reclaim`` and
  ``POST /api/plugins/kanban/tasks/{id}/reassign`` endpoints on the
  dashboard plugin.

Dashboard surfacing
-------------------

* ⚠ **warning badge** on cards with active hallucination events.
* **attention strip** at the top of the board listing all flagged
  tasks; dismissible per session.
* **events callout** in the task drawer — hallucination events render
  with a red left border, amber icon, and phantom ids as styled chips.
* **recovery section** in the task drawer with three actions: Reclaim,
  Reassign (with profile picker + reclaim-first checkbox), and a
  copy-to-clipboard hint for ``hermes -p <profile> model`` since
  profile config lives on disk and can't be edited from the browser.
  Auto-opens when the task has warnings, collapsed otherwise.
  Keyed by task id so state doesn't leak between drawers.

Active-vs-stale rule: warnings clear when a clean ``completed`` or
``edited`` event supersedes the hallucination, so recovery is never
permanently stigmatising — the audit events persist for debugging but
the badge goes away once the worker succeeds.

Skill updates
-------------

* ``skills/devops/kanban-worker/SKILL.md`` documents the
  ``created_cards`` contract with good/bad examples.
* ``skills/devops/kanban-orchestrator/SKILL.md`` gains a "Recovering
  stuck workers" section with the three actions and when to use each.

Tests
-----

* Kernel gate: verified-cards manifest, phantom rejection + audit
  event, cross-worker rejection, prose scan positive + negative.
* Recovery helpers: reclaim on running task, reclaim on non-running
  returns False, reassign refuses running without reclaim_first,
  reassign with reclaim_first succeeds on running.
* API endpoints: warnings field present on /board and /tasks/:id,
  warnings cleared after clean completion, reclaim 200 + 409 paths,
  reassign 200 + 409 + reclaim_first paths.
* CLI smoke: reclaim + reassign subcommands.

Live-verified end-to-end on a dashboard with seeded scenarios:
attention strip renders, badges land on the right cards, drawer
callout shows phantom chips, Reclaim on a running task flips status to
ready + emits manual reclaimed event + refreshes the drawer,
Reassign swaps the assignee and triggers board refresh.

359/359 kanban-suite tests pass
(test_kanban_{db,cli,boards,core_functionality} + dashboard + tools).
This commit is contained in:
Teknium 2026-05-05 08:06:55 -07:00 committed by GitHub
parent 7de3c86c5a
commit de9238d37e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 1791 additions and 17 deletions

View file

@ -208,3 +208,81 @@ def test_kanban_not_gateway_only():
cmd = next(c for c in COMMAND_REGISTRY if c.name == "kanban")
assert not cmd.cli_only
assert not cmd.gateway_only
# ---------------------------------------------------------------------------
# reclaim + reassign CLI smoke tests
# ---------------------------------------------------------------------------
def test_run_slash_reclaim_running_task(kanban_home):
    """``reclaim <id>`` through the slash runner frees a task with a live claim.

    The task is forced into ``running`` with a claim that expires one hour
    from now, then reclaimed via ``kc.run_slash``. The test passes when the
    command output mentions "Reclaimed" and ``show`` reports the task as
    ready again.
    """
    import re
    import secrets
    import time

    from hermes_cli import kanban_db as kb

    created = kc.run_slash("create 'stuck worker task' --assignee broken-model")
    id_match = re.search(r"(t_[a-f0-9]+)", created)
    assert id_match
    task_id = id_match.group(1)

    # Fake a worker that is holding the task. The expiry is an hour in the
    # future, so the claim has NOT lapsed when the reclaim command runs.
    db = kb.connect()
    try:
        claim_lock = secrets.token_hex(4)
        db.execute(
            "UPDATE tasks SET status='running', claim_lock=?, claim_expires=?, "
            "worker_pid=? WHERE id=?",
            (claim_lock, int(time.time()) + 3600, 4242, task_id),
        )
        db.execute(
            "INSERT INTO task_runs (task_id, status, claim_lock, claim_expires, "
            "worker_pid, started_at) VALUES (?, 'running', ?, ?, ?, ?)",
            (task_id, claim_lock, int(time.time()) + 3600, 4242, int(time.time())),
        )
        # Point the task at the run row that was just inserted.
        run_id = db.execute("SELECT last_insert_rowid()").fetchone()[0]
        db.execute("UPDATE tasks SET current_run_id=? WHERE id=?", (run_id, task_id))
        db.commit()
    finally:
        db.close()

    reclaim_output = kc.run_slash(f"reclaim {task_id} --reason 'test'")
    assert "Reclaimed" in reclaim_output, reclaim_output

    # The task should be schedulable again.
    shown = kc.run_slash(f"show {task_id}")
    assert "ready" in shown.lower()
def test_run_slash_reassign_with_reclaim_flag(kanban_home):
    """``reassign <id> <profile> --reclaim`` works on a task that is running.

    The task is created for ``orig``, forced into ``running`` with a claim
    that expires one hour from now, and then reassigned to ``newbie`` with
    ``--reclaim``. The test passes when the output mentions "Reassigned"
    and ``show`` lists the new assignee.
    """
    import re
    import secrets
    import time

    from hermes_cli import kanban_db as kb

    out1 = kc.run_slash("create 'switch model' --assignee orig")
    m = re.search(r"(t_[a-f0-9]+)", out1)
    # Fail with the create output instead of an AttributeError on ``None``
    # when no task id was printed.
    assert m, out1
    tid = m.group(1)

    # Simulate a running claim.
    conn = kb.connect()
    try:
        lock = secrets.token_hex(4)
        conn.execute(
            "UPDATE tasks SET status='running', claim_lock=?, claim_expires=?, "
            "worker_pid=? WHERE id=?",
            (lock, int(time.time()) + 3600, 4242, tid),
        )
        conn.execute(
            "INSERT INTO task_runs (task_id, status, claim_lock, claim_expires, "
            "worker_pid, started_at) VALUES (?, 'running', ?, ?, ?, ?)",
            (tid, lock, int(time.time()) + 3600, 4242, int(time.time())),
        )
        # Link the task to the run row that was just inserted.
        rid = conn.execute("SELECT last_insert_rowid()").fetchone()[0]
        conn.execute("UPDATE tasks SET current_run_id=? WHERE id=?", (rid, tid))
        conn.commit()
    finally:
        conn.close()

    out = kc.run_slash(f"reassign {tid} newbie --reclaim --reason 'switch'")
    assert "Reassigned" in out, out

    out2 = kc.run_slash(f"show {tid}")
    assert "newbie" in out2

View file

@ -2786,3 +2786,269 @@ def test_gateway_dispatcher_watcher_env_truthy_uses_config(monkeypatch):
timeout=3.0,
)
)
# ---------------------------------------------------------------------------
# Hallucination gate (created_cards verify + prose scan)
# ---------------------------------------------------------------------------
def test_complete_with_created_cards_all_verified_records_manifest(kanban_home):
    """Verified ``created_cards`` end up on the ``completed`` event payload.

    Both claimed cards exist and carry ``created_by="alice"``, matching the
    parent's assignee, so the completion succeeds and exactly one
    ``completed`` event is written with the ids under ``verified_cards``.
    """
    import json as _json

    db = kb.connect()
    try:
        parent_id = kb.create_task(db, title="parent", assignee="alice")
        first_child = kb.create_task(db, title="c1", assignee="x", created_by="alice")
        second_child = kb.create_task(db, title="c2", assignee="y", created_by="alice")

        completed_ok = kb.complete_task(
            db, parent_id,
            summary="done, created c1+c2",
            created_cards=[first_child, second_child],
        )
        assert completed_ok is True

        event_rows = db.execute(
            "SELECT kind, payload FROM task_events WHERE task_id=? ORDER BY id",
            (parent_id,),
        ).fetchall()
        completed_rows = [row for row in event_rows if row["kind"] == "completed"]
        assert len(completed_rows) == 1

        manifest = _json.loads(completed_rows[0]["payload"])
        assert manifest.get("verified_cards") == [first_child, second_child]
    finally:
        db.close()
def test_complete_with_phantom_created_cards_raises_and_audits(kanban_home):
    """A non-existent id in ``created_cards`` blocks the completion.

    Expectations checked here:
    * ``HallucinatedCardsError`` is raised and its ``phantom`` attribute
      lists only the id that does not exist;
    * the parent task keeps its ``ready`` status;
    * a ``completion_blocked_hallucination`` event is recorded and no
      ``completed`` event is.
    """
    db = kb.connect()
    try:
        parent_id = kb.create_task(db, title="parent", assignee="alice")
        real_id = kb.create_task(db, title="real", assignee="x", created_by="alice")
        bogus_id = "t_deadbeefcafe"

        with pytest.raises(kb.HallucinatedCardsError) as raised:
            kb.complete_task(
                db, parent_id,
                summary="claimed phantom",
                created_cards=[real_id, bogus_id],
            )
        assert raised.value.phantom == [bogus_id]

        # The rejected attempt must not have moved the task out of ready.
        status_row = db.execute(
            "SELECT status FROM tasks WHERE id=?", (parent_id,),
        ).fetchone()
        assert status_row["status"] == "ready"

        # The rejection is auditable, and nothing claims the task finished.
        event_rows = db.execute(
            "SELECT kind FROM task_events WHERE task_id=? ORDER BY id",
            (parent_id,),
        ).fetchall()
        recorded_kinds = [row["kind"] for row in event_rows]
        assert "completion_blocked_hallucination" in recorded_kinds
        assert "completed" not in recorded_kinds
    finally:
        db.close()
def test_complete_with_cross_worker_card_is_rejected(kanban_home):
    """Claiming a real card that another profile created counts as phantom.

    The card exists but has ``created_by="bob"`` while the completing task
    is assigned to ``alice``, so ``HallucinatedCardsError`` is expected with
    that card id in ``phantom``.
    """
    db = kb.connect()
    try:
        parent_id = kb.create_task(db, title="parent", assignee="alice")
        foreign_card = kb.create_task(
            db, title="other", assignee="x", created_by="bob",
        )

        with pytest.raises(kb.HallucinatedCardsError) as raised:
            kb.complete_task(
                db, parent_id,
                summary="claiming someone else's card",
                created_cards=[foreign_card],
            )
        assert raised.value.phantom == [foreign_card]
    finally:
        db.close()
def test_complete_prose_scan_flags_nonexistent_ids(kanban_home):
    """An unresolvable ``t_<hex>`` id in the summary is flagged, not blocked.

    The completion must still return ``True``; afterwards the task's events
    include ``suspected_hallucinated_references`` whose ``phantom_refs``
    payload entry contains the id quoted in the summary.
    """
    import json as _json

    db = kb.connect()
    try:
        parent_id = kb.create_task(db, title="parent", assignee="x")
        completed_ok = kb.complete_task(
            db, parent_id,
            summary="also saw t_abcd1234ffff failing in CI",
        )
        assert completed_ok is True

        event_rows = db.execute(
            "SELECT kind, payload FROM task_events WHERE task_id=? ORDER BY id",
            (parent_id,),
        ).fetchall()
        recorded_kinds = [row["kind"] for row in event_rows]
        assert "suspected_hallucinated_references" in recorded_kinds

        # Decode every matching payload, then inspect the earliest one.
        suspect_payloads = [
            _json.loads(row["payload"])
            for row in event_rows
            if row["kind"] == "suspected_hallucinated_references"
        ]
        first_suspect = suspect_payloads[0]
        assert "t_abcd1234ffff" in first_suspect["phantom_refs"]
    finally:
        db.close()
def test_complete_prose_scan_ignores_existing_ids(kanban_home):
    """A summary that mentions a real task id produces no warning event."""
    db = kb.connect()
    try:
        referenced_id = kb.create_task(db, title="other", assignee="x")
        parent_id = kb.create_task(db, title="parent", assignee="x")

        completed_ok = kb.complete_task(
            db, parent_id,
            summary=f"depended on {referenced_id}, now done",
        )
        assert completed_ok is True

        event_rows = db.execute(
            "SELECT kind FROM task_events WHERE task_id=? ORDER BY id",
            (parent_id,),
        ).fetchall()
        recorded_kinds = [row["kind"] for row in event_rows]
        assert "suspected_hallucinated_references" not in recorded_kinds
    finally:
        db.close()
# ---------------------------------------------------------------------------
# Recovery helpers (reclaim + reassign)
# ---------------------------------------------------------------------------
def test_reclaim_task_resets_running_to_ready(kanban_home):
    """``reclaim_task`` frees a claim whose expiry has not been reached.

    With the claim still an hour from expiring, ``release_stale_claims``
    must leave the task alone (returns 0) while ``reclaim_task`` returns
    ``True``, clears ``claim_lock`` / ``worker_pid``, puts the task back in
    ``ready``, and records one ``reclaimed`` event whose payload has
    ``manual`` set to ``True`` and echoes the supplied reason.
    """
    import json as _json
    import secrets
    import time

    db = kb.connect()
    try:
        task_id = kb.create_task(db, title="stuck", assignee="broken")

        # Fake a worker holding an unexpired claim on the task.
        claim_lock = secrets.token_hex(8)
        expires_at = int(time.time()) + 3600
        db.execute(
            "UPDATE tasks SET status='running', claim_lock=?, claim_expires=?, "
            "worker_pid=? WHERE id=?",
            (claim_lock, expires_at, 12345, task_id),
        )
        db.execute(
            "INSERT INTO task_runs (task_id, status, claim_lock, claim_expires, "
            "worker_pid, started_at) VALUES (?, 'running', ?, ?, ?, ?)",
            (task_id, claim_lock, expires_at, 12345, int(time.time())),
        )
        inserted_run = db.execute("SELECT last_insert_rowid()").fetchone()[0]
        db.execute(
            "UPDATE tasks SET current_run_id=? WHERE id=?", (inserted_run, task_id),
        )
        db.commit()

        # The expiry-driven sweep has nothing to do yet...
        assert kb.release_stale_claims(db) == 0
        # ...but the operator-driven reclaim acts right away.
        assert kb.reclaim_task(db, task_id, reason="test reason") is True

        task_row = db.execute(
            "SELECT status, claim_lock, worker_pid FROM tasks WHERE id=?",
            (task_id,),
        ).fetchone()
        assert task_row["status"] == "ready"
        assert task_row["claim_lock"] is None
        assert task_row["worker_pid"] is None

        reclaim_rows = db.execute(
            "SELECT payload FROM task_events WHERE task_id=? AND kind='reclaimed'",
            (task_id,),
        ).fetchall()
        reclaim_payloads = [_json.loads(row["payload"]) for row in reclaim_rows]
        assert len(reclaim_payloads) == 1
        only_payload = reclaim_payloads[0]
        assert only_payload.get("manual") is True
        assert only_payload.get("reason") == "test reason"
    finally:
        db.close()
def test_reclaim_task_returns_false_for_already_ready(kanban_home):
    """``reclaim_task`` returns ``False`` for a freshly created task."""
    db = kb.connect()
    try:
        idle_task = kb.create_task(db, title="ready task", assignee="x")
        reclaimed = kb.reclaim_task(db, idle_task)
        assert reclaimed is False
    finally:
        db.close()
def test_reassign_task_refuses_running_without_reclaim_first(kanban_home):
    """``reassign_task`` on a running task returns ``False`` by default.

    No ``reclaim_first`` argument is passed, and the stored assignee must be
    left untouched. The original author's note says this mirrors
    ``assign_task``'s ``RuntimeError``, caught internally.
    """
    db = kb.connect()
    try:
        task_id = kb.create_task(db, title="running", assignee="orig")
        # Mark the task as claimed and running directly in the table.
        db.execute(
            "UPDATE tasks SET status='running', claim_lock=? WHERE id=?",
            ("live", task_id),
        )
        db.commit()

        reassigned = kb.reassign_task(db, task_id, "new")
        assert reassigned is False

        # The original profile is still on the task.
        assignee_row = db.execute(
            "SELECT assignee FROM tasks WHERE id=?", (task_id,),
        ).fetchone()
        assert assignee_row["assignee"] == "orig"
    finally:
        db.close()
def test_reassign_task_with_reclaim_first_switches_profile(kanban_home):
    """``reassign_task(..., reclaim_first=True)`` handles a running task.

    After the single call the task must be assigned to ``new-profile`` and
    sit in ``ready``.
    """
    import secrets
    import time

    db = kb.connect()
    try:
        task_id = kb.create_task(db, title="switch me", assignee="orig")

        # Fake a worker holding a claim that expires an hour from now.
        claim_lock = secrets.token_hex(8)
        expires_at = int(time.time()) + 3600
        db.execute(
            "UPDATE tasks SET status='running', claim_lock=?, claim_expires=?, "
            "worker_pid=? WHERE id=?",
            (claim_lock, expires_at, 99999, task_id),
        )
        db.execute(
            "INSERT INTO task_runs (task_id, status, claim_lock, claim_expires, "
            "worker_pid, started_at) VALUES (?, 'running', ?, ?, ?, ?)",
            (task_id, claim_lock, expires_at, 99999, int(time.time())),
        )
        inserted_run = db.execute("SELECT last_insert_rowid()").fetchone()[0]
        db.execute(
            "UPDATE tasks SET current_run_id=? WHERE id=?", (inserted_run, task_id),
        )
        db.commit()

        switched = kb.reassign_task(
            db, task_id, "new-profile",
            reclaim_first=True, reason="switch model",
        )
        assert switched is True

        task_row = db.execute(
            "SELECT assignee, status FROM tasks WHERE id=?", (task_id,),
        ).fetchone()
        assert task_row["assignee"] == "new-profile"
        assert task_row["status"] == "ready"
    finally:
        db.close()

View file

@ -1117,3 +1117,221 @@ def test_home_channels_empty_when_no_homes_configured(client, monkeypatch):
r = client.get("/api/plugins/kanban/home-channels")
assert r.status_code == 200
assert r.json()["home_channels"] == []
# ---------------------------------------------------------------------------
# Recovery endpoints (reclaim + reassign) and warnings field
# ---------------------------------------------------------------------------
def test_board_surfaces_warnings_field_for_hallucinated_completions(client):
    """``/board`` exposes a ``warnings`` object on a task with a blocked completion.

    A completion carrying a phantom card id is attempted (and rejected)
    first. The board payload for that task must then include ``warnings``
    with a ``count`` of at least one and
    ``completion_blocked_hallucination`` among its ``kinds``.
    """
    import pytest as _pytest

    db = kb.connect()
    try:
        parent_id = kb.create_task(db, title="parent", assignee="alice")
        real_id = kb.create_task(db, title="real", assignee="x", created_by="alice")
        with _pytest.raises(kb.HallucinatedCardsError):
            kb.complete_task(
                db, parent_id,
                summary="claimed phantom",
                created_cards=[real_id, "t_deadbeefcafe"],
            )
    finally:
        db.close()

    response = client.get("/api/plugins/kanban/board")
    assert response.status_code == 200
    board = response.json()

    # Flatten every column's cards, then pick out the parent by title.
    cards = []
    for column in board["columns"]:
        cards.extend(column["tasks"])
    parent_card = next(card for card in cards if card["title"] == "parent")

    warning_info = parent_card.get("warnings")
    assert warning_info is not None
    assert warning_info["count"] >= 1
    assert "completion_blocked_hallucination" in warning_info["kinds"]
def test_board_warnings_cleared_after_clean_completion(client):
    """A clean completion after a blocked one removes the board warning.

    The first completion attempt claims a phantom id and is rejected; the
    retry claims only the real card and succeeds. The board (fetched with
    ``include_archived``) must then show no ``warnings`` on the task, so
    tasks are not flagged permanently.
    """
    import pytest as _pytest

    db = kb.connect()
    try:
        parent_id = kb.create_task(db, title="parent", assignee="alice")
        real_id = kb.create_task(db, title="real", assignee="x", created_by="alice")

        with _pytest.raises(kb.HallucinatedCardsError):
            kb.complete_task(
                db, parent_id,
                summary="first attempt phantom",
                created_cards=[real_id, "t_phantom11"],
            )

        # Retry with the bad id dropped; this one must go through.
        retry_ok = kb.complete_task(
            db, parent_id,
            summary="retry without phantom",
            created_cards=[real_id],
        )
        assert retry_ok is True
    finally:
        db.close()

    response = client.get(
        "/api/plugins/kanban/board", params={"include_archived": True},
    )
    assert response.status_code == 200
    board = response.json()

    cards = []
    for column in board["columns"]:
        cards.extend(column["tasks"])
    parent_card = next(card for card in cards if card["title"] == "parent")

    # The successful retry superseded the earlier warning.
    assert parent_card.get("warnings") is None
def test_reclaim_endpoint_releases_running_claim(client):
    """POST ``/tasks/<id>/reclaim`` drops a live claim and reports success.

    Checks the 200 response body (``ok`` and ``task_id``) and that the task
    row is back in ``ready`` with no ``claim_lock``. The ``reclaimed`` event
    itself is not inspected here.
    """
    import secrets

    db = kb.connect()
    try:
        task_id = kb.create_task(db, title="running", assignee="x")

        # Fake a worker holding a claim that expires an hour from now.
        claim_lock = secrets.token_hex(8)
        expires_at = int(time.time()) + 3600
        db.execute(
            "UPDATE tasks SET status='running', claim_lock=?, claim_expires=?, "
            "worker_pid=? WHERE id=?",
            (claim_lock, expires_at, 99999, task_id),
        )
        db.execute(
            "INSERT INTO task_runs (task_id, status, claim_lock, claim_expires, "
            "worker_pid, started_at) VALUES (?, 'running', ?, ?, ?, ?)",
            (task_id, claim_lock, expires_at, 99999, int(time.time())),
        )
        inserted_run = db.execute("SELECT last_insert_rowid()").fetchone()[0]
        db.execute(
            "UPDATE tasks SET current_run_id=? WHERE id=?", (inserted_run, task_id),
        )
        db.commit()
    finally:
        db.close()

    response = client.post(
        f"/api/plugins/kanban/tasks/{task_id}/reclaim",
        json={"reason": "browser recovery"},
    )
    assert response.status_code == 200, response.text
    payload = response.json()
    assert payload["ok"] is True
    assert payload["task_id"] == task_id

    # Re-open the database to confirm the stored state changed.
    verify_db = kb.connect()
    try:
        task_row = verify_db.execute(
            "SELECT status, claim_lock FROM tasks WHERE id=?", (task_id,),
        ).fetchone()
        assert task_row["status"] == "ready"
        assert task_row["claim_lock"] is None
    finally:
        verify_db.close()
def test_reclaim_endpoint_409_for_non_running_task(client):
    """POST ``/tasks/<id>/reclaim`` on a freshly created task answers 409."""
    db = kb.connect()
    try:
        task_id = kb.create_task(db, title="ready", assignee="x")
    finally:
        db.close()

    response = client.post(
        f"/api/plugins/kanban/tasks/{task_id}/reclaim",
        json={},
    )
    assert response.status_code == 409
def test_reassign_endpoint_switches_profile(client):
    """POST ``/tasks/<id>/reassign`` updates the assignee.

    Both the response body and the stored task row must show ``newbie``.
    """
    db = kb.connect()
    try:
        task_id = kb.create_task(db, title="task", assignee="orig")
    finally:
        db.close()

    response = client.post(
        f"/api/plugins/kanban/tasks/{task_id}/reassign",
        json={"profile": "newbie", "reclaim_first": False},
    )
    assert response.status_code == 200, response.text
    assert response.json()["assignee"] == "newbie"

    # Confirm the change was persisted, not just echoed back.
    verify_db = kb.connect()
    try:
        assignee_row = verify_db.execute(
            "SELECT assignee FROM tasks WHERE id=?", (task_id,),
        ).fetchone()
        assert assignee_row["assignee"] == "newbie"
    finally:
        verify_db.close()
def test_reassign_endpoint_409_on_running_without_reclaim(client):
    """POST ``/tasks/<id>/reassign`` answers 409 for a running task when
    ``reclaim_first`` is false."""
    import secrets

    db = kb.connect()
    try:
        task_id = kb.create_task(db, title="running", assignee="orig")
        # Mark the task as claimed and running directly in the table.
        claim_lock = secrets.token_hex(4)
        db.execute(
            "UPDATE tasks SET status='running', claim_lock=? WHERE id=?",
            (claim_lock, task_id),
        )
        db.commit()
    finally:
        db.close()

    response = client.post(
        f"/api/plugins/kanban/tasks/{task_id}/reassign",
        json={"profile": "new", "reclaim_first": False},
    )
    assert response.status_code == 409
def test_reassign_endpoint_with_reclaim_first_succeeds_on_running(client):
    """POST ``/tasks/<id>/reassign`` with ``reclaim_first`` true handles a
    running task in one request.

    The response must be 200 with the new assignee, and the stored row must
    show status ``ready`` and assignee ``new``.
    """
    import secrets

    db = kb.connect()
    try:
        task_id = kb.create_task(db, title="running", assignee="orig")

        # Fake a worker holding a claim that expires an hour from now.
        claim_lock = secrets.token_hex(4)
        db.execute(
            "UPDATE tasks SET status='running', claim_lock=?, claim_expires=?, "
            "worker_pid=? WHERE id=?",
            (claim_lock, int(time.time()) + 3600, 1234, task_id),
        )
        db.execute(
            "INSERT INTO task_runs (task_id, status, claim_lock, claim_expires, "
            "worker_pid, started_at) VALUES (?, 'running', ?, ?, ?, ?)",
            (task_id, claim_lock, int(time.time()) + 3600, 1234, int(time.time())),
        )
        inserted_run = db.execute("SELECT last_insert_rowid()").fetchone()[0]
        db.execute(
            "UPDATE tasks SET current_run_id=? WHERE id=?", (inserted_run, task_id),
        )
        db.commit()
    finally:
        db.close()

    response = client.post(
        f"/api/plugins/kanban/tasks/{task_id}/reassign",
        json={"profile": "new", "reclaim_first": True, "reason": "switch"},
    )
    assert response.status_code == 200, response.text
    assert response.json()["assignee"] == "new"

    # Re-open the database to confirm both changes were persisted.
    verify_db = kb.connect()
    try:
        task_row = verify_db.execute(
            "SELECT status, assignee FROM tasks WHERE id=?", (task_id,),
        ).fetchone()
        assert task_row["status"] == "ready"
        assert task_row["assignee"] == "new"
    finally:
        verify_db.close()