mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-08 03:01:47 +00:00
feat(kanban): hallucination gate + recovery UX for worker-created-card claims (#20232)
Workers completing a kanban task can now claim the ids of cards they created via an optional ``created_cards`` field on ``kanban_complete``. The kernel verifies each id exists and was created by the completing worker's profile; any phantom id blocks the completion with a ``HallucinatedCardsError`` and records a ``completion_blocked_hallucination`` event on the task so the rejected attempt is auditable. Successful completions also get a non-blocking prose-scan pass over their ``summary`` + ``result`` that emits a ``suspected_hallucinated_references`` event for any ``t_<hex>`` reference that doesn't resolve. Closes #20017. Recovery UX (kernel + CLI + dashboard) -------------------------------------- A structural gate alone isn't enough — operators also need to see and act on stuck workers, especially when a profile's model is the root cause. This PR ships the full loop: * ``kanban_db.reclaim_task(task_id)`` — operator-driven reclaim that releases an active worker claim immediately (unlike ``release_stale_claims`` which only acts after claim_expires has passed). Emits a ``reclaimed`` event with ``manual: True`` payload. * ``kanban_db.reassign_task(task_id, profile, reclaim_first=...)`` — switch a task to a different profile, optionally reclaiming a stuck running worker in the same call. * ``hermes kanban reclaim <id> [--reason ...]`` and ``hermes kanban reassign <id> <profile> [--reclaim] [--reason ...]`` CLI subcommands wired through to the same helpers. * ``POST /api/plugins/kanban/tasks/{id}/reclaim`` and ``POST /api/plugins/kanban/tasks/{id}/reassign`` endpoints on the dashboard plugin. Dashboard surfacing ------------------- * ⚠ **warning badge** on cards with active hallucination events. * **attention strip** at the top of the board listing all flagged tasks; dismissible per session. * **events callout** in the task drawer — hallucination events render with a red left border, amber icon, and phantom ids as styled chips. 
* **recovery section** in the task drawer with three actions: Reclaim, Reassign (with profile picker + reclaim-first checkbox), and a copy-to-clipboard hint for ``hermes -p <profile> model`` since profile config lives on disk and can't be edited from the browser. Auto-opens when the task has warnings, collapsed otherwise. Keyed by task id so state doesn't leak between drawers. Active-vs-stale rule: warnings clear when a clean ``completed`` or ``edited`` event supersedes the hallucination, so recovery is never permanently stigmatising — the audit events persist for debugging but the badge goes away once the worker succeeds. Skill updates ------------- * ``skills/devops/kanban-worker/SKILL.md`` documents the ``created_cards`` contract with good/bad examples. * ``skills/devops/kanban-orchestrator/SKILL.md`` gains a "Recovering stuck workers" section with the three actions and when to use each. Tests ----- * Kernel gate: verified-cards manifest, phantom rejection + audit event, cross-worker rejection, prose scan positive + negative. * Recovery helpers: reclaim on running task, reclaim on non-running returns False, reassign refuses running without reclaim_first, reassign with reclaim_first succeeds on running. * API endpoints: warnings field present on /board and /tasks/:id, warnings cleared after clean completion, reclaim 200 + 409 paths, reassign 200 + 409 + reclaim_first paths. * CLI smoke: reclaim + reassign subcommands. Live-verified end-to-end on a dashboard with seeded scenarios: attention strip renders, badges land on the right cards, drawer callout shows phantom chips, Reclaim on a running task flips status to ready + emits manual reclaimed event + refreshes the drawer, Reassign swaps the assignee and triggers board refresh. 359/359 kanban-suite tests pass (test_kanban_{db,cli,boards,core_functionality} + dashboard + tools).
This commit is contained in:
parent
7de3c86c5a
commit
de9238d37e
11 changed files with 1791 additions and 17 deletions
|
|
@ -308,6 +308,35 @@ def build_parser(parent_subparsers: argparse._SubParsersAction) -> argparse.Argu
|
|||
p_assign.add_argument("task_id")
|
||||
p_assign.add_argument("profile", help="Profile name (or 'none' to unassign)")
|
||||
|
||||
# --- reclaim / reassign (recovery) ---
|
||||
p_reclaim = sub.add_parser(
|
||||
"reclaim",
|
||||
help="Release an active worker claim on a running task",
|
||||
)
|
||||
p_reclaim.add_argument("task_id")
|
||||
p_reclaim.add_argument(
|
||||
"--reason", default=None,
|
||||
help="Human-readable reason (recorded on the reclaimed event)",
|
||||
)
|
||||
|
||||
p_reassign = sub.add_parser(
|
||||
"reassign",
|
||||
help="Reassign a task to a different profile, optionally reclaiming first",
|
||||
)
|
||||
p_reassign.add_argument("task_id")
|
||||
p_reassign.add_argument(
|
||||
"profile",
|
||||
help="New profile name (or 'none' to unassign)",
|
||||
)
|
||||
p_reassign.add_argument(
|
||||
"--reclaim", action="store_true",
|
||||
help="Release any active claim before reassigning (required if task is running)",
|
||||
)
|
||||
p_reassign.add_argument(
|
||||
"--reason", default=None,
|
||||
help="Human-readable reason (recorded on the reclaimed event)",
|
||||
)
|
||||
|
||||
# --- link / unlink ---
|
||||
p_link = sub.add_parser("link", help="Add a parent->child dependency")
|
||||
p_link.add_argument("parent_id")
|
||||
|
|
@ -597,6 +626,8 @@ def kanban_command(args: argparse.Namespace) -> int:
|
|||
"ls": _cmd_list,
|
||||
"show": _cmd_show,
|
||||
"assign": _cmd_assign,
|
||||
"reclaim": _cmd_reclaim,
|
||||
"reassign": _cmd_reassign,
|
||||
"link": _cmd_link,
|
||||
"unlink": _cmd_unlink,
|
||||
"claim": _cmd_claim,
|
||||
|
|
@ -1117,6 +1148,45 @@ def _cmd_assign(args: argparse.Namespace) -> int:
|
|||
return 0
|
||||
|
||||
|
||||
def _cmd_reclaim(args: argparse.Namespace) -> int:
    """Handle the ``reclaim`` subcommand.

    Asks ``kb.reclaim_task`` to release the claim on ``args.task_id``,
    forwarding the optional ``--reason`` text.

    Returns:
        ``0`` after printing a confirmation when the reclaim happened;
        ``1`` after printing an explanation to stderr when it did not.
    """
    with kb.connect() as conn:
        # ``reason`` is read defensively in case the namespace was built
        # without the optional flag.
        note = getattr(args, "reason", None)
        reclaimed = kb.reclaim_task(conn, args.task_id, reason=note)

    if reclaimed:
        print(f"Reclaimed {args.task_id}")
        return 0

    print(
        f"cannot reclaim {args.task_id} (not running or unknown id)",
        file=sys.stderr,
    )
    return 1
|
||||
|
||||
|
||||
def _cmd_reassign(args: argparse.Namespace) -> int:
    """Handle the ``reassign`` subcommand.

    Maps the sentinel spellings ``none`` / ``-`` / ``null`` (case
    insensitive) to "unassigned", then delegates to ``kb.reassign_task``
    with the ``--reclaim`` and ``--reason`` options.

    Returns:
        ``0`` after printing a confirmation when the reassign landed;
        ``1`` after printing an explanation to stderr when it did not.
    """
    if args.profile.lower() in ("none", "-", "null"):
        profile = None
    else:
        profile = args.profile

    with kb.connect() as conn:
        landed = kb.reassign_task(
            conn,
            args.task_id,
            profile,
            reclaim_first=bool(getattr(args, "reclaim", False)),
            reason=getattr(args, "reason", None),
        )

    if not landed:
        print(
            f"cannot reassign {args.task_id} "
            f"(unknown id, or still running — pass --reclaim to release first)",
            file=sys.stderr,
        )
        return 1

    # The suffix reflects that --reclaim was requested on the command line.
    suffix = " (claim reclaimed)" if getattr(args, "reclaim", False) else ""
    target = profile or "(unassigned)"
    print(f"Reassigned {args.task_id} to " f"{target}" + suffix)
    return 0
|
||||
|
||||
|
||||
def _cmd_link(args: argparse.Namespace) -> int:
|
||||
with kb.connect() as conn:
|
||||
kb.link_tasks(conn, args.parent_id, args.child_id)
|
||||
|
|
|
|||
|
|
@ -1842,6 +1842,212 @@ def release_stale_claims(conn: sqlite3.Connection) -> int:
|
|||
return reclaimed
|
||||
|
||||
|
||||
def reclaim_task(
    conn: sqlite3.Connection,
    task_id: str,
    *,
    reason: Optional[str] = None,
) -> bool:
    """Operator-driven reclaim: release the claim and reset to ``ready``.

    Unlike :func:`release_stale_claims`, which only acts on tasks whose
    ``claim_expires`` has passed, this function reclaims immediately
    regardless of TTL. Intended for the dashboard/CLI recovery flow
    when an operator wants to abort a running worker without waiting
    for the TTL to expire (e.g. after seeing a hallucination warning).

    Args:
        conn: Open connection; rows are read by column name.
        task_id: Id of the task whose claim should be released.
        reason: Optional human-readable reason, recorded on the ended
            run's error text and on the ``reclaimed`` event payload.

    Returns:
        True if the task row was actually reset (a ``reclaimed`` event
        with ``manual: True`` is appended in that case). False if the
        task does not exist, has nothing to reclaim, or is in a status
        the reset does not cover — in which case nothing is written.
    """
    with write_txn(conn):
        row = conn.execute(
            "SELECT status, claim_lock, worker_pid FROM tasks WHERE id = ?",
            (task_id,),
        ).fetchone()
        if row is None:
            return False
        if row["status"] != "running" and row["claim_lock"] is None:
            # Nothing to reclaim — already ready / blocked / done.
            return False

        # Capture the claim details before they are cleared so the audit
        # event can still name the lock and worker pid that were released.
        prev_lock = row["claim_lock"]
        prev_pid = row["worker_pid"]
        cur = conn.execute(
            "UPDATE tasks SET status = 'ready', claim_lock = NULL, "
            "claim_expires = NULL, worker_pid = NULL "
            "WHERE id = ? AND status IN ('running', 'ready', 'blocked')",
            (task_id,),
        )
        if cur.rowcount == 0:
            # A lingering claim_lock on a status the UPDATE does not cover
            # (e.g. a finished task) gets here. No row changed, so do not
            # end a run or record a reclaim that never happened.
            return False

        if reason:
            run_error = f"manual_reclaim: {reason}"
        else:
            run_error = f"manual_reclaim lock={prev_lock}"
        run_id = _end_run(
            conn, task_id,
            outcome="reclaimed", status="reclaimed",
            error=run_error,
        )
        _append_event(
            conn, task_id, "reclaimed",
            {
                "manual": True,
                "reason": reason,
                "prev_lock": prev_lock,
                "prev_pid": prev_pid,
            },
            run_id=run_id,
        )
        return True
|
||||
|
||||
|
||||
def reassign_task(
    conn: sqlite3.Connection,
    task_id: str,
    profile: Optional[str],
    *,
    reclaim_first: bool = False,
    reason: Optional[str] = None,
) -> bool:
    """Move a task to another profile, optionally releasing its claim first.

    Recovery path for "this profile's model is broken, try a different
    one".

    Args:
        conn: Open database connection.
        task_id: Id of the task to reassign.
        profile: New profile name, or ``None`` to unassign entirely.
        reclaim_first: When True, :func:`reclaim_task` is called before
            the reassign so an active claim is released. When False, a
            task that is still running is not reassigned.
        reason: Optional text forwarded to the reclaim step; falls back
            to ``"reassign"`` when empty.

    Returns:
        Whatever :func:`assign_task` returns when it succeeds in running,
        or False when it raises ``RuntimeError`` (the caller can retry
        with ``reclaim_first=True``).
    """
    if reclaim_first:
        # The return value is deliberately ignored: reclaiming is safe
        # even when there is nothing to reclaim.
        reclaim_note = reason if reason else "reassign"
        reclaim_task(conn, task_id, reason=reclaim_note)

    # assign_task handles its own txn + the still-running guard.
    try:
        landed = assign_task(conn, task_id, profile)
    except RuntimeError:
        # Still running and no reclaim was requested — leave the decision
        # to retry with a reclaim to the caller.
        return False
    return landed
|
||||
|
||||
|
||||
def _verify_created_cards(
    conn: sqlite3.Connection,
    completing_task_id: str,
    claimed_ids: Iterable[str],
) -> tuple[list[str], list[str]]:
    """Partition ``claimed_ids`` into ``(verified, phantom)``.

    A card is "verified" iff a row exists in ``tasks`` with the given id
    AND its ``created_by`` equals either the completing task's
    ``assignee`` or the completing task's own id.

    ``phantom`` holds ids that either do not exist at all or exist but
    were not created by the completing worker. The caller decides what
    to do with each bucket; this helper only reads.

    Args:
        conn: Open connection; rows are read by column name.
        completing_task_id: Id of the task being completed.
        claimed_ids: Ids the worker claims to have created. Entries are
            stringified and stripped, blanks are dropped, and duplicates
            are removed keeping first-seen order. A bare string is
            treated as a single id rather than as a sequence of
            characters. ``None`` or an empty iterable yields ``([], [])``.

    Returns:
        ``(verified, phantom)``, each preserving the claimed order. When
        the completing task itself is not found, every claimed id is
        returned as phantom.
    """
    # A str is itself iterable; without this guard "t_abc" would be checked
    # one character at a time and each character reported as a phantom id.
    if isinstance(claimed_ids, str):
        claimed_ids = [claimed_ids]

    stripped = (str(raw).strip() for raw in (claimed_ids or ()))
    # dict.fromkeys dedupes while keeping first-seen order.
    ordered = list(dict.fromkeys(cid for cid in stripped if cid))
    if not ordered:
        return [], []

    row = conn.execute(
        "SELECT assignee FROM tasks WHERE id = ?", (completing_task_id,),
    ).fetchone()
    if row is None:
        # Completing task not found — nothing resolves.
        return [], ordered
    completing_assignee = row["assignee"]

    # Batch-fetch existence + created_by in one query.
    placeholders = ",".join(["?"] * len(ordered))
    rows = conn.execute(
        f"SELECT id, created_by FROM tasks WHERE id IN ({placeholders})",
        tuple(ordered),
    ).fetchall()
    found = {r["id"]: r["created_by"] for r in rows}

    verified: list[str] = []
    phantom: list[str] = []
    for cid in ordered:
        created_by = found.get(cid)
        if created_by is None:
            # Missing row, or a row with no recorded creator: either way
            # the claim cannot be attributed to this worker.
            phantom.append(cid)
            continue
        by_assignee = bool(completing_assignee) and created_by == completing_assignee
        by_task_itself = created_by == completing_task_id
        if by_assignee or by_task_itself:
            verified.append(cid)
        else:
            phantom.append(cid)
    return verified, phantom
|
||||
|
||||
|
||||
# Task-id pattern used both by ``kanban_create`` (``t_<12 hex>``) and
# ``_new_task_id`` below. Deliberately lenient about length for forward
# compatibility: anything with 8 or more lowercase hex characters after
# the ``t_`` prefix counts as a task-id reference.
_TASK_ID_PROSE_RE = re.compile(r"\bt_[a-f0-9]{8,}\b")


def _scan_prose_for_phantom_ids(
    conn: sqlite3.Connection,
    text: str,
) -> list[str]:
    """Return the ``t_<hex>`` references in ``text`` that match no task.

    Used as a non-blocking advisory check on completion summaries.

    Args:
        conn: Open connection; rows are read by column name.
        text: Free-form prose to scan. Empty or ``None`` yields ``[]``.

    Returns:
        The unresolved ids, deduplicated, in order of first appearance.
        An empty list means either no id-shaped tokens were present or
        every one of them exists in ``tasks``.
    """
    if not text:
        return []

    # dict.fromkeys drops repeats while keeping first-seen order.
    candidates = list(dict.fromkeys(_TASK_ID_PROSE_RE.findall(text)))
    if not candidates:
        return []

    # One lookup for all candidates rather than a query per id.
    marks = ",".join("?" for _ in candidates)
    cursor = conn.execute(
        f"SELECT id FROM tasks WHERE id IN ({marks})",
        tuple(candidates),
    )
    known = {record["id"] for record in cursor.fetchall()}
    return [ref for ref in candidates if ref not in known]
|
||||
|
||||
|
||||
class HallucinatedCardsError(ValueError):
    """Signals that a completion claimed cards the worker did not create.

    Raised by ``complete_task`` when ``created_cards`` contains ids that
    do not exist or were not created by the completing worker.

    Attributes (set in ``__init__``):
        phantom: A copy of the rejected ids, for structured access.
        completing_task_id: Id of the task whose completion was blocked.

    Subclasses ``ValueError`` so existing tool-error handlers treat it as
    a recoverable user error.
    """

    def __init__(self, phantom: list[str], completing_task_id: str):
        # Copy first so later mutation of the caller's list does not
        # change what this exception reports.
        self.phantom = list(phantom)
        self.completing_task_id = completing_task_id
        rejected = ", ".join(phantom)
        message = (
            "completion blocked: claimed created_cards that do not exist "
            f"or were not created by this worker: {rejected}"
        )
        super().__init__(message)
|
||||
|
||||
|
||||
def complete_task(
|
||||
conn: sqlite3.Connection,
|
||||
task_id: str,
|
||||
|
|
@ -1849,21 +2055,65 @@ def complete_task(
|
|||
result: Optional[str] = None,
|
||||
summary: Optional[str] = None,
|
||||
metadata: Optional[dict] = None,
|
||||
created_cards: Optional[Iterable[str]] = None,
|
||||
) -> bool:
|
||||
"""Transition ``running|ready -> done`` and record ``result``.
|
||||
|
||||
Accepts a task that's merely ``ready`` too, so a manual CLI
|
||||
Accepts a task that is merely ``ready`` too, so a manual CLI
|
||||
completion (``hermes kanban complete <id>``) works without requiring
|
||||
a claim/start/complete sequence.
|
||||
|
||||
``summary`` and ``metadata`` are stored on the closing run (if any)
|
||||
and surfaced to downstream children via :func:`build_worker_context`.
|
||||
When ``summary`` is omitted we fall back to ``result`` so single-run
|
||||
callers don't have to pass both. ``metadata`` is a free-form dict
|
||||
callers do not have to pass both. ``metadata`` is a free-form dict
|
||||
(e.g. ``{"changed_files": [...], "tests_run": [...]}``) — workers
|
||||
are encouraged to use it for structured handoff facts.
|
||||
|
||||
``created_cards`` is an optional list of task ids the completing
|
||||
worker claims to have created. Each id is verified against
|
||||
``tasks.created_by``. If any id is phantom (does not exist or was
|
||||
not created by this worker's assignee profile), completion is blocked
|
||||
with a ``HallucinatedCardsError`` and a
|
||||
``completion_blocked_hallucination`` event is emitted so the rejected
|
||||
attempt is auditable. When all ids verify, they are recorded on the
|
||||
``completed`` event payload.
|
||||
|
||||
After a successful completion, ``summary`` and ``result`` are scanned
|
||||
for prose references like ``t_deadbeefcafe`` that do not resolve.
|
||||
Any suspected phantom references are recorded as a
|
||||
``suspected_hallucinated_references`` event. This pass is advisory
|
||||
and never blocks.
|
||||
"""
|
||||
now = int(time.time())
|
||||
|
||||
# Gate: verify created_cards BEFORE the main write txn. A rejected
|
||||
# completion still needs an auditable event, so we emit it in a
|
||||
# tiny dedicated txn, then raise. The caller is responsible for
|
||||
# surfacing HallucinatedCardsError to the worker; this function
|
||||
# never mutates task state on a phantom-card rejection.
|
||||
if created_cards:
|
||||
verified_cards, phantom_cards = _verify_created_cards(
|
||||
conn, task_id, created_cards
|
||||
)
|
||||
if phantom_cards:
|
||||
with write_txn(conn):
|
||||
_append_event(
|
||||
conn, task_id, "completion_blocked_hallucination",
|
||||
{
|
||||
"phantom_cards": phantom_cards,
|
||||
"verified_cards": verified_cards,
|
||||
"summary_preview": (
|
||||
(summary or result or "").strip().splitlines()[0][:200]
|
||||
if (summary or result)
|
||||
else None
|
||||
),
|
||||
},
|
||||
)
|
||||
raise HallucinatedCardsError(phantom_cards, task_id)
|
||||
else:
|
||||
verified_cards = []
|
||||
|
||||
with write_txn(conn):
|
||||
cur = conn.execute(
|
||||
"""
|
||||
|
|
@ -1904,14 +2154,38 @@ def complete_task(
|
|||
# full summary stays on the run row.
|
||||
ev_summary = (summary if summary is not None else result) or ""
|
||||
ev_summary = ev_summary.strip().splitlines()[0][:400] if ev_summary else ""
|
||||
completed_payload: dict = {
|
||||
"result_len": len(result) if result else 0,
|
||||
"summary": ev_summary or None,
|
||||
}
|
||||
if verified_cards:
|
||||
completed_payload["verified_cards"] = verified_cards
|
||||
_append_event(
|
||||
conn, task_id, "completed",
|
||||
{
|
||||
"result_len": len(result) if result else 0,
|
||||
"summary": ev_summary or None,
|
||||
},
|
||||
completed_payload,
|
||||
run_id=run_id,
|
||||
)
|
||||
# Prose-scan the summary + result for t_<hex> references that do
|
||||
# not resolve. Advisory — does not block the completion. Runs in
|
||||
# its own txn so the completion itself is already durable by the
|
||||
# time we emit the warning.
|
||||
scan_text = " ".join(filter(None, [summary, result]))
|
||||
if scan_text:
|
||||
phantom_refs = _scan_prose_for_phantom_ids(conn, scan_text)
|
||||
# Drop any phantom refs that were already flagged as verified
|
||||
# above (shouldn't happen — verified means they exist — but
|
||||
# belt-and-suspenders).
|
||||
phantom_refs = [p for p in phantom_refs if p not in set(verified_cards)]
|
||||
if phantom_refs:
|
||||
with write_txn(conn):
|
||||
_append_event(
|
||||
conn, task_id, "suspected_hallucinated_references",
|
||||
{
|
||||
"phantom_refs": phantom_refs,
|
||||
"source": "completion_summary",
|
||||
},
|
||||
run_id=run_id,
|
||||
)
|
||||
# Recompute ready status for dependents (separate txn so children see done).
|
||||
recompute_ready(conn)
|
||||
return True
|
||||
|
|
|
|||
381
plugins/kanban/dashboard/dist/index.js
vendored
381
plugins/kanban/dashboard/dist/index.js
vendored
|
|
@ -60,6 +60,35 @@
|
|||
blocked: "Mark this task as blocked? The worker's claim is released.",
|
||||
};
|
||||
|
||||
// Event kinds that flag a hallucinated / phantom task-id reference in a
// completion:
//   * completion_blocked_hallucination — the kernel's ``created_cards``
//     gate rejected a completion; the task stays in its prior state and
//     the worker can retry.
//   * suspected_hallucinated_references — advisory prose-scan result; the
//     completion succeeded but its summary text mentions task ids that do
//     not resolve.
const HALLUCINATION_EVENT_LABELS = {
  completion_blocked_hallucination: "Completion blocked — phantom card ids",
  suspected_hallucinated_references: "Prose referenced phantom card ids",
};
const HALLUCINATION_EVENT_KINDS = [
  "completion_blocked_hallucination",
  "suspected_hallucinated_references",
];

// True when ``kind`` is one of the hallucination event kinds listed above.
function isHallucinationEvent(kind) {
  return HALLUCINATION_EVENT_KINDS.some(function (known) {
    return known === kind;
  });
}
|
||||
|
||||
// Extract the phantom ids carried by a hallucination event.
// Payload shapes:
//   completion_blocked_hallucination: {phantom_cards, verified_cards, summary_preview}
//   suspected_hallucinated_references: {phantom_refs, source}
// Returns an empty array when the event, its payload, or both id fields
// are missing.
function phantomIdsFromEvent(ev) {
  const payload = ev ? ev.payload : null;
  if (!payload) return [];
  if (payload.phantom_cards) return payload.phantom_cards;
  if (payload.phantom_refs) return payload.phantom_refs;
  return [];
}
|
||||
|
||||
function withCompletionSummary(patch, count) {
|
||||
if (!patch || patch.status !== "done") return patch;
|
||||
const label = count && count > 1 ? `${count} selected task(s)` : "this task";
|
||||
|
|
@ -646,6 +675,10 @@
|
|||
return createNewBoard(payload).then(function () { setShowNewBoard(false); });
|
||||
},
|
||||
}) : null,
|
||||
h(AttentionStrip, {
|
||||
boardData,
|
||||
onOpen: setSelectedTaskId,
|
||||
}),
|
||||
h(BoardToolbar, {
|
||||
board: boardData,
|
||||
tenantFilter, setTenantFilter,
|
||||
|
|
@ -684,12 +717,303 @@
|
|||
onRefresh: loadBoard,
|
||||
renderMarkdown: renderMd,
|
||||
allTasks: boardData.columns.reduce(function (acc, c) { return acc.concat(c.tasks); }, []),
|
||||
assignees: (boardData && boardData.assignees) || [],
|
||||
eventTick: taskEventTick[selectedTaskId] || 0,
|
||||
}) : null,
|
||||
),
|
||||
);
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Attention strip — surfaces tasks with active hallucination warnings.
|
||||
// Renders a collapsed bar just below the board switcher; clicking expands
|
||||
// a list of affected tasks with an "Open" button each. Dismissible per
|
||||
// session via state flag; tasks re-appear on page reload if they still
|
||||
// have warnings.
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
// Gather every task on the board whose ``warnings.count`` is positive,
// newest warning first (by ``warnings.latest_at``; a missing timestamp
// sorts as 0). Returns an empty array when there is no board or it has no
// columns.
function collectWarningTasks(boardData) {
  if (!boardData || !boardData.columns) return [];
  const flagged = [];
  boardData.columns.forEach(function (column) {
    (column.tasks || []).forEach(function (task) {
      if (task.warnings && task.warnings.count > 0) flagged.push(task);
    });
  });
  flagged.sort(function (left, right) {
    const leftAt = left.warnings.latest_at || 0;
    const rightAt = right.warnings.latest_at || 0;
    return rightAt - leftAt;
  });
  return flagged;
}
|
||||
|
||||
// Banner listing tasks that currently carry hallucination warnings.
// Props:
//   boardData — board payload scanned by collectWarningTasks.
//   onOpen    — called with a task id when that row's "Open" is clicked.
// Renders nothing when no task is flagged or after the operator dismisses
// the strip (the dismissal lives in component state only).
function AttentionStrip(props) {
  // All hooks run before the early return so their order is the same on
  // every render.
  const [expanded, setExpanded] = useState(false);
  const [dismissed, setDismissed] = useState(false);
  const flagged = useMemo(
    function () { return collectWarningTasks(props.boardData); },
    [props.boardData]
  );
  if (dismissed || flagged.length === 0) return null;

  const headline = flagged.length === 1
    ? "1 task with hallucination warnings"
    : `${flagged.length} tasks with hallucination warnings`;

  const toggleExpanded = function () {
    setExpanded(function (x) { return !x; });
  };
  const dismiss = function () { setDismissed(true); };

  // One row per flagged task: id, title, assignee + event count, Open.
  const renderRow = function (task) {
    const eventCount = task.warnings.count;
    return h("div", { key: task.id, className: "hermes-kanban-attention-row" },
      h("span", { className: "hermes-kanban-attention-row-id" }, task.id),
      h("span", { className: "hermes-kanban-attention-row-title" },
        task.title || "(untitled)"),
      h("span", { className: "hermes-kanban-attention-row-meta" },
        task.assignee ? "@" + task.assignee : "unassigned",
        " · ",
        `${eventCount} event${eventCount === 1 ? "" : "s"}`,
      ),
      h("button", {
        className: "hermes-kanban-attention-row-btn",
        onClick: function () { props.onOpen(task.id); },
        type: "button",
      }, "Open"),
    );
  };

  const bar = h("div", { className: "hermes-kanban-attention-bar" },
    h("span", { className: "hermes-kanban-attention-icon" }, "⚠"),
    h("span", { className: "hermes-kanban-attention-text" }, headline),
    h("button", {
      className: "hermes-kanban-attention-toggle",
      onClick: toggleExpanded,
      type: "button",
    }, expanded ? "Hide" : "Show"),
    h("button", {
      className: "hermes-kanban-attention-dismiss",
      onClick: dismiss,
      title: "Hide until next page reload",
      type: "button",
    }, "✕"),
  );

  const list = expanded
    ? h("div", { className: "hermes-kanban-attention-list" },
        flagged.map(function (task) { return renderRow(task); }),
      )
    : null;

  return h("div", { className: "hermes-kanban-attention" }, bar, list);
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Recovery popover — operator actions for a task flagged with
|
||||
// hallucination warnings. Three primary actions:
|
||||
// 1. Reclaim — release a running worker's claim; task back to ready.
|
||||
// 2. Reassign — switch the task to a different profile (with optional
|
||||
// reclaim-first toggle for currently-running tasks).
|
||||
// 3. Edit profile — copy the CLI hint for `hermes -p <name> model`
|
||||
// (the dashboard can't edit profile config from the
|
||||
// browser; it lives on the filesystem).
|
||||
// Rendered from inside TaskDetail via a toggle button.
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
// Operator recovery panel for one task.
// Props:
//   task             — the task being recovered (id, status, assignee are read).
//   boardSlug        — board identifier passed to withBoard() for the request URL.
//   assignees        — profile names offered in the reassign picker.
//   onActionComplete — optional callback invoked with "reclaim" or
//                      "reassign" after the corresponding POST succeeds.
// Actions: POST .../reclaim, POST .../reassign, and a copyable CLI hint
// for changing the profile's model.
function RecoveryPopover(props) {
  const t = props.task;
  const board = props.boardSlug;
  const assignees = props.assignees || [];
  const [reason, setReason] = useState("");
  const [newProfile, setNewProfile] = useState(t.assignee || "");
  // Default "reclaim first" on for running tasks.
  const [reclaimFirst, setReclaimFirst] = useState(t.status === "running");
  const [busy, setBusy] = useState(false);
  const [msg, setMsg] = useState(null);
  const [copied, setCopied] = useState(false);

  // Send the reclaim / reassign request and surface the outcome in ``msg``.
  const act = function (kind) {
    if (busy) return;
    setBusy(true);
    setMsg(null);
    const urlBase = `${API}/tasks/${encodeURIComponent(t.id)}`;
    const url = kind === "reclaim"
      ? withBoard(`${urlBase}/reclaim`, board)
      : withBoard(`${urlBase}/reassign`, board);
    const body = kind === "reclaim"
      ? { reason: reason || null }
      : {
          profile: newProfile || null,
          reclaim_first: !!reclaimFirst,
          reason: reason || null,
        };
    SDK.fetchJSON(url, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify(body),
    }).then(function () {
      setMsg({ ok: true, text:
        kind === "reclaim"
          ? `Reclaimed ${t.id}. Task back to ready.`
          : `Reassigned ${t.id} to ${newProfile || "(unassigned)"}.`
      });
      if (props.onActionComplete) props.onActionComplete(kind);
    }).catch(function (err) {
      // Guard the property read: a promise rejected with undefined/null
      // would otherwise throw here and skip the setBusy(false) step below,
      // leaving the buttons disabled.
      setMsg({ ok: false, text: `Failed: ${(err && err.message) || err}` });
    }).then(function () {
      setBusy(false);
    });
  };

  const profileCmd = `hermes -p ${t.assignee || "<profile>"} model`;
  const copyCmd = function () {
    const promptFallback = function () {
      window.prompt("Copy this command:", profileCmd);
    };
    try {
      // writeText() reports failures (e.g. denied permission) by rejecting
      // its promise, which a try/catch cannot see, so the fallback is also
      // wired in as the rejection handler.
      navigator.clipboard.writeText(profileCmd).then(function () {
        setCopied(true);
        setTimeout(function () { setCopied(false); }, 2000);
      }, promptFallback);
    } catch (_) {
      // Synchronous failure, e.g. navigator.clipboard is unavailable.
      promptFallback();
    }
  };

  return h("div", { className: "hermes-kanban-recovery" },
    h("div", { className: "hermes-kanban-recovery-title" },
      "Recovery actions"),
    h("div", { className: "hermes-kanban-recovery-hint" },
      "Use these when a worker is stuck (crash loop, repeated hallucination, ",
      "broken model). Events in this task's history are preserved as audit trail."),

    // Reason input (shared across actions)
    h("div", { className: "hermes-kanban-recovery-section" },
      h("label", { className: "hermes-kanban-recovery-label" },
        "Reason (optional, logged on event)"),
      h("input", {
        type: "text",
        className: "hermes-kanban-recovery-input",
        value: reason,
        onChange: function (e) { setReason(e.target.value); },
        placeholder: "e.g. model hallucinating, switching to larger",
      }),
    ),

    // Action 1: Reclaim
    h("div", { className: "hermes-kanban-recovery-section" },
      h("div", { className: "hermes-kanban-recovery-action-row" },
        h("div", { className: "hermes-kanban-recovery-action-label" },
          "1. Reclaim"),
        h("div", { className: "hermes-kanban-recovery-action-desc" },
          t.status === "running"
            ? "Abort the running worker and reset to ready."
            : "Task is not running — nothing to reclaim."),
        h("button", {
          className: "hermes-kanban-recovery-btn",
          disabled: busy || t.status !== "running",
          onClick: function () { act("reclaim"); },
          type: "button",
        }, "Reclaim"),
      ),
    ),

    // Action 2: Reassign
    h("div", { className: "hermes-kanban-recovery-section" },
      h("div", { className: "hermes-kanban-recovery-action-row" },
        h("div", { className: "hermes-kanban-recovery-action-label" },
          "2. Reassign"),
        h("div", { className: "hermes-kanban-recovery-action-desc" },
          "Switch to a different worker profile and retry."),
      ),
      h("div", { className: "hermes-kanban-recovery-reassign-row" },
        h("select", {
          className: "hermes-kanban-recovery-select",
          value: newProfile,
          onChange: function (e) { setNewProfile(e.target.value); },
        },
          h("option", { value: "" }, "(unassigned)"),
          assignees.map(function (a) {
            return h("option", { key: a, value: a }, a);
          }),
        ),
        h("label", { className: "hermes-kanban-recovery-checkbox" },
          h("input", {
            type: "checkbox",
            checked: reclaimFirst,
            onChange: function (e) { setReclaimFirst(e.target.checked); },
          }),
          " Reclaim first",
        ),
        h("button", {
          className: "hermes-kanban-recovery-btn",
          disabled: busy,
          onClick: function () { act("reassign"); },
          type: "button",
        }, "Reassign"),
      ),
    ),

    // Action 3: Edit profile model (CLI hint)
    h("div", { className: "hermes-kanban-recovery-section" },
      h("div", { className: "hermes-kanban-recovery-action-row" },
        h("div", { className: "hermes-kanban-recovery-action-label" },
          "3. Change profile model"),
        h("div", { className: "hermes-kanban-recovery-action-desc" },
          "Profile config lives on disk — change it from a terminal, ",
          "then use Reclaim above to retry with the new model."),
      ),
      h("div", { className: "hermes-kanban-recovery-cmd-row" },
        h("code", { className: "hermes-kanban-recovery-cmd" }, profileCmd),
        h("button", {
          className: "hermes-kanban-recovery-btn",
          onClick: copyCmd,
          type: "button",
        }, copied ? "Copied" : "Copy"),
      ),
    ),

    // Outcome of the last action, if any.
    msg
      ? h("div", {
          className: cn(
            "hermes-kanban-recovery-msg",
            msg.ok ? "hermes-kanban-recovery-msg--ok" : "hermes-kanban-recovery-msg--err",
          ),
        }, msg.text)
      : null,
  );
}
|
||||
|
||||
// Thin wrapper that toggles the RecoveryPopover visibility inside a
|
||||
// task drawer. Auto-opens when the task has active hallucination
|
||||
// warnings; operators can still collapse it. Always available via a
|
||||
// header button for tasks without warnings, so reclaim/reassign is
|
||||
// accessible for other stuck-worker scenarios too.
|
||||
function RecoverySection(props) {
  const hasWarnings = props.hasWarnings;
  // Start expanded when the task already carries warnings.
  const [open, setOpen] = useState(!!hasWarnings);

  // If warnings show up while the section is mounted, force it open again.
  // The operator can still collapse it afterwards with the toggle.
  useEffect(function () {
    if (hasWarnings) setOpen(true);
  }, [hasWarnings]);

  function toggleOpen() {
    setOpen(function (prev) { return !prev; });
  }

  // Forward a completed recovery action to the optional refresh callback.
  function notifyRefresh() {
    if (props.onRefresh) props.onRefresh();
  }

  const headLabel = hasWarnings
    ? h("span", { className: "hermes-kanban-section-head-warning" },
        "⚠ Recovery")
    : "Recovery";

  let popover = null;
  if (open) {
    popover = h(RecoveryPopover, {
      // The key is the task id: when the drawer switches to another task
      // React unmounts and remounts the popover, so the previous task's
      // reason / newProfile / success toast cannot carry over.
      key: props.task.id,
      task: props.task,
      boardSlug: props.boardSlug,
      assignees: props.assignees,
      onActionComplete: notifyRefresh,
    });
  }

  return h("div", { className: "hermes-kanban-section" },
    h("div", { className: "hermes-kanban-section-head-row" },
      h("span", { className: "hermes-kanban-section-head" }, headLabel),
      h("button", {
        className: "hermes-kanban-section-toggle",
        onClick: toggleOpen,
        type: "button",
      }, open ? "Hide" : "Show"),
    ),
    popover,
  );
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Board switcher (multi-project)
|
||||
// -------------------------------------------------------------------------
|
||||
|
|
@ -1219,6 +1543,14 @@
|
|||
title: "Select for bulk actions",
|
||||
}),
|
||||
h("span", { className: "hermes-kanban-card-id" }, t.id),
|
||||
t.warnings && t.warnings.count > 0
|
||||
? h("span", {
|
||||
className: "hermes-kanban-warning-badge",
|
||||
title: `⚠ ${t.warnings.count} hallucination ` +
|
||||
`event(s) since last clean completion. ` +
|
||||
`Click to open for details.`,
|
||||
}, "⚠")
|
||||
: null,
|
||||
t.priority > 0
|
||||
? h(Badge, { className: "hermes-kanban-priority" }, `P${t.priority}`)
|
||||
: null,
|
||||
|
|
@ -1541,6 +1873,7 @@
|
|||
data, editing, setEditing,
|
||||
renderMarkdown: props.renderMarkdown,
|
||||
allTasks: props.allTasks,
|
||||
assignees: props.assignees || [],
|
||||
boardSlug: boardSlug,
|
||||
onPatch: doPatch,
|
||||
onAddParent: addLink,
|
||||
|
|
@ -1550,6 +1883,7 @@
|
|||
homeChannels: homeChannels,
|
||||
homeBusy: homeBusy,
|
||||
onToggleHomeSub: toggleHomeSubscription,
|
||||
onRefresh: props.onRefresh,
|
||||
}) : null,
|
||||
data ? h("div", { className: "hermes-kanban-drawer-comment-row" },
|
||||
h(Input, {
|
||||
|
|
@ -1611,6 +1945,13 @@
|
|||
t.created_by ? h(MetaRow, { label: "Created by", value: t.created_by }) : null,
|
||||
),
|
||||
h(StatusActions, { task: t, onPatch: props.onPatch }),
|
||||
h(RecoverySection, {
|
||||
task: t,
|
||||
boardSlug: props.boardSlug,
|
||||
assignees: props.assignees,
|
||||
hasWarnings: t.warnings && t.warnings.count > 0,
|
||||
onRefresh: props.onRefresh,
|
||||
}),
|
||||
h(HomeSubsSection, {
|
||||
homeChannels: props.homeChannels || [],
|
||||
homeBusy: props.homeBusy || {},
|
||||
|
|
@ -1651,11 +1992,41 @@
|
|||
h("div", { className: "hermes-kanban-section" },
|
||||
h("div", { className: "hermes-kanban-section-head" }, `Events (${events.length})`),
|
||||
events.slice().reverse().slice(0, 20).map(function (e) {
|
||||
return h("div", { key: e.id, className: "hermes-kanban-event" },
|
||||
h("span", { className: "hermes-kanban-event-kind" }, e.kind),
|
||||
h("span", { className: "hermes-kanban-event-ago" },
|
||||
timeAgo ? timeAgo(e.created_at) : ""),
|
||||
e.payload
|
||||
const isHall = isHallucinationEvent(e.kind);
|
||||
const phantoms = isHall ? phantomIdsFromEvent(e) : [];
|
||||
return h("div", {
|
||||
key: e.id,
|
||||
className: cn(
|
||||
"hermes-kanban-event",
|
||||
isHall ? "hermes-kanban-event--hallucination" : "",
|
||||
),
|
||||
},
|
||||
isHall
|
||||
? h("div", { className: "hermes-kanban-event-header" },
|
||||
h("span", { className: "hermes-kanban-event-warning-icon" }, "⚠"),
|
||||
h("span", { className: "hermes-kanban-event-warning-label" },
|
||||
HALLUCINATION_EVENT_LABELS[e.kind] || e.kind),
|
||||
h("span", { className: "hermes-kanban-event-ago" },
|
||||
timeAgo ? timeAgo(e.created_at) : ""),
|
||||
)
|
||||
: h("div", { className: "hermes-kanban-event-header-plain" },
|
||||
h("span", { className: "hermes-kanban-event-kind" }, e.kind),
|
||||
h("span", { className: "hermes-kanban-event-ago" },
|
||||
timeAgo ? timeAgo(e.created_at) : ""),
|
||||
),
|
||||
isHall && phantoms.length > 0
|
||||
? h("div", { className: "hermes-kanban-event-phantom-row" },
|
||||
h("span", { className: "hermes-kanban-event-phantom-label" },
|
||||
"Phantom ids:"),
|
||||
phantoms.map(function (pid) {
|
||||
return h("code", {
|
||||
key: pid,
|
||||
className: "hermes-kanban-event-phantom-chip",
|
||||
}, pid);
|
||||
}),
|
||||
)
|
||||
: null,
|
||||
e.payload && !isHall
|
||||
? h("code", { className: "hermes-kanban-event-payload" },
|
||||
JSON.stringify(e.payload))
|
||||
: null,
|
||||
|
|
|
|||
253
plugins/kanban/dashboard/dist/style.css
vendored
253
plugins/kanban/dashboard/dist/style.css
vendored
|
|
@ -847,3 +847,256 @@
|
|||
gap: 0.5rem;
|
||||
margin-top: 1rem;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
/* Hallucination warnings: per-card badge, events callout, attention */
|
||||
/* strip, recovery popover. Orange/red palette but muted so the board */
|
||||
/* doesn't scream on every render. */
|
||||
/* ---------------------------------------------------------------------- */
|
||||
.hermes-kanban-warning-badge {
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
font-size: 0.75rem;
|
||||
color: #ff9e3b;
|
||||
margin-left: 0.25rem;
|
||||
cursor: help;
|
||||
}
|
||||
|
||||
/* Attention strip — collapsed state is a thin bar. */
|
||||
.hermes-kanban-attention {
|
||||
border: 1px solid rgba(255, 158, 59, 0.35);
|
||||
background: rgba(255, 158, 59, 0.06);
|
||||
border-radius: 0.5rem;
|
||||
overflow: hidden;
|
||||
}
|
||||
.hermes-kanban-attention-bar {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.5rem;
|
||||
padding: 0.4rem 0.75rem;
|
||||
font-size: 0.8125rem;
|
||||
}
|
||||
.hermes-kanban-attention-icon { color: #ff9e3b; font-size: 1rem; }
|
||||
.hermes-kanban-attention-text { flex: 1; }
|
||||
.hermes-kanban-attention-toggle,
|
||||
.hermes-kanban-attention-dismiss,
|
||||
.hermes-kanban-attention-row-btn {
|
||||
background: transparent;
|
||||
border: 1px solid rgba(120, 120, 140, 0.3);
|
||||
border-radius: 0.3rem;
|
||||
padding: 0.15rem 0.55rem;
|
||||
font-size: 0.75rem;
|
||||
color: inherit;
|
||||
cursor: pointer;
|
||||
}
|
||||
.hermes-kanban-attention-toggle:hover,
|
||||
.hermes-kanban-attention-dismiss:hover,
|
||||
.hermes-kanban-attention-row-btn:hover {
|
||||
background: rgba(255, 158, 59, 0.12);
|
||||
}
|
||||
.hermes-kanban-attention-list {
|
||||
border-top: 1px solid rgba(255, 158, 59, 0.2);
|
||||
padding: 0.25rem 0;
|
||||
}
|
||||
.hermes-kanban-attention-row {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.5rem;
|
||||
padding: 0.3rem 0.75rem;
|
||||
font-size: 0.8125rem;
|
||||
}
|
||||
.hermes-kanban-attention-row:hover {
|
||||
background: rgba(255, 158, 59, 0.08);
|
||||
}
|
||||
.hermes-kanban-attention-row-id {
|
||||
font-family: ui-monospace, SFMono-Regular, monospace;
|
||||
font-size: 0.75rem;
|
||||
color: var(--color-muted-foreground, #888);
|
||||
min-width: 7rem;
|
||||
}
|
||||
.hermes-kanban-attention-row-title {
|
||||
flex: 1;
|
||||
white-space: nowrap;
|
||||
overflow: hidden;
|
||||
text-overflow: ellipsis;
|
||||
}
|
||||
.hermes-kanban-attention-row-meta {
|
||||
font-size: 0.75rem;
|
||||
color: var(--color-muted-foreground, #888);
|
||||
}
|
||||
|
||||
/* Events tab — callout style for hallucination events. */
|
||||
.hermes-kanban-event--hallucination {
|
||||
border-left: 3px solid #ff6b6b;
|
||||
background: rgba(255, 107, 107, 0.08);
|
||||
padding: 0.5rem 0.65rem;
|
||||
border-radius: 0.35rem;
|
||||
margin: 0.25rem 0;
|
||||
}
|
||||
.hermes-kanban-event-header,
|
||||
.hermes-kanban-event-header-plain {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.5rem;
|
||||
}
|
||||
.hermes-kanban-event-warning-icon { color: #ff6b6b; font-size: 1rem; }
|
||||
.hermes-kanban-event-warning-label {
|
||||
color: #ff6b6b;
|
||||
font-weight: 600;
|
||||
font-size: 0.8125rem;
|
||||
}
|
||||
.hermes-kanban-event-phantom-row {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.4rem;
|
||||
flex-wrap: wrap;
|
||||
margin-top: 0.3rem;
|
||||
padding-left: 1.35rem;
|
||||
}
|
||||
.hermes-kanban-event-phantom-label {
|
||||
font-size: 0.75rem;
|
||||
color: var(--color-muted-foreground, #999);
|
||||
}
|
||||
.hermes-kanban-event-phantom-chip {
|
||||
font-family: ui-monospace, SFMono-Regular, monospace;
|
||||
font-size: 0.75rem;
|
||||
padding: 0.1rem 0.4rem;
|
||||
background: rgba(255, 107, 107, 0.15);
|
||||
border: 1px solid rgba(255, 107, 107, 0.3);
|
||||
border-radius: 0.3rem;
|
||||
}
|
||||
|
||||
/* Recovery section header — amber accent when the task has warnings. */
|
||||
.hermes-kanban-section-head-warning { color: #ff9e3b; }
|
||||
.hermes-kanban-section-head-row {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: space-between;
|
||||
gap: 0.5rem;
|
||||
}
|
||||
.hermes-kanban-section-toggle {
|
||||
background: transparent;
|
||||
border: 1px solid rgba(120, 120, 140, 0.3);
|
||||
border-radius: 0.3rem;
|
||||
padding: 0.15rem 0.55rem;
|
||||
font-size: 0.75rem;
|
||||
color: inherit;
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
/* Recovery popover body. */
|
||||
.hermes-kanban-recovery {
|
||||
border: 1px solid rgba(120, 120, 140, 0.25);
|
||||
background: rgba(255, 158, 59, 0.04);
|
||||
border-radius: 0.5rem;
|
||||
padding: 0.75rem;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 0.75rem;
|
||||
}
|
||||
.hermes-kanban-recovery-title {
|
||||
font-weight: 600;
|
||||
font-size: 0.8125rem;
|
||||
}
|
||||
.hermes-kanban-recovery-hint {
|
||||
font-size: 0.75rem;
|
||||
color: var(--color-muted-foreground, #888);
|
||||
line-height: 1.35;
|
||||
}
|
||||
.hermes-kanban-recovery-section {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 0.35rem;
|
||||
}
|
||||
.hermes-kanban-recovery-label {
|
||||
font-size: 0.75rem;
|
||||
color: var(--color-muted-foreground, #888);
|
||||
}
|
||||
.hermes-kanban-recovery-input,
.hermes-kanban-recovery-select {
  padding: 0.25rem 0.4rem;
  font-size: 0.8125rem;
  background: rgba(0, 0, 0, 0.15);
  border: 1px solid rgba(120, 120, 140, 0.3);
  border-radius: 0.3rem;
  color: inherit;
  outline: none;
}
/* `outline: none` above removes the browser's default focus ring, so give */
/* keyboard users an explicit indicator in the recovery palette instead.   */
.hermes-kanban-recovery-input:focus-visible,
.hermes-kanban-recovery-select:focus-visible {
  outline: 2px solid rgba(255, 158, 59, 0.6);
  outline-offset: 1px;
}
|
||||
.hermes-kanban-recovery-action-row {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.5rem;
|
||||
flex-wrap: wrap;
|
||||
}
|
||||
.hermes-kanban-recovery-action-label {
|
||||
font-size: 0.8125rem;
|
||||
font-weight: 600;
|
||||
min-width: 8rem;
|
||||
}
|
||||
.hermes-kanban-recovery-action-desc {
|
||||
flex: 1;
|
||||
font-size: 0.75rem;
|
||||
color: var(--color-muted-foreground, #888);
|
||||
}
|
||||
.hermes-kanban-recovery-btn {
|
||||
padding: 0.25rem 0.7rem;
|
||||
font-size: 0.75rem;
|
||||
background: rgba(255, 158, 59, 0.15);
|
||||
border: 1px solid rgba(255, 158, 59, 0.4);
|
||||
border-radius: 0.3rem;
|
||||
color: inherit;
|
||||
cursor: pointer;
|
||||
}
|
||||
.hermes-kanban-recovery-btn:hover:not(:disabled) {
|
||||
background: rgba(255, 158, 59, 0.25);
|
||||
}
|
||||
.hermes-kanban-recovery-btn:disabled {
|
||||
opacity: 0.4;
|
||||
cursor: not-allowed;
|
||||
}
|
||||
.hermes-kanban-recovery-reassign-row {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.5rem;
|
||||
flex-wrap: wrap;
|
||||
}
|
||||
.hermes-kanban-recovery-checkbox {
|
||||
font-size: 0.75rem;
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
gap: 0.25rem;
|
||||
}
|
||||
.hermes-kanban-recovery-cmd-row {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.5rem;
|
||||
flex-wrap: wrap;
|
||||
}
|
||||
.hermes-kanban-recovery-cmd {
|
||||
font-family: ui-monospace, SFMono-Regular, monospace;
|
||||
font-size: 0.75rem;
|
||||
padding: 0.2rem 0.5rem;
|
||||
background: rgba(0, 0, 0, 0.2);
|
||||
border: 1px solid rgba(120, 120, 140, 0.3);
|
||||
border-radius: 0.3rem;
|
||||
flex: 1;
|
||||
min-width: 10rem;
|
||||
overflow-x: auto;
|
||||
white-space: nowrap;
|
||||
}
|
||||
.hermes-kanban-recovery-msg {
|
||||
font-size: 0.75rem;
|
||||
padding: 0.35rem 0.5rem;
|
||||
border-radius: 0.3rem;
|
||||
}
|
||||
.hermes-kanban-recovery-msg--ok {
|
||||
background: rgba(120, 200, 120, 0.12);
|
||||
color: #6bc46b;
|
||||
border: 1px solid rgba(120, 200, 120, 0.3);
|
||||
}
|
||||
.hermes-kanban-recovery-msg--err {
|
||||
background: rgba(255, 107, 107, 0.12);
|
||||
color: #ff8b8b;
|
||||
border: 1px solid rgba(255, 107, 107, 0.3);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -176,6 +176,74 @@ def _run_dict(r: kanban_db.Run) -> dict[str, Any]:
|
|||
}
|
||||
|
||||
|
||||
# Hallucination-warning event kinds — see complete_task() in kanban_db.py.
#   completion_blocked_hallucination: kernel rejected created_cards with
#       phantom ids; task stays in prior state.
#   suspected_hallucinated_references: prose scan found t_<hex> in summary
#       that doesn't resolve; completion succeeded, advisory only.
_WARNING_EVENT_KINDS = (
    "completion_blocked_hallucination",
    "suspected_hallucinated_references",
)

# Event kinds that count as a "clean" checkpoint: seeing one of these for a
# task discards every warning recorded for that task before it.
_WARNING_RESET_EVENT_KINDS = ("completed", "edited")


def _compute_warnings_for_tasks(
    conn: sqlite3.Connection,
    task_ids: Optional[list[str]] = None,
) -> dict[str, dict]:
    """Return ``{task_id: {count, kinds, latest_at}}`` for active warnings.

    A warning is "active" when it was recorded AFTER the task's most recent
    clean event (``completed`` / ``edited``). Tasks without active warnings
    are absent from the result, so an empty dict means nothing is flagged.

    Args:
        conn: Connection whose rows support access by column name
            (``row["task_id"]``).
        task_ids: Restrict the scan to these task ids. ``None`` scans every
            task in the DB (board-level rollup); an empty list returns ``{}``
            without querying.

    Returns:
        Mapping of task id to a bucket with ``count`` (number of active
        warning events), ``kinds`` (per-kind counts) and ``latest_at``
        (largest ``created_at`` among them, ``0`` if none is set).
    """
    if task_ids is not None and not task_ids:
        return {}

    # One query shape for both callers, driven by the module constants so
    # the SQL can't drift from _WARNING_EVENT_KINDS.
    tracked_kinds = _WARNING_EVENT_KINDS + _WARNING_RESET_EVENT_KINDS
    clauses: list[str] = []
    params: list = []
    if task_ids is not None:
        clauses.append(f"task_id IN ({','.join('?' * len(task_ids))})")
        params.extend(task_ids)
    clauses.append(f"kind IN ({','.join('?' * len(tracked_kinds))})")
    params.extend(tracked_kinds)
    sql = (
        "SELECT task_id, kind, created_at FROM task_events "
        f"WHERE {' AND '.join(clauses)} "
        "ORDER BY task_id, id"
    )

    out: dict[str, dict] = {}
    # Rows arrive grouped by task and in event-id order, so a single pass
    # can replay each task's history.
    for row in conn.execute(sql, params).fetchall():
        tid = row["task_id"]
        kind = row["kind"]
        if kind in _WARNING_RESET_EVENT_KINDS:
            # Clean event wipes the warnings accumulated so far; only
            # events that come after it count.
            out.pop(tid, None)
            continue
        bucket = out.setdefault(
            tid, {"count": 0, "kinds": {}, "latest_at": 0}
        )
        bucket["count"] += 1
        bucket["kinds"][kind] = bucket["kinds"].get(kind, 0) + 1
        created_at = row["created_at"]
        # Guard against a NULL timestamp, which would otherwise raise on ">".
        if created_at is not None and created_at > bucket["latest_at"]:
            bucket["latest_at"] = created_at
    return out
|
||||
|
||||
|
||||
def _links_for(conn: sqlite3.Connection, task_id: str) -> dict[str, list[str]]:
|
||||
"""Return {'parents': [...], 'children': [...]} for a task."""
|
||||
parents = [
|
||||
|
|
@ -253,6 +321,11 @@ def get_board(
|
|||
if row["cstatus"] == "done":
|
||||
p["done"] += 1
|
||||
|
||||
# Hallucination-warning rollup for this board (all tasks).
|
||||
# Delegated to _compute_warnings_for_tasks so the per-task
|
||||
# /tasks/:id endpoint can reuse the same rule.
|
||||
warnings_per_task = _compute_warnings_for_tasks(conn, task_ids=None)
|
||||
|
||||
latest_event_id = conn.execute(
|
||||
"SELECT COALESCE(MAX(id), 0) AS m FROM task_events"
|
||||
).fetchone()["m"]
|
||||
|
|
@ -266,6 +339,9 @@ def get_board(
|
|||
d["link_counts"] = link_counts.get(t.id, {"parents": 0, "children": 0})
|
||||
d["comment_count"] = comment_counts.get(t.id, 0)
|
||||
d["progress"] = progress.get(t.id) # None when the task has no children
|
||||
w = warnings_per_task.get(t.id)
|
||||
if w:
|
||||
d["warnings"] = w
|
||||
col = t.status if t.status in columns else "todo"
|
||||
columns[col].append(d)
|
||||
|
||||
|
|
@ -313,8 +389,14 @@ def get_task(task_id: str, board: Optional[str] = Query(None)):
|
|||
task = kanban_db.get_task(conn, task_id)
|
||||
if task is None:
|
||||
raise HTTPException(status_code=404, detail=f"task {task_id} not found")
|
||||
task_d = _task_dict(task)
|
||||
# Attach warnings metadata so the drawer's Recovery section can
|
||||
# auto-open when a hallucination is unresolved.
|
||||
warnings = _compute_warnings_for_tasks(conn, task_ids=[task_id])
|
||||
if warnings.get(task_id):
|
||||
task_d["warnings"] = warnings[task_id]
|
||||
return {
|
||||
"task": _task_dict(task),
|
||||
"task": task_d,
|
||||
"comments": [_comment_dict(c) for c in kanban_db.list_comments(conn, task_id)],
|
||||
"events": [_event_dict(e) for e in kanban_db.list_events(conn, task_id)],
|
||||
"links": _links_for(conn, task_id),
|
||||
|
|
@ -713,6 +795,85 @@ def bulk_update(payload: BulkTaskBody, board: Optional[str] = Query(None)):
|
|||
conn.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Recovery actions — reclaim a running claim, reassign to a new profile
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Request body for POST /tasks/{task_id}/reclaim.
class ReclaimBody(BaseModel):
    # Optional operator-supplied reason; the endpoint forwards it to
    # kanban_db.reclaim_task(..., reason=...).
    reason: Optional[str] = None
|
||||
|
||||
|
||||
@router.post("/tasks/{task_id}/reclaim")
def reclaim_task_endpoint(
    task_id: str,
    payload: ReclaimBody,
    board: Optional[str] = Query(None),
):
    """Drop the active worker claim on a running task.

    Backs the dashboard recovery popover: lets an operator abort a stuck
    worker (for example one that keeps hallucinating card ids) instead of
    waiting for the claim TTL. Equivalent to
    ``hermes kanban reclaim <task_id> --reason ...``.

    Returns ``{"ok": True, "task_id": ...}`` on success and raises a 409
    ``HTTPException`` when ``kanban_db.reclaim_task`` reports failure.
    """
    board = _resolve_board(board)
    conn = _conn(board=board)
    try:
        if kanban_db.reclaim_task(conn, task_id, reason=payload.reason):
            return {"ok": True, "task_id": task_id}
        # A falsy result does not say whether the id is unknown or the task
        # is simply not running, so the message names both possibilities.
        raise HTTPException(
            status_code=409,
            detail=(
                f"cannot reclaim {task_id}: not in a claimable state "
                "(not running, or unknown id)"
            ),
        )
    finally:
        conn.close()
|
||||
|
||||
|
||||
# Request body for POST /tasks/{task_id}/reassign.
class ReassignBody(BaseModel):
    profile: Optional[str] = None  # "" or None = unassign
    # Forwarded as reclaim_first=... to kanban_db.reassign_task; the
    # endpoint's 409 message tells callers to set it for a running task.
    reclaim_first: bool = False
    # Optional operator-supplied reason, forwarded to kanban_db.reassign_task.
    reason: Optional[str] = None
|
||||
|
||||
|
||||
@router.post("/tasks/{task_id}/reassign")
def reassign_task_endpoint(
    task_id: str,
    payload: ReassignBody,
    board: Optional[str] = Query(None),
):
    """Move a task to another profile, optionally reclaiming it first.

    Backs the dashboard recovery popover: lets an operator retry a task
    with a different worker profile (for example a smarter model after the
    assigned one keeps hallucinating). Equivalent to
    ``hermes kanban reassign <task_id> <profile> [--reclaim]``.

    Returns ``{"ok": True, "task_id": ..., "assignee": ...}`` on success
    and raises a 409 ``HTTPException`` when ``kanban_db.reassign_task``
    reports failure.
    """
    # An empty-string profile means "unassign", same as None.
    new_assignee = payload.profile or None
    board = _resolve_board(board)
    conn = _conn(board=board)
    try:
        reassigned = kanban_db.reassign_task(
            conn,
            task_id,
            new_assignee,
            reclaim_first=bool(payload.reclaim_first),
            reason=payload.reason,
        )
        if reassigned:
            return {"ok": True, "task_id": task_id, "assignee": new_assignee}
        raise HTTPException(
            status_code=409,
            detail=(
                f"cannot reassign {task_id}: unknown id, or still "
                "running (pass reclaim_first=true to release the claim first)"
            ),
        )
    finally:
        conn.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Plugin config (read dashboard.kanban.* defaults from config.yaml)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -150,3 +150,13 @@ Tell them what you created in plain prose:
|
|||
**Don't pre-create the whole graph if the shape depends on intermediate findings.** If T3's structure depends on what T1 and T2 find, let T3 exist as a "synthesize findings" task whose own first step is to read parent handoffs and plan the rest. Orchestrators can spawn orchestrators.
|
||||
|
||||
**Tenant inheritance.** If `HERMES_TENANT` is set in your env, pass `tenant=os.environ.get("HERMES_TENANT")` on every `kanban_create` call so child tasks stay in the same namespace.
|
||||
|
||||
## Recovering stuck workers
|
||||
|
||||
When a worker profile keeps crashing, hallucinating, or getting blocked by its own mistakes (usually: wrong model, missing skill, broken credential), the kanban dashboard flags the task with a ⚠ badge and opens a **Recovery** section in the drawer. Three primary actions:
|
||||
|
||||
1. **Reclaim** (or `hermes kanban reclaim <task_id>`) — abort the running worker immediately and reset the task to `ready`. The existing claim TTL is ~15 min; this is the fast path out.
|
||||
2. **Reassign** (or `hermes kanban reassign <task_id> <new-profile> --reclaim`) — switch the task to a different profile and let the dispatcher pick it up with a fresh worker.
|
||||
3. **Change profile model** — the dashboard prints a copy-paste hint for `hermes -p <profile> model` since profile config lives on disk; edit it in a terminal, then Reclaim to retry with the new model.
|
||||
|
||||
Hallucination warnings appear on tasks where a worker's `kanban_complete(created_cards=[...])` claim included card ids that don't exist or weren't created by the worker's profile (the gate blocks the completion), or where the free-form summary references `t_<hex>` ids that don't resolve (advisory prose scan, non-blocking). Both produce audit events that persist even after recovery actions — the trail stays for debugging.
|
||||
|
|
|
|||
|
|
@ -75,6 +75,32 @@ kanban_complete(
|
|||
|
||||
Shape `metadata` so downstream parsers (reviewers, aggregators, schedulers) can use it without re-reading your prose.
|
||||
|
||||
## Claiming cards you actually created
|
||||
|
||||
If your run produced new kanban tasks (via `kanban_create`), pass the ids in `created_cards` on `kanban_complete`. The kernel verifies each id exists and was created by your profile; any phantom id blocks the completion with an error listing what went wrong, and the rejected attempt is permanently recorded on the task's event log. **Only list ids you captured from a successful `kanban_create` return value — never invent ids from prose, never paste ids from earlier runs, never claim cards another worker created.**
|
||||
|
||||
```python
|
||||
# GOOD — capture return values, then claim them.
|
||||
c1 = kanban_create(title="remediate SQL injection", assignee="security-worker")
|
||||
c2 = kanban_create(title="fix CSRF middleware", assignee="web-worker")
|
||||
|
||||
kanban_complete(
|
||||
summary="Review done; spawned remediations for both findings.",
|
||||
metadata={"pr_number": 123, "approved": False},
|
||||
created_cards=[c1["task_id"], c2["task_id"]],
|
||||
)
|
||||
```
|
||||
|
||||
```python
|
||||
# BAD — claiming ids you don't have captured return values for.
|
||||
kanban_complete(
|
||||
summary="Created remediation cards t_a1b2c3d4, t_deadbeef", # hallucinated
|
||||
created_cards=["t_a1b2c3d4", "t_deadbeef"], # → gate rejects
|
||||
)
|
||||
```
|
||||
|
||||
If a `kanban_create` call fails (exception, tool_error), the card was NOT created — do not include a phantom id for it. Retry the create, or omit the id and mention the failure in your summary. The prose-scan pass also catches `t_<hex>` references in your free-form summary that don't resolve; these don't block the completion but show up as advisory warnings on the task in the dashboard.
|
||||
|
||||
## Block reasons that get answered fast
|
||||
|
||||
Bad: `"stuck"` — the human has no context.
|
||||
|
|
|
|||
|
|
@ -208,3 +208,81 @@ def test_kanban_not_gateway_only():
|
|||
cmd = next(c for c in COMMAND_REGISTRY if c.name == "kanban")
|
||||
assert not cmd.cli_only
|
||||
assert not cmd.gateway_only
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# reclaim + reassign CLI smoke tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_run_slash_reclaim_running_task(kanban_home):
    """``reclaim <id>`` releases a live claim and puts the task back to ready."""
    import re
    import secrets
    import time
    from hermes_cli import kanban_db as kb

    created = kc.run_slash("create 'stuck worker task' --assignee broken-model")
    match = re.search(r"(t_[a-f0-9]+)", created)
    assert match
    tid = match.group(1)

    # Fake a running worker whose claim is still valid (it expires an hour
    # from now), so the TTL has not run out when reclaim is invoked.
    conn = kb.connect()
    try:
        lock = secrets.token_hex(4)
        now = int(time.time())
        expires = now + 3600
        conn.execute(
            "UPDATE tasks SET status='running', claim_lock=?, claim_expires=?, "
            "worker_pid=? WHERE id=?",
            (lock, expires, 4242, tid),
        )
        conn.execute(
            "INSERT INTO task_runs (task_id, status, claim_lock, claim_expires, "
            "worker_pid, started_at) VALUES (?, 'running', ?, ?, ?, ?)",
            (tid, lock, expires, 4242, now),
        )
        run_id = conn.execute("SELECT last_insert_rowid()").fetchone()[0]
        conn.execute("UPDATE tasks SET current_run_id=? WHERE id=?", (run_id, tid))
        conn.commit()
    finally:
        conn.close()

    reclaimed = kc.run_slash(f"reclaim {tid} --reason 'test'")
    assert "Reclaimed" in reclaimed, reclaimed
    # The task should be shown as ready again.
    shown = kc.run_slash(f"show {tid}")
    assert "ready" in shown.lower()
|
||||
|
||||
|
||||
def test_run_slash_reassign_with_reclaim_flag(kanban_home):
    """``reassign <id> <profile> --reclaim`` moves a running task to a new profile."""
    import re
    import time
    import secrets
    from hermes_cli import kanban_db as kb

    out1 = kc.run_slash("create 'switch model' --assignee orig")
    m = re.search(r"(t_[a-f0-9]+)", out1)
    # Fail with the CLI output rather than an AttributeError on None when
    # the create command did not print a task id.
    assert m, out1
    tid = m.group(1)

    # Simulate a running claim that has not expired yet.
    conn = kb.connect()
    try:
        lock = secrets.token_hex(4)
        conn.execute(
            "UPDATE tasks SET status='running', claim_lock=?, claim_expires=?, "
            "worker_pid=? WHERE id=?",
            (lock, int(time.time()) + 3600, 4242, tid),
        )
        conn.execute(
            "INSERT INTO task_runs (task_id, status, claim_lock, claim_expires, "
            "worker_pid, started_at) VALUES (?, 'running', ?, ?, ?, ?)",
            (tid, lock, int(time.time()) + 3600, 4242, int(time.time())),
        )
        rid = conn.execute("SELECT last_insert_rowid()").fetchone()[0]
        conn.execute("UPDATE tasks SET current_run_id=? WHERE id=?", (rid, tid))
        conn.commit()
    finally:
        conn.close()

    out = kc.run_slash(f"reassign {tid} newbie --reclaim --reason 'switch'")
    assert "Reassigned" in out, out
    # The new profile should appear in the task details.
    out2 = kc.run_slash(f"show {tid}")
    assert "newbie" in out2
|
||||
|
|
|
|||
|
|
@ -2786,3 +2786,269 @@ def test_gateway_dispatcher_watcher_env_truthy_uses_config(monkeypatch):
|
|||
timeout=3.0,
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Hallucination gate (created_cards verify + prose scan)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_complete_with_created_cards_all_verified_records_manifest(kanban_home):
    """When every id in ``created_cards`` exists and was created by the
    completing worker, the ids are stored on the ``completed`` event payload
    as ``verified_cards``."""
    import json

    conn = kb.connect()
    try:
        parent = kb.create_task(conn, title="parent", assignee="alice")
        c1 = kb.create_task(conn, title="c1", assignee="x", created_by="alice")
        c2 = kb.create_task(conn, title="c2", assignee="y", created_by="alice")

        result = kb.complete_task(
            conn,
            parent,
            summary="done, created c1+c2",
            created_cards=[c1, c2],
        )
        assert result is True

        rows = conn.execute(
            "SELECT kind, payload FROM task_events WHERE task_id=? ORDER BY id",
            (parent,),
        ).fetchall()
        completed_rows = [r for r in rows if r["kind"] == "completed"]
        assert len(completed_rows) == 1
        manifest = json.loads(completed_rows[0]["payload"])
        assert manifest.get("verified_cards") == [c1, c2]
    finally:
        conn.close()
|
||||
|
||||
|
||||
def test_complete_with_phantom_created_cards_raises_and_audits(kanban_home):
    """Claiming a nonexistent card id raises ``HallucinatedCardsError``,
    keeps the task in its previous status, and leaves a
    ``completion_blocked_hallucination`` audit event behind."""
    conn = kb.connect()
    try:
        parent = kb.create_task(conn, title="parent", assignee="alice")
        real = kb.create_task(conn, title="real", assignee="x", created_by="alice")
        phantom_id = "t_deadbeefcafe"

        with pytest.raises(kb.HallucinatedCardsError) as excinfo:
            kb.complete_task(
                conn,
                parent,
                summary="claimed phantom",
                created_cards=[real, phantom_id],
            )
        # Only the invented id is reported; the real card is not.
        assert excinfo.value.phantom == [phantom_id]

        # The rejected completion must not have moved the task to done.
        status_row = conn.execute(
            "SELECT status FROM tasks WHERE id=?", (parent,),
        ).fetchone()
        assert status_row["status"] == "ready"

        # The blocked attempt is audited and no completion was recorded.
        event_rows = conn.execute(
            "SELECT kind FROM task_events WHERE task_id=? ORDER BY id",
            (parent,),
        )
        kinds = [row["kind"] for row in event_rows]
        assert "completion_blocked_hallucination" in kinds
        assert "completed" not in kinds
    finally:
        conn.close()
|
||||
|
||||
|
||||
def test_complete_with_cross_worker_card_is_rejected(kanban_home):
    """An existing card created by another worker profile counts as phantom
    when claimed (hallucinated attribution)."""
    conn = kb.connect()
    try:
        parent = kb.create_task(conn, title="parent", assignee="alice")
        # Created by "bob", while the completing task belongs to "alice".
        foreign = kb.create_task(conn, title="other", assignee="x", created_by="bob")

        with pytest.raises(kb.HallucinatedCardsError) as excinfo:
            kb.complete_task(
                conn,
                parent,
                summary="claiming someone else's card",
                created_cards=[foreign],
            )
        assert excinfo.value.phantom == [foreign]
    finally:
        conn.close()
|
||||
|
||||
|
||||
def test_complete_prose_scan_flags_nonexistent_ids(kanban_home):
    """An unresolved ``t_<hex>`` id in the summary is flagged, not blocked.

    The completion itself must return ``True``; afterwards the task's events
    must include a ``suspected_hallucinated_references`` entry whose JSON
    payload lists the unknown id under ``phantom_refs``.
    """
    import json

    db = kb.connect()
    try:
        parent_id = kb.create_task(db, title="parent", assignee="x")
        completed = kb.complete_task(
            db,
            parent_id,
            summary="also saw t_abcd1234ffff failing in CI",
        )
        assert completed is True

        event_rows = db.execute(
            "SELECT kind, payload FROM task_events WHERE task_id=? ORDER BY id",
            (parent_id,),
        ).fetchall()
        recorded_kinds = [row["kind"] for row in event_rows]
        assert "suspected_hallucinated_references" in recorded_kinds

        # Decode every matching payload, then inspect the first one.
        suspect_payloads = [
            json.loads(row["payload"])
            for row in event_rows
            if row["kind"] == "suspected_hallucinated_references"
        ]
        first_suspect = suspect_payloads[0]
        assert "t_abcd1234ffff" in first_suspect["phantom_refs"]
    finally:
        db.close()
|
||||
|
||||
|
||||
def test_complete_prose_scan_ignores_existing_ids(kanban_home):
    """A summary mentioning a real task id produces no suspicion event."""
    db = kb.connect()
    try:
        referenced_id = kb.create_task(db, title="other", assignee="x")
        parent_id = kb.create_task(db, title="parent", assignee="x")

        completed = kb.complete_task(
            db,
            parent_id,
            summary=f"depended on {referenced_id}, now done",
        )
        assert completed is True

        event_rows = db.execute(
            "SELECT kind FROM task_events WHERE task_id=? ORDER BY id",
            (parent_id,),
        ).fetchall()
        recorded_kinds = [row["kind"] for row in event_rows]
        assert "suspected_hallucinated_references" not in recorded_kinds
    finally:
        db.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Recovery helpers (reclaim + reassign)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_reclaim_task_resets_running_to_ready(kanban_home):
    """``reclaim_task`` frees a live, unexpired claim immediately.

    A running task is fabricated with a claim expiring an hour from now.
    ``release_stale_claims`` must leave it alone (returns 0), whereas
    ``reclaim_task`` must return ``True``, put the task back to ``ready``
    with ``claim_lock`` and ``worker_pid`` cleared, and record exactly one
    ``reclaimed`` event carrying ``manual: True`` and the supplied reason.
    """
    import json
    import secrets
    import time

    db = kb.connect()
    try:
        task_id = kb.create_task(db, title="stuck", assignee="broken")

        # Fabricate an active claim whose expiry is still in the future.
        claim_token = secrets.token_hex(8)
        expires_at = int(time.time()) + 3600
        db.execute(
            "UPDATE tasks SET status='running', claim_lock=?, claim_expires=?, "
            "worker_pid=? WHERE id=?",
            (claim_token, expires_at, 12345, task_id),
        )
        db.execute(
            "INSERT INTO task_runs (task_id, status, claim_lock, claim_expires, "
            "worker_pid, started_at) VALUES (?, 'running', ?, ?, ?, ?)",
            (task_id, claim_token, expires_at, 12345, int(time.time())),
        )
        inserted_run = db.execute("SELECT last_insert_rowid()").fetchone()[0]
        db.execute(
            "UPDATE tasks SET current_run_id=? WHERE id=?",
            (inserted_run, task_id),
        )
        db.commit()

        # The stale-claim sweeper ignores claims that have not expired yet.
        assert kb.release_stale_claims(db) == 0

        # The manual reclaim acts regardless of the expiry time.
        assert kb.reclaim_task(db, task_id, reason="test reason") is True

        task_row = db.execute(
            "SELECT status, claim_lock, worker_pid FROM tasks WHERE id=?",
            (task_id,),
        ).fetchone()
        assert task_row["status"] == "ready"
        assert task_row["claim_lock"] is None
        assert task_row["worker_pid"] is None

        event_rows = db.execute(
            "SELECT payload FROM task_events WHERE task_id=? AND kind='reclaimed'",
            (task_id,),
        ).fetchall()
        reclaim_payloads = [json.loads(row["payload"]) for row in event_rows]
        assert len(reclaim_payloads) == 1
        only_payload = reclaim_payloads[0]
        assert only_payload.get("manual") is True
        assert only_payload.get("reason") == "test reason"
    finally:
        db.close()
|
||||
|
||||
|
||||
def test_reclaim_task_returns_false_for_already_ready(kanban_home):
    """``reclaim_task`` on a freshly created (non-running) task returns False."""
    db = kb.connect()
    try:
        task_id = kb.create_task(db, title="ready task", assignee="x")
        outcome = kb.reclaim_task(db, task_id)
        assert outcome is False
    finally:
        db.close()
|
||||
|
||||
|
||||
def test_reassign_task_refuses_running_without_reclaim_first(kanban_home):
    """Reassigning a running task without ``reclaim_first`` is refused.

    ``reassign_task`` must return ``False`` and the stored assignee must
    remain the original profile.
    """
    db = kb.connect()
    try:
        task_id = kb.create_task(db, title="running", assignee="orig")

        # Force the task into a claimed, running state.
        db.execute(
            "UPDATE tasks SET status='running', claim_lock=? WHERE id=?",
            ("live", task_id),
        )
        db.commit()

        outcome = kb.reassign_task(db, task_id, "new")
        assert outcome is False

        # The refused call must not have touched the assignee column.
        task_row = db.execute(
            "SELECT assignee FROM tasks WHERE id=?", (task_id,),
        ).fetchone()
        assert task_row["assignee"] == "orig"
    finally:
        db.close()
|
||||
|
||||
|
||||
def test_reassign_task_with_reclaim_first_switches_profile(kanban_home):
    """``reclaim_first=True`` reclaims and reassigns a running task at once.

    After the call the task must be ``ready`` and assigned to the new
    profile.
    """
    import secrets
    import time

    db = kb.connect()
    try:
        task_id = kb.create_task(db, title="switch me", assignee="orig")

        # Fabricate a live claim plus its matching run row.
        claim_token = secrets.token_hex(8)
        expires_at = int(time.time()) + 3600
        db.execute(
            "UPDATE tasks SET status='running', claim_lock=?, claim_expires=?, "
            "worker_pid=? WHERE id=?",
            (claim_token, expires_at, 99999, task_id),
        )
        db.execute(
            "INSERT INTO task_runs (task_id, status, claim_lock, claim_expires, "
            "worker_pid, started_at) VALUES (?, 'running', ?, ?, ?, ?)",
            (task_id, claim_token, expires_at, 99999, int(time.time())),
        )
        inserted_run = db.execute("SELECT last_insert_rowid()").fetchone()[0]
        db.execute(
            "UPDATE tasks SET current_run_id=? WHERE id=?",
            (inserted_run, task_id),
        )
        db.commit()

        outcome = kb.reassign_task(
            db,
            task_id,
            "new-profile",
            reclaim_first=True,
            reason="switch model",
        )
        assert outcome is True

        task_row = db.execute(
            "SELECT assignee, status FROM tasks WHERE id=?", (task_id,),
        ).fetchone()
        assert task_row["assignee"] == "new-profile"
        assert task_row["status"] == "ready"
    finally:
        db.close()
|
||||
|
|
|
|||
|
|
@ -1117,3 +1117,221 @@ def test_home_channels_empty_when_no_homes_configured(client, monkeypatch):
|
|||
r = client.get("/api/plugins/kanban/home-channels")
|
||||
assert r.status_code == 200
|
||||
assert r.json()["home_channels"] == []
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Recovery endpoints (reclaim + reassign) and warnings field
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_board_surfaces_warnings_field_for_hallucinated_completions(client):
    """The board payload exposes ``warnings`` for a blocked completion.

    After a completion is rejected with ``HallucinatedCardsError``, the
    ``/board`` response for that task must carry a ``warnings`` object with
    a ``count`` of at least 1 and ``completion_blocked_hallucination`` among
    its ``kinds``.
    """
    import pytest

    db = kb.connect()
    try:
        parent_id = kb.create_task(db, title="parent", assignee="alice")
        real_id = kb.create_task(
            db, title="real", assignee="x", created_by="alice",
        )

        with pytest.raises(kb.HallucinatedCardsError):
            kb.complete_task(
                db,
                parent_id,
                summary="claimed phantom",
                created_cards=[real_id, "t_deadbeefcafe"],
            )
    finally:
        db.close()

    response = client.get("/api/plugins/kanban/board")
    assert response.status_code == 200

    board = response.json()
    # Flatten every column's task list into one sequence.
    all_tasks = [
        task for column in board["columns"] for task in column["tasks"]
    ]
    parent_entry = next(
        task for task in all_tasks if task["title"] == "parent"
    )
    assert parent_entry.get("warnings") is not None

    warnings = parent_entry["warnings"]
    assert warnings["count"] >= 1
    assert "completion_blocked_hallucination" in warnings["kinds"]
|
||||
|
||||
|
||||
def test_board_warnings_cleared_after_clean_completion(client):
    """A successful retry removes the warning left by a blocked attempt.

    The first completion is rejected for a phantom id; the second, without
    it, succeeds. The board entry (fetched with ``include_archived``) must
    then report no ``warnings``.
    """
    import pytest

    db = kb.connect()
    try:
        parent_id = kb.create_task(db, title="parent", assignee="alice")
        real_id = kb.create_task(
            db, title="real", assignee="x", created_by="alice",
        )

        with pytest.raises(kb.HallucinatedCardsError):
            kb.complete_task(
                db,
                parent_id,
                summary="first attempt phantom",
                created_cards=[real_id, "t_phantom11"],
            )

        # Retry with only the genuine card id; this one must go through.
        retried = kb.complete_task(
            db,
            parent_id,
            summary="retry without phantom",
            created_cards=[real_id],
        )
        assert retried is True
    finally:
        db.close()

    response = client.get(
        "/api/plugins/kanban/board", params={"include_archived": True},
    )
    assert response.status_code == 200

    board = response.json()
    all_tasks = [
        task for column in board["columns"] for task in column["tasks"]
    ]
    parent_entry = next(
        task for task in all_tasks if task["title"] == "parent"
    )
    # No lingering badge once the task completed cleanly.
    assert parent_entry.get("warnings") is None
|
||||
|
||||
|
||||
def test_reclaim_endpoint_releases_running_claim(client):
    """POST ``/tasks/<id>/reclaim`` releases a live claim.

    The response must be 200 with ``ok`` true and the task id echoed back;
    the task row must afterwards be ``ready`` with ``claim_lock`` cleared.
    """
    import secrets

    setup_db = kb.connect()
    try:
        task_id = kb.create_task(setup_db, title="running", assignee="x")

        # Fabricate a claim that is still an hour away from expiring.
        claim_token = secrets.token_hex(8)
        expires_at = int(time.time()) + 3600
        setup_db.execute(
            "UPDATE tasks SET status='running', claim_lock=?, claim_expires=?, "
            "worker_pid=? WHERE id=?",
            (claim_token, expires_at, 99999, task_id),
        )
        setup_db.execute(
            "INSERT INTO task_runs (task_id, status, claim_lock, claim_expires, "
            "worker_pid, started_at) VALUES (?, 'running', ?, ?, ?, ?)",
            (task_id, claim_token, expires_at, 99999, int(time.time())),
        )
        inserted_run = setup_db.execute(
            "SELECT last_insert_rowid()"
        ).fetchone()[0]
        setup_db.execute(
            "UPDATE tasks SET current_run_id=? WHERE id=?",
            (inserted_run, task_id),
        )
        setup_db.commit()
    finally:
        setup_db.close()

    response = client.post(
        f"/api/plugins/kanban/tasks/{task_id}/reclaim",
        json={"reason": "browser recovery"},
    )
    assert response.status_code == 200, response.text
    payload = response.json()
    assert payload["ok"] is True
    assert payload["task_id"] == task_id

    # Re-read the row on a fresh connection to confirm the reset.
    verify_db = kb.connect()
    try:
        task_row = verify_db.execute(
            "SELECT status, claim_lock FROM tasks WHERE id=?", (task_id,),
        ).fetchone()
        assert task_row["status"] == "ready"
        assert task_row["claim_lock"] is None
    finally:
        verify_db.close()
|
||||
|
||||
|
||||
def test_reclaim_endpoint_409_for_non_running_task(client):
    """POST ``/tasks/<id>/reclaim`` on a freshly created task answers 409."""
    db = kb.connect()
    try:
        task_id = kb.create_task(db, title="ready", assignee="x")
    finally:
        db.close()

    response = client.post(
        f"/api/plugins/kanban/tasks/{task_id}/reclaim", json={},
    )
    assert response.status_code == 409
|
||||
|
||||
|
||||
def test_reassign_endpoint_switches_profile(client):
    """POST ``/tasks/<id>/reassign`` updates the task's assignee.

    Both the JSON response and the stored row must show the new profile.
    """
    setup_db = kb.connect()
    try:
        task_id = kb.create_task(setup_db, title="task", assignee="orig")
    finally:
        setup_db.close()

    response = client.post(
        f"/api/plugins/kanban/tasks/{task_id}/reassign",
        json={"profile": "newbie", "reclaim_first": False},
    )
    assert response.status_code == 200, response.text
    assert response.json()["assignee"] == "newbie"

    # Confirm the change was persisted, using a fresh connection.
    verify_db = kb.connect()
    try:
        task_row = verify_db.execute(
            "SELECT assignee FROM tasks WHERE id=?", (task_id,),
        ).fetchone()
        assert task_row["assignee"] == "newbie"
    finally:
        verify_db.close()
|
||||
|
||||
|
||||
def test_reassign_endpoint_409_on_running_without_reclaim(client):
    """Reassigning a running task with ``reclaim_first`` false answers 409."""
    import secrets

    db = kb.connect()
    try:
        task_id = kb.create_task(db, title="running", assignee="orig")

        # Mark the task as running under some claim token.
        claim_token = secrets.token_hex(4)
        db.execute(
            "UPDATE tasks SET status='running', claim_lock=? WHERE id=?",
            (claim_token, task_id),
        )
        db.commit()
    finally:
        db.close()

    response = client.post(
        f"/api/plugins/kanban/tasks/{task_id}/reassign",
        json={"profile": "new", "reclaim_first": False},
    )
    assert response.status_code == 409
|
||||
|
||||
|
||||
def test_reassign_endpoint_with_reclaim_first_succeeds_on_running(client):
    """``reclaim_first: true`` lets the endpoint reassign a running task.

    The response must be 200 with the new assignee, and the stored row must
    be ``ready`` and assigned to the new profile.
    """
    import secrets

    setup_db = kb.connect()
    try:
        task_id = kb.create_task(setup_db, title="running", assignee="orig")

        # Fabricate a live claim on the task ...
        claim_token = secrets.token_hex(4)
        task_expiry = int(time.time()) + 3600
        setup_db.execute(
            "UPDATE tasks SET status='running', claim_lock=?, claim_expires=?, "
            "worker_pid=? WHERE id=?",
            (claim_token, task_expiry, 1234, task_id),
        )

        # ... and the run row that the task points at.
        run_expiry = int(time.time()) + 3600
        run_started = int(time.time())
        setup_db.execute(
            "INSERT INTO task_runs (task_id, status, claim_lock, claim_expires, "
            "worker_pid, started_at) VALUES (?, 'running', ?, ?, ?, ?)",
            (task_id, claim_token, run_expiry, 1234, run_started),
        )
        inserted_run = setup_db.execute(
            "SELECT last_insert_rowid()"
        ).fetchone()[0]
        setup_db.execute(
            "UPDATE tasks SET current_run_id=? WHERE id=?",
            (inserted_run, task_id),
        )
        setup_db.commit()
    finally:
        setup_db.close()

    response = client.post(
        f"/api/plugins/kanban/tasks/{task_id}/reassign",
        json={"profile": "new", "reclaim_first": True, "reason": "switch"},
    )
    assert response.status_code == 200, response.text
    assert response.json()["assignee"] == "new"

    # Verify the persisted state on a fresh connection.
    verify_db = kb.connect()
    try:
        task_row = verify_db.execute(
            "SELECT status, assignee FROM tasks WHERE id=?", (task_id,),
        ).fetchone()
        assert task_row["status"] == "ready"
        assert task_row["assignee"] == "new"
    finally:
        verify_db.close()
|
||||
|
|
|
|||
|
|
@ -210,6 +210,20 @@ def _handle_complete(args: dict, **kw) -> str:
|
|||
summary = args.get("summary")
|
||||
metadata = args.get("metadata")
|
||||
result = args.get("result")
|
||||
created_cards = args.get("created_cards")
|
||||
if created_cards is not None:
|
||||
if isinstance(created_cards, str):
|
||||
# Accept a single id as a string for convenience.
|
||||
created_cards = [created_cards]
|
||||
if not isinstance(created_cards, (list, tuple)):
|
||||
return tool_error(
|
||||
f"created_cards must be a list of task ids, got "
|
||||
f"{type(created_cards).__name__}"
|
||||
)
|
||||
# Normalise: strings only, stripped, non-empty.
|
||||
created_cards = [
|
||||
str(c).strip() for c in created_cards if str(c).strip()
|
||||
]
|
||||
if not (summary or result):
|
||||
return tool_error(
|
||||
"provide at least one of: summary (preferred), result"
|
||||
|
|
@ -221,10 +235,23 @@ def _handle_complete(args: dict, **kw) -> str:
|
|||
try:
|
||||
kb, conn = _connect()
|
||||
try:
|
||||
ok = kb.complete_task(
|
||||
conn, tid,
|
||||
result=result, summary=summary, metadata=metadata,
|
||||
)
|
||||
try:
|
||||
ok = kb.complete_task(
|
||||
conn, tid,
|
||||
result=result, summary=summary, metadata=metadata,
|
||||
created_cards=created_cards,
|
||||
)
|
||||
except kb.HallucinatedCardsError as hall_err:
|
||||
# Structured rejection — surface the phantom ids so the
|
||||
# worker can retry with a corrected list or drop the
|
||||
# field. Audit event already landed in the DB.
|
||||
return tool_error(
|
||||
f"kanban_complete blocked: the following created_cards "
|
||||
f"do not exist or were not created by this worker: "
|
||||
f"{', '.join(hall_err.phantom)}. "
|
||||
f"Either omit them, use only ids returned from successful "
|
||||
f"kanban_create calls, or remove the created_cards field."
|
||||
)
|
||||
if not ok:
|
||||
return tool_error(
|
||||
f"could not complete {tid} (unknown id or already terminal)"
|
||||
|
|
@ -452,7 +479,11 @@ KANBAN_COMPLETE_SCHEMA = {
|
|||
"human-readable 1-3 sentence description of what you did; put "
|
||||
"machine-readable facts in ``metadata`` (changed_files, "
|
||||
"tests_run, decisions, findings, etc). At least one of "
|
||||
"``summary`` or ``result`` is required."
|
||||
"``summary`` or ``result`` is required. If you created new "
|
||||
"tasks via ``kanban_create`` during this run, list their ids "
|
||||
"in ``created_cards`` — the kernel verifies them so phantom "
|
||||
"references are caught before they leak into downstream "
|
||||
"automation."
|
||||
),
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
|
|
@ -487,6 +518,22 @@ KANBAN_COMPLETE_SCHEMA = {
|
|||
"callers that still set --result on the CLI."
|
||||
),
|
||||
},
|
||||
"created_cards": {
|
||||
"type": "array",
|
||||
"items": {"type": "string"},
|
||||
"description": (
|
||||
"Optional structured manifest of task ids you "
|
||||
"created via ``kanban_create`` during this run. "
|
||||
"The kernel verifies each id exists and was "
|
||||
"created by this worker's profile; any phantom "
|
||||
"id blocks the completion with an error listing "
|
||||
"what went wrong (auditable in the task's events). "
|
||||
"Only list ids you got back from a successful "
|
||||
"``kanban_create`` call — do not invent or "
|
||||
"remember ids from prose. Omit the field if you "
|
||||
"did not create any cards."
|
||||
),
|
||||
},
|
||||
},
|
||||
"required": [],
|
||||
},
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue