diff --git a/hermes_cli/kanban.py b/hermes_cli/kanban.py index f166582d82..9f293c555e 100644 --- a/hermes_cli/kanban.py +++ b/hermes_cli/kanban.py @@ -337,6 +337,28 @@ def build_parser(parent_subparsers: argparse._SubParsersAction) -> argparse.Argu help="Human-readable reason (recorded on the reclaimed event)", ) + # --- diagnostics (board-wide health) --- + p_diag = sub.add_parser( + "diagnostics", + aliases=["diag"], + help="List active diagnostics on the current board", + ) + p_diag.add_argument( + "--severity", + choices=["warning", "error", "critical"], + default=None, + help="Only show diagnostics at or above this severity", + ) + p_diag.add_argument( + "--task", + default=None, + help="Only show diagnostics for one task id", + ) + p_diag.add_argument( + "--json", action="store_true", + help="Emit JSON (structured) instead of the default human table", + ) + # --- link / unlink --- p_link = sub.add_parser("link", help="Add a parent->child dependency") p_link.add_argument("parent_id") @@ -628,6 +650,8 @@ def kanban_command(args: argparse.Namespace) -> int: "assign": _cmd_assign, "reclaim": _cmd_reclaim, "reassign": _cmd_reassign, + "diagnostics": _cmd_diagnostics, + "diag": _cmd_diagnostics, "link": _cmd_link, "unlink": _cmd_unlink, "claim": _cmd_claim, @@ -1091,6 +1115,31 @@ def _cmd_show(args: argparse.Namespace) -> int: if task.skills: print(f" skills: {', '.join(task.skills)}") print(f" created: {_fmt_ts(task.created_at)} by {task.created_by or '-'}") + + # Diagnostics section — surface active distress signals at the top + # of show output so CLI users see them before scrolling through + # comments / runs. 
def _format_diag_data(data: dict) -> str:
    """Render a diagnostic ``data`` payload as ``k=v | k=v``.

    List values are comma-joined; everything else is formatted with
    ``str()``. Returns an empty string for an empty payload.
    """
    bits = []
    for key, value in data.items():
        if isinstance(value, list):
            bits.append(f"{key}={','.join(str(x) for x in value)}")
        else:
            bits.append(f"{key}={value}")
    return " | ".join(bits)


def _cmd_diagnostics(args: argparse.Namespace) -> int:
    """List active diagnostics on the board.

    Wraps the same rule engine the dashboard uses
    (``kanban_diagnostics.compute_task_diagnostics``), so CLI output
    matches what the UI shows.

    Reads from ``args`` (all optional):

    * ``task`` -- restrict to a single task id; unknown ids print an
      error on stderr and return 1.
    * ``severity`` -- minimum severity; diagnostics *at or above* this
      rung of ``kanban_diagnostics.SEVERITY_ORDER`` are kept.
    * ``json`` -- emit a JSON array instead of the human table.

    Returns the process exit code (0 on success, 1 for an unknown task).
    """
    from hermes_cli import kanban_diagnostics as kd

    only_task = getattr(args, "task", None)
    diags_by_task: dict = {}
    # task_id -> title/status/assignee for the table / JSON output.
    meta: dict[str, dict] = {}

    with kb.connect() as conn:
        if only_task:
            # One-task mode.
            task = kb.get_task(conn, only_task)
            if task is None:
                print(f"no such task: {only_task}", file=sys.stderr)
                return 1
            diags_by_task[only_task] = kd.compute_task_diagnostics(
                task,
                kb.list_events(conn, only_task),
                kb.list_runs(conn, only_task),
            )
            row = conn.execute(
                "SELECT id, title, status, assignee FROM tasks WHERE id = ?",
                (only_task,),
            ).fetchone()
            if row is not None:
                meta[only_task] = {
                    "title": row["title"],
                    "status": row["status"],
                    "assignee": row["assignee"],
                }
        else:
            # Fleet mode: all non-archived tasks + their events/runs.
            # The child tables are filtered with a subquery rather than
            # one bound variable per task id, so large boards cannot
            # overflow SQLite's host-parameter limit.
            rows = conn.execute(
                "SELECT * FROM tasks WHERE status != 'archived'"
            ).fetchall()
            events_by: dict = {r["id"]: [] for r in rows}
            runs_by: dict = {r["id"]: [] for r in rows}
            if rows:
                for ev in conn.execute(
                    "SELECT * FROM task_events WHERE task_id IN "
                    "(SELECT id FROM tasks WHERE status != 'archived') "
                    "ORDER BY id"
                ):
                    events_by.setdefault(ev["task_id"], []).append(ev)
                for run in conn.execute(
                    "SELECT * FROM task_runs WHERE task_id IN "
                    "(SELECT id FROM tasks WHERE status != 'archived') "
                    "ORDER BY id"
                ):
                    runs_by.setdefault(run["task_id"], []).append(run)
            for r in rows:
                tid = r["id"]
                found = kd.compute_task_diagnostics(
                    r, events_by.get(tid, []), runs_by.get(tid, [])
                )
                if found:
                    diags_by_task[tid] = found
                    meta[tid] = {
                        "title": r["title"],
                        "status": r["status"],
                        "assignee": r["assignee"],
                    }

    # Severity filter: keep diagnostics at or above the requested rung
    # (the --severity help text promises a floor, not an exact match).
    min_sev = getattr(args, "severity", None)
    if min_sev:
        rank = {name: idx for idx, name in enumerate(kd.SEVERITY_ORDER)}
        floor = rank.get(min_sev, 0)
        filtered = {}
        for tid, found in diags_by_task.items():
            kept = [d for d in found if rank.get(d.severity, -1) >= floor]
            if kept:
                filtered[tid] = kept
        diags_by_task = filtered

    if getattr(args, "json", False):
        out_json = [
            {
                "task_id": tid,
                **meta.get(tid, {}),
                "diagnostics": [d.to_dict() for d in found],
            }
            for tid, found in diags_by_task.items()
        ]
        print(json.dumps(out_json, indent=2, ensure_ascii=False))
        return 0

    # ``--task`` keeps an entry even when its list is empty, so test the
    # number of diagnostics rather than the number of dict entries.
    total = sum(len(found) for found in diags_by_task.values())
    if total == 0:
        print("No active diagnostics on this board.")
        return 0

    # Human-readable summary: grouped by task, severity-marked, with
    # suggested actions inline.
    sev_marker = {"warning": "⚠", "error": "!!", "critical": "!!!"}
    print(
        f"{total} active diagnostic(s) across "
        f"{len(diags_by_task)} task(s):\n"
    )
    for tid, found in diags_by_task.items():
        info = meta.get(tid, {})
        title = info.get("title") or "(untitled)"
        status = info.get("status") or "?"
        assignee = info.get("assignee") or "(unassigned)"
        print(f"  {tid}  {status:8s}  @{assignee:18s}  {title}")
        for d in found:
            print(
                f"    {sev_marker.get(d.severity, '?')} "
                f"[{d.severity}] {d.kind}: {d.title}"
            )
            if d.data:
                # Compact key=value pairs on one line.
                rendered = _format_diag_data(d.data)
                if rendered:
                    print(f"      data: {rendered}")
            # Only the suggested actions, to keep the table tight.
            for action in d.actions:
                if action.suggested:
                    print(f"      → {action.label}")
        print()
    return 0
import json
import logging
import time
from dataclasses import dataclass, field
from typing import Any, Callable, Iterable, Optional

logger = logging.getLogger(__name__)


# Severity rungs, ordered least -> most urgent. The UI colors them
# amber (warning), orange (error), red (critical). Sorted outputs put
# critical first so operators see the worst fires at the top.
SEVERITY_ORDER = ("warning", "error", "critical")

# Severity name -> rank, precomputed so sorting / comparisons don't
# rescan SEVERITY_ORDER for every diagnostic.
_SEVERITY_RANK = {name: rank for rank, name in enumerate(SEVERITY_ORDER)}

# Event kinds that count as a "clean" outcome and supersede earlier
# hallucination events.
_CLEAN_EVENT_KINDS = ("completed", "edited")


@dataclass
class DiagnosticAction:
    """A single recovery action attached to a diagnostic.

    The ``kind`` determines how both the UI and CLI render it:

    * ``reclaim`` / ``reassign`` -- POST to the matching /tasks/:id/*
      endpoint; dashboard wires into the existing recovery popover.
    * ``unblock`` -- PATCH status back to ``ready`` (for stuck-blocked
      diagnostics).
    * ``cli_hint`` -- print/copy a shell command. No HTTP side effect.
    * ``open_docs`` -- deep-link to the docs URL named in ``payload.url``.
    * ``comment`` -- nudge the operator to add a comment (for
      stuck-blocked tasks that need human input).

    ``suggested=True`` marks the action as the recommended first step;
    the UI highlights it. Multiple actions can be suggested if they're
    equally valid.
    """

    kind: str
    label: str
    payload: dict = field(default_factory=dict)
    suggested: bool = False

    def to_dict(self) -> dict:
        """Return a JSON-serialisable dict of this action."""
        return {
            "kind": self.kind,
            "label": self.label,
            "payload": self.payload,
            "suggested": self.suggested,
        }


@dataclass
class Diagnostic:
    """One active distress signal on a task."""

    kind: str
    severity: str  # "warning" | "error" | "critical"
    title: str
    detail: str
    actions: list[DiagnosticAction] = field(default_factory=list)
    first_seen_at: int = 0
    last_seen_at: int = 0
    count: int = 1
    # Optional: the run id this diagnostic is scoped to. None = task-wide.
    run_id: Optional[int] = None
    # Optional structured payload for the UI (phantom ids, failure count).
    data: dict = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Return a JSON-serialisable dict, with actions expanded."""
        return {
            "kind": self.kind,
            "severity": self.severity,
            "title": self.title,
            "detail": self.detail,
            "actions": [a.to_dict() for a in self.actions],
            "first_seen_at": self.first_seen_at,
            "last_seen_at": self.last_seen_at,
            "count": self.count,
            "run_id": self.run_id,
            "data": self.data,
        }


# ---------------------------------------------------------------------------
# Rule helpers
# ---------------------------------------------------------------------------

def _task_field(task, name, default=None):
    """Read a field from a task regardless of representation.

    Callers pass sqlite3.Row (mapping access via ``[]`` and ``.keys()``
    but no attribute access), dataclass-style objects (attribute
    access), or plain dicts. This normalises them so rule functions
    don't have to branch on type each time. Returns ``default`` when
    ``task`` is None or the field is absent.
    """
    if task is None:
        return default
    if isinstance(task, dict):
        return task.get(name, default)
    keys = getattr(task, "keys", None)
    if callable(keys):
        # Row-like objects raise if the key isn't a column in the
        # query, so check membership first and fall through on error.
        try:
            if name in keys():
                return task[name]
        except (KeyError, IndexError, TypeError, AttributeError):
            pass
    return getattr(task, name, default)


def _parse_payload(ev) -> dict:
    """Return ``ev.payload`` as a dict.

    Tolerates the payload being a dict or a JSON string. Anything that
    does not decode to a JSON *object* (invalid JSON, a list, a scalar,
    ``None``) yields ``{}`` so callers can always use ``.get``.
    """
    raw = _task_field(ev, "payload", None)
    if isinstance(raw, (str, bytes, bytearray)):
        try:
            raw = json.loads(raw)
        except (ValueError, TypeError):
            return {}
    return raw if isinstance(raw, dict) else {}


def _event_kind(ev) -> str:
    """Return the event's ``kind`` ('' when missing)."""
    return _task_field(ev, "kind", "") or ""


def _event_ts(ev) -> int:
    """Return the event's ``created_at`` as an int (0 when missing/invalid)."""
    raw = _task_field(ev, "created_at", 0)
    try:
        return int(raw or 0)
    except (TypeError, ValueError):
        return 0


def _active_hallucination_events(
    events: Iterable[Any],
    kind: str,
) -> list[Any]:
    """Return events of ``kind`` not superseded by a later clean event.

    Walks ``events`` once in the order given: each ``completed`` /
    ``edited`` event resets the accumulator; each matching event gets
    appended. Events must therefore be in arrival (id) order.
    """
    active: list[Any] = []
    for ev in events:
        ev_kind = _event_kind(ev)
        if ev_kind in _CLEAN_EVENT_KINDS:
            active.clear()
        elif ev_kind == kind:
            active.append(ev)
    return active


def _latest_clean_event_ts(events: Iterable[Any]) -> int:
    """Timestamp of the most recent clean completion / edit event.

    Returns 0 when there is none. Hallucination rules use
    ``_active_hallucination_events`` instead because they need strict
    ordering rather than timestamps.
    """
    latest = 0
    for ev in events:
        if _event_kind(ev) in _CLEAN_EVENT_KINDS:
            latest = max(latest, _event_ts(ev))
    return latest


def _unique_payload_items(events: Iterable[Any], key: str) -> list:
    """Collect ``payload[key]`` items across ``events``, de-duplicated
    in first-seen order. A non-list value is treated as a single item."""
    seen: list = []
    for ev in events:
        items = _parse_payload(ev).get(key) or []
        if not isinstance(items, (list, tuple, set)):
            items = [items]
        for item in items:
            if item not in seen:
                seen.append(item)
    return seen


def _error_snippet(raw: Any, limit: int = 500, headline_limit: int = 160) -> tuple:
    """Return ``(snippet, headline)`` for an error value.

    ``snippet`` is the stripped text truncated to ``limit`` characters
    (with a trailing ellipsis when cut); ``headline`` is its first line
    capped at ``headline_limit``. Both are '' when there is no text.
    """
    text = str(raw).strip() if raw else ""
    if not text:
        return "", ""
    snippet = text[:limit] + ("…" if len(text) > limit else "")
    return snippet, snippet.splitlines()[0][:headline_limit]


# Standard always-available actions. Every diagnostic can offer these as
# fallbacks regardless of kind.
def _generic_recovery_actions(task: Any, *, running: bool) -> list[DiagnosticAction]:
    """Return the baseline recovery actions: ``reclaim`` (only while the
    task is running) followed by ``reassign``."""
    out: list[DiagnosticAction] = []
    if running:
        out.append(DiagnosticAction(
            kind="reclaim",
            label="Reclaim task",
            payload={},
        ))
    out.append(DiagnosticAction(
        kind="reassign",
        label="Reassign to different profile",
        payload={"reclaim_first": running},
    ))
    return out


# ---------------------------------------------------------------------------
# Rule implementations
# ---------------------------------------------------------------------------

# Each rule takes (task, events, runs, now_ts, config) and returns
# zero or more Diagnostic instances. ``events`` / ``runs`` are lists of
# row / dataclass objects or plain dicts of the same shape.

RuleFn = Callable[[Any, list[Any], list[Any], int, dict], list[Diagnostic]]


def _rule_hallucinated_cards(task, events, runs, now, cfg) -> list[Diagnostic]:
    """Fire on active ``completion_blocked_hallucination`` events.

    Auto-clears when a ``completed`` or ``edited`` event follows the
    blocked event. Phantom ids are read from ``payload.phantom_cards``.
    """
    hits = _active_hallucination_events(events, "completion_blocked_hallucination")
    if not hits:
        return []
    running = _task_field(task, "status") == "running"
    actions: list[DiagnosticAction] = [
        DiagnosticAction(
            kind="comment",
            label="Add a comment explaining what to do",
            suggested=False,
        ),
    ]
    actions.extend(_generic_recovery_actions(task, running=running))
    return [Diagnostic(
        kind="hallucinated_cards",
        severity="error",
        title="Worker claimed cards that don't exist",
        detail=(
            "The completing worker declared created_cards that either didn't "
            "exist or weren't created by its profile. The completion was "
            "blocked and the task stayed in its prior state. "
            "Usually means the worker hallucinated ids instead of capturing "
            "return values from kanban_create."
        ),
        actions=actions,
        first_seen_at=_event_ts(hits[0]),
        last_seen_at=_event_ts(hits[-1]),
        count=len(hits),
        data={"phantom_ids": _unique_payload_items(hits, "phantom_cards")},
    )]


def _rule_prose_phantom_refs(task, events, runs, now, cfg) -> list[Diagnostic]:
    """Fire (as a warning) on active ``suspected_hallucinated_references``
    events.

    Auto-clears when a ``completed`` or ``edited`` event arrives after
    the suspected event. Ids are read from ``payload.phantom_refs``.
    """
    hits = _active_hallucination_events(events, "suspected_hallucinated_references")
    if not hits:
        return []
    running = _task_field(task, "status") == "running"
    return [Diagnostic(
        kind="prose_phantom_refs",
        severity="warning",
        title="Completion summary references unknown task ids",
        detail=(
            "The completion summary mentions task ids that don't resolve "
            "in this board's database. The completion itself succeeded, "
            "but downstream consumers parsing the summary may be pointed "
            "at cards that never existed."
        ),
        actions=_generic_recovery_actions(task, running=running),
        first_seen_at=_event_ts(hits[0]),
        last_seen_at=_event_ts(hits[-1]),
        count=len(hits),
        data={"phantom_refs": _unique_payload_items(hits, "phantom_refs")},
    )]


def _rule_repeated_spawn_failures(task, events, runs, now, cfg) -> list[Diagnostic]:
    """Fire when the task's ``spawn_failures`` counter reaches the
    threshold.

    Threshold: cfg["spawn_failure_threshold"] (default 3, floored at 1).
    Severity escalates to ``critical`` at twice the threshold.
    """
    threshold = max(int(cfg.get("spawn_failure_threshold", 3)), 1)
    try:
        failures = int(_task_field(task, "spawn_failures", 0) or 0)
    except (TypeError, ValueError):
        return []
    if failures < threshold:
        return []
    last_err = _task_field(task, "last_spawn_error")
    assignee = _task_field(task, "assignee")
    actions: list[DiagnosticAction] = []
    # Profile-specific hints only make sense for a named profile.
    if assignee and assignee != "default":
        actions.append(DiagnosticAction(
            kind="cli_hint",
            label=f"Verify profile: hermes -p {assignee} doctor",
            payload={"command": f"hermes -p {assignee} doctor"},
            suggested=True,
        ))
        actions.append(DiagnosticAction(
            kind="cli_hint",
            label=f"Fix profile auth: hermes -p {assignee} auth",
            payload={"command": f"hermes -p {assignee} auth"},
        ))
    actions.extend(_generic_recovery_actions(task, running=False))
    severity = "critical" if failures >= threshold * 2 else "error"
    snippet, headline = _error_snippet(last_err)
    if snippet:
        title = f"Agent spawn failed {failures}x: {headline}"
        detail = (
            f"The dispatcher tried to launch a worker {failures} times "
            f"and failed every time. Full last error:\n\n{snippet}\n\n"
            "Common causes: missing config.yaml, bad venv/PATH, or "
            "missing credentials for the profile's configured provider."
        )
    else:
        title = f"Agent spawn failed {failures}x (no error recorded)"
        detail = (
            f"The dispatcher tried to launch a worker {failures} times "
            "and failed every time, but no error text was captured. "
            "Usually a profile configuration issue — check profile "
            "health with the suggested command."
        )
    return [Diagnostic(
        kind="repeated_spawn_failures",
        severity=severity,
        title=title,
        detail=detail,
        actions=actions,
        first_seen_at=now,
        last_seen_at=now,
        count=failures,
        data={"spawn_failures": failures, "last_spawn_error": last_err},
    )]


def _rule_repeated_crashes(task, events, runs, now, cfg) -> list[Diagnostic]:
    """Fire when the most recent runs keep ending in ``crashed``.

    Walks runs newest-first (by ``id``): ``crashed`` extends the streak,
    ``completed`` / ``reclaimed`` ends it, and any other outcome is
    skipped without ending it.

    Threshold: cfg["crash_threshold"] (default 2, floored at 1).
    Severity escalates to ``critical`` at twice the threshold.
    """
    threshold = max(int(cfg.get("crash_threshold", 2)), 1)
    # ``or 0`` keeps the sort total even if a run carries id=None.
    ordered = sorted(runs, key=lambda r: _task_field(r, "id", 0) or 0)
    consecutive = 0
    last_err = None
    for run in reversed(ordered):
        outcome = _task_field(run, "outcome")
        if outcome == "crashed":
            consecutive += 1
            if last_err is None:
                last_err = _task_field(run, "error")
        elif outcome in ("completed", "reclaimed"):
            # A success (or manual reclaim) breaks the streak.
            break
        # Other outcomes (timed_out, blocked, spawn_failed, gave_up)
        # aren't crash signals: not counted, but they don't break the
        # streak either.
    if consecutive < threshold:
        return []
    task_id = _task_field(task, "id")
    actions: list[DiagnosticAction] = []
    if task_id:
        actions.append(DiagnosticAction(
            kind="cli_hint",
            label=f"Check logs: hermes kanban log {task_id}",
            payload={"command": f"hermes kanban log {task_id}"},
            suggested=True,
        ))
    running = _task_field(task, "status") == "running"
    actions.extend(_generic_recovery_actions(task, running=running))
    severity = "critical" if consecutive >= threshold * 2 else "error"
    # Put the actual error up-front so operators see WHAT broke without
    # opening the logs; truncated because tracebacks can be huge.
    snippet, headline = _error_snippet(last_err)
    if snippet:
        title = f"Agent crashed {consecutive}x: {headline}"
        detail = (
            f"The last {consecutive} runs ended with outcome=crashed. "
            f"Full last error:\n\n{snippet}"
        )
    else:
        title = f"Agent crashed {consecutive}x (no error recorded)"
        detail = (
            f"The last {consecutive} runs ended with outcome=crashed but "
            "no error text was captured. Check the worker log for more."
        )
    return [Diagnostic(
        kind="repeated_crashes",
        severity=severity,
        title=title,
        detail=detail,
        actions=actions,
        first_seen_at=now,
        last_seen_at=now,
        count=consecutive,
        data={"consecutive_crashes": consecutive, "last_error": last_err},
    )]


def _rule_stuck_in_blocked(task, events, runs, now, cfg) -> list[Diagnostic]:
    """Fire when a ``blocked`` task has had no comment / unblock event
    for too long.

    Threshold: cfg["blocked_stale_hours"] (default 24), measured from
    the latest ``blocked`` event. Returns nothing when no ``blocked``
    event with a non-zero timestamp exists.
    """
    hours = float(cfg.get("blocked_stale_hours", 24))
    if _task_field(task, "status") != "blocked":
        return []
    last_blocked_ts = 0
    for ev in events:
        if _event_kind(ev) == "blocked":
            last_blocked_ts = max(last_blocked_ts, _event_ts(ev))
    if last_blocked_ts == 0:
        return []
    age_hours = (now - last_blocked_ts) / 3600.0
    if age_hours < hours:
        return []
    # Any comment / unblock after the block breaks the "stale" signal.
    for ev in events:
        if (_event_kind(ev) in ("commented", "unblocked")
                and _event_ts(ev) > last_blocked_ts):
            return []
    actions: list[DiagnosticAction] = [
        DiagnosticAction(
            kind="comment",
            label="Add a comment / unblock the task",
            suggested=True,
        ),
    ]
    return [Diagnostic(
        kind="stuck_in_blocked",
        severity="warning",
        title=f"Task has been blocked for {int(age_hours)}h",
        detail=(
            f"This task transitioned to blocked {int(age_hours)}h ago and "
            "has had no comments or unblock attempts since. Blocked tasks "
            "are waiting for human input — check the block reason and "
            "either unblock with feedback or answer with a comment."
        ),
        actions=actions,
        first_seen_at=last_blocked_ts,
        last_seen_at=last_blocked_ts,
        count=1,
        data={"blocked_at": last_blocked_ts, "age_hours": round(age_hours, 1)},
    )]


# Registry -- order matters: rules higher on the list render first when
# severity and last_seen_at tie (the sort is stable). Add new rules here.
_RULES: list[RuleFn] = [
    _rule_hallucinated_cards,
    _rule_prose_phantom_refs,
    _rule_repeated_spawn_failures,
    _rule_repeated_crashes,
    _rule_stuck_in_blocked,
]


# Known kinds (for the UI's filter / legend / i18n keys). Update when
# rules are added.
DIAGNOSTIC_KINDS = (
    "hallucinated_cards",
    "prose_phantom_refs",
    "repeated_spawn_failures",
    "repeated_crashes",
    "stuck_in_blocked",
)


DEFAULT_CONFIG = {
    "spawn_failure_threshold": 3,
    "crash_threshold": 2,
    "blocked_stale_hours": 24,
}


def compute_task_diagnostics(
    task,
    events: list,
    runs: list,
    *,
    now: Optional[int] = None,
    config: Optional[dict] = None,
) -> list[Diagnostic]:
    """Run every rule against a single task's state and return a
    severity-sorted list of active diagnostics.

    ``events`` / ``runs`` may be any iterable (or None); they are
    materialised once because several rules iterate them repeatedly.
    ``now`` defaults to the current time; ``config`` overrides entries
    of ``DEFAULT_CONFIG``.

    Sorting: critical first, then error, then warning; ties broken by
    most-recent ``last_seen_at``.
    """
    now_ts = int(now if now is not None else time.time())
    cfg = {**DEFAULT_CONFIG, **(config or {})}
    event_list = list(events or [])
    run_list = list(runs or [])
    out: list[Diagnostic] = []
    for rule in _RULES:
        try:
            out.extend(rule(task, event_list, run_list, now_ts, cfg))
        except Exception:
            # A broken rule must never crash the caller: drop its
            # diagnostics, but leave a trace so the bug is findable.
            logger.warning(
                "kanban diagnostic rule %s failed; skipping",
                getattr(rule, "__name__", rule),
                exc_info=True,
            )
    out.sort(
        key=lambda d: (
            -_SEVERITY_RANK.get(d.severity, -1),
            -(d.last_seen_at or 0),
        )
    )
    return out


def severity_of_highest(diagnostics: Iterable[Diagnostic]) -> Optional[str]:
    """Highest known severity present, or None if there is none.

    Severities outside ``SEVERITY_ORDER`` are ignored. Useful for card
    badges that need a single color.
    """
    best: Optional[str] = None
    best_rank = -1
    for d in diagnostics:
        rank = _SEVERITY_RANK.get(d.severity, -1)
        if rank > best_rank:
            best_rank = rank
            best = d.severity
    return best
- const HALLUCINATION_EVENT_KINDS = [ - "completion_blocked_hallucination", - "suspected_hallucinated_references", - ]; - const HALLUCINATION_EVENT_LABELS = { - completion_blocked_hallucination: "Completion blocked — phantom card ids", - suspected_hallucinated_references: "Prose referenced phantom card ids", + // Diagnostic kind labels for the events-tab callout. Event kinds emitted + // by the kernel get a human-readable header when we detect them in the + // events list; add new entries here as new diagnostic event kinds land. + const DIAGNOSTIC_EVENT_LABELS = { + completion_blocked_hallucination: "⚠ Completion blocked — phantom card ids", + suspected_hallucinated_references: "⚠ Prose referenced phantom card ids", }; - function isHallucinationEvent(kind) { - return HALLUCINATION_EVENT_KINDS.indexOf(kind) !== -1; + function isDiagnosticEvent(kind) { + return Object.prototype.hasOwnProperty.call(DIAGNOSTIC_EVENT_LABELS, kind); } function phantomIdsFromEvent(ev) { - // Payload shapes: - // completion_blocked_hallucination: {phantom_cards, verified_cards, summary_preview} - // suspected_hallucinated_references: {phantom_refs, source} if (!ev || !ev.payload) return []; const p = ev.payload; return p.phantom_cards || p.phantom_refs || []; @@ -725,24 +714,36 @@ } // ------------------------------------------------------------------------- - // Attention strip — surfaces tasks with active hallucination warnings. - // Renders a collapsed bar just below the board switcher; clicking expands - // a list of affected tasks with an "Open" button each. Dismissible per - // session via state flag; tasks re-appear on page reload if they still - // have warnings. + // Attention strip — surfaces every task with active diagnostics, + // severity-marked (warning/error/critical). Collapsed by default; click + // Show to expand into per-task rows with Open buttons. Dismissible + // per session via state flag. 
// ------------------------------------------------------------------------- - function collectWarningTasks(boardData) { + function collectDiagTasks(boardData) { if (!boardData || !boardData.columns) return []; const out = []; for (const col of boardData.columns) { for (const t of col.tasks || []) { - if (t.warnings && t.warnings.count > 0) out.push(t); + if (t.diagnostics && t.diagnostics.length > 0) out.push(t); + else if (t.warnings && t.warnings.count > 0) out.push(t); } } - // Sort: most recent warning first. + // Sort: highest severity first (critical > error > warning), then by + // most recent latest_at. + const sevIdx = function (s) { + if (s === "critical") return 3; + if (s === "error") return 2; + if (s === "warning") return 1; + return 0; + }; out.sort(function (a, b) { - return (b.warnings.latest_at || 0) - (a.warnings.latest_at || 0); + const aSev = sevIdx((a.warnings && a.warnings.highest_severity) || "warning"); + const bSev = sevIdx((b.warnings && b.warnings.highest_severity) || "warning"); + if (aSev !== bSev) return bSev - aSev; + const aLa = (a.warnings && a.warnings.latest_at) || 0; + const bLa = (b.warnings && b.warnings.latest_at) || 0; + return bLa - aLa; }); return out; } @@ -750,18 +751,31 @@ function AttentionStrip(props) { const [expanded, setExpanded] = useState(false); const [dismissed, setDismissed] = useState(false); - const warnTasks = useMemo( - function () { return collectWarningTasks(props.boardData); }, + const diagTasks = useMemo( + function () { return collectDiagTasks(props.boardData); }, [props.boardData] ); - if (dismissed || warnTasks.length === 0) return null; - return h("div", { className: "hermes-kanban-attention" }, + if (dismissed || diagTasks.length === 0) return null; + // Pick the highest severity present so we can colour the strip. 
+ let topSev = "warning"; + for (const t of diagTasks) { + const s = (t.warnings && t.warnings.highest_severity) || "warning"; + if (s === "critical") { topSev = "critical"; break; } + if (s === "error" && topSev !== "critical") topSev = "error"; + } + return h("div", { + className: cn( + "hermes-kanban-attention", + "hermes-kanban-attention--" + topSev, + ), + }, h("div", { className: "hermes-kanban-attention-bar" }, - h("span", { className: "hermes-kanban-attention-icon" }, "⚠"), + h("span", { className: "hermes-kanban-attention-icon" }, + topSev === "critical" ? "!!!" : topSev === "error" ? "!!" : "⚠"), h("span", { className: "hermes-kanban-attention-text" }, - warnTasks.length === 1 - ? "1 task with hallucination warnings" - : `${warnTasks.length} tasks with hallucination warnings`, + diagTasks.length === 1 + ? "1 task needs attention" + : `${diagTasks.length} tasks need attention`, ), h("button", { className: "hermes-kanban-attention-toggle", @@ -773,19 +787,29 @@ onClick: function () { setDismissed(true); }, title: "Hide until next page reload", type: "button", - }, "✕"), + }, "\u2715"), ), expanded ? h("div", { className: "hermes-kanban-attention-list" }, - warnTasks.map(function (t) { - return h("div", { key: t.id, className: "hermes-kanban-attention-row" }, + diagTasks.map(function (t) { + const sev = (t.warnings && t.warnings.highest_severity) || "warning"; + const kinds = t.warnings && t.warnings.kinds ? Object.keys(t.warnings.kinds) : []; + return h("div", { + key: t.id, + className: cn( + "hermes-kanban-attention-row", + "hermes-kanban-attention-row--" + sev, + ), + }, + h("span", { className: "hermes-kanban-attention-row-sev" }, + sev === "critical" ? "!!!" : sev === "error" ? "!!" : "⚠"), h("span", { className: "hermes-kanban-attention-row-id" }, t.id), h("span", { className: "hermes-kanban-attention-row-title" }, t.title || "(untitled)"), h("span", { className: "hermes-kanban-attention-row-meta" }, t.assignee ? 
"@" + t.assignee : "unassigned", - " · ", - `${t.warnings.count} event${t.warnings.count === 1 ? "" : "s"}`, + " \u00b7 ", + kinds.length > 0 ? kinds.join(", ") : "diagnostic", ), h("button", { className: "hermes-kanban-attention-row-btn", @@ -800,195 +824,266 @@ } // ------------------------------------------------------------------------- - // Recovery popover — operator actions for a task flagged with - // hallucination warnings. Three primary actions: - // 1. Reclaim — release a running worker's claim; task back to ready. - // 2. Reassign — switch the task to a different profile (with optional - // reclaim-first toggle for currently-running tasks). - // 3. Edit profile — copy the CLI hint for `hermes -p model` - // (the dashboard can't edit profile config from the - // browser; it lives on the filesystem). - // Rendered from inside TaskDetail via a toggle button. + // Diagnostics section — generic renderer for a task's active distress + // signals. Each diagnostic carries its own title, detail, data payload, + // and a list of structured actions; the section renders them uniformly + // regardless of kind. Replaces the hallucination-specific + // ``RecoveryPopover`` from the previous iteration. + // + // Action kinds supported today: + // reclaim → POST /tasks/:id/reclaim + // reassign → POST /tasks/:id/reassign (with profile picker) + // unblock → PATCH /tasks/:id body: {status: "ready"} + // comment → scroll to the comment input at the bottom of the drawer + // cli_hint → copy payload.command to clipboard + // open_docs → open payload.url in a new tab + // Unknown kinds are rendered as a disabled informational row so the + // server can add new action kinds without breaking the UI. 
// ------------------------------------------------------------------------- - function RecoveryPopover(props) { - const t = props.task; - const board = props.boardSlug; - const assignees = props.assignees || []; - const [reason, setReason] = useState(""); - const [newProfile, setNewProfile] = useState(t.assignee || ""); - const [reclaimFirst, setReclaimFirst] = useState(t.status === "running"); + function DiagnosticActionButton(props) { + const { action, onExec, busy, extra } = props; + const label = (action.suggested ? "\u2606 " : "") + action.label; + const cls = cn( + "hermes-kanban-diag-action-btn", + action.suggested ? "hermes-kanban-diag-action-btn--suggested" : "", + ); + if (action.kind === "reclaim" || action.kind === "reassign" || + action.kind === "unblock") { + return h("button", { + className: cls, + disabled: busy || (extra && extra.disabled), + onClick: function () { onExec(action); }, + type: "button", + }, label); + } + if (action.kind === "cli_hint") { + return h("button", { + className: cls, + disabled: busy, + onClick: function () { onExec(action); }, + type: "button", + title: "Copy command to clipboard", + }, (extra && extra.copied) ? "Copied" : label); + } + if (action.kind === "comment") { + return h("button", { + className: cls, + onClick: function () { onExec(action); }, + type: "button", + }, label); + } + if (action.kind === "open_docs") { + return h("a", { + className: cls, + href: (action.payload && action.payload.url) || "#", + target: "_blank", + rel: "noreferrer", + }, label); + } + // Unknown kind — render informational, non-interactive. 
+ return h("span", { className: cls + " hermes-kanban-diag-action-btn--unknown" }, + label); + } + + function DiagnosticCard(props) { + const { diag, task, boardSlug, assignees, onRefresh } = props; const [busy, setBusy] = useState(false); const [msg, setMsg] = useState(null); - const [copied, setCopied] = useState(false); + const [copiedKey, setCopiedKey] = useState(null); + const [reassignProfile, setReassignProfile] = useState(task.assignee || ""); - const act = function (kind) { + const execAction = function (action) { if (busy) return; - setBusy(true); - setMsg(null); - const urlBase = `${API}/tasks/${encodeURIComponent(t.id)}`; - const url = kind === "reclaim" - ? withBoard(`${urlBase}/reclaim`, board) - : withBoard(`${urlBase}/reassign`, board); - const body = kind === "reclaim" - ? { reason: reason || null } - : { - profile: newProfile || null, - reclaim_first: !!reclaimFirst, - reason: reason || null, - }; - SDK.fetchJSON(url, { - method: "POST", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify(body), - }).then(function () { - setMsg({ ok: true, text: - kind === "reclaim" - ? `Reclaimed ${t.id}. 
Task back to ready.` - : `Reassigned ${t.id} to ${newProfile || "(unassigned)"}.` - }); - if (props.onActionComplete) props.onActionComplete(kind); - }).catch(function (err) { - setMsg({ ok: false, text: `Failed: ${err.message || err}` }); - }).then(function () { - setBusy(false); - }); - }; - - const profileCmd = `hermes -p ${t.assignee || ""} model`; - const copyCmd = function () { - try { - navigator.clipboard.writeText(profileCmd).then(function () { - setCopied(true); - setTimeout(function () { setCopied(false); }, 2000); - }); - } catch (_) { - window.prompt("Copy this command:", profileCmd); + if (action.kind === "cli_hint") { + const cmd = (action.payload && action.payload.command) || action.label; + const fallback = function () { window.prompt("Copy this command:", cmd); }; + try { + const p = navigator.clipboard && navigator.clipboard.writeText(cmd); + if (p && p.then) { + p.then(function () { + setCopiedKey(action.label); + setTimeout(function () { setCopiedKey(null); }, 2000); + }).catch(fallback); + } else { + fallback(); + } + } catch (_) { + fallback(); + } + return; + } + if (action.kind === "comment") { + // Scroll the comment input into view; the drawer already has one + // at the bottom. Focus it so the operator can start typing. + const ta = document.querySelector(".hermes-kanban-drawer-comment-row input, .hermes-kanban-drawer-comment-row textarea"); + if (ta) { + ta.scrollIntoView({ behavior: "smooth", block: "nearest" }); + ta.focus(); + } + return; + } + if (action.kind === "unblock") { + setBusy(true); setMsg(null); + const url = withBoard(`${API}/tasks/${encodeURIComponent(task.id)}`, boardSlug); + SDK.fetchJSON(url, { + method: "PATCH", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ status: "ready" }), + }).then(function () { + setMsg({ ok: true, text: `Unblocked ${task.id}. 
Task is ready for the next tick.` }); + if (onRefresh) onRefresh(); + }).catch(function (err) { + setMsg({ ok: false, text: `Unblock failed: ${err.message || err}` }); + }).then(function () { setBusy(false); }); + return; + } + if (action.kind === "reclaim") { + setBusy(true); setMsg(null); + const url = withBoard(`${API}/tasks/${encodeURIComponent(task.id)}/reclaim`, boardSlug); + SDK.fetchJSON(url, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ reason: `recovery action for ${diag.kind}` }), + }).then(function () { + setMsg({ ok: true, text: `Reclaimed ${task.id}. Task is back to ready.` }); + if (onRefresh) onRefresh(); + }).catch(function (err) { + setMsg({ ok: false, text: `Reclaim failed: ${err.message || err}` }); + }).then(function () { setBusy(false); }); + return; + } + if (action.kind === "reassign") { + if (!reassignProfile) { + setMsg({ ok: false, text: "Pick a profile first." }); + return; + } + setBusy(true); setMsg(null); + const url = withBoard(`${API}/tasks/${encodeURIComponent(task.id)}/reassign`, boardSlug); + const body = { + profile: reassignProfile || null, + reclaim_first: !!(action.payload && action.payload.reclaim_first), + reason: `recovery action for ${diag.kind}`, + }; + SDK.fetchJSON(url, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify(body), + }).then(function () { + setMsg({ + ok: true, + text: `Reassigned ${task.id} to ${reassignProfile}.`, + }); + if (onRefresh) onRefresh(); + }).catch(function (err) { + setMsg({ ok: false, text: `Reassign failed: ${err.message || err}` }); + }).then(function () { setBusy(false); }); + return; } }; - return h("div", { className: "hermes-kanban-recovery" }, - h("div", { className: "hermes-kanban-recovery-title" }, - "Recovery actions"), - h("div", { className: "hermes-kanban-recovery-hint" }, - "Use these when a worker is stuck (crash loop, repeated hallucination, ", - "broken model). 
Events in this task's history are preserved as audit trail."), + // Pull out the reassign action so we can render its picker inline. + const reassignAction = (diag.actions || []).find(function (a) { + return a.kind === "reassign"; + }); - // Reason input (shared across actions) - h("div", { className: "hermes-kanban-recovery-section" }, - h("label", { className: "hermes-kanban-recovery-label" }, - "Reason (optional, logged on event)"), - h("input", { - type: "text", - className: "hermes-kanban-recovery-input", - value: reason, - onChange: function (e) { setReason(e.target.value); }, - placeholder: "e.g. model hallucinating, switching to larger", + const sevClass = "hermes-kanban-diag--" + (diag.severity || "warning"); + return h("div", { className: cn("hermes-kanban-diag", sevClass) }, + h("div", { className: "hermes-kanban-diag-header" }, + h("span", { className: "hermes-kanban-diag-sev" }, + diag.severity === "critical" ? "!!!" : + diag.severity === "error" ? "!!" : "\u26a0"), + h("span", { className: "hermes-kanban-diag-title" }, + diag.title), + ), + h("div", { className: "hermes-kanban-diag-detail" }, + diag.detail), + diag.data && Object.keys(diag.data).length > 0 + ? h("div", { className: "hermes-kanban-diag-data" }, + Object.keys(diag.data).map(function (k) { + const v = diag.data[k]; + if (Array.isArray(v) && v.length > 0 && typeof v[0] === "string" && + v[0].indexOf("t_") === 0) { + // Task-id list — render as chips. + return h("div", { key: k, className: "hermes-kanban-diag-data-row" }, + h("span", { className: "hermes-kanban-diag-data-key" }, k + ":"), + v.map(function (x) { + return h("code", { + key: x, className: "hermes-kanban-event-phantom-chip", + }, x); + }), + ); + } + return h("div", { key: k, className: "hermes-kanban-diag-data-row" }, + h("span", { className: "hermes-kanban-diag-data-key" }, k + ":"), + h("span", { className: "hermes-kanban-diag-data-val" }, + Array.isArray(v) ? 
v.join(", ") : String(v)), + ); + }), + ) + : null, + // Inline reassign picker — only shown when the diagnostic offers + // a reassign action. Profile list comes from the board payload. + reassignAction + ? h("div", { className: "hermes-kanban-diag-reassign-row" }, + h("span", { className: "hermes-kanban-diag-reassign-label" }, + "Reassign to:"), + h("select", { + className: "hermes-kanban-recovery-select", + value: reassignProfile, + onChange: function (e) { setReassignProfile(e.target.value); }, + }, + h("option", { value: "" }, "(unassigned)"), + (assignees || []).map(function (a) { + return h("option", { key: a, value: a }, a); + }), + ), + ) + : null, + h("div", { className: "hermes-kanban-diag-actions" }, + (diag.actions || []).map(function (a, i) { + return h(DiagnosticActionButton, { + key: a.kind + i, + action: a, + onExec: execAction, + busy: busy, + extra: { + copied: copiedKey === a.label, + disabled: (a.kind === "reassign" && !reassignProfile), + }, + }); }), ), - - // Action 1: Reclaim - h("div", { className: "hermes-kanban-recovery-section" }, - h("div", { className: "hermes-kanban-recovery-action-row" }, - h("div", { className: "hermes-kanban-recovery-action-label" }, - "1. Reclaim"), - h("div", { className: "hermes-kanban-recovery-action-desc" }, - t.status === "running" - ? "Abort the running worker and reset to ready." - : "Task is not running — nothing to reclaim."), - h("button", { - className: "hermes-kanban-recovery-btn", - disabled: busy || t.status !== "running", - onClick: function () { act("reclaim"); }, - type: "button", - }, "Reclaim"), - ), - ), - - // Action 2: Reassign - h("div", { className: "hermes-kanban-recovery-section" }, - h("div", { className: "hermes-kanban-recovery-action-row" }, - h("div", { className: "hermes-kanban-recovery-action-label" }, - "2. 
Reassign"), - h("div", { className: "hermes-kanban-recovery-action-desc" }, - "Switch to a different worker profile and retry."), - ), - h("div", { className: "hermes-kanban-recovery-reassign-row" }, - h("select", { - className: "hermes-kanban-recovery-select", - value: newProfile, - onChange: function (e) { setNewProfile(e.target.value); }, - }, - h("option", { value: "" }, "(unassigned)"), - assignees.map(function (a) { - return h("option", { key: a, value: a }, a); - }), - ), - h("label", { className: "hermes-kanban-recovery-checkbox" }, - h("input", { - type: "checkbox", - checked: reclaimFirst, - onChange: function (e) { setReclaimFirst(e.target.checked); }, - }), - " Reclaim first", - ), - h("button", { - className: "hermes-kanban-recovery-btn", - disabled: busy, - onClick: function () { act("reassign"); }, - type: "button", - }, "Reassign"), - ), - ), - - // Action 3: Edit profile model (CLI hint) - h("div", { className: "hermes-kanban-recovery-section" }, - h("div", { className: "hermes-kanban-recovery-action-row" }, - h("div", { className: "hermes-kanban-recovery-action-label" }, - "3. Change profile model"), - h("div", { className: "hermes-kanban-recovery-action-desc" }, - "Profile config lives on disk — change it from a terminal, ", - "then use Reclaim above to retry with the new model."), - ), - h("div", { className: "hermes-kanban-recovery-cmd-row" }, - h("code", { className: "hermes-kanban-recovery-cmd" }, profileCmd), - h("button", { - className: "hermes-kanban-recovery-btn", - onClick: copyCmd, - type: "button", - }, copied ? "Copied" : "Copy"), - ), - ), - msg ? h("div", { className: cn( - "hermes-kanban-recovery-msg", - msg.ok ? "hermes-kanban-recovery-msg--ok" : "hermes-kanban-recovery-msg--err", + "hermes-kanban-diag-msg", + msg.ok ? "hermes-kanban-diag-msg--ok" : "hermes-kanban-diag-msg--err", ), }, msg.text) : null, ); } - // Thin wrapper that toggles the RecoveryPopover visibility inside a - // task drawer. 
Auto-opens when the task has active hallucination - // warnings; operators can still collapse it. Always available via a - // header button for tasks without warnings, so reclaim/reassign is - // accessible for other stuck-worker scenarios too. - function RecoverySection(props) { - const [open, setOpen] = useState(!!props.hasWarnings); - // Re-open automatically if warnings appear while the drawer is open. + function DiagnosticsSection(props) { + const diags = props.diagnostics || []; + const hasOpenDiags = diags.length > 0; + const [open, setOpen] = useState(hasOpenDiags); useEffect(function () { - if (props.hasWarnings) setOpen(true); - }, [props.hasWarnings]); + if (hasOpenDiags) setOpen(true); + }, [hasOpenDiags]); + if (!hasOpenDiags && !props.alwaysVisible) { + // Nothing active. Collapse the section entirely rather than showing + // an empty "Recovery" header — keeps clean tasks visually clean. + return null; + } return h("div", { className: "hermes-kanban-section" }, h("div", { className: "hermes-kanban-section-head-row" }, h("span", { className: "hermes-kanban-section-head" }, - props.hasWarnings + hasOpenDiags ? h("span", { className: "hermes-kanban-section-head-warning" }, - "⚠ Recovery") - : "Recovery", + `\u26a0 Diagnostics (${diags.length})`) + : "Diagnostics", ), h("button", { className: "hermes-kanban-section-toggle", @@ -997,24 +1092,23 @@ }, open ? "Hide" : "Show"), ), open - ? h(RecoveryPopover, { - // Keyed by task id so React tears the popover down and - // remounts it when the drawer swaps to a different task — - // otherwise reason / newProfile / success toast from the - // previous task leak into the new one. - key: props.task.id, - task: props.task, - boardSlug: props.boardSlug, - assignees: props.assignees, - onActionComplete: function () { - if (props.onRefresh) props.onRefresh(); - }, - }) + ? 
h("div", { className: "hermes-kanban-diag-list" }, + diags.map(function (d, i) { + return h(DiagnosticCard, { + key: props.task.id + ":" + d.kind + i, + diag: d, + task: props.task, + boardSlug: props.boardSlug, + assignees: props.assignees, + onRefresh: props.onRefresh, + }); + }), + ) : null, ); } - // ------------------------------------------------------------------------- + // ------------------------------------------------------------------------- // Board switcher (multi-project) // ------------------------------------------------------------------------- @@ -1545,11 +1639,18 @@ h("span", { className: "hermes-kanban-card-id" }, t.id), t.warnings && t.warnings.count > 0 ? h("span", { - className: "hermes-kanban-warning-badge", - title: `⚠ ${t.warnings.count} hallucination ` + - `event(s) since last clean completion. ` + - `Click to open for details.`, - }, "⚠") + className: cn( + "hermes-kanban-warning-badge", + "hermes-kanban-warning-badge--" + (t.warnings.highest_severity || "warning"), + ), + title: ( + `${t.warnings.count} active diagnostic` + + (t.warnings.count === 1 ? "" : "s") + + ` (severity: ${t.warnings.highest_severity || "warning"}). ` + + `Click to open for details.` + ), + }, t.warnings.highest_severity === "critical" ? "!!!" : + t.warnings.highest_severity === "error" ? "!!" : "⚠") : null, t.priority > 0 ? h(Badge, { className: "hermes-kanban-priority" }, `P${t.priority}`) @@ -1945,11 +2046,11 @@ t.created_by ? 
h(MetaRow, { label: "Created by", value: t.created_by }) : null, ), h(StatusActions, { task: t, onPatch: props.onPatch }), - h(RecoverySection, { + h(DiagnosticsSection, { task: t, boardSlug: props.boardSlug, assignees: props.assignees, - hasWarnings: t.warnings && t.warnings.count > 0, + diagnostics: t.diagnostics || [], onRefresh: props.onRefresh, }), h(HomeSubsSection, { @@ -1992,20 +2093,20 @@ h("div", { className: "hermes-kanban-section" }, h("div", { className: "hermes-kanban-section-head" }, `Events (${events.length})`), events.slice().reverse().slice(0, 20).map(function (e) { - const isHall = isHallucinationEvent(e.kind); - const phantoms = isHall ? phantomIdsFromEvent(e) : []; + const isDiag = isDiagnosticEvent(e.kind); + const phantoms = isDiag ? phantomIdsFromEvent(e) : []; return h("div", { key: e.id, className: cn( "hermes-kanban-event", - isHall ? "hermes-kanban-event--hallucination" : "", + isDiag ? "hermes-kanban-event--hallucination" : "", ), }, - isHall + isDiag ? h("div", { className: "hermes-kanban-event-header" }, h("span", { className: "hermes-kanban-event-warning-icon" }, "⚠"), h("span", { className: "hermes-kanban-event-warning-label" }, - HALLUCINATION_EVENT_LABELS[e.kind] || e.kind), + DIAGNOSTIC_EVENT_LABELS[e.kind] || e.kind), h("span", { className: "hermes-kanban-event-ago" }, timeAgo ? timeAgo(e.created_at) : ""), ) @@ -2014,7 +2115,7 @@ h("span", { className: "hermes-kanban-event-ago" }, timeAgo ? timeAgo(e.created_at) : ""), ), - isHall && phantoms.length > 0 + isDiag && phantoms.length > 0 ? h("div", { className: "hermes-kanban-event-phantom-row" }, h("span", { className: "hermes-kanban-event-phantom-label" }, "Phantom ids:"), @@ -2026,7 +2127,7 @@ }), ) : null, - e.payload && !isHall + e.payload && !isDiag ? 
h("code", { className: "hermes-kanban-event-payload" }, JSON.stringify(e.payload)) : null, diff --git a/plugins/kanban/dashboard/dist/style.css b/plugins/kanban/dashboard/dist/style.css index d993af510a..d10b766bd2 100644 --- a/plugins/kanban/dashboard/dist/style.css +++ b/plugins/kanban/dashboard/dist/style.css @@ -1100,3 +1100,173 @@ color: #ff8b8b; border: 1px solid rgba(255, 107, 107, 0.3); } + +/* ---------------------------------------------------------------------- */ +/* Diagnostics — generic, severity-coloured distress signals on tasks. */ +/* Three rungs: warning (amber), error (orange), critical (red). */ +/* ---------------------------------------------------------------------- */ + +/* Severity token variables so every diagnostic-coloured surface uses the */ +/* same palette. */ +.hermes-kanban-diag, +.hermes-kanban-attention, +.hermes-kanban-warning-badge, +.hermes-kanban-attention-row { + --hermes-diag-warning: #ff9e3b; + --hermes-diag-error: #ff6b3d; + --hermes-diag-critical: #ff4d4d; +} + +/* Warning-badge severity variants (overrides the base colour). */ +.hermes-kanban-warning-badge--warning { color: var(--hermes-diag-warning); } +.hermes-kanban-warning-badge--error { color: var(--hermes-diag-error); font-weight: 700; } +.hermes-kanban-warning-badge--critical { color: var(--hermes-diag-critical); font-weight: 700; } + +/* Attention-strip severity variants. 
*/ +.hermes-kanban-attention--warning { + border-color: rgba(255, 158, 59, 0.35); + background: rgba(255, 158, 59, 0.06); +} +.hermes-kanban-attention--error { + border-color: rgba(255, 107, 61, 0.45); + background: rgba(255, 107, 61, 0.08); +} +.hermes-kanban-attention--critical { + border-color: rgba(255, 77, 77, 0.55); + background: rgba(255, 77, 77, 0.10); +} +.hermes-kanban-attention--error .hermes-kanban-attention-icon { color: var(--hermes-diag-error); } +.hermes-kanban-attention--critical .hermes-kanban-attention-icon { color: var(--hermes-diag-critical); } + +/* Per-row severity marker in the expanded attention list. */ +.hermes-kanban-attention-row-sev { + display: inline-block; + min-width: 1.5rem; + font-weight: 600; +} +.hermes-kanban-attention-row--warning .hermes-kanban-attention-row-sev { color: var(--hermes-diag-warning); } +.hermes-kanban-attention-row--error .hermes-kanban-attention-row-sev { color: var(--hermes-diag-error); font-weight: 700; } +.hermes-kanban-attention-row--critical .hermes-kanban-attention-row-sev { color: var(--hermes-diag-critical); font-weight: 700; } + +/* Individual diagnostic card inside the drawer's Diagnostics section. 
*/ +.hermes-kanban-diag-list { + display: flex; + flex-direction: column; + gap: 0.6rem; +} +.hermes-kanban-diag { + border-left: 3px solid var(--hermes-diag-warning); + background: rgba(255, 158, 59, 0.05); + border-radius: 0.35rem; + padding: 0.6rem 0.75rem; + display: flex; + flex-direction: column; + gap: 0.4rem; +} +.hermes-kanban-diag--error { + border-left-color: var(--hermes-diag-error); + background: rgba(255, 107, 61, 0.06); +} +.hermes-kanban-diag--critical { + border-left-color: var(--hermes-diag-critical); + background: rgba(255, 77, 77, 0.07); +} +.hermes-kanban-diag-header { + display: flex; + align-items: center; + gap: 0.5rem; +} +.hermes-kanban-diag-sev { + font-weight: 700; + min-width: 1.5rem; +} +.hermes-kanban-diag--warning .hermes-kanban-diag-sev { color: var(--hermes-diag-warning); } +.hermes-kanban-diag--error .hermes-kanban-diag-sev { color: var(--hermes-diag-error); } +.hermes-kanban-diag--critical .hermes-kanban-diag-sev { color: var(--hermes-diag-critical); } +.hermes-kanban-diag-title { + font-weight: 600; + font-size: 0.875rem; +} +.hermes-kanban-diag-detail { + font-size: 0.8125rem; + color: var(--color-foreground, #ccc); + line-height: 1.4; +} +.hermes-kanban-diag-data { + display: flex; + flex-direction: column; + gap: 0.2rem; + font-size: 0.75rem; +} +.hermes-kanban-diag-data-row { + display: flex; + align-items: center; + gap: 0.35rem; + flex-wrap: wrap; +} +.hermes-kanban-diag-data-key { + color: var(--color-muted-foreground, #888); + font-weight: 500; +} +.hermes-kanban-diag-data-val { + font-family: ui-monospace, SFMono-Regular, monospace; +} +.hermes-kanban-diag-reassign-row { + display: flex; + align-items: center; + gap: 0.4rem; + font-size: 0.75rem; +} +.hermes-kanban-diag-reassign-label { + color: var(--color-muted-foreground, #888); +} +.hermes-kanban-diag-actions { + display: flex; + flex-wrap: wrap; + gap: 0.4rem; + margin-top: 0.1rem; +} +.hermes-kanban-diag-action-btn { + padding: 0.25rem 0.6rem; + font-size: 
0.75rem; + background: rgba(0, 0, 0, 0.2); + border: 1px solid rgba(120, 120, 140, 0.3); + border-radius: 0.3rem; + color: inherit; + cursor: pointer; + text-decoration: none; +} +.hermes-kanban-diag-action-btn:hover:not(:disabled) { + background: rgba(0, 0, 0, 0.3); +} +.hermes-kanban-diag-action-btn:disabled { + opacity: 0.4; + cursor: not-allowed; +} +.hermes-kanban-diag-action-btn--suggested { + background: rgba(255, 158, 59, 0.15); + border-color: rgba(255, 158, 59, 0.4); + font-weight: 600; +} +.hermes-kanban-diag-action-btn--suggested:hover:not(:disabled) { + background: rgba(255, 158, 59, 0.25); +} +.hermes-kanban-diag-action-btn--unknown { + opacity: 0.6; + cursor: default; +} +.hermes-kanban-diag-msg { + font-size: 0.75rem; + padding: 0.35rem 0.5rem; + border-radius: 0.3rem; +} +.hermes-kanban-diag-msg--ok { + background: rgba(120, 200, 120, 0.12); + color: #6bc46b; + border: 1px solid rgba(120, 200, 120, 0.3); +} +.hermes-kanban-diag-msg--err { + background: rgba(255, 107, 61, 0.12); + color: #ff8b6b; + border: 1px solid rgba(255, 107, 61, 0.3); +} diff --git a/plugins/kanban/dashboard/plugin_api.py b/plugins/kanban/dashboard/plugin_api.py index 59c6a9e233..2b5bcd0dad 100644 --- a/plugins/kanban/dashboard/plugin_api.py +++ b/plugins/kanban/dashboard/plugin_api.py @@ -187,63 +187,109 @@ _WARNING_EVENT_KINDS = ( ) -def _compute_warnings_for_tasks( +def _compute_task_diagnostics( conn: sqlite3.Connection, task_ids: Optional[list[str]] = None, -) -> dict[str, dict]: - """Return {task_id: {count, kinds, latest_at}} for tasks with - hallucination warnings that occurred AFTER the most recent clean - completion event (completed / edited). An empty dict means no tasks - on the board have active warnings. +) -> dict[str, list[dict]]: + """Run the diagnostic rule engine against every task (or a subset) + and return ``{task_id: [diagnostic_dict, ...]}``. - ``task_ids`` narrows the query; pass ``None`` to scan the whole DB - (matches board-level rollup). 
Used by both the /board aggregate and - per-task /tasks/:id endpoints. + Tasks with no active diagnostics are omitted from the result. + Uses ``hermes_cli.kanban_diagnostics`` — see that module for the + rule definitions. """ - params: tuple = () + from hermes_cli import kanban_diagnostics as kd + + # Build the candidate task list. We need each task's row + its + # events + its runs. Doing N separate queries works but scales + # poorly; do three aggregate queries instead. if task_ids is not None: if not task_ids: return {} placeholders = ",".join(["?"] * len(task_ids)) - sql = ( - "SELECT task_id, kind, created_at FROM task_events " - f"WHERE task_id IN ({placeholders}) AND kind IN " - "('completion_blocked_hallucination', " - " 'suspected_hallucinated_references', " - " 'completed', 'edited') " - "ORDER BY task_id, id" - ) - params = tuple(task_ids) + rows = conn.execute( + f"SELECT * FROM tasks WHERE id IN ({placeholders})", + tuple(task_ids), + ).fetchall() else: - sql = ( - "SELECT task_id, kind, created_at FROM task_events " - "WHERE kind IN " - "('completion_blocked_hallucination', " - " 'suspected_hallucinated_references', " - " 'completed', 'edited') " - "ORDER BY task_id, id" - ) + rows = conn.execute( + "SELECT * FROM tasks WHERE status != 'archived'", + ).fetchall() - out: dict[str, dict] = {} - for row in conn.execute(sql, params).fetchall(): - tid = row["task_id"] - kind = row["kind"] - created_at = row["created_at"] - if kind in ("completed", "edited"): - # Clean event wipes prior warning counters; only events after - # this timestamp count. - out.pop(tid, None) - continue - bucket = out.setdefault( - tid, {"count": 0, "kinds": {}, "latest_at": 0} + if not rows: + return {} + + # Index events + runs by task id. For very large boards this will + # slurp a lot — acceptable on the dashboard's typical working set + # (hundreds of tasks), but we can add pagination / filtering later + # if profiling shows it's a hotspot. 
+ row_ids = [r["id"] for r in rows] + placeholders = ",".join(["?"] * len(row_ids)) + events_by_task: dict[str, list] = {tid: [] for tid in row_ids} + for ev_row in conn.execute( + f"SELECT * FROM task_events WHERE task_id IN ({placeholders}) ORDER BY id", + tuple(row_ids), + ).fetchall(): + events_by_task.setdefault(ev_row["task_id"], []).append(ev_row) + runs_by_task: dict[str, list] = {tid: [] for tid in row_ids} + for run_row in conn.execute( + f"SELECT * FROM task_runs WHERE task_id IN ({placeholders}) ORDER BY id", + tuple(row_ids), + ).fetchall(): + runs_by_task.setdefault(run_row["task_id"], []).append(run_row) + + out: dict[str, list[dict]] = {} + for r in rows: + tid = r["id"] + diags = kd.compute_task_diagnostics( + r, + events_by_task.get(tid, []), + runs_by_task.get(tid, []), ) - bucket["count"] += 1 - bucket["kinds"][kind] = bucket["kinds"].get(kind, 0) + 1 - if created_at > bucket["latest_at"]: - bucket["latest_at"] = created_at + if diags: + out[tid] = [d.to_dict() for d in diags] return out +def _warnings_summary_from_diagnostics( + diagnostics: list[dict], +) -> Optional[dict]: + """Compact summary for cards: {count, highest_severity, kinds, + latest_at}. Replaces the old hallucination-only ``warnings`` object + — same shape additions plus ``highest_severity`` so the UI can color + badges per diagnostic severity. + + Returns None when ``diagnostics`` is empty. 
+ """ + if not diagnostics: + return None + from hermes_cli.kanban_diagnostics import SEVERITY_ORDER + + kinds: dict[str, int] = {} + latest = 0 + highest_idx = -1 + highest_sev: Optional[str] = None + count = 0 + for d in diagnostics: + kinds[d["kind"]] = kinds.get(d["kind"], 0) + d.get("count", 1) + count += d.get("count", 1) + la = d.get("last_seen_at") or 0 + if la > latest: + latest = la + sev = d.get("severity") + if sev in SEVERITY_ORDER: + idx = SEVERITY_ORDER.index(sev) + if idx > highest_idx: + highest_idx = idx + highest_sev = sev + return { + "count": count, + "kinds": kinds, + "latest_at": latest, + "highest_severity": highest_sev, + } + + def _links_for(conn: sqlite3.Connection, task_id: str) -> dict[str, list[str]]: """Return {'parents': [...], 'children': [...]} for a task.""" parents = [ @@ -321,10 +367,11 @@ def get_board( if row["cstatus"] == "done": p["done"] += 1 - # Hallucination-warning rollup for this board (all tasks). - # Delegated to _compute_warnings_for_tasks so the per-task - # /tasks/:id endpoint can reuse the same rule. - warnings_per_task = _compute_warnings_for_tasks(conn, task_ids=None) + # Diagnostics rollup for this board — see kanban_diagnostics. + # We get the full structured list per task AND a compact + # summary for the card badge (so cards don't carry the detail + # text; the drawer fetches that via /tasks/:id or /diagnostics). + diagnostics_per_task = _compute_task_diagnostics(conn, task_ids=None) latest_event_id = conn.execute( "SELECT COALESCE(MAX(id), 0) AS m FROM task_events" @@ -339,9 +386,13 @@ def get_board( d["link_counts"] = link_counts.get(t.id, {"parents": 0, "children": 0}) d["comment_count"] = comment_counts.get(t.id, 0) d["progress"] = progress.get(t.id) # None when the task has no children - w = warnings_per_task.get(t.id) - if w: - d["warnings"] = w + diags = diagnostics_per_task.get(t.id) + if diags: + # Full list goes into the payload so the drawer can render + # without a second round-trip. 
The board-level badge only + # needs the summary. + d["diagnostics"] = diags + d["warnings"] = _warnings_summary_from_diagnostics(diags) col = t.status if t.status in columns else "todo" columns[col].append(d) @@ -390,11 +441,13 @@ def get_task(task_id: str, board: Optional[str] = Query(None)): if task is None: raise HTTPException(status_code=404, detail=f"task {task_id} not found") task_d = _task_dict(task) - # Attach warnings metadata so the drawer's Recovery section can - # auto-open when a hallucination is unresolved. - warnings = _compute_warnings_for_tasks(conn, task_ids=[task_id]) - if warnings.get(task_id): - task_d["warnings"] = warnings[task_id] + # Attach diagnostics so the drawer's Diagnostics section can + # render recovery actions without a second round-trip. + diags = _compute_task_diagnostics(conn, task_ids=[task_id]) + diag_list = diags.get(task_id) or [] + if diag_list: + task_d["diagnostics"] = diag_list + task_d["warnings"] = _warnings_summary_from_diagnostics(diag_list) return { "task": task_d, "comments": [_comment_dict(c) for c in kanban_db.list_comments(conn, task_id)], @@ -795,6 +848,89 @@ def bulk_update(payload: BulkTaskBody, board: Optional[str] = Query(None)): conn.close() +# --------------------------------------------------------------------------- +# Diagnostics — fleet-wide distress signals (hallucinations, crashes, +# spawn failures, stuck-blocked). See hermes_cli.kanban_diagnostics for +# the rule engine. +# --------------------------------------------------------------------------- + +@router.get("/diagnostics") +def list_diagnostics( + board: Optional[str] = Query(None, description="Kanban board slug (omit for current)"), + severity: Optional[str] = Query( + None, + description="Filter by severity: warning|error|critical", + ), +): + """Return ``[{task_id, task_title, task_status, task_assignee, + diagnostics: [...]}, ...]`` for every task on the board with at + least one active diagnostic. 
+ + Severity-filterable so the UI can render "just the critical ones" + or the CLI can grep. Useful for the board-header attention strip + AND for ``hermes kanban diagnostics`` which shells to this + endpoint when the dashboard's running, or invokes the engine + directly when it isn't. + """ + board = _resolve_board(board) + conn = _conn(board=board) + try: + diags_by_task = _compute_task_diagnostics(conn, task_ids=None) + if not diags_by_task: + return {"diagnostics": [], "count": 0} + + # Narrow by severity if asked. + if severity: + filtered: dict[str, list[dict]] = {} + for tid, dl in diags_by_task.items(): + keep = [d for d in dl if d.get("severity") == severity] + if keep: + filtered[tid] = keep + diags_by_task = filtered + if not diags_by_task: + return {"diagnostics": [], "count": 0} + + # Pull the task rows we need in one query so we can include + # titles/statuses without a per-task lookup. + ids = list(diags_by_task.keys()) + placeholders = ",".join(["?"] * len(ids)) + rows = { + r["id"]: r + for r in conn.execute( + f"SELECT id, title, status, assignee FROM tasks WHERE id IN ({placeholders})", + tuple(ids), + ).fetchall() + } + + out = [] + for tid, dl in diags_by_task.items(): + r = rows.get(tid) + out.append({ + "task_id": tid, + "task_title": r["title"] if r else None, + "task_status": r["status"] if r else None, + "task_assignee": r["assignee"] if r else None, + "diagnostics": dl, + }) + # Sort: highest severity first, then most recent. 
+ from hermes_cli.kanban_diagnostics import SEVERITY_ORDER + sev_idx = {s: i for i, s in enumerate(SEVERITY_ORDER)} + def _sort_key(row): + top = row["diagnostics"][0] + return ( + -sev_idx.get(top.get("severity"), -1), + -(top.get("last_seen_at") or 0), + ) + out.sort(key=_sort_key) + + return { + "diagnostics": out, + "count": sum(len(d["diagnostics"]) for d in out), + } + finally: + conn.close() + + # --------------------------------------------------------------------------- # Recovery actions — reclaim a running claim, reassign to a new profile # --------------------------------------------------------------------------- diff --git a/tests/hermes_cli/test_kanban_diagnostics.py b/tests/hermes_cli/test_kanban_diagnostics.py new file mode 100644 index 0000000000..0fabd8558e --- /dev/null +++ b/tests/hermes_cli/test_kanban_diagnostics.py @@ -0,0 +1,353 @@ +"""Tests for hermes_cli.kanban_diagnostics — rule-engine that produces +structured distress signals (diagnostics) for kanban tasks. + +These tests exercise each rule in isolation using minimal in-memory +task/event/run fixtures (no DB) plus a few integration-style cases +that round-trip through the real kanban_db to make sure the rule +engine works on sqlite3.Row objects as well as dataclasses. 
+""" + +from __future__ import annotations + +import time +from pathlib import Path + +import pytest + +from hermes_cli import kanban_db as kb +from hermes_cli import kanban_diagnostics as kd + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture +def kanban_home(tmp_path, monkeypatch): + home = tmp_path / ".hermes" + home.mkdir() + monkeypatch.setenv("HERMES_HOME", str(home)) + monkeypatch.setattr(Path, "home", lambda: tmp_path) + kb.init_db() + return home + + +def _task(**overrides): + base = { + "id": "t_demo00", + "title": "demo task", + "assignee": "demo", + "status": "ready", + "spawn_failures": 0, + "last_spawn_error": None, + } + base.update(overrides) + return base + + +def _event(kind, ts=None, **payload): + return { + "kind": kind, + "created_at": int(ts if ts is not None else time.time()), + "payload": payload or None, + } + + +def _run(outcome="completed", run_id=1, error=None): + return { + "id": run_id, + "outcome": outcome, + "error": error, + } + + +# --------------------------------------------------------------------------- +# Each rule — positive + negative + clearing +# --------------------------------------------------------------------------- + + +def test_hallucinated_cards_fires_on_blocked_event(): + task = _task(status="ready") + events = [ + _event("created", ts=100), + _event("completion_blocked_hallucination", ts=200, + phantom_cards=["t_bad1", "t_bad2"], + verified_cards=["t_good1"]), + ] + diags = kd.compute_task_diagnostics(task, events, []) + assert len(diags) == 1 + d = diags[0] + assert d.kind == "hallucinated_cards" + assert d.severity == "error" + assert d.data["phantom_ids"] == ["t_bad1", "t_bad2"] + # Generic recovery actions always available; comment action too. 
+ kinds = [a.kind for a in d.actions] + assert "comment" in kinds + assert "reassign" in kinds + + +def test_hallucinated_cards_clears_on_subsequent_completion(): + task = _task(status="done") + events = [ + _event("completion_blocked_hallucination", ts=100, phantom_cards=["t_x"]), + _event("completed", ts=200, summary="retry worked"), + ] + diags = kd.compute_task_diagnostics(task, events, []) + assert diags == [] + + +def test_prose_phantom_refs_fires_after_clean_completion(): + # Prose scan emits its event AFTER the completed event in the DB + # path, but a subsequent clean completion clears it. Phantom id + # must be valid hex — the scanner regex is ``t_[a-f0-9]{8,}``. + task = _task(status="done") + events = [ + _event("completed", ts=100, summary="referenced t_bad", result_len=0), + _event("suspected_hallucinated_references", ts=101, + phantom_refs=["t_deadbeef99"], source="completion_summary"), + ] + diags = kd.compute_task_diagnostics(task, events, []) + assert len(diags) == 1 + assert diags[0].kind == "prose_phantom_refs" + assert diags[0].severity == "warning" + assert diags[0].data["phantom_refs"] == ["t_deadbeef99"] + + +def test_prose_phantom_refs_clears_on_later_clean_edit(): + task = _task(status="done") + events = [ + _event("completed", ts=100, summary="bad"), + _event("suspected_hallucinated_references", ts=101, + phantom_refs=["t_ffff0000cc"]), + _event("edited", ts=200, fields=["result", "summary"]), + ] + diags = kd.compute_task_diagnostics(task, events, []) + assert diags == [] + + +def test_repeated_spawn_failures_fires_at_threshold(): + task = _task(status="blocked", spawn_failures=3, + last_spawn_error="Profile 'debugger' does not exist") + diags = kd.compute_task_diagnostics(task, [], []) + assert len(diags) == 1 + d = diags[0] + assert d.kind == "repeated_spawn_failures" + assert d.severity == "error" + # CLI hints are what operators actually need here. 
+ suggested = [a.label for a in d.actions if a.suggested] + assert any("doctor" in s for s in suggested) + + +def test_repeated_spawn_failures_escalates_to_critical(): + task = _task(spawn_failures=6, last_spawn_error="boom") + diags = kd.compute_task_diagnostics(task, [], []) + assert diags[0].severity == "critical" + + +def test_repeated_spawn_failures_below_threshold_silent(): + task = _task(spawn_failures=2) + assert kd.compute_task_diagnostics(task, [], []) == [] + + +def test_repeated_crashes_counts_trailing_streak_only(): + task = _task(status="ready", assignee="crashy") + runs = [ + _run(outcome="completed", run_id=1), + _run(outcome="crashed", run_id=2, error="OOM"), + _run(outcome="crashed", run_id=3, error="OOM again"), + ] + diags = kd.compute_task_diagnostics(task, [], runs) + assert len(diags) == 1 + d = diags[0] + assert d.kind == "repeated_crashes" + # 2 consecutive crashes at the end → default threshold 2 → error severity. + assert d.severity == "error" + assert d.data["consecutive_crashes"] == 2 + + +def test_repeated_crashes_breaks_on_recent_success(): + task = _task(status="ready", assignee="fixed") + runs = [ + _run(outcome="crashed", run_id=1), + _run(outcome="crashed", run_id=2), + _run(outcome="completed", run_id=3), + ] + assert kd.compute_task_diagnostics(task, [], runs) == [] + + +def test_repeated_crashes_escalates_on_many_crashes(): + task = _task(status="ready", assignee="x") + runs = [_run(outcome="crashed", run_id=i) for i in range(1, 6)] # 5 in a row + diags = kd.compute_task_diagnostics(task, [], runs) + assert diags[0].severity == "critical" + + +def test_stuck_in_blocked_fires_past_threshold(): + now = int(time.time()) + task = _task(status="blocked") + events = [ + _event("blocked", ts=now - 3600 * 48, reason="needs approval"), + ] + diags = kd.compute_task_diagnostics( + task, events, [], now=now, + ) + assert len(diags) == 1 + d = diags[0] + assert d.kind == "stuck_in_blocked" + assert d.severity == "warning" + assert 
d.data["age_hours"] >= 48 + + +def test_stuck_in_blocked_silent_with_recent_comment(): + now = int(time.time()) + task = _task(status="blocked") + events = [ + _event("blocked", ts=now - 3600 * 48), + _event("commented", ts=now - 3600 * 2, author="human"), + ] + assert kd.compute_task_diagnostics(task, events, [], now=now) == [] + + +def test_stuck_in_blocked_silent_when_not_blocked(): + task = _task(status="ready") + events = [_event("blocked", ts=1000)] + assert kd.compute_task_diagnostics(task, events, [], now=9999999) == [] + + +def test_repeated_crashes_surfaces_actual_error_in_title(): + """The title should lead with the actual error text so operators + see WHAT broke (e.g. rate-limit, auth, OOM) without opening logs. + """ + task = _task(status="ready", assignee="x") + runs = [ + _run(outcome="crashed", run_id=1, error="openai: 429 Too Many Requests"), + _run(outcome="crashed", run_id=2, error="openai: 429 Too Many Requests"), + ] + diags = kd.compute_task_diagnostics(task, [], runs) + assert len(diags) == 1 + d = diags[0] + assert "429" in d.title + assert "Too Many Requests" in d.title + # Full error in detail. + assert "429 Too Many Requests" in d.detail + + +def test_repeated_crashes_no_error_fallback_title(): + task = _task(status="ready", assignee="x") + runs = [ + _run(outcome="crashed", run_id=1, error=None), + _run(outcome="crashed", run_id=2, error=None), + ] + diags = kd.compute_task_diagnostics(task, [], runs) + assert "no error recorded" in diags[0].title + + +def test_repeated_spawn_failures_surfaces_actual_error_in_title(): + task = _task(spawn_failures=5, + last_spawn_error="insufficient_quota: billing limit reached") + diags = kd.compute_task_diagnostics(task, [], []) + assert len(diags) == 1 + d = diags[0] + assert "insufficient_quota" in d.title or "billing limit" in d.title + assert "insufficient_quota" in d.detail + + +def test_repeated_crashes_truncates_huge_tracebacks(): + """Full Python tracebacks can be tens of KB. 
The title stays one + line (≤160 chars); the detail caps at 500 chars + ellipsis so the + card doesn't explode visually.""" + huge = "Traceback (most recent call last):\n" + (" File\n" * 500) + task = _task(status="ready") + runs = [ + _run(outcome="crashed", run_id=1, error=huge), + _run(outcome="crashed", run_id=2, error=huge), + ] + diags = kd.compute_task_diagnostics(task, [], runs) + d = diags[0] + # Title only the first line, capped. + assert "\n" not in d.title + assert len(d.title) < 250 + # Detail contains the snippet with ellipsis. + assert d.detail.endswith("…") or len(d.detail) < 700 + + +# --------------------------------------------------------------------------- +# Severity sorting +# --------------------------------------------------------------------------- + + +def test_diagnostics_sorted_critical_first(): + """A task with both a critical (many spawn failures) and a warning + (prose phantoms) diagnostic should list the critical one first.""" + task = _task(status="done", spawn_failures=10, + last_spawn_error="nope") + events = [ + _event("completed", ts=100, summary="referenced t_missing"), + _event("suspected_hallucinated_references", ts=101, + phantom_refs=["t_missing11"]), + ] + diags = kd.compute_task_diagnostics(task, events, []) + kinds = [d.kind for d in diags] + assert kinds[0] == "repeated_spawn_failures" # critical + assert "prose_phantom_refs" in kinds + + +# --------------------------------------------------------------------------- +# Integration — runs through real kanban_db so sqlite.Row fields work +# --------------------------------------------------------------------------- + + +def test_engine_works_on_sqlite_row_objects(kanban_home): + """Regression: the rule functions must handle sqlite3.Row (which + supports mapping access but not attribute access and isn't a dict) + as well as dataclass Task / plain dict. The API layer passes Row + objects directly. 
+ """ + conn = kb.connect() + try: + parent = kb.create_task(conn, title="p", assignee="w") + real = kb.create_task(conn, title="r", assignee="x", created_by="w") + with pytest.raises(kb.HallucinatedCardsError): + kb.complete_task( + conn, parent, + summary="with phantom", created_cards=[real, "t_deadbeef1"], + ) + # Pull Row objects the way the API helper does. + row = conn.execute( + "SELECT * FROM tasks WHERE id = ?", (parent,), + ).fetchone() + events = list(conn.execute( + "SELECT * FROM task_events WHERE task_id = ? ORDER BY id", + (parent,), + ).fetchall()) + runs = list(conn.execute( + "SELECT * FROM task_runs WHERE task_id = ? ORDER BY id", + (parent,), + ).fetchall()) + diags = kd.compute_task_diagnostics(row, events, runs) + assert len(diags) == 1 + assert diags[0].kind == "hallucinated_cards" + assert "t_deadbeef1" in diags[0].data["phantom_ids"] + finally: + conn.close() + + +# --------------------------------------------------------------------------- +# Error-tolerance: a broken rule shouldn't 500 the whole compute call +# --------------------------------------------------------------------------- + + +def test_broken_rule_is_isolated(monkeypatch): + def _bad_rule(task, events, runs, now, cfg): + raise RuntimeError("synthetic rule bug") + + # Insert a broken rule at the front of the registry; subsequent + # rules should still run and produce their diagnostics. + monkeypatch.setattr(kd, "_RULES", [_bad_rule] + kd._RULES) + + task = _task(spawn_failures=5, last_spawn_error="e") + diags = kd.compute_task_diagnostics(task, [], []) + # The broken rule silently drops, the real one still fires. 
+ kinds = [d.kind for d in diags] + assert "repeated_spawn_failures" in kinds diff --git a/tests/plugins/test_kanban_dashboard_plugin.py b/tests/plugins/test_kanban_dashboard_plugin.py index 82c67f37a7..0b6a3510f8 100644 --- a/tests/plugins/test_kanban_dashboard_plugin.py +++ b/tests/plugins/test_kanban_dashboard_plugin.py @@ -1126,7 +1126,11 @@ def test_home_channels_empty_when_no_homes_configured(client, monkeypatch): def test_board_surfaces_warnings_field_for_hallucinated_completions(client): """Tasks with a pending completion_blocked_hallucination event surface a ``warnings`` object on the /board payload so the UI can badge - them without fetching per-task events.""" + them without fetching per-task events. The warnings summary is + keyed by diagnostic kind (``hallucinated_cards``) rather than the + raw event kind — see hermes_cli.kanban_diagnostics for the rule + that produces it. + """ conn = kb.connect() try: parent = kb.create_task(conn, title="parent", assignee="alice") @@ -1150,7 +1154,12 @@ def test_board_surfaces_warnings_field_for_hallucinated_completions(client): assert parent_dict.get("warnings") is not None w = parent_dict["warnings"] assert w["count"] >= 1 - assert "completion_blocked_hallucination" in w["kinds"] + assert "hallucinated_cards" in w["kinds"] + assert w["highest_severity"] == "error" + # Full diagnostic list also on the payload for drawer rendering. 
+ assert parent_dict.get("diagnostics") is not None + assert parent_dict["diagnostics"][0]["kind"] == "hallucinated_cards" + assert "t_deadbeefcafe" in parent_dict["diagnostics"][0]["data"]["phantom_ids"] def test_board_warnings_cleared_after_clean_completion(client): @@ -1335,3 +1344,99 @@ def test_reassign_endpoint_with_reclaim_first_succeeds_on_running(client): assert row["assignee"] == "new" finally: conn2.close() + + +# --------------------------------------------------------------------------- +# Diagnostics endpoint (/api/plugins/kanban/diagnostics) +# --------------------------------------------------------------------------- + +def test_diagnostics_endpoint_empty_for_clean_board(client): + r = client.get("/api/plugins/kanban/diagnostics") + assert r.status_code == 200 + data = r.json() + assert data["count"] == 0 + assert data["diagnostics"] == [] + + +def test_diagnostics_endpoint_surfaces_blocked_hallucination(client): + conn = kb.connect() + try: + parent = kb.create_task(conn, title="parent", assignee="alice") + real = kb.create_task(conn, title="real", assignee="x", created_by="alice") + import pytest as _pytest + with _pytest.raises(kb.HallucinatedCardsError): + kb.complete_task( + conn, parent, summary="phantom", + created_cards=[real, "t_ffff00001234"], + ) + finally: + conn.close() + + r = client.get("/api/plugins/kanban/diagnostics") + assert r.status_code == 200 + data = r.json() + assert data["count"] == 1 + row = data["diagnostics"][0] + assert row["task_id"] == parent + assert row["diagnostics"][0]["kind"] == "hallucinated_cards" + assert row["diagnostics"][0]["severity"] == "error" + assert "t_ffff00001234" in row["diagnostics"][0]["data"]["phantom_ids"] + + +def test_diagnostics_endpoint_severity_filter(client): + """Warning-severity filter excludes error-severity entries.""" + conn = kb.connect() + try: + # A warning-severity diagnostic (prose phantom) on one task. 
+ # Phantom id must be valid hex — the prose scanner regex + # requires ``t_[a-f0-9]{8,}``. + p1 = kb.create_task(conn, title="prose", assignee="a") + kb.complete_task(conn, p1, summary="mentioned t_deadbeef1234") + # An error-severity diagnostic (spawn failures) on another + p2 = kb.create_task(conn, title="spawn", assignee="b") + conn.execute( + "UPDATE tasks SET spawn_failures=5, last_spawn_error='x' WHERE id=?", + (p2,), + ) + conn.commit() + finally: + conn.close() + + r = client.get("/api/plugins/kanban/diagnostics?severity=warning") + assert r.status_code == 200 + data = r.json() + assert data["count"] == 1 + assert data["diagnostics"][0]["task_id"] == p1 + + r = client.get("/api/plugins/kanban/diagnostics?severity=error") + data = r.json() + assert data["count"] == 1 + assert data["diagnostics"][0]["task_id"] == p2 + + +def test_board_exposes_diagnostics_list_and_summary(client): + """/board should attach both the full diagnostics list AND the + compact warnings summary (with highest_severity) on each task + that has any diagnostic. + """ + conn = kb.connect() + try: + t = kb.create_task(conn, title="crashy", assignee="worker") + # Simulate 2 consecutive crashes -> repeated_crashes error diag + for i in range(2): + conn.execute( + "INSERT INTO task_runs (task_id, status, outcome, started_at, " + "ended_at, error) VALUES (?, 'crashed', 'crashed', ?, ?, ?)", + (t, int(time.time()) - 100, int(time.time()) - 50, "OOM"), + ) + conn.commit() + finally: + conn.close() + + r = client.get("/api/plugins/kanban/board") + data = r.json() + tasks = [x for col in data["columns"] for x in col["tasks"]] + task_dict = next(x for x in tasks if x["title"] == "crashy") + assert task_dict["warnings"] is not None + assert task_dict["warnings"]["highest_severity"] == "error" + assert task_dict["diagnostics"][0]["kind"] == "repeated_crashes"