diff --git a/cli.py b/cli.py index da401e5c18..f876a93398 100644 --- a/cli.py +++ b/cli.py @@ -5818,7 +5818,28 @@ class HermesCLI: print(f"(._.) Unknown cron command: {subcommand}") print(" Available: list, add, edit, pause, resume, run, remove") - + + def _handle_kanban_command(self, cmd: str): + """Handle the /kanban command — delegate to the shared kanban CLI. + + The string form passed here is the user's full ``/kanban ...`` + including the leading slash; we strip it and hand the remainder + to ``kanban.run_slash`` which returns a single formatted string. + """ + from hermes_cli.kanban import run_slash + + rest = cmd.strip() + if rest.startswith("/"): + rest = rest.lstrip("/") + if rest.startswith("kanban"): + rest = rest[len("kanban"):].lstrip() + try: + output = run_slash(rest) + except Exception as exc: # pragma: no cover - defensive + output = f"(._.) kanban error: {exc}" + if output: + print(output) + def _handle_skills_command(self, cmd: str): """Handle /skills slash command — delegates to hermes_cli.skills_hub.""" from hermes_cli.skills_hub import handle_skills_slash @@ -6055,6 +6076,8 @@ class HermesCLI: self.save_conversation() elif canonical == "cron": self._handle_cron_command(cmd_original) + elif canonical == "kanban": + self._handle_kanban_command(cmd_original) elif canonical == "skills": with self._busy_command(self._slow_command_status(cmd_original)): self._handle_skills_command(cmd_original) diff --git a/docs/hermes-kanban-v1-spec.pdf b/docs/hermes-kanban-v1-spec.pdf new file mode 100644 index 0000000000..c7899cd12a Binary files /dev/null and b/docs/hermes-kanban-v1-spec.pdf differ diff --git a/gateway/run.py b/gateway/run.py index 9926920b81..c85210515f 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -3503,6 +3503,14 @@ class GatewayRunner: if _cmd_def_inner and _cmd_def_inner.name == "background": return await self._handle_background_command(event) + # /kanban must bypass the guard. 
It writes to a profile-agnostic + # DB (kanban.db), not to the running agent's state. In fact + # /kanban unblock is often the only way to free a worker that + # has blocked waiting for a peer — letting that be dispatched + # mid-run is the whole point of the board. + if _cmd_def_inner and _cmd_def_inner.name == "kanban": + return await self._handle_kanban_command(event) + # Session-level toggles that are safe to run mid-agent — # /yolo can unblock a pending approval prompt, /verbose cycles # the tool-progress display mode for the ongoing stream. @@ -3727,6 +3735,9 @@ class GatewayRunner: if canonical == "personality": return await self._handle_personality_command(event) + if canonical == "kanban": + return await self._handle_kanban_command(event) + if canonical == "retry": return await self._handle_retry_command(event) @@ -5154,6 +5165,37 @@ class GatewayRunner: return "\n".join(lines) + + async def _handle_kanban_command(self, event: MessageEvent) -> str: + """Handle /kanban — delegate to the shared kanban CLI. + + Run the potentially-blocking DB work in a thread pool so the + gateway event loop stays responsive. Read operations (list, + show, context, tail) are permitted while an agent is running; + mutations are allowed too because the board is profile-agnostic + and does not touch the running agent's state. + """ + import asyncio + from hermes_cli.kanban import run_slash + + text = (event.text or "").strip() + # Strip the leading "/kanban" (with or without slash), leaving args. + if text.startswith("/"): + text = text.lstrip("/") + if text.startswith("kanban"): + text = text[len("kanban"):].lstrip() + + try: + output = await asyncio.to_thread(run_slash, text) + except Exception as exc: # pragma: no cover - defensive + return f"⚠ kanban error: {exc}" + + # Gateway messages have practical length caps; truncate long + # listings to keep the UX reasonable. 
+ if len(output) > 3800: + output = output[:3800] + "\n… (truncated; use `hermes kanban …` in your terminal for full output)" + return output or "(no output)" + async def _handle_status_command(self, event: MessageEvent) -> str: """Handle /status command.""" source = event.source diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py index 614d783d95..2d748d525d 100644 --- a/hermes_cli/commands.py +++ b/hermes_cli/commands.py @@ -140,6 +140,11 @@ COMMAND_REGISTRY: list[CommandDef] = [ CommandDef("cron", "Manage scheduled tasks", "Tools & Skills", cli_only=True, args_hint="[subcommand]", subcommands=("list", "add", "create", "edit", "pause", "resume", "run", "remove")), + CommandDef("kanban", "Multi-profile collaboration board (tasks, links, comments)", + "Tools & Skills", args_hint="[subcommand]", + subcommands=("list", "ls", "show", "create", "assign", "link", "unlink", + "claim", "comment", "complete", "block", "unblock", "archive", + "tail", "dispatch", "context", "init", "gc")), CommandDef("reload", "Reload .env variables into the running session", "Tools & Skills", cli_only=True), CommandDef("reload-mcp", "Reload MCP servers from config", "Tools & Skills", diff --git a/hermes_cli/kanban.py b/hermes_cli/kanban.py new file mode 100644 index 0000000000..0744a78753 --- /dev/null +++ b/hermes_cli/kanban.py @@ -0,0 +1,662 @@ +"""CLI for the Hermes Kanban board — ``hermes kanban …`` subcommand. + +Exposes the full 15-verb surface documented in the design spec +(``docs/hermes-kanban-v1-spec.pdf``). All DB work is delegated to +``kanban_db``. This module adds: + + * Argparse subcommand construction (``build_parser``). + * Argument dispatch (``kanban_command``). + * Output formatting (plain text + ``--json``). + * A short shared helper that parses a single slash-style string + (used by ``/kanban …`` in CLI and gateway) and forwards it to the + argparse surface. 
+""" + +from __future__ import annotations + +import argparse +import json +import os +import shlex +import sys +import time +from pathlib import Path +from typing import Any, Optional + +from hermes_cli import kanban_db as kb + + +# --------------------------------------------------------------------------- +# Small formatting helpers +# --------------------------------------------------------------------------- + +_STATUS_ICONS = { + "todo": "◻", + "ready": "▶", + "running": "●", + "blocked": "⊘", + "done": "✓", + "archived": "—", +} + + +def _fmt_ts(ts: Optional[int]) -> str: + if not ts: + return "" + return time.strftime("%Y-%m-%d %H:%M", time.localtime(ts)) + + +def _fmt_task_line(t: kb.Task) -> str: + icon = _STATUS_ICONS.get(t.status, "?") + assignee = t.assignee or "(unassigned)" + tenant = f" [{t.tenant}]" if t.tenant else "" + return f"{icon} {t.id} {t.status:8s} {assignee:20s}{tenant} {t.title}" + + +def _task_to_dict(t: kb.Task) -> dict[str, Any]: + return { + "id": t.id, + "title": t.title, + "body": t.body, + "assignee": t.assignee, + "status": t.status, + "priority": t.priority, + "tenant": t.tenant, + "workspace_kind": t.workspace_kind, + "workspace_path": t.workspace_path, + "created_by": t.created_by, + "created_at": t.created_at, + "started_at": t.started_at, + "completed_at": t.completed_at, + "result": t.result, + } + + +def _parse_workspace_flag(value: str) -> tuple[str, Optional[str]]: + """Parse ``--workspace`` into ``(kind, path|None)``. + + Accepts: ``scratch``, ``worktree``, ``dir:``. 
+ """ + if not value: + return ("scratch", None) + v = value.strip() + if v in ("scratch", "worktree"): + return (v, None) + if v.startswith("dir:"): + path = v[len("dir:"):].strip() + if not path: + raise argparse.ArgumentTypeError( + "--workspace dir: requires a path after the colon" + ) + return ("dir", os.path.expanduser(path)) + raise argparse.ArgumentTypeError( + f"unknown --workspace value {value!r}: use scratch, worktree, or dir:" + ) + + +# --------------------------------------------------------------------------- +# Argparse builder +# --------------------------------------------------------------------------- + +def build_parser(parent_subparsers: argparse._SubParsersAction) -> argparse.ArgumentParser: + """Attach the ``kanban`` subcommand tree under an existing subparsers. + + Returns the top-level ``kanban`` parser so caller can ``set_defaults``. + """ + kanban_parser = parent_subparsers.add_parser( + "kanban", + help="Multi-profile collaboration board (tasks, links, comments)", + description=( + "Durable SQLite-backed task board shared across Hermes profiles. " + "Tasks are claimed atomically, can depend on other tasks, and " + "are executed by a named profile in an isolated workspace. " + "See https://hermes-agent.nousresearch.com/docs/user-guide/features/kanban " + "or docs/hermes-kanban-v1-spec.pdf for the full design." 
+ ), + ) + sub = kanban_parser.add_subparsers(dest="kanban_action") + + # --- init --- + sub.add_parser("init", help="Create kanban.db if missing (idempotent)") + + # --- create --- + p_create = sub.add_parser("create", help="Create a new task") + p_create.add_argument("title", help="Task title") + p_create.add_argument("--body", default=None, help="Optional opening post") + p_create.add_argument("--assignee", default=None, help="Profile name to assign") + p_create.add_argument("--parent", action="append", default=[], + help="Parent task id (repeatable)") + p_create.add_argument("--workspace", default="scratch", + help="scratch | worktree | dir: (default: scratch)") + p_create.add_argument("--tenant", default=None, help="Tenant namespace") + p_create.add_argument("--priority", type=int, default=0, help="Priority tiebreaker") + p_create.add_argument("--created-by", default="user", + help="Author name recorded on the task (default: user)") + p_create.add_argument("--json", action="store_true", help="Emit JSON output") + + # --- list --- + p_list = sub.add_parser("list", aliases=["ls"], help="List tasks") + p_list.add_argument("--mine", action="store_true", + help="Filter by $HERMES_PROFILE as assignee") + p_list.add_argument("--assignee", default=None) + p_list.add_argument("--status", default=None, + choices=sorted(kb.VALID_STATUSES)) + p_list.add_argument("--tenant", default=None) + p_list.add_argument("--archived", action="store_true", + help="Include archived tasks") + p_list.add_argument("--json", action="store_true") + + # --- show --- + p_show = sub.add_parser("show", help="Show a task with comments + events") + p_show.add_argument("task_id") + p_show.add_argument("--json", action="store_true") + + # --- assign --- + p_assign = sub.add_parser("assign", help="Assign or reassign a task") + p_assign.add_argument("task_id") + p_assign.add_argument("profile", help="Profile name (or 'none' to unassign)") + + # --- link / unlink --- + p_link = sub.add_parser("link", 
help="Add a parent->child dependency") + p_link.add_argument("parent_id") + p_link.add_argument("child_id") + p_unlink = sub.add_parser("unlink", help="Remove a parent->child dependency") + p_unlink.add_argument("parent_id") + p_unlink.add_argument("child_id") + + # --- claim --- + p_claim = sub.add_parser( + "claim", + help="Atomically claim a ready task (prints resolved workspace path)", + ) + p_claim.add_argument("task_id") + p_claim.add_argument("--ttl", type=int, default=kb.DEFAULT_CLAIM_TTL_SECONDS, + help="Claim TTL in seconds (default: 900)") + + # --- comment / complete / block / unblock / archive --- + p_comment = sub.add_parser("comment", help="Append a comment") + p_comment.add_argument("task_id") + p_comment.add_argument("text", nargs="+", help="Comment body") + p_comment.add_argument("--author", default=None, + help="Author name (default: $HERMES_PROFILE or 'user')") + + p_complete = sub.add_parser("complete", help="Mark a task done") + p_complete.add_argument("task_id") + p_complete.add_argument("--result", default=None, help="Result summary") + + p_block = sub.add_parser("block", help="Mark a task blocked (needs input)") + p_block.add_argument("task_id") + p_block.add_argument("reason", nargs="*", help="Reason (also appended as a comment)") + + p_unblock = sub.add_parser("unblock", help="Return a blocked task to ready") + p_unblock.add_argument("task_id") + + p_archive = sub.add_parser("archive", help="Archive a task (hide from default list)") + p_archive.add_argument("task_id") + + # --- tail --- + p_tail = sub.add_parser("tail", help="Follow a task's event stream") + p_tail.add_argument("task_id") + p_tail.add_argument("--interval", type=float, default=1.0) + + # --- dispatch --- + p_disp = sub.add_parser( + "dispatch", + help="One dispatcher pass: reclaim stale, promote ready, spawn workers", + ) + p_disp.add_argument("--dry-run", action="store_true", + help="Don't actually spawn processes; just print what would happen") + 
p_disp.add_argument("--max", type=int, default=None, + help="Cap number of spawns this pass") + p_disp.add_argument("--json", action="store_true") + + # --- context --- (for spawned workers) + p_ctx = sub.add_parser( + "context", + help="Print the full context a worker sees for a task " + "(title + body + parent results + comments).", + ) + p_ctx.add_argument("task_id") + + # --- gc --- + sub.add_parser( + "gc", help="Garbage-collect workspaces of archived tasks" + ) + + kanban_parser.set_defaults(_kanban_parser=kanban_parser) + return kanban_parser + + +# --------------------------------------------------------------------------- +# Command dispatch +# --------------------------------------------------------------------------- + +def kanban_command(args: argparse.Namespace) -> int: + """Entry point from ``hermes kanban …`` argparse dispatch. + + Returns a shell-style exit code (0 on success, non-zero on error). + """ + action = getattr(args, "kanban_action", None) + if not action: + # No subaction given: print help via the stored parser reference. 
+ parser = getattr(args, "_kanban_parser", None) + if parser is not None: + parser.print_help() + else: + print( + "usage: hermes kanban [options]\n" + "Run 'hermes kanban --help' for the full list of actions.", + file=sys.stderr, + ) + return 0 + + handlers = { + "init": _cmd_init, + "create": _cmd_create, + "list": _cmd_list, + "ls": _cmd_list, + "show": _cmd_show, + "assign": _cmd_assign, + "link": _cmd_link, + "unlink": _cmd_unlink, + "claim": _cmd_claim, + "comment": _cmd_comment, + "complete": _cmd_complete, + "block": _cmd_block, + "unblock": _cmd_unblock, + "archive": _cmd_archive, + "tail": _cmd_tail, + "dispatch": _cmd_dispatch, + "context": _cmd_context, + "gc": _cmd_gc, + } + handler = handlers.get(action) + if not handler: + print(f"kanban: unknown action {action!r}", file=sys.stderr) + return 2 + try: + return int(handler(args) or 0) + except (ValueError, RuntimeError) as exc: + print(f"kanban: {exc}", file=sys.stderr) + return 1 + + +# --------------------------------------------------------------------------- +# Handlers +# --------------------------------------------------------------------------- + +def _profile_author() -> str: + """Best-effort author name for an interactive CLI call.""" + for env in ("HERMES_PROFILE_NAME", "HERMES_PROFILE"): + v = os.environ.get(env) + if v: + return v + try: + from hermes_cli.profiles import get_active_profile_name + return get_active_profile_name() or "user" + except Exception: + return "user" + + +def _cmd_init(args: argparse.Namespace) -> int: + path = kb.init_db() + print(f"Kanban DB initialized at {path}") + return 0 + + +def _cmd_create(args: argparse.Namespace) -> int: + ws_kind, ws_path = _parse_workspace_flag(args.workspace) + with kb.connect() as conn: + task_id = kb.create_task( + conn, + title=args.title, + body=args.body, + assignee=args.assignee, + created_by=args.created_by or _profile_author(), + workspace_kind=ws_kind, + workspace_path=ws_path, + tenant=args.tenant, + priority=args.priority, + 
parents=tuple(args.parent or ()), + ) + task = kb.get_task(conn, task_id) + if getattr(args, "json", False): + print(json.dumps(_task_to_dict(task), indent=2, ensure_ascii=False)) + else: + print(f"Created {task_id} ({task.status}, assignee={task.assignee or '-'})") + return 0 + + +def _cmd_list(args: argparse.Namespace) -> int: + assignee = args.assignee + if args.mine and not assignee: + assignee = _profile_author() + with kb.connect() as conn: + # Cheap "mini-dispatch": recompute ready so list output reflects + # dependencies that may have cleared since the last dispatcher tick. + kb.recompute_ready(conn) + tasks = kb.list_tasks( + conn, + assignee=assignee, + status=args.status, + tenant=args.tenant, + include_archived=args.archived, + ) + if getattr(args, "json", False): + print(json.dumps([_task_to_dict(t) for t in tasks], indent=2, ensure_ascii=False)) + return 0 + if not tasks: + print("(no matching tasks)") + return 0 + for t in tasks: + print(_fmt_task_line(t)) + return 0 + + +def _cmd_show(args: argparse.Namespace) -> int: + with kb.connect() as conn: + task = kb.get_task(conn, args.task_id) + if not task: + print(f"no such task: {args.task_id}", file=sys.stderr) + return 1 + comments = kb.list_comments(conn, args.task_id) + events = kb.list_events(conn, args.task_id) + parents = kb.parent_ids(conn, args.task_id) + children = kb.child_ids(conn, args.task_id) + + if getattr(args, "json", False): + payload = { + "task": _task_to_dict(task), + "parents": parents, + "children": children, + "comments": [ + {"author": c.author, "body": c.body, "created_at": c.created_at} + for c in comments + ], + "events": [ + {"kind": e.kind, "payload": e.payload, "created_at": e.created_at} + for e in events + ], + } + print(json.dumps(payload, indent=2, ensure_ascii=False)) + return 0 + + print(f"Task {task.id}: {task.title}") + print(f" status: {task.status}") + print(f" assignee: {task.assignee or '-'}") + if task.tenant: + print(f" tenant: {task.tenant}") + print(f" 
workspace: {task.workspace_kind}" + + (f" @ {task.workspace_path}" if task.workspace_path else "")) + print(f" created: {_fmt_ts(task.created_at)} by {task.created_by or '-'}") + if task.started_at: + print(f" started: {_fmt_ts(task.started_at)}") + if task.completed_at: + print(f" completed: {_fmt_ts(task.completed_at)}") + if parents: + print(f" parents: {', '.join(parents)}") + if children: + print(f" children: {', '.join(children)}") + if task.body: + print() + print("Body:") + print(task.body) + if task.result: + print() + print("Result:") + print(task.result) + if comments: + print() + print(f"Comments ({len(comments)}):") + for c in comments: + print(f" [{_fmt_ts(c.created_at)}] {c.author}: {c.body}") + if events: + print() + print(f"Events ({len(events)}):") + for e in events[-20:]: + pl = f" {e.payload}" if e.payload else "" + print(f" [{_fmt_ts(e.created_at)}] {e.kind}{pl}") + return 0 + + +def _cmd_assign(args: argparse.Namespace) -> int: + profile = None if args.profile.lower() in ("none", "-", "null") else args.profile + with kb.connect() as conn: + ok = kb.assign_task(conn, args.task_id, profile) + if not ok: + print(f"no such task: {args.task_id}", file=sys.stderr) + return 1 + print(f"Assigned {args.task_id} to {profile or '(unassigned)'}") + return 0 + + +def _cmd_link(args: argparse.Namespace) -> int: + with kb.connect() as conn: + kb.link_tasks(conn, args.parent_id, args.child_id) + print(f"Linked {args.parent_id} -> {args.child_id}") + return 0 + + +def _cmd_unlink(args: argparse.Namespace) -> int: + with kb.connect() as conn: + ok = kb.unlink_tasks(conn, args.parent_id, args.child_id) + if not ok: + print(f"No such link: {args.parent_id} -> {args.child_id}", file=sys.stderr) + return 1 + print(f"Unlinked {args.parent_id} -> {args.child_id}") + return 0 + + +def _cmd_claim(args: argparse.Namespace) -> int: + with kb.connect() as conn: + task = kb.claim_task(conn, args.task_id, ttl_seconds=args.ttl) + if task is None: + # Report why + existing = 
kb.get_task(conn, args.task_id) + if existing is None: + print(f"no such task: {args.task_id}", file=sys.stderr) + return 1 + print( + f"cannot claim {args.task_id}: status={existing.status} " + f"lock={existing.claim_lock or '(none)'}", + file=sys.stderr, + ) + return 1 + workspace = kb.resolve_workspace(task) + kb.set_workspace_path(conn, task.id, str(workspace)) + print(f"Claimed {task.id}") + print(f"Workspace: {workspace}") + return 0 + + +def _cmd_comment(args: argparse.Namespace) -> int: + body = " ".join(args.text).strip() + author = args.author or _profile_author() + with kb.connect() as conn: + kb.add_comment(conn, args.task_id, author, body) + print(f"Comment added to {args.task_id}") + return 0 + + +def _cmd_complete(args: argparse.Namespace) -> int: + with kb.connect() as conn: + ok = kb.complete_task(conn, args.task_id, result=args.result) + if not ok: + print(f"cannot complete {args.task_id} (unknown id or terminal state)", file=sys.stderr) + return 1 + print(f"Completed {args.task_id}") + return 0 + + +def _cmd_block(args: argparse.Namespace) -> int: + reason = " ".join(args.reason).strip() if args.reason else None + author = _profile_author() + with kb.connect() as conn: + if reason: + kb.add_comment(conn, args.task_id, author, f"BLOCKED: {reason}") + ok = kb.block_task(conn, args.task_id, reason=reason) + if not ok: + print(f"cannot block {args.task_id}", file=sys.stderr) + return 1 + print(f"Blocked {args.task_id}" + (f": {reason}" if reason else "")) + return 0 + + +def _cmd_unblock(args: argparse.Namespace) -> int: + with kb.connect() as conn: + ok = kb.unblock_task(conn, args.task_id) + if not ok: + print(f"cannot unblock {args.task_id} (not blocked?)", file=sys.stderr) + return 1 + print(f"Unblocked {args.task_id}") + return 0 + + +def _cmd_archive(args: argparse.Namespace) -> int: + with kb.connect() as conn: + ok = kb.archive_task(conn, args.task_id) + if not ok: + print(f"cannot archive {args.task_id}", file=sys.stderr) + return 1 + 
print(f"Archived {args.task_id}") + return 0 + + +def _cmd_tail(args: argparse.Namespace) -> int: + last_id = 0 + print(f"Tailing events for {args.task_id}. Ctrl-C to stop.") + try: + while True: + with kb.connect() as conn: + events = kb.list_events(conn, args.task_id) + for e in events: + if e.id > last_id: + pl = f" {e.payload}" if e.payload else "" + print(f"[{_fmt_ts(e.created_at)}] {e.kind}{pl}", flush=True) + last_id = e.id + time.sleep(max(0.1, args.interval)) + except KeyboardInterrupt: + print("\n(stopped)") + return 0 + + +def _cmd_dispatch(args: argparse.Namespace) -> int: + with kb.connect() as conn: + res = kb.dispatch_once( + conn, + dry_run=args.dry_run, + max_spawn=args.max, + ) + if getattr(args, "json", False): + print(json.dumps({ + "reclaimed": res.reclaimed, + "promoted": res.promoted, + "spawned": [ + {"task_id": tid, "assignee": who, "workspace": ws} + for (tid, who, ws) in res.spawned + ], + "skipped_unassigned": res.skipped_unassigned, + }, indent=2)) + return 0 + print(f"Reclaimed: {res.reclaimed}") + print(f"Promoted: {res.promoted}") + print(f"Spawned: {len(res.spawned)}") + for tid, who, ws in res.spawned: + tag = " (dry)" if args.dry_run else "" + print(f" - {tid} -> {who} @ {ws or '-'}{tag}") + if res.skipped_unassigned: + print(f"Skipped (unassigned): {', '.join(res.skipped_unassigned)}") + return 0 + + +def _cmd_context(args: argparse.Namespace) -> int: + with kb.connect() as conn: + text = kb.build_worker_context(conn, args.task_id) + print(text) + return 0 + + +def _cmd_gc(args: argparse.Namespace) -> int: + """Remove scratch workspaces of archived tasks. + + Only touches directories under the default scratch root; leaves user + ``dir:`` workspaces and ``worktree`` dirs alone (user owns those). 
+ """ + import shutil + scratch_root = kb.workspaces_root() + removed = 0 + with kb.connect() as conn: + rows = conn.execute( + "SELECT id, workspace_kind, workspace_path FROM tasks WHERE status = 'archived'" + ).fetchall() + for row in rows: + if row["workspace_kind"] != "scratch": + continue + path = Path(row["workspace_path"] or (scratch_root / row["id"])) + try: + path = path.resolve() + except OSError: + continue + try: + scratch_root.resolve().relative_to(scratch_root.resolve()) + path.relative_to(scratch_root.resolve()) + except ValueError: + # Safety: never delete outside the scratch root. + continue + if path.exists() and path.is_dir(): + shutil.rmtree(path, ignore_errors=True) + removed += 1 + print(f"GC complete: removed {removed} scratch workspace(s)") + return 0 + + +# --------------------------------------------------------------------------- +# Slash-command entry point (used by /kanban from CLI and gateway) +# --------------------------------------------------------------------------- + +def run_slash(rest: str) -> str: + """Execute a ``/kanban …`` string and return captured stdout/stderr. + + ``rest`` is everything after ``/kanban`` (may be empty). Used from + both the interactive CLI (``self._handle_kanban_command``) and the + gateway (``_handle_kanban_command``) so formatting is identical. + """ + import io + import contextlib + + tokens = shlex.split(rest) if rest and rest.strip() else [] + + parser = argparse.ArgumentParser(prog="/kanban", add_help=False) + parser.exit_on_error = False # type: ignore[attr-defined] + sub = parser.add_subparsers(dest="kanban_action") + # Reuse the argparse builder -- call it with a throwaway parent + # subparsers via a wrapping top-level parser. 
+ wrap = argparse.ArgumentParser(prog="/", add_help=False) + wrap.exit_on_error = False # type: ignore[attr-defined] + wrap_sub = wrap.add_subparsers(dest="_top") + build_parser(wrap_sub) + + buf_out = io.StringIO() + buf_err = io.StringIO() + try: + # Prepend the "kanban" token so our top-level subparser routes here. + argv = ["kanban", *tokens] if tokens else ["kanban"] + args = wrap.parse_args(argv) + except SystemExit as exc: + return f"(usage error: {exc})" + except argparse.ArgumentError as exc: + return f"(usage error: {exc})" + + with contextlib.redirect_stdout(buf_out), contextlib.redirect_stderr(buf_err): + try: + kanban_command(args) + except SystemExit: + pass + except Exception as exc: + print(f"error: {exc}", file=sys.stderr) + + out = buf_out.getvalue().rstrip() + err = buf_err.getvalue().rstrip() + if err and out: + return f"{out}\n{err}" + return err if err else (out or "(no output)") diff --git a/hermes_cli/kanban_db.py b/hermes_cli/kanban_db.py new file mode 100644 index 0000000000..862f9f3c1d --- /dev/null +++ b/hermes_cli/kanban_db.py @@ -0,0 +1,1067 @@ +"""SQLite-backed Kanban board for multi-profile collaboration. + +The board lives at ``$HERMES_HOME/kanban.db`` (profile-agnostic on purpose: +multiple profiles on the same machine all see the same board, which IS the +coordination primitive). + +Schema is intentionally small: tasks, task_links, task_comments, +task_events. The ``workspace_kind`` field decouples coordination from git +worktrees so that research / ops / digital-twin workloads work alongside +coding workloads. See ``docs/hermes-kanban-v1-spec.pdf`` for the full +design specification. + +Concurrency strategy: WAL mode + ``BEGIN IMMEDIATE`` for write +transactions + compare-and-swap (CAS) updates on ``tasks.status`` and +``tasks.claim_lock``. SQLite serializes writers via its WAL lock, so at +most one claimer can win any given task. Losers observe zero affected +rows and move on -- no retry loops, no distributed-lock machinery. 
+""" + +from __future__ import annotations + +import contextlib +import json +import os +import secrets +import sqlite3 +import time +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Iterable, Optional + + +# --------------------------------------------------------------------------- +# Constants +# --------------------------------------------------------------------------- + +VALID_STATUSES = {"todo", "ready", "running", "blocked", "done", "archived"} +VALID_WORKSPACE_KINDS = {"scratch", "worktree", "dir"} + +# A running task's claim is valid for 15 minutes; after that the next +# dispatcher tick reclaims it. Workers that outlive this window should call +# ``heartbeat_claim(task_id)`` periodically. In practice most kanban +# workloads either finish within 15m or set a longer claim explicitly. +DEFAULT_CLAIM_TTL_SECONDS = 15 * 60 + + +# --------------------------------------------------------------------------- +# Paths +# --------------------------------------------------------------------------- + +def kanban_db_path() -> Path: + """Return the path to ``kanban.db`` inside the active HERMES_HOME.""" + from hermes_constants import get_hermes_home + return get_hermes_home() / "kanban.db" + + +def workspaces_root() -> Path: + """Return the directory under which ``scratch`` workspaces are created.""" + from hermes_constants import get_hermes_home + return get_hermes_home() / "kanban" / "workspaces" + + +# --------------------------------------------------------------------------- +# Data classes +# --------------------------------------------------------------------------- + +@dataclass +class Task: + """In-memory view of a row from the ``tasks`` table.""" + + id: str + title: str + body: Optional[str] + assignee: Optional[str] + status: str + priority: int + created_by: Optional[str] + created_at: int + started_at: Optional[int] + completed_at: Optional[int] + workspace_kind: str + workspace_path: Optional[str] + claim_lock: 
Optional[str] + claim_expires: Optional[int] + tenant: Optional[str] + result: Optional[str] = None + + @classmethod + def from_row(cls, row: sqlite3.Row) -> "Task": + return cls( + id=row["id"], + title=row["title"], + body=row["body"], + assignee=row["assignee"], + status=row["status"], + priority=row["priority"], + created_by=row["created_by"], + created_at=row["created_at"], + started_at=row["started_at"], + completed_at=row["completed_at"], + workspace_kind=row["workspace_kind"], + workspace_path=row["workspace_path"], + claim_lock=row["claim_lock"], + claim_expires=row["claim_expires"], + tenant=row["tenant"] if "tenant" in row.keys() else None, + result=row["result"] if "result" in row.keys() else None, + ) + + +@dataclass +class Comment: + id: int + task_id: str + author: str + body: str + created_at: int + + +@dataclass +class Event: + id: int + task_id: str + kind: str + payload: Optional[dict] + created_at: int + + +# --------------------------------------------------------------------------- +# Schema +# --------------------------------------------------------------------------- + +SCHEMA_SQL = """ +CREATE TABLE IF NOT EXISTS tasks ( + id TEXT PRIMARY KEY, + title TEXT NOT NULL, + body TEXT, + assignee TEXT, + status TEXT NOT NULL, + priority INTEGER DEFAULT 0, + created_by TEXT, + created_at INTEGER NOT NULL, + started_at INTEGER, + completed_at INTEGER, + workspace_kind TEXT NOT NULL DEFAULT 'scratch', + workspace_path TEXT, + claim_lock TEXT, + claim_expires INTEGER, + tenant TEXT, + result TEXT +); + +CREATE TABLE IF NOT EXISTS task_links ( + parent_id TEXT NOT NULL, + child_id TEXT NOT NULL, + PRIMARY KEY (parent_id, child_id) +); + +CREATE TABLE IF NOT EXISTS task_comments ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + task_id TEXT NOT NULL, + author TEXT NOT NULL, + body TEXT NOT NULL, + created_at INTEGER NOT NULL +); + +CREATE TABLE IF NOT EXISTS task_events ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + task_id TEXT NOT NULL, + kind TEXT NOT NULL, + 
payload TEXT, + created_at INTEGER NOT NULL +); + +CREATE INDEX IF NOT EXISTS idx_tasks_assignee_status ON tasks(assignee, status); +CREATE INDEX IF NOT EXISTS idx_tasks_status ON tasks(status); +CREATE INDEX IF NOT EXISTS idx_tasks_tenant ON tasks(tenant); +CREATE INDEX IF NOT EXISTS idx_links_child ON task_links(child_id); +CREATE INDEX IF NOT EXISTS idx_links_parent ON task_links(parent_id); +CREATE INDEX IF NOT EXISTS idx_comments_task ON task_comments(task_id, created_at); +CREATE INDEX IF NOT EXISTS idx_events_task ON task_events(task_id, created_at); +""" + + +# --------------------------------------------------------------------------- +# Connection helpers +# --------------------------------------------------------------------------- + +def connect(db_path: Optional[Path] = None) -> sqlite3.Connection: + """Open (and initialize if needed) the kanban DB. + + WAL mode is enabled on every connection; it's a no-op after the first + time but keeps the code robust if the DB file is ever re-created. + """ + path = db_path or kanban_db_path() + path.parent.mkdir(parents=True, exist_ok=True) + conn = sqlite3.connect(str(path), isolation_level=None, timeout=30) + conn.row_factory = sqlite3.Row + conn.execute("PRAGMA journal_mode=WAL") + conn.execute("PRAGMA synchronous=NORMAL") + conn.execute("PRAGMA foreign_keys=ON") + return conn + + +def init_db(db_path: Optional[Path] = None) -> Path: + """Create the schema if it doesn't exist; return the path used.""" + path = db_path or kanban_db_path() + with contextlib.closing(connect(path)) as conn: + conn.executescript(SCHEMA_SQL) + _migrate_add_optional_columns(conn) + return path + + +def _migrate_add_optional_columns(conn: sqlite3.Connection) -> None: + """Add columns that were introduced after v1 release to legacy DBs. + + Called by ``init_db`` so opening an old DB is always safe. 
+ """ + cols = {row["name"] for row in conn.execute("PRAGMA table_info(tasks)")} + if "tenant" not in cols: + conn.execute("ALTER TABLE tasks ADD COLUMN tenant TEXT") + if "result" not in cols: + conn.execute("ALTER TABLE tasks ADD COLUMN result TEXT") + + +@contextlib.contextmanager +def write_txn(conn: sqlite3.Connection): + """Context manager for an IMMEDIATE write transaction. + + Use for any multi-statement write (creating a task + link, claiming a + task + recording an event, etc.). A claim CAS inside this context is + atomic -- at most one concurrent writer can succeed. + """ + conn.execute("BEGIN IMMEDIATE") + try: + yield conn + except Exception: + conn.execute("ROLLBACK") + raise + else: + conn.execute("COMMIT") + + +# --------------------------------------------------------------------------- +# ID generation +# --------------------------------------------------------------------------- + +def _new_task_id() -> str: + """Generate a short, URL-safe, human-readable task id. + + Format: ``t_<4 hex chars>``. Space is 65k values; collisions are + rare but handled by a one-shot retry in ``create_task``. + """ + return "t_" + secrets.token_hex(2) + + +def _claimer_id() -> str: + """Return a ``host:pid`` string that identifies this claimer.""" + import socket + try: + host = socket.gethostname() or "unknown" + except Exception: + host = "unknown" + return f"{host}:{os.getpid()}" + + +# --------------------------------------------------------------------------- +# Task creation / mutation +# --------------------------------------------------------------------------- + +def create_task( + conn: sqlite3.Connection, + *, + title: str, + body: Optional[str] = None, + assignee: Optional[str] = None, + created_by: Optional[str] = None, + workspace_kind: str = "scratch", + workspace_path: Optional[str] = None, + tenant: Optional[str] = None, + priority: int = 0, + parents: Iterable[str] = (), +) -> str: + """Create a new task and optionally link it under parent tasks. 
+ + Returns the new task id. Status is ``ready`` when there are no + parents (or all parents already ``done``), otherwise ``todo``. + """ + if not title or not title.strip(): + raise ValueError("title is required") + if workspace_kind not in VALID_WORKSPACE_KINDS: + raise ValueError( + f"workspace_kind must be one of {sorted(VALID_WORKSPACE_KINDS)}, " + f"got {workspace_kind!r}" + ) + parents = tuple(p for p in parents if p) + + now = int(time.time()) + + # Retry once on the extremely unlikely id collision. + for attempt in range(2): + task_id = _new_task_id() + try: + with write_txn(conn): + # Determine initial status from parent status. + initial_status = "ready" + if parents: + missing = _find_missing_parents(conn, parents) + if missing: + raise ValueError(f"unknown parent task(s): {', '.join(missing)}") + # If any parent is not yet done, we're todo. + rows = conn.execute( + "SELECT status FROM tasks WHERE id IN " + "(" + ",".join("?" * len(parents)) + ")", + parents, + ).fetchall() + if any(r["status"] != "done" for r in rows): + initial_status = "todo" + + conn.execute( + """ + INSERT INTO tasks ( + id, title, body, assignee, status, priority, + created_by, created_at, workspace_kind, workspace_path, + tenant + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + """, + ( + task_id, + title.strip(), + body, + assignee, + initial_status, + priority, + created_by, + now, + workspace_kind, + workspace_path, + tenant, + ), + ) + for pid in parents: + conn.execute( + "INSERT OR IGNORE INTO task_links (parent_id, child_id) VALUES (?, ?)", + (pid, task_id), + ) + _append_event( + conn, + task_id, + "created", + { + "assignee": assignee, + "status": initial_status, + "parents": list(parents), + "tenant": tenant, + }, + ) + return task_id + except sqlite3.IntegrityError: + if attempt == 1: + raise + # Retry with a fresh id. 
+ continue + raise RuntimeError("unreachable") + + +def _find_missing_parents(conn: sqlite3.Connection, parents: Iterable[str]) -> list[str]: + parents = list(parents) + if not parents: + return [] + placeholders = ",".join("?" * len(parents)) + rows = conn.execute( + f"SELECT id FROM tasks WHERE id IN ({placeholders})", + parents, + ).fetchall() + present = {r["id"] for r in rows} + return [p for p in parents if p not in present] + + +def get_task(conn: sqlite3.Connection, task_id: str) -> Optional[Task]: + row = conn.execute("SELECT * FROM tasks WHERE id = ?", (task_id,)).fetchone() + return Task.from_row(row) if row else None + + +def list_tasks( + conn: sqlite3.Connection, + *, + assignee: Optional[str] = None, + status: Optional[str] = None, + tenant: Optional[str] = None, + include_archived: bool = False, + limit: Optional[int] = None, +) -> list[Task]: + query = "SELECT * FROM tasks WHERE 1=1" + params: list[Any] = [] + if assignee is not None: + query += " AND assignee = ?" + params.append(assignee) + if status is not None: + if status not in VALID_STATUSES: + raise ValueError(f"status must be one of {sorted(VALID_STATUSES)}") + query += " AND status = ?" + params.append(status) + if tenant is not None: + query += " AND tenant = ?" + params.append(tenant) + if not include_archived and status != "archived": + query += " AND status != 'archived'" + query += " ORDER BY priority DESC, created_at ASC" + if limit: + query += f" LIMIT {int(limit)}" + rows = conn.execute(query, params).fetchall() + return [Task.from_row(r) for r in rows] + + +def assign_task(conn: sqlite3.Connection, task_id: str, profile: Optional[str]) -> bool: + """Assign or reassign a task. Returns True on success. + + Refuses to reassign a task that's currently running (claim_lock set). + Reassign after the current run completes if needed. 
+ """ + with write_txn(conn): + row = conn.execute( + "SELECT status, claim_lock FROM tasks WHERE id = ?", (task_id,) + ).fetchone() + if not row: + return False + if row["claim_lock"] is not None and row["status"] == "running": + raise RuntimeError( + f"cannot reassign {task_id}: currently running (claimed). " + "Wait for completion or reclaim the stale lock first." + ) + conn.execute("UPDATE tasks SET assignee = ? WHERE id = ?", (profile, task_id)) + _append_event(conn, task_id, "assigned", {"assignee": profile}) + return True + + +# --------------------------------------------------------------------------- +# Links +# --------------------------------------------------------------------------- + +def link_tasks(conn: sqlite3.Connection, parent_id: str, child_id: str) -> None: + if parent_id == child_id: + raise ValueError("a task cannot depend on itself") + with write_txn(conn): + missing = _find_missing_parents(conn, [parent_id, child_id]) + if missing: + raise ValueError(f"unknown task(s): {', '.join(missing)}") + if _would_cycle(conn, parent_id, child_id): + raise ValueError( + f"linking {parent_id} -> {child_id} would create a cycle" + ) + conn.execute( + "INSERT OR IGNORE INTO task_links (parent_id, child_id) VALUES (?, ?)", + (parent_id, child_id), + ) + # If child was ready but parent is not yet done, demote child to todo. + parent_status = conn.execute( + "SELECT status FROM tasks WHERE id = ?", (parent_id,) + ).fetchone()["status"] + if parent_status != "done": + conn.execute( + "UPDATE tasks SET status = 'todo' WHERE id = ? AND status = 'ready'", + (child_id,), + ) + _append_event( + conn, child_id, "linked", + {"parent": parent_id, "child": child_id}, + ) + + +def _would_cycle(conn: sqlite3.Connection, parent_id: str, child_id: str) -> bool: + """Return True if adding parent->child creates a cycle. + + A cycle exists iff ``parent_id`` is already a descendant of + ``child_id`` via existing parent->child links. 
We walk downward + from ``child_id`` and check whether we reach ``parent_id``. + """ + seen = set() + stack = [child_id] + while stack: + node = stack.pop() + if node == parent_id: + return True + if node in seen: + continue + seen.add(node) + rows = conn.execute( + "SELECT child_id FROM task_links WHERE parent_id = ?", (node,) + ).fetchall() + stack.extend(r["child_id"] for r in rows) + return False + + +def unlink_tasks(conn: sqlite3.Connection, parent_id: str, child_id: str) -> bool: + with write_txn(conn): + cur = conn.execute( + "DELETE FROM task_links WHERE parent_id = ? AND child_id = ?", + (parent_id, child_id), + ) + if cur.rowcount: + _append_event( + conn, child_id, "unlinked", + {"parent": parent_id, "child": child_id}, + ) + return cur.rowcount > 0 + + +def parent_ids(conn: sqlite3.Connection, task_id: str) -> list[str]: + rows = conn.execute( + "SELECT parent_id FROM task_links WHERE child_id = ? ORDER BY parent_id", + (task_id,), + ).fetchall() + return [r["parent_id"] for r in rows] + + +def child_ids(conn: sqlite3.Connection, task_id: str) -> list[str]: + rows = conn.execute( + "SELECT child_id FROM task_links WHERE parent_id = ? ORDER BY child_id", + (task_id,), + ).fetchall() + return [r["child_id"] for r in rows] + + +def parent_results(conn: sqlite3.Connection, task_id: str) -> list[tuple[str, Optional[str]]]: + """Return ``(parent_id, result)`` for every done parent of ``task_id``.""" + rows = conn.execute( + """ + SELECT t.id AS id, t.result AS result + FROM tasks t + JOIN task_links l ON l.parent_id = t.id + WHERE l.child_id = ? 
AND t.status = 'done' + ORDER BY t.completed_at ASC + """, + (task_id,), + ).fetchall() + return [(r["id"], r["result"]) for r in rows] + + +# --------------------------------------------------------------------------- +# Comments & events +# --------------------------------------------------------------------------- + +def add_comment( + conn: sqlite3.Connection, task_id: str, author: str, body: str +) -> int: + if not body or not body.strip(): + raise ValueError("comment body is required") + if not author or not author.strip(): + raise ValueError("comment author is required") + now = int(time.time()) + with write_txn(conn): + if not conn.execute( + "SELECT 1 FROM tasks WHERE id = ?", (task_id,) + ).fetchone(): + raise ValueError(f"unknown task {task_id}") + cur = conn.execute( + "INSERT INTO task_comments (task_id, author, body, created_at) " + "VALUES (?, ?, ?, ?)", + (task_id, author.strip(), body.strip(), now), + ) + _append_event(conn, task_id, "commented", {"author": author, "len": len(body)}) + return int(cur.lastrowid or 0) + + +def list_comments(conn: sqlite3.Connection, task_id: str) -> list[Comment]: + rows = conn.execute( + "SELECT * FROM task_comments WHERE task_id = ? ORDER BY created_at ASC", + (task_id,), + ).fetchall() + return [ + Comment( + id=r["id"], + task_id=r["task_id"], + author=r["author"], + body=r["body"], + created_at=r["created_at"], + ) + for r in rows + ] + + +def list_events(conn: sqlite3.Connection, task_id: str) -> list[Event]: + rows = conn.execute( + "SELECT * FROM task_events WHERE task_id = ? 
def _append_event(
    conn: sqlite3.Connection,
    task_id: str,
    kind: str,
    payload: Optional[dict] = None,
) -> None:
    """Record an event row.  Called from within an already-open txn;
    performs a single INSERT and no transaction management of its own."""
    now = int(time.time())
    pl = json.dumps(payload, ensure_ascii=False) if payload else None
    conn.execute(
        "INSERT INTO task_events (task_id, kind, payload, created_at) "
        "VALUES (?, ?, ?, ?)",
        (task_id, kind, pl, now),
    )


# ---------------------------------------------------------------------------
# Dependency resolution (todo -> ready)
# ---------------------------------------------------------------------------

def recompute_ready(conn: sqlite3.Connection) -> int:
    """Promote ``todo`` tasks to ``ready`` when all parents are ``done``.

    Returns the number of tasks promoted.  Opens its own IMMEDIATE
    transaction, so do NOT call it while another transaction is already
    open on ``conn`` -- the connection runs in autocommit mode and a
    nested ``BEGIN IMMEDIATE`` would fail.  (``complete_task`` therefore
    calls it only after its own transaction has committed.)
    """
    promoted = 0
    with write_txn(conn):
        todo_rows = conn.execute(
            "SELECT id FROM tasks WHERE status = 'todo'"
        ).fetchall()
        for row in todo_rows:
            task_id = row["id"]
            parents = conn.execute(
                "SELECT t.status FROM tasks t "
                "JOIN task_links l ON l.parent_id = t.id "
                "WHERE l.child_id = ?",
                (task_id,),
            ).fetchall()
            # A task with no parents at all also satisfies all(); such
            # tasks normally start as 'ready', so this mainly promotes
            # children whose last parent just completed.
            if all(p["status"] == "done" for p in parents):
                conn.execute(
                    "UPDATE tasks SET status = 'ready' WHERE id = ? AND status = 'todo'",
                    (task_id,),
                )
                _append_event(conn, task_id, "ready", None)
                promoted += 1
    return promoted


# ---------------------------------------------------------------------------
# Claim / complete / block
# ---------------------------------------------------------------------------

def claim_task(
    conn: sqlite3.Connection,
    task_id: str,
    *,
    ttl_seconds: int = DEFAULT_CLAIM_TTL_SECONDS,
    claimer: Optional[str] = None,
) -> Optional[Task]:
    """Atomically transition ``ready -> running``.

    Returns the claimed ``Task`` on success, ``None`` if the task was
    already claimed (or is not in ``ready`` status).  The compare-and-set
    WHERE clause makes concurrent claimers mutually exclusive.
    """
    now = int(time.time())
    lock = claimer or _claimer_id()
    expires = now + int(ttl_seconds)
    with write_txn(conn):
        cur = conn.execute(
            """
            UPDATE tasks
            SET status = 'running',
                claim_lock = ?,
                claim_expires = ?,
                started_at = COALESCE(started_at, ?)
            WHERE id = ?
              AND status = 'ready'
              AND claim_lock IS NULL
            """,
            (lock, expires, now, task_id),
        )
        if cur.rowcount != 1:
            return None
        _append_event(conn, task_id, "claimed", {"lock": lock, "expires": expires})
        return get_task(conn, task_id)


def heartbeat_claim(
    conn: sqlite3.Connection,
    task_id: str,
    *,
    ttl_seconds: int = DEFAULT_CLAIM_TTL_SECONDS,
    claimer: Optional[str] = None,
) -> bool:
    """Extend a running claim.  Returns True if we still own it.

    Workers that expect to outlive the default claim TTL should call
    this periodically to keep ownership.
    """
    expires = int(time.time()) + int(ttl_seconds)
    lock = claimer or _claimer_id()
    with write_txn(conn):
        cur = conn.execute(
            "UPDATE tasks SET claim_expires = ? "
            "WHERE id = ? AND status = 'running' AND claim_lock = ?",
            (expires, task_id, lock),
        )
        return cur.rowcount == 1


def release_stale_claims(conn: sqlite3.Connection) -> int:
    """Reset any ``running`` task whose claim has expired.

    Returns the number of stale claims reclaimed.  Safe to call often.
    """
    now = int(time.time())
    reclaimed = 0
    with write_txn(conn):
        stale = conn.execute(
            "SELECT id, claim_lock FROM tasks "
            "WHERE status = 'running' AND claim_expires IS NOT NULL AND claim_expires < ?",
            (now,),
        ).fetchall()
        for row in stale:
            conn.execute(
                "UPDATE tasks SET status = 'ready', claim_lock = NULL, "
                "claim_expires = NULL "
                "WHERE id = ? AND status = 'running'",
                (row["id"],),
            )
            _append_event(
                conn, row["id"], "reclaimed",
                {"stale_lock": row["claim_lock"]},
            )
            reclaimed += 1
    return reclaimed


def complete_task(
    conn: sqlite3.Connection,
    task_id: str,
    *,
    result: Optional[str] = None,
) -> bool:
    """Transition ``running|ready|blocked -> done`` and record ``result``.

    Accepts a task that's merely ``ready`` (or even ``blocked``) too, so
    a manual CLI completion (``hermes kanban complete <task-id>``) works
    without requiring a claim/start/complete sequence.
    """
    now = int(time.time())
    with write_txn(conn):
        cur = conn.execute(
            """
            UPDATE tasks
            SET status = 'done',
                result = ?,
                completed_at = ?,
                claim_lock = NULL,
                claim_expires= NULL
            WHERE id = ?
              AND status IN ('running', 'ready', 'blocked')
            """,
            (result, now, task_id),
        )
        if cur.rowcount != 1:
            return False
        _append_event(
            conn, task_id, "completed",
            {"result_len": len(result) if result else 0},
        )
    # Recompute ready status for dependents (separate txn so children see done).
    recompute_ready(conn)
    return True


def block_task(
    conn: sqlite3.Connection,
    task_id: str,
    *,
    reason: Optional[str] = None,
) -> bool:
    """Transition ``running|ready -> blocked``, releasing any claim.

    ``reason`` is recorded in the event payload only; it is not stored on
    the task row.
    """
    with write_txn(conn):
        cur = conn.execute(
            """
            UPDATE tasks
            SET status = 'blocked',
                claim_lock = NULL,
                claim_expires= NULL
            WHERE id = ?
              AND status IN ('running', 'ready')
            """,
            (task_id,),
        )
        if cur.rowcount != 1:
            return False
        _append_event(conn, task_id, "blocked", {"reason": reason})
        return True
def unblock_task(conn: sqlite3.Connection, task_id: str) -> bool:
    """Transition ``blocked -> ready``.  Returns False if the task was
    not in ``blocked`` status (or does not exist)."""
    with write_txn(conn):
        cur = conn.execute(
            "UPDATE tasks SET status = 'ready' WHERE id = ? AND status = 'blocked'",
            (task_id,),
        )
        if cur.rowcount != 1:
            return False
        _append_event(conn, task_id, "unblocked", None)
        return True


def archive_task(conn: sqlite3.Connection, task_id: str) -> bool:
    """Mark a task ``archived`` from any other status; False if it was
    already archived or unknown."""
    with write_txn(conn):
        cur = conn.execute(
            "UPDATE tasks SET status = 'archived' WHERE id = ? AND status != 'archived'",
            (task_id,),
        )
        if cur.rowcount != 1:
            return False
        _append_event(conn, task_id, "archived", None)
        return True


# ---------------------------------------------------------------------------
# Workspace resolution
# ---------------------------------------------------------------------------

def resolve_workspace(task: "Task") -> Path:
    """Resolve (and create if needed) the workspace for a task.

    - ``scratch``: a fresh dir under
      ``$HERMES_HOME/kanban/workspaces/<task-id>/``.
    - ``dir:<path>``: the path stored in ``workspace_path``.  Created if
      missing.
    - ``worktree``: a git worktree at ``workspace_path``.  Not created
      automatically in v1 -- the kanban-worker skill documents
      ``git worktree add`` as a worker-side step.  Returns the intended
      path without touching the filesystem.

    Persist the resolved path back to the task row via
    ``set_workspace_path`` so subsequent runs reuse the same directory.
    """
    kind = task.workspace_kind or "scratch"
    if kind == "scratch":
        if task.workspace_path:
            p = Path(task.workspace_path).expanduser()
        else:
            p = workspaces_root() / task.id
        p.mkdir(parents=True, exist_ok=True)
        return p
    if kind == "dir":
        if not task.workspace_path:
            raise ValueError(
                f"task {task.id} has workspace_kind=dir but no workspace_path"
            )
        p = Path(task.workspace_path).expanduser()
        p.mkdir(parents=True, exist_ok=True)
        return p
    if kind == "worktree":
        if not task.workspace_path:
            # Default: .worktrees/<task-id>/ under CWD. Worker skill creates it.
            return Path.cwd() / ".worktrees" / task.id
        return Path(task.workspace_path).expanduser()
    raise ValueError(f"unknown workspace_kind: {kind}")


def set_workspace_path(
    conn: sqlite3.Connection, task_id: str, path: Path | str
) -> None:
    """Persist the resolved workspace path on the task row."""
    with write_txn(conn):
        conn.execute(
            "UPDATE tasks SET workspace_path = ? WHERE id = ?",
            (str(path), task_id),
        )


# ---------------------------------------------------------------------------
# Dispatcher (one-shot pass)
# ---------------------------------------------------------------------------

@dataclass
class DispatchResult:
    """Outcome of a single ``dispatch`` pass."""

    reclaimed: int = 0
    promoted: int = 0
    spawned: list[tuple[str, str, str]] = field(default_factory=list)
    """List of ``(task_id, assignee, workspace_path)`` triples."""
    skipped_unassigned: list[str] = field(default_factory=list)


def dispatch_once(
    conn: sqlite3.Connection,
    *,
    spawn_fn=None,
    ttl_seconds: int = DEFAULT_CLAIM_TTL_SECONDS,
    dry_run: bool = False,
    max_spawn: Optional[int] = None,
) -> DispatchResult:
    """Run one dispatcher tick.

    Steps:
      1. Reclaim stale running tasks.
      2. Promote todo -> ready where all parents are done.
      3. For each ready task with an assignee, atomically claim and call
         ``spawn_fn(task, workspace_path)``.

    ``spawn_fn`` defaults to ``_default_spawn`` which invokes
    ``hermes -p <assignee> chat -q "..."`` in the background.  Tests pass
    a stub.  ``max_spawn`` caps the number of (real or dry-run) spawns.
    """
    # Resolve the default once, not per loop iteration.
    if spawn_fn is None:
        spawn_fn = _default_spawn

    result = DispatchResult()
    result.reclaimed = release_stale_claims(conn)
    result.promoted = recompute_ready(conn)

    ready_rows = conn.execute(
        "SELECT id, assignee FROM tasks "
        "WHERE status = 'ready' AND claim_lock IS NULL "
        "ORDER BY priority DESC, created_at ASC"
    ).fetchall()
    spawned = 0
    for row in ready_rows:
        if max_spawn is not None and spawned >= max_spawn:
            break
        if not row["assignee"]:
            result.skipped_unassigned.append(row["id"])
            continue
        if dry_run:
            result.spawned.append((row["id"], row["assignee"], ""))
            # BUGFIX: count dry-run spawns too, so max_spawn limits the
            # dry-run preview exactly like a real pass.
            spawned += 1
            continue
        claimed = claim_task(conn, row["id"], ttl_seconds=ttl_seconds)
        if claimed is None:
            # Lost the claim race to a concurrent dispatcher; move on.
            continue
        workspace = resolve_workspace(claimed)
        # Persist the resolved workspace path so the worker can cd there.
        set_workspace_path(conn, claimed.id, str(workspace))
        try:
            spawn_fn(claimed, str(workspace))
            result.spawned.append((claimed.id, claimed.assignee or "", str(workspace)))
            spawned += 1
        except Exception as exc:
            # Spawn failed: release the claim so the next tick can retry.
            with write_txn(conn):
                conn.execute(
                    "UPDATE tasks SET status = 'ready', claim_lock = NULL, "
                    "claim_expires = NULL WHERE id = ? AND status = 'running'",
                    (claimed.id,),
                )
                _append_event(
                    conn, claimed.id, "spawn_failed",
                    {"error": str(exc)[:500]},
                )
    return result


def _default_spawn(task: "Task", workspace: str) -> None:
    """Fire-and-forget ``hermes -p <assignee> chat -q ...`` subprocess.

    We don't wait for the child; its completion is observed by polling
    the board ``complete``/``block`` transitions that the worker writes.

    Raises ``ValueError`` if the task has no assignee and ``RuntimeError``
    if the ``hermes`` executable cannot be found.
    """
    import subprocess
    if not task.assignee:
        raise ValueError(f"task {task.id} has no assignee")

    prompt = f"work kanban task {task.id}"
    env = dict(os.environ)
    if task.tenant:
        env["HERMES_TENANT"] = task.tenant
    env["HERMES_KANBAN_TASK"] = task.id
    env["HERMES_KANBAN_WORKSPACE"] = workspace

    cmd = [
        "hermes",
        "-p", task.assignee,
        "chat",
        "-q", prompt,
    ]
    # Use Popen with DEVNULL stdin so the child doesn't inherit our tty.
    # Redirect output to a per-task log under HERMES_HOME/kanban/logs/.
    from hermes_constants import get_hermes_home
    log_dir = get_hermes_home() / "kanban" / "logs"
    log_dir.mkdir(parents=True, exist_ok=True)
    log_path = log_dir / f"{task.id}.log"

    # Use 'a' so a re-run on unblock appends rather than overwrites.
    log_f = open(log_path, "ab")
    try:
        subprocess.Popen(  # noqa: S603 -- argv is a fixed list built above
            cmd,
            cwd=workspace if os.path.isdir(workspace) else None,
            stdin=subprocess.DEVNULL,
            stdout=log_f,
            stderr=subprocess.STDOUT,
            env=env,
            start_new_session=True,
        )
    except FileNotFoundError:
        raise RuntimeError(
            "`hermes` executable not found on PATH. "
            "Install Hermes Agent or activate its venv before running the kanban dispatcher."
        )
    finally:
        # BUGFIX: the parent's file handle was previously leaked on every
        # successful spawn.  Popen duplicates the descriptor into the
        # child during spawn, so the child keeps writing to the log after
        # we close our copy here.
        log_f.close()
+ """ + import subprocess + if not task.assignee: + raise ValueError(f"task {task.id} has no assignee") + + prompt = f"work kanban task {task.id}" + env = dict(os.environ) + if task.tenant: + env["HERMES_TENANT"] = task.tenant + env["HERMES_KANBAN_TASK"] = task.id + env["HERMES_KANBAN_WORKSPACE"] = workspace + + cmd = [ + "hermes", + "-p", task.assignee, + "chat", + "-q", prompt, + ] + # Use Popen with DEVNULL stdin so the child doesn't inherit our tty. + # Redirect output to a per-task log under HERMES_HOME/kanban/logs/. + from hermes_constants import get_hermes_home + log_dir = get_hermes_home() / "kanban" / "logs" + log_dir.mkdir(parents=True, exist_ok=True) + log_path = log_dir / f"{task.id}.log" + + # Use 'a' so a re-run on unblock appends rather than overwrites. + log_f = open(log_path, "ab") + try: + subprocess.Popen( # noqa: S603 -- argv is a fixed list built above + cmd, + cwd=workspace if os.path.isdir(workspace) else None, + stdin=subprocess.DEVNULL, + stdout=log_f, + stderr=subprocess.STDOUT, + env=env, + start_new_session=True, + ) + except FileNotFoundError: + log_f.close() + raise RuntimeError( + "`hermes` executable not found on PATH. " + "Install Hermes Agent or activate its venv before running the kanban dispatcher." + ) + # NOTE: we intentionally do NOT close log_f here — we want Popen's + # child process to keep writing after this function returns. The + # handle is kept alive by the child's inheritance. The parent's + # reference goes out of scope and is GC'd, but the OS-level FD stays + # open in the child until the child exits. + + +# --------------------------------------------------------------------------- +# Worker context builder (what a spawned worker sees) +# --------------------------------------------------------------------------- + +def build_worker_context(conn: sqlite3.Connection, task_id: str) -> str: + """Return the full text a worker should read to understand its task. + + Order (per design spec §8): + 1. 
Task title (mandatory). + 2. Task body (optional opening post). + 3. Every comment on the task, chronologically, with authors. + 4. Completion results of every done parent task. + """ + task = get_task(conn, task_id) + if not task: + raise ValueError(f"unknown task {task_id}") + + lines: list[str] = [] + lines.append(f"# Kanban task {task.id}: {task.title}") + lines.append("") + lines.append(f"Assignee: {task.assignee or '(unassigned)'}") + lines.append(f"Status: {task.status}") + if task.tenant: + lines.append(f"Tenant: {task.tenant}") + lines.append(f"Workspace: {task.workspace_kind} @ {task.workspace_path or '(unresolved)'}") + lines.append("") + + if task.body and task.body.strip(): + lines.append("## Body") + lines.append(task.body.strip()) + lines.append("") + + parents = parent_results(conn, task_id) + if parents: + lines.append("## Parent task results") + for pid, result in parents: + lines.append(f"### {pid}") + lines.append((result or "(no result recorded)").strip()) + lines.append("") + + comments = list_comments(conn, task_id) + if comments: + lines.append("## Comment thread") + for c in comments: + ts = time.strftime("%Y-%m-%d %H:%M", time.localtime(c.created_at)) + lines.append(f"**{c.author}** ({ts}):") + lines.append(c.body.strip()) + lines.append("") + + return "\n".join(lines).rstrip() + "\n" diff --git a/hermes_cli/main.py b/hermes_cli/main.py index a53b8d2c5e..19623434d9 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -4780,6 +4780,13 @@ def cmd_webhook(args): webhook_command(args) +def cmd_kanban(args): + """Multi-profile collaboration board.""" + from hermes_cli.kanban import kanban_command + + return kanban_command(args) + + def cmd_hooks(args): """Shell-hook inspection and management.""" from hermes_cli.hooks import hooks_command @@ -8116,6 +8123,13 @@ For more help on a command: webhook_parser.set_defaults(func=cmd_webhook) + # ========================================================================= + # kanban command — 
multi-profile collaboration board + # ========================================================================= + from hermes_cli.kanban import build_parser as _build_kanban_parser + kanban_parser = _build_kanban_parser(subparsers) + kanban_parser.set_defaults(func=cmd_kanban) + # ========================================================================= # hooks command — shell-hook inspection and management # ========================================================================= diff --git a/skills/devops/kanban-orchestrator/SKILL.md b/skills/devops/kanban-orchestrator/SKILL.md new file mode 100644 index 0000000000..1b706b9fca --- /dev/null +++ b/skills/devops/kanban-orchestrator/SKILL.md @@ -0,0 +1,140 @@ +--- +name: kanban-orchestrator +description: Decompose user goals into Kanban tasks and delegate them to specialist profiles. Load this skill in an orchestrator profile whose job is routing, NOT execution. Triggers when the user's goal spans multiple profiles, needs parallel work, or should be durable/auditable. +version: 1.0.0 +metadata: + hermes: + tags: [kanban, multi-agent, orchestration, routing] + related_skills: [kanban-worker] +--- + +# Kanban Orchestrator + +**You are a dispatcher, not a worker.** + +Load this skill in an orchestrator profile. An orchestrator's job is to route: read the user's goal, decompose it into well-scoped tasks, assign each to the right specialist profile, link dependencies, and step back. It does NOT do research, writing, coding, or any implementation work itself. + +## When to use the board (vs. just doing the work) + +Create Kanban tasks when any of these are true: + +1. **Multiple specialists are needed.** Research + analysis + writing is three profiles. +2. **The work should survive a crash or restart.** Long-running, recurring, or important. +3. **The user might want to interject.** Human-in-the-loop at any step. +4. **Multiple subtasks can run in parallel.** Fan-out for speed. +5. 
**Review / iteration is expected.** A reviewer profile loops on drafter output. +6. **The audit trail matters.** Board rows persist in SQLite forever. + +If *none* of those apply — it's a small one-shot reasoning task — use `delegate_task` instead or answer directly. + +## The anti-temptation rules + +These are the rules you MUST NOT break: + +- **Do not execute the work yourself.** Your tools literally don't include terminal/file/code/web for implementation. If you find yourself "just fixing this quickly" — stop. +- **For any concrete task, create a Kanban task and assign it to a specialist.** Every single time. +- **If no specialist fits, ask the user which profile to create.** Do not default to doing it yourself under "close enough." +- **Your job is to decompose, route, and summarize — nothing else.** + +## The standard specialist roster (convention) + +Unless the user's setup has customized profiles, assume these exist. Adjust to whatever profiles the user actually has — ask if unsure. + +| Profile | Does | +|---|---| +| `researcher` | Reads sources, gathers facts, writes findings. Scratch workspace. | +| `analyst` | Synthesizes, ranks, de-dupes. Consumes multiple `researcher` outputs. | +| `writer` | Drafts prose in the user's voice. | +| `reviewer` | Reads output, leaves line-comments, gates approval. | +| `backend-eng` | Writes server-side code. Worktree workspace. | +| `frontend-eng` | Writes client-side code. Worktree workspace. | +| `ops` | Runs scripts, manages services, handles deployments. | + +## Decomposition playbook + +### Step 1 — Understand the goal + +Ask clarifying questions if the goal is ambiguous. Cheap to ask; expensive to spawn the wrong fleet. 

### Step 2 — Sketch the task graph

Before creating anything, draft the graph out loud (in your response):

```
T1 [planner] — meta; this is me
 ├── T2 [researcher] — angle A
 ├── T3 [researcher] — angle B
 ├── T4 [researcher] — angle C
 └── T5 [analyst] — synthesize T2,T3,T4
      └── T6 [writer] — brief the user
```

### Step 3 — Create tasks, link dependencies

For each leaf-level task:
```bash
hermes kanban create "angle: cost analysis" \
    --assignee researcher \
    --tenant $HERMES_TENANT
```

Repeat per task. Then link them:
```bash
hermes kanban link <parent-id> <child-id>
```

**Do not assign something to yourself.** If the orchestrator shows up as an assignee anywhere, you've made a mistake.

### Step 4 — Complete your own orchestration task with a summary

If you were spawned as a task yourself (e.g. `planner` profile was assigned `T1: "investigate foo"`), mark it done with a summary of what you created:

```bash
hermes kanban complete $HERMES_KANBAN_TASK \
    --result "decomposed into T2-T6: 3 research angles, 1 synthesis, 1 brief"
```

### Step 5 — Tell the user what you did

Reply to the user with:
- The task IDs you created.
- What each is doing.
- Who will work on them.
- Roughly when to expect results (or "I'll message when the last one's done" if the gateway is wired up).

## Tenant propagation

If `$HERMES_TENANT` is set, **every task you create must carry the same `--tenant <name>`.** This is how one specialist fleet serves multiple businesses — the tenant flows down the graph, not across.

## Pattern reference

The eight collaboration patterns you can instantiate (load the design spec if unsure):

- **P1 Fan-out** — N siblings, same role, no links between them.
- **P2 Pipeline** — role-specialized chain with linear deps.
- **P3 Voting/quorum** — N siblings + 1 aggregator linked from all N.
- **P4 Journal** — same profile + `--workspace dir:<path>` + recurring cron.
- **P5 Human-in-the-loop** — any worker blocks; user/peer unblocks.
- **P6 @mention** — the user or an agent can write `@profile-name` inline to address a profile; the gateway parses and routes. (UX, not a new primitive.)
- **P7 Thread-scoped workspace** — `/kanban here` pins workspace to current thread dir.
- **P8 Fleet farming** — one profile, N tasks, one workspace per subject (e.g. 50 social accounts).

## Example run

User says: *"Analyze whether we should migrate to Postgres. Include a cost analysis and a performance angle."*

Your decomposition:
1. `hermes kanban create "research: Postgres cost vs current" --assignee researcher`
2. `hermes kanban create "research: Postgres performance vs current" --assignee researcher`
3. `hermes kanban create "synthesize migration recommendation" --assignee analyst`
4. `hermes kanban link <research-1-id> <synthesis-id>` ; `hermes kanban link <research-2-id> <synthesis-id>`
5. `hermes kanban create "draft decision memo" --assignee writer --parent <synthesis-id>`
6. Report task IDs and expected flow to the user.

## Pitfalls

**The "just a quick check" trap.** When the user asks a small question you could probably answer yourself, the temptation is to skip the board. If the question is genuinely one-shot, answer directly. If it's the opening of a workflow ("first, check X; then Y; then Z"), it's board work even if step 1 looks small.

**Reassignment vs. new task.** If a reviewer blocks with "needs changes," create a NEW task linked from the reviewer's task — don't re-run the same task with a stern look. The new task is assigned to the original implementer profile.

**Link order matters.** `hermes kanban link <parent-id> <child-id>` — parent first. Mixing them up demotes the wrong task to `todo`.
diff --git a/skills/devops/kanban-worker/SKILL.md b/skills/devops/kanban-worker/SKILL.md
new file mode 100644
index 0000000000..a6e6d54432
--- /dev/null
+++ b/skills/devops/kanban-worker/SKILL.md
@@ -0,0 +1,120 @@
+---
+name: kanban-worker
+description: How a Hermes profile should work a task from the shared Kanban board. Load this skill in any profile that participates in the board (researcher, backend-eng, reviewer, etc.). Triggers on HERMES_KANBAN_TASK env var or a "work kanban task <id>" prompt.
+version: 1.0.0
+metadata:
+  hermes:
+    tags: [kanban, multi-agent, collaboration, workflow]
+    related_skills: [kanban-orchestrator]
+---
+
+# Kanban Worker
+
+Use this skill when you were spawned to work a task from the shared Hermes Kanban board. Symptoms:
+
+- Your initial prompt says "work kanban task <id>" — e.g. `work kanban task t_9f2a`.
+- Env vars set: `HERMES_KANBAN_TASK`, `HERMES_KANBAN_WORKSPACE`, optionally `HERMES_TENANT`.
+- You were started by `hermes kanban dispatch` (cron) or a human ran `hermes -p chat -q "work kanban task <id>"`.
+
+## Your job
+
+You are **one run of one specialist profile working one task.** Read the task, do the work inside the workspace, record a result, and exit. Everything else is somebody else's job.
+
+## Step 1 — Read the full context
+
+```bash
+hermes kanban context $HERMES_KANBAN_TASK
+```
+
+That command prints:
+1. Task title + body.
+2. Every comment on the task, in order, with author names.
+3. Completion results of every `done` parent task (upstream context).
+
+**Read all of it.** The comment thread is the inter-agent protocol — past peers, human clarifications, and blocker resolutions all live there. If a reviewer left feedback or the user answered a blocker, it's in the comments.
+
+## Step 2 — Work inside the workspace
+
+`cd $HERMES_KANBAN_WORKSPACE` and do the work there. 
The workspace kind determines what that means:
+
+| `workspace_kind` | What it is | Your behavior |
+|---|---|---|
+| `scratch` | Fresh temp dir, yours alone | Read/write freely; it gets GC'd when the task is archived. |
+| `dir:<path>` | Shared persistent directory | Treat as a long-lived workspace; other runs will read what you write. |
+| `worktree` | Git worktree at the resolved path | You may need to `git worktree add <path>` if it doesn't exist yet. Commit work here. |
+
+For `worktree` mode: check if `.git` exists in the workspace path. If not, run:
+```bash
+git worktree add $HERMES_KANBAN_WORKSPACE
+```
+from the main repo's root. Then cd and work normally.
+
+## Step 3 — If tenancy matters, respect it
+
+If `$HERMES_TENANT` is set, the task belongs to that tenant namespace. When reading or writing persistent memory, prefix memory entries with the tenant name so context doesn't leak across tenants:
+
+> Good: memory entry `business-a: Acme is our biggest customer`
+> Bad: unprefixed `Acme is our biggest customer` (leaks across tenants)
+
+## Step 4 — If you hit an ambiguity you can't resolve, BLOCK. Don't guess.
+
+Any of these should trigger a block:
+- User-specific decision you can't infer (IP vs. user-id keys; which tone to use).
+- Missing credential or access.
+- Source that needs human input (paywalled article, 2FA-gated login).
+- Peer profile needs to deliver something first and you can't reach around that.
+
+```bash
+hermes kanban block $HERMES_KANBAN_TASK "need decision: IP vs user_id for rate limit key?"
+```
+
+`block` also appends your reason as a visible comment. When the user or a peer unblocks and the dispatcher re-spawns you, you'll see the full comment thread including their answer in step 1's context read. 
+ +## Step 5 — Complete with a crisp, machine-readable result + +```bash +hermes kanban complete $HERMES_KANBAN_TASK --result "rate_limiter.py implemented; keys on user_id with IP fallback; tests passing" +``` + +Rules for the `--result` string: +- One to three sentences. It's not a report, it's a handoff note. +- Name concrete artifacts you produced (file paths, URLs, commit SHAs). +- State any caveats a downstream profile needs to know. +- **Do not** include secrets, tokens, or raw PII — results are durable in the board DB forever. + +Downstream tasks (children linked from this task) will see your `--result` verbatim as part of their parent-result context. + +## Step 6 — If follow-up work is obvious, create it. Don't do it. + +You are one task. If you notice something else needs doing, create a linked child task for the right profile instead of scope-creeping: + +```bash +hermes kanban create "add concurrent-request test" \ + --assignee backend-eng \ + --parent $HERMES_KANBAN_TASK +``` + +## Leave comments to talk to peers + +If you want to flag something for a reviewer, a future run, or the user — append a comment: + +```bash +hermes kanban comment $HERMES_KANBAN_TASK "note: skipped the sqlite driver path; needs separate task" +``` + +Comments are the inter-agent protocol. Direct IPC does not exist; the board is the only channel. + +## Do NOT + +- Do not call `delegate_task` as a substitute for creating kanban tasks — `delegate_task` is for short synchronous reasoning subtasks inside your own run, not for cross-agent handoffs. +- Do not modify files outside `$HERMES_KANBAN_WORKSPACE` unless the task body explicitly asks for it. +- Do not assign tasks to yourself during your run (you're already running one; create new tasks for follow-ups only). +- Do not complete a task you didn't actually finish. Block it instead. 
+ +## Pitfalls + +**The task might already be blocked or reassigned when you start.** Between when the dispatcher claimed and when you actually booted up, circumstances can change. Always read the current state at step 1. If `hermes kanban show` reports the task is blocked or reassigned, stop — don't keep running. + +**The workspace may already have artifacts from a previous run.** Especially for `dir:` and `worktree` workspaces, a previous worker may have written files that are incomplete or stale. Read the comment thread — it usually explains why you're running again. + +**Your memory persists but the task result does not carry over automatically.** If you learn something that matters for future runs of this profile in other tasks, write it to your profile memory via the normal mechanism. Comments on the task are for humans and peers; memory is for your future self. diff --git a/tests/hermes_cli/test_kanban_cli.py b/tests/hermes_cli/test_kanban_cli.py new file mode 100644 index 0000000000..f7c84d5df8 --- /dev/null +++ b/tests/hermes_cli/test_kanban_cli.py @@ -0,0 +1,210 @@ +"""Tests for the kanban CLI surface (hermes_cli.kanban).""" + +from __future__ import annotations + +import argparse +import json +import os +from pathlib import Path + +import pytest + +from hermes_cli import kanban as kc +from hermes_cli import kanban_db as kb + + +@pytest.fixture +def kanban_home(tmp_path, monkeypatch): + home = tmp_path / ".hermes" + home.mkdir() + monkeypatch.setenv("HERMES_HOME", str(home)) + monkeypatch.setattr(Path, "home", lambda: tmp_path) + kb.init_db() + return home + + +# --------------------------------------------------------------------------- +# Workspace flag parsing +# --------------------------------------------------------------------------- + +@pytest.mark.parametrize( + "value,expected", + [ + ("scratch", ("scratch", None)), + ("worktree", ("worktree", None)), + ("dir:/tmp/work", ("dir", "/tmp/work")), + ], +) +def test_parse_workspace_flag_valid(value, 
expected): + assert kc._parse_workspace_flag(value) == expected + + +def test_parse_workspace_flag_expands_user(): + kind, path = kc._parse_workspace_flag("dir:~/vault") + assert kind == "dir" + assert path.endswith("/vault") + assert not path.startswith("~") + + +@pytest.mark.parametrize("bad", ["cloud", "dir:", "", "worktree:/x"]) +def test_parse_workspace_flag_rejects(bad): + if not bad: + # Empty -> defaults; not an error. + assert kc._parse_workspace_flag(bad) == ("scratch", None) + return + with pytest.raises(argparse.ArgumentTypeError): + kc._parse_workspace_flag(bad) + + +# --------------------------------------------------------------------------- +# run_slash smoke tests (end-to-end via the same entry both CLI and gateway use) +# --------------------------------------------------------------------------- + +def test_run_slash_no_args_shows_usage(kanban_home): + out = kc.run_slash("") + assert "kanban" in out.lower() + assert "create" in out.lower() or "subcommand" in out.lower() or "action" in out.lower() + + +def test_run_slash_create_and_list(kanban_home): + out = kc.run_slash("create 'ship feature' --assignee alice") + assert "Created" in out + out = kc.run_slash("list") + assert "ship feature" in out + assert "alice" in out + + +def test_run_slash_create_with_parent_and_cascade(kanban_home): + # Parent then child via --parent + out1 = kc.run_slash("create 'parent' --assignee alice") + # Extract the "t_xxxx" id from "Created t_xxxx (ready, ...)" + import re + m = re.search(r"(t_[a-f0-9]+)", out1) + assert m + p = m.group(1) + out2 = kc.run_slash(f"create 'child' --assignee bob --parent {p}") + assert "todo" in out2 # child starts as todo + + # Complete parent; list should promote child to ready + kc.run_slash(f"complete {p}") + # Explicit filter: child should now be ready (was todo before complete). 
+ ready_list = kc.run_slash("list --status ready") + assert "child" in ready_list + + +def test_run_slash_show_includes_comments(kanban_home): + out = kc.run_slash("create 'x'") + import re + tid = re.search(r"(t_[a-f0-9]+)", out).group(1) + kc.run_slash(f"comment {tid} 'source is paywalled'") + show = kc.run_slash(f"show {tid}") + assert "source is paywalled" in show + + +def test_run_slash_block_unblock_cycle(kanban_home): + out = kc.run_slash("create 'x' --assignee alice") + import re + tid = re.search(r"(t_[a-f0-9]+)", out).group(1) + # Claim first so block() finds it running + kc.run_slash(f"claim {tid}") + assert "Blocked" in kc.run_slash(f"block {tid} 'need decision'") + assert "Unblocked" in kc.run_slash(f"unblock {tid}") + + +def test_run_slash_json_output(kanban_home): + out = kc.run_slash("create 'jsontask' --assignee alice --json") + payload = json.loads(out) + assert payload["title"] == "jsontask" + assert payload["assignee"] == "alice" + assert payload["status"] == "ready" + + +def test_run_slash_dispatch_dry_run_counts(kanban_home): + kc.run_slash("create 'a' --assignee alice") + kc.run_slash("create 'b' --assignee bob") + out = kc.run_slash("dispatch --dry-run") + assert "Spawned:" in out + + +def test_run_slash_context_output_format(kanban_home): + out = kc.run_slash("create 'tech spec' --assignee alice --body 'write an RFC'") + import re + tid = re.search(r"(t_[a-f0-9]+)", out).group(1) + kc.run_slash(f"comment {tid} 'remember to include performance section'") + ctx = kc.run_slash(f"context {tid}") + assert "tech spec" in ctx + assert "write an RFC" in ctx + assert "performance section" in ctx + + +def test_run_slash_tenant_filter(kanban_home): + kc.run_slash("create 'biz-a task' --tenant biz-a --assignee alice") + kc.run_slash("create 'biz-b task' --tenant biz-b --assignee alice") + a = kc.run_slash("list --tenant biz-a") + b = kc.run_slash("list --tenant biz-b") + assert "biz-a task" in a and "biz-b task" not in a + assert "biz-b task" in b and 
"biz-a task" not in b + + +def test_run_slash_usage_error_returns_message(kanban_home): + # Missing required argument for create + out = kc.run_slash("create") + assert "usage" in out.lower() or "error" in out.lower() + + +def test_run_slash_assign_reassigns(kanban_home): + out = kc.run_slash("create 'x' --assignee alice") + import re + tid = re.search(r"(t_[a-f0-9]+)", out).group(1) + assert "Assigned" in kc.run_slash(f"assign {tid} bob") + show = kc.run_slash(f"show {tid}") + assert "bob" in show + + +def test_run_slash_link_unlink(kanban_home): + a = kc.run_slash("create 'a'") + b = kc.run_slash("create 'b'") + import re + ta = re.search(r"(t_[a-f0-9]+)", a).group(1) + tb = re.search(r"(t_[a-f0-9]+)", b).group(1) + assert "Linked" in kc.run_slash(f"link {ta} {tb}") + # After link, b is todo + show = kc.run_slash(f"show {tb}") + assert "todo" in show + assert "Unlinked" in kc.run_slash(f"unlink {ta} {tb}") + + +# --------------------------------------------------------------------------- +# Integration with the COMMAND_REGISTRY +# --------------------------------------------------------------------------- + +def test_kanban_is_resolvable(): + from hermes_cli.commands import resolve_command + + cmd = resolve_command("kanban") + assert cmd is not None + assert cmd.name == "kanban" + + +def test_kanban_bypasses_active_session_guard(): + from hermes_cli.commands import should_bypass_active_session + + assert should_bypass_active_session("kanban") + + +def test_kanban_in_autocomplete_table(): + from hermes_cli.commands import COMMANDS, SUBCOMMANDS + + assert "/kanban" in COMMANDS + subs = SUBCOMMANDS.get("/kanban") or [] + assert "create" in subs + assert "dispatch" in subs + + +def test_kanban_not_gateway_only(): + # kanban is available in BOTH CLI and gateway surfaces. 
+ from hermes_cli.commands import COMMAND_REGISTRY + + cmd = next(c for c in COMMAND_REGISTRY if c.name == "kanban") + assert not cmd.cli_only + assert not cmd.gateway_only diff --git a/tests/hermes_cli/test_kanban_db.py b/tests/hermes_cli/test_kanban_db.py new file mode 100644 index 0000000000..fcc6396be4 --- /dev/null +++ b/tests/hermes_cli/test_kanban_db.py @@ -0,0 +1,438 @@ +"""Tests for the Kanban DB layer (hermes_cli.kanban_db).""" + +from __future__ import annotations + +import concurrent.futures +import os +import time +from pathlib import Path + +import pytest + +from hermes_cli import kanban_db as kb + + +@pytest.fixture +def kanban_home(tmp_path, monkeypatch): + """Isolated HERMES_HOME with an empty kanban DB.""" + home = tmp_path / ".hermes" + home.mkdir() + monkeypatch.setenv("HERMES_HOME", str(home)) + monkeypatch.setattr(Path, "home", lambda: tmp_path) + kb.init_db() + return home + + +# --------------------------------------------------------------------------- +# Schema / init +# --------------------------------------------------------------------------- + +def test_init_db_is_idempotent(kanban_home): + # Second call should not error or drop data. 
+ with kb.connect() as conn: + kb.create_task(conn, title="persisted") + kb.init_db() + with kb.connect() as conn: + tasks = kb.list_tasks(conn) + assert len(tasks) == 1 + assert tasks[0].title == "persisted" + + +def test_init_creates_expected_tables(kanban_home): + with kb.connect() as conn: + rows = conn.execute( + "SELECT name FROM sqlite_master WHERE type='table' ORDER BY name" + ).fetchall() + names = {r["name"] for r in rows} + assert {"tasks", "task_links", "task_comments", "task_events"} <= names + + +# --------------------------------------------------------------------------- +# Task creation + status inference +# --------------------------------------------------------------------------- + +def test_create_task_no_parents_is_ready(kanban_home): + with kb.connect() as conn: + tid = kb.create_task(conn, title="ship it", assignee="alice") + t = kb.get_task(conn, tid) + assert t is not None + assert t.status == "ready" + assert t.assignee == "alice" + assert t.workspace_kind == "scratch" + + +def test_create_task_with_parent_is_todo_until_parent_done(kanban_home): + with kb.connect() as conn: + p = kb.create_task(conn, title="parent") + c = kb.create_task(conn, title="child", parents=[p]) + assert kb.get_task(conn, c).status == "todo" + kb.complete_task(conn, p, result="ok") + assert kb.get_task(conn, c).status == "ready" + + +def test_create_task_unknown_parent_errors(kanban_home): + with kb.connect() as conn, pytest.raises(ValueError, match="unknown parent"): + kb.create_task(conn, title="orphan", parents=["t_ghost"]) + + +def test_workspace_kind_validation(kanban_home): + with kb.connect() as conn, pytest.raises(ValueError, match="workspace_kind"): + kb.create_task(conn, title="bad ws", workspace_kind="cloud") + + +# --------------------------------------------------------------------------- +# Links + dependency resolution +# --------------------------------------------------------------------------- + +def 
test_link_demotes_ready_child_to_todo_when_parent_not_done(kanban_home): + with kb.connect() as conn: + a = kb.create_task(conn, title="a") + b = kb.create_task(conn, title="b") + assert kb.get_task(conn, b).status == "ready" + kb.link_tasks(conn, a, b) + assert kb.get_task(conn, b).status == "todo" + + +def test_link_keeps_ready_child_when_parent_already_done(kanban_home): + with kb.connect() as conn: + a = kb.create_task(conn, title="a") + kb.complete_task(conn, a) + b = kb.create_task(conn, title="b") + assert kb.get_task(conn, b).status == "ready" + kb.link_tasks(conn, a, b) + assert kb.get_task(conn, b).status == "ready" + + +def test_link_rejects_self_loop(kanban_home): + with kb.connect() as conn: + a = kb.create_task(conn, title="a") + with pytest.raises(ValueError, match="itself"): + kb.link_tasks(conn, a, a) + + +def test_link_detects_cycle(kanban_home): + with kb.connect() as conn: + a = kb.create_task(conn, title="a") + b = kb.create_task(conn, title="b", parents=[a]) + c = kb.create_task(conn, title="c", parents=[b]) + with pytest.raises(ValueError, match="cycle"): + kb.link_tasks(conn, c, a) + with pytest.raises(ValueError, match="cycle"): + kb.link_tasks(conn, b, a) + + +def test_recompute_ready_cascades_through_chain(kanban_home): + with kb.connect() as conn: + a = kb.create_task(conn, title="a") + b = kb.create_task(conn, title="b", parents=[a]) + c = kb.create_task(conn, title="c", parents=[b]) + assert [kb.get_task(conn, x).status for x in (a, b, c)] == \ + ["ready", "todo", "todo"] + kb.complete_task(conn, a) + assert kb.get_task(conn, b).status == "ready" + kb.complete_task(conn, b) + assert kb.get_task(conn, c).status == "ready" + + +def test_recompute_ready_fan_in_waits_for_all_parents(kanban_home): + with kb.connect() as conn: + a = kb.create_task(conn, title="a") + b = kb.create_task(conn, title="b") + c = kb.create_task(conn, title="c", parents=[a, b]) + kb.complete_task(conn, a) + assert kb.get_task(conn, c).status == "todo" + 
kb.complete_task(conn, b) + assert kb.get_task(conn, c).status == "ready" + + +# --------------------------------------------------------------------------- +# Atomic claim (CAS) +# --------------------------------------------------------------------------- + +def test_claim_once_wins_second_loses(kanban_home): + with kb.connect() as conn: + t = kb.create_task(conn, title="x", assignee="a") + first = kb.claim_task(conn, t, claimer="host:1") + assert first is not None and first.status == "running" + second = kb.claim_task(conn, t, claimer="host:2") + assert second is None + + +def test_claim_fails_on_non_ready(kanban_home): + with kb.connect() as conn: + t = kb.create_task(conn, title="x") + # Move to todo by introducing an unsatisfied parent. + p = kb.create_task(conn, title="p") + kb.link_tasks(conn, p, t) + assert kb.get_task(conn, t).status == "todo" + assert kb.claim_task(conn, t) is None + + +def test_stale_claim_reclaimed(kanban_home): + with kb.connect() as conn: + t = kb.create_task(conn, title="x", assignee="a") + kb.claim_task(conn, t) + # Rewind claim_expires so it looks stale. + conn.execute( + "UPDATE tasks SET claim_expires = ? WHERE id = ?", + (int(time.time()) - 3600, t), + ) + reclaimed = kb.release_stale_claims(conn) + assert reclaimed == 1 + assert kb.get_task(conn, t).status == "ready" + + +def test_heartbeat_extends_claim(kanban_home): + with kb.connect() as conn: + t = kb.create_task(conn, title="x", assignee="a") + claimer = "host:hb" + kb.claim_task(conn, t, claimer=claimer, ttl_seconds=60) + original = kb.get_task(conn, t).claim_expires + # Rewind then heartbeat. + conn.execute("UPDATE tasks SET claim_expires = ? 
WHERE id = ?", (0, t)) + ok = kb.heartbeat_claim(conn, t, claimer=claimer, ttl_seconds=3600) + assert ok + new = kb.get_task(conn, t).claim_expires + assert new > int(time.time()) + 3000 + + +def test_concurrent_claims_only_one_wins(kanban_home): + """Fire N threads claiming the same task; exactly one must win.""" + with kb.connect() as conn: + t = kb.create_task(conn, title="race", assignee="a") + + def attempt(i): + with kb.connect() as c: + return kb.claim_task(c, t, claimer=f"host:{i}") + + n_workers = 8 + with concurrent.futures.ThreadPoolExecutor(max_workers=n_workers) as ex: + results = list(ex.map(attempt, range(n_workers))) + winners = [r for r in results if r is not None] + assert len(winners) == 1 + assert winners[0].status == "running" + + +# --------------------------------------------------------------------------- +# Complete / block / unblock / archive / assign +# --------------------------------------------------------------------------- + +def test_complete_records_result(kanban_home): + with kb.connect() as conn: + t = kb.create_task(conn, title="x") + assert kb.complete_task(conn, t, result="done and dusted") + task = kb.get_task(conn, t) + assert task.status == "done" + assert task.result == "done and dusted" + assert task.completed_at is not None + + +def test_block_then_unblock(kanban_home): + with kb.connect() as conn: + t = kb.create_task(conn, title="x", assignee="a") + kb.claim_task(conn, t) + assert kb.block_task(conn, t, reason="need input") + assert kb.get_task(conn, t).status == "blocked" + assert kb.unblock_task(conn, t) + assert kb.get_task(conn, t).status == "ready" + + +def test_assign_refuses_while_running(kanban_home): + with kb.connect() as conn: + t = kb.create_task(conn, title="x", assignee="a") + kb.claim_task(conn, t) + with pytest.raises(RuntimeError, match="currently running"): + kb.assign_task(conn, t, "b") + + +def test_assign_reassigns_when_not_running(kanban_home): + with kb.connect() as conn: + t = 
kb.create_task(conn, title="x", assignee="a") + assert kb.assign_task(conn, t, "b") + assert kb.get_task(conn, t).assignee == "b" + + +def test_archive_hides_from_default_list(kanban_home): + with kb.connect() as conn: + t = kb.create_task(conn, title="x") + kb.complete_task(conn, t) + assert kb.archive_task(conn, t) + assert len(kb.list_tasks(conn)) == 0 + assert len(kb.list_tasks(conn, include_archived=True)) == 1 + + +# --------------------------------------------------------------------------- +# Comments / events / worker context +# --------------------------------------------------------------------------- + +def test_comments_recorded_in_order(kanban_home): + with kb.connect() as conn: + t = kb.create_task(conn, title="x") + kb.add_comment(conn, t, "user", "first") + kb.add_comment(conn, t, "researcher", "second") + comments = kb.list_comments(conn, t) + assert [c.body for c in comments] == ["first", "second"] + assert [c.author for c in comments] == ["user", "researcher"] + + +def test_empty_comment_rejected(kanban_home): + with kb.connect() as conn: + t = kb.create_task(conn, title="x") + with pytest.raises(ValueError, match="body is required"): + kb.add_comment(conn, t, "user", "") + + +def test_events_capture_lifecycle(kanban_home): + with kb.connect() as conn: + t = kb.create_task(conn, title="x", assignee="a") + kb.claim_task(conn, t) + kb.complete_task(conn, t, result="ok") + events = kb.list_events(conn, t) + kinds = [e.kind for e in events] + assert "created" in kinds + assert "claimed" in kinds + assert "completed" in kinds + + +def test_worker_context_includes_parent_results_and_comments(kanban_home): + with kb.connect() as conn: + p = kb.create_task(conn, title="p") + kb.complete_task(conn, p, result="PARENT_RESULT_MARKER") + c = kb.create_task(conn, title="child", parents=[p]) + kb.add_comment(conn, c, "user", "CLARIFICATION_MARKER") + ctx = kb.build_worker_context(conn, c) + assert "PARENT_RESULT_MARKER" in ctx + assert "CLARIFICATION_MARKER" 
in ctx + assert c in ctx + assert "child" in ctx + + +# --------------------------------------------------------------------------- +# Dispatcher +# --------------------------------------------------------------------------- + +def test_dispatch_dry_run_does_not_claim(kanban_home): + with kb.connect() as conn: + t1 = kb.create_task(conn, title="a", assignee="alice") + t2 = kb.create_task(conn, title="b", assignee="bob") + res = kb.dispatch_once(conn, dry_run=True) + assert {s[0] for s in res.spawned} == {t1, t2} + with kb.connect() as conn: + # Dry run must NOT mutate status. + assert kb.get_task(conn, t1).status == "ready" + assert kb.get_task(conn, t2).status == "ready" + + +def test_dispatch_skips_unassigned(kanban_home): + with kb.connect() as conn: + t = kb.create_task(conn, title="floater") + res = kb.dispatch_once(conn, dry_run=True) + assert t in res.skipped_unassigned + assert not res.spawned + + +def test_dispatch_promotes_ready_and_spawns(kanban_home): + spawns = [] + + def fake_spawn(task, workspace): + spawns.append((task.id, task.assignee, workspace)) + + with kb.connect() as conn: + p = kb.create_task(conn, title="p", assignee="alice") + c = kb.create_task(conn, title="c", assignee="bob", parents=[p]) + # Finish parent outside dispatch; promotion happens inside. + kb.complete_task(conn, p) + res = kb.dispatch_once(conn, spawn_fn=fake_spawn) + # Spawned c (a was already done when dispatch was called). + assert len(spawns) == 1 + assert spawns[0][0] == c + assert spawns[0][1] == "bob" + # c is now running + with kb.connect() as conn: + assert kb.get_task(conn, c).status == "running" + + +def test_dispatch_spawn_failure_releases_claim(kanban_home): + def boom(task, workspace): + raise RuntimeError("spawn failed") + + with kb.connect() as conn: + t = kb.create_task(conn, title="boom", assignee="alice") + kb.dispatch_once(conn, spawn_fn=boom) + # Must return to ready so the next tick can retry. 
+ assert kb.get_task(conn, t).status == "ready" + assert kb.get_task(conn, t).claim_lock is None + + +def test_dispatch_reclaims_stale_before_spawning(kanban_home): + with kb.connect() as conn: + t = kb.create_task(conn, title="x", assignee="alice") + kb.claim_task(conn, t) + conn.execute( + "UPDATE tasks SET claim_expires = ? WHERE id = ?", + (int(time.time()) - 1, t), + ) + res = kb.dispatch_once(conn, dry_run=True) + assert res.reclaimed == 1 + + +# --------------------------------------------------------------------------- +# Workspace resolution +# --------------------------------------------------------------------------- + +def test_scratch_workspace_created_under_hermes_home(kanban_home): + with kb.connect() as conn: + t = kb.create_task(conn, title="x") + task = kb.get_task(conn, t) + ws = kb.resolve_workspace(task) + assert ws.exists() + assert ws.is_dir() + assert "kanban" in str(ws) + + +def test_dir_workspace_honors_given_path(kanban_home, tmp_path): + target = tmp_path / "my-vault" + with kb.connect() as conn: + t = kb.create_task( + conn, title="biz", workspace_kind="dir", workspace_path=str(target) + ) + task = kb.get_task(conn, t) + ws = kb.resolve_workspace(task) + assert ws == target + assert ws.exists() + + +def test_worktree_workspace_returns_intended_path(kanban_home, tmp_path): + target = str(tmp_path / ".worktrees" / "my-task") + with kb.connect() as conn: + t = kb.create_task( + conn, title="ship", workspace_kind="worktree", workspace_path=target + ) + task = kb.get_task(conn, t) + ws = kb.resolve_workspace(task) + # We do NOT auto-create worktrees; the worker's skill handles that. 
+ assert str(ws) == target + + +# --------------------------------------------------------------------------- +# Tenancy +# --------------------------------------------------------------------------- + +def test_tenant_column_filters_listings(kanban_home): + with kb.connect() as conn: + kb.create_task(conn, title="a1", tenant="biz-a") + kb.create_task(conn, title="b1", tenant="biz-b") + kb.create_task(conn, title="shared") # no tenant + biz_a = kb.list_tasks(conn, tenant="biz-a") + biz_b = kb.list_tasks(conn, tenant="biz-b") + assert [t.title for t in biz_a] == ["a1"] + assert [t.title for t in biz_b] == ["b1"] + + +def test_tenant_propagates_to_events(kanban_home): + with kb.connect() as conn: + t = kb.create_task(conn, title="tenant-task", tenant="biz-a") + events = kb.list_events(conn, t) + # The "created" event should have tenant in its payload. + created = [e for e in events if e.kind == "created"] + assert created and created[0].payload.get("tenant") == "biz-a" diff --git a/website/docs/reference/cli-commands.md b/website/docs/reference/cli-commands.md index 947994844b..f0d28d958e 100644 --- a/website/docs/reference/cli-commands.md +++ b/website/docs/reference/cli-commands.md @@ -45,6 +45,7 @@ hermes [global-options] [subcommand/options] | `hermes login` / `logout` | **Deprecated** — use `hermes auth` instead. | | `hermes status` | Show agent, auth, and platform status. | | `hermes cron` | Inspect and tick the cron scheduler. | +| `hermes kanban` | Multi-profile collaboration board (tasks, links, dispatcher). | | `hermes webhook` | Manage dynamic webhook subscriptions for event-driven activation. | | `hermes doctor` | Diagnose config and dependency issues. | | `hermes dump` | Copy-pasteable setup summary for support/debugging. | @@ -272,6 +273,38 @@ hermes cron | `status` | Check whether the cron scheduler is running. | | `tick` | Run due jobs once and exit. | +## `hermes kanban` + +```bash +hermes kanban [options] +``` + +Multi-profile collaboration board. 
Tasks live in `~/.hermes/kanban.db` (WAL-mode SQLite); every profile reads and writes the same board. A `cron`-driven dispatcher (`hermes kanban dispatch`) atomically claims ready tasks and spawns the assigned profile as its own process with an isolated workspace. + +| Action | Purpose | +|--------|---------| +| `init` | Create `kanban.db` if missing. Idempotent. | +| `create ""` | Create a new task. Flags: `--body`, `--assignee`, `--parent` (repeatable), `--workspace scratch\|worktree\|dir:<path>`, `--tenant`, `--priority`. | +| `list` / `ls` | List tasks. Filter with `--mine`, `--assignee`, `--status`, `--tenant`, `--archived`, `--json`. | +| `show <id>` | Show a task with comments and events. `--json` for machine output. | +| `assign <id> <profile>` | Assign or reassign. Use `none` to unassign. Refused while task is running. | +| `link <parent> <child>` | Add a dependency. Cycle-detected. | +| `unlink <parent> <child>` | Remove a dependency. | +| `claim <id>` | Atomically claim a ready task. Prints resolved workspace path. | +| `comment <id> "<text>"` | Append a comment. Visible to the next worker that runs the task. | +| `complete <id>` | Mark task done. Flag: `--result "<summary>"` (goes into children's parent-result context). | +| `block <id> "<reason>"` | Mark task blocked. Also appends the reason as a comment. | +| `unblock <id>` | Return a blocked task to ready. | +| `archive <id>` | Hide from default list. `gc` will remove scratch workspaces. | +| `tail <id>` | Follow a task's event stream. | +| `dispatch` | One dispatcher pass. Flags: `--dry-run`, `--max N`, `--json`. | +| `context <id>` | Print the full context a worker would see (title + body + parent results + comments). | +| `gc` | Remove scratch workspaces for archived tasks. | + +All actions are also available as a slash command in the gateway (`/kanban …`), with the same argument surface. 
+ +For the full design — comparison with Cline Kanban / Paperclip / NanoClaw / Gemini Enterprise, eight collaboration patterns, four user stories, concurrency correctness proof — see `docs/hermes-kanban-v1-spec.pdf` in the repository or the [Kanban user guide](/docs/user-guide/features/kanban). + ## `hermes webhook` ```bash diff --git a/website/docs/user-guide/features/kanban.md b/website/docs/user-guide/features/kanban.md new file mode 100644 index 0000000000..068c37275b --- /dev/null +++ b/website/docs/user-guide/features/kanban.md @@ -0,0 +1,167 @@ +--- +sidebar_position: 12 +title: "Kanban (Multi-Agent Board)" +description: "Durable SQLite-backed task board for coordinating multiple Hermes profiles" +--- + +# Kanban — Multi-Agent Profile Collaboration + +Hermes Kanban is a durable task board, shared across all your Hermes profiles, that lets multiple named agents collaborate on work without fragile in-process subagent swarms. Every task is a row in `~/.hermes/kanban.db`; every handoff is a row anyone can read and write; every worker is a full OS process with its own identity. + +This is the shape that covers the workloads `delegate_task` can't: + +- **Research triage** — parallel researchers + analyst + writer, human-in-the-loop. +- **Scheduled ops** — recurring daily briefs that build a journal over weeks. +- **Digital twins** — persistent named assistants (`inbox-triage`, `ops-review`) that accumulate memory over time. +- **Engineering pipelines** — decompose → implement in parallel worktrees → review → iterate → PR. +- **Fleet work** — one specialist managing N subjects (50 social accounts, 12 monitored services). + +For the full design rationale, comparative analysis against Cline Kanban / Paperclip / NanoClaw / Google Gemini Enterprise, and the eight canonical collaboration patterns, see `docs/hermes-kanban-v1-spec.pdf` in the repository. + +## Kanban vs. `delegate_task` + +They look similar; they are not the same primitive. 
+ +| | `delegate_task` | Kanban | +|---|---|---| +| Shape | RPC call (fork → join) | Durable message queue + state machine | +| Parent | Blocks until child returns | Fire-and-forget after `create` | +| Child identity | Anonymous subagent | Named profile with persistent memory | +| Resumability | None — failed = failed | Block → unblock → re-run; crash → reclaim | +| Human in the loop | Not supported | Comment / unblock at any point | +| Agents per task | One call = one subagent | N agents over task's life (retry, review, follow-up) | +| Audit trail | Lost on context compression | Durable rows in SQLite forever | +| Coordination | Hierarchical (caller → callee) | Peer — any profile reads/writes any task | + +**One-sentence distinction:** `delegate_task` is a function call; Kanban is a work queue where every handoff is a row any profile (or human) can see and edit. + +**Use `delegate_task` when** the parent agent needs a short reasoning answer before continuing, no humans involved, result goes back into the parent's context. + +**Use Kanban when** work crosses agent boundaries, needs to survive restarts, might need human input, might be picked up by a different role, or needs to be discoverable after the fact. + +They coexist: a kanban worker may call `delegate_task` internally during its run. + +## Core concepts + +- **Task** — a row with title, optional body, one assignee (a profile name), status (`todo | ready | running | blocked | done | archived`), optional tenant namespace. +- **Link** — `task_links` row recording a parent → child dependency. The dispatcher promotes `todo → ready` when all parents are `done`. +- **Comment** — the inter-agent protocol. Agents and humans append comments; when a worker is (re-)spawned it reads the full comment thread as part of its context. +- **Workspace** — the directory a worker operates in. Three kinds: + - `scratch` (default) — fresh tmp dir under `~/.hermes/kanban/workspaces/<id>/`. 
+ - `dir:<path>` — an existing shared directory (Obsidian vault, mail ops dir, per-account folder). + - `worktree` — a git worktree under `.worktrees/<id>/` for coding tasks. +- **Dispatcher** — `hermes kanban dispatch` runs a one-shot pass: reclaim stale claims, promote ready tasks, atomically claim, spawn assigned profiles. Runs via cron every 60 seconds. +- **Tenant** — optional string namespace. One specialist fleet can serve multiple businesses (`--tenant business-a`) with data isolation by workspace path and memory key prefix. + +## Quick start + +```bash +# 1. Create the board +hermes kanban init + +# 2. Create a task +hermes kanban create "research AI funding landscape" --assignee researcher + +# 3. List what's on the board +hermes kanban list + +# 4. Run a dispatcher pass (dry-run to preview, real to spawn workers) +hermes kanban dispatch --dry-run +hermes kanban dispatch +``` + +To have the board run continuously, schedule the dispatcher: + +```bash +hermes cron add --schedule "*/1 * * * *" \ + --name kanban-dispatch \ + hermes kanban dispatch +``` + +## The worker skill + +Any profile that should be able to work kanban tasks must load the `kanban-worker` skill. It teaches the worker the full lifecycle: + +1. On spawn, read `$HERMES_KANBAN_TASK` env var. +2. Run `hermes kanban context $HERMES_KANBAN_TASK` to read title + body + parent results + full comment thread. +3. `cd $HERMES_KANBAN_WORKSPACE` and do the work there. +4. Complete with `hermes kanban complete <id> --result "<summary>"`, or block with `hermes kanban block <id> "<reason>"` if stuck. + +Load it with: + +```bash +hermes skills install devops/kanban-worker +``` + +## The orchestrator skill + +A **well-behaved orchestrator does not do the work itself.** It decomposes the user's goal into tasks, links them, assigns each to a specialist, and steps back. 
The `kanban-orchestrator` skill encodes this: anti-temptation rules, a standard specialist roster (`researcher`, `writer`, `analyst`, `backend-eng`, `reviewer`, `ops`), and a decomposition playbook. + +Load it into your orchestrator profile: + +```bash +hermes skills install devops/kanban-orchestrator +``` + +For best results, pair it with a profile whose toolsets are restricted to board operations (`kanban`, `gateway`, `memory`) so the orchestrator literally cannot execute implementation tasks even if it tries. + +## CLI command reference + +``` +hermes kanban init # create kanban.db +hermes kanban create "<title>" [--body ...] [--assignee <profile>] + [--parent <id>]... [--tenant <name>] + [--workspace scratch|worktree|dir:<path>] + [--priority N] [--json] +hermes kanban list [--mine] [--assignee P] [--status S] [--tenant T] [--archived] [--json] +hermes kanban show <id> [--json] +hermes kanban assign <id> <profile> # or 'none' to unassign +hermes kanban link <parent_id> <child_id> +hermes kanban unlink <parent_id> <child_id> +hermes kanban claim <id> [--ttl SECONDS] +hermes kanban comment <id> "<text>" [--author NAME] +hermes kanban complete <id> [--result "..."] +hermes kanban block <id> "<reason>" +hermes kanban unblock <id> +hermes kanban archive <id> +hermes kanban tail <id> # follow event stream +hermes kanban dispatch [--dry-run] [--max N] [--json] +hermes kanban context <id> # what a worker sees +hermes kanban gc # remove scratch dirs of archived tasks +``` + +All commands are also available as a slash command in the gateway (`/kanban list`, `/kanban comment t_abc "need docs"`, etc.). The slash command bypasses the running-agent guard, so you can `/kanban unblock` a stuck worker while the main agent is still chatting. 
+ +## Collaboration patterns + +The board supports these eight patterns without any new primitives: + +| Pattern | Shape | Example | +|---|---|---| +| **P1 Fan-out** | N siblings, same role | "research 5 angles in parallel" | +| **P2 Pipeline** | role chain: scout → editor → writer | daily brief assembly | +| **P3 Voting / quorum** | N siblings + 1 aggregator | 3 researchers → 1 reviewer picks | +| **P4 Long-running journal** | same profile + shared dir + cron | Obsidian vault | +| **P5 Human-in-the-loop** | worker blocks → user comments → unblock | ambiguous decisions | +| **P6 `@mention`** | inline routing from prose | `@reviewer look at this` | +| **P7 Thread-scoped workspace** | `/kanban here` in a thread | per-project gateway threads | +| **P8 Fleet farming** | one profile, N subjects | 50 social accounts | + +For worked examples of each, see `docs/hermes-kanban-v1-spec.pdf`. + +## Multi-tenant usage + +When one specialist fleet serves multiple businesses, tag each task with a tenant: + +```bash +hermes kanban create "monthly report" \ + --assignee researcher \ + --tenant business-a \ + --workspace dir:~/tenants/business-a/data/ +``` + +Workers receive `$HERMES_TENANT` and namespace their memory writes by prefix. The board, the dispatcher, and the profile definitions are all shared; only the data is scoped. + +## Design spec + +The complete design — architecture, concurrency correctness, comparison with other systems, implementation plan, risks, open questions — lives in `docs/hermes-kanban-v1-spec.pdf`. Read that before filing any behavior-change PR. diff --git a/website/sidebars.ts b/website/sidebars.ts index b654291810..0b201baaf2 100644 --- a/website/sidebars.ts +++ b/website/sidebars.ts @@ -60,6 +60,7 @@ const sidebars: SidebarsConfig = { items: [ 'user-guide/features/cron', 'user-guide/features/delegation', + 'user-guide/features/kanban', 'user-guide/features/code-execution', 'user-guide/features/hooks', 'user-guide/features/batch-processing',