mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-08 03:01:47 +00:00
* feat(kanban): add `specify` — auxiliary LLM fleshes out triage tasks
The Triage column shipped with a placeholder 'a specifier will flesh
out the spec', but the specifier itself was never built. This wires
it up as a dedicated CLI verb.
`hermes kanban specify <id>` calls the auxiliary LLM (configured under
`auxiliary.triage_specifier`) to expand a rough one-liner into a
concrete spec — tightened title plus a body with Goal / Approach /
Acceptance criteria / Out-of-scope sections — then atomically flips
`status: triage -> todo` and recomputes ready so parent-free tasks
go straight to the dispatcher on the same tick.
Surface:
hermes kanban specify <task_id> # single task
hermes kanban specify --all [--tenant T] # sweep triage column
hermes kanban specify ... --author NAME # audit-comment author
hermes kanban specify ... --json # one JSON line per task
Design choices:
- Parent gating is preserved. specify_triage_task flips to 'todo',
then recompute_ready promotes to 'ready' only when parents are
done — same rule as a normal parent-gated todo.
- No daemon, no background watcher. Every invocation is explicit —
keeps cost predictable and doesn't fight the dispatcher loop.
- Response parse is lenient: strict JSON preferred, markdown-fence
tolerated, raw-body fallback on malformed JSON so the LLM can't
strand a task in triage.
- All failure modes (no aux client, API error, task moved out of
triage mid-call) return SpecifyOutcome(ok=False, reason=...) so
--all continues past individual failures.
Changes:
hermes_cli/kanban_db.py + specify_triage_task()
hermes_cli/kanban_specify.py NEW (~220 LOC — prompt, parse, call)
hermes_cli/kanban.py + specify subcommand + _cmd_specify
hermes_cli/config.py + auxiliary.triage_specifier task slot
website/docs/user-guide/features/kanban.md specify + config notes
website/docs/reference/cli-commands.md CLI reference entry
tests/hermes_cli/test_kanban_specify_db.py NEW (10 tests)
tests/hermes_cli/test_kanban_specify.py NEW (20 tests)
Validation: 30/30 targeted tests pass. E2E: triage task -> specify ->
ends in 'ready' with events [created, specified, promoted] and the
audit comment recorded under the configured author.
* feat(kanban): wire specifier into dashboard and gateway slash
Follow-ups to the initial PR #21435 — closes the two gaps that were deferred
to post-merge: the dashboard button and a first-class gateway surface.
Dashboard (plugins/kanban/dashboard/)
- POST /tasks/:id/specify NEW endpoint. Thin wrapper around
kanban_specify.specify_task(). Returns the CLI outcome shape
({ok, task_id, reason, new_title}); ok=false with a human reason
is a 200, not a 4xx, so the UI can render it inline without
treating 'no aux client configured' as a crash.
- Runs sync in FastAPI's threadpool because the LLM call can take
tens of seconds on reasoning models.
- Pins HERMES_KANBAN_BOARD around the specify call so the module's
argless kb.connect() lands on the right board.
- dist/index.js: doSpecify callback threaded through the drawer →
TaskDetail → StatusActions prop chain. ✨ Specify button appears
ONLY when task.status === 'triage' (elsewhere the backend would
reject anyway — hide the button to keep the action row clean).
Busy state (Specifying…) + inline success/error banner under the
button using the response.reason text.
- dist/style.css: tiny hermes-kanban-msg-ok / -err classes using
existing --color vars so themes reskin cleanly.
Gateway slash (/kanban specify)
- Already works via the existing run_slash → build_parser →
kanban_command pipeline. No code change needed — slash commands
inherit the argparse tree automatically. Added coverage:
test_run_slash_specify_end_to_end (create --triage, specify, verify
promotion + retitle) and test_run_slash_specify_help_is_reachable.
Tests
- tests/plugins/test_kanban_dashboard_plugin.py: 3 new tests for the
REST endpoint — happy path, non-triage rejection as ok=false 200,
missing aux client as ok=false 200.
- tests/hermes_cli/test_kanban_cli.py: 2 new slash-surface tests.
Docs
- website/docs/user-guide/features/kanban.md: dashboard action row
description mentions ✨ Specify + all three surfaces. REST table
gains /tasks/:id/specify. Slash examples include /kanban specify.
Validation: 340/340 targeted tests pass. E2E via TestClient: create a
triage task over REST → POST /specify with mocked aux client → task
moves to 'ready' column on /board with new title and body applied.
343 lines
12 KiB
Python
343 lines
12 KiB
Python
"""Tests for the kanban CLI surface (hermes_cli.kanban)."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import json
|
|
import os
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
from hermes_cli import kanban as kc
|
|
from hermes_cli import kanban_db as kb
|
|
|
|
|
|
@pytest.fixture
def kanban_home(tmp_path, monkeypatch):
    """Provide an isolated Hermes home with an initialised kanban database.

    Creates ``<tmp_path>/.hermes``, points ``HERMES_HOME`` at it, makes
    ``Path.home()`` return ``tmp_path`` and then calls ``kb.init_db()``.

    Returns:
        The ``.hermes`` directory that was created.
    """
    hermes_dir = tmp_path / ".hermes"
    hermes_dir.mkdir()

    def _fake_home():
        # Stand-in for Path.home so nothing resolves to the real home dir.
        return tmp_path

    monkeypatch.setenv("HERMES_HOME", str(hermes_dir))
    monkeypatch.setattr(Path, "home", _fake_home)

    kb.init_db()
    return hermes_dir
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Workspace flag parsing
|
|
# ---------------------------------------------------------------------------
|
|
|
|
@pytest.mark.parametrize(
    ("value", "expected"),
    [
        ("scratch", ("scratch", None)),
        ("worktree", ("worktree", None)),
        ("dir:/tmp/work", ("dir", "/tmp/work")),
    ],
)
def test_parse_workspace_flag_valid(value, expected):
    """Each accepted workspace spelling parses to its ``(kind, path)`` pair."""
    parsed = kc._parse_workspace_flag(value)
    assert parsed == expected
|
|
|
|
|
|
def test_parse_workspace_flag_expands_user():
    """A ``dir:~/...`` value is returned without the leading ``~``."""
    workspace_kind, workspace_path = kc._parse_workspace_flag("dir:~/vault")

    assert workspace_kind == "dir"
    # The tail of the path is kept; only the "~" prefix is replaced.
    assert workspace_path.endswith("/vault")
    assert not workspace_path.startswith("~")
|
|
|
|
|
|
@pytest.mark.parametrize("bad", ["cloud", "dir:", "", "worktree:/x"])
def test_parse_workspace_flag_rejects(bad):
    """Malformed values raise ``ArgumentTypeError``.

    The empty string is the one exception in the parameter list: it is not
    an error and parses to the scratch default.
    """
    if bad:
        with pytest.raises(argparse.ArgumentTypeError):
            kc._parse_workspace_flag(bad)
    else:
        # Empty -> defaults; not an error.
        assert kc._parse_workspace_flag(bad) == ("scratch", None)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# run_slash smoke tests (end-to-end via the same entry both CLI and gateway use)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def test_run_slash_no_args_shows_usage(kanban_home):
    """An empty slash invocation returns usage-style text mentioning kanban."""
    lowered = kc.run_slash("").lower()

    assert "kanban" in lowered
    # Any one of these words is accepted as evidence of a usage listing.
    assert any(word in lowered for word in ("create", "subcommand", "action"))
|
|
|
|
|
|
def test_run_slash_create_and_list(kanban_home):
    """A created task appears in ``list`` with its title and assignee."""
    created = kc.run_slash("create 'ship feature' --assignee alice")
    assert "Created" in created

    listing = kc.run_slash("list")
    for needle in ("ship feature", "alice"):
        assert needle in listing
|
|
|
|
|
|
def test_run_slash_create_with_parent_and_cascade(kanban_home):
    """Completing a parent moves its ``--parent`` child from todo to ready."""
    import re

    # The create output carries the new id, e.g. "Created t_xxxx (ready, ...)".
    parent_out = kc.run_slash("create 'parent' --assignee alice")
    found = re.search(r"(t_[a-f0-9]+)", parent_out)
    assert found
    parent_id = found.group(1)

    child_out = kc.run_slash(f"create 'child' --assignee bob --parent {parent_id}")
    assert "todo" in child_out  # child starts as todo

    # Complete the parent, then query with an explicit status filter: the
    # child (todo before the completion) should now be listed as ready.
    kc.run_slash(f"complete {parent_id}")
    assert "child" in kc.run_slash("list --status ready")
|
|
|
|
|
|
def test_run_slash_show_includes_comments(kanban_home):
    """``show`` output contains a comment previously added to the task."""
    import re

    created = kc.run_slash("create 'x'")
    task_id = re.search(r"(t_[a-f0-9]+)", created).group(1)

    kc.run_slash(f"comment {task_id} 'source is paywalled'")
    assert "source is paywalled" in kc.run_slash(f"show {task_id}")
|
|
|
|
|
|
def test_run_slash_block_unblock_cycle(kanban_home):
    """A claimed task can be blocked and then unblocked via slash commands."""
    import re

    created = kc.run_slash("create 'x' --assignee alice")
    task_id = re.search(r"(t_[a-f0-9]+)", created).group(1)

    # Claim first so block() finds it running
    kc.run_slash(f"claim {task_id}")

    blocked = kc.run_slash(f"block {task_id} 'need decision'")
    assert "Blocked" in blocked

    unblocked = kc.run_slash(f"unblock {task_id}")
    assert "Unblocked" in unblocked
|
|
|
|
|
|
def test_run_slash_json_output(kanban_home):
    """``create --json`` returns a JSON object describing the new task."""
    raw = kc.run_slash("create 'jsontask' --assignee alice --json")
    task = json.loads(raw)

    expected_fields = {
        "title": "jsontask",
        "assignee": "alice",
        "status": "ready",
    }
    for field, value in expected_fields.items():
        assert task[field] == value
|
|
|
|
|
|
def test_run_slash_dispatch_dry_run_counts(kanban_home):
    """``dispatch --dry-run`` reports a ``Spawned:`` summary line."""
    for title, assignee in (("a", "alice"), ("b", "bob")):
        kc.run_slash(f"create '{title}' --assignee {assignee}")

    summary = kc.run_slash("dispatch --dry-run")
    assert "Spawned:" in summary
|
|
|
|
|
|
def test_run_slash_context_output_format(kanban_home):
    """``context`` output includes the task title, body and comment text."""
    import re

    created = kc.run_slash(
        "create 'tech spec' --assignee alice --body 'write an RFC'"
    )
    task_id = re.search(r"(t_[a-f0-9]+)", created).group(1)
    kc.run_slash(f"comment {task_id} 'remember to include performance section'")

    context_text = kc.run_slash(f"context {task_id}")
    for fragment in ("tech spec", "write an RFC", "performance section"):
        assert fragment in context_text
|
|
|
|
|
|
def test_run_slash_tenant_filter(kanban_home):
    """``list --tenant`` shows only the tasks created for that tenant."""
    kc.run_slash("create 'biz-a task' --tenant biz-a --assignee alice")
    kc.run_slash("create 'biz-b task' --tenant biz-b --assignee alice")

    listing_a = kc.run_slash("list --tenant biz-a")
    listing_b = kc.run_slash("list --tenant biz-b")

    assert "biz-a task" in listing_a
    assert "biz-b task" not in listing_a
    assert "biz-b task" in listing_b
    assert "biz-a task" not in listing_b
|
|
|
|
|
|
def test_run_slash_usage_error_returns_message(kanban_home):
    """A malformed command returns a usage/error message as text."""
    # Missing required argument for create
    lowered = kc.run_slash("create").lower()
    assert any(marker in lowered for marker in ("usage", "error"))
|
|
|
|
|
|
def test_run_slash_assign_reassigns(kanban_home):
    """``assign`` changes the assignee that ``show`` reports."""
    import re

    created = kc.run_slash("create 'x' --assignee alice")
    task_id = re.search(r"(t_[a-f0-9]+)", created).group(1)

    assign_out = kc.run_slash(f"assign {task_id} bob")
    assert "Assigned" in assign_out

    assert "bob" in kc.run_slash(f"show {task_id}")
|
|
|
|
|
|
def test_run_slash_link_unlink(kanban_home):
    """Linking two tasks puts the second in todo; the link can be removed."""
    import re

    first_out = kc.run_slash("create 'a'")
    second_out = kc.run_slash("create 'b'")
    first_id = re.search(r"(t_[a-f0-9]+)", first_out).group(1)
    second_id = re.search(r"(t_[a-f0-9]+)", second_out).group(1)

    link_out = kc.run_slash(f"link {first_id} {second_id}")
    assert "Linked" in link_out

    # After link, b is todo
    assert "todo" in kc.run_slash(f"show {second_id}")

    unlink_out = kc.run_slash(f"unlink {first_id} {second_id}")
    assert "Unlinked" in unlink_out
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Integration with the COMMAND_REGISTRY
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def test_kanban_is_resolvable():
    """``resolve_command("kanban")`` returns a command named ``kanban``."""
    from hermes_cli.commands import resolve_command

    resolved = resolve_command("kanban")

    assert resolved is not None
    assert resolved.name == "kanban"
|
|
|
|
|
|
def test_kanban_bypasses_active_session_guard():
    """``should_bypass_active_session`` is truthy for ``kanban``."""
    from hermes_cli.commands import should_bypass_active_session

    bypasses_guard = should_bypass_active_session("kanban")
    assert bypasses_guard
|
|
|
|
|
|
def test_kanban_in_autocomplete_table():
    """``/kanban`` and its core subcommands are present in the lookup tables."""
    from hermes_cli.commands import COMMANDS, SUBCOMMANDS

    assert "/kanban" in COMMANDS

    # A missing or empty entry degrades to [] so the checks below fail on
    # the membership assertion rather than on a None lookup.
    kanban_subs = SUBCOMMANDS.get("/kanban") or []
    for verb in ("create", "dispatch"):
        assert verb in kanban_subs
|
|
|
|
|
|
def test_kanban_not_gateway_only():
    """The kanban registry entry is flagged neither CLI-only nor gateway-only."""
    # kanban is available in BOTH CLI and gateway surfaces.
    from hermes_cli.commands import COMMAND_REGISTRY

    kanban_cmd = next(
        filter(lambda entry: entry.name == "kanban", COMMAND_REGISTRY)
    )

    assert not kanban_cmd.cli_only
    assert not kanban_cmd.gateway_only
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# reclaim + reassign CLI smoke tests
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def test_run_slash_reclaim_running_task(kanban_home):
    """``reclaim`` on a task with a running claim puts it back in ready.

    The running state is written straight into the database: the task row
    is marked running with a claim lock expiring an hour from now, and a
    matching ``task_runs`` row is inserted and linked as the current run.
    """
    import re
    import time
    import secrets
    from hermes_cli import kanban_db as kb

    def _now():
        return int(time.time())

    created = kc.run_slash("create 'stuck worker task' --assignee broken-model")
    found = re.search(r"(t_[a-f0-9]+)", created)
    assert found
    task_id = found.group(1)

    # Simulate a running claim whose expiry is still an hour away.
    db = kb.connect()
    try:
        claim_token = secrets.token_hex(4)
        db.execute(
            "UPDATE tasks SET status='running', claim_lock=?, claim_expires=?, "
            "worker_pid=? WHERE id=?",
            (claim_token, _now() + 3600, 4242, task_id),
        )
        db.execute(
            "INSERT INTO task_runs (task_id, status, claim_lock, claim_expires, "
            "worker_pid, started_at) VALUES (?, 'running', ?, ?, ?, ?)",
            (task_id, claim_token, _now() + 3600, 4242, _now()),
        )
        # Point the task at the run row that was just inserted.
        run_id = db.execute("SELECT last_insert_rowid()").fetchone()[0]
        db.execute(
            "UPDATE tasks SET current_run_id=? WHERE id=?", (run_id, task_id)
        )
        db.commit()
    finally:
        db.close()

    reclaim_out = kc.run_slash(f"reclaim {task_id} --reason 'test'")
    assert "Reclaimed" in reclaim_out, reclaim_out

    # Status back to ready.
    shown = kc.run_slash(f"show {task_id}")
    assert "ready" in shown.lower()
|
|
|
|
|
|
def test_run_slash_reassign_with_reclaim_flag(kanban_home):
    """``reassign --reclaim`` hands a running task to a new assignee.

    The running state is written straight into the database (task row
    marked running with a claim lock, plus a linked ``task_runs`` row), then
    the slash command is expected to report ``Reassigned`` and ``show`` is
    expected to mention the new assignee.
    """
    import re
    import time
    import secrets

    out1 = kc.run_slash("create 'switch model' --assignee orig")
    m = re.search(r"(t_[a-f0-9]+)", out1)
    # Fail with the create output rather than an AttributeError on None
    # when the create command did not return a task id.
    assert m, f"no task id in: {out1!r}"
    tid = m.group(1)

    # Simulate a running claim. The clock is sampled once so the tasks row
    # and the task_runs row always agree on claim_expires.
    now = int(time.time())
    expires = now + 3600
    conn = kb.connect()
    try:
        lock = secrets.token_hex(4)
        conn.execute(
            "UPDATE tasks SET status='running', claim_lock=?, claim_expires=?, "
            "worker_pid=? WHERE id=?",
            (lock, expires, 4242, tid),
        )
        conn.execute(
            "INSERT INTO task_runs (task_id, status, claim_lock, claim_expires, "
            "worker_pid, started_at) VALUES (?, 'running', ?, ?, ?, ?)",
            (tid, lock, expires, 4242, now),
        )
        # Link the task to the run row that was just inserted.
        rid = conn.execute("SELECT last_insert_rowid()").fetchone()[0]
        conn.execute("UPDATE tasks SET current_run_id=? WHERE id=?", (rid, tid))
        conn.commit()
    finally:
        conn.close()

    out = kc.run_slash(f"reassign {tid} newbie --reclaim --reason 'switch'")
    assert "Reassigned" in out, out

    out2 = kc.run_slash(f"show {tid}")
    assert "newbie" in out2
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# /kanban specify — slash surface (same entry point CLI + gateway use)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def test_run_slash_specify_end_to_end(kanban_home, monkeypatch):
    """Drive ``/kanban specify`` through ``run_slash`` with a mocked LLM.

    The /kanban specify slash command routes through run_slash, which both
    the interactive CLI and every gateway platform use, so this test covers
    both surfaces.  The auxiliary client is replaced with a mock that
    returns a fixed JSON spec; afterwards the task must have left triage
    and carry the new title.
    """
    import re
    from contextlib import closing
    from unittest.mock import MagicMock

    # Create a triage task via the same slash surface.
    create_out = kc.run_slash("create 'rough idea' --triage")
    m = re.search(r"(t_[a-f0-9]+)", create_out)
    assert m, f"no task id in: {create_out!r}"
    tid = m.group(1)

    # Mock the auxiliary client so we don't hit a real provider.
    resp = MagicMock()
    resp.choices = [MagicMock()]
    resp.choices[0].message.content = (
        '{"title": "Spec: rough idea", "body": "**Goal**\\nShip it."}'
    )
    fake_client = MagicMock()
    fake_client.chat.completions.create = MagicMock(return_value=resp)
    monkeypatch.setattr(
        "agent.auxiliary_client.get_text_auxiliary_client",
        lambda *a, **kw: (fake_client, "test-model"),
    )

    # Specify via slash.
    out = kc.run_slash(f"specify {tid}")
    assert "Specified" in out
    assert tid in out

    # Task is promoted and retitled.  closing() guarantees the handle is
    # released: a sqlite3-style ``with conn:`` only scopes the transaction
    # and leaves the connection open, whereas the other tests in this file
    # close their connections explicitly.
    with closing(kb.connect()) as conn:
        task = kb.get_task(conn, tid)
    # Either status is accepted: the task has left triage in both cases.
    assert task.status in {"todo", "ready"}
    assert task.title == "Spec: rough idea"
|
|
|
|
|
|
def test_run_slash_specify_help_is_reachable(kanban_home):
    """Check that ``specify`` is a registered subcommand.

    ``--help`` on a subcommand is handled by argparse itself: it prints to
    the process stdout and raises SystemExit before run_slash's output
    redirection is installed, so the returned string is the usage-error
    sentinel.  Only registration is asserted here; the shape of the help
    text is covered by the direct argparse tests in test_kanban_specify.py.
    """
    lowered = kc.run_slash("specify --help").lower()

    # Either the usage-error sentinel (stdout swallowed by argparse) or
    # a real help rendering — both mean the subcommand exists.
    assert any(marker in lowered for marker in ("usage error", "specify"))
|