mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-08 03:01:47 +00:00
* feat(kanban): add `specify` — auxiliary LLM fleshes out triage tasks
The Triage column shipped with a placeholder 'a specifier will flesh
out the spec', but the specifier itself was never built. This wires
it up as a dedicated CLI verb.
`hermes kanban specify <id>` calls the auxiliary LLM (configured under
`auxiliary.triage_specifier`) to expand a rough one-liner into a
concrete spec — tightened title plus a body with Goal / Approach /
Acceptance criteria / Out-of-scope sections — then atomically flips
`status: triage -> todo` and recomputes ready so parent-free tasks
go straight to the dispatcher on the same tick.
Surface:
hermes kanban specify <task_id> # single task
hermes kanban specify --all [--tenant T] # sweep triage column
hermes kanban specify ... --author NAME # audit-comment author
hermes kanban specify ... --json # one JSON line per task
Design choices:
- Parent gating is preserved. specify_triage_task flips to 'todo',
then recompute_ready promotes to 'ready' only when parents are
done — same rule as a normal parent-gated todo.
- No daemon, no background watcher. Every invocation is explicit —
keeps cost predictable and doesn't fight the dispatcher loop.
- Response parse is lenient: strict JSON preferred, markdown-fence
tolerated, raw-body fallback on malformed JSON so the LLM can't
strand a task in triage.
- All failure modes (no aux client, API error, task moved out of
triage mid-call) return SpecifyOutcome(ok=False, reason=...) so
--all continues past individual failures.
Changes:
hermes_cli/kanban_db.py + specify_triage_task()
hermes_cli/kanban_specify.py NEW (~220 LOC — prompt, parse, call)
hermes_cli/kanban.py + specify subcommand + _cmd_specify
hermes_cli/config.py + auxiliary.triage_specifier task slot
website/docs/user-guide/features/kanban.md specify + config notes
website/docs/reference/cli-commands.md CLI reference entry
tests/hermes_cli/test_kanban_specify_db.py NEW (10 tests)
tests/hermes_cli/test_kanban_specify.py NEW (20 tests)
Validation: 30/30 targeted tests pass. E2E: triage task -> specify ->
ends in 'ready' with events [created, specified, promoted] and the
audit comment recorded under the configured author.
* feat(kanban): wire specifier into dashboard and gateway slash
Follow-ups to the initial PR #21435 — closes the two gaps I had deferred
to post-merge: the dashboard button and a first-class gateway surface.
Dashboard (plugins/kanban/dashboard/)
- POST /tasks/:id/specify NEW endpoint. Thin wrapper around
kanban_specify.specify_task(). Returns the CLI outcome shape
({ok, task_id, reason, new_title}); ok=false with a human reason
is a 200, not a 4xx, so the UI can render it inline without
treating 'no aux client configured' as a crash.
- Runs sync in FastAPI's threadpool because the LLM call can take
tens of seconds on reasoning models.
- Pins HERMES_KANBAN_BOARD around the specify call so the module's
argless kb.connect() lands on the right board.
- dist/index.js: doSpecify callback threaded through the drawer →
TaskDetail → StatusActions prop chain. ✨ Specify button appears
ONLY when task.status === 'triage' (elsewhere the backend would
reject anyway — hide the button to keep the action row clean).
Busy state (Specifying…) + inline success/error banner under the
button using the response.reason text.
- dist/style.css: tiny hermes-kanban-msg-ok / -err classes using
existing --color vars so themes reskin cleanly.
Gateway slash (/kanban specify)
- Already works via the existing run_slash → build_parser →
kanban_command pipeline. No code change needed — slash commands
inherit the argparse tree automatically. Added coverage:
test_run_slash_specify_end_to_end (create --triage, specify, verify
promotion + retitle) and test_run_slash_specify_help_is_reachable.
Tests
- tests/plugins/test_kanban_dashboard_plugin.py: 3 new tests for the
REST endpoint — happy path, non-triage rejection as ok=false 200,
missing aux client as ok=false 200.
- tests/hermes_cli/test_kanban_cli.py: 2 new slash-surface tests.
Docs
- website/docs/user-guide/features/kanban.md: dashboard action row
description mentions ✨ Specify + all three surfaces. REST table
gains /tasks/:id/specify. Slash examples include /kanban specify.
Validation: 340/340 targeted tests pass. E2E via TestClient: create a
triage task over REST → POST /specify with mocked aux client → task
moves to 'ready' column on /board with new title and body applied.
337 lines
11 KiB
Python
337 lines
11 KiB
Python
"""Tests for the specifier module + `hermes kanban specify` CLI surface.
|
|
|
|
The auxiliary LLM client is mocked — these tests don't hit any network or
|
|
real provider. They exercise the prompt plumbing, response parsing, DB
|
|
writes, and CLI flag surface.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import json as jsonlib
|
|
from pathlib import Path
|
|
from unittest.mock import MagicMock, patch
|
|
|
|
import pytest
|
|
|
|
from hermes_cli import kanban as kanban_cli
|
|
from hermes_cli import kanban_db as kb
|
|
from hermes_cli import kanban_specify as spec
|
|
|
|
|
|
@pytest.fixture
def kanban_home(tmp_path, monkeypatch):
    """Yield-free fixture giving each test its own ``.hermes`` directory.

    Creates ``<tmp_path>/.hermes``, points ``HERMES_HOME`` at it, makes
    ``Path.home()`` return ``tmp_path``, and initialises the kanban DB via
    ``kb.init_db()``. Returns the ``.hermes`` directory path.
    """
    hermes_dir = tmp_path / ".hermes"
    hermes_dir.mkdir()

    # Redirect both lookup routes (env var and Path.home) to the sandbox
    # before the database is initialised.
    monkeypatch.setattr(Path, "home", lambda: tmp_path)
    monkeypatch.setenv("HERMES_HOME", str(hermes_dir))

    kb.init_db()
    return hermes_dir
|
|
|
|
|
|
def _fake_aux_response(content: str):
|
|
"""Build a minimal object shaped like an OpenAI chat.completions result.
|
|
|
|
The specifier only reads ``resp.choices[0].message.content``, so we
|
|
avoid importing the openai SDK and build the tree with MagicMock.
|
|
"""
|
|
resp = MagicMock()
|
|
resp.choices = [MagicMock()]
|
|
resp.choices[0].message.content = content
|
|
return resp
|
|
|
|
|
|
def _mock_client_returning(content: str):
    """Build a mock aux client whose ``chat.completions.create`` returns *content*.

    The returned value of ``create`` is the fake response produced by
    ``_fake_aux_response(content)``; ``create`` itself is a ``MagicMock`` so
    tests can inspect ``call_count``.
    """
    create_mock = MagicMock(return_value=_fake_aux_response(content))
    fake_client = MagicMock()
    fake_client.chat.completions.create = create_mock
    return fake_client
|
|
|
|
|
|
def _patch_aux_client(content: str, *, model: str = "test-model"):
    """Prepare a patch that makes the aux-client factory return a mock client.

    A single patch is created, targeting
    ``agent.auxiliary_client.get_text_auxiliary_client`` at its defining
    module. Per the original note on this helper, ``kanban_specify`` imports
    that function inside the function body, so the lookup happens at call
    time and sees the patched attribute.

    Returns a ``(patcher, client)`` tuple: the un-started ``patch`` object
    (use it as a context manager) and the mock client, so callers can
    assert on ``client.chat.completions.create``.
    """
    fake_client = _mock_client_returning(content)
    patcher = patch(
        "agent.auxiliary_client.get_text_auxiliary_client",
        return_value=(fake_client, model),
    )
    return patcher, fake_client
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# JSON extraction helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def test_extract_json_blob_handles_plain_json():
    """A bare JSON object string parses to the equivalent dict."""
    expected = {"title": "T", "body": "B"}
    parsed = spec._extract_json_blob('{"title": "T", "body": "B"}')
    assert parsed == expected
|
|
|
|
|
|
def test_extract_json_blob_handles_fenced_json():
    """JSON wrapped in a ```json markdown fence is still extracted."""
    fenced = '```json\n{"title": "T", "body": "B"}\n```'
    expected = {"title": "T", "body": "B"}
    parsed = spec._extract_json_blob(fenced)
    assert parsed == expected
|
|
|
|
|
|
def test_extract_json_blob_handles_prose_preamble():
    """A JSON object surrounded by chatty prose on both sides is extracted."""
    chatty = 'Sure! Here you go:\n{"title": "T", "body": "B"}\nThanks.'
    expected = {"title": "T", "body": "B"}
    parsed = spec._extract_json_blob(chatty)
    assert parsed == expected
|
|
|
|
|
|
def test_extract_json_blob_returns_none_for_unparseable():
    """Inputs with no valid JSON object (prose, empty, malformed) yield ``None``."""
    unparseable_inputs = ("no json here", "", "{not: valid}")
    for raw in unparseable_inputs:
        assert spec._extract_json_blob(raw) is None
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# specify_task (module-level entry point)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def test_specify_task_happy_path(kanban_home):
    """A well-formed JSON reply retitles the task, sets its body, and promotes it."""
    with kb.connect() as conn:
        tid = kb.create_task(conn, title="rough", triage=True)

    llm_reply = jsonlib.dumps(
        {"title": "Refined rough", "body": "**Goal**\nA concrete goal."}
    )
    patcher, _client = _patch_aux_client(llm_reply)
    with patcher:
        outcome = spec.specify_task(tid, author="ace")

    assert outcome.ok is True
    assert outcome.task_id == tid
    assert outcome.new_title == "Refined rough"

    with kb.connect() as conn:
        stored = kb.get_task(conn, tid)
        # The task has no parents, so recompute_ready carries it on to ready.
        assert stored.status == "ready"
        assert stored.title == "Refined rough"
        assert "**Goal**" in (stored.body or "")
|
|
|
|
|
|
def test_specify_task_falls_back_to_body_only_on_bad_json(kanban_home):
    """A non-JSON reply still succeeds: title is kept, body takes the raw text."""
    with kb.connect() as conn:
        tid = kb.create_task(conn, title="keep title", triage=True)

    # The model answered in plain markdown; there is no JSON object at all.
    markdown_reply = "Goal: Do a thing.\nApproach: Steps here."
    patcher, _client = _patch_aux_client(markdown_reply)
    with patcher:
        outcome = spec.specify_task(tid)

    assert outcome.ok is True

    with kb.connect() as conn:
        stored = kb.get_task(conn, tid)
        # No JSON "title" key was available, so the original title survives.
        assert stored.title == "keep title"
        # The raw response text became the body.
        assert "Goal:" in (stored.body or "")
|
|
|
|
|
|
def test_specify_task_rejects_non_triage_task(kanban_home):
    """Specifying a task created without ``triage=True`` fails without an LLM call."""
    with kb.connect() as conn:
        tid = kb.create_task(conn, title="ready task")

    patcher, fake_client = _patch_aux_client("unused")
    with patcher:
        outcome = spec.specify_task(tid)

    assert outcome.ok is False
    assert "not in triage" in outcome.reason
    # Fail cheap: the LLM must never be contacted for a non-triage task.
    create_mock = fake_client.chat.completions.create
    assert create_mock.call_count == 0
|
|
|
|
|
|
def test_specify_task_unknown_id(kanban_home):
    """An id that matches no task is reported as unknown, with no LLM call."""
    patcher, fake_client = _patch_aux_client("unused")
    with patcher:
        outcome = spec.specify_task("t_nope")

    assert outcome.ok is False
    assert "unknown task" in outcome.reason
    create_mock = fake_client.chat.completions.create
    assert create_mock.call_count == 0
|
|
|
|
|
|
def test_specify_task_no_aux_client_configured(kanban_home):
    """When the factory returns no client, the call fails and the task stays in triage."""
    with kb.connect() as conn:
        tid = kb.create_task(conn, title="rough", triage=True)

    missing_client = patch(
        "agent.auxiliary_client.get_text_auxiliary_client",
        return_value=(None, ""),
    )
    with missing_client:
        outcome = spec.specify_task(tid)

    assert outcome.ok is False
    assert "auxiliary client" in outcome.reason

    # The task was never modified, so it must still sit in triage.
    with kb.connect() as conn:
        current = kb.get_task(conn, tid)
        assert current.status == "triage"
|
|
|
|
|
|
def test_specify_task_llm_api_error_keeps_task_in_triage(kanban_home):
    """An exception raised by the LLM call yields ok=False and leaves the task in triage."""
    with kb.connect() as conn:
        tid = kb.create_task(conn, title="rough", triage=True)

    # A client whose completion call always raises.
    failing_create = MagicMock(side_effect=RuntimeError("429 rate limited"))
    failing_client = MagicMock()
    failing_client.chat.completions.create = failing_create

    erroring_factory = patch(
        "agent.auxiliary_client.get_text_auxiliary_client",
        return_value=(failing_client, "test-model"),
    )
    with erroring_factory:
        outcome = spec.specify_task(tid)

    assert outcome.ok is False
    assert "LLM error" in outcome.reason

    with kb.connect() as conn:
        current = kb.get_task(conn, tid)
        assert current.status == "triage"
|
|
|
|
|
|
def test_specify_task_empty_llm_response(kanban_home):
    """An empty string from the LLM is treated as a failure; the task stays in triage."""
    with kb.connect() as conn:
        tid = kb.create_task(conn, title="rough", triage=True)

    patcher, _client = _patch_aux_client("")
    with patcher:
        outcome = spec.specify_task(tid)

    assert outcome.ok is False

    with kb.connect() as conn:
        current = kb.get_task(conn, tid)
        assert current.status == "triage"
|
|
|
|
|
|
def test_list_triage_ids(kanban_home):
    """``list_triage_ids`` returns only triage tasks, optionally narrowed by tenant."""
    with kb.connect() as conn:
        untenanted = kb.create_task(conn, title="a", triage=True)
        tenanted = kb.create_task(conn, title="b", triage=True, tenant="proj-1")
        # Created without triage=True, so it must not appear in either listing.
        kb.create_task(conn, title="c")

    every_triage_id = spec.list_triage_ids()
    assert set(every_triage_id) == {untenanted, tenanted}

    tenant_only = spec.list_triage_ids(tenant="proj-1")
    assert tenant_only == [tenanted]
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# CLI wiring — argparse + _cmd_specify
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _run_cli(*argv: str) -> int:
    """Run ``hermes kanban <argv...>`` through the real argparse tree.

    Builds a throwaway root parser, lets ``kanban_cli.build_parser`` register
    the ``kanban`` subcommand on it, parses ``["kanban", *argv]``, and returns
    whatever ``kanban_cli.kanban_command`` returns for the parsed namespace.
    """
    parser = argparse.ArgumentParser()
    subcommands = parser.add_subparsers(dest="cmd")
    kanban_cli.build_parser(subcommands)

    full_argv = ["kanban"]
    full_argv.extend(argv)
    namespace = parser.parse_args(full_argv)
    return kanban_cli.kanban_command(namespace)
|
|
|
|
|
|
def test_cli_specify_requires_id_or_all(kanban_home, capsys):
    """``specify`` with neither a task id nor ``--all`` exits 2 with a usage hint."""
    exit_code = _run_cli("specify")
    assert exit_code == 2

    captured = capsys.readouterr()
    assert "requires a task id or --all" in captured.err
|
|
|
|
|
|
def test_cli_specify_rejects_both_id_and_all(kanban_home, capsys):
    """Passing a task id together with ``--all`` exits 2 with an explanatory error."""
    with kb.connect() as conn:
        tid = kb.create_task(conn, title="rough", triage=True)

    exit_code = _run_cli("specify", tid, "--all")
    assert exit_code == 2

    captured = capsys.readouterr()
    assert "either a task id OR --all" in captured.err
|
|
|
|
|
|
def test_cli_specify_single_id_success(kanban_home, capsys):
    """Specifying one triage task by id exits 0 and prints the id plus a transition marker."""
    with kb.connect() as conn:
        tid = kb.create_task(conn, title="rough", triage=True)

    llm_reply = jsonlib.dumps({"title": "clean", "body": "body"})
    patcher, _client = _patch_aux_client(llm_reply)
    with patcher:
        exit_code = _run_cli("specify", tid)

    assert exit_code == 0
    stdout = capsys.readouterr().out
    assert tid in stdout
    # Accept either arrow style; any "→" covers the "→ todo" spelling too.
    assert "→" in stdout or "-> todo" in stdout
|
|
|
|
|
|
def test_cli_specify_all_success_and_json(kanban_home, capsys):
    """``--all --json`` exits 0 and prints exactly one ok JSON line per triage task."""
    with kb.connect() as conn:
        first = kb.create_task(conn, title="a", triage=True)
        second = kb.create_task(conn, title="b", triage=True)

    llm_reply = jsonlib.dumps({"title": "spec", "body": "body"})
    patcher, _client = _patch_aux_client(llm_reply)
    with patcher:
        exit_code = _run_cli("specify", "--all", "--json")

    assert exit_code == 0

    stdout = capsys.readouterr().out
    json_lines = [line for line in stdout.strip().splitlines() if line]
    # Exactly one JSON object per task and no other output.
    assert len(json_lines) == 2

    rows = [jsonlib.loads(line) for line in json_lines]
    reported_ids = {row["task_id"] for row in rows}
    assert reported_ids == {first, second}
    assert all(row["ok"] for row in rows)
|
|
|
|
|
|
def test_cli_specify_all_empty_triage_column(kanban_home, capsys):
    """``--all`` on a board with no triage tasks exits 0 and says so on stdout."""
    exit_code = _run_cli("specify", "--all")
    assert exit_code == 0

    stdout = capsys.readouterr().out
    assert "No triage tasks" in stdout
|
|
|
|
|
|
def test_cli_specify_all_returns_1_when_every_task_fails(kanban_home, capsys):
    """``--all`` exits 1 when no task in the sweep could be specified."""
    with kb.connect() as conn:
        for title in ("a", "b"):
            kb.create_task(conn, title=title, triage=True)

    # With no aux client available, every task in the sweep fails.
    missing_client = patch(
        "agent.auxiliary_client.get_text_auxiliary_client",
        return_value=(None, ""),
    )
    with missing_client:
        exit_code = _run_cli("specify", "--all")

    assert exit_code == 1
|
|
|
|
|
|
def test_cli_specify_tenant_filter(kanban_home, capsys):
    """``--all --tenant T`` only touches triage tasks belonging to tenant T."""
    with kb.connect() as conn:
        outside = kb.create_task(conn, title="outside", triage=True)
        inside = kb.create_task(conn, title="inside", triage=True, tenant="proj-a")

    llm_reply = jsonlib.dumps({"title": "spec", "body": "body"})
    patcher, _client = _patch_aux_client(llm_reply)
    with patcher:
        exit_code = _run_cli("specify", "--all", "--tenant", "proj-a", "--json")

    assert exit_code == 0

    stdout = capsys.readouterr().out
    rows = [jsonlib.loads(line) for line in stdout.strip().splitlines() if line]
    reported_ids = {row["task_id"] for row in rows}
    assert reported_ids == {inside}

    with kb.connect() as conn:
        # The task outside the tenant was left alone in triage.
        assert kb.get_task(conn, outside).status == "triage"
        # The task inside the tenant moved on (todo, or ready once recomputed).
        assert kb.get_task(conn, inside).status in {"todo", "ready"}
|
|
|
|
|
|
def test_cli_specify_author_passed_through(kanban_home, capsys):
    """``--author NAME`` ends up as the author of the first comment on the task."""
    with kb.connect() as conn:
        tid = kb.create_task(conn, title="rough", triage=True)

    llm_reply = jsonlib.dumps({"title": "fresh title", "body": "fresh body"})
    patcher, _client = _patch_aux_client(llm_reply)
    with patcher:
        exit_code = _run_cli("specify", tid, "--author", "custom-agent")

    assert exit_code == 0

    with kb.connect() as conn:
        comments = kb.list_comments(conn, tid)
        # There must be at least one comment, and its author is the flag value.
        assert comments
        assert comments[0].author == "custom-agent"
|