From 62cfe79e9368ecb0fe3c329179984b7d76f35edb Mon Sep 17 00:00:00 2001 From: konsisumer <11262660+konsisumer@users.noreply.github.com> Date: Sun, 10 May 2026 09:52:39 +0200 Subject: [PATCH] fix(tools): clarify kanban_complete phantom-card retry guidance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When kanban_complete rejects a created_cards list as hallucinated, the task is intentionally left in-flight (the gate runs before the write txn) so the worker can retry with a corrected list or pass created_cards=[] to skip the check. The retry path already worked, but the previous error wording read like a terminal failure and workers were observed abandoning the run instead of trying again. Spell out the recovery path explicitly in the tool_error response ("Your task is still in-flight ... Retry kanban_complete with ...") and add regression coverage at both the kernel and tool layers so the retry contract — and the wording the worker depends on to discover it — is pinned. Fixes #22923 --- .../test_kanban_core_functionality.py | 70 ++++++++++++ tests/tools/test_kanban_tools.py | 100 ++++++++++++++++++ tools/kanban_tools.py | 13 ++- 3 files changed, 181 insertions(+), 2 deletions(-) diff --git a/tests/hermes_cli/test_kanban_core_functionality.py b/tests/hermes_cli/test_kanban_core_functionality.py index 4572cd40056..d6d1bf49483 100644 --- a/tests/hermes_cli/test_kanban_core_functionality.py +++ b/tests/hermes_cli/test_kanban_core_functionality.py @@ -3539,6 +3539,76 @@ def test_complete_accepts_cross_worker_card_when_linked_as_child(kanban_home): conn.close() +def test_complete_can_retry_after_phantom_rejection(kanban_home): + """A worker that hits the hallucinated-card gate must be able to + retry kanban_complete on the same task — both with a corrected + created_cards list and with an empty list (the documented escape + hatch). Regression test for #22923, where workers were believed to + be unrecoverable after the first rejection. + """ + conn = kb.connect() + try: + # Two parallel completing tasks so we can exercise both retry + # shapes without status interference. + parent_a = kb.create_task(conn, title="retry-empty", assignee="alice") + kb.claim_task(conn, parent_a) + parent_b = kb.create_task(conn, title="retry-corrected", assignee="alice") + kb.claim_task(conn, parent_b) + real = kb.create_task( + conn, title="real-child", assignee="x", created_by="alice", + ) + + # First attempt: phantom in the list rejects, task stays running. + with pytest.raises(kb.HallucinatedCardsError): + kb.complete_task( + conn, parent_a, + summary="oops", + created_cards=["t_phantomdeadbeef"], + ) + assert kb.get_task(conn, parent_a).status == "running" + + # Retry with [] (escape hatch): gate is skipped, completion lands. + ok = kb.complete_task( + conn, parent_a, + summary="retry without claims", + created_cards=[], + ) + assert ok is True + assert kb.get_task(conn, parent_a).status == "done" + + # Same flow on parent_b, but recover via a corrected list rather + # than the empty escape hatch. + with pytest.raises(kb.HallucinatedCardsError): + kb.complete_task( + conn, parent_b, + summary="oops", + created_cards=[real, "t_anotherphantom"], + ) + assert kb.get_task(conn, parent_b).status == "running" + + ok = kb.complete_task( + conn, parent_b, + summary="retry with corrected list", + created_cards=[real], + ) + assert ok is True + assert kb.get_task(conn, parent_b).status == "done" + + # Both audit events landed; the eventual completion event is + # also present on each task. + for parent in (parent_a, parent_b): + kinds = [ + r["kind"] for r in conn.execute( + "SELECT kind FROM task_events WHERE task_id=? ORDER BY id", + (parent,), + ) + ] + assert kinds.count("completion_blocked_hallucination") == 1 + assert kinds.count("completed") == 1 + finally: + conn.close() + + def test_complete_prose_scan_flags_nonexistent_ids(kanban_home): """Successful completion whose summary references a ``t_`` id that doesn't resolve emits a ``suspected_hallucinated_references`` diff --git a/tests/tools/test_kanban_tools.py b/tests/tools/test_kanban_tools.py index d0da47d0bcc..c31ae6f08bb 100644 --- a/tests/tools/test_kanban_tools.py +++ b/tests/tools/test_kanban_tools.py @@ -330,6 +330,106 @@ def test_complete_rejects_non_dict_metadata(worker_env): assert json.loads(out).get("error") +def test_complete_phantom_card_message_advertises_retry(worker_env): + """A phantom-card rejection must surface a tool_error that explicitly + tells the worker the task is still in-flight and how to retry — the + worker has no other channel to discover that. Regression for #22923, + where the previous wording read like a terminal failure and workers + routinely abandoned the run instead of trying again. + """ + from hermes_cli import kanban_db as kb + from tools import kanban_tools as kt + + out = kt._handle_complete({ + "summary": "oops claimed a phantom", + "created_cards": ["t_phantomdeadbeef"], + }) + err = json.loads(out).get("error", "") + assert err, f"expected an error, got {out!r}" + # Phantom id surfaced verbatim. + assert "t_phantomdeadbeef" in err + # The retry-is-supported phrasing — these are the literal cues a + # worker reads to decide whether to retry vs block/abandon. If a + # future change rewords the message, these checks will catch the + # regression. See #22923 for the failure mode. + assert "still in-flight" in err + assert "Retry kanban_complete" in err + assert "created_cards=[]" in err + + # Critically: the task is genuinely still in-flight — the gate + # rejection did not mutate state, so the worker's retry can land. + conn = kb.connect() + try: + assert kb.get_task(conn, worker_env).status == "running" + finally: + conn.close() + + +def test_complete_retry_with_empty_created_cards_succeeds(worker_env): + """After a phantom rejection, retrying kanban_complete with + created_cards=[] (the documented escape hatch) must complete the + task. Regression for #22923.""" + from hermes_cli import kanban_db as kb + from tools import kanban_tools as kt + + # Hit the gate first. + rejected = json.loads(kt._handle_complete({ + "summary": "oops", + "created_cards": ["t_phantomdeadbeef"], + })) + assert rejected.get("error") + + # Retry with the escape hatch. + ok = json.loads(kt._handle_complete({ + "summary": "retry without claims", + "created_cards": [], + })) + assert ok.get("ok") is True + + conn = kb.connect() + try: + assert kb.get_task(conn, worker_env).status == "done" + finally: + conn.close() + + +def test_complete_retry_with_corrected_created_cards_succeeds(worker_env): + """After a phantom rejection, retrying kanban_complete with a + corrected created_cards list (phantom ids removed) must complete the + task. Regression for #22923.""" + from hermes_cli import kanban_db as kb + from tools import kanban_tools as kt + + # Create a real child via the tool so it gets the worker-profile + # attribution the gate trusts. + child = json.loads(kt._handle_create({ + "title": "real child", "assignee": "peer", + })) + assert child["ok"] + real_id = child["task_id"] + + # First attempt mixes real + phantom — gate rejects. + rejected = json.loads(kt._handle_complete({ + "summary": "oops", + "created_cards": [real_id, "t_phantomdeadbeef"], + })) + assert rejected.get("error") + assert "t_phantomdeadbeef" in rejected["error"] + + # Retry with corrected list. + ok = json.loads(kt._handle_complete({ + "summary": "retry with corrected list", + "created_cards": [real_id], + })) + assert ok.get("ok") is True + + conn = kb.connect() + try: + assert kb.get_task(conn, worker_env).status == "done" + finally: + conn.close() + + def test_block_happy_path(worker_env): from tools import kanban_tools as kt out = kt._handle_block({"reason": "need clarification"}) diff --git a/tools/kanban_tools.py b/tools/kanban_tools.py index 267f782d611..84105a9f839 100644 --- a/tools/kanban_tools.py +++ b/tools/kanban_tools.py @@ -406,12 +406,21 @@ def _handle_complete(args: dict, **kw) -> str: # Structured rejection — surface the phantom ids so the # worker can retry with a corrected list or drop the # field. Audit event already landed in the DB. + # + # The task itself was NOT mutated (the gate runs before + # the write txn), so the worker can simply call + # kanban_complete again. Spell that out — without it the + # model often interprets a tool_error as a terminal + # failure and either blocks or crashes the run instead + # of retrying. See #22923. return tool_error( f"kanban_complete blocked: the following created_cards " f"do not exist or were not created by this worker: " f"{', '.join(hall_err.phantom)}. " - f"Either omit them, use only ids returned from successful " - f"kanban_create calls, or remove the created_cards field." + f"Your task is still in-flight (no state change). " + f"Retry kanban_complete with the same summary/metadata " + f"and either drop these ids from created_cards, or pass " + f"created_cards=[] to skip the card-claim check entirely." ) if not ok: return tool_error(