fix(tools): clarify kanban_complete phantom-card retry guidance

When kanban_complete rejects a created_cards list as hallucinated, the task is intentionally left in-flight (the gate runs before the write txn) so the worker can retry with a corrected list or pass created_cards=[] to skip the check. The retry path already worked, but the previous error wording read like a terminal failure and workers were observed abandoning the run instead of trying again. Spell out the recovery path explicitly in the tool_error response ("Your task is still in-flight ... Retry kanban_complete with ...") and add regression coverage at both the kernel and tool layers so the retry contract — and the wording the worker depends on to discover it — is pinned. Fixes #22923
2026-05-18 04:41:56 +00:00 · 2026-05-10 09:52:39 +02:00 · 2026-05-10 09:52:39 +02:00 · 62cfe79e93
commit 62cfe79e93
parent 2f00559d9e
3 changed files with 181 additions and 2 deletions
--- a/tests/hermes_cli/test_kanban_core_functionality.py
+++ b/tests/hermes_cli/test_kanban_core_functionality.py
@@ -3539,6 +3539,76 @@ def test_complete_accepts_cross_worker_card_when_linked_as_child(kanban_home):
        conn.close()


+def test_complete_can_retry_after_phantom_rejection(kanban_home):
+    """After ``HallucinatedCardsError`` the task must still be ``running``
+    and a second ``complete_task`` call on it must succeed — both with a
+    corrected ``created_cards`` list and with ``[]`` (the documented escape
+    hatch). Regression test for #22923, where workers were believed to
+    be unrecoverable after the first rejection.
+    """
+    conn = kb.connect()
+    try:
+        # Two independent claimed tasks, one per retry shape, so that
+        # completing the first cannot affect the status of the second.
+        parent_a = kb.create_task(conn, title="retry-empty", assignee="alice")
+        kb.claim_task(conn, parent_a)
+        parent_b = kb.create_task(conn, title="retry-corrected", assignee="alice")
+        kb.claim_task(conn, parent_b)
+        real = kb.create_task(
+            conn, title="real-child", assignee="x", created_by="alice",
+        )
+
+        # First attempt: a nonexistent card id is rejected; task stays running.
+        with pytest.raises(kb.HallucinatedCardsError):
+            kb.complete_task(
+                conn, parent_a,
+                summary="oops",
+                created_cards=["t_phantomdeadbeef"],
+            )
+        assert kb.get_task(conn, parent_a).status == "running"
+
+        # Retry with [] (escape hatch): no rejection, the completion lands.
+        ok = kb.complete_task(
+            conn, parent_a,
+            summary="retry without claims",
+            created_cards=[],
+        )
+        assert ok is True
+        assert kb.get_task(conn, parent_a).status == "done"
+
+        # Same flow on parent_b, but the first list mixes a real card with
+        # a phantom, and recovery uses the corrected list (real card only).
+        with pytest.raises(kb.HallucinatedCardsError):
+            kb.complete_task(
+                conn, parent_b,
+                summary="oops",
+                created_cards=[real, "t_anotherphantom"],
+            )
+        assert kb.get_task(conn, parent_b).status == "running"
+
+        ok = kb.complete_task(
+            conn, parent_b,
+            summary="retry with corrected list",
+            created_cards=[real],
+        )
+        assert ok is True
+        assert kb.get_task(conn, parent_b).status == "done"
+
+        # Each task must have exactly one blocked-hallucination audit event
+        # (from the rejected attempt) and exactly one completion event.
+        for parent in (parent_a, parent_b):
+            kinds = [
+                r["kind"] for r in conn.execute(
+                    "SELECT kind FROM task_events WHERE task_id=? ORDER BY id",
+                    (parent,),
+                )
+            ]
+            assert kinds.count("completion_blocked_hallucination") == 1
+            assert kinds.count("completed") == 1
+    finally:
+        conn.close()
+
+
 def test_complete_prose_scan_flags_nonexistent_ids(kanban_home):
    """Successful completion whose summary references a ``t_<hex>`` id
    that doesn't resolve emits a ``suspected_hallucinated_references``