fix(tools): clarify kanban_complete phantom-card retry guidance

When kanban_complete rejects a created_cards list as hallucinated, the task is intentionally left in-flight (the gate runs before the write txn) so the worker can retry with a corrected list or pass created_cards=[] to skip the check. The retry path already worked, but the previous error wording read like a terminal failure and workers were observed abandoning the run instead of trying again. Spell out the recovery path explicitly in the tool_error response ("Your task is still in-flight ... Retry kanban_complete with ...") and add regression coverage at both the kernel and tool layers so the retry contract — and the wording the worker depends on to discover it — is pinned. Fixes #22923
2026-05-18 04:41:56 +00:00 · 2026-05-10 09:52:39 +02:00 · 2026-05-10 09:52:39 +02:00 · 62cfe79e93
commit 62cfe79e93
parent 2f00559d9e
3 changed files with 181 additions and 2 deletions
--- a/tests/hermes_cli/test_kanban_core_functionality.py
+++ b/tests/hermes_cli/test_kanban_core_functionality.py
@ -3539,6 +3539,76 @@ def test_complete_accepts_cross_worker_card_when_linked_as_child(kanban_home):
        conn.close()


+def test_complete_can_retry_after_phantom_rejection(kanban_home):
+    """A worker that hits the hallucinated-card gate must be able to
+    retry kanban_complete on the same task — both with a corrected
+    created_cards list and with an empty list (the documented escape
+    hatch). Regression test for #22923, where workers were believed to
+    be unrecoverable after the first rejection.
+    """
+    conn = kb.connect()
+    try:
+        # Two parallel completing tasks so we can exercise both retry
+        # shapes without status interference.
+        parent_a = kb.create_task(conn, title="retry-empty", assignee="alice")
+        kb.claim_task(conn, parent_a)
+        parent_b = kb.create_task(conn, title="retry-corrected", assignee="alice")
+        kb.claim_task(conn, parent_b)
+        real = kb.create_task(
+            conn, title="real-child", assignee="x", created_by="alice",
+        )
+
+        # First attempt: phantom in the list rejects, task stays running.
+        with pytest.raises(kb.HallucinatedCardsError):
+            kb.complete_task(
+                conn, parent_a,
+                summary="oops",
+                created_cards=["t_phantomdeadbeef"],
+            )
+        assert kb.get_task(conn, parent_a).status == "running"
+
+        # Retry with [] (escape hatch): gate is skipped, completion lands.
+        ok = kb.complete_task(
+            conn, parent_a,
+            summary="retry without claims",
+            created_cards=[],
+        )
+        assert ok is True
+        assert kb.get_task(conn, parent_a).status == "done"
+
+        # Same flow on parent_b, but recover via a corrected list rather
+        # than the empty escape hatch.
+        with pytest.raises(kb.HallucinatedCardsError):
+            kb.complete_task(
+                conn, parent_b,
+                summary="oops",
+                created_cards=[real, "t_anotherphantom"],
+            )
+        assert kb.get_task(conn, parent_b).status == "running"
+
+        ok = kb.complete_task(
+            conn, parent_b,
+            summary="retry with corrected list",
+            created_cards=[real],
+        )
+        assert ok is True
+        assert kb.get_task(conn, parent_b).status == "done"
+
+        # Both audit events landed; the eventual completion event is
+        # also present on each task.
+        for parent in (parent_a, parent_b):
+            kinds = [
+                r["kind"] for r in conn.execute(
+                    "SELECT kind FROM task_events WHERE task_id=? ORDER BY id",
+                    (parent,),
+                )
+            ]
+            assert kinds.count("completion_blocked_hallucination") == 1
+            assert kinds.count("completed") == 1
+    finally:
+        conn.close()
+
+
 def test_complete_prose_scan_flags_nonexistent_ids(kanban_home):
    """Successful completion whose summary references a ``t_<hex>`` id
    that doesn't resolve emits a ``suspected_hallucinated_references``
--- a/tests/tools/test_kanban_tools.py
+++ b/tests/tools/test_kanban_tools.py
@ -330,6 +330,106 @@ def test_complete_rejects_non_dict_metadata(worker_env):
    assert json.loads(out).get("error")


+def test_complete_phantom_card_message_advertises_retry(worker_env):
+    """A phantom-card rejection must surface a tool_error that explicitly
+    tells the worker the task is still in-flight and how to retry — the
+    worker has no other channel to discover that. Regression for #22923,
+    where the previous wording read like a terminal failure and workers
+    routinely abandoned the run instead of trying again.
+    """
+    from hermes_cli import kanban_db as kb
+    from tools import kanban_tools as kt
+
+    out = kt._handle_complete({
+        "summary": "oops claimed a phantom",
+        "created_cards": ["t_phantomdeadbeef"],
+    })
+    err = json.loads(out).get("error", "")
+    assert err, f"expected an error, got {out!r}"
+    # Phantom id surfaced verbatim.
+    assert "t_phantomdeadbeef" in err
+    # The retry-is-supported phrasing — these are the literal cues a
+    # worker reads to decide whether to retry vs block/abandon. If a
+    # future change rewords the message, these checks will catch the
+    # regression. See #22923 for the failure mode.
+    assert "still in-flight" in err
+    assert "Retry kanban_complete" in err
+    assert "created_cards=[]" in err
+
+    # Critically: the task is genuinely still in-flight — the gate
+    # rejection did not mutate state, so the worker's retry can land.
+    conn = kb.connect()
+    try:
+        assert kb.get_task(conn, worker_env).status == "running"
+    finally:
+        conn.close()
+
+
+def test_complete_retry_with_empty_created_cards_succeeds(worker_env):
+    """After a phantom rejection, retrying kanban_complete with
+    created_cards=[] (the documented escape hatch) must complete the
+    task. Regression for #22923."""
+    from hermes_cli import kanban_db as kb
+    from tools import kanban_tools as kt
+
+    # Hit the gate first.
+    rejected = json.loads(kt._handle_complete({
+        "summary": "oops",
+        "created_cards": ["t_phantomdeadbeef"],
+    }))
+    assert rejected.get("error")
+
+    # Retry with the escape hatch.
+    ok = json.loads(kt._handle_complete({
+        "summary": "retry without claims",
+        "created_cards": [],
+    }))
+    assert ok.get("ok") is True
+
+    conn = kb.connect()
+    try:
+        assert kb.get_task(conn, worker_env).status == "done"
+    finally:
+        conn.close()
+
+
+def test_complete_retry_with_corrected_created_cards_succeeds(worker_env):
+    """After a phantom rejection, retrying kanban_complete with a
+    corrected created_cards list (phantom ids removed) must complete the
+    task. Regression for #22923."""
+    from hermes_cli import kanban_db as kb
+    from tools import kanban_tools as kt
+
+    # Create a real child via the tool so it gets the worker-profile
+    # attribution the gate trusts.
+    child = json.loads(kt._handle_create({
+        "title": "real child", "assignee": "peer",
+    }))
+    assert child["ok"]
+    real_id = child["task_id"]
+
+    # First attempt mixes real + phantom — gate rejects.
+    rejected = json.loads(kt._handle_complete({
+        "summary": "oops",
+        "created_cards": [real_id, "t_phantomdeadbeef"],
+    }))
+    assert rejected.get("error")
+    assert "t_phantomdeadbeef" in rejected["error"]
+
+    # Retry with corrected list.
+    ok = json.loads(kt._handle_complete({
+        "summary": "retry with corrected list",
+        "created_cards": [real_id],
+    }))
+    assert ok.get("ok") is True
+
+    conn = kb.connect()
+    try:
+        assert kb.get_task(conn, worker_env).status == "done"
+    finally:
+        conn.close()
+
+
 def test_block_happy_path(worker_env):
    from tools import kanban_tools as kt
    out = kt._handle_block({"reason": "need clarification"})
--- a/tools/kanban_tools.py
+++ b/tools/kanban_tools.py
@ -406,12 +406,21 @@ def _handle_complete(args: dict, **kw) -> str:
                # Structured rejection — surface the phantom ids so the
                # worker can retry with a corrected list or drop the
                # field. Audit event already landed in the DB.
+                #
+                # The task itself was NOT mutated (the gate runs before
+                # the write txn), so the worker can simply call
+                # kanban_complete again. Spell that out — without it the
+                # model often interprets a tool_error as a terminal
+                # failure and either blocks or crashes the run instead
+                # of retrying. See #22923.
                return tool_error(
                    f"kanban_complete blocked: the following created_cards "
                    f"do not exist or were not created by this worker: "
                    f"{', '.join(hall_err.phantom)}. "
-                    f"Either omit them, use only ids returned from successful "
-                    f"kanban_create calls, or remove the created_cards field."
+                    f"Your task is still in-flight (no state change). "
+                    f"Retry kanban_complete with the same summary/metadata "
+                    f"and either drop these ids from created_cards, or pass "
+                    f"created_cards=[] to skip the card-claim check entirely."
                )
            if not ok:
                return tool_error(