From 62cfe79e9368ecb0fe3c329179984b7d76f35edb Mon Sep 17 00:00:00 2001
From: konsisumer <11262660+konsisumer@users.noreply.github.com>
Date: Sun, 10 May 2026 09:52:39 +0200
Subject: [PATCH] fix(tools): clarify kanban_complete phantom-card retry
 guidance
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When kanban_complete rejects a created_cards list as hallucinated, the
task is intentionally left in-flight (the gate runs before the write
txn) so the worker can retry with a corrected list or pass
created_cards=[] to skip the check. The retry path already worked, but
the previous error wording read like a terminal failure and workers
were observed abandoning the run instead of trying again.

Spell out the recovery path explicitly in the tool_error response
("Your task is still in-flight ... Retry kanban_complete with ...") and
add regression coverage at both the kernel and tool layers so the
retry contract — and the wording the worker depends on to discover
it — is pinned.

Fixes #22923
---
 .../test_kanban_core_functionality.py         |  70 ++++++++++++
 tests/tools/test_kanban_tools.py              | 100 ++++++++++++++++++
 tools/kanban_tools.py                         |  13 ++-
 3 files changed, 181 insertions(+), 2 deletions(-)

diff --git a/tests/hermes_cli/test_kanban_core_functionality.py b/tests/hermes_cli/test_kanban_core_functionality.py
index 4572cd40056..d6d1bf49483 100644
--- a/tests/hermes_cli/test_kanban_core_functionality.py
+++ b/tests/hermes_cli/test_kanban_core_functionality.py
@@ -3539,6 +3539,76 @@ def test_complete_accepts_cross_worker_card_when_linked_as_child(kanban_home):
         conn.close()
 
 
+def test_complete_can_retry_after_phantom_rejection(kanban_home):
+    """A worker that hits the hallucinated-card gate must be able to
+    retry kanban_complete on the same task — both with a corrected
+    created_cards list and with an empty list (the documented escape
+    hatch). Regression test for #22923, where workers were believed to
+    be unrecoverable after the first rejection.
+    """
+    conn = kb.connect()
+    try:
+        # Two parallel completing tasks so we can exercise both retry
+        # shapes without status interference.
+        parent_a = kb.create_task(conn, title="retry-empty", assignee="alice")
+        kb.claim_task(conn, parent_a)
+        parent_b = kb.create_task(conn, title="retry-corrected", assignee="alice")
+        kb.claim_task(conn, parent_b)
+        real = kb.create_task(
+            conn, title="real-child", assignee="x", created_by="alice",
+        )
+
+        # First attempt: phantom in the list rejects, task stays running.
+        with pytest.raises(kb.HallucinatedCardsError):
+            kb.complete_task(
+                conn, parent_a,
+                summary="oops",
+                created_cards=["t_phantomdeadbeef"],
+            )
+        assert kb.get_task(conn, parent_a).status == "running"
+
+        # Retry with [] (escape hatch): gate is skipped, completion lands.
+        ok = kb.complete_task(
+            conn, parent_a,
+            summary="retry without claims",
+            created_cards=[],
+        )
+        assert ok is True
+        assert kb.get_task(conn, parent_a).status == "done"
+
+        # Same flow on parent_b, but recover via a corrected list rather
+        # than the empty escape hatch.
+        with pytest.raises(kb.HallucinatedCardsError):
+            kb.complete_task(
+                conn, parent_b,
+                summary="oops",
+                created_cards=[real, "t_anotherphantom"],
+            )
+        assert kb.get_task(conn, parent_b).status == "running"
+
+        ok = kb.complete_task(
+            conn, parent_b,
+            summary="retry with corrected list",
+            created_cards=[real],
+        )
+        assert ok is True
+        assert kb.get_task(conn, parent_b).status == "done"
+
+        # Both audit events landed; the eventual completion event is
+        # also present on each task.
+        for parent in (parent_a, parent_b):
+            kinds = [
+                r["kind"] for r in conn.execute(
+                    "SELECT kind FROM task_events WHERE task_id=? ORDER BY id",
+                    (parent,),
+                )
+            ]
+            assert kinds.count("completion_blocked_hallucination") == 1
+            assert kinds.count("completed") == 1
+    finally:
+        conn.close()
+
+
 def test_complete_prose_scan_flags_nonexistent_ids(kanban_home):
     """Successful completion whose summary references a ``t_<hex>`` id
     that doesn't resolve emits a ``suspected_hallucinated_references``
diff --git a/tests/tools/test_kanban_tools.py b/tests/tools/test_kanban_tools.py
index d0da47d0bcc..c31ae6f08bb 100644
--- a/tests/tools/test_kanban_tools.py
+++ b/tests/tools/test_kanban_tools.py
@@ -330,6 +330,106 @@ def test_complete_rejects_non_dict_metadata(worker_env):
     assert json.loads(out).get("error")
 
 
+def test_complete_phantom_card_message_advertises_retry(worker_env):
+    """A phantom-card rejection must surface a tool_error that explicitly
+    tells the worker the task is still in-flight and how to retry — the
+    worker has no other channel to discover that. Regression for #22923,
+    where the previous wording read like a terminal failure and workers
+    routinely abandoned the run instead of trying again.
+    """
+    from hermes_cli import kanban_db as kb
+    from tools import kanban_tools as kt
+
+    out = kt._handle_complete({
+        "summary": "oops claimed a phantom",
+        "created_cards": ["t_phantomdeadbeef"],
+    })
+    err = json.loads(out).get("error", "")
+    assert err, f"expected an error, got {out!r}"
+    # Phantom id surfaced verbatim.
+    assert "t_phantomdeadbeef" in err
+    # The retry-is-supported phrasing — these are the literal cues a
+    # worker reads to decide whether to retry vs block/abandon. If a
+    # future change rewords the message, these checks will catch the
+    # regression. See #22923 for the failure mode.
+    assert "still in-flight" in err
+    assert "Retry kanban_complete" in err
+    assert "created_cards=[]" in err
+
+    # Critically: the task is genuinely still in-flight — the gate
+    # rejection did not mutate state, so the worker's retry can land.
+    conn = kb.connect()
+    try:
+        assert kb.get_task(conn, worker_env).status == "running"
+    finally:
+        conn.close()
+
+
+def test_complete_retry_with_empty_created_cards_succeeds(worker_env):
+    """After a phantom rejection, retrying kanban_complete with
+    created_cards=[] (the documented escape hatch) must complete the
+    task. Regression for #22923."""
+    from hermes_cli import kanban_db as kb
+    from tools import kanban_tools as kt
+
+    # Hit the gate first.
+    rejected = json.loads(kt._handle_complete({
+        "summary": "oops",
+        "created_cards": ["t_phantomdeadbeef"],
+    }))
+    assert rejected.get("error")
+
+    # Retry with the escape hatch.
+    ok = json.loads(kt._handle_complete({
+        "summary": "retry without claims",
+        "created_cards": [],
+    }))
+    assert ok.get("ok") is True
+
+    conn = kb.connect()
+    try:
+        assert kb.get_task(conn, worker_env).status == "done"
+    finally:
+        conn.close()
+
+
+def test_complete_retry_with_corrected_created_cards_succeeds(worker_env):
+    """After a phantom rejection, retrying kanban_complete with a
+    corrected created_cards list (phantom ids removed) must complete the
+    task. Regression for #22923."""
+    from hermes_cli import kanban_db as kb
+    from tools import kanban_tools as kt
+
+    # Create a real child via the tool so it gets the worker-profile
+    # attribution the gate trusts.
+    child = json.loads(kt._handle_create({
+        "title": "real child", "assignee": "peer",
+    }))
+    assert child["ok"]
+    real_id = child["task_id"]
+
+    # First attempt mixes real + phantom — gate rejects.
+    rejected = json.loads(kt._handle_complete({
+        "summary": "oops",
+        "created_cards": [real_id, "t_phantomdeadbeef"],
+    }))
+    assert rejected.get("error")
+    assert "t_phantomdeadbeef" in rejected["error"]
+
+    # Retry with corrected list.
+    ok = json.loads(kt._handle_complete({
+        "summary": "retry with corrected list",
+        "created_cards": [real_id],
+    }))
+    assert ok.get("ok") is True
+
+    conn = kb.connect()
+    try:
+        assert kb.get_task(conn, worker_env).status == "done"
+    finally:
+        conn.close()
+
+
 def test_block_happy_path(worker_env):
     from tools import kanban_tools as kt
     out = kt._handle_block({"reason": "need clarification"})
diff --git a/tools/kanban_tools.py b/tools/kanban_tools.py
index 267f782d611..84105a9f839 100644
--- a/tools/kanban_tools.py
+++ b/tools/kanban_tools.py
@@ -406,12 +406,21 @@ def _handle_complete(args: dict, **kw) -> str:
                 # Structured rejection — surface the phantom ids so the
                 # worker can retry with a corrected list or drop the
                 # field. Audit event already landed in the DB.
+                #
+                # The task itself was NOT mutated (the gate runs before
+                # the write txn), so the worker can simply call
+                # kanban_complete again. Spell that out — without it the
+                # model often interprets a tool_error as a terminal
+                # failure and either blocks or crashes the run instead
+                # of retrying. See #22923.
                 return tool_error(
                     f"kanban_complete blocked: the following created_cards "
                     f"do not exist or were not created by this worker: "
                     f"{', '.join(hall_err.phantom)}. "
-                    f"Either omit them, use only ids returned from successful "
-                    f"kanban_create calls, or remove the created_cards field."
+                    f"Your task is still in-flight (no state change). "
+                    f"Retry kanban_complete with the same summary/metadata "
+                    f"and either drop these ids from created_cards, or pass "
+                    f"created_cards=[] to skip the card-claim check entirely."
                 )
             if not ok:
                 return tool_error(