fix(tools): clarify kanban_complete phantom-card retry guidance

When kanban_complete rejects a created_cards list as hallucinated, the
task is intentionally left in-flight (the gate runs before the write
txn) so the worker can retry with a corrected list or pass
created_cards=[] to skip the check. The retry path already worked, but
the previous error wording read like a terminal failure and workers
were observed abandoning the run instead of trying again.

Spell out the recovery path explicitly in the tool_error response
("Your task is still in-flight ... Retry kanban_complete with ...") and
add regression coverage at both the kernel and tool layers so the
retry contract — and the wording the worker depends on to discover
it — is pinned.

Fixes #22923
This commit is contained in:
konsisumer 2026-05-10 09:52:39 +02:00 committed by Teknium
parent 2f00559d9e
commit 62cfe79e93
3 changed files with 181 additions and 2 deletions

View file

@ -3539,6 +3539,76 @@ def test_complete_accepts_cross_worker_card_when_linked_as_child(kanban_home):
conn.close()
def test_complete_can_retry_after_phantom_rejection(kanban_home):
    """Pin the retry contract of the hallucinated-card gate (#22923).

    A ``complete_task`` call that is rejected with
    ``HallucinatedCardsError`` must leave the task ``running`` so the
    same task can be completed by a follow-up call. Two recovery shapes
    are exercised, each on its own task:

    * retrying with ``created_cards=[]`` (the documented escape hatch);
    * retrying with a corrected list that only names a real card.

    Each task must end up ``done`` with exactly one
    ``completion_blocked_hallucination`` event and one ``completed``
    event recorded in ``task_events``.
    """
    conn = kb.connect()
    try:
        # One claimed task per recovery shape, so the status of one flow
        # cannot interfere with the other.
        empty_retry_task = kb.create_task(
            conn, title="retry-empty", assignee="alice",
        )
        kb.claim_task(conn, empty_retry_task)
        corrected_retry_task = kb.create_task(
            conn, title="retry-corrected", assignee="alice",
        )
        kb.claim_task(conn, corrected_retry_task)
        genuine_card = kb.create_task(
            conn, title="real-child", assignee="x", created_by="alice",
        )

        # (task, list that must be rejected, list used on retry, retry summary)
        scenarios = (
            (
                empty_retry_task,
                ["t_phantomdeadbeef"],
                [],
                "retry without claims",
            ),
            (
                corrected_retry_task,
                [genuine_card, "t_anotherphantom"],
                [genuine_card],
                "retry with corrected list",
            ),
        )

        for task_id, rejected_cards, retry_cards, retry_summary in scenarios:
            # First attempt names a phantom card: the call raises and the
            # task is still running afterwards.
            with pytest.raises(kb.HallucinatedCardsError):
                kb.complete_task(
                    conn, task_id,
                    summary="oops",
                    created_cards=rejected_cards,
                )
            assert kb.get_task(conn, task_id).status == "running"

            # Second attempt on the very same task must land.
            completed = kb.complete_task(
                conn, task_id,
                summary=retry_summary,
                created_cards=retry_cards,
            )
            assert completed is True
            assert kb.get_task(conn, task_id).status == "done"

        # Audit trail: one blocked attempt and one completion per task.
        for task_id, *_ in scenarios:
            event_rows = conn.execute(
                "SELECT kind FROM task_events WHERE task_id=? ORDER BY id",
                (task_id,),
            )
            event_kinds = [row["kind"] for row in event_rows]
            assert event_kinds.count("completion_blocked_hallucination") == 1
            assert event_kinds.count("completed") == 1
    finally:
        conn.close()
def test_complete_prose_scan_flags_nonexistent_ids(kanban_home):
"""Successful completion whose summary references a ``t_<hex>`` id
that doesn't resolve emits a ``suspected_hallucinated_references``