From f042931852658ad6f4d71754cb7d2e2052885860 Mon Sep 17 00:00:00 2001 From: bradhallett <53977268+bradhallett@users.noreply.github.com> Date: Mon, 18 May 2026 20:16:26 -0700 Subject: [PATCH] fix(kanban): reset failure counters on unblock_task MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a task is manually unblocked (blocked → ready/todo), the consecutive_failures counter and last_failure_error were left intact. The next failure would immediately re-trip the circuit breaker because the counter was still at or above the failure limit. Reset both fields on unblock so the task gets a fresh retry budget. Includes a regression test that verifies counters are zeroed. --- hermes_cli/kanban_db.py | 3 ++- tests/hermes_cli/test_kanban_db.py | 20 ++++++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/hermes_cli/kanban_db.py b/hermes_cli/kanban_db.py index ce7f7a8d8c8..05af0b231c7 100644 --- a/hermes_cli/kanban_db.py +++ b/hermes_cli/kanban_db.py @@ -2718,7 +2718,8 @@ def unblock_task(conn: sqlite3.Connection, task_id: str) -> bool: ).fetchone() new_status = "todo" if undone_parents else "ready" cur = conn.execute( - "UPDATE tasks SET status = ?, current_run_id = NULL " + "UPDATE tasks SET status = ?, current_run_id = NULL, " + "consecutive_failures = 0, last_failure_error = NULL " "WHERE id = ? AND status = 'blocked'", (new_status, task_id), ) diff --git a/tests/hermes_cli/test_kanban_db.py b/tests/hermes_cli/test_kanban_db.py index 919ac4ce276..b7ab8b06f39 100644 --- a/tests/hermes_cli/test_kanban_db.py +++ b/tests/hermes_cli/test_kanban_db.py @@ -406,6 +406,26 @@ def test_block_then_unblock(kanban_home): assert kb.get_task(conn, t).status == "ready" +def test_unblock_resets_failure_counters(kanban_home): + """unblock_task must reset consecutive_failures and last_failure_error.""" + with kb.connect() as conn: + t = kb.create_task(conn, title="x", assignee="a") + kb.claim_task(conn, t) + assert kb.block_task(conn, t, reason="need input") + # Simulate accumulated failures from the circuit breaker + conn.execute( + "UPDATE tasks SET consecutive_failures = 5, " + "last_failure_error = 'test error' WHERE id = ?", + (t,), + ) + conn.commit() + assert kb.unblock_task(conn, t) + task = kb.get_task(conn, t) + assert task.status == "ready" + assert task.consecutive_failures == 0 + assert task.last_failure_error is None + + # --------------------------------------------------------------------------- # Parent-completion invariant at the claim gate (RCA t_a6acd07d) # ---------------------------------------------------------------------------