From 35e7ca03d5347c202d9be8be15492f189bf46c93 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Fri, 19 Jun 2026 07:08:40 -0700
Subject: [PATCH] fix(kanban): treat already-gone worker as terminated, not survived
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

_terminate_reclaimed_worker early-returned on ProcessLookupError with
terminated=False. The new reclaim-defer guard reads that as 'worker
survived the kill' and defers the reclaim forever, so a stale task whose
worker is already dead never lands in result.stale.

ProcessLookupError means the process is gone — that IS a successful
termination. Split it from the generic OSError branch and set
terminated=True.
---
 hermes_cli/kanban_db.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/hermes_cli/kanban_db.py b/hermes_cli/kanban_db.py
index a63135c603f..c82d762d592 100644
--- a/hermes_cli/kanban_db.py
+++ b/hermes_cli/kanban_db.py
@@ -5131,7 +5131,13 @@ def _terminate_reclaimed_worker(
     info["termination_attempted"] = True
     try:
         kill(int(pid), signal.SIGTERM)
-    except (ProcessLookupError, OSError):
+    except ProcessLookupError:
+        # Process is already gone — that's a successful termination, not a
+        # survival. Leaving terminated=False here would make the reclaim guard
+        # misread a dead worker as still-alive and defer forever.
+        info["terminated"] = True
+        return info
+    except OSError:
         return info
 
     for _ in range(10):