From 14c4a849b7b501ffa2eedcf15b92ab5347418aa0 Mon Sep 17 00:00:00 2001
From: beardthelion <beardthelion@users.noreply.github.com>
Date: Wed, 3 Jun 2026 13:53:41 -0500
Subject: [PATCH] fix(kanban): make goal_mode judge gate truly fail-open
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Follow-up to the judge gate. judge_goal() is fail-open at the source:
when no auxiliary model is reachable it returns a "continue" verdict
that is indistinguishable from a real "not done yet" judgment. The gate
treated any non-"done" verdict as a rejection, so an unconfigured or
degraded auxiliary model would wedge every goal_mode worker — it could
never close its own task. That contradicted the gate's own "fail-open"
comment.

Probe judge availability before enforcing (the same auxiliary client
lookup judge_goal performs) and only gate when a judge is actually
reachable. When none is, completion proceeds.

Also fix the rejection guidance: kanban_create takes parents=[...], not
parent=.

Add test_complete_goal_mode_allows_when_judge_unavailable covering the
fail-open path; update the rejection test to force the availability probe.
---
 tests/tools/test_kanban_tools.py | 56 ++++++++++++++++++++++++++++++--
 tools/kanban_tools.py            | 31 +++++++++++++++---
 2 files changed, 81 insertions(+), 6 deletions(-)

diff --git a/tests/tools/test_kanban_tools.py b/tests/tools/test_kanban_tools.py
index 3630c8b49da..598caa20f48 100644
--- a/tests/tools/test_kanban_tools.py
+++ b/tests/tools/test_kanban_tools.py
@@ -618,11 +618,13 @@ def test_complete_goal_mode_rejected_by_judge(monkeypatch, tmp_path):
         conn.close()
     monkeypatch.setenv("HERMES_KANBAN_TASK", goal_task_id)
 
-    # Mock the judge to reject the completion
+    # Mock the judge to reject the completion. The gate only runs when a
+    # judge is reachable, so force the availability probe True as well.
     def mock_judge_goal(goal, last_response, *, timeout=30.0, subgoals=None):
         return "continue", "missing verification evidence", False
 
     monkeypatch.setattr("tools.kanban_tools.judge_goal", mock_judge_goal)
+    monkeypatch.setattr("tools.kanban_tools._goal_judge_available", lambda: True)
 
     # Attempt to complete should be rejected
     out = kt._handle_complete({"summary": "I did some stuff but not X"})
@@ -630,7 +632,7 @@ def test_complete_goal_mode_rejected_by_judge(monkeypatch, tmp_path):
     assert "error" in d
     assert "Goal completion rejected by judge" in d["error"]
     assert "missing verification evidence" in d["error"]
-    assert "create continuation tasks" in d["error"]
+    assert f"parents=[{goal_task_id}]" in d["error"]
 
     # Verify the task is NOT completed in the DB
     conn2 = kb.connect()
@@ -641,6 +643,56 @@ def test_complete_goal_mode_rejected_by_judge(monkeypatch, tmp_path):
         conn2.close()
 
 
+def test_complete_goal_mode_allows_when_judge_unavailable(monkeypatch, tmp_path):
+    """Fail-open: an unreachable judge must not wedge a goal_mode worker.
+
+    judge_goal returns a "continue" verdict when no auxiliary model is
+    configured, which is indistinguishable from a real "not done" judgment.
+    The gate probes availability first, so completion proceeds rather than
+    being rejected forever when no judge can be reached."""
+    from pathlib import Path as _Path
+    from hermes_cli import kanban_db as kb
+    from tools import kanban_tools as kt
+
+    home = tmp_path / ".hermes"
+    home.mkdir()
+    monkeypatch.setenv("HERMES_HOME", str(home))
+    monkeypatch.setenv("HERMES_PROFILE", "test-worker")
+    monkeypatch.delenv("HERMES_SESSION_ID", raising=False)
+    monkeypatch.setattr(_Path, "home", lambda: tmp_path)
+
+    kb._INITIALIZED_PATHS.clear()
+    kb.init_db()
+    conn = kb.connect()
+    try:
+        goal_task_id = kb.create_task(
+            conn, title="goal-mode-test", assignee="test-worker",
+            body="Must achieve X with verified evidence.", goal_mode=True
+        )
+        kb.claim_task(conn, goal_task_id)
+    finally:
+        conn.close()
+    monkeypatch.setenv("HERMES_KANBAN_TASK", goal_task_id)
+
+    # No judge reachable. judge_goal must not even be consulted; if it were,
+    # this stub would reject — so reaching "done" proves the probe short-circuit.
+    def fail_if_called(goal, last_response, *, timeout=30.0, subgoals=None):
+        raise AssertionError("judge_goal must not run when no judge is available")
+
+    monkeypatch.setattr("tools.kanban_tools.judge_goal", fail_if_called)
+    monkeypatch.setattr("tools.kanban_tools._goal_judge_available", lambda: False)
+
+    out = kt._handle_complete({"summary": "done enough"})
+    d = json.loads(out)
+    assert d.get("ok") is True
+
+    conn2 = kb.connect()
+    try:
+        assert kb.get_task(conn2, goal_task_id).status == "done"
+    finally:
+        conn2.close()
+
+
 def test_block_happy_path(worker_env):
     from tools import kanban_tools as kt
     out = kt._handle_block({"reason": "need clarification"})
diff --git a/tools/kanban_tools.py b/tools/kanban_tools.py
index 3b100cf2563..ae69808f91e 100644
--- a/tools/kanban_tools.py
+++ b/tools/kanban_tools.py
@@ -179,6 +179,27 @@ def _connect(board: Optional[str] = None):
     return kb, kb.connect(board=board)
 
 
+def _goal_judge_available() -> bool:
+    """True when an auxiliary client is configured for the goal judge.
+
+    ``judge_goal`` is fail-open at the source: when no auxiliary model can
+    be reached it returns a ``"continue"`` verdict that is indistinguishable
+    from a real "not done yet" judgment. The completion gate must not treat
+    that as a rejection, or an unconfigured/degraded auxiliary model would
+    wedge every ``goal_mode`` worker (it could never close its own task).
+
+    So we probe availability first and only enforce the gate when a judge is
+    actually reachable. This mirrors the same client lookup ``judge_goal``
+    performs internally.
+    """
+    try:
+        from agent.auxiliary_client import get_text_auxiliary_client
+        client, model = get_text_auxiliary_client("goal_judge")
+    except Exception:
+        return False
+    return client is not None and bool(model)
+
+
 # ---------------------------------------------------------------------------
 # Runtime-activity → board-heartbeat bridge (#31752)
 # ---------------------------------------------------------------------------
@@ -571,8 +592,10 @@ def _handle_complete(args: dict, **kw) -> str:
             # Goal-mode pre-completion judge gate (Issue #38367).
             # Prevent workers from bypassing the auxiliary judge by
             # calling kanban_complete before acceptance criteria are met.
+            # Only enforce when a judge is actually reachable — see
+            # _goal_judge_available for why an unavailable judge fails open.
             task = kb.get_task(conn, tid)
-            if task and task.goal_mode:
+            if task and task.goal_mode and _goal_judge_available():
                 verdict = "done"
                 reason = ""
                 try:
@@ -581,8 +604,8 @@ def _handle_complete(args: dict, **kw) -> str:
                         last_response=(summary or result or "").strip(),
                     )
                 except Exception as judge_exc:
-                    # Fail-open to avoid wedging the worker if the judge
-                    # is temporarily unavailable or misconfigured.
+                    # Defensive: judge_goal swallows its own errors, but if
+                    # it ever raises, fail open rather than wedge the worker.
                     logger.warning(
                         "goal judge check failed, allowing completion: %s",
                         judge_exc,
@@ -593,7 +616,7 @@ def _handle_complete(args: dict, **kw) -> str:
                         f"Goal completion rejected by judge: {reason}. "
                         f"To proceed, either: (1) provide explicit acceptance "
                         f"evidence in your summary matching the task's criteria, "
-                        f"or (2) create continuation tasks with parent={tid} "
+                        f"or (2) create continuation tasks with parents=[{tid}] "
                         f"and keep this task alive."
                     )