fix(kanban): gate goal_mode task completion with auxiliary judge

Prevents workers in goal_mode from bypassing the auxiliary judge by calling kanban_complete before the acceptance criteria are met. The tool handler now synchronously invokes the goal judge with the task's title and body as the goal and the completion summary as the response. If the verdict is not "done", the completion is rejected with actionable guidance for the agent. If the judge call itself raises, the gate fails open: the error is logged as a warning and the completion proceeds, so an unavailable or misconfigured judge cannot wedge the worker. This keeps kanban_db.py a pure SQLite wrapper and intercepts the bypass exactly at the agent tool-call boundary, consistent with Hermes's separation of concerns.

Fixes #38367

Co-authored-by: CommandCodeBot <noreply@commandcode.ai>
2026-07-01 12:02:05 +00:00 · 2026-06-03 12:27:58 -05:00 · 2026-06-03 12:27:58 -05:00 · 0b33bc5396
commit 0b33bc5396
parent 972b162090
2 changed files with 73 additions and 1 deletions
--- a/tests/tools/test_kanban_tools.py
+++ b/tests/tools/test_kanban_tools.py
@ -589,11 +589,56 @@ def test_complete_retry_with_corrected_created_cards_succeeds(worker_env):
    }))
    assert ok.get("ok") is True

+
+def test_complete_goal_mode_rejected_by_judge(monkeypatch, tmp_path):
+    """Goal-mode tasks must pass the auxiliary judge before completion.
+    Regression for #38367: workers bypassing the judge via early kanban_complete."""
+    from pathlib import Path as _Path
+    from hermes_cli import kanban_db as kb
+    from tools import kanban_tools as kt
+
+    # Isolate the environment: HERMES_HOME points at a fresh directory under tmp_path.
+    home = tmp_path / ".hermes"
+    home.mkdir()
+    monkeypatch.setenv("HERMES_HOME", str(home))
+    monkeypatch.setenv("HERMES_PROFILE", "test-worker")  # same value as the task's assignee below
+    monkeypatch.delenv("HERMES_SESSION_ID", raising=False)  # drop any session id inherited from the outer environment
+    monkeypatch.setattr(_Path, "home", lambda: tmp_path)  # Path.home() now returns tmp_path
+
+    kb._INITIALIZED_PATHS.clear()  # presumably forces init_db() to run again for the new HERMES_HOME -- TODO confirm
+    kb.init_db()
    conn = kb.connect()
    try:
-        assert kb.get_task(conn, worker_env).status == "done"
+        goal_task_id = kb.create_task(
+            conn, title="goal-mode-test", assignee="test-worker",
+            body="Must achieve X with verified evidence.", goal_mode=True
+        )
+        kb.claim_task(conn, goal_task_id)  # presumably moves the task to "running", the status asserted at the end -- TODO confirm
    finally:
        conn.close()
+    monkeypatch.setenv("HERMES_KANBAN_TASK", goal_task_id)  # no task id is passed in args below; presumably the handler reads it from here -- TODO confirm
+
+    # Stub the judge with a non-"done" verdict; the handler rejects any verdict other than "done".
+    def mock_judge_goal(goal, last_response, *, timeout=30.0, subgoals=None):
+        return "continue", "missing verification evidence", False  # 3-tuple; the handler unpacks (verdict, reason, _)
+
+    monkeypatch.setattr("hermes_cli.goals.judge_goal", mock_judge_goal)  # effective because the handler imports judge_goal at call time
+
+    # The completion attempt must come back as a tool error, not a success payload.
+    out = kt._handle_complete({"summary": "I did some stuff but not X"})
+    d = json.loads(out)
+    assert "error" in d
+    assert "Goal completion rejected by judge" in d["error"]  # fixed prefix of the handler's rejection message
+    assert "missing verification evidence" in d["error"]  # the reason returned by the stubbed judge
+    assert "create continuation tasks" in d["error"]  # guidance text from the handler's rejection message
+
+    # Re-open the DB and confirm the rejected call did not change the task's status.
+    conn2 = kb.connect()
+    try:
+        task = kb.get_task(conn2, goal_task_id)
+        assert task.status == "running"  # still running: the rejection returned before complete_task was reached
+    finally:
+        conn2.close()


 def test_block_happy_path(worker_env):
--- a/tools/kanban_tools.py
+++ b/tools/kanban_tools.py
@ -567,6 +567,33 @@ def _handle_complete(args: dict, **kw) -> str:
    try:
        kb, conn = _connect(board=board)
        try:
+            # Goal-mode pre-completion judge gate (Issue #38367).
+            # Prevent workers from bypassing the auxiliary judge by
+            # calling kanban_complete before acceptance criteria are met.
+            task = kb.get_task(conn, tid)
+            if task and task.goal_mode:
+                try:
+                    from hermes_cli.goals import judge_goal
+                    verdict, reason, _ = judge_goal(
+                        goal=f"{task.title}\n\n{task.body or ''}".strip(),
+                        last_response=(summary or result or "").strip(),
+                    )
+                    if verdict != "done":
+                        return tool_error(
+                            f"Goal completion rejected by judge: {reason}. "
+                            f"To proceed, either: (1) provide explicit acceptance "
+                            f"evidence in your summary matching the task's criteria, "
+                            f"or (2) create continuation tasks with parent={tid} "
+                            f"and keep this task alive."
+                        )
+                except Exception as judge_exc:
+                    # Fail-open to avoid wedging the worker if the judge
+                    # is temporarily unavailable or misconfigured.
+                    logger.warning(
+                        "goal judge check failed, allowing completion: %s",
+                        judge_exc,
+                    )
+
            try:
                ok = kb.complete_task(
                    conn, tid,