fix(kanban): gate goal_mode task completion with auxiliary judge

Prevents goal_mode workers from bypassing the auxiliary judge by
calling kanban_complete before the acceptance criteria are met. The tool
handler now synchronously invokes the goal judge against the task's
title/body and the completion summary (or result, when no summary is
given). If the verdict is not "done", the completion is rejected with
actionable guidance for the agent. If the judge call itself raises, the
gate fails open: a warning is logged and the completion proceeds.

This keeps kanban_db.py a pure SQLite wrapper and intercepts the
bypass exactly at the agent tool-call boundary, in line with Hermes'
separation of concerns.

Fixes #38367

Co-authored-by: CommandCodeBot <noreply@commandcode.ai>
This commit is contained in:
beardthelion 2026-06-03 12:27:58 -05:00 committed by Teknium
parent 972b162090
commit 0b33bc5396
2 changed files with 73 additions and 1 deletion

View file

@ -589,11 +589,56 @@ def test_complete_retry_with_corrected_created_cards_succeeds(worker_env):
}))
assert ok.get("ok") is True
def test_complete_goal_mode_rejected_by_judge(monkeypatch, tmp_path):
    """Goal-mode tasks must pass the auxiliary judge before completion.

    Regression for #38367: workers bypassing the judge via an early
    kanban_complete call. The judge is stubbed to return a non-"done"
    verdict; the handler must then return an error payload and the task
    must still be "running" in the database.
    """
    from pathlib import Path as _Path

    from hermes_cli import kanban_db as kb
    from tools import kanban_tools as kt

    # Set up an isolated HERMES_HOME rooted in pytest's tmp_path.
    home = tmp_path / ".hermes"
    home.mkdir()
    monkeypatch.setenv("HERMES_HOME", str(home))
    monkeypatch.setenv("HERMES_PROFILE", "test-worker")
    monkeypatch.delenv("HERMES_SESSION_ID", raising=False)
    monkeypatch.setattr(_Path, "home", lambda: tmp_path)
    # Clear the module's record of initialized paths before init_db() so the
    # database is set up under the patched environment.
    kb._INITIALIZED_PATHS.clear()
    kb.init_db()

    # Create and claim a goal-mode task. No assertion on `worker_env` belongs
    # here: that name is not a fixture of this test and no task exists yet.
    conn = kb.connect()
    try:
        goal_task_id = kb.create_task(
            conn, title="goal-mode-test", assignee="test-worker",
            body="Must achieve X with verified evidence.", goal_mode=True,
        )
        kb.claim_task(conn, goal_task_id)
    finally:
        conn.close()
    monkeypatch.setenv("HERMES_KANBAN_TASK", goal_task_id)

    # Mock the judge to reject the completion. The 3-tuple mirrors how the
    # handler unpacks the result: (verdict, reason, <ignored>).
    def mock_judge_goal(goal, last_response, *, timeout=30.0, subgoals=None):
        return "continue", "missing verification evidence", False

    monkeypatch.setattr("hermes_cli.goals.judge_goal", mock_judge_goal)

    # Attempt to complete should be rejected with actionable guidance.
    out = kt._handle_complete({"summary": "I did some stuff but not X"})
    d = json.loads(out)
    assert "error" in d
    assert "Goal completion rejected by judge" in d["error"]
    assert "missing verification evidence" in d["error"]
    assert "create continuation tasks" in d["error"]

    # Verify the task is NOT completed in the DB.
    conn2 = kb.connect()
    try:
        task = kb.get_task(conn2, goal_task_id)
        assert task.status == "running"  # Should still be running, not done
    finally:
        conn2.close()
def test_block_happy_path(worker_env):

View file

@ -567,6 +567,33 @@ def _handle_complete(args: dict, **kw) -> str:
try:
kb, conn = _connect(board=board)
try:
# Goal-mode pre-completion judge gate (Issue #38367).
# Prevent workers from bypassing the auxiliary judge by
# calling kanban_complete before acceptance criteria are met.
task = kb.get_task(conn, tid)
if task and task.goal_mode:
try:
from hermes_cli.goals import judge_goal
verdict, reason, _ = judge_goal(
goal=f"{task.title}\n\n{task.body or ''}".strip(),
last_response=(summary or result or "").strip(),
)
if verdict != "done":
return tool_error(
f"Goal completion rejected by judge: {reason}. "
f"To proceed, either: (1) provide explicit acceptance "
f"evidence in your summary matching the task's criteria, "
f"or (2) create continuation tasks with parent={tid} "
f"and keep this task alive."
)
except Exception as judge_exc:
# Fail-open to avoid wedging the worker if the judge
# is temporarily unavailable or misconfigured.
logger.warning(
"goal judge check failed, allowing completion: %s",
judge_exc,
)
try:
ok = kb.complete_task(
conn, tid,