mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-07-01 12:02:05 +00:00
fix(kanban): gate goal_mode task completion with auxiliary judge
Prevents workers in goal_mode from bypassing the auxiliary judge by calling kanban_complete before acceptance criteria are met. The tool handler now synchronously invokes the goal judge against the task's title/body and the completion summary. If the verdict is not "done", the completion is rejected with actionable guidance for the agent. This keeps kanban_db.py as a pure SQLite wrapper while intercepting the bypass exactly at the agent tool-call boundary, aligning with Hermes separation of concerns. Fixes #38367 Co-authored-by: CommandCodeBot <noreply@commandcode.ai>
This commit is contained in:
parent
972b162090
commit
0b33bc5396
2 changed files with 73 additions and 1 deletions
|
|
@ -589,11 +589,56 @@ def test_complete_retry_with_corrected_created_cards_succeeds(worker_env):
|
|||
}))
|
||||
assert ok.get("ok") is True
|
||||
|
||||
|
||||
def test_complete_goal_mode_rejected_by_judge(monkeypatch, tmp_path):
    """Goal-mode tasks must pass the auxiliary judge before completion.

    Regression for #38367: workers bypassing the judge via early kanban_complete.

    The test builds its own isolated HERMES_HOME (it does not use the
    ``worker_env`` fixture), creates and claims a goal-mode task, stubs the
    judge so it returns a non-"done" verdict, and then checks that
    ``_handle_complete`` returns an error payload and leaves the task in the
    "running" state.
    """
    from pathlib import Path as _Path

    from hermes_cli import kanban_db as kb
    from tools import kanban_tools as kt

    # Set up isolated HERMES_HOME
    home = tmp_path / ".hermes"
    home.mkdir()
    monkeypatch.setenv("HERMES_HOME", str(home))
    monkeypatch.setenv("HERMES_PROFILE", "test-worker")
    monkeypatch.delenv("HERMES_SESSION_ID", raising=False)
    monkeypatch.setattr(_Path, "home", lambda: tmp_path)

    # Drop any cached "already initialised" paths so init_db() runs against
    # the temporary home configured above.
    kb._INITIALIZED_PATHS.clear()
    kb.init_db()
    conn = kb.connect()
    try:
        # NOTE: no pre-existing task is asserted on here; the database is
        # freshly initialised and this test has no ``worker_env`` fixture.
        goal_task_id = kb.create_task(
            conn,
            title="goal-mode-test",
            assignee="test-worker",
            body="Must achieve X with verified evidence.",
            goal_mode=True,
        )
        kb.claim_task(conn, goal_task_id)
    finally:
        conn.close()
    monkeypatch.setenv("HERMES_KANBAN_TASK", goal_task_id)

    # Mock the judge to reject the completion
    def mock_judge_goal(goal, last_response, *, timeout=30.0, subgoals=None):
        return "continue", "missing verification evidence", False

    monkeypatch.setattr("hermes_cli.goals.judge_goal", mock_judge_goal)

    # Attempt to complete should be rejected
    out = kt._handle_complete({"summary": "I did some stuff but not X"})
    d = json.loads(out)
    assert "error" in d
    assert "Goal completion rejected by judge" in d["error"]
    assert "missing verification evidence" in d["error"]
    assert "create continuation tasks" in d["error"]

    # Verify the task is NOT completed in the DB
    conn2 = kb.connect()
    try:
        task = kb.get_task(conn2, goal_task_id)
        assert task.status == "running"  # Should still be running, not done
    finally:
        conn2.close()
|
||||
|
||||
|
||||
def test_block_happy_path(worker_env):
|
||||
|
|
|
|||
|
|
@ -567,6 +567,33 @@ def _handle_complete(args: dict, **kw) -> str:
|
|||
try:
|
||||
kb, conn = _connect(board=board)
|
||||
try:
|
||||
# Goal-mode pre-completion judge gate (Issue #38367).
|
||||
# Prevent workers from bypassing the auxiliary judge by
|
||||
# calling kanban_complete before acceptance criteria are met.
|
||||
task = kb.get_task(conn, tid)
|
||||
if task and task.goal_mode:
|
||||
try:
|
||||
from hermes_cli.goals import judge_goal
|
||||
verdict, reason, _ = judge_goal(
|
||||
goal=f"{task.title}\n\n{task.body or ''}".strip(),
|
||||
last_response=(summary or result or "").strip(),
|
||||
)
|
||||
if verdict != "done":
|
||||
return tool_error(
|
||||
f"Goal completion rejected by judge: {reason}. "
|
||||
f"To proceed, either: (1) provide explicit acceptance "
|
||||
f"evidence in your summary matching the task's criteria, "
|
||||
f"or (2) create continuation tasks with parent={tid} "
|
||||
f"and keep this task alive."
|
||||
)
|
||||
except Exception as judge_exc:
|
||||
# Fail-open to avoid wedging the worker if the judge
|
||||
# is temporarily unavailable or misconfigured.
|
||||
logger.warning(
|
||||
"goal judge check failed, allowing completion: %s",
|
||||
judge_exc,
|
||||
)
|
||||
|
||||
try:
|
||||
ok = kb.complete_task(
|
||||
conn, tid,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue