mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-25 11:02:03 +00:00
* feat(goals): add /goal wait <pid> barrier to park the loop on a background process
The /goal loop re-pokes the agent every turn via the post-turn judge. When a
goal is gated on a long-running background process (CI poller, build, test
matrix, deploy) that produces nothing to judge yet, this spins the agent into
'is it done?' busy-work and burns the turn budget.
/goal wait <pid> [reason] parks the loop: while the PID is alive, the judge is
skipped, no turn is consumed, no continuation fires, and /goal status shows a
parked indicator. The barrier auto-clears the moment the process exits (the
agent's notify_on_complete watcher is the natural wake signal), then the next
turn resumes normal judging. /goal unwait clears it manually; pause/resume/clear
drop it; a dead/stale PID can never wedge the loop.
Wired across CLI, gateway, and the mid-run command guard for parity. Barrier
persists in SessionDB.state_meta (survives /resume); GoalState gains
backward-compatible waiting_on_pid/waiting_reason/waiting_since fields. 12 new
tests; docs updated.
* fix(goals): use gateway.status._pid_exists for liveness, not os.kill(pid,0)
The Windows-footguns CI guard flagged os.kill(pid, 0) in _pid_alive. On
Windows that call is not a no-op: it routes to CTRL_C_EVENT and hard-kills the
target's console process group (bpo-14484). Delegate to the canonical
footgun-safe gateway.status._pid_exists (psutil + ctypes/POSIX fallback)
instead, with a direct-psutil last resort.
* feat(goals): judge-driven auto-wait — the loop parks itself, no manual /goal wait
Makes the wait barrier automatic. Every turn the judge is shown the agent's
live background processes (pid, command, uptime, output tail from the
process_registry) alongside the goal + response, and can return a new 'wait'
verdict instead of continue:
{"verdict":"wait","wait_on_pid":N} → park until that process exits
{"verdict":"wait","wait_for_seconds":N} → park until the deadline passes
evaluate_after_turn acts on the directive (sets the barrier, parks the loop)
so the agent isn't re-poked into busy-work while CI/builds/deploys run. Adds a
time-based waiting_until barrier alongside the pid barrier; both auto-clear and
can never wedge the loop. Drivers (CLI, gateway, tui_gateway) feed the live
registry in via gather_background_processes(). Manual /goal wait stays as an
override. Judge verdict contract widened to (verdict, reason, parse_failed,
wait_directive); legacy {"done":bool} shape still accepted.
* test(goals): update kanban _fake_judge to the 4-tuple judge contract
CI test(3) caught it: test_kanban_goal_mode's _fake_judge still returned the
3-tuple (verdict, reason, parse_failed), but the kanban loop now unpacks the
4-tuple (+ wait_directive). Update the fake to return None for the directive
and accept the background_processes kwarg.
* feat(goals): trigger-based wait — park on a process's own signal, not just exit
Addresses two gaps in the judge-driven wait: (1) the judge could only express
'wait until PID exits' or 'wait N seconds', so a long-lived watcher/server that
fires a trigger MID-RUN (and may never exit) couldn't be waited on; (2) the
process's own watch_patterns/notify_on_complete trigger was invisible to the judge.
Adds a session-based barrier (waiting_on_session) that releases on the process's
OWN trigger via process_registry.is_session_waiting(): the session exits, OR (if
started with watch_patterns) its pattern matches — even while the process keeps
running. list_sessions() now surfaces session_id + watch_patterns/watch_hit/
notify_on_complete so the judge sees the trigger and is told to prefer
wait_on_session for trigger processes. Judge verdict gains a {wait_on_session}
directive (preferred over pid). Backward-compatible GoalState field; pid + time
barriers unchanged.
Tests: TestSessionTriggerBarrier (release on mid-run pattern match while alive,
release on exit, unknown-session, full park→trigger→resume, parse, validation,
backcompat load). 105 goal-surface + 85 process_registry tests green.
298 lines
9.3 KiB
Python
298 lines
9.3 KiB
Python
"""Tests for kanban goal_mode — per-card Ralph-style goal loop.

Covers three layers:

1. DB: goal_mode / goal_max_turns persist through create_task + from_row,
   and a legacy DB (without the columns) migrates cleanly.
2. Spawn: _default_spawn sets the HERMES_KANBAN_GOAL_MODE env vars only
   when the card opts in.
3. Loop: goals.run_kanban_goal_loop continuation / completion / budget
   behaviour, driven entirely through injected callbacks (no live model).
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import sqlite3
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
from hermes_cli import kanban_db as kb
|
|
from hermes_cli import goals
|
|
|
|
|
|
@pytest.fixture
def kanban_home(tmp_path, monkeypatch):
    """Provide an isolated, initialised kanban home for a single test.

    Creates ``<tmp_path>/.hermes``, points ``HERMES_HOME`` at it, makes
    ``Path.home()`` return ``tmp_path``, runs ``kb.init_db()``, and returns
    the ``.hermes`` directory.
    """
    hermes_dir = tmp_path / ".hermes"
    hermes_dir.mkdir()

    # Both redirections are undone by monkeypatch at test teardown.
    monkeypatch.setattr(Path, "home", lambda: tmp_path)
    monkeypatch.setenv("HERMES_HOME", str(hermes_dir))

    kb.init_db()
    return hermes_dir
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# DB layer
# ---------------------------------------------------------------------------
|
|
|
|
def test_goal_mode_defaults_off(kanban_home):
    """A task created without goal arguments reads back with goal mode off."""
    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="plain task", assignee="worker")
        stored = kb.get_task(conn, task_id)
        # Identity checks: the flag must be a real False / None, not merely falsy.
        assert stored.goal_mode is False
        assert stored.goal_max_turns is None
|
|
|
|
|
|
def test_goal_mode_persists(kanban_home):
    """goal_mode and goal_max_turns passed to create_task are read back by get_task."""
    with kb.connect() as conn:
        task_id = kb.create_task(
            conn,
            title="open-ended task",
            assignee="worker",
            goal_mode=True,
            goal_max_turns=7,
        )
        stored = kb.get_task(conn, task_id)
        assert stored.goal_mode is True
        assert stored.goal_max_turns == 7
|
|
|
|
|
|
def test_goal_mode_without_max_turns(kanban_home):
    """Enabling goal_mode without a turn cap leaves goal_max_turns as None."""
    with kb.connect() as conn:
        task_id = kb.create_task(
            conn,
            title="t",
            assignee="worker",
            goal_mode=True,
        )
        stored = kb.get_task(conn, task_id)
        assert stored.goal_mode is True
        assert stored.goal_max_turns is None
|
|
|
|
|
|
def test_legacy_db_migrates_goal_columns(tmp_path, monkeypatch):
    """A tasks table created without goal columns must gain them on init.

    Builds a database at ``kb.kanban_db_path()`` by hand with a ``tasks``
    table that lacks ``goal_mode`` / ``goal_max_turns`` and holds one row,
    then runs ``kb.init_db()`` and checks that both columns now exist and
    that the pre-existing row reads back with goal mode off.
    """
    home = tmp_path / ".hermes"
    home.mkdir()
    monkeypatch.setenv("HERMES_HOME", str(home))
    monkeypatch.setattr(Path, "home", lambda: tmp_path)

    db_path = kb.kanban_db_path()
    db_path.parent.mkdir(parents=True, exist_ok=True)

    # Minimal legacy schema: tasks table missing goal_mode / goal_max_turns.
    legacy = sqlite3.connect(db_path)
    try:
        legacy.execute(
            """
            CREATE TABLE tasks (
                id TEXT PRIMARY KEY,
                title TEXT NOT NULL,
                body TEXT,
                assignee TEXT,
                status TEXT NOT NULL DEFAULT 'ready',
                priority INTEGER NOT NULL DEFAULT 0,
                created_by TEXT,
                created_at INTEGER NOT NULL,
                started_at INTEGER,
                completed_at INTEGER,
                workspace_kind TEXT NOT NULL DEFAULT 'scratch',
                workspace_path TEXT,
                claim_lock TEXT,
                claim_expires INTEGER
            )
            """
        )
        legacy.execute(
            "INSERT INTO tasks (id, title, status, priority, created_at, workspace_kind) "
            "VALUES ('legacy1', 'old', 'ready', 0, 1, 'scratch')"
        )
        legacy.commit()
    finally:
        # Always release the handle, even if the setup SQL fails, so a broken
        # setup does not leave the database file open behind the test.
        legacy.close()

    # init_db runs the additive migration.
    kb.init_db()
    with kb.connect() as conn:
        cols = {r["name"] for r in conn.execute("PRAGMA table_info(tasks)")}
        assert "goal_mode" in cols
        assert "goal_max_turns" in cols

        task = kb.get_task(conn, "legacy1")
        # Existing row keeps the safe default.
        assert task.goal_mode is False
        assert task.goal_max_turns is None
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Spawn env
# ---------------------------------------------------------------------------
|
|
|
|
def test_spawn_sets_goal_env_only_when_enabled(kanban_home, monkeypatch):
    """Spawning a goal-mode task passes the goal env vars to the child.

    ``subprocess.Popen`` is replaced by a recorder that stores the ``env``
    keyword it receives, so no real process is started.
    """
    seen = {}

    class _StubProcess:
        pid = 4242

    def _recording_popen(cmd, **kwargs):
        # Keep only the environment handed to the would-be child process.
        seen["env"] = kwargs.get("env", {})
        return _StubProcess()

    monkeypatch.setattr("subprocess.Popen", _recording_popen)

    with kb.connect() as conn:
        task_id = kb.create_task(
            conn,
            title="goal task",
            assignee="default",
            goal_mode=True,
            goal_max_turns=5,
        )
        task = kb.get_task(conn, task_id)

    kb._default_spawn(task, str(kanban_home))

    spawn_env = seen["env"]
    assert spawn_env.get("HERMES_KANBAN_GOAL_MODE") == "1"
    assert spawn_env.get("HERMES_KANBAN_GOAL_MAX_TURNS") == "5"
|
|
|
|
|
|
def test_spawn_no_goal_env_for_plain_task(kanban_home, monkeypatch):
    """Spawning a task without goal_mode must not set either goal env var.

    ``subprocess.Popen`` is replaced by a recorder that stores the ``env``
    keyword it receives, so no real process is started.
    """
    seen = {}

    class _StubProcess:
        pid = 4243

    def _recording_popen(cmd, **kwargs):
        # Keep only the environment handed to the would-be child process.
        seen["env"] = kwargs.get("env", {})
        return _StubProcess()

    monkeypatch.setattr("subprocess.Popen", _recording_popen)

    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="plain", assignee="default")
        task = kb.get_task(conn, task_id)

    kb._default_spawn(task, str(kanban_home))

    spawn_env = seen["env"]
    assert "HERMES_KANBAN_GOAL_MODE" not in spawn_env
    assert "HERMES_KANBAN_GOAL_MAX_TURNS" not in spawn_env
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Goal loop logic (callback-injected, no live model)
# ---------------------------------------------------------------------------
|
|
|
|
def _patch_judge(monkeypatch, verdicts):
    """Swap ``goals.judge_goal`` for a stub that replays *verdicts* in order.

    Each call consumes the next scripted verdict; once the script runs out
    the stub answers ``"done"``. The stub returns the 4-tuple
    ``(verdict, reason, parse_failed, wait_directive)`` with
    ``parse_failed=False`` and no wait directive.
    """
    # Snapshot the script so later changes to the caller's sequence don't leak in.
    scripted = iter(list(verdicts))

    def _fake_judge(goal, response, subgoals=None, background_processes=None, **_kw):
        verdict = next(scripted, "done")
        # 4-tuple contract: (verdict, reason, parse_failed, wait_directive)
        return verdict, f"scripted:{verdict}", False, None

    monkeypatch.setattr(goals, "judge_goal", _fake_judge)
|
|
|
|
|
|
def test_loop_stops_when_worker_already_completed(monkeypatch):
    """With the task already "done", the loop ends without running any turn."""
    # Worker called kanban_complete on its first turn — no judging needed.
    _patch_judge(monkeypatch, ["continue"])  # should never be consulted
    prompts = []

    def _run_turn(prompt):
        prompts.append(prompt)
        return "x"

    def _task_status():
        return "done"

    def _block(reason):
        pytest.fail("should not block")

    result = goals.run_kanban_goal_loop(
        task_id="t1",
        goal_text="do the thing",
        run_turn=_run_turn,
        task_status_fn=_task_status,
        block_fn=_block,
        first_response="done already",
    )

    assert result["outcome"] == "completed_by_worker"
    assert prompts == []  # no extra turns
|
|
|
|
|
|
def test_loop_continues_then_worker_completes(monkeypatch):
    """Two "continue" verdicts feed two continuation prompts before completion."""
    _patch_judge(monkeypatch, ["continue", "continue"])
    status_script = iter(["running", "running", "done"])
    prompts = []

    def _run_turn(prompt):
        prompts.append(prompt)
        # The reply is numbered by how many prompts have been fed so far.
        return f"turn{len(prompts)}"

    def _task_status():
        return next(status_script)

    def _block(reason):
        pytest.fail("should not block")

    result = goals.run_kanban_goal_loop(
        task_id="t2",
        goal_text="ship feature",
        run_turn=_run_turn,
        task_status_fn=_task_status,
        block_fn=_block,
        max_turns=10,
        first_response="started",
    )

    assert result["outcome"] == "completed_by_worker"
    # Two continuation turns fed before the worker completed.
    assert len(prompts) == 2
    assert all("not done yet" in prompt for prompt in prompts)
|
|
|
|
|
|
def test_loop_blocks_on_budget_exhaustion(monkeypatch):
    """A never-finishing task is blocked once max_turns turns are used up."""
    _patch_judge(monkeypatch, ["continue"] * 10)
    block_calls = {}

    def _run_turn(prompt):
        return "still going"

    def _task_status():
        return "running"

    def _block(reason):
        block_calls["reason"] = reason

    result = goals.run_kanban_goal_loop(
        task_id="t3",
        goal_text="endless task",
        run_turn=_run_turn,
        task_status_fn=_task_status,
        block_fn=_block,
        max_turns=3,
        first_response="turn1",
    )

    assert result["outcome"] == "blocked_budget"
    assert result["turns_used"] == 3
    assert "turn budget" in block_calls["reason"].lower()
|
|
|
|
|
|
def test_loop_finalize_nudge_when_judge_done_but_open(monkeypatch):
    """A "done" verdict on a still-open task yields one "still open" nudge."""
    # Judge says done, but worker never terminated → one finalize nudge,
    # then worker completes.
    _patch_judge(monkeypatch, ["done", "done"])
    status_script = iter(["running", "done"])
    prompts = []

    def _run_turn(prompt):
        prompts.append(prompt)
        return "ok"

    def _task_status():
        return next(status_script)

    def _block(reason):
        pytest.fail("should not block")

    result = goals.run_kanban_goal_loop(
        task_id="t4",
        goal_text="task",
        run_turn=_run_turn,
        task_status_fn=_task_status,
        block_fn=_block,
        max_turns=10,
        first_response="looks done",
    )

    assert result["outcome"] == "completed_by_worker"
    assert len(prompts) == 1
    assert "still open" in prompts[0]
|
|
|
|
|
|
def test_loop_blocks_when_judge_done_but_never_finalizes(monkeypatch):
    """If the task stays open after the finalize nudge, the loop blocks it."""
    # Judge keeps saying done, worker never calls kanban_complete → block
    # after the single finalize nudge.
    _patch_judge(monkeypatch, ["done", "done"])
    block_calls = {}

    def _run_turn(prompt):
        return "still not finalizing"

    def _task_status():
        return "running"

    def _block(reason):
        block_calls["reason"] = reason

    result = goals.run_kanban_goal_loop(
        task_id="t5",
        goal_text="task",
        run_turn=_run_turn,
        task_status_fn=_task_status,
        block_fn=_block,
        max_turns=10,
        first_response="looks done",
    )

    assert result["outcome"] == "blocked_budget"
    assert "finalize" in block_calls["reason"].lower()
|
|
|
|
|
|
def test_loop_stops_if_task_reclaimed(monkeypatch):
    """An "archived" task status stops the loop without a turn or a block."""
    _patch_judge(monkeypatch, ["continue"])

    def _run_turn(prompt):
        pytest.fail("should not run a turn")

    def _task_status():
        return "archived"

    def _block(reason):
        pytest.fail("should not block")

    result = goals.run_kanban_goal_loop(
        task_id="t6",
        goal_text="task",
        run_turn=_run_turn,
        task_status_fn=_task_status,
        block_fn=_block,
        first_response="x",
    )

    assert result["outcome"] == "stopped"
|