mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-23 10:42:00 +00:00
feat(goals): /goal wait <pid> — park the loop on a background process (#50503)
* feat(goals): add /goal wait <pid> barrier to park the loop on a background process
The /goal loop re-pokes the agent every turn via the post-turn judge. When a
goal is gated on a long-running background process (CI poller, build, test
matrix, deploy) that produces nothing to judge yet, the re-poking spins the agent into
'is it done?' busy-work and burns the turn budget.
/goal wait <pid> [reason] parks the loop: while the PID is alive, the judge is
skipped, no turn is consumed, no continuation fires, and /goal status shows a
parked indicator. The barrier auto-clears the moment the process exits (the
agent's notify_on_complete watcher is the natural wake signal), then the next
turn resumes normal judging. /goal unwait clears it manually; pause/resume/clear
drop it; a dead/stale PID can never wedge the loop.
Wired across CLI, gateway, and the mid-run command guard for parity. Barrier
persists in SessionDB.state_meta (survives /resume); GoalState gains
backward-compatible waiting_on_pid/waiting_reason/waiting_since fields. 12 new
tests; docs updated.
* fix(goals): use gateway.status._pid_exists for liveness, not os.kill(pid,0)
The Windows-footguns CI guard flagged os.kill(pid, 0) in _pid_alive. On
Windows that call is not a no-op: it routes to CTRL_C_EVENT and hard-kills the
target's console process group (bpo-14484). Delegate to the canonical
footgun-safe gateway.status._pid_exists (psutil + ctypes/POSIX fallback)
instead, with a direct-psutil last resort.
* feat(goals): judge-driven auto-wait — the loop parks itself, no manual /goal wait
Makes the wait barrier automatic. Every turn the judge is shown the agent's
live background processes (pid, command, uptime, output tail from the
process_registry) alongside the goal + response, and can return a new 'wait'
verdict instead of continue:
{"verdict":"wait","wait_on_pid":N} → park until that process exits
{"verdict":"wait","wait_for_seconds":N} → park until the deadline passes
evaluate_after_turn acts on the directive (sets the barrier, parks the loop)
so the agent isn't re-poked into busy-work while CI/builds/deploys run. Adds a
time-based waiting_until barrier alongside the pid barrier; both auto-clear and
can never wedge the loop. Drivers (CLI, gateway, tui_gateway) feed the live
registry in via gather_background_processes(). Manual /goal wait stays as an
override. Judge verdict contract widened to (verdict, reason, parse_failed,
wait_directive); legacy {"done":bool} shape still accepted.
* test(goals): update kanban _fake_judge to the 4-tuple judge contract
CI test(3) caught it: test_kanban_goal_mode's _fake_judge still returned the
3-tuple (verdict, reason, parse_failed), but the kanban loop now unpacks the
4-tuple (+ wait_directive). Update the fake to return None for the directive
and accept the background_processes kwarg.
* feat(goals): trigger-based wait — park on a process's own signal, not just exit
Addresses two gaps in the judge-driven wait: (1) the judge could only express
'wait until PID exits' or 'wait N seconds', so a long-lived watcher/server that
fires a trigger MID-RUN (and may never exit) couldn't be waited on; (2) the
process's own watch_patterns/notify_on_complete trigger was invisible to the judge.
Adds a session-based barrier (waiting_on_session) that releases on the process's
OWN trigger via process_registry.is_session_waiting(): the session exits, OR (if
started with watch_patterns) its pattern matches — even while the process keeps
running. list_sessions() now surfaces session_id + watch_patterns/watch_hit/
notify_on_complete so the judge sees the trigger and is told to prefer
wait_on_session for trigger processes. Judge verdict gains a {wait_on_session}
directive (preferred over pid). Backward-compatible GoalState field; pid + time
barriers unchanged.
Tests: TestSessionTriggerBarrier (release on mid-run pattern match while alive,
release on exit, unknown-session, full park→trigger→resume, parse, validation,
backcompat load). 105 goal-surface + 85 process_registry tests green.
This commit is contained in:
parent
d4fa2db1c5
commit
ff85af3fc7
13 changed files with 1139 additions and 104 deletions
|
|
@ -169,7 +169,7 @@ class TestHealthyTurnStillRuns:
|
|||
# Force the judge to say "continue" without touching the network.
|
||||
with patch(
|
||||
"hermes_cli.goals.judge_goal",
|
||||
return_value=("continue", "needs more steps", False),
|
||||
return_value=("continue", "needs more steps", False, None),
|
||||
):
|
||||
cli._maybe_continue_goal_after_turn()
|
||||
|
||||
|
|
@ -189,7 +189,7 @@ class TestHealthyTurnStillRuns:
|
|||
|
||||
with patch(
|
||||
"hermes_cli.goals.judge_goal",
|
||||
return_value=("done", "goal satisfied", False),
|
||||
return_value=("done", "goal satisfied", False, None),
|
||||
):
|
||||
cli._maybe_continue_goal_after_turn()
|
||||
|
||||
|
|
|
|||
|
|
@ -107,7 +107,7 @@ async def test_goal_verdict_done_sent_via_adapter_send(hermes_home):
|
|||
mgr = GoalManager(session_entry.session_id)
|
||||
mgr.set("ship the feature")
|
||||
|
||||
with patch("hermes_cli.goals.judge_goal", return_value=("done", "the feature shipped", False)):
|
||||
with patch("hermes_cli.goals.judge_goal", return_value=("done", "the feature shipped", False, None)):
|
||||
await runner._post_turn_goal_continuation(
|
||||
session_entry=session_entry,
|
||||
source=src,
|
||||
|
|
@ -136,7 +136,7 @@ async def test_goal_verdict_continue_enqueues_continuation(hermes_home):
|
|||
mgr = GoalManager(session_entry.session_id)
|
||||
mgr.set("polish the docs")
|
||||
|
||||
with patch("hermes_cli.goals.judge_goal", return_value=("continue", "still needs work", False)):
|
||||
with patch("hermes_cli.goals.judge_goal", return_value=("continue", "still needs work", False, None)):
|
||||
await runner._post_turn_goal_continuation(
|
||||
session_entry=session_entry,
|
||||
source=src,
|
||||
|
|
@ -164,7 +164,7 @@ async def test_goal_verdict_budget_exhausted_sends_pause(hermes_home):
|
|||
state.turns_used = 2
|
||||
save_goal(session_entry.session_id, state)
|
||||
|
||||
with patch("hermes_cli.goals.judge_goal", return_value=("continue", "keep going", False)):
|
||||
with patch("hermes_cli.goals.judge_goal", return_value=("continue", "keep going", False, None)):
|
||||
await runner._post_turn_goal_continuation(
|
||||
session_entry=session_entry,
|
||||
source=src,
|
||||
|
|
@ -211,7 +211,7 @@ async def test_goal_verdict_survives_adapter_without_send(hermes_home):
|
|||
|
||||
runner.adapters[Platform.TELEGRAM] = _NoSendAdapter()
|
||||
|
||||
with patch("hermes_cli.goals.judge_goal", return_value=("done", "ok", False)):
|
||||
with patch("hermes_cli.goals.judge_goal", return_value=("done", "ok", False, None)):
|
||||
# must not raise
|
||||
await runner._post_turn_goal_continuation(
|
||||
session_entry=session_entry,
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import time
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
import pytest
|
||||
|
|
@ -40,23 +41,25 @@ class TestParseJudgeResponse:
|
|||
def test_clean_json_done(self):
|
||||
from hermes_cli.goals import _parse_judge_response
|
||||
|
||||
done, reason, _ = _parse_judge_response('{"done": true, "reason": "all good"}')
|
||||
assert done is True
|
||||
verdict, reason, _pf, wait = _parse_judge_response('{"done": true, "reason": "all good"}')
|
||||
assert verdict == "done"
|
||||
assert reason == "all good"
|
||||
assert wait is None
|
||||
|
||||
def test_clean_json_continue(self):
|
||||
from hermes_cli.goals import _parse_judge_response
|
||||
|
||||
done, reason, _ = _parse_judge_response('{"done": false, "reason": "more work needed"}')
|
||||
assert done is False
|
||||
verdict, reason, _pf, wait = _parse_judge_response('{"done": false, "reason": "more work needed"}')
|
||||
assert verdict == "continue"
|
||||
assert reason == "more work needed"
|
||||
assert wait is None
|
||||
|
||||
def test_json_in_markdown_fence(self):
|
||||
from hermes_cli.goals import _parse_judge_response
|
||||
|
||||
raw = '```json\n{"done": true, "reason": "done"}\n```'
|
||||
done, reason, _ = _parse_judge_response(raw)
|
||||
assert done is True
|
||||
verdict, reason, _pf, _w = _parse_judge_response(raw)
|
||||
assert verdict == "done"
|
||||
assert "done" in reason
|
||||
|
||||
def test_json_embedded_in_prose(self):
|
||||
|
|
@ -64,33 +67,79 @@ class TestParseJudgeResponse:
|
|||
from hermes_cli.goals import _parse_judge_response
|
||||
|
||||
raw = 'Looking at this... the agent says X. Verdict: {"done": false, "reason": "partial"}'
|
||||
done, reason, _ = _parse_judge_response(raw)
|
||||
assert done is False
|
||||
verdict, reason, _pf, _w = _parse_judge_response(raw)
|
||||
assert verdict == "continue"
|
||||
assert reason == "partial"
|
||||
|
||||
def test_string_done_values(self):
|
||||
from hermes_cli.goals import _parse_judge_response
|
||||
|
||||
for s in ("true", "yes", "done", "1"):
|
||||
done, _, _ = _parse_judge_response(f'{{"done": "{s}", "reason": "r"}}')
|
||||
assert done is True
|
||||
verdict, _, _, _ = _parse_judge_response(f'{{"done": "{s}", "reason": "r"}}')
|
||||
assert verdict == "done"
|
||||
for s in ("false", "no", "not yet"):
|
||||
done, _, _ = _parse_judge_response(f'{{"done": "{s}", "reason": "r"}}')
|
||||
assert done is False
|
||||
verdict, _, _, _ = _parse_judge_response(f'{{"done": "{s}", "reason": "r"}}')
|
||||
assert verdict == "continue"
|
||||
|
||||
def test_malformed_json_fails_open(self):
|
||||
"""Non-JSON → not done, with error-ish reason (so judge_goal can map to continue)."""
|
||||
def test_new_verdict_shape(self):
|
||||
"""The explicit {"verdict": ...} shape is honored."""
|
||||
from hermes_cli.goals import _parse_judge_response
|
||||
|
||||
done, reason, _ = _parse_judge_response("this is not json at all")
|
||||
assert done is False
|
||||
v, _, _, _ = _parse_judge_response('{"verdict": "done", "reason": "r"}')
|
||||
assert v == "done"
|
||||
v, _, _, _ = _parse_judge_response('{"verdict": "continue", "reason": "r"}')
|
||||
assert v == "continue"
|
||||
|
||||
def test_wait_verdict_with_pid(self):
|
||||
from hermes_cli.goals import _parse_judge_response
|
||||
|
||||
v, reason, pf, wait = _parse_judge_response(
|
||||
'{"verdict": "wait", "wait_on_pid": 4242, "reason": "CI running"}'
|
||||
)
|
||||
assert v == "wait"
|
||||
assert pf is False
|
||||
assert wait == {"pid": 4242}
|
||||
assert reason == "CI running"
|
||||
|
||||
def test_wait_verdict_with_seconds(self):
|
||||
from hermes_cli.goals import _parse_judge_response
|
||||
|
||||
v, _, _, wait = _parse_judge_response(
|
||||
'{"verdict": "wait", "wait_for_seconds": 90, "reason": "rate limited"}'
|
||||
)
|
||||
assert v == "wait"
|
||||
assert wait == {"seconds": 90}
|
||||
|
||||
def test_wait_verdict_without_target_downgrades_to_continue(self):
|
||||
"""A wait verdict with no pid/seconds can't park on anything → continue."""
|
||||
from hermes_cli.goals import _parse_judge_response
|
||||
|
||||
v, _, pf, wait = _parse_judge_response('{"verdict": "wait", "reason": "vague"}')
|
||||
assert v == "continue"
|
||||
assert wait is None
|
||||
assert pf is False
|
||||
|
||||
def test_unknown_verdict_falls_back_to_continue(self):
|
||||
from hermes_cli.goals import _parse_judge_response
|
||||
|
||||
v, _, _, _ = _parse_judge_response('{"verdict": "maybe", "reason": "r"}')
|
||||
assert v == "continue"
|
||||
|
||||
def test_malformed_json_fails_open(self):
|
||||
"""Non-JSON → continue + parse_failed, with error-ish reason."""
|
||||
from hermes_cli.goals import _parse_judge_response
|
||||
|
||||
verdict, reason, parse_failed, _w = _parse_judge_response("this is not json at all")
|
||||
assert verdict == "continue"
|
||||
assert parse_failed is True
|
||||
assert reason # non-empty
|
||||
|
||||
def test_empty_response(self):
|
||||
from hermes_cli.goals import _parse_judge_response
|
||||
|
||||
done, reason, _ = _parse_judge_response("")
|
||||
assert done is False
|
||||
verdict, reason, parse_failed, _w = _parse_judge_response("")
|
||||
assert verdict == "continue"
|
||||
assert parse_failed is True
|
||||
assert reason
|
||||
|
||||
|
||||
|
|
@ -103,13 +152,13 @@ class TestJudgeGoal:
|
|||
def test_empty_goal_skipped(self):
|
||||
from hermes_cli.goals import judge_goal
|
||||
|
||||
verdict, _, _ = judge_goal("", "some response")
|
||||
verdict, _, _, _wd = judge_goal("", "some response")
|
||||
assert verdict == "skipped"
|
||||
|
||||
def test_empty_response_continues(self):
|
||||
from hermes_cli.goals import judge_goal
|
||||
|
||||
verdict, _, _ = judge_goal("ship the thing", "")
|
||||
verdict, _, _, _wd = judge_goal("ship the thing", "")
|
||||
assert verdict == "continue"
|
||||
|
||||
def test_no_aux_client_continues(self):
|
||||
|
|
@ -120,7 +169,7 @@ class TestJudgeGoal:
|
|||
"agent.auxiliary_client.get_text_auxiliary_client",
|
||||
return_value=(None, None),
|
||||
):
|
||||
verdict, _, _ = goals.judge_goal("my goal", "my response")
|
||||
verdict, _, _, _wd = goals.judge_goal("my goal", "my response")
|
||||
assert verdict == "continue"
|
||||
|
||||
def test_api_error_continues(self):
|
||||
|
|
@ -133,7 +182,7 @@ class TestJudgeGoal:
|
|||
"agent.auxiliary_client.get_text_auxiliary_client",
|
||||
return_value=(fake_client, "judge-model"),
|
||||
):
|
||||
verdict, reason, _ = goals.judge_goal("goal", "response")
|
||||
verdict, reason, _, _wd = goals.judge_goal("goal", "response")
|
||||
assert verdict == "continue"
|
||||
assert "judge error" in reason.lower()
|
||||
|
||||
|
|
@ -152,7 +201,7 @@ class TestJudgeGoal:
|
|||
"agent.auxiliary_client.get_text_auxiliary_client",
|
||||
return_value=(fake_client, "judge-model"),
|
||||
):
|
||||
verdict, reason, _ = goals.judge_goal("goal", "agent response")
|
||||
verdict, reason, _, _wd = goals.judge_goal("goal", "agent response")
|
||||
assert verdict == "done"
|
||||
assert reason == "achieved"
|
||||
|
||||
|
|
@ -171,7 +220,7 @@ class TestJudgeGoal:
|
|||
"agent.auxiliary_client.get_text_auxiliary_client",
|
||||
return_value=(fake_client, "judge-model"),
|
||||
):
|
||||
verdict, reason, _ = goals.judge_goal("goal", "agent response")
|
||||
verdict, reason, _, _wd = goals.judge_goal("goal", "agent response")
|
||||
assert verdict == "continue"
|
||||
assert reason == "not yet"
|
||||
|
||||
|
|
@ -260,7 +309,7 @@ class TestGoalManager:
|
|||
mgr = GoalManager(session_id="eval-sid-1")
|
||||
mgr.set("ship it")
|
||||
|
||||
with patch.object(goals, "judge_goal", return_value=("done", "shipped", False)):
|
||||
with patch.object(goals, "judge_goal", return_value=("done", "shipped", False, None)):
|
||||
decision = mgr.evaluate_after_turn("I shipped the feature.")
|
||||
|
||||
assert decision["verdict"] == "done"
|
||||
|
|
@ -276,7 +325,7 @@ class TestGoalManager:
|
|||
mgr = GoalManager(session_id="eval-sid-2", default_max_turns=5)
|
||||
mgr.set("a long goal")
|
||||
|
||||
with patch.object(goals, "judge_goal", return_value=("continue", "more work", False)):
|
||||
with patch.object(goals, "judge_goal", return_value=("continue", "more work", False, None)):
|
||||
decision = mgr.evaluate_after_turn("made some progress")
|
||||
|
||||
assert decision["verdict"] == "continue"
|
||||
|
|
@ -294,7 +343,7 @@ class TestGoalManager:
|
|||
mgr = GoalManager(session_id="eval-sid-3", default_max_turns=2)
|
||||
mgr.set("hard goal")
|
||||
|
||||
with patch.object(goals, "judge_goal", return_value=("continue", "not yet", False)):
|
||||
with patch.object(goals, "judge_goal", return_value=("continue", "not yet", False, None)):
|
||||
d1 = mgr.evaluate_after_turn("step 1")
|
||||
assert d1["should_continue"] is True
|
||||
assert mgr.state.turns_used == 1
|
||||
|
|
@ -371,28 +420,28 @@ class TestJudgeParseFailureAutoPause:
|
|||
def test_parse_response_flags_empty_as_parse_failure(self):
|
||||
from hermes_cli.goals import _parse_judge_response
|
||||
|
||||
done, reason, parse_failed = _parse_judge_response("")
|
||||
assert done is False
|
||||
verdict, reason, parse_failed, _w = _parse_judge_response("")
|
||||
assert verdict == "continue"
|
||||
assert parse_failed is True
|
||||
assert "empty" in reason.lower()
|
||||
|
||||
def test_parse_response_flags_non_json_as_parse_failure(self):
|
||||
from hermes_cli.goals import _parse_judge_response
|
||||
|
||||
done, reason, parse_failed = _parse_judge_response(
|
||||
verdict, reason, parse_failed, _w = _parse_judge_response(
|
||||
"Let me analyze whether the goal is fully satisfied based on the agent's response..."
|
||||
)
|
||||
assert done is False
|
||||
assert verdict == "continue"
|
||||
assert parse_failed is True
|
||||
assert "not json" in reason.lower()
|
||||
|
||||
def test_parse_response_clean_json_is_not_parse_failure(self):
|
||||
from hermes_cli.goals import _parse_judge_response
|
||||
|
||||
done, _, parse_failed = _parse_judge_response(
|
||||
verdict, _, parse_failed, _w = _parse_judge_response(
|
||||
'{"done": false, "reason": "more work"}'
|
||||
)
|
||||
assert done is False
|
||||
assert verdict == "continue"
|
||||
assert parse_failed is False
|
||||
|
||||
def test_api_error_does_not_count_as_parse_failure(self):
|
||||
|
|
@ -405,7 +454,7 @@ class TestJudgeParseFailureAutoPause:
|
|||
"agent.auxiliary_client.get_text_auxiliary_client",
|
||||
return_value=(fake_client, "judge-model"),
|
||||
):
|
||||
verdict, _, parse_failed = goals.judge_goal("goal", "response")
|
||||
verdict, _, parse_failed, _wd = goals.judge_goal("goal", "response")
|
||||
assert verdict == "continue"
|
||||
assert parse_failed is False
|
||||
|
||||
|
|
@ -421,7 +470,7 @@ class TestJudgeParseFailureAutoPause:
|
|||
"agent.auxiliary_client.get_text_auxiliary_client",
|
||||
return_value=(fake_client, "judge-model"),
|
||||
):
|
||||
verdict, _, parse_failed = goals.judge_goal("goal", "response")
|
||||
verdict, _, parse_failed, _wd = goals.judge_goal("goal", "response")
|
||||
assert verdict == "continue"
|
||||
assert parse_failed is True
|
||||
|
||||
|
|
@ -435,7 +484,7 @@ class TestJudgeParseFailureAutoPause:
|
|||
mgr.set("do a thing")
|
||||
|
||||
with patch.object(
|
||||
goals, "judge_goal", return_value=("continue", "judge returned empty response", True)
|
||||
goals, "judge_goal", return_value=("continue", "judge returned empty response", True, None)
|
||||
):
|
||||
d1 = mgr.evaluate_after_turn("step 1")
|
||||
assert d1["should_continue"] is True
|
||||
|
|
@ -464,7 +513,7 @@ class TestJudgeParseFailureAutoPause:
|
|||
|
||||
# Two parse failures…
|
||||
with patch.object(
|
||||
goals, "judge_goal", return_value=("continue", "not json", True)
|
||||
goals, "judge_goal", return_value=("continue", "not json", True, None)
|
||||
):
|
||||
mgr.evaluate_after_turn("step 1")
|
||||
mgr.evaluate_after_turn("step 2")
|
||||
|
|
@ -472,7 +521,7 @@ class TestJudgeParseFailureAutoPause:
|
|||
|
||||
# …then one clean reply resets the counter.
|
||||
with patch.object(
|
||||
goals, "judge_goal", return_value=("continue", "making progress", False)
|
||||
goals, "judge_goal", return_value=("continue", "making progress", False, None)
|
||||
):
|
||||
d = mgr.evaluate_after_turn("step 3")
|
||||
assert d["should_continue"] is True
|
||||
|
|
@ -487,7 +536,7 @@ class TestJudgeParseFailureAutoPause:
|
|||
mgr.set("goal")
|
||||
|
||||
with patch.object(
|
||||
goals, "judge_goal", return_value=("continue", "judge error: RuntimeError", False)
|
||||
goals, "judge_goal", return_value=("continue", "judge error: RuntimeError", False, None)
|
||||
):
|
||||
for _ in range(5):
|
||||
d = mgr.evaluate_after_turn("still going")
|
||||
|
|
@ -506,7 +555,7 @@ class TestJudgeParseFailureAutoPause:
|
|||
mgr.set("persistent goal")
|
||||
|
||||
with patch.object(
|
||||
goals, "judge_goal", return_value=("continue", "empty", True)
|
||||
goals, "judge_goal", return_value=("continue", "empty", True, None)
|
||||
):
|
||||
mgr.evaluate_after_turn("r")
|
||||
mgr.evaluate_after_turn("r")
|
||||
|
|
@ -714,7 +763,7 @@ class TestJudgeGoalWithSubgoals:
|
|||
return_value=(_FakeClient, "fake-model")), \
|
||||
patch("agent.auxiliary_client.get_auxiliary_extra_body",
|
||||
return_value=None):
|
||||
verdict, reason, parse_failed = goals.judge_goal(
|
||||
verdict, reason, parse_failed, _wd = goals.judge_goal(
|
||||
"ship the feature",
|
||||
"ok shipped",
|
||||
subgoals=["write tests", "update docs"],
|
||||
|
|
@ -778,3 +827,395 @@ class TestStatusLineSubgoalCount:
|
|||
mgr.add_subgoal("b")
|
||||
line = mgr.status_line()
|
||||
assert "2 subgoals" in line
|
||||
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
# Wait barrier — parking the goal loop on a background process
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestWaitBarrier:
|
||||
"""The /goal wait barrier parks the loop on a live PID and resumes when
|
||||
the process exits, without burning turns or calling the judge."""
|
||||
|
||||
@staticmethod
|
||||
def _spawn_sleeper():
|
||||
"""Start a short-lived child process; return its Popen handle."""
|
||||
import subprocess
|
||||
import sys
|
||||
return subprocess.Popen([sys.executable, "-c", "import time; time.sleep(30)"])
|
||||
|
||||
@staticmethod
|
||||
def _dead_pid():
|
||||
"""A PID that is essentially guaranteed not to be running."""
|
||||
return 2_000_000_000
|
||||
|
||||
def test_wait_on_requires_active_goal(self, hermes_home):
|
||||
from hermes_cli.goals import GoalManager
|
||||
mgr = GoalManager(session_id="wb-noactive")
|
||||
with pytest.raises(RuntimeError):
|
||||
mgr.wait_on(12345)
|
||||
|
||||
def test_wait_on_rejects_bad_pid(self, hermes_home):
|
||||
from hermes_cli.goals import GoalManager
|
||||
mgr = GoalManager(session_id="wb-badpid")
|
||||
mgr.set("g")
|
||||
with pytest.raises(ValueError):
|
||||
mgr.wait_on(0)
|
||||
|
||||
def test_parked_on_live_pid_does_not_continue_or_judge(self, hermes_home):
|
||||
from hermes_cli import goals
|
||||
from hermes_cli.goals import GoalManager
|
||||
|
||||
proc = self._spawn_sleeper()
|
||||
try:
|
||||
mgr = GoalManager(session_id="wb-live")
|
||||
mgr.set("ship it", max_turns=5)
|
||||
mgr.wait_on(proc.pid, reason="CI green")
|
||||
assert mgr.is_waiting() is True
|
||||
|
||||
# The judge must NOT be called while parked, and no turn is burned.
|
||||
judge = MagicMock(return_value=("continue", "x", False, None))
|
||||
with patch.object(goals, "judge_goal", judge):
|
||||
decision = mgr.evaluate_after_turn("still waiting on CI")
|
||||
|
||||
judge.assert_not_called()
|
||||
assert decision["verdict"] == "waiting"
|
||||
assert decision["should_continue"] is False
|
||||
assert decision["continuation_prompt"] is None
|
||||
assert mgr.state.turns_used == 0 # no turn consumed while parked
|
||||
assert "CI green" in decision["message"]
|
||||
assert mgr.state.status == "active" # still active, just parked
|
||||
finally:
|
||||
proc.terminate()
|
||||
proc.wait(timeout=10)
|
||||
|
||||
def test_barrier_auto_clears_when_process_exits_and_loop_resumes(self, hermes_home):
|
||||
from hermes_cli import goals
|
||||
from hermes_cli.goals import GoalManager
|
||||
|
||||
proc = self._spawn_sleeper()
|
||||
mgr = GoalManager(session_id="wb-exit")
|
||||
mgr.set("ship it", max_turns=5)
|
||||
mgr.wait_on(proc.pid, reason="build")
|
||||
assert mgr.is_waiting() is True
|
||||
|
||||
# Kill the process — barrier should auto-clear and judging resumes.
|
||||
proc.terminate()
|
||||
proc.wait(timeout=10)
|
||||
|
||||
assert mgr.is_waiting() is False # lazy auto-clear
|
||||
assert mgr.state.waiting_on_pid is None
|
||||
|
||||
with patch.object(goals, "judge_goal", return_value=("continue", "more", False, None)):
|
||||
decision = mgr.evaluate_after_turn("process finished, here are results")
|
||||
|
||||
assert decision["verdict"] == "continue"
|
||||
assert decision["should_continue"] is True
|
||||
assert mgr.state.turns_used == 1 # now a turn IS consumed
|
||||
|
||||
def test_dead_pid_never_parks(self, hermes_home):
|
||||
from hermes_cli import goals
|
||||
from hermes_cli.goals import GoalManager
|
||||
|
||||
mgr = GoalManager(session_id="wb-dead")
|
||||
mgr.set("g", max_turns=5)
|
||||
mgr.wait_on(self._dead_pid(), reason="already-dead")
|
||||
# is_waiting clears the stale barrier immediately.
|
||||
assert mgr.is_waiting() is False
|
||||
|
||||
with patch.object(goals, "judge_goal", return_value=("continue", "go", False, None)):
|
||||
decision = mgr.evaluate_after_turn("response")
|
||||
assert decision["should_continue"] is True
|
||||
|
||||
def test_stop_waiting_clears_barrier(self, hermes_home):
|
||||
from hermes_cli.goals import GoalManager
|
||||
|
||||
proc = self._spawn_sleeper()
|
||||
try:
|
||||
mgr = GoalManager(session_id="wb-stop")
|
||||
mgr.set("g")
|
||||
mgr.wait_on(proc.pid)
|
||||
assert mgr.is_waiting() is True
|
||||
assert mgr.stop_waiting() is True
|
||||
assert mgr.state.waiting_on_pid is None
|
||||
assert mgr.is_waiting() is False
|
||||
assert mgr.stop_waiting() is False # idempotent
|
||||
finally:
|
||||
proc.terminate()
|
||||
proc.wait(timeout=10)
|
||||
|
||||
def test_pause_and_resume_clear_barrier(self, hermes_home):
|
||||
from hermes_cli.goals import GoalManager
|
||||
|
||||
proc = self._spawn_sleeper()
|
||||
try:
|
||||
mgr = GoalManager(session_id="wb-pause")
|
||||
mgr.set("g")
|
||||
mgr.wait_on(proc.pid)
|
||||
mgr.pause()
|
||||
assert mgr.state.waiting_on_pid is None
|
||||
|
||||
mgr.resume()
|
||||
assert mgr.state.waiting_on_pid is None
|
||||
finally:
|
||||
proc.terminate()
|
||||
proc.wait(timeout=10)
|
||||
|
||||
def test_barrier_persists_and_reloads(self, hermes_home):
|
||||
from hermes_cli.goals import GoalManager
|
||||
|
||||
proc = self._spawn_sleeper()
|
||||
try:
|
||||
mgr = GoalManager(session_id="wb-persist")
|
||||
mgr.set("g")
|
||||
mgr.wait_on(proc.pid, reason="deploy")
|
||||
|
||||
# Fresh manager loads the persisted barrier.
|
||||
mgr2 = GoalManager(session_id="wb-persist")
|
||||
assert mgr2.state.waiting_on_pid == proc.pid
|
||||
assert mgr2.state.waiting_reason == "deploy"
|
||||
assert mgr2.is_waiting() is True
|
||||
finally:
|
||||
proc.terminate()
|
||||
proc.wait(timeout=10)
|
||||
|
||||
def test_old_state_row_loads_without_barrier_fields(self, hermes_home):
|
||||
"""Backwards-compat: a state_meta row written before the barrier
|
||||
existed must load with no barrier."""
|
||||
from hermes_cli.goals import GoalState
|
||||
|
||||
legacy = json.dumps({
|
||||
"goal": "old goal",
|
||||
"status": "active",
|
||||
"turns_used": 2,
|
||||
"max_turns": 20,
|
||||
})
|
||||
st = GoalState.from_json(legacy)
|
||||
assert st.goal == "old goal"
|
||||
assert st.waiting_on_pid is None
|
||||
assert st.waiting_reason is None
|
||||
assert st.waiting_since == 0.0
|
||||
assert st.waiting_until == 0.0
|
||||
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
# Judge-driven auto-wait — the judge parks the loop on its own
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestJudgeDrivenWait:
|
||||
"""The judge returns a `wait` verdict (given live background-process
|
||||
context) and the loop parks automatically — no manual /goal wait."""
|
||||
|
||||
@staticmethod
|
||||
def _spawn_sleeper():
|
||||
import subprocess, sys
|
||||
return subprocess.Popen([sys.executable, "-c", "import time; time.sleep(30)"])
|
||||
|
||||
def test_judge_wait_pid_parks_loop(self, hermes_home):
|
||||
from hermes_cli import goals
|
||||
from hermes_cli.goals import GoalManager
|
||||
|
||||
proc = self._spawn_sleeper()
|
||||
try:
|
||||
mgr = GoalManager(session_id="jw-pid", default_max_turns=10)
|
||||
mgr.set("ship the PR")
|
||||
# Judge sees the running process and says wait-on-pid.
|
||||
with patch.object(
|
||||
goals, "judge_goal",
|
||||
return_value=("wait", "CI watcher still running", False, {"pid": proc.pid}),
|
||||
):
|
||||
decision = mgr.evaluate_after_turn(
|
||||
"Pushed the PR, watching CI.",
|
||||
background_processes=[{
|
||||
"pid": proc.pid, "command": "wait_for_pr_green.sh",
|
||||
"status": "running", "uptime_seconds": 12,
|
||||
}],
|
||||
)
|
||||
assert decision["verdict"] == "wait"
|
||||
assert decision["should_continue"] is False
|
||||
assert decision["continuation_prompt"] is None
|
||||
assert mgr.state.waiting_on_pid == proc.pid
|
||||
assert mgr.is_waiting() is True
|
||||
|
||||
# Next turn while still parked: judge must NOT be called again.
|
||||
judge = MagicMock()
|
||||
with patch.object(goals, "judge_goal", judge):
|
||||
d2 = mgr.evaluate_after_turn("still going")
|
||||
judge.assert_not_called()
|
||||
assert d2["verdict"] == "waiting"
|
||||
assert d2["should_continue"] is False
|
||||
finally:
|
||||
proc.terminate()
|
||||
proc.wait(timeout=10)
|
||||
|
||||
def test_judge_wait_seconds_parks_loop(self, hermes_home):
|
||||
from hermes_cli import goals
|
||||
from hermes_cli.goals import GoalManager
|
||||
|
||||
mgr = GoalManager(session_id="jw-secs", default_max_turns=10)
|
||||
mgr.set("retry after backoff")
|
||||
with patch.object(
|
||||
goals, "judge_goal",
|
||||
return_value=("wait", "rate limited", False, {"seconds": 120}),
|
||||
):
|
||||
decision = mgr.evaluate_after_turn("Hit a 429, backing off.")
|
||||
assert decision["verdict"] == "wait"
|
||||
assert decision["should_continue"] is False
|
||||
assert mgr.state.waiting_until > 0
|
||||
assert mgr.state.waiting_on_pid is None
|
||||
assert mgr.is_waiting() is True
|
||||
|
||||
def test_time_barrier_clears_after_deadline(self, hermes_home):
|
||||
from hermes_cli.goals import GoalManager
|
||||
|
||||
mgr = GoalManager(session_id="jw-deadline")
|
||||
mgr.set("g")
|
||||
mgr.wait_for_seconds(120, reason="backoff")
|
||||
assert mgr.is_waiting() is True
|
||||
# Force the deadline into the past → barrier auto-clears.
|
||||
mgr.state.waiting_until = time.time() - 1
|
||||
assert mgr.is_waiting() is False
|
||||
assert mgr.state.waiting_until == 0.0
|
||||
|
||||
def test_continue_verdict_still_continues_with_background(self, hermes_home):
|
||||
"""A running process present but judge says continue → normal loop."""
|
||||
from hermes_cli import goals
|
||||
from hermes_cli.goals import GoalManager
|
||||
|
||||
mgr = GoalManager(session_id="jw-cont", default_max_turns=10)
|
||||
mgr.set("do work")
|
||||
with patch.object(
|
||||
goals, "judge_goal",
|
||||
return_value=("continue", "more to do", False, None),
|
||||
):
|
||||
decision = mgr.evaluate_after_turn(
|
||||
"made progress",
|
||||
background_processes=[{"pid": 999999, "command": "x", "status": "running"}],
|
||||
)
|
||||
assert decision["verdict"] == "continue"
|
||||
assert decision["should_continue"] is True
|
||||
assert mgr.state.waiting_on_pid is None
|
||||
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
# Session/trigger barrier — wait on a process's OWN trigger, not just exit
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestSessionTriggerBarrier:
    """The session barrier (wait_on_session) releases when a process's own
    trigger fires — a watch_patterns match mid-run (process may never exit)
    OR exit — not only on PID exit. CI-safe: uses synthetic registry session
    objects, no real child processes."""

    @staticmethod
    def _inject(sid, *, watch_patterns=None, exited=False):
        """Register a synthetic ``ProcessSession`` and return it with the registry.

        Args:
            sid: Session id used both as the ``ProcessSession`` id and as the
                registry key.
            watch_patterns: Optional iterable of patterns copied onto the
                session; left untouched when falsy.
            exited: When True the session is stored in the registry's
                ``_finished`` map, otherwise in ``_running``.

        Returns:
            A ``(session, process_registry)`` tuple.
        """
        import time as _t
        from tools.process_registry import process_registry, ProcessSession

        s = ProcessSession(id=sid, command="watcher.sh", task_id="t",
                           session_key="", cwd="/tmp", started_at=_t.time())
        if watch_patterns:
            s.watch_patterns = list(watch_patterns)
        s.exited = exited
        # NOTE(review): entries are written into the shared registry and never
        # removed here; presumably unique ids per test (or a fixture) keep the
        # tests isolated — confirm.
        if exited:
            process_registry._finished[sid] = s
        else:
            process_registry._running[sid] = s
        return s, process_registry

    def test_registry_is_session_waiting_running_unmatched(self, hermes_home):
        """A running session with an unmatched watch pattern counts as waiting."""
        _, reg = self._inject("proc_t1", watch_patterns=["READY"])
        assert reg.is_session_waiting("proc_t1") is True

    def test_registry_releases_on_watch_match_while_alive(self, hermes_home):
        """A watch hit releases the session even though it has not exited."""
        s, reg = self._inject("proc_t2", watch_patterns=["READY"])
        assert reg.is_session_waiting("proc_t2") is True
        s._watch_hits = 1  # what _check_watch_patterns sets on a match
        # Released even though the process is STILL running (never exited).
        assert s.exited is False
        assert reg.is_session_waiting("proc_t2") is False

    def test_registry_releases_on_exit_plain_session(self, hermes_home):
        """A session without watch patterns stops waiting once ``exited`` is set."""
        s, reg = self._inject("proc_t3")  # no watch pattern
        assert reg.is_session_waiting("proc_t3") is True
        s.exited = True
        assert reg.is_session_waiting("proc_t3") is False

    def test_registry_unknown_session_never_waits(self, hermes_home):
        """An id that was never registered is reported as not waiting."""
        from tools.process_registry import process_registry

        assert process_registry.is_session_waiting("proc_does_not_exist") is False

    def test_goal_parks_on_session_and_releases_on_trigger(self, hermes_home):
        """End-to-end: park on a session, skip the judge, release on a watch hit.

        Walks the goal through three phases: a ``wait`` verdict naming the
        session parks the goal; a further turn while parked must not call the
        judge; a watch hit on the still-running session clears the barrier so
        the next turn is judged again.
        """
        from hermes_cli import goals
        from hermes_cli.goals import GoalManager

        s, _ = self._inject("proc_t4", watch_patterns=["BUILD SUCCESSFUL"])
        mgr = GoalManager(session_id="st-goal", default_max_turns=10)
        mgr.set("wait for the build to succeed")
        with patch.object(
            goals, "judge_goal",
            return_value=("wait", "blocked on build", False, {"session_id": "proc_t4"}),
        ):
            decision = mgr.evaluate_after_turn(
                "Started the build watcher.",
                background_processes=[{
                    "session_id": "proc_t4", "pid": 4242, "command": "watcher.sh",
                    "status": "running", "watch_patterns": ["BUILD SUCCESSFUL"],
                    "watch_hit": False,
                }],
            )
        assert decision["verdict"] == "wait"
        assert mgr.state.waiting_on_session == "proc_t4"
        assert mgr.is_waiting() is True

        # Judge must NOT be called again while parked.
        judge = MagicMock()
        with patch.object(goals, "judge_goal", judge):
            d2 = mgr.evaluate_after_turn("still building")
        judge.assert_not_called()
        assert d2["should_continue"] is False

        # Trigger fires mid-run (process still alive) → barrier releases.
        s._watch_hits = 1
        assert mgr.is_waiting() is False
        assert mgr.state.waiting_on_session is None

        # Loop resumes with a real judge verdict.
        with patch.object(goals, "judge_goal",
                          return_value=("continue", "build done", False, None)):
            d3 = mgr.evaluate_after_turn("build succeeded")
        assert d3["should_continue"] is True

    def test_wait_on_session_validation(self, hermes_home):
        """``wait_on_session`` rejects a missing goal and an empty session id.

        Each ``try`` wraps only the call expected to raise; the ``else``
        branch raises explicitly so the failure marker does not depend on
        ``assert`` statements being enabled.
        """
        from hermes_cli.goals import GoalManager

        mgr = GoalManager(session_id="st-val")

        # No active goal → RuntimeError
        try:
            mgr.wait_on_session("proc_x")
        except RuntimeError:
            pass
        else:
            raise AssertionError("expected RuntimeError")

        mgr.set("g")

        # Active goal but empty session id → ValueError
        try:
            mgr.wait_on_session("")
        except ValueError:
            pass
        else:
            raise AssertionError("expected ValueError")

    def test_session_directive_parsed_from_judge(self, hermes_home):
        """A ``wait_on_session`` key in the judge JSON becomes a session directive."""
        from hermes_cli.goals import _parse_judge_response

        v, _, pf, wd = _parse_judge_response(
            '{"verdict": "wait", "wait_on_session": "proc_abc", "reason": "r"}'
        )
        assert v == "wait"
        assert pf is False
        assert wd == {"session_id": "proc_abc"}

    def test_old_state_loads_without_session_field(self, hermes_home):
        """State JSON without the session field loads with ``waiting_on_session`` None."""
        from hermes_cli.goals import GoalState

        st = GoalState.from_json(json.dumps({
            "goal": "g", "status": "active", "turns_used": 0, "max_turns": 20,
        }))
        assert st.waiting_on_session is None
|
||||
|
|
|
|||
|
|
@ -179,9 +179,10 @@ def _patch_judge(monkeypatch, verdicts):
|
|||
"""Make judge_goal return a scripted sequence of verdicts."""
|
||||
seq = list(verdicts)
|
||||
|
||||
def _fake_judge(goal, response, subgoals=None):
|
||||
def _fake_judge(goal, response, subgoals=None, background_processes=None, **_kw):
|
||||
v = seq.pop(0) if seq else "done"
|
||||
return v, f"scripted:{v}", False
|
||||
# 4-tuple contract: (verdict, reason, parse_failed, wait_directive)
|
||||
return v, f"scripted:{v}", False, None
|
||||
|
||||
monkeypatch.setattr(goals, "judge_goal", _fake_judge)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue