hermes-agent/tests/hermes_cli/test_goals.py
Teknium 2ba1cfeb2e
feat(goals): completion contracts for /goal — evidence-based judging (#50501)
Adds an optional structured completion contract to the standing-goal loop,
adapted from OpenAI Codex's /goal guidance (a durable objective works best
when it names what done means, how to prove it, what not to break, what's in
scope, and when to stop).

A contract has five optional fields — outcome, verification, constraints,
boundaries, stop_when. When set, the continuation prompt tells the agent to
target the verification surface and respect constraints, and the judge marks
the goal done only when the verification criterion is met with concrete
evidence (command result, file excerpt, test output) instead of a loose
"looks done" claim. This tightens the most common /goal failure mode:
premature completion / endless over-continuation on an underspecified goal.

Two ways to set a contract, both backward compatible (bare /goal <text>
behaves exactly as before):
- /goal draft <objective>  — expands plain text into a full contract via the
  goal_judge aux model (cache-safe side call), falls back to a free-form goal
  if the model is unavailable.
- /goal <text> with inline 'field: value' lines (verify:, constraints:,
  boundaries:, stop when:, ...). Plain goals with an incidental colon are not
  mangled — only known field prefixes are pulled out.
In addition, /goal show prints the active contract.

Contracts persist in SessionDB.state_meta alongside the goal (survive /resume),
compose with /subgoal criteria, and old goal rows load unchanged. CLI + every
gateway platform via the shared GoalManager engine; zero new model tools.

Tests: +18 in tests/hermes_cli/test_goals.py (parse/serialize/judge-prompt/
draft/fallback), 73/73 green; 42/42 across the broader goal test surface;
live E2E roundtrip (set -> persist -> reload -> contract-aware prompts) green.
2026-06-22 12:20:09 -07:00

1568 lines
64 KiB
Python

"""Tests for hermes_cli/goals.py — persistent cross-turn goals."""
from __future__ import annotations
import json
import time
from unittest.mock import patch, MagicMock
import pytest
# ──────────────────────────────────────────────────────────────────────
# Fixtures
# ──────────────────────────────────────────────────────────────────────
@pytest.fixture
def hermes_home(tmp_path, monkeypatch):
    """Yield a throwaway ``.hermes`` directory used as HERMES_HOME.

    ``Path.home`` and the ``HERMES_HOME`` environment variable are both pointed
    at the temporary tree so SessionDB.state_meta writes don't clobber the real
    one. The goal module's DB cache is emptied before and after the test.
    """
    from pathlib import Path

    sandbox = tmp_path / ".hermes"
    sandbox.mkdir()
    monkeypatch.setattr(Path, "home", lambda: tmp_path)
    monkeypatch.setenv("HERMES_HOME", str(sandbox))

    # Drop cached DB handles so the goal module re-resolves HERMES_HOME.
    from hermes_cli import goals

    goals._DB_CACHE.clear()
    yield sandbox
    goals._DB_CACHE.clear()
# ──────────────────────────────────────────────────────────────────────
# _parse_judge_response
# ──────────────────────────────────────────────────────────────────────
class TestParseJudgeResponse:
    """Unit tests for ``_parse_judge_response``.

    The result is unpacked throughout as
    ``(verdict, reason, parse_failed, wait)``.
    """

    @staticmethod
    def _parse(raw):
        """Run ``hermes_cli.goals._parse_judge_response`` on *raw* (lazy import)."""
        from hermes_cli.goals import _parse_judge_response

        return _parse_judge_response(raw)

    def test_clean_json_done(self):
        verdict, reason, _pf, wait = self._parse('{"done": true, "reason": "all good"}')
        assert verdict == "done"
        assert reason == "all good"
        assert wait is None

    def test_clean_json_continue(self):
        verdict, reason, _pf, wait = self._parse(
            '{"done": false, "reason": "more work needed"}'
        )
        assert verdict == "continue"
        assert reason == "more work needed"
        assert wait is None

    def test_json_in_markdown_fence(self):
        fenced = '```json\n{"done": true, "reason": "done"}\n```'
        verdict, reason, _pf, _w = self._parse(fenced)
        assert verdict == "done"
        assert "done" in reason

    def test_json_embedded_in_prose(self):
        """JSON preceded by free-text reasoning is still extracted."""
        chatty = 'Looking at this... the agent says X. Verdict: {"done": false, "reason": "partial"}'
        verdict, reason, _pf, _w = self._parse(chatty)
        assert verdict == "continue"
        assert reason == "partial"

    def test_string_done_values(self):
        expectations = (
            ("done", ("true", "yes", "done", "1")),
            ("continue", ("false", "no", "not yet")),
        )
        for expected, spellings in expectations:
            for spelling in spellings:
                verdict, _, _, _ = self._parse(
                    f'{{"done": "{spelling}", "reason": "r"}}'
                )
                assert verdict == expected

    def test_new_verdict_shape(self):
        """The explicit {"verdict": ...} shape is honored."""
        done_verdict, _, _, _ = self._parse('{"verdict": "done", "reason": "r"}')
        assert done_verdict == "done"
        continue_verdict, _, _, _ = self._parse('{"verdict": "continue", "reason": "r"}')
        assert continue_verdict == "continue"

    def test_wait_verdict_with_pid(self):
        verdict, reason, parse_failed, wait = self._parse(
            '{"verdict": "wait", "wait_on_pid": 4242, "reason": "CI running"}'
        )
        assert verdict == "wait"
        assert parse_failed is False
        assert wait == {"pid": 4242}
        assert reason == "CI running"

    def test_wait_verdict_with_seconds(self):
        verdict, _, _, wait = self._parse(
            '{"verdict": "wait", "wait_for_seconds": 90, "reason": "rate limited"}'
        )
        assert verdict == "wait"
        assert wait == {"seconds": 90}

    def test_wait_verdict_without_target_downgrades_to_continue(self):
        """A wait verdict naming neither pid nor seconds becomes ``continue``."""
        verdict, _, parse_failed, wait = self._parse('{"verdict": "wait", "reason": "vague"}')
        assert verdict == "continue"
        assert wait is None
        assert parse_failed is False

    def test_unknown_verdict_falls_back_to_continue(self):
        verdict, _, _, _ = self._parse('{"verdict": "maybe", "reason": "r"}')
        assert verdict == "continue"

    def test_malformed_json_fails_open(self):
        """Non-JSON input yields ``continue`` with parse_failed set and a reason."""
        verdict, reason, parse_failed, _w = self._parse("this is not json at all")
        assert verdict == "continue"
        assert parse_failed is True
        assert reason  # must not be empty

    def test_empty_response(self):
        verdict, reason, parse_failed, _w = self._parse("")
        assert verdict == "continue"
        assert parse_failed is True
        assert reason
# ──────────────────────────────────────────────────────────────────────
# judge_goal — fail-open semantics
# ──────────────────────────────────────────────────────────────────────
class TestJudgeGoal:
    """``judge_goal`` verdicts, including the fail-open paths."""

    # Dotted path patched to swap in a fake auxiliary client.
    _AUX_CLIENT = "agent.auxiliary_client.get_text_auxiliary_client"

    @staticmethod
    def _client_replying(content):
        """Return a MagicMock client whose completion carries *content*."""
        message = MagicMock(content=content)
        choice = MagicMock(message=message)
        client = MagicMock()
        client.chat.completions.create.return_value = MagicMock(choices=[choice])
        return client

    def test_empty_goal_skipped(self):
        from hermes_cli.goals import judge_goal

        verdict, _, _, _wd = judge_goal("", "some response")
        assert verdict == "skipped"

    def test_empty_response_continues(self):
        from hermes_cli.goals import judge_goal

        verdict, _, _, _wd = judge_goal("ship the thing", "")
        assert verdict == "continue"

    def test_no_aux_client_continues(self):
        """Fail-open: with no aux client the verdict is continue, not skipped/done."""
        from hermes_cli import goals

        with patch(self._AUX_CLIENT, return_value=(None, None)):
            verdict, _, _, _wd = goals.judge_goal("my goal", "my response")
        assert verdict == "continue"

    def test_api_error_continues(self):
        """An exception from the judge call yields a fail-open continue."""
        from hermes_cli import goals

        exploding = MagicMock()
        exploding.chat.completions.create.side_effect = RuntimeError("boom")
        with patch(self._AUX_CLIENT, return_value=(exploding, "judge-model")):
            verdict, reason, _, _wd = goals.judge_goal("goal", "response")
        assert verdict == "continue"
        assert "judge error" in reason.lower()

    def test_judge_says_done(self):
        from hermes_cli import goals

        client = self._client_replying('{"done": true, "reason": "achieved"}')
        with patch(self._AUX_CLIENT, return_value=(client, "judge-model")):
            verdict, reason, _, _wd = goals.judge_goal("goal", "agent response")
        assert verdict == "done"
        assert reason == "achieved"

    def test_judge_says_continue(self):
        from hermes_cli import goals

        client = self._client_replying('{"done": false, "reason": "not yet"}')
        with patch(self._AUX_CLIENT, return_value=(client, "judge-model")):
            verdict, reason, _, _wd = goals.judge_goal("goal", "agent response")
        assert verdict == "continue"
        assert reason == "not yet"
# ──────────────────────────────────────────────────────────────────────
# GoalManager lifecycle + persistence
# ──────────────────────────────────────────────────────────────────────
class TestGoalManager:
    """Lifecycle and persistence of ``GoalManager`` under an isolated HERMES_HOME."""

    @staticmethod
    def _manager(session_id, **options):
        """Construct a ``GoalManager`` for *session_id* (lazy import)."""
        from hermes_cli.goals import GoalManager

        return GoalManager(session_id=session_id, **options)

    def test_no_goal_initial(self, hermes_home):
        manager = self._manager("test-sid-1")
        assert manager.state is None
        assert not manager.is_active()
        assert not manager.has_goal()
        assert "No active goal" in manager.status_line()

    def test_set_then_status(self, hermes_home):
        manager = self._manager("test-sid-2", default_max_turns=5)
        created = manager.set("port the thing")
        assert created.goal == "port the thing"
        assert created.status == "active"
        assert created.max_turns == 5
        assert created.turns_used == 0
        assert manager.is_active()
        assert "active" in manager.status_line().lower()
        assert "port the thing" in manager.status_line()

    def test_set_rejects_empty(self, hermes_home):
        manager = self._manager("test-sid-3")
        for blank in ("", " "):
            with pytest.raises(ValueError):
                manager.set(blank)

    def test_pause_and_resume(self, hermes_home):
        manager = self._manager("test-sid-4")
        manager.set("goal text")

        manager.pause(reason="user-paused")
        assert manager.state.status == "paused"
        assert not manager.is_active()
        assert manager.has_goal()

        manager.resume()
        assert manager.state.status == "active"
        assert manager.is_active()

    def test_clear(self, hermes_home):
        manager = self._manager("test-sid-5")
        manager.set("goal")
        manager.clear()
        assert manager.state is None
        assert not manager.is_active()

    def test_persistence_across_managers(self, hermes_home):
        """A second manager bound to the same session id sees the saved goal.

        This is what makes /resume work — each session rebinds its
        GoalManager and picks up the saved state.
        """
        writer = self._manager("persist-sid")
        writer.set("do the thing")

        reader = self._manager("persist-sid")
        assert reader.state is not None
        assert reader.state.goal == "do the thing"
        assert reader.is_active()

    def test_evaluate_after_turn_done(self, hermes_home):
        """A ``done`` verdict marks the goal done and stops continuation."""
        from hermes_cli import goals

        manager = self._manager("eval-sid-1")
        manager.set("ship it")
        judged = ("done", "shipped", False, None)
        with patch.object(goals, "judge_goal", return_value=judged):
            decision = manager.evaluate_after_turn("I shipped the feature.")
        assert decision["verdict"] == "done"
        assert decision["should_continue"] is False
        assert decision["continuation_prompt"] is None
        assert manager.state.status == "done"
        assert manager.state.turns_used == 1

    def test_evaluate_after_turn_continue_under_budget(self, hermes_home):
        from hermes_cli import goals

        manager = self._manager("eval-sid-2", default_max_turns=5)
        manager.set("a long goal")
        judged = ("continue", "more work", False, None)
        with patch.object(goals, "judge_goal", return_value=judged):
            decision = manager.evaluate_after_turn("made some progress")
        assert decision["verdict"] == "continue"
        assert decision["should_continue"] is True
        assert decision["continuation_prompt"] is not None
        assert "a long goal" in decision["continuation_prompt"]
        assert manager.state.status == "active"
        assert manager.state.turns_used == 1

    def test_evaluate_after_turn_budget_exhausted(self, hermes_home):
        """Reaching the turn ceiling auto-pauses instead of continuing."""
        from hermes_cli import goals

        manager = self._manager("eval-sid-3", default_max_turns=2)
        manager.set("hard goal")
        judged = ("continue", "not yet", False, None)
        with patch.object(goals, "judge_goal", return_value=judged):
            first = manager.evaluate_after_turn("step 1")
            assert first["should_continue"] is True
            assert manager.state.turns_used == 1
            assert manager.state.status == "active"

            second = manager.evaluate_after_turn("step 2")
            # turns_used is now 2, equal to max_turns, so the goal is paused.
            assert second["should_continue"] is False
            assert manager.state.status == "paused"
            assert manager.state.turns_used == 2
            assert "budget" in (manager.state.paused_reason or "").lower()

    def test_evaluate_after_turn_inactive(self, hermes_home):
        """``evaluate_after_turn`` reports ``inactive`` with no goal or a paused one."""
        manager = self._manager("eval-sid-4")
        without_goal = manager.evaluate_after_turn("anything")
        assert without_goal["verdict"] == "inactive"
        assert without_goal["should_continue"] is False

        manager.set("a goal")
        manager.pause()
        while_paused = manager.evaluate_after_turn("anything")
        assert while_paused["verdict"] == "inactive"
        assert while_paused["should_continue"] is False

    def test_continuation_prompt_shape(self, hermes_home):
        """The continuation prompt must include the goal text verbatim —
        and must be safe to inject as a user-role message (prompt-cache
        invariants: no system-prompt mutation)."""
        manager = self._manager("cont-sid")
        manager.set("port goal command to hermes")
        prompt = manager.next_continuation_prompt()
        assert prompt is not None
        assert "port goal command to hermes" in prompt
        assert prompt.strip()  # must not be blank
# ──────────────────────────────────────────────────────────────────────
# Smoke: CommandDef is wired
# ──────────────────────────────────────────────────────────────────────
def test_goal_command_in_registry():
    """``resolve_command("goal")`` returns a command named ``goal``."""
    from hermes_cli.commands import resolve_command

    resolved = resolve_command("goal")
    assert resolved is not None
    assert resolved.name == "goal"
def test_goal_command_dispatches_in_cli_registry_helpers():
    """``/goal`` is listed in COMMANDS and under the ``Session`` category."""
    from hermes_cli.commands import COMMANDS, COMMANDS_BY_CATEGORY

    assert "/goal" in COMMANDS
    # A missing category falls back to an empty mapping so the failure is a
    # plain assertion rather than a KeyError.
    assert "/goal" in COMMANDS_BY_CATEGORY.get("Session", {})
# ──────────────────────────────────────────────────────────────────────
# Auto-pause on consecutive judge parse failures
# ──────────────────────────────────────────────────────────────────────
class TestJudgeParseFailureAutoPause:
    """Regression: weak judge models (e.g. deepseek-v4-flash) that return
    empty strings or non-JSON prose must auto-pause the loop after N turns
    instead of burning the whole turn budget."""

    # Dotted path patched to swap in a fake auxiliary client.
    _AUX_CLIENT = "agent.auxiliary_client.get_text_auxiliary_client"

    @staticmethod
    def _judge_returning(goals_module, result):
        """Patch ``judge_goal`` on *goals_module* to always return *result*."""
        return patch.object(goals_module, "judge_goal", return_value=result)

    def test_parse_response_flags_empty_as_parse_failure(self):
        from hermes_cli.goals import _parse_judge_response

        verdict, reason, parse_failed, _w = _parse_judge_response("")
        assert verdict == "continue"
        assert parse_failed is True
        assert "empty" in reason.lower()

    def test_parse_response_flags_non_json_as_parse_failure(self):
        from hermes_cli.goals import _parse_judge_response

        prose = "Let me analyze whether the goal is fully satisfied based on the agent's response..."
        verdict, reason, parse_failed, _w = _parse_judge_response(prose)
        assert verdict == "continue"
        assert parse_failed is True
        assert "not json" in reason.lower()

    def test_parse_response_clean_json_is_not_parse_failure(self):
        from hermes_cli.goals import _parse_judge_response

        well_formed = '{"done": false, "reason": "more work"}'
        verdict, _, parse_failed, _w = _parse_judge_response(well_formed)
        assert verdict == "continue"
        assert parse_failed is False

    def test_api_error_does_not_count_as_parse_failure(self):
        """Transient network/API errors must not trip the auto-pause guard."""
        from hermes_cli import goals

        flaky = MagicMock()
        flaky.chat.completions.create.side_effect = RuntimeError("connection reset")
        with patch(self._AUX_CLIENT, return_value=(flaky, "judge-model")):
            verdict, _, parse_failed, _wd = goals.judge_goal("goal", "response")
        assert verdict == "continue"
        assert parse_failed is False

    def test_empty_judge_reply_flagged_as_parse_failure(self):
        """End-to-end: a judge reply with empty content sets parse_failed."""
        from hermes_cli import goals

        silent = MagicMock()
        silent.chat.completions.create.return_value = MagicMock(
            choices=[MagicMock(message=MagicMock(content=""))]
        )
        with patch(self._AUX_CLIENT, return_value=(silent, "judge-model")):
            verdict, _, parse_failed, _wd = goals.judge_goal("goal", "response")
        assert verdict == "continue"
        assert parse_failed is True

    def test_auto_pause_after_three_consecutive_parse_failures(self, hermes_home):
        """Three consecutive parse failures pause the goal with a config pointer."""
        from hermes_cli import goals
        from hermes_cli.goals import GoalManager, DEFAULT_MAX_CONSECUTIVE_PARSE_FAILURES

        assert DEFAULT_MAX_CONSECUTIVE_PARSE_FAILURES == 3
        manager = GoalManager(session_id="parse-fail-sid-1", default_max_turns=20)
        manager.set("do a thing")

        unparseable = ("continue", "judge returned empty response", True, None)
        with self._judge_returning(goals, unparseable):
            # The first two failures keep the loop going.
            for failures_so_far in (1, 2):
                decision = manager.evaluate_after_turn(f"step {failures_so_far}")
                assert decision["should_continue"] is True
                assert manager.state.consecutive_parse_failures == failures_so_far

            final = manager.evaluate_after_turn("step 3")
            assert final["should_continue"] is False
            assert final["status"] == "paused"
            assert manager.state.consecutive_parse_failures == 3
            # The message names the config surface so the user can fix it.
            for needle in ("auxiliary", "goal_judge", "config.yaml"):
                assert needle in final["message"]

    def test_parse_failure_counter_resets_on_good_reply(self, hermes_home):
        """One parseable judge reply resets the consecutive-failure counter."""
        from hermes_cli import goals
        from hermes_cli.goals import GoalManager

        manager = GoalManager(session_id="parse-fail-sid-2", default_max_turns=20)
        manager.set("another goal")

        # Two parse failures in a row...
        with self._judge_returning(goals, ("continue", "not json", True, None)):
            manager.evaluate_after_turn("step 1")
            manager.evaluate_after_turn("step 2")
        assert manager.state.consecutive_parse_failures == 2

        # ...followed by a clean reply.
        with self._judge_returning(goals, ("continue", "making progress", False, None)):
            decision = manager.evaluate_after_turn("step 3")
        assert decision["should_continue"] is True
        assert manager.state.consecutive_parse_failures == 0

    def test_parse_failure_counter_not_incremented_by_api_errors(self, hermes_home):
        """API/transport errors must NOT count toward the auto-pause threshold."""
        from hermes_cli import goals
        from hermes_cli.goals import GoalManager

        manager = GoalManager(session_id="parse-fail-sid-3", default_max_turns=20)
        manager.set("goal")

        api_failure = ("continue", "judge error: RuntimeError", False, None)
        with self._judge_returning(goals, api_failure):
            for _ in range(5):
                decision = manager.evaluate_after_turn("still going")
                assert decision["should_continue"] is True
                assert manager.state.consecutive_parse_failures == 0
        assert manager.state.status == "active"

    def test_consecutive_parse_failures_persists_across_goalmanager_reloads(
        self, hermes_home
    ):
        """The failure counter is written to storage and read back by ``load_goal``."""
        from hermes_cli import goals
        from hermes_cli.goals import GoalManager, load_goal

        manager = GoalManager(session_id="parse-fail-sid-4", default_max_turns=20)
        manager.set("persistent goal")

        with self._judge_returning(goals, ("continue", "empty", True, None)):
            for _ in range(2):
                manager.evaluate_after_turn("r")

        reloaded = load_goal("parse-fail-sid-4")
        assert reloaded is not None
        assert reloaded.consecutive_parse_failures == 2
# ──────────────────────────────────────────────────────────────────────
# /subgoal — user-added criteria
# ──────────────────────────────────────────────────────────────────────
class TestGoalStateSubgoalsBackcompat:
    """``GoalState`` JSON (de)serialization with and without ``subgoals``."""

    def test_old_state_meta_row_loads_without_subgoals(self):
        """A payload with no ``subgoals`` key loads with an empty list."""
        from hermes_cli.goals import GoalState

        legacy_row = {
            "goal": "do a thing",
            "status": "active",
            "turns_used": 2,
            "max_turns": 20,
            "created_at": 1.0,
            "last_turn_at": 2.0,
            "consecutive_parse_failures": 0,
        }
        loaded = GoalState.from_json(json.dumps(legacy_row))
        assert loaded.goal == "do a thing"
        assert loaded.subgoals == []

    def test_subgoals_round_trip(self):
        from hermes_cli.goals import GoalState

        original = GoalState(goal="g", subgoals=["a", "b", "c"])
        serialized = original.to_json()
        restored = GoalState.from_json(serialized)
        assert restored.subgoals == ["a", "b", "c"]
class TestMigrateGoalToSession:
    """migrate_goal_to_session carries a /goal from a parent session to its
    compression continuation child (#33618). load_goal does a flat
    per-session lookup with no lineage walk, so without migration an active
    goal silently dies when compression rotates session_id."""

    def test_migrates_active_goal_to_child(self, hermes_home):
        from hermes_cli.goals import save_goal, load_goal, migrate_goal_to_session, GoalState

        save_goal("parent-sid", GoalState(goal="ship the feature"))
        migrated = migrate_goal_to_session("parent-sid", "child-sid", reason="compression")
        assert migrated is True

        child_state = load_goal("child-sid")
        assert child_state is not None and child_state.goal == "ship the feature"

        # The parent row is left behind with status "cleared".
        parent_state = load_goal("parent-sid")
        assert parent_state is not None and parent_state.status == "cleared"

    def test_no_goal_to_migrate_returns_false(self, hermes_home):
        from hermes_cli.goals import migrate_goal_to_session, load_goal

        migrated = migrate_goal_to_session("empty-parent", "child2")
        assert migrated is False
        assert load_goal("child2") is None

    def test_does_not_clobber_existing_child_goal(self, hermes_home):
        from hermes_cli.goals import save_goal, load_goal, migrate_goal_to_session, GoalState

        save_goal("p3", GoalState(goal="parent goal"))
        save_goal("c3", GoalState(goal="child already has one"))
        migrated = migrate_goal_to_session("p3", "c3")
        assert migrated is False
        assert load_goal("c3").goal == "child already has one"

    def test_same_id_is_noop(self, hermes_home):
        from hermes_cli.goals import save_goal, migrate_goal_to_session, GoalState

        save_goal("same", GoalState(goal="g"))
        migrated = migrate_goal_to_session("same", "same")
        assert migrated is False

    def test_cleared_goal_not_migrated(self, hermes_home):
        from hermes_cli.goals import save_goal, clear_goal, migrate_goal_to_session, load_goal, GoalState

        save_goal("p4", GoalState(goal="done already"))
        clear_goal("p4")
        migrated = migrate_goal_to_session("p4", "c4")
        assert migrated is False
        assert load_goal("c4") is None
class TestGoalManagerSubgoals:
    """Adding, removing, clearing and persisting subgoals on ``GoalManager``.

    ``pytest`` comes from the module-level import, as in the sibling test
    classes; the per-method re-imports were redundant.
    """

    def test_add_subgoal(self, hermes_home):
        """``add_subgoal`` returns the stripped text and stores it."""
        from hermes_cli.goals import GoalManager

        mgr = GoalManager(session_id="sub-add")
        mgr.set("main goal")
        text = mgr.add_subgoal(" use bullet points ")
        assert text == "use bullet points"
        assert mgr.state.subgoals == ["use bullet points"]

    def test_add_subgoal_requires_active_goal(self, hermes_home):
        """Adding a subgoal with no goal set raises RuntimeError."""
        from hermes_cli.goals import GoalManager

        mgr = GoalManager(session_id="sub-noactive")
        with pytest.raises(RuntimeError):
            mgr.add_subgoal("oops")

    def test_add_empty_subgoal_rejected(self, hermes_home):
        """A whitespace-only subgoal raises ValueError."""
        from hermes_cli.goals import GoalManager

        mgr = GoalManager(session_id="sub-empty")
        mgr.set("g")
        with pytest.raises(ValueError):
            mgr.add_subgoal(" ")

    def test_remove_subgoal(self, hermes_home):
        """``remove_subgoal`` takes a 1-based index and returns the removed text."""
        from hermes_cli.goals import GoalManager

        mgr = GoalManager(session_id="sub-remove")
        mgr.set("g")
        for text in ("first", "second", "third"):
            mgr.add_subgoal(text)
        removed = mgr.remove_subgoal(2)
        assert removed == "second"
        assert mgr.state.subgoals == ["first", "third"]

    def test_remove_subgoal_out_of_range(self, hermes_home):
        """Indices past the end, and index 0, raise IndexError."""
        from hermes_cli.goals import GoalManager

        mgr = GoalManager(session_id="sub-oob")
        mgr.set("g")
        mgr.add_subgoal("only")
        with pytest.raises(IndexError):
            mgr.remove_subgoal(5)
        with pytest.raises(IndexError):
            mgr.remove_subgoal(0)

    def test_clear_subgoals(self, hermes_home):
        """``clear_subgoals`` returns the number removed and empties the list."""
        from hermes_cli.goals import GoalManager

        mgr = GoalManager(session_id="sub-clear")
        mgr.set("g")
        mgr.add_subgoal("a")
        mgr.add_subgoal("b")
        prev = mgr.clear_subgoals()
        assert prev == 2
        assert mgr.state.subgoals == []

    def test_subgoals_persist_across_reloads(self, hermes_home):
        """Subgoals stored in SessionDB survive a fresh GoalManager."""
        from hermes_cli.goals import GoalManager

        mgr = GoalManager(session_id="sub-persist")
        mgr.set("g")
        mgr.add_subgoal("first")
        mgr.add_subgoal("second")

        mgr2 = GoalManager(session_id="sub-persist")
        assert mgr2.state.subgoals == ["first", "second"]
class TestContinuationPromptWithSubgoals:
    """Continuation prompt content with and without subgoals."""

    @staticmethod
    def _manager_with_goal(session_id):
        """Return a ``GoalManager`` for *session_id* with "ship the feature" set."""
        from hermes_cli.goals import GoalManager

        manager = GoalManager(session_id=session_id)
        manager.set("ship the feature")
        return manager

    def test_empty_subgoals_uses_original_template(self, hermes_home):
        manager = self._manager_with_goal("cp-empty")
        prompt = manager.next_continuation_prompt()
        assert prompt is not None
        assert "ship the feature" in prompt
        assert "Additional criteria" not in prompt

    def test_with_subgoals_includes_them(self, hermes_home):
        manager = self._manager_with_goal("cp-with")
        manager.add_subgoal("write tests")
        manager.add_subgoal("update docs")
        prompt = manager.next_continuation_prompt()
        assert prompt is not None
        for expected in (
            "ship the feature",
            "Additional criteria",
            "1. write tests",
            "2. update docs",
        ):
            assert expected in prompt
class TestJudgeGoalWithSubgoals:
    """Prompt that ``judge_goal`` sends to the aux client, with and without subgoals.

    No model is called: the aux client is replaced by a stand-in that records
    the keyword arguments passed to ``chat.completions.create``.
    """

    @staticmethod
    def _capturing_client(reply, captured):
        """Build a stand-in aux client.

        Its ``chat.completions.create(**kwargs)`` copies ``kwargs`` into
        *captured* and returns a response whose first choice carries *reply*
        as the message content.
        """

        class _FakeMsg:
            content = reply

        class _FakeChoice:
            message = _FakeMsg()

        class _FakeResp:
            choices = [_FakeChoice()]

        class _FakeClient:
            class chat:
                class completions:
                    @staticmethod
                    def create(**kwargs):
                        captured.update(kwargs)
                        return _FakeResp()

        return _FakeClient

    @staticmethod
    def _user_prompt(captured):
        """Return the content of the first user-role message sent, or ``""``."""
        sent_messages = captured.get("messages") or []
        return next((m["content"] for m in sent_messages if m["role"] == "user"), "")

    def test_judge_uses_subgoals_template_when_provided(self, hermes_home):
        """judge_goal switches templates when subgoals is non-empty."""
        from hermes_cli import goals

        captured = {}
        client = self._capturing_client('{"done": true, "reason": "all done"}', captured)

        # The helpers are patched both on the goals module (create=True, in case
        # the names are not module attributes there) and at their definition site.
        with patch.object(goals, "get_text_auxiliary_client",
                          return_value=(client, "fake-model"), create=True), \
             patch.object(goals, "get_auxiliary_extra_body",
                          return_value=None, create=True), \
             patch("agent.auxiliary_client.get_text_auxiliary_client",
                   return_value=(client, "fake-model")), \
             patch("agent.auxiliary_client.get_auxiliary_extra_body",
                   return_value=None):
            verdict, _reason, _parse_failed, _wd = goals.judge_goal(
                "ship the feature",
                "ok shipped",
                subgoals=["write tests", "update docs"],
            )

        # The aux client was called with a prompt that includes the subgoals.
        user_msg = self._user_prompt(captured)
        assert "Additional criteria" in user_msg
        assert "1. write tests" in user_msg
        assert "2. update docs" in user_msg
        assert "every additional criterion" in user_msg
        assert verdict == "done"

    def test_judge_uses_original_template_when_no_subgoals(self, hermes_home):
        """With ``subgoals=None`` the prompt has no "Additional criteria" section."""
        from hermes_cli import goals

        captured = {}
        client = self._capturing_client('{"done": true, "reason": "ok"}', captured)

        with patch("agent.auxiliary_client.get_text_auxiliary_client",
                   return_value=(client, "fake-model")), \
             patch("agent.auxiliary_client.get_auxiliary_extra_body",
                   return_value=None):
            goals.judge_goal("ship it", "done", subgoals=None)

        user_msg = self._user_prompt(captured)
        assert "Additional criteria" not in user_msg
        assert "ship it" in user_msg
class TestStatusLineSubgoalCount:
    """Whether ``status_line`` mentions subgoals."""

    def test_status_line_no_subgoals(self, hermes_home):
        from hermes_cli.goals import GoalManager

        manager = GoalManager(session_id="sl-empty")
        manager.set("ship it")
        rendered = manager.status_line()
        assert "ship it" in rendered
        assert "subgoal" not in rendered.lower()

    def test_status_line_with_subgoals(self, hermes_home):
        from hermes_cli.goals import GoalManager

        manager = GoalManager(session_id="sl-with")
        manager.set("ship it")
        for criterion in ("a", "b"):
            manager.add_subgoal(criterion)
        rendered = manager.status_line()
        assert "2 subgoals" in rendered
# ──────────────────────────────────────────────────────────────────────
# Wait barrier — parking the goal loop on a background process
# ──────────────────────────────────────────────────────────────────────
class TestWaitBarrier:
"""The /goal wait barrier parks the loop on a live PID and resumes when
the process exits, without burning turns or calling the judge."""
@staticmethod
def _spawn_sleeper():
"""Start a short-lived child process; return its Popen handle."""
import subprocess
import sys
return subprocess.Popen([sys.executable, "-c", "import time; time.sleep(30)"])
@staticmethod
def _dead_pid():
"""A PID that is essentially guaranteed not to be running."""
return 2_000_000_000
def test_wait_on_requires_active_goal(self, hermes_home):
from hermes_cli.goals import GoalManager
mgr = GoalManager(session_id="wb-noactive")
with pytest.raises(RuntimeError):
mgr.wait_on(12345)
def test_wait_on_rejects_bad_pid(self, hermes_home):
from hermes_cli.goals import GoalManager
mgr = GoalManager(session_id="wb-badpid")
mgr.set("g")
with pytest.raises(ValueError):
mgr.wait_on(0)
def test_parked_on_live_pid_does_not_continue_or_judge(self, hermes_home):
from hermes_cli import goals
from hermes_cli.goals import GoalManager
proc = self._spawn_sleeper()
try:
mgr = GoalManager(session_id="wb-live")
mgr.set("ship it", max_turns=5)
mgr.wait_on(proc.pid, reason="CI green")
assert mgr.is_waiting() is True
# The judge must NOT be called while parked, and no turn is burned.
judge = MagicMock(return_value=("continue", "x", False, None))
with patch.object(goals, "judge_goal", judge):
decision = mgr.evaluate_after_turn("still waiting on CI")
judge.assert_not_called()
assert decision["verdict"] == "waiting"
assert decision["should_continue"] is False
assert decision["continuation_prompt"] is None
assert mgr.state.turns_used == 0 # no turn consumed while parked
assert "CI green" in decision["message"]
assert mgr.state.status == "active" # still active, just parked
finally:
proc.terminate()
proc.wait(timeout=10)
def test_barrier_auto_clears_when_process_exits_and_loop_resumes(self, hermes_home):
from hermes_cli import goals
from hermes_cli.goals import GoalManager
proc = self._spawn_sleeper()
mgr = GoalManager(session_id="wb-exit")
mgr.set("ship it", max_turns=5)
mgr.wait_on(proc.pid, reason="build")
assert mgr.is_waiting() is True
# Kill the process — barrier should auto-clear and judging resumes.
proc.terminate()
proc.wait(timeout=10)
assert mgr.is_waiting() is False # lazy auto-clear
assert mgr.state.waiting_on_pid is None
with patch.object(goals, "judge_goal", return_value=("continue", "more", False, None)):
decision = mgr.evaluate_after_turn("process finished, here are results")
assert decision["verdict"] == "continue"
assert decision["should_continue"] is True
assert mgr.state.turns_used == 1 # now a turn IS consumed
def test_dead_pid_never_parks(self, hermes_home):
from hermes_cli import goals
from hermes_cli.goals import GoalManager
mgr = GoalManager(session_id="wb-dead")
mgr.set("g", max_turns=5)
mgr.wait_on(self._dead_pid(), reason="already-dead")
# is_waiting clears the stale barrier immediately.
assert mgr.is_waiting() is False
with patch.object(goals, "judge_goal", return_value=("continue", "go", False, None)):
decision = mgr.evaluate_after_turn("response")
assert decision["should_continue"] is True
def test_stop_waiting_clears_barrier(self, hermes_home):
    """stop_waiting() drops an active barrier and returns False when repeated."""
    from hermes_cli.goals import GoalManager

    sleeper = self._spawn_sleeper()
    try:
        manager = GoalManager(session_id="wb-stop")
        manager.set("g")
        manager.wait_on(sleeper.pid)
        assert manager.is_waiting() is True

        first_stop = manager.stop_waiting()
        assert first_stop is True
        assert manager.state.waiting_on_pid is None
        assert manager.is_waiting() is False

        second_stop = manager.stop_waiting()
        assert second_stop is False  # idempotent
    finally:
        sleeper.terminate()
        sleeper.wait(timeout=10)
def test_pause_and_resume_clear_barrier(self, hermes_home):
    """No PID barrier is left set after pause(), nor after the resume() that follows."""
    from hermes_cli.goals import GoalManager

    sleeper = self._spawn_sleeper()
    try:
        manager = GoalManager(session_id="wb-pause")
        manager.set("g")
        manager.wait_on(sleeper.pid)

        # Same check after each transition, in pause → resume order.
        for transition in (manager.pause, manager.resume):
            transition()
            assert manager.state.waiting_on_pid is None
    finally:
        sleeper.terminate()
        sleeper.wait(timeout=10)
def test_barrier_persists_and_reloads(self, hermes_home):
    """A second GoalManager built for the same session sees the stored barrier."""
    from hermes_cli.goals import GoalManager

    session = "wb-persist"
    sleeper = self._spawn_sleeper()
    try:
        writer = GoalManager(session_id=session)
        writer.set("g")
        writer.wait_on(sleeper.pid, reason="deploy")

        # Fresh manager loads the persisted barrier.
        reader = GoalManager(session_id=session)
        assert reader.state.waiting_on_pid == sleeper.pid
        assert reader.state.waiting_reason == "deploy"
        assert reader.is_waiting() is True
    finally:
        sleeper.terminate()
        sleeper.wait(timeout=10)
def test_old_state_row_loads_without_barrier_fields(self, hermes_home):
    """Backwards-compat: a row serialized without any barrier keys must
    deserialize with every barrier field at its empty value."""
    from hermes_cli.goals import GoalState

    pre_barrier_row = {
        "goal": "old goal",
        "status": "active",
        "turns_used": 2,
        "max_turns": 20,
    }
    state = GoalState.from_json(json.dumps(pre_barrier_row))

    assert state.goal == "old goal"
    assert state.waiting_on_pid is None
    assert state.waiting_reason is None
    assert state.waiting_since == 0.0
    assert state.waiting_until == 0.0
# ──────────────────────────────────────────────────────────────────────
# Judge-driven auto-wait — the judge parks the loop on its own
# ──────────────────────────────────────────────────────────────────────
class TestJudgeDrivenWait:
    """When the judge answers `wait` (it is handed the live background
    processes), the loop parks by itself — no manual /goal wait needed."""

    @staticmethod
    def _spawn_sleeper():
        """Start a child Python process that sleeps for 30 seconds."""
        import subprocess
        import sys

        sleep_cmd = [sys.executable, "-c", "import time; time.sleep(30)"]
        return subprocess.Popen(sleep_cmd)

    def test_judge_wait_pid_parks_loop(self, hermes_home):
        """A `wait` verdict naming a PID parks the loop and silences the judge."""
        from hermes_cli import goals
        from hermes_cli.goals import GoalManager

        sleeper = self._spawn_sleeper()
        try:
            manager = GoalManager(session_id="jw-pid", default_max_turns=10)
            manager.set("ship the PR")

            # Judge sees the running process and says wait-on-pid.
            wait_reply = ("wait", "CI watcher still running", False, {"pid": sleeper.pid})
            running = {
                "pid": sleeper.pid, "command": "wait_for_pr_green.sh",
                "status": "running", "uptime_seconds": 12,
            }
            with patch.object(goals, "judge_goal", return_value=wait_reply):
                first = manager.evaluate_after_turn(
                    "Pushed the PR, watching CI.",
                    background_processes=[running],
                )
            assert first["verdict"] == "wait"
            assert first["should_continue"] is False
            assert first["continuation_prompt"] is None
            assert manager.state.waiting_on_pid == sleeper.pid
            assert manager.is_waiting() is True

            # Next turn while still parked: judge must NOT be called again.
            idle_judge = MagicMock()
            with patch.object(goals, "judge_goal", idle_judge):
                second = manager.evaluate_after_turn("still going")
            idle_judge.assert_not_called()
            assert second["verdict"] == "waiting"
            assert second["should_continue"] is False
        finally:
            sleeper.terminate()
            sleeper.wait(timeout=10)

    def test_judge_wait_seconds_parks_loop(self, hermes_home):
        """A `wait` verdict carrying seconds sets a time barrier and no PID barrier."""
        from hermes_cli import goals
        from hermes_cli.goals import GoalManager

        manager = GoalManager(session_id="jw-secs", default_max_turns=10)
        manager.set("retry after backoff")

        backoff_reply = ("wait", "rate limited", False, {"seconds": 120})
        with patch.object(goals, "judge_goal", return_value=backoff_reply):
            outcome = manager.evaluate_after_turn("Hit a 429, backing off.")

        assert outcome["verdict"] == "wait"
        assert outcome["should_continue"] is False
        assert manager.state.waiting_until > 0
        assert manager.state.waiting_on_pid is None
        assert manager.is_waiting() is True

    def test_time_barrier_clears_after_deadline(self, hermes_home):
        """A time barrier whose deadline has passed is cleared by is_waiting()."""
        from hermes_cli.goals import GoalManager

        manager = GoalManager(session_id="jw-deadline")
        manager.set("g")
        manager.wait_for_seconds(120, reason="backoff")
        assert manager.is_waiting() is True

        # Force the deadline into the past → barrier auto-clears.
        one_second_ago = time.time() - 1
        manager.state.waiting_until = one_second_ago
        assert manager.is_waiting() is False
        assert manager.state.waiting_until == 0.0

    def test_continue_verdict_still_continues_with_background(self, hermes_home):
        """A running process present but judge says continue → normal loop."""
        from hermes_cli import goals
        from hermes_cli.goals import GoalManager

        manager = GoalManager(session_id="jw-cont", default_max_turns=10)
        manager.set("do work")

        continue_reply = ("continue", "more to do", False, None)
        listed = [{"pid": 999999, "command": "x", "status": "running"}]
        with patch.object(goals, "judge_goal", return_value=continue_reply):
            outcome = manager.evaluate_after_turn(
                "made progress",
                background_processes=listed,
            )

        assert outcome["verdict"] == "continue"
        assert outcome["should_continue"] is True
        assert manager.state.waiting_on_pid is None
# ──────────────────────────────────────────────────────────────────────
# Session/trigger barrier — wait on a process's OWN trigger, not just exit
# ──────────────────────────────────────────────────────────────────────
class TestSessionTriggerBarrier:
    """The session barrier (wait_on_session) releases when a process's own
    trigger fires — a watch_patterns match mid-run (process may never exit)
    OR exit — not only on PID exit. CI-safe: uses synthetic registry session
    objects, no real child processes.

    Synthetic sessions are written into the shared ``process_registry``
    object, so every id handed to ``_inject`` is recorded and removed again
    in ``teardown_method`` to keep them from leaking into other tests.
    """

    # Ids injected by the currently running test; drained in teardown_method.
    _injected_ids = []

    @staticmethod
    def _inject(sid, *, watch_patterns=None, exited=False):
        """Register a synthetic ProcessSession under *sid*.

        The session is stored in the registry's ``_finished`` mapping when
        *exited* is true and in ``_running`` otherwise. Returns the tuple
        ``(session, process_registry)``.
        """
        import time as _t
        from tools.process_registry import process_registry, ProcessSession

        s = ProcessSession(id=sid, command="watcher.sh", task_id="t",
                           session_key="", cwd="/tmp", started_at=_t.time())
        if watch_patterns:
            s.watch_patterns = list(watch_patterns)
        s.exited = exited
        if exited:
            process_registry._finished[sid] = s
        else:
            process_registry._running[sid] = s
        TestSessionTriggerBarrier._injected_ids.append(sid)
        return s, process_registry

    def teardown_method(self, method):
        """Remove every synthetic session this test injected into the registry."""
        from tools.process_registry import process_registry

        # NOTE(review): assumes _running/_finished are dict-like (they are
        # written by key in _inject) — confirm pop(key, default) is supported.
        while self._injected_ids:
            sid = self._injected_ids.pop()
            process_registry._running.pop(sid, None)
            process_registry._finished.pop(sid, None)

    def test_registry_is_session_waiting_running_unmatched(self, hermes_home):
        """A running session with an unmatched watch pattern is still waiting."""
        s, reg = self._inject("proc_t1", watch_patterns=["READY"])
        assert reg.is_session_waiting("proc_t1") is True

    def test_registry_releases_on_watch_match_while_alive(self, hermes_home):
        """A recorded watch hit releases the barrier although the process has not exited."""
        s, reg = self._inject("proc_t2", watch_patterns=["READY"])
        assert reg.is_session_waiting("proc_t2") is True
        s._watch_hits = 1  # what _check_watch_patterns sets on a match
        # Released even though the process is STILL running (never exited).
        assert s.exited is False
        assert reg.is_session_waiting("proc_t2") is False

    def test_registry_releases_on_exit_plain_session(self, hermes_home):
        """A session without watch patterns releases once it is marked exited."""
        s, reg = self._inject("proc_t3")  # no watch pattern
        assert reg.is_session_waiting("proc_t3") is True
        s.exited = True
        assert reg.is_session_waiting("proc_t3") is False

    def test_registry_unknown_session_never_waits(self, hermes_home):
        """An id the registry has never seen is reported as not waiting."""
        from tools.process_registry import process_registry

        assert process_registry.is_session_waiting("proc_does_not_exist") is False

    def test_goal_parks_on_session_and_releases_on_trigger(self, hermes_home):
        """End to end: judge parks the goal on a session, a watch hit releases it."""
        from hermes_cli import goals
        from hermes_cli.goals import GoalManager

        s, reg = self._inject("proc_t4", watch_patterns=["BUILD SUCCESSFUL"])
        mgr = GoalManager(session_id="st-goal", default_max_turns=10)
        mgr.set("wait for the build to succeed")

        with patch.object(
            goals, "judge_goal",
            return_value=("wait", "blocked on build", False, {"session_id": "proc_t4"}),
        ):
            decision = mgr.evaluate_after_turn(
                "Started the build watcher.",
                background_processes=[{
                    "session_id": "proc_t4", "pid": 4242, "command": "watcher.sh",
                    "status": "running", "watch_patterns": ["BUILD SUCCESSFUL"],
                    "watch_hit": False,
                }],
            )
        assert decision["verdict"] == "wait"
        assert mgr.state.waiting_on_session == "proc_t4"
        assert mgr.is_waiting() is True

        # Judge must NOT be called again while parked.
        judge = MagicMock()
        with patch.object(goals, "judge_goal", judge):
            d2 = mgr.evaluate_after_turn("still building")
        judge.assert_not_called()
        assert d2["should_continue"] is False

        # Trigger fires mid-run (process still alive) → barrier releases.
        s._watch_hits = 1
        assert mgr.is_waiting() is False
        assert mgr.state.waiting_on_session is None

        # Loop resumes with a real judge verdict.
        with patch.object(goals, "judge_goal",
                          return_value=("continue", "build done", False, None)):
            d3 = mgr.evaluate_after_turn("build succeeded")
        assert d3["should_continue"] is True

    def test_wait_on_session_validation(self, hermes_home):
        """wait_on_session() raises RuntimeError without a goal and ValueError for an empty id."""
        from hermes_cli.goals import GoalManager

        mgr = GoalManager(session_id="st-val")

        # No active goal → RuntimeError
        try:
            mgr.wait_on_session("proc_x")
        except RuntimeError:
            pass
        else:
            # Raised outside the try body so the failure can never be
            # mistaken for (or swallowed as) the expected exception.
            raise AssertionError("expected RuntimeError")

        mgr.set("g")
        try:
            mgr.wait_on_session("")
        except ValueError:
            pass
        else:
            raise AssertionError("expected ValueError")

    def test_session_directive_parsed_from_judge(self, hermes_home):
        """A judge reply with wait_on_session parses into a session_id wait directive."""
        from hermes_cli.goals import _parse_judge_response

        v, _, pf, wd = _parse_judge_response(
            '{"verdict": "wait", "wait_on_session": "proc_abc", "reason": "r"}'
        )
        assert v == "wait"
        assert pf is False
        assert wd == {"session_id": "proc_abc"}

    def test_old_state_loads_without_session_field(self, hermes_home):
        """A stored row lacking the session field loads with waiting_on_session unset."""
        from hermes_cli.goals import GoalState

        st = GoalState.from_json(json.dumps({
            "goal": "g", "status": "active", "turns_used": 0, "max_turns": 20,
        }))
        assert st.waiting_on_session is None
# ──────────────────────────────────────────────────────────────────────
# Completion contract (Codex-inspired structured goals)
# ──────────────────────────────────────────────────────────────────────
class TestParseContract:
    """parse_contract(): splitting goal text into a headline plus contract fields."""

    def test_plain_goal_no_contract(self):
        """Text without field lines comes back unchanged with an empty contract."""
        from hermes_cli.goals import parse_contract

        goal_text = "Migrate auth to JWT"
        headline, contract = parse_contract(goal_text)

        assert headline == goal_text
        assert contract.is_empty()

    def test_incidental_colon_not_treated_as_field(self):
        """A colon after an unknown prefix must not be read as a field."""
        from hermes_cli.goals import parse_contract

        # "Fix bug:" — "fix bug" is not a known alias, so the whole line
        # stays the headline and no contract field is populated.
        goal_text = "Fix bug: the parser drops trailing commas"
        headline, contract = parse_contract(goal_text)

        assert headline == goal_text
        assert contract.is_empty()

    def test_inline_fields_parsed(self):
        """Each recognised `field: value` line lands in its contract attribute."""
        from hermes_cli.goals import parse_contract

        goal_lines = [
            "Migrate auth to JWT",
            "verify: the auth test suite passes",
            "constraints: keep the /login response shape unchanged",
            "boundaries: only touch services/auth and its tests",
            "stop when: a schema change needs product sign-off",
        ]
        headline, contract = parse_contract("\n".join(goal_lines))

        assert headline == "Migrate auth to JWT"
        assert contract.verification == "the auth test suite passes"
        assert contract.constraints == "keep the /login response shape unchanged"
        assert contract.boundaries == "only touch services/auth and its tests"
        assert contract.stop_when == "a schema change needs product sign-off"
        assert not contract.is_empty()

    def test_alias_variants(self):
        """`verified by:` and `preserve:` map to verification and constraints."""
        from hermes_cli.goals import parse_contract

        _, contract = parse_contract("Goal\nverified by: tests green\npreserve: public API")

        assert contract.verification == "tests green"
        assert contract.constraints == "public API"

    def test_multiple_lines_same_field_joined(self):
        """Repeated lines for one field are joined with a single space."""
        from hermes_cli.goals import parse_contract

        _, contract = parse_contract("G\nconstraints: a\nconstraints: b")

        assert contract.constraints == "a b"
class TestGoalContractSerialization:
    """JSON round-tripping of GoalState.contract and GoalContract rendering."""

    def test_roundtrip_with_contract(self):
        """Contract fields survive to_json() followed by from_json()."""
        from hermes_cli.goals import GoalState, GoalContract

        contract = GoalContract(
            verification="pytest passes",
            constraints="don't break the API",
        )
        original = GoalState(goal="ship it", contract=contract)

        serialized = original.to_json()
        restored = GoalState.from_json(serialized)

        assert restored.goal == "ship it"
        assert restored.contract.verification == "pytest passes"
        assert restored.contract.constraints == "don't break the API"
        assert restored.has_contract()

    def test_old_row_without_contract_loads_clean(self):
        """A row with no "contract" key loads with an empty contract."""
        # A state_meta row written before this feature has no "contract" key.
        from hermes_cli.goals import GoalState

        legacy_row = '{"goal": "old goal", "status": "active", "turns_used": 2}'
        loaded = GoalState.from_json(legacy_row)

        assert loaded.goal == "old goal"
        assert loaded.turns_used == 2
        assert loaded.contract.is_empty()
        assert not loaded.has_contract()

    def test_render_block_omits_empty_fields(self):
        """render_block() lists the populated fields and leaves out unset ones."""
        from hermes_cli.goals import GoalContract

        contract = GoalContract(outcome="X", verification="Y")
        rendered = contract.render_block()

        assert "Outcome: X" in rendered
        assert "Verification: Y" in rendered
        assert "Constraints" not in rendered
class TestGoalManagerContract:
    """GoalManager behaviour when a completion contract is (or is not) attached."""

    def test_set_with_contract(self, hermes_home):
        """Setting a goal with a contract is reflected in has_contract() and the status line."""
        from hermes_cli.goals import GoalManager, GoalContract

        manager = GoalManager(session_id="c-set")
        manager.set("ship it", contract=GoalContract(verification="tests pass"))

        assert manager.has_contract()
        assert "contract" in manager.status_line()

    def test_set_without_contract_no_marker(self, hermes_home):
        """A bare goal reports no contract and the status line does not mention one."""
        from hermes_cli.goals import GoalManager

        manager = GoalManager(session_id="c-none")
        manager.set("ship it")

        assert not manager.has_contract()
        assert "contract" not in manager.status_line()

    def test_continuation_prompt_includes_contract(self, hermes_home):
        """The continuation prompt carries the contract text and the evidence wording."""
        from hermes_cli.goals import GoalManager, GoalContract

        manager = GoalManager(session_id="c-cont")
        manager.set("ship it", contract=GoalContract(verification="run pytest"))

        prompt = manager.next_continuation_prompt()
        for expected in ("Completion contract", "run pytest", "concrete evidence"):
            assert expected in prompt

    def test_set_contract_after_the_fact(self, hermes_home):
        """set_contract() attaches a contract to an existing goal and persists it."""
        from hermes_cli.goals import GoalManager, GoalContract

        manager = GoalManager(session_id="c-after")
        manager.set("ship it")
        assert not manager.has_contract()

        manager.set_contract(GoalContract(verification="x"))
        assert manager.has_contract()

        # Survives reload.
        reloaded = GoalManager(session_id="c-after")
        assert reloaded.has_contract()

    def test_persistence_roundtrip(self, hermes_home):
        """Contract fields set through one manager are visible from a fresh one."""
        from hermes_cli.goals import GoalManager, GoalContract

        writer = GoalManager(session_id="c-persist")
        writer.set("ship it", contract=GoalContract(outcome="O", verification="V"))

        stored = GoalManager(session_id="c-persist").state.contract
        assert stored.outcome == "O"
        assert stored.verification == "V"
class TestJudgeWithContract:
    """What judge_goal() sends to the aux model when a contract is attached."""

    def _fake_client(self, captured, content='{"done": false, "reason": "more"}'):
        """Return a stand-in client class whose chat.completions.create()
        copies its keyword arguments into *captured* and replies with *content*."""
        reply_text = content

        class _Message:
            content = reply_text

        class _Choice:
            message = _Message()

        class _Response:
            choices = [_Choice()]

        def _record_and_reply(**kwargs):
            captured.update(kwargs)
            return _Response()

        class _Client:
            class chat:
                class completions:
                    create = staticmethod(_record_and_reply)

        return _Client

    @staticmethod
    def _user_prompt(captured):
        """Content of the first user-role message in the captured call, or ""."""
        for message in captured.get("messages") or []:
            if message["role"] == "user":
                return message["content"]
        return ""

    def test_judge_uses_contract_template(self, hermes_home):
        """The user prompt names the contract, its verification text and the evidence wording."""
        from unittest.mock import patch
        from hermes_cli import goals
        from hermes_cli.goals import GoalContract

        recorded = {}
        fake = self._fake_client(recorded)
        with patch("agent.auxiliary_client.get_text_auxiliary_client",
                   return_value=(fake, "fake-model")):
            with patch("agent.auxiliary_client.get_auxiliary_extra_body", return_value=None):
                goals.judge_goal(
                    "ship it", "I think it's done",
                    contract=GoalContract(verification="pytest -q passes"),
                )

        prompt = self._user_prompt(recorded)
        assert "completion contract" in prompt.lower()
        assert "pytest -q passes" in prompt
        assert "concrete evidence" in prompt

    def test_contract_plus_subgoals_combine(self, hermes_home):
        """Contract text and subgoal text both reach the same user prompt."""
        from unittest.mock import patch
        from hermes_cli import goals
        from hermes_cli.goals import GoalContract

        recorded = {}
        fake = self._fake_client(recorded)
        with patch("agent.auxiliary_client.get_text_auxiliary_client",
                   return_value=(fake, "fake-model")):
            with patch("agent.auxiliary_client.get_auxiliary_extra_body", return_value=None):
                goals.judge_goal(
                    "ship it", "done",
                    subgoals=["write changelog"],
                    contract=GoalContract(verification="pytest passes"),
                )

        prompt = self._user_prompt(recorded)
        assert "pytest passes" in prompt
        assert "write changelog" in prompt
class TestDraftContract:
    """draft_contract(): building a contract from the aux model's reply."""

    @staticmethod
    def _client_replying(reply_text):
        """Return a stand-in client class whose chat.completions.create()
        always answers with a single choice containing *reply_text*."""

        class _Message:
            content = reply_text

        class _Choice:
            message = _Message()

        class _Response:
            choices = [_Choice()]

        def _reply(**kwargs):
            return _Response()

        class _Client:
            class chat:
                class completions:
                    create = staticmethod(_reply)

        return _Client

    def test_draft_parses_json(self, hermes_home):
        """A JSON reply is turned into a populated contract."""
        from unittest.mock import patch
        from hermes_cli import goals

        model_reply = (
            '{"outcome": "auth on JWT", "verification": "auth suite green", '
            '"constraints": "no API change", "boundaries": "services/auth", '
            '"stop_when": "schema change needed"}'
        )
        fake = self._client_replying(model_reply)
        with patch("agent.auxiliary_client.get_text_auxiliary_client",
                   return_value=(fake, "fake-model")):
            with patch("agent.auxiliary_client.get_auxiliary_extra_body", return_value=None):
                drafted = goals.draft_contract("Migrate auth to JWT")

        assert drafted is not None
        assert drafted.outcome == "auth on JWT"
        assert drafted.verification == "auth suite green"
        assert not drafted.is_empty()

    def test_draft_returns_none_on_bad_json(self, hermes_home):
        """A reply that is not JSON yields None."""
        from unittest.mock import patch
        from hermes_cli import goals

        fake = self._client_replying("I cannot produce JSON, sorry")
        with patch("agent.auxiliary_client.get_text_auxiliary_client",
                   return_value=(fake, "fake-model")):
            with patch("agent.auxiliary_client.get_auxiliary_extra_body", return_value=None):
                drafted = goals.draft_contract("anything")

        assert drafted is None

    def test_draft_returns_none_when_no_client(self, hermes_home):
        """With no aux client available, drafting yields None."""
        from unittest.mock import patch
        from hermes_cli import goals

        with patch("agent.auxiliary_client.get_text_auxiliary_client",
                   return_value=(None, None)):
            drafted = goals.draft_contract("anything")

        assert drafted is None
# ──────────────────────────────────────────────────────────────────────
# Compose: completion contract + wait barrier in one judge call
# ──────────────────────────────────────────────────────────────────────
class TestContractAndBackgroundCompose:
    """For a contract goal that is blocked on a background process, the judge
    must be shown BOTH the contract block and the background-process list,
    so it can answer either done (evidence met) or wait (parked on the poller)."""

    def _capture_client(self, captured, content='{"verdict": "wait", "wait_on_pid": 4242, "reason": "CI still running"}'):
        """Return a stand-in client class whose chat.completions.create()
        copies its keyword arguments into *captured* and replies with *content*."""
        reply_text = content

        class _Message:
            content = reply_text

        class _Choice:
            message = _Message()

        class _Response:
            choices = [_Choice()]

        def _record_and_reply(**kwargs):
            captured.update(kwargs)
            return _Response()

        class _Client:
            class chat:
                class completions:
                    create = staticmethod(_record_and_reply)

        return _Client

    def test_judge_prompt_carries_contract_and_background(self, hermes_home):
        """One judge prompt holds the contract and the process list; `wait` is returned."""
        from unittest.mock import patch
        from hermes_cli import goals
        from hermes_cli.goals import GoalContract

        recorded = {}
        fake = self._capture_client(recorded)
        watcher = {
            "session_id": "ci-watch", "pid": 4242, "status": "running",
            "command": "wait_for_pr_green.sh 50501", "trigger": "exit",
        }
        with patch("agent.auxiliary_client.get_text_auxiliary_client",
                   return_value=(fake, "fake-model")):
            with patch("agent.auxiliary_client.get_auxiliary_extra_body", return_value=None):
                verdict, _reason, _parse_failed, wait_directive = goals.judge_goal(
                    "ship the PR",
                    "I pushed and started the CI watcher; waiting on it now.",
                    contract=GoalContract(verification="PR CI goes green"),
                    background_processes=[watcher],
                )

        prompt = ""
        for message in recorded.get("messages") or []:
            if message["role"] == "user":
                prompt = message["content"]
                break

        # Both surfaces present in one prompt.
        assert "completion contract" in prompt.lower()
        assert "PR CI goes green" in prompt
        assert "Background processes" in prompt
        assert "4242" in prompt
        # The judge can return a wait verdict on a contract goal.
        assert verdict == "wait"
        assert wait_directive and wait_directive.get("pid") == 4242

    def test_contract_goal_can_still_complete_on_evidence(self, hermes_home):
        """A `done` reply is returned as-is, with no wait directive, despite a running process."""
        from unittest.mock import patch
        from hermes_cli import goals
        from hermes_cli.goals import GoalContract

        recorded = {}
        done_reply = '{"verdict": "done", "reason": "CI is green, evidence shown"}'
        fake = self._capture_client(recorded, content=done_reply)
        running = [{"session_id": "ci", "pid": 4242, "status": "running", "command": "ci", "trigger": "exit"}]
        with patch("agent.auxiliary_client.get_text_auxiliary_client",
                   return_value=(fake, "fake-model")):
            with patch("agent.auxiliary_client.get_auxiliary_extra_body", return_value=None):
                verdict, _reason, _parse_failed, wait_directive = goals.judge_goal(
                    "ship the PR",
                    "CI finished: 30 passed, 0 failed. Done.",
                    contract=GoalContract(verification="PR CI goes green"),
                    background_processes=running,
                )

        assert verdict == "done"
        assert wait_directive is None