feat(goals): /subgoal — user-added criteria appended to active /goal (#25449)

* feat(goals): /subgoal — user-added criteria appended to active /goal

Layers a /subgoal command on top of the existing freeform Ralph judge
loop. The user can append extra criteria mid-loop; the judge factors
them into its done/continue verdict and the continuation prompt
surfaces them to the agent. No new tool, no agent self-judging — the
existing judge model just sees a richer prompt.

Forms:
  /subgoal                  show current subgoals
  /subgoal <text>           append a criterion
  /subgoal remove <n>       drop subgoal n (1-based)
  /subgoal clear            wipe all subgoals

How it integrates:

- GoalState gains `subgoals: List[str]` (default []), backwards-compat
  for existing state_meta rows.
- judge_goal accepts an optional subgoals kwarg; non-empty switches to
  JUDGE_USER_PROMPT_WITH_SUBGOALS_TEMPLATE which lists them as
  numbered criteria and asks 'is the goal AND every additional
  criterion satisfied?'
- next_continuation_prompt picks CONTINUATION_PROMPT_WITH_SUBGOALS_TEMPLATE
  when non-empty so the agent sees what to target.
- /subgoal is allowed mid-run on the gateway since it only touches the
  state the judge reads at turn boundary — no race with the running
  turn.
- Status line shows '... , N subgoals' when present.

Surface:
- hermes_cli/goals.py — field, prompt blocks, manager methods, judge weave
- hermes_cli/commands.py — /subgoal CommandDef
- cli.py — _handle_subgoal_command
- gateway/run.py — _handle_subgoal_command + mid-run dispatch
- tests/hermes_cli/test_goals.py — 15 new tests (backcompat, mutation,
  persistence, prompt template selection, judge-prompt content via mock,
  status-line rendering)

77 goal-related tests passing across goals + cli + gateway + tui.

* fix(goals): slash commands don't preempt the goal-continuation hook

Two findings from live-testing /subgoal:

1. Slash commands queued while the agent is running landed in
   _pending_input (same queue as real user messages). The goal hook's
   'is a real user message pending?' check returned True and silently
   skipped — but the slash command consumes its queue slot via
   process_command() which never re-fires the goal hook, so the loop
   stalls indefinitely. Now the hook peeks the queue and only defers
   when a non-slash payload is present.

2. The with-subgoals judge prompt was too soft — opus 4.7 said 'done,
   implying all requirements met' without verifying. Tightened to
   demand specific per-criterion evidence (file contents, output line,
   command result) and explicitly reject phrases like 'implying it was
   done.'

Live verified: /subgoal injected mid-loop now correctly forces the
judge to refuse done until the new criterion is met. Agent gets the
continuation prompt with subgoals listed, updates the script, judge
confirms done with specific evidence cited.
This commit is contained in:
Teknium 2026-05-13 22:55:09 -07:00 committed by GitHub
parent d110ce4493
commit 8f19078c6a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 531 additions and 14 deletions

109
cli.py
View file

@ -7647,6 +7647,8 @@ class HermesCLI:
_cprint(f" No agent running; queued as next turn: {payload[:80]}{'...' if len(payload) > 80 else ''}")
elif canonical == "goal":
self._handle_goal_command(cmd_original)
elif canonical == "subgoal":
self._handle_subgoal_command(cmd_original)
elif canonical == "skin":
self._handle_skin_command(cmd_original)
elif canonical == "voice":
@ -8245,6 +8247,81 @@ class HermesCLI:
except Exception:
pass
def _handle_subgoal_command(self, cmd: str) -> None:
    """Dispatch /subgoal subcommands.

    Forms:
        /subgoal                  show current subgoals
        /subgoal <text>           append a criterion
        /subgoal remove <n>       drop subgoal n (1-based)
        /subgoal clear            wipe all subgoals

    Subgoals are extra criteria the user adds mid-loop. They get
    appended to both the judge prompt (verdict must consider them)
    and the continuation prompt (agent sees them) on the next turn
    boundary. No special kick is needed: the running turn finishes,
    and the next judge call includes them.

    ``clear`` only wipes the list when nothing follows it, so a
    criterion that merely starts with that word (for example
    ``/subgoal clear the build cache``) is appended instead of
    deleting every existing subgoal.

    Args:
        cmd: The raw slash-command line, including the leading
            ``/subgoal`` token.
    """
    # Split off the command token once; everything after it is the
    # argument, kept verbatim so the user's own spacing survives.
    pieces = (cmd or "").strip().split(None, 1)
    arg = pieces[1].strip() if len(pieces) > 1 else ""

    mgr = self._get_goal_manager()
    if mgr is None:
        _cprint(f" {_DIM}Goals unavailable (no active session).{_RST}")
        return
    if not mgr.has_goal():
        _cprint(f" {_DIM}No active goal. Set one with /goal <text>.{_RST}")
        return

    # No args → list current subgoals.
    if not arg:
        _cprint(f" {mgr.status_line()}")
        _cprint(f" {mgr.render_subgoals()}")
        return

    tokens = arg.split(None, 1)
    verb = tokens[0].lower()
    rest = tokens[1].strip() if len(tokens) > 1 else ""

    if verb == "remove":
        if not rest:
            _cprint(" Usage: /subgoal remove <n>")
            return
        try:
            idx = int(rest.split()[0])
        except ValueError:
            _cprint(" /subgoal remove: <n> must be an integer (1-based index).")
            return
        try:
            removed = mgr.remove_subgoal(idx)
        except (IndexError, RuntimeError) as exc:
            _cprint(f" /subgoal remove: {exc}")
            return
        _cprint(f" ✓ Removed subgoal {idx}: {removed}")
        return

    # Bare "clear" only: with trailing text the user is describing a
    # criterion, and wiping the list would silently destroy their work.
    if verb == "clear" and not rest:
        try:
            prev = mgr.clear_subgoals()
        except RuntimeError as exc:
            _cprint(f" /subgoal clear: {exc}")
            return
        if prev:
            _cprint(f" ✓ Cleared {prev} subgoal{'s' if prev != 1 else ''}.")
        else:
            _cprint(f" {_DIM}No subgoals to clear.{_RST}")
        return

    # Otherwise — append the whole arg as a new subgoal.
    try:
        text = mgr.add_subgoal(arg)
    except (ValueError, RuntimeError) as exc:
        _cprint(f" /subgoal: {exc}")
        return
    # The new criterion is last, so its 1-based number is the list length.
    idx = len(mgr.state.subgoals) if mgr.state else 0
    _cprint(f" ✓ Added subgoal {idx}: {text}")
def _maybe_continue_goal_after_turn(self) -> None:
"""Hook run after every CLI turn. Judges + maybe re-queues.
@ -8271,10 +8348,36 @@ class HermesCLI:
# If a real user message is already queued, don't inject a
# continuation prompt on top — let the user's turn go first.
# Slash commands don't count as "real user messages" for this
# check: they're inspection/mutation (e.g. /subgoal added mid-
# run) and the process_loop dispatches them via process_command,
# not via chat(). If we treat a queued /subgoal as preempting,
# the goal loop silently stalls — we'd return here, then the
# slash command consumes its queue slot via process_command()
# which never re-fires the goal hook. Peek at all queued entries
# and only defer when there's a non-slash payload.
try:
if getattr(self, "_pending_input", None) is not None \
and not self._pending_input.empty():
return
pending = getattr(self, "_pending_input", None)
if pending is not None and not pending.empty():
has_real_message = False
try:
# Queue.queue is the underlying deque — direct peek
# without disturbing FIFO order.
for entry in list(pending.queue):
# Bundled payloads are (text, images) tuples;
# unpack for inspection.
if isinstance(entry, tuple) and entry:
entry = entry[0]
if isinstance(entry, str) and _looks_like_slash_command(entry):
continue
has_real_message = True
break
except Exception:
# Fallback: if we can't introspect the queue, behave
# like the old check and defer to be safe.
has_real_message = True
if has_real_message:
return
except Exception:
pass

View file

@ -6173,6 +6173,12 @@ class GatewayRunner:
return await self._handle_goal_command(event)
return "Agent is running — use /goal status / pause / clear mid-run, or /stop before setting a new goal."
# /subgoal is safe mid-run — it only modifies the goal's
# subgoals list, which the judge reads at the next turn
# boundary. No race with the running turn.
if _cmd_def_inner and _cmd_def_inner.name == "subgoal":
return await self._handle_subgoal_command(event)
# Session-level toggles that are safe to run mid-agent —
# /yolo can unblock a pending approval prompt, /verbose cycles
# the tool-progress display mode for the ongoing stream.
@ -6554,6 +6560,9 @@ class GatewayRunner:
if canonical == "goal":
return await self._handle_goal_command(event)
if canonical == "subgoal":
return await self._handle_subgoal_command(event)
if canonical == "voice":
return await self._handle_voice_command(event)
@ -9524,6 +9533,57 @@ class GatewayRunner:
return t("gateway.goal.set", budget=state.max_turns, goal=state.goal)
async def _handle_subgoal_command(self, event: "MessageEvent") -> str:
    """Handle /subgoal for gateway platforms (mirror of CLI handler).

    Subgoals are extra criteria appended to the active goal mid-loop.
    They modify state read at the next turn boundary, so this is safe
    to invoke while the agent is running.

    ``clear`` only wipes the list when nothing follows it, so a
    criterion that merely starts with that word (for example
    ``/subgoal clear the build cache``) is appended instead of
    deleting every existing subgoal.

    Args:
        event: The incoming message event; its command arguments are
            read via ``event.get_command_args()``.

    Returns:
        The reply text to send back to the chat.
    """
    args = (event.get_command_args() or "").strip()
    mgr, _session_entry = self._get_goal_manager_for_event(event)
    if mgr is None:
        return t("gateway.goal.unavailable")
    if not mgr.has_goal():
        return "No active goal. Set one with /goal <text>."

    # No args → list current subgoals.
    if not args:
        return f"{mgr.status_line()}\n{mgr.render_subgoals()}"

    tokens = args.split(None, 1)
    verb = tokens[0].lower()
    rest = tokens[1].strip() if len(tokens) > 1 else ""

    if verb == "remove":
        if not rest:
            return "Usage: /subgoal remove <n>"
        try:
            idx = int(rest.split()[0])
        except ValueError:
            return "/subgoal remove: <n> must be an integer (1-based index)."
        try:
            removed = mgr.remove_subgoal(idx)
        except (IndexError, RuntimeError) as exc:
            return f"/subgoal remove: {exc}"
        return f"✓ Removed subgoal {idx}: {removed}"

    # Bare "clear" only: with trailing text the user is describing a
    # criterion, and wiping the list would silently destroy their work.
    if verb == "clear" and not rest:
        try:
            prev = mgr.clear_subgoals()
        except RuntimeError as exc:
            return f"/subgoal clear: {exc}"
        if prev:
            return f"✓ Cleared {prev} subgoal{'s' if prev != 1 else ''}."
        return "No subgoals to clear."

    # Anything else is the text of a new criterion.
    try:
        text = mgr.add_subgoal(args)
    except (ValueError, RuntimeError) as exc:
        return f"/subgoal: {exc}"
    # The new criterion is last, so its 1-based number is the list length.
    idx = len(mgr.state.subgoals) if mgr.state else 0
    return f"✓ Added subgoal {idx}: {text}"
async def _send_goal_status_notice(self, source: Any, message: str) -> None:
"""Send a /goal judge status line back to the originating chat/thread."""
adapter = self.adapters.get(source.platform)

View file

@ -104,6 +104,8 @@ COMMAND_REGISTRY: list[CommandDef] = [
args_hint="<prompt>"),
CommandDef("goal", "Set a standing goal Hermes works on across turns until achieved", "Session",
args_hint="[text | pause | resume | clear | status]"),
CommandDef("subgoal", "Add or manage extra criteria on the active goal", "Session",
args_hint="[text | remove N | clear]"),
CommandDef("status", "Show session info", "Session"),
CommandDef("whoami", "Show your slash command access (admin / user)", "Info"),
CommandDef("profile", "Show active profile name and home directory", "Info"),

View file

@ -33,8 +33,8 @@ import json
import logging
import re
import time
from dataclasses import dataclass, asdict
from typing import Any, Dict, Optional, Tuple
from dataclasses import dataclass, field, asdict
from typing import Any, Dict, List, Optional, Tuple
logger = logging.getLogger(__name__)
@ -65,6 +65,21 @@ CONTINUATION_PROMPT_TEMPLATE = (
"If you are blocked and need input from the user, say so clearly and stop."
)
# Continuation prompt used in place of CONTINUATION_PROMPT_TEMPLATE when
# the user has added one or more /subgoal criteria. It is formatted with
# ``goal`` and ``subgoals_block`` (the numbered criteria list) so the
# agent sees exactly what to target on the next turn. The judge receives
# the same criteria through its own with-subgoals prompt template, not
# through this one.
CONTINUATION_PROMPT_WITH_SUBGOALS_TEMPLATE = (
    "[Continuing toward your standing goal]\n"
    "Goal: {goal}\n\n"
    "Additional criteria the user added mid-loop:\n"
    "{subgoals_block}\n\n"
    "Continue working toward the goal AND all additional criteria. Take "
    "the next concrete step. If you believe the goal and every "
    "additional criterion are complete, state so explicitly and stop. "
    "If you are blocked and need input from the user, say so clearly "
    "and stop."
)
JUDGE_SYSTEM_PROMPT = (
"You are a strict judge evaluating whether an autonomous agent has "
@ -88,6 +103,23 @@ JUDGE_USER_PROMPT_TEMPLATE = (
"Is the goal satisfied?"
)
# Judge user prompt used when the user has added /subgoal criteria. The
# judge must evaluate ALL of them being met, not just the original goal.
# Placeholders: ``goal``, ``subgoals_block`` (a numbered "- N. text"
# list built by judge_goal) and ``response`` (the agent's latest reply).
# The wording deliberately demands per-criterion evidence so a vague
# "all requirements met" does not count as done.
JUDGE_USER_PROMPT_WITH_SUBGOALS_TEMPLATE = (
    "Goal:\n{goal}\n\n"
    "Additional criteria the user added mid-loop (all must also be "
    "satisfied for the goal to be DONE):\n{subgoals_block}\n\n"
    "Agent's most recent response:\n{response}\n\n"
    "Decision: For each numbered criterion above, find concrete "
    "evidence in the agent's response that the criterion is "
    "satisfied. Do not accept generic phrases like 'all requirements "
    "met' or 'implying it was done' — require specific evidence (a "
    "file contents excerpt, an output line, a command result). If "
    "ANY criterion lacks specific evidence in the response, the goal "
    "is NOT done — return CONTINUE.\n\n"
    "Is the goal AND every additional criterion satisfied?"
)
# ──────────────────────────────────────────────────────────────────────
# Dataclass
@ -108,6 +140,12 @@ class GoalState:
last_reason: Optional[str] = None
paused_reason: Optional[str] = None # why we auto-paused (budget, etc.)
consecutive_parse_failures: int = 0 # judge-output parse failures in a row
# User-added criteria appended mid-loop via the /subgoal command.
# When non-empty the judge prompt and continuation prompt both
# include them so the agent works toward them and the judge factors
# them into the verdict. Backwards-compatible: defaults to empty so
# old state_meta rows load unchanged.
subgoals: List[str] = field(default_factory=list)
def to_json(self) -> str:
return json.dumps(asdict(self), ensure_ascii=False)
@ -115,6 +153,10 @@ class GoalState:
@classmethod
def from_json(cls, raw: str) -> "GoalState":
data = json.loads(raw)
raw_subgoals = data.get("subgoals") or []
subgoals: List[str] = []
if isinstance(raw_subgoals, list):
subgoals = [str(s).strip() for s in raw_subgoals if str(s).strip()]
return cls(
goal=data.get("goal", ""),
status=data.get("status", "active"),
@ -126,8 +168,18 @@ class GoalState:
last_reason=data.get("last_reason"),
paused_reason=data.get("paused_reason"),
consecutive_parse_failures=int(data.get("consecutive_parse_failures", 0) or 0),
subgoals=subgoals,
)
# --- subgoals helpers -------------------------------------------------
def render_subgoals_block(self) -> str:
    """Return the subgoals as newline-joined ``- N. text`` lines.

    Numbering starts at 1. With no subgoals the result is the empty
    string.
    """
    numbered = [
        f"- {number}. {criterion}"
        for number, criterion in enumerate(self.subgoals, 1)
    ]
    # Joining an empty list already yields "", matching the no-subgoals case.
    return "\n".join(numbered)
# ──────────────────────────────────────────────────────────────────────
# Persistence (SessionDB state_meta)
@ -284,6 +336,7 @@ def judge_goal(
last_response: str,
*,
timeout: float = DEFAULT_JUDGE_TIMEOUT,
subgoals: Optional[List[str]] = None,
) -> Tuple[str, str, bool]:
"""Ask the auxiliary model whether the goal is satisfied.
@ -296,6 +349,11 @@ def judge_goal(
auto-pause after N consecutive parse failures (see
``DEFAULT_MAX_CONSECUTIVE_PARSE_FAILURES``).
``subgoals`` is an optional list of user-added criteria (from
``/subgoal``) that the judge must also factor into its DONE/CONTINUE
decision. When non-empty the prompt switches to the with-subgoals
template; otherwise behavior is identical to the original judge.
This is deliberately fail-open: any error returns ``("continue", "...", False)``
so a broken judge doesn't wedge progress — the turn budget and the
consecutive-parse-failures auto-pause are the backstops.
@ -321,10 +379,22 @@ def judge_goal(
if client is None or not model:
return "continue", "no auxiliary client configured", False
prompt = JUDGE_USER_PROMPT_TEMPLATE.format(
goal=_truncate(goal, 2000),
response=_truncate(last_response, _JUDGE_RESPONSE_SNIPPET_CHARS),
)
# Build the prompt — pick the with-subgoals variant when applicable.
clean_subgoals = [s.strip() for s in (subgoals or []) if s and s.strip()]
if clean_subgoals:
subgoals_block = "\n".join(
f"- {i}. {text}" for i, text in enumerate(clean_subgoals, start=1)
)
prompt = JUDGE_USER_PROMPT_WITH_SUBGOALS_TEMPLATE.format(
goal=_truncate(goal, 2000),
subgoals_block=_truncate(subgoals_block, 2000),
response=_truncate(last_response, _JUDGE_RESPONSE_SNIPPET_CHARS),
)
else:
prompt = JUDGE_USER_PROMPT_TEMPLATE.format(
goal=_truncate(goal, 2000),
response=_truncate(last_response, _JUDGE_RESPONSE_SNIPPET_CHARS),
)
try:
resp = client.chat.completions.create(
@ -397,14 +467,15 @@ class GoalManager:
if s is None or s.status in {"cleared",}:
return "No active goal. Set one with /goal <text>."
turns = f"{s.turns_used}/{s.max_turns} turns"
sub = f", {len(s.subgoals)} subgoal{'s' if len(s.subgoals) != 1 else ''}" if s.subgoals else ""
if s.status == "active":
return f"⊙ Goal (active, {turns}): {s.goal}"
return f"⊙ Goal (active, {turns}{sub}): {s.goal}"
if s.status == "paused":
extra = f"{s.paused_reason}" if s.paused_reason else ""
return f"⏸ Goal (paused, {turns}{extra}): {s.goal}"
return f"⏸ Goal (paused, {turns}{sub}{extra}): {s.goal}"
if s.status == "done":
return f"✓ Goal done ({turns}): {s.goal}"
return f"Goal ({s.status}, {turns}): {s.goal}"
return f"✓ Goal done ({turns}{sub}): {s.goal}"
return f"Goal ({s.status}, {turns}{sub}): {s.goal}"
# --- mutation -----------------------------------------------------
@ -457,6 +528,53 @@ class GoalManager:
self._state.last_reason = reason
save_goal(self.session_id, self._state)
# --- /subgoal user controls ---------------------------------------
def add_subgoal(self, text: str) -> str:
    """Append a user-supplied criterion to the active goal and persist it.

    Args:
        text: The criterion; surrounding whitespace is stripped.

    Returns:
        The stripped text, so the caller can echo it back to the user.

    Raises:
        RuntimeError: If there is no active goal.
        ValueError: If the text is empty after stripping.
    """
    if self._state is None or not self.has_goal():
        raise RuntimeError("no active goal")

    cleaned = (text or "").strip()
    if cleaned == "":
        raise ValueError("subgoal text is empty")

    self._state.subgoals.append(cleaned)
    save_goal(self.session_id, self._state)
    return cleaned
def remove_subgoal(self, index_1based: int) -> str:
    """Drop the subgoal at a 1-based position and persist the change.

    Args:
        index_1based: Position of the subgoal to remove, counting from 1.

    Returns:
        The text of the removed subgoal.

    Raises:
        RuntimeError: If there is no active goal.
        IndexError: If the position falls outside the current list.
    """
    if self._state is None or not self.has_goal():
        raise RuntimeError("no active goal")

    position = int(index_1based) - 1
    if not 0 <= position < len(self._state.subgoals):
        raise IndexError(
            f"index out of range (1..{len(self._state.subgoals)})"
        )

    dropped = self._state.subgoals.pop(position)
    save_goal(self.session_id, self._state)
    return dropped
def clear_subgoals(self) -> int:
    """Remove every subgoal and persist the now-empty list.

    Returns:
        How many subgoals existed before the wipe.

    Raises:
        RuntimeError: If there is no active goal.
    """
    if self._state is None or not self.has_goal():
        raise RuntimeError("no active goal")

    count_before = len(self._state.subgoals)
    # Rebind to a fresh list rather than clearing the old one in place.
    self._state.subgoals = []
    save_goal(self.session_id, self._state)
    return count_before
def render_subgoals(self) -> str:
    """Return the text shown by the /subgoal slash command.

    Gives a placeholder when no goal state is loaded, a hint when the
    subgoal list is empty, and otherwise the state's numbered block.
    """
    state = self._state
    if state is None:
        return "(no active goal)"
    if state.subgoals:
        return state.render_subgoals_block()
    return "(no subgoals — use /subgoal <text> to add criteria)"
# --- the main entry point called after every turn -----------------
def evaluate_after_turn(
@ -494,7 +612,9 @@ class GoalManager:
state.turns_used += 1
state.last_turn_at = time.time()
verdict, reason, parse_failed = judge_goal(state.goal, last_response)
verdict, reason, parse_failed = judge_goal(
state.goal, last_response, subgoals=state.subgoals or None
)
state.last_verdict = verdict
state.last_reason = reason
@ -579,6 +699,11 @@ class GoalManager:
def next_continuation_prompt(self) -> Optional[str]:
if not self._state or self._state.status != "active":
return None
if self._state.subgoals:
return CONTINUATION_PROMPT_WITH_SUBGOALS_TEMPLATE.format(
goal=self._state.goal,
subgoals_block=self._state.render_subgoals_block(),
)
return CONTINUATION_PROMPT_TEMPLATE.format(goal=self._state.goal)
@ -586,6 +711,9 @@ __all__ = [
"GoalState",
"GoalManager",
"CONTINUATION_PROMPT_TEMPLATE",
"CONTINUATION_PROMPT_WITH_SUBGOALS_TEMPLATE",
"JUDGE_USER_PROMPT_TEMPLATE",
"JUDGE_USER_PROMPT_WITH_SUBGOALS_TEMPLATE",
"DEFAULT_MAX_TURNS",
"load_goal",
"save_goal",

View file

@ -514,3 +514,227 @@ class TestJudgeParseFailureAutoPause:
reloaded = load_goal("parse-fail-sid-4")
assert reloaded is not None
assert reloaded.consecutive_parse_failures == 2
# ──────────────────────────────────────────────────────────────────────
# /subgoal — user-added criteria
# ──────────────────────────────────────────────────────────────────────
class TestGoalStateSubgoalsBackcompat:
    """GoalState JSON loading with and without the ``subgoals`` key."""

    def test_old_state_meta_row_loads_without_subgoals(self):
        """A row written before the subgoals field existed must load
        with an empty list rather than crash."""
        import json

        from hermes_cli.goals import GoalState

        legacy_row = {
            "goal": "do a thing",
            "status": "active",
            "turns_used": 2,
            "max_turns": 20,
            "created_at": 1.0,
            "last_turn_at": 2.0,
            "consecutive_parse_failures": 0,
        }
        loaded = GoalState.from_json(json.dumps(legacy_row))

        assert loaded.goal == "do a thing"
        assert loaded.subgoals == []

    def test_subgoals_round_trip(self):
        """Subgoals survive a to_json / from_json cycle in order."""
        from hermes_cli.goals import GoalState

        original = GoalState(goal="g", subgoals=["a", "b", "c"])
        restored = GoalState.from_json(original.to_json())

        assert restored.subgoals == ["a", "b", "c"]
class TestGoalManagerSubgoals:
    """GoalManager add / remove / clear helpers behind /subgoal."""

    def test_add_subgoal(self, hermes_home):
        """Added text is stripped, returned, and stored."""
        from hermes_cli.goals import GoalManager

        manager = GoalManager(session_id="sub-add")
        manager.set("main goal")

        stored = manager.add_subgoal(" use bullet points ")

        assert stored == "use bullet points"
        assert manager.state.subgoals == ["use bullet points"]

    def test_add_subgoal_requires_active_goal(self, hermes_home):
        """Adding without a goal set raises RuntimeError."""
        import pytest
        from hermes_cli.goals import GoalManager

        manager = GoalManager(session_id="sub-noactive")

        with pytest.raises(RuntimeError):
            manager.add_subgoal("oops")

    def test_add_empty_subgoal_rejected(self, hermes_home):
        """Whitespace-only text raises ValueError."""
        import pytest
        from hermes_cli.goals import GoalManager

        manager = GoalManager(session_id="sub-empty")
        manager.set("g")

        with pytest.raises(ValueError):
            manager.add_subgoal(" ")

    def test_remove_subgoal(self, hermes_home):
        """Removing by 1-based index returns that entry and keeps the rest."""
        from hermes_cli.goals import GoalManager

        manager = GoalManager(session_id="sub-remove")
        manager.set("g")
        for label in ("first", "second", "third"):
            manager.add_subgoal(label)

        dropped = manager.remove_subgoal(2)

        assert dropped == "second"
        assert manager.state.subgoals == ["first", "third"]

    def test_remove_subgoal_out_of_range(self, hermes_home):
        """Indexes past the end and index 0 both raise IndexError."""
        import pytest
        from hermes_cli.goals import GoalManager

        manager = GoalManager(session_id="sub-oob")
        manager.set("g")
        manager.add_subgoal("only")

        for bad_index in (5, 0):
            with pytest.raises(IndexError):
                manager.remove_subgoal(bad_index)

    def test_clear_subgoals(self, hermes_home):
        """Clearing reports the previous count and empties the list."""
        from hermes_cli.goals import GoalManager

        manager = GoalManager(session_id="sub-clear")
        manager.set("g")
        for label in ("a", "b"):
            manager.add_subgoal(label)

        previous_count = manager.clear_subgoals()

        assert previous_count == 2
        assert manager.state.subgoals == []

    def test_subgoals_persist_across_reloads(self, hermes_home):
        """Subgoals stored in SessionDB survive a fresh GoalManager."""
        from hermes_cli.goals import GoalManager

        writer = GoalManager(session_id="sub-persist")
        writer.set("g")
        for label in ("first", "second"):
            writer.add_subgoal(label)

        reader = GoalManager(session_id="sub-persist")

        assert reader.state.subgoals == ["first", "second"]
class TestContinuationPromptWithSubgoals:
    """next_continuation_prompt template choice with and without subgoals."""

    def test_empty_subgoals_uses_original_template(self, hermes_home):
        """Without subgoals the prompt has no additional-criteria section."""
        from hermes_cli.goals import GoalManager

        manager = GoalManager(session_id="cp-empty")
        manager.set("ship the feature")

        rendered = manager.next_continuation_prompt()

        assert rendered is not None
        assert "ship the feature" in rendered
        assert "Additional criteria" not in rendered

    def test_with_subgoals_includes_them(self, hermes_home):
        """With subgoals the prompt lists each one, numbered from 1."""
        from hermes_cli.goals import GoalManager

        manager = GoalManager(session_id="cp-with")
        manager.set("ship the feature")
        for criterion in ("write tests", "update docs"):
            manager.add_subgoal(criterion)

        rendered = manager.next_continuation_prompt()

        assert rendered is not None
        for fragment in (
            "ship the feature",
            "Additional criteria",
            "1. write tests",
            "2. update docs",
        ):
            assert fragment in rendered
class TestJudgeGoalWithSubgoals:
    """judge_goal picks its user-prompt template based on ``subgoals``."""

    @staticmethod
    def _capturing_client(reply, captured):
        """Build a stand-in auxiliary client shared by both tests.

        Its ``chat.completions.create(**kwargs)`` copies the keyword
        arguments into ``captured`` and returns a response whose first
        choice carries ``reply`` as the message content, so no real
        model is ever called.
        """
        from types import SimpleNamespace

        def create(**kwargs):
            captured.update(kwargs)
            message = SimpleNamespace(content=reply)
            return SimpleNamespace(choices=[SimpleNamespace(message=message)])

        completions = SimpleNamespace(create=create)
        return SimpleNamespace(chat=SimpleNamespace(completions=completions))

    @staticmethod
    def _user_message(captured):
        """Return the content of the user-role message that was sent, or ""."""
        sent_messages = captured.get("messages") or []
        return next(
            (m["content"] for m in sent_messages if m["role"] == "user"), ""
        )

    def test_judge_uses_subgoals_template_when_provided(self, hermes_home):
        """judge_goal switches templates when subgoals is non-empty.

        We don't actually call the model — we patch the aux client to
        capture the prompt that would be sent.
        """
        from unittest.mock import patch

        from hermes_cli import goals

        captured = {}
        aux = (
            self._capturing_client('{"done": true, "reason": "all done"}', captured),
            "fake-model",
        )

        # Patch both the goals module attributes (create=True, in case
        # they are not module-level names there) and the source module.
        with patch.object(goals, "get_text_auxiliary_client",
                          return_value=aux, create=True), \
             patch.object(goals, "get_auxiliary_extra_body",
                          return_value=None, create=True), \
             patch("agent.auxiliary_client.get_text_auxiliary_client",
                   return_value=aux), \
             patch("agent.auxiliary_client.get_auxiliary_extra_body",
                   return_value=None):
            verdict, _reason, _parse_failed = goals.judge_goal(
                "ship the feature",
                "ok shipped",
                subgoals=["write tests", "update docs"],
            )

        # The aux client was called with a prompt that includes the subgoals.
        user_msg = self._user_message(captured)
        assert "Additional criteria" in user_msg
        assert "1. write tests" in user_msg
        assert "2. update docs" in user_msg
        assert "every additional criterion" in user_msg
        assert verdict == "done"

    def test_judge_uses_original_template_when_no_subgoals(self, hermes_home):
        """With ``subgoals=None`` the original judge prompt is sent."""
        from unittest.mock import patch

        from hermes_cli import goals

        captured = {}
        aux = (
            self._capturing_client('{"done": true, "reason": "ok"}', captured),
            "fake-model",
        )

        with patch("agent.auxiliary_client.get_text_auxiliary_client",
                   return_value=aux), \
             patch("agent.auxiliary_client.get_auxiliary_extra_body",
                   return_value=None):
            goals.judge_goal("ship it", "done", subgoals=None)

        user_msg = self._user_message(captured)
        assert "Additional criteria" not in user_msg
        assert "ship it" in user_msg
class TestStatusLineSubgoalCount:
    """status_line mentions the subgoal count only when subgoals exist."""

    def test_status_line_no_subgoals(self, hermes_home):
        """No subgoals: the line shows the goal and no subgoal mention."""
        from hermes_cli.goals import GoalManager

        manager = GoalManager(session_id="sl-empty")
        manager.set("ship it")

        summary = manager.status_line()

        assert "ship it" in summary
        assert "subgoal" not in summary.lower()

    def test_status_line_with_subgoals(self, hermes_home):
        """Two subgoals: the line reports '2 subgoals'."""
        from hermes_cli.goals import GoalManager

        manager = GoalManager(session_id="sl-with")
        manager.set("ship it")
        for criterion in ("a", "b"):
            manager.add_subgoal(criterion)

        assert "2 subgoals" in manager.status_line()