mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-29 06:31:32 +00:00
Remove unused imports (F401) and duplicate/shadowed import redefinitions (F811) across the codebase using ruff's safe autofixes. No behavioral changes -- imports only.

- ~1400 safe autofixes applied across 644 files (net -1072 lines)
- __init__.py re-exports preserved (excluded from F401 removal so public re-export surfaces stay intact)
- Re-exports that are imported or monkeypatched by tests but look unused in their defining module are kept with explicit # noqa: F401 (gateway/run.py load_dotenv; run_agent re-exports from agent.message_sanitization, agent.context_compressor, agent.retry_utils, agent.prompt_builder, agent.process_bootstrap, agent.codex_responses_adapter)
- Unsafe F841 (unused-variable) fixes deliberately skipped -- those can change behavior when the RHS has side effects
- ruff lints remain disabled in pyproject.toml (only PLW1514 is selected); this is a one-time cleanup, not a config change

Verification:
- python -m compileall: clean
- pytest --collect-only: all 27161 tests collect (zero import errors)
- core entry points import clean (run_agent, model_tools, cli, toolsets, hermes_state, batch_runner, gateway)
- static scan: every name any test imports directly from an edited module still resolves
739 lines
29 KiB
Python
739 lines
29 KiB
Python
"""Tests for hermes_cli/goals.py — persistent cross-turn goals."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
from unittest.mock import patch, MagicMock
|
|
|
|
import pytest
|
|
|
|
|
|
# ──────────────────────────────────────────────────────────────────────
# Fixtures
# ──────────────────────────────────────────────────────────────────────
|
|
|
|
|
|
@pytest.fixture
def hermes_home(tmp_path, monkeypatch):
    """Give a test its own HERMES_HOME so SessionDB.state_meta writes stay off the real one.

    Creates ``<tmp_path>/.hermes``, points ``Path.home`` at ``tmp_path`` and
    exports ``HERMES_HOME``.  The goals module's ``_DB_CACHE`` is emptied
    before the test and again afterwards.

    Yields:
        The temporary ``.hermes`` directory.
    """
    import pathlib

    fake_home = tmp_path / ".hermes"
    fake_home.mkdir()
    monkeypatch.setattr(pathlib.Path, "home", lambda: tmp_path)
    monkeypatch.setenv("HERMES_HOME", str(fake_home))

    # Imported only after the environment is patched; the cache is dropped so
    # the goals module re-resolves HERMES_HOME for this test.
    from hermes_cli import goals as goals_module

    goals_module._DB_CACHE.clear()
    yield fake_home
    goals_module._DB_CACHE.clear()
|
|
|
|
|
|
# ──────────────────────────────────────────────────────────────────────
# _parse_judge_response
# ──────────────────────────────────────────────────────────────────────
|
|
|
|
|
|
class TestParseJudgeResponse:
    """Tests for ``_parse_judge_response``.

    Each test passes a raw judge reply and inspects the first two items of
    the returned triple (the done flag and the reason); the third item is
    ignored in this class.
    """

    def test_clean_json_done(self):
        """A bare JSON object with ``done: true`` parses as done."""
        from hermes_cli.goals import _parse_judge_response

        is_done, why, _ = _parse_judge_response('{"done": true, "reason": "all good"}')
        assert is_done is True
        assert why == "all good"

    def test_clean_json_continue(self):
        """A bare JSON object with ``done: false`` parses as not done."""
        from hermes_cli.goals import _parse_judge_response

        is_done, why, _ = _parse_judge_response('{"done": false, "reason": "more work needed"}')
        assert is_done is False
        assert why == "more work needed"

    def test_json_in_markdown_fence(self):
        """JSON wrapped in a ```json fence is still understood."""
        from hermes_cli.goals import _parse_judge_response

        fenced = '```json\n{"done": true, "reason": "done"}\n```'
        is_done, why, _ = _parse_judge_response(fenced)
        assert is_done is True
        assert "done" in why

    def test_json_embedded_in_prose(self):
        """A JSON verdict preceded by free-form reasoning is extracted."""
        from hermes_cli.goals import _parse_judge_response

        chatty = 'Looking at this... the agent says X. Verdict: {"done": false, "reason": "partial"}'
        is_done, why, _ = _parse_judge_response(chatty)
        assert is_done is False
        assert why == "partial"

    def test_string_done_values(self):
        """String values of ``done`` are mapped onto booleans."""
        from hermes_cli.goals import _parse_judge_response

        for truthy in ("true", "yes", "done", "1"):
            payload = f'{{"done": "{truthy}", "reason": "r"}}'
            is_done, _, _ = _parse_judge_response(payload)
            assert is_done is True
        for falsy in ("false", "no", "not yet"):
            payload = f'{{"done": "{falsy}", "reason": "r"}}'
            is_done, _, _ = _parse_judge_response(payload)
            assert is_done is False

    def test_malformed_json_fails_open(self):
        """Non-JSON input yields not-done plus a non-empty reason (so judge_goal can map to continue)."""
        from hermes_cli.goals import _parse_judge_response

        is_done, why, _ = _parse_judge_response("this is not json at all")
        assert is_done is False
        assert why  # non-empty

    def test_empty_response(self):
        """An empty reply yields not-done plus a non-empty reason."""
        from hermes_cli.goals import _parse_judge_response

        is_done, why, _ = _parse_judge_response("")
        assert is_done is False
        assert why
|
|
|
|
|
|
# ──────────────────────────────────────────────────────────────────────
# judge_goal — fail-open semantics
# ──────────────────────────────────────────────────────────────────────
|
|
|
|
|
|
class TestJudgeGoal:
    """Tests for the verdict returned by ``judge_goal``.

    Where a judge client is needed, the auxiliary-client factory is patched
    to hand back a ``MagicMock`` (or ``None``) so no real model is called.
    """

    # Dotted path of the auxiliary-client factory patched by these tests.
    _AUX_CLIENT = "agent.auxiliary_client.get_text_auxiliary_client"

    def test_empty_goal_skipped(self):
        """An empty goal string gives the ``skipped`` verdict."""
        from hermes_cli.goals import judge_goal

        verdict, _, _ = judge_goal("", "some response")
        assert verdict == "skipped"

    def test_empty_response_continues(self):
        """An empty agent response gives the ``continue`` verdict."""
        from hermes_cli.goals import judge_goal

        verdict, _, _ = judge_goal("ship the thing", "")
        assert verdict == "continue"

    def test_no_aux_client_continues(self):
        """Fail-open: without an aux client the verdict is continue, not skipped/done."""
        from hermes_cli import goals

        missing_client = patch(self._AUX_CLIENT, return_value=(None, None))
        with missing_client:
            verdict, _, _ = goals.judge_goal("my goal", "my response")
        assert verdict == "continue"

    def test_api_error_continues(self):
        """An exception from the judge call fails open to continue (judge bugs must not wedge progress)."""
        from hermes_cli import goals

        broken_client = MagicMock()
        broken_client.chat.completions.create.side_effect = RuntimeError("boom")
        with patch(self._AUX_CLIENT, return_value=(broken_client, "judge-model")):
            verdict, reason, _ = goals.judge_goal("goal", "response")
        assert verdict == "continue"
        assert "judge error" in reason.lower()

    def test_judge_says_done(self):
        """A ``done: true`` reply from the judge becomes the ``done`` verdict."""
        from hermes_cli import goals

        judge_message = MagicMock(content='{"done": true, "reason": "achieved"}')
        judge_reply = MagicMock(choices=[MagicMock(message=judge_message)])
        stub_client = MagicMock()
        stub_client.chat.completions.create.return_value = judge_reply
        with patch(self._AUX_CLIENT, return_value=(stub_client, "judge-model")):
            verdict, reason, _ = goals.judge_goal("goal", "agent response")
        assert verdict == "done"
        assert reason == "achieved"

    def test_judge_says_continue(self):
        """A ``done: false`` reply from the judge becomes the ``continue`` verdict."""
        from hermes_cli import goals

        judge_message = MagicMock(content='{"done": false, "reason": "not yet"}')
        judge_reply = MagicMock(choices=[MagicMock(message=judge_message)])
        stub_client = MagicMock()
        stub_client.chat.completions.create.return_value = judge_reply
        with patch(self._AUX_CLIENT, return_value=(stub_client, "judge-model")):
            verdict, reason, _ = goals.judge_goal("goal", "agent response")
        assert verdict == "continue"
        assert reason == "not yet"
|
|
|
|
|
|
# ──────────────────────────────────────────────────────────────────────
# GoalManager lifecycle + persistence
# ──────────────────────────────────────────────────────────────────────
|
|
|
|
|
|
class TestGoalManager:
    """Lifecycle and persistence tests for ``GoalManager``.

    Every test requests the ``hermes_home`` fixture and uses a session id of
    its own.
    """

    def test_no_goal_initial(self, hermes_home):
        """A manager for a fresh session has no state and no goal."""
        from hermes_cli.goals import GoalManager

        manager = GoalManager(session_id="test-sid-1")
        assert manager.state is None
        assert not manager.is_active()
        assert not manager.has_goal()
        assert "No active goal" in manager.status_line()

    def test_set_then_status(self, hermes_home):
        """``set`` returns an active state and the status line reflects it."""
        from hermes_cli.goals import GoalManager

        manager = GoalManager(session_id="test-sid-2", default_max_turns=5)
        new_state = manager.set("port the thing")
        assert new_state.goal == "port the thing"
        assert new_state.status == "active"
        assert new_state.max_turns == 5
        assert new_state.turns_used == 0
        assert manager.is_active()
        assert "active" in manager.status_line().lower()
        assert "port the thing" in manager.status_line()

    def test_set_rejects_empty(self, hermes_home):
        """Empty and whitespace-only goals raise ``ValueError``."""
        from hermes_cli.goals import GoalManager

        manager = GoalManager(session_id="test-sid-3")
        for blank_goal in ("", " "):
            with pytest.raises(ValueError):
                manager.set(blank_goal)

    def test_pause_and_resume(self, hermes_home):
        """Pausing keeps the goal but deactivates it; resuming reactivates it."""
        from hermes_cli.goals import GoalManager

        manager = GoalManager(session_id="test-sid-4")
        manager.set("goal text")

        manager.pause(reason="user-paused")
        assert manager.state.status == "paused"
        assert not manager.is_active()
        assert manager.has_goal()

        manager.resume()
        assert manager.state.status == "active"
        assert manager.is_active()

    def test_clear(self, hermes_home):
        """``clear`` drops the state entirely."""
        from hermes_cli.goals import GoalManager

        manager = GoalManager(session_id="test-sid-5")
        manager.set("goal")
        manager.clear()
        assert manager.state is None
        assert not manager.is_active()

    def test_persistence_across_managers(self, hermes_home):
        """Key invariant: a second manager on the same session sees the goal.

        This is what makes /resume work — each session rebinds its
        GoalManager and picks up the saved state.
        """
        from hermes_cli.goals import GoalManager

        writer = GoalManager(session_id="persist-sid")
        writer.set("do the thing")

        reader = GoalManager(session_id="persist-sid")
        assert reader.state is not None
        assert reader.state.goal == "do the thing"
        assert reader.is_active()

    def test_evaluate_after_turn_done(self, hermes_home):
        """A ``done`` verdict sets status=done and produces no continuation."""
        from hermes_cli import goals
        from hermes_cli.goals import GoalManager

        manager = GoalManager(session_id="eval-sid-1")
        manager.set("ship it")

        judged_done = patch.object(goals, "judge_goal", return_value=("done", "shipped", False))
        with judged_done:
            decision = manager.evaluate_after_turn("I shipped the feature.")

        assert decision["verdict"] == "done"
        assert decision["should_continue"] is False
        assert decision["continuation_prompt"] is None
        assert manager.state.status == "done"
        assert manager.state.turns_used == 1

    def test_evaluate_after_turn_continue_under_budget(self, hermes_home):
        """A ``continue`` verdict under budget keeps the goal active and yields a prompt."""
        from hermes_cli import goals
        from hermes_cli.goals import GoalManager

        manager = GoalManager(session_id="eval-sid-2", default_max_turns=5)
        manager.set("a long goal")

        judged_continue = patch.object(
            goals, "judge_goal", return_value=("continue", "more work", False)
        )
        with judged_continue:
            decision = manager.evaluate_after_turn("made some progress")

        assert decision["verdict"] == "continue"
        assert decision["should_continue"] is True
        assert decision["continuation_prompt"] is not None
        assert "a long goal" in decision["continuation_prompt"]
        assert manager.state.status == "active"
        assert manager.state.turns_used == 1

    def test_evaluate_after_turn_budget_exhausted(self, hermes_home):
        """Reaching the turn ceiling auto-pauses the goal instead of continuing."""
        from hermes_cli import goals
        from hermes_cli.goals import GoalManager

        manager = GoalManager(session_id="eval-sid-3", default_max_turns=2)
        manager.set("hard goal")

        judged_continue = patch.object(
            goals, "judge_goal", return_value=("continue", "not yet", False)
        )
        with judged_continue:
            first = manager.evaluate_after_turn("step 1")
            assert first["should_continue"] is True
            assert manager.state.turns_used == 1
            assert manager.state.status == "active"

            second = manager.evaluate_after_turn("step 2")
            # turns_used is now 2, which equals max_turns → paused
            assert second["should_continue"] is False
            assert manager.state.status == "paused"
            assert manager.state.turns_used == 2
            assert "budget" in (manager.state.paused_reason or "").lower()

    def test_evaluate_after_turn_inactive(self, hermes_home):
        """``evaluate_after_turn`` does nothing while no goal is active."""
        from hermes_cli.goals import GoalManager

        manager = GoalManager(session_id="eval-sid-4")

        # No goal at all.
        without_goal = manager.evaluate_after_turn("anything")
        assert without_goal["verdict"] == "inactive"
        assert without_goal["should_continue"] is False

        # Goal exists but is paused.
        manager.set("a goal")
        manager.pause()
        while_paused = manager.evaluate_after_turn("anything")
        assert while_paused["verdict"] == "inactive"
        assert while_paused["should_continue"] is False

    def test_continuation_prompt_shape(self, hermes_home):
        """The continuation prompt must contain the goal text verbatim
        and be safe to inject as a user-role message (prompt-cache
        invariants: no system-prompt mutation)."""
        from hermes_cli.goals import GoalManager

        manager = GoalManager(session_id="cont-sid")
        manager.set("port goal command to hermes")
        continuation = manager.next_continuation_prompt()
        assert continuation is not None
        assert "port goal command to hermes" in continuation
        assert continuation.strip()  # non-empty
|
|
|
|
|
|
# ──────────────────────────────────────────────────────────────────────
# Smoke: CommandDef is wired
# ──────────────────────────────────────────────────────────────────────
|
|
|
|
|
|
def test_goal_command_in_registry():
    """``resolve_command("goal")`` returns a command named ``goal``."""
    from hermes_cli.commands import resolve_command

    resolved = resolve_command("goal")
    assert resolved is not None
    assert resolved.name == "goal"
|
|
|
|
|
|
def test_goal_command_dispatches_in_cli_registry_helpers():
    """``/goal`` is listed in COMMANDS and under the Session category (autocomplete / help)."""
    from hermes_cli.commands import COMMANDS, COMMANDS_BY_CATEGORY

    assert "/goal" in COMMANDS
    # A missing "Session" category falls back to an empty mapping, so the
    # membership check fails cleanly instead of raising KeyError.
    assert "/goal" in COMMANDS_BY_CATEGORY.get("Session", {})
|
|
|
|
|
|
# ──────────────────────────────────────────────────────────────────────
# Auto-pause on consecutive judge parse failures
# ──────────────────────────────────────────────────────────────────────
|
|
|
|
|
|
class TestJudgeParseFailureAutoPause:
    """Regression: weak judge models (e.g. deepseek-v4-flash) that reply with
    empty strings or non-JSON prose must auto-pause the loop after N turns
    rather than burn the whole turn budget."""

    def test_parse_response_flags_empty_as_parse_failure(self):
        """An empty reply is not-done, flagged as a parse failure, with an 'empty' reason."""
        from hermes_cli.goals import _parse_judge_response

        is_done, why, failed = _parse_judge_response("")
        assert is_done is False
        assert failed is True
        assert "empty" in why.lower()

    def test_parse_response_flags_non_json_as_parse_failure(self):
        """Pure prose is not-done, flagged as a parse failure, with a 'not json' reason."""
        from hermes_cli.goals import _parse_judge_response

        prose_reply = (
            "Let me analyze whether the goal is fully satisfied based on the agent's response..."
        )
        is_done, why, failed = _parse_judge_response(prose_reply)
        assert is_done is False
        assert failed is True
        assert "not json" in why.lower()

    def test_parse_response_clean_json_is_not_parse_failure(self):
        """Well-formed JSON is never flagged as a parse failure."""
        from hermes_cli.goals import _parse_judge_response

        is_done, _, failed = _parse_judge_response('{"done": false, "reason": "more work"}')
        assert is_done is False
        assert failed is False

    def test_api_error_does_not_count_as_parse_failure(self):
        """Transient network/API errors must not trip the auto-pause guard."""
        from hermes_cli import goals

        broken_client = MagicMock()
        broken_client.chat.completions.create.side_effect = RuntimeError("connection reset")
        aux_patch = patch(
            "agent.auxiliary_client.get_text_auxiliary_client",
            return_value=(broken_client, "judge-model"),
        )
        with aux_patch:
            verdict, _, failed = goals.judge_goal("goal", "response")
        assert verdict == "continue"
        assert failed is False

    def test_empty_judge_reply_flagged_as_parse_failure(self):
        """End-to-end: empty judge content comes back with parse_failed=True."""
        from hermes_cli import goals

        empty_message = MagicMock(content="")
        stub_client = MagicMock()
        stub_client.chat.completions.create.return_value = MagicMock(
            choices=[MagicMock(message=empty_message)]
        )
        aux_patch = patch(
            "agent.auxiliary_client.get_text_auxiliary_client",
            return_value=(stub_client, "judge-model"),
        )
        with aux_patch:
            verdict, _, failed = goals.judge_goal("goal", "response")
        assert verdict == "continue"
        assert failed is True

    def test_auto_pause_after_three_consecutive_parse_failures(self, hermes_home):
        """N=3 consecutive parse failures auto-pause with a pointer to the config."""
        from hermes_cli import goals
        from hermes_cli.goals import GoalManager, DEFAULT_MAX_CONSECUTIVE_PARSE_FAILURES

        assert DEFAULT_MAX_CONSECUTIVE_PARSE_FAILURES == 3
        manager = GoalManager(session_id="parse-fail-sid-1", default_max_turns=20)
        manager.set("do a thing")

        always_unparseable = patch.object(
            goals,
            "judge_goal",
            return_value=("continue", "judge returned empty response", True),
        )
        with always_unparseable:
            # The first two failures only bump the counter.
            for failures_so_far, reply in ((1, "step 1"), (2, "step 2")):
                decision = manager.evaluate_after_turn(reply)
                assert decision["should_continue"] is True
                assert manager.state.consecutive_parse_failures == failures_so_far

            # The third one trips the guard.
            final = manager.evaluate_after_turn("step 3")
            assert final["should_continue"] is False
            assert final["status"] == "paused"
            assert manager.state.consecutive_parse_failures == 3
            # Message points at the config surface so the user can fix it.
            for hint in ("auxiliary", "goal_judge", "config.yaml"):
                assert hint in final["message"]

    def test_parse_failure_counter_resets_on_good_reply(self, hermes_home):
        """One good judge reply zeroes the counter, so transient flakes don't pause."""
        from hermes_cli import goals
        from hermes_cli.goals import GoalManager

        manager = GoalManager(session_id="parse-fail-sid-2", default_max_turns=20)
        manager.set("another goal")

        # Two parse failures…
        unparseable = patch.object(goals, "judge_goal", return_value=("continue", "not json", True))
        with unparseable:
            manager.evaluate_after_turn("step 1")
            manager.evaluate_after_turn("step 2")
        assert manager.state.consecutive_parse_failures == 2

        # …then one clean reply resets the counter.
        parseable = patch.object(
            goals, "judge_goal", return_value=("continue", "making progress", False)
        )
        with parseable:
            decision = manager.evaluate_after_turn("step 3")
        assert decision["should_continue"] is True
        assert manager.state.consecutive_parse_failures == 0

    def test_parse_failure_counter_not_incremented_by_api_errors(self, hermes_home):
        """API/transport errors must NOT count toward the auto-pause threshold."""
        from hermes_cli import goals
        from hermes_cli.goals import GoalManager

        manager = GoalManager(session_id="parse-fail-sid-3", default_max_turns=20)
        manager.set("goal")

        api_error = patch.object(
            goals,
            "judge_goal",
            return_value=("continue", "judge error: RuntimeError", False),
        )
        with api_error:
            for _ in range(5):
                decision = manager.evaluate_after_turn("still going")
                assert decision["should_continue"] is True
        assert manager.state.consecutive_parse_failures == 0
        assert manager.state.status == "active"

    def test_consecutive_parse_failures_persists_across_goalmanager_reloads(
        self, hermes_home
    ):
        """The counter must be durable so cross-session resumes see it."""
        from hermes_cli import goals
        from hermes_cli.goals import GoalManager, load_goal

        manager = GoalManager(session_id="parse-fail-sid-4", default_max_turns=20)
        manager.set("persistent goal")

        unparseable = patch.object(goals, "judge_goal", return_value=("continue", "empty", True))
        with unparseable:
            manager.evaluate_after_turn("r")
            manager.evaluate_after_turn("r")

        stored = load_goal("parse-fail-sid-4")
        assert stored is not None
        assert stored.consecutive_parse_failures == 2
|
|
|
|
|
|
# ──────────────────────────────────────────────────────────────────────
# /subgoal — user-added criteria
# ──────────────────────────────────────────────────────────────────────
|
|
|
|
|
|
class TestGoalStateSubgoalsBackcompat:
    """Serialization tests for the ``subgoals`` field of ``GoalState``."""

    def test_old_state_meta_row_loads_without_subgoals(self):
        """A goal serialized BEFORE the subgoals field existed must load
        with an empty subgoal list rather than crash."""
        from hermes_cli.goals import GoalState

        # Payload deliberately has no "subgoals" key.
        legacy_fields = {
            "goal": "do a thing",
            "status": "active",
            "turns_used": 2,
            "max_turns": 20,
            "created_at": 1.0,
            "last_turn_at": 2.0,
            "consecutive_parse_failures": 0,
        }
        loaded = GoalState.from_json(json.dumps(legacy_fields))
        assert loaded.goal == "do a thing"
        assert loaded.subgoals == []

    def test_subgoals_round_trip(self):
        """Subgoals survive ``to_json`` followed by ``from_json`` in order."""
        from hermes_cli.goals import GoalState

        original = GoalState(goal="g", subgoals=["a", "b", "c"])
        serialized = original.to_json()
        restored = GoalState.from_json(serialized)
        assert restored.subgoals == ["a", "b", "c"]
|
|
|
|
|
|
class TestGoalManagerSubgoals:
    """Tests for adding, removing, clearing and persisting subgoals on ``GoalManager``."""

    def test_add_subgoal(self, hermes_home):
        """``add_subgoal`` strips surrounding whitespace and stores the text."""
        from hermes_cli.goals import GoalManager

        manager = GoalManager(session_id="sub-add")
        manager.set("main goal")
        stored_text = manager.add_subgoal(" use bullet points ")
        assert stored_text == "use bullet points"
        assert manager.state.subgoals == ["use bullet points"]

    def test_add_subgoal_requires_active_goal(self, hermes_home):
        """Adding a subgoal when no goal was set raises ``RuntimeError``."""
        import pytest
        from hermes_cli.goals import GoalManager

        manager = GoalManager(session_id="sub-noactive")
        with pytest.raises(RuntimeError):
            manager.add_subgoal("oops")

    def test_add_empty_subgoal_rejected(self, hermes_home):
        """A whitespace-only subgoal raises ``ValueError``."""
        import pytest
        from hermes_cli.goals import GoalManager

        manager = GoalManager(session_id="sub-empty")
        manager.set("g")
        with pytest.raises(ValueError):
            manager.add_subgoal(" ")

    def test_remove_subgoal(self, hermes_home):
        """``remove_subgoal(2)`` removes and returns the second subgoal."""
        from hermes_cli.goals import GoalManager

        manager = GoalManager(session_id="sub-remove")
        manager.set("g")
        for text in ("first", "second", "third"):
            manager.add_subgoal(text)
        dropped = manager.remove_subgoal(2)
        assert dropped == "second"
        assert manager.state.subgoals == ["first", "third"]

    def test_remove_subgoal_out_of_range(self, hermes_home):
        """Indices 5 and 0 both raise ``IndexError`` when one subgoal exists."""
        import pytest
        from hermes_cli.goals import GoalManager

        manager = GoalManager(session_id="sub-oob")
        manager.set("g")
        manager.add_subgoal("only")
        for bad_index in (5, 0):
            with pytest.raises(IndexError):
                manager.remove_subgoal(bad_index)

    def test_clear_subgoals(self, hermes_home):
        """``clear_subgoals`` returns the previous count and empties the list."""
        from hermes_cli.goals import GoalManager

        manager = GoalManager(session_id="sub-clear")
        manager.set("g")
        manager.add_subgoal("a")
        manager.add_subgoal("b")
        removed_count = manager.clear_subgoals()
        assert removed_count == 2
        assert manager.state.subgoals == []

    def test_subgoals_persist_across_reloads(self, hermes_home):
        """Subgoals stored in SessionDB are visible to a fresh GoalManager."""
        from hermes_cli.goals import GoalManager

        writer = GoalManager(session_id="sub-persist")
        writer.set("g")
        writer.add_subgoal("first")
        writer.add_subgoal("second")

        reader = GoalManager(session_id="sub-persist")
        assert reader.state.subgoals == ["first", "second"]
|
|
|
|
|
|
class TestContinuationPromptWithSubgoals:
    """Tests for how subgoals show up in ``next_continuation_prompt``."""

    def test_empty_subgoals_uses_original_template(self, hermes_home):
        """With no subgoals the prompt has the goal but no 'Additional criteria' section."""
        from hermes_cli.goals import GoalManager

        manager = GoalManager(session_id="cp-empty")
        manager.set("ship the feature")
        continuation = manager.next_continuation_prompt()
        assert continuation is not None
        assert "ship the feature" in continuation
        assert "Additional criteria" not in continuation

    def test_with_subgoals_includes_them(self, hermes_home):
        """With subgoals the prompt lists them, numbered from 1, under 'Additional criteria'."""
        from hermes_cli.goals import GoalManager

        manager = GoalManager(session_id="cp-with")
        manager.set("ship the feature")
        manager.add_subgoal("write tests")
        manager.add_subgoal("update docs")
        continuation = manager.next_continuation_prompt()
        assert continuation is not None
        for expected in (
            "ship the feature",
            "Additional criteria",
            "1. write tests",
            "2. update docs",
        ):
            assert expected in continuation
|
|
|
|
|
|
class TestJudgeGoalWithSubgoals:
    """Tests for which prompt ``judge_goal`` sends depending on ``subgoals``.

    No model is called: the auxiliary client is replaced by a stub that
    records the keyword arguments passed to ``chat.completions.create``.
    """

    @staticmethod
    def _capturing_client(reply, captured):
        """Build a stub auxiliary client that records what it is asked.

        Args:
            reply: Text exposed as ``choices[0].message.content`` of every
                response the stub returns.
            captured: Dict updated in place with the keyword arguments of
                each ``chat.completions.create`` call.

        Returns:
            A class (not an instance) whose ``chat.completions.create`` is a
            static method, so it can be used without instantiation.
        """

        class _FakeMsg:
            content = reply

        class _FakeChoice:
            message = _FakeMsg()

        class _FakeResp:
            choices = [_FakeChoice()]

        class _FakeClient:
            class chat:
                class completions:
                    @staticmethod
                    def create(**kwargs):
                        captured.update(kwargs)
                        return _FakeResp()

        return _FakeClient

    @staticmethod
    def _user_prompt(captured):
        """Return the content of the first captured user-role message, or ``""``."""
        sent_messages = captured.get("messages") or []
        return next((m["content"] for m in sent_messages if m["role"] == "user"), "")

    def test_judge_uses_subgoals_template_when_provided(self, hermes_home):
        """judge_goal switches templates when subgoals is non-empty.

        We don't actually call the model — we patch the aux client to
        capture the prompt that would be sent.
        """
        from hermes_cli import goals

        captured = {}
        fake_client = self._capturing_client('{"done": true, "reason": "all done"}', captured)

        # The helpers are patched both on the goals module (create=True, since
        # the attribute may not exist there) and on their defining module.
        with patch.object(goals, "get_text_auxiliary_client",
                          return_value=(fake_client, "fake-model"), create=True), \
             patch.object(goals, "get_auxiliary_extra_body",
                          return_value=None, create=True), \
             patch("agent.auxiliary_client.get_text_auxiliary_client",
                   return_value=(fake_client, "fake-model")), \
             patch("agent.auxiliary_client.get_auxiliary_extra_body",
                   return_value=None):
            verdict, _, _ = goals.judge_goal(
                "ship the feature",
                "ok shipped",
                subgoals=["write tests", "update docs"],
            )

        # The aux client was called with a prompt that includes the subgoals.
        user_msg = self._user_prompt(captured)
        assert "Additional criteria" in user_msg
        assert "1. write tests" in user_msg
        assert "2. update docs" in user_msg
        assert "every additional criterion" in user_msg
        assert verdict == "done"

    def test_judge_uses_original_template_when_no_subgoals(self, hermes_home):
        """With ``subgoals=None`` the prompt has the goal and no 'Additional criteria'."""
        from hermes_cli import goals

        captured = {}
        fake_client = self._capturing_client('{"done": true, "reason": "ok"}', captured)

        with patch("agent.auxiliary_client.get_text_auxiliary_client",
                   return_value=(fake_client, "fake-model")), \
             patch("agent.auxiliary_client.get_auxiliary_extra_body",
                   return_value=None):
            goals.judge_goal("ship it", "done", subgoals=None)

        user_msg = self._user_prompt(captured)
        assert "Additional criteria" not in user_msg
        assert "ship it" in user_msg
|
|
|
|
|
|
class TestStatusLineSubgoalCount:
    """Tests for the subgoal count shown by ``GoalManager.status_line``."""

    def test_status_line_no_subgoals(self, hermes_home):
        """Without subgoals the status line shows the goal and never says 'subgoal'."""
        from hermes_cli.goals import GoalManager

        manager = GoalManager(session_id="sl-empty")
        manager.set("ship it")
        status = manager.status_line()
        assert "ship it" in status
        assert "subgoal" not in status.lower()

    def test_status_line_with_subgoals(self, hermes_home):
        """With two subgoals the status line contains '2 subgoals'."""
        from hermes_cli.goals import GoalManager

        manager = GoalManager(session_id="sl-with")
        manager.set("ship it")
        for text in ("a", "b"):
            manager.add_subgoal(text)
        status = manager.status_line()
        assert "2 subgoals" in status
|