mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-27 11:22:03 +00:00
* feat(goals): add /goal wait <pid> barrier to park the loop on a background process
The /goal loop re-pokes the agent every turn via the post-turn judge. When a
goal is gated on a long-running background process (CI poller, build, test
matrix, deploy) that produces nothing to judge yet, this spins the agent into
'is it done?' busy-work and burns the turn budget.
/goal wait <pid> [reason] parks the loop: while the PID is alive, the judge is
skipped, no turn is consumed, no continuation fires, and /goal status shows a
parked indicator. The barrier auto-clears the moment the process exits (the
agent's notify_on_complete watcher is the natural wake signal), then the next
turn resumes normal judging. /goal unwait clears it manually; pause/resume/clear
drop it; a dead/stale PID can never wedge the loop.
Wired across CLI, gateway, and the mid-run command guard for parity. Barrier
persists in SessionDB.state_meta (survives /resume); GoalState gains
backward-compatible waiting_on_pid/waiting_reason/waiting_since fields. 12 new
tests; docs updated.
* fix(goals): use gateway.status._pid_exists for liveness, not os.kill(pid,0)
The Windows-footguns CI guard flagged os.kill(pid, 0) in _pid_alive — on
Windows that's not a no-op, it routes to CTRL_C_EVENT and hard-kills the
target's console process group (bpo-14484). Delegate to the canonical
footgun-safe gateway.status._pid_exists (psutil + ctypes/POSIX fallback)
instead, with a direct-psutil last resort.
* feat(goals): judge-driven auto-wait — the loop parks itself, no manual /goal wait
Makes the wait barrier automatic. Every turn the judge is shown the agent's
live background processes (pid, command, uptime, output tail from the
process_registry) alongside the goal + response, and can return a new 'wait'
verdict instead of continue:
{"verdict":"wait","wait_on_pid":N} → park until that process exits
{"verdict":"wait","wait_for_seconds":N} → park until the deadline passes
evaluate_after_turn acts on the directive (sets the barrier, parks the loop)
so the agent isn't re-poked into busy-work while CI/builds/deploys run. Adds a
time-based waiting_until barrier alongside the pid barrier; both auto-clear and
can never wedge the loop. Drivers (CLI, gateway, tui_gateway) feed the live
registry in via gather_background_processes(). Manual /goal wait stays as an
override. Judge verdict contract widened to (verdict, reason, parse_failed,
wait_directive); legacy {"done":bool} shape still accepted.
* test(goals): update kanban _fake_judge to the 4-tuple judge contract
CI test(3) caught it: test_kanban_goal_mode's _fake_judge still returned the
3-tuple (verdict, reason, parse_failed), but the kanban loop now unpacks the
4-tuple (+ wait_directive). Update the fake to return None for the directive
and accept the background_processes kwarg.
* feat(goals): trigger-based wait — park on a process's own signal, not just exit
Addresses two gaps in the judge-driven wait: (1) the judge could only express
'wait until PID exits' or 'wait N seconds', so a long-lived watcher/server that
fires a trigger MID-RUN (and may never exit) couldn't be waited on; (2) the
process's own watch_patterns/notify_on_complete trigger was invisible to the judge.
Adds a session-based barrier (waiting_on_session) that releases on the process's
OWN trigger via process_registry.is_session_waiting(): the session exits, OR (if
started with watch_patterns) its pattern matches — even while the process keeps
running. list_sessions() now surfaces session_id + watch_patterns/watch_hit/
notify_on_complete so the judge sees the trigger and is told to prefer
wait_on_session for trigger processes. Judge verdict gains a {wait_on_session}
directive (preferred over pid). Backward-compatible GoalState field; pid + time
barriers unchanged.
Tests: TestSessionTriggerBarrier (release on mid-run pattern match while alive,
release on exit, unknown-session, full park→trigger→resume, parse, validation,
backcompat load). 105 goal-surface + 85 process_registry tests green.
221 lines
7.4 KiB
Python
221 lines
7.4 KiB
Python
"""Tests for gateway /goal verdict-message delivery.

The judge verdict message ("✓ Goal achieved", "⏸ budget exhausted", etc.)
must reach the user after each turn. Before this fix the code checked
``hasattr(adapter, "send_message")`` — but adapters expose ``send()``,
never ``send_message``, so the check always evaluated False and users
never saw verdicts. This test locks in the fix.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
from unittest.mock import MagicMock, patch
|
|
|
|
import pytest
|
|
|
|
from gateway.config import GatewayConfig, Platform, PlatformConfig
|
|
from gateway.session import SessionEntry, SessionSource, build_session_key
|
|
|
|
|
|
@pytest.fixture()
def hermes_home(tmp_path, monkeypatch):
    """Provide an isolated ``.hermes`` directory under ``tmp_path``.

    ``Path.home`` is redirected to ``tmp_path`` and ``HERMES_HOME`` is pointed
    at the new directory. The ``hermes_cli.goals`` DB cache is emptied both
    before the test body runs and again afterwards.
    """
    hermes_dir = tmp_path / ".hermes"
    hermes_dir.mkdir()

    def _fake_home():
        return tmp_path

    monkeypatch.setattr(Path, "home", _fake_home)
    monkeypatch.setenv("HERMES_HOME", str(hermes_dir))

    from hermes_cli import goals

    # Clear cached DB handles on the way in ...
    goals._DB_CACHE.clear()
    yield hermes_dir
    # ... and again on the way out.
    goals._DB_CACHE.clear()
|
|
|
|
|
|
def _make_source() -> SessionSource:
    """Return the fixed Telegram DM ``SessionSource`` used by these tests."""
    source = SessionSource(
        platform=Platform.TELEGRAM,
        user_id="u1",
        chat_id="c1",
        user_name="tester",
        chat_type="dm",
    )
    return source
|
|
|
|
|
|
class _RecordingAdapter:
    """Bare-bones adapter stand-in that remembers every ``send()`` call."""

    def __init__(self) -> None:
        self.sends: list[dict] = []
        self._pending_messages: dict = {}

    async def send(self, chat_id: str, content: str, reply_to=None, metadata=None):
        """Record the call and return an object reporting a successful send.

        ``reply_to`` is accepted but not recorded.
        """

        class _SendResult:
            success = True
            message_id = "mock-msg"

        record = {"chat_id": chat_id, "content": content, "metadata": metadata}
        self.sends.append(record)
        return _SendResult()
|
|
|
|
|
|
def _make_runner_with_adapter(session_id: str | None = None):
    """Build a bare ``GatewayRunner`` wired to a recording Telegram adapter.

    The runner is created with ``object.__new__`` so ``GatewayRunner.__init__``
    never runs; only the attributes assigned below are set on it. Its
    ``session_store`` is a ``MagicMock`` that always hands back the session
    entry built here.

    Args:
        session_id: Explicit session id for the ``SessionEntry``. When omitted
            (or empty) a unique ``goal-sess-<8 hex chars>`` id is generated.

    Returns:
        A ``(runner, adapter, session_entry, source)`` tuple.
    """
    from gateway.run import GatewayRunner
    import uuid

    runner = object.__new__(GatewayRunner)
    runner.config = GatewayConfig(
        platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="***")},
    )
    runner.adapters = {}
    runner._running_agents = {}
    runner._running_agents_ts = {}
    runner._queued_events = {}

    src = _make_source()
    session_key = build_session_key(src)
    # Default to a unique session_id so xdist parallel runs on the same worker
    # don't see each other's GoalManager state (DEFAULT_DB_PATH gets frozen at
    # module-import time, defeating per-test HERMES_HOME monkeypatches).
    # One timestamp for both fields keeps the fresh entry internally consistent.
    now = datetime.now()
    session_entry = SessionEntry(
        session_key=session_key,
        session_id=session_id or f"goal-sess-{uuid.uuid4().hex[:8]}",
        created_at=now,
        updated_at=now,
        platform=Platform.TELEGRAM,
        chat_type="dm",
    )

    runner.session_store = MagicMock()
    runner.session_store.get_or_create_session.return_value = session_entry
    runner.session_store._generate_session_key.return_value = session_key

    adapter = _RecordingAdapter()
    runner.adapters[Platform.TELEGRAM] = adapter
    return runner, adapter, session_entry, src
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_goal_verdict_done_sent_via_adapter_send(hermes_home):
    """A 'done' verdict must be delivered to the user via the adapter's
    ``send()`` method, carrying both the achievement banner and the
    judge's reason."""
    from hermes_cli.goals import GoalManager

    runner, adapter, session_entry, src = _make_runner_with_adapter()
    GoalManager(session_entry.session_id).set("ship the feature")

    judge_result = ("done", "the feature shipped", False, None)
    with patch("hermes_cli.goals.judge_goal", return_value=judge_result):
        await runner._post_turn_goal_continuation(
            session_entry=session_entry,
            source=src,
            final_response="I shipped the feature.",
        )
        # The send is scheduled fire-and-forget; yield so the task can run.
        await asyncio.sleep(0.05)

    assert len(adapter.sends) == 1, f"expected 1 send, got {len(adapter.sends)}: {adapter.sends}"
    delivered = adapter.sends[0]
    assert delivered["chat_id"] == "c1"
    body = delivered["content"]
    assert "Goal achieved" in body
    assert "the feature shipped" in body
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_goal_verdict_continue_enqueues_continuation(hermes_home):
    """A 'continue' verdict must produce two things: a 'continuing' status
    line sent to the user, and a continuation prompt queued in the adapter's
    pending messages so the goal loop carries on next turn."""
    from hermes_cli.goals import GoalManager

    runner, adapter, session_entry, src = _make_runner_with_adapter()
    GoalManager(session_entry.session_id).set("polish the docs")

    judge_result = ("continue", "still needs work", False, None)
    with patch("hermes_cli.goals.judge_goal", return_value=judge_result):
        await runner._post_turn_goal_continuation(
            session_entry=session_entry,
            source=src,
            final_response="here's a partial edit",
        )
        await asyncio.sleep(0.05)

    # Exactly one status line goes back to the user.
    assert len(adapter.sends) == 1
    status_line = adapter.sends[0]["content"]
    assert "Continuing toward goal" in status_line
    # The continuation prompt is queued for the next turn.
    assert adapter._pending_messages, "continuation prompt must be enqueued in pending_messages"
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_goal_verdict_budget_exhausted_sends_pause(hermes_home):
    """With the turn budget already spent, a 'continue' verdict must yield a
    pause message and must not queue another continuation."""
    from hermes_cli.goals import GoalManager, save_goal

    runner, adapter, session_entry, src = _make_runner_with_adapter()
    sid = session_entry.session_id

    # Persist a goal whose two-turn budget is already fully used.
    goal_state = GoalManager(sid, default_max_turns=2).set("tiny goal", max_turns=2)
    goal_state.turns_used = 2
    save_goal(sid, goal_state)

    judge_result = ("continue", "keep going", False, None)
    with patch("hermes_cli.goals.judge_goal", return_value=judge_result):
        await runner._post_turn_goal_continuation(
            session_entry=session_entry,
            source=src,
            final_response="still partial",
        )
        await asyncio.sleep(0.05)

    assert len(adapter.sends) == 1
    lowered = adapter.sends[0]["content"].lower()
    assert "paused" in lowered
    assert "turns used" in lowered
    # Nothing may be queued once the budget is gone.
    assert not adapter._pending_messages
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_goal_verdict_skipped_when_no_active_goal(hermes_home):
    """Without a goal set, the post-turn hook does nothing: no message is
    sent and no continuation is queued."""
    runner, adapter, session_entry, src = _make_runner_with_adapter()

    hook_kwargs = {
        "session_entry": session_entry,
        "source": src,
        "final_response": "anything",
    }
    await runner._post_turn_goal_continuation(**hook_kwargs)
    await asyncio.sleep(0.05)

    assert adapter.sends == []
    assert adapter._pending_messages == {}
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_goal_verdict_survives_adapter_without_send(hermes_home):
    """An adapter lacking a ``send`` attribute must not make the judge hook
    raise."""
    from hermes_cli.goals import GoalManager

    class _NoSendAdapter:
        def __init__(self):
            self._pending_messages: dict = {}

    runner, _adapter, session_entry, src = _make_runner_with_adapter()
    GoalManager(session_entry.session_id).set("survive missing send")

    # Swap the recording adapter for one that cannot send at all.
    runner.adapters[Platform.TELEGRAM] = _NoSendAdapter()

    judge_result = ("done", "ok", False, None)
    with patch("hermes_cli.goals.judge_goal", return_value=judge_result):
        # The test passes as long as nothing below raises.
        await runner._post_turn_goal_continuation(
            session_entry=session_entry,
            source=src,
            final_response="whatever",
        )
        await asyncio.sleep(0.05)
|