mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-13 03:52:00 +00:00
fix(goals): auto-pause when judge model returns unparseable output
Weak judge models (e.g. deepseek-v4-flash) return empty strings or prose
when asked for the strict {done, reason} JSON verdict. The old code
failed-open to continue on every such turn, burning the entire turn
budget with log lines like
judge returned empty response
judge reply was not JSON: "Let me analyze whether the goal..."
and /goal clear could not stop it mid-loop without /stop.
After N=3 consecutive *parse* failures (transport/API errors don't
count — those are transient), the loop auto-pauses and prints:
⏸ Goal paused — the judge model (3 turns) isn't returning the
required JSON verdict. Route the judge to a stricter model in
~/.hermes/config.yaml:
auxiliary:
goal_judge:
provider: openrouter
model: google/gemini-3-flash-preview
Then /goal resume to continue.
The counter resets on any usable reply (both "done"/"continue" and
API errors) and persists across GoalManager reloads so cross-session
resumes carry the correct state.
Also fixes test_goal_verdict_send.py sharing a hardcoded session_id
across tests — the shared id only worked because the previous
_post_turn_goal_continuation was a never-awaited coroutine. Now that
PR #19160 made it properly awaited, the xdist test-leakage bug
surfaced. Each test gets a unique session_id via uuid suffix.
This commit is contained in:
parent
03ddff8897
commit
307c85e5c1
4 changed files with 270 additions and 49 deletions
|
|
@ -61,8 +61,9 @@ class _RecordingAdapter:
|
|||
return _R()
|
||||
|
||||
|
||||
def _make_runner_with_adapter():
|
||||
def _make_runner_with_adapter(session_id: str = None):
|
||||
from gateway.run import GatewayRunner
|
||||
import uuid
|
||||
|
||||
runner = object.__new__(GatewayRunner)
|
||||
runner.config = GatewayConfig(
|
||||
|
|
@ -74,9 +75,12 @@ def _make_runner_with_adapter():
|
|||
runner._queued_events = {}
|
||||
|
||||
src = _make_source()
|
||||
# Default to a unique session_id so xdist parallel runs on the same worker
|
||||
# don't see each other's GoalManager state (DEFAULT_DB_PATH gets frozen at
|
||||
# module-import time, defeating per-test HERMES_HOME monkeypatches).
|
||||
session_entry = SessionEntry(
|
||||
session_key=build_session_key(src),
|
||||
session_id="goal-sess-1",
|
||||
session_id=session_id or f"goal-sess-{uuid.uuid4().hex[:8]}",
|
||||
created_at=datetime.now(),
|
||||
updated_at=datetime.now(),
|
||||
platform=Platform.TELEGRAM,
|
||||
|
|
@ -103,8 +107,8 @@ async def test_goal_verdict_done_sent_via_adapter_send(hermes_home):
|
|||
mgr = GoalManager(session_entry.session_id)
|
||||
mgr.set("ship the feature")
|
||||
|
||||
with patch("hermes_cli.goals.judge_goal", return_value=("done", "the feature shipped")):
|
||||
runner._post_turn_goal_continuation(
|
||||
with patch("hermes_cli.goals.judge_goal", return_value=("done", "the feature shipped", False)):
|
||||
await runner._post_turn_goal_continuation(
|
||||
session_entry=session_entry,
|
||||
source=src,
|
||||
final_response="I shipped the feature.",
|
||||
|
|
@ -132,8 +136,8 @@ async def test_goal_verdict_continue_enqueues_continuation(hermes_home):
|
|||
mgr = GoalManager(session_entry.session_id)
|
||||
mgr.set("polish the docs")
|
||||
|
||||
with patch("hermes_cli.goals.judge_goal", return_value=("continue", "still needs work")):
|
||||
runner._post_turn_goal_continuation(
|
||||
with patch("hermes_cli.goals.judge_goal", return_value=("continue", "still needs work", False)):
|
||||
await runner._post_turn_goal_continuation(
|
||||
session_entry=session_entry,
|
||||
source=src,
|
||||
final_response="here's a partial edit",
|
||||
|
|
@ -160,8 +164,8 @@ async def test_goal_verdict_budget_exhausted_sends_pause(hermes_home):
|
|||
state.turns_used = 2
|
||||
save_goal(session_entry.session_id, state)
|
||||
|
||||
with patch("hermes_cli.goals.judge_goal", return_value=("continue", "keep going")):
|
||||
runner._post_turn_goal_continuation(
|
||||
with patch("hermes_cli.goals.judge_goal", return_value=("continue", "keep going", False)):
|
||||
await runner._post_turn_goal_continuation(
|
||||
session_entry=session_entry,
|
||||
source=src,
|
||||
final_response="still partial",
|
||||
|
|
@ -181,7 +185,7 @@ async def test_goal_verdict_skipped_when_no_active_goal(hermes_home):
|
|||
"""No goal set → the hook is a no-op. Nothing is sent, nothing enqueued."""
|
||||
runner, adapter, session_entry, src = _make_runner_with_adapter()
|
||||
|
||||
runner._post_turn_goal_continuation(
|
||||
await runner._post_turn_goal_continuation(
|
||||
session_entry=session_entry,
|
||||
source=src,
|
||||
final_response="anything",
|
||||
|
|
@ -207,9 +211,9 @@ async def test_goal_verdict_survives_adapter_without_send(hermes_home):
|
|||
|
||||
runner.adapters[Platform.TELEGRAM] = _NoSendAdapter()
|
||||
|
||||
with patch("hermes_cli.goals.judge_goal", return_value=("done", "ok")):
|
||||
with patch("hermes_cli.goals.judge_goal", return_value=("done", "ok", False)):
|
||||
# must not raise
|
||||
runner._post_turn_goal_continuation(
|
||||
await runner._post_turn_goal_continuation(
|
||||
session_entry=session_entry,
|
||||
source=src,
|
||||
final_response="whatever",
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue