fix(goals): Ctrl+C during /goal loop auto-pauses the goal (#21888)

Reported: Ctrl+C during an active /goal loop felt like it did nothing —
the agent would interrupt the current turn, then immediately queue another
continuation and keep going until the session ended or the 20-turn budget
ran out.

Root cause: cli.py's _maybe_continue_goal_after_turn() ran in the finally:
block around self.chat(...) unconditionally. Whether the turn completed
normally, got interrupted, or returned an empty string, the judge ran on
whatever was in conversation_history and — because the judge is fail-open
— a "continue" verdict pushed another CONTINUATION_PROMPT onto
_pending_input. Ctrl+C was invisible to the hook.

Fix:
- chat() now captures result['interrupted'] onto self._last_turn_interrupted
  (resets to False at entry so early-returns don't leak prior state).
- _maybe_continue_goal_after_turn() checks the flag first: on interrupt,
  auto-pause via mgr.pause(reason='user-interrupted (Ctrl+C)') and print
  a one-liner pointing the user at /goal resume or /goal clear. No judge
  call, no continuation enqueued.
- Also added an empty-response guard that mirrors gateway/run.py's
  _handle_message logic (empty reply → transient failure → skip judging
  so we don't trip the consecutive-parse-failures backstop unnecessarily).

The goal stays in the DB as paused, so /goal resume recovers it after
the user has sorted out whatever made them cancel. /goal clear still
works as before for a full stop.

Tests: tests/cli/test_cli_goal_interrupt.py covers:
  - interrupted turn pauses + doesn't queue + judge is NOT called
  - paused goal is resumable
  - empty / whitespace / missing assistant reply skips judging
  - healthy turn still enqueues continuation / marks done
  - chat() resets _last_turn_interrupted at entry (anti-leak guard)

All 55 existing goal tests still pass.
This commit is contained in:
Teknium 2026-05-08 06:53:13 -07:00 committed by GitHub
parent 5643c29790
commit 674fad1483
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 272 additions and 1 deletions

52
cli.py
View file

@ -2414,6 +2414,11 @@ class HermesCLI:
self._agent_running = False
self._pending_input = queue.Queue()
self._interrupt_queue = queue.Queue()
# Tracks whether the turn that just finished was interrupted via
# Ctrl+C. Consumed by _maybe_continue_goal_after_turn so /goal loops
# don't auto-queue another continuation on top of a user-cancelled
# turn (which would make Ctrl+C feel like it did nothing).
self._last_turn_interrupted = False
self._should_exit = False
self._last_ctrl_c_time = 0
self._clarify_state = None
@ -7523,6 +7528,15 @@ class HermesCLI:
priority and we'll re-judge after that turn). If judge says done,
mark it done and tell the user. If judge says continue and we're
under budget, push the continuation prompt onto the queue.
Interrupt handling: if the turn was user-cancelled (Ctrl+C), we
AUTO-PAUSE the goal instead of judging + re-queuing. Otherwise
Ctrl+C feels like it did nothing the judge runs on whatever
partial output landed, almost always says "continue", and the
loop keeps going. Auto-pause keeps the goal recoverable via
``/goal resume`` once the user has sorted out what they want.
The empty-response skip mirrors the gateway guard at
``_handle_message`` in ``gateway/run.py``.
"""
mgr = self._get_goal_manager()
if mgr is None or not mgr.is_active():
@ -7537,6 +7551,22 @@ class HermesCLI:
except Exception:
pass
# If the turn was user-interrupted (Ctrl+C), auto-pause the goal
# and bail. The judge call would almost always return "continue"
# on the partial output and immediately re-queue another turn,
# which is exactly what the user cancelled. Pausing (rather than
# silently skipping) is the observable, recoverable behavior.
if getattr(self, "_last_turn_interrupted", False):
try:
mgr.pause(reason="user-interrupted (Ctrl+C)")
except Exception as exc:
logging.debug("goal pause-on-interrupt failed: %s", exc)
_cprint(
f" {_DIM}⏸ Goal paused — turn was interrupted. "
f"Use /goal resume to continue, or /goal clear to stop.{_RST}"
)
return
# Extract the agent's final response for this turn.
last_response = ""
try:
@ -7558,6 +7588,13 @@ class HermesCLI:
except Exception:
last_response = ""
# Skip judging on empty/whitespace-only responses. These are almost
# always transient failures (API error, empty stream) where the
# judge would say "continue" and trip the consecutive-parse-failures
# backstop unnecessarily. Mirrors the gateway guard.
if not last_response.strip():
return
decision = mgr.evaluate_after_turn(last_response, user_initiated=True)
msg = decision.get("message") or ""
if msg:
@ -9432,6 +9469,12 @@ class HermesCLI:
# register secure secret capture here as well.
set_secret_capture_callback(self._secret_capture_callback)
# Reset the per-turn interrupt flag. Any subsequent path that
# discovers an interrupt (below, after run_conversation) will flip
# this to True. Early returns (credential refresh failure, etc.)
# leave it False, which is correct — those aren't user interrupts.
self._last_turn_interrupted = False
# Refresh provider credentials if needed (handles key rotation transparently)
if not self._ensure_runtime_credentials():
return None
@ -9855,7 +9898,11 @@ class HermesCLI:
# Handle interrupt - check if we were interrupted
pending_message = None
if result and result.get("interrupted"):
_interrupted_this_turn = bool(result and result.get("interrupted"))
# Expose the flag for post-turn hooks (e.g. goal continuation)
# so they can skip themselves when the turn was user-cancelled.
self._last_turn_interrupted = _interrupted_this_turn
if _interrupted_this_turn:
pending_message = result.get("interrupt_message") or interrupt_msg
# Add indicator that we were interrupted
if response and pending_message:
@ -10335,6 +10382,9 @@ class HermesCLI:
self._agent_running = False
self._pending_input = queue.Queue() # For normal input (commands + new queries)
self._interrupt_queue = queue.Queue() # For messages typed while agent is running
# See constructor note. Mirrored here for the run() path that skips
# the earlier __init__ branch.
self._last_turn_interrupted = False
self._should_exit = False
self._last_ctrl_c_time = 0 # Track double Ctrl+C for force exit

View file

@ -0,0 +1,221 @@
"""Tests for CLI goal-continuation interrupt handling.
Covers:
- Ctrl+C during a /goal turn auto-pauses the goal (no more continuations).
- Empty/whitespace-only responses skip the judge (no phantom continuations).
- Clean response without interrupt still drives the judge + enqueues.
These tests exercise ``_maybe_continue_goal_after_turn`` directly on a
minimal ``HermesCLI`` stub (pattern used elsewhere in tests/cli).
"""
from __future__ import annotations
import queue
import sys
import uuid
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
# ──────────────────────────────────────────────────────────────────────
# Fixtures
# ──────────────────────────────────────────────────────────────────────
@pytest.fixture
def hermes_home(tmp_path, monkeypatch):
"""Isolated HERMES_HOME so SessionDB.state_meta writes stay hermetic."""
home = tmp_path / ".hermes"
home.mkdir()
monkeypatch.setattr(Path, "home", lambda: tmp_path)
monkeypatch.setenv("HERMES_HOME", str(home))
# Bust the goal module's DB cache so it re-resolves HERMES_HOME each test.
from hermes_cli import goals
goals._DB_CACHE.clear()
yield home
goals._DB_CACHE.clear()
def _make_cli_with_goal(session_id: str, goal_text: str = "build a thing"):
"""Build a minimal HermesCLI stub with an active goal wired in."""
from cli import HermesCLI
from hermes_cli.goals import GoalManager
cli = HermesCLI.__new__(HermesCLI)
# State the hook + helpers touch directly.
cli._pending_input = queue.Queue()
cli._last_turn_interrupted = False
cli.conversation_history = []
# `_get_goal_manager()` reads `self.session_id` directly, not
# `self.agent.session_id`. Match the production lookup.
cli.session_id = session_id
cli.agent = MagicMock()
cli.agent.session_id = session_id
mgr = GoalManager(session_id=session_id, default_max_turns=5)
mgr.set(goal_text)
cli._goal_manager = mgr
return cli, mgr
# ──────────────────────────────────────────────────────────────────────
# Tests
# ──────────────────────────────────────────────────────────────────────
class TestInterruptAutoPause:
def test_interrupted_turn_pauses_goal_and_skips_continuation(self, hermes_home):
"""Ctrl+C mid-turn must auto-pause the goal, not queue another round."""
sid = f"sid-interrupt-{uuid.uuid4().hex}"
cli, mgr = _make_cli_with_goal(sid)
# Simulate an interrupted turn with a partial assistant reply.
cli._last_turn_interrupted = True
cli.conversation_history = [
{"role": "user", "content": "kickoff"},
{"role": "assistant", "content": "starting work..."},
]
# Judge MUST NOT run on an interrupted turn. If it does, we've
# regressed — fail loudly instead of silently querying a mock.
with patch("hermes_cli.goals.judge_goal") as judge_mock:
judge_mock.side_effect = AssertionError(
"judge_goal called on an interrupted turn"
)
cli._maybe_continue_goal_after_turn()
# Pending input must NOT contain a continuation prompt.
assert cli._pending_input.empty(), (
"Interrupted turn should not enqueue a continuation prompt"
)
# Goal should be paused, not active.
state = mgr.state
assert state is not None
assert state.status == "paused"
assert "interrupt" in (state.paused_reason or "").lower()
def test_interrupted_turn_is_resumable(self, hermes_home):
"""After auto-pause from Ctrl+C, /goal resume puts it back to active."""
sid = f"sid-resume-{uuid.uuid4().hex}"
cli, mgr = _make_cli_with_goal(sid)
cli._last_turn_interrupted = True
cli.conversation_history = [
{"role": "assistant", "content": "partial"},
]
with patch("hermes_cli.goals.judge_goal"):
cli._maybe_continue_goal_after_turn()
assert mgr.state.status == "paused"
mgr.resume()
assert mgr.state.status == "active"
class TestEmptyResponseSkip:
def test_empty_response_does_not_invoke_judge(self, hermes_home):
"""Whitespace-only replies skip judging (transient failure guard)."""
sid = f"sid-empty-{uuid.uuid4().hex}"
cli, mgr = _make_cli_with_goal(sid)
cli._last_turn_interrupted = False
cli.conversation_history = [
{"role": "user", "content": "go"},
{"role": "assistant", "content": " \n\n "},
]
with patch("hermes_cli.goals.judge_goal") as judge_mock:
judge_mock.side_effect = AssertionError(
"judge_goal called on an empty response"
)
cli._maybe_continue_goal_after_turn()
# No continuation queued; goal still active (neither paused nor done).
assert cli._pending_input.empty()
assert mgr.state.status == "active"
def test_no_assistant_message_skipped(self, hermes_home):
"""Conversation with zero assistant replies must not trip the judge."""
sid = f"sid-noassistant-{uuid.uuid4().hex}"
cli, mgr = _make_cli_with_goal(sid)
cli._last_turn_interrupted = False
cli.conversation_history = [
{"role": "user", "content": "go"},
]
with patch("hermes_cli.goals.judge_goal") as judge_mock:
judge_mock.side_effect = AssertionError(
"judge_goal called without an assistant response"
)
cli._maybe_continue_goal_after_turn()
assert cli._pending_input.empty()
assert mgr.state.status == "active"
class TestHealthyTurnStillRuns:
def test_clean_response_enqueues_continuation_when_judge_says_continue(
self, hermes_home,
):
"""Sanity check: the hook still works in the happy path."""
sid = f"sid-healthy-{uuid.uuid4().hex}"
cli, mgr = _make_cli_with_goal(sid)
cli._last_turn_interrupted = False
cli.conversation_history = [
{"role": "user", "content": "go"},
{"role": "assistant", "content": "did some work, more to do"},
]
# Force the judge to say "continue" without touching the network.
with patch(
"hermes_cli.goals.judge_goal",
return_value=("continue", "needs more steps", False),
):
cli._maybe_continue_goal_after_turn()
# Continuation prompt must be queued.
assert not cli._pending_input.empty()
queued = cli._pending_input.get_nowait()
assert "Continuing toward your standing goal" in queued
assert mgr.state.status == "active"
def test_clean_response_marks_done_when_judge_says_done(self, hermes_home):
sid = f"sid-done-{uuid.uuid4().hex}"
cli, mgr = _make_cli_with_goal(sid)
cli._last_turn_interrupted = False
cli.conversation_history = [
{"role": "assistant", "content": "all finished, here's the result"},
]
with patch(
"hermes_cli.goals.judge_goal",
return_value=("done", "goal satisfied", False),
):
cli._maybe_continue_goal_after_turn()
assert cli._pending_input.empty()
assert mgr.state.status == "done"
class TestInterruptFlagLifecycle:
def test_chat_resets_flag_at_entry(self, hermes_home):
"""chat() must reset _last_turn_interrupted at the top of each turn.
This guards against stale flag state: if turn N was interrupted and
turn N+1 runs clean, the hook must not see True from N.
"""
# We can't run chat() end-to-end here, but we can assert the reset
# is the first thing after the secret-capture registration by
# inspecting the source shape.
from cli import HermesCLI
import inspect
src = inspect.getsource(HermesCLI.chat)
# Look for an explicit reset near the top of chat().
head = src.split("if not self._ensure_runtime_credentials", 1)[0]
assert "self._last_turn_interrupted = False" in head, (
"chat() must reset _last_turn_interrupted before run_conversation "
"runs — otherwise a prior turn's interrupt state leaks into the "
"next turn's goal hook decision."
)