From 674fad14832006bfd742c5e3183f34c24018e43a Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Fri, 8 May 2026 06:53:13 -0700 Subject: [PATCH] fix(goals): Ctrl+C during /goal loop auto-pauses the goal (#21888) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported: Ctrl+C during an active /goal loop felt like it did nothing — the agent would interrupt the current turn, then immediately queue another continuation and keep going until the session ended or the 20-turn budget ran out. Root cause: cli.py's _maybe_continue_goal_after_turn() ran in the finally: block around self.chat(...) unconditionally. Whether the turn completed normally, got interrupted, or returned an empty string, the judge ran on whatever was in conversation_history and — because the judge is fail-open — a "continue" verdict pushed another CONTINUATION_PROMPT onto _pending_input. Ctrl+C was invisible to the hook. Fix: - chat() now captures result['interrupted'] onto self._last_turn_interrupted (resets to False at entry so early-returns don't leak prior state). - _maybe_continue_goal_after_turn() checks the flag first: on interrupt, auto-pause via mgr.pause(reason='user-interrupted (Ctrl+C)') and print a one-liner pointing the user at /goal resume or /goal clear. No judge call, no continuation enqueued. - Also added an empty-response guard that mirrors gateway/run.py's _handle_message logic (empty reply → transient failure → skip judging so we don't trip the consecutive-parse-failures backstop unnecessarily). The goal stays in the DB as paused, so /goal resume recovers it after the user has sorted out whatever made them cancel. /goal clear still works as before for a full stop. Tests: tests/cli/test_cli_goal_interrupt.py covers: - interrupted turn pauses + doesn't queue + judge is NOT called - paused goal is resumable - empty / whitespace / missing assistant reply skips judging - healthy turn still enqueues continuation / marks done - chat() resets _last_turn_interrupted at entry (anti-leak guard) All 55 existing goal tests still pass. --- cli.py | 52 ++++++- tests/cli/test_cli_goal_interrupt.py | 221 +++++++++++++++++++++++++++ 2 files changed, 272 insertions(+), 1 deletion(-) create mode 100644 tests/cli/test_cli_goal_interrupt.py diff --git a/cli.py b/cli.py index 531716885b..c7c33bce32 100644 --- a/cli.py +++ b/cli.py @@ -2414,6 +2414,11 @@ class HermesCLI: self._agent_running = False self._pending_input = queue.Queue() self._interrupt_queue = queue.Queue() + # Tracks whether the turn that just finished was interrupted via + # Ctrl+C. Consumed by _maybe_continue_goal_after_turn so /goal loops + # don't auto-queue another continuation on top of a user-cancelled + # turn (which would make Ctrl+C feel like it did nothing). + self._last_turn_interrupted = False self._should_exit = False self._last_ctrl_c_time = 0 self._clarify_state = None @@ -7523,6 +7528,15 @@ class HermesCLI: priority and we'll re-judge after that turn). If judge says done, mark it done and tell the user. If judge says continue and we're under budget, push the continuation prompt onto the queue. + + Interrupt handling: if the turn was user-cancelled (Ctrl+C), we + AUTO-PAUSE the goal instead of judging + re-queuing. Otherwise + Ctrl+C feels like it did nothing — the judge runs on whatever + partial output landed, almost always says "continue", and the + loop keeps going. Auto-pause keeps the goal recoverable via + ``/goal resume`` once the user has sorted out what they want. + The empty-response skip mirrors the gateway guard at + ``_handle_message`` in ``gateway/run.py``. """ mgr = self._get_goal_manager() if mgr is None or not mgr.is_active(): @@ -7537,6 +7551,22 @@ class HermesCLI: except Exception: pass + # If the turn was user-interrupted (Ctrl+C), auto-pause the goal + # and bail. The judge call would almost always return "continue" + # on the partial output and immediately re-queue another turn, + # which is exactly what the user cancelled. Pausing (rather than + # silently skipping) is the observable, recoverable behavior. + if getattr(self, "_last_turn_interrupted", False): + try: + mgr.pause(reason="user-interrupted (Ctrl+C)") + except Exception as exc: + logging.debug("goal pause-on-interrupt failed: %s", exc) + _cprint( + f" {_DIM}⏸ Goal paused — turn was interrupted. " + f"Use /goal resume to continue, or /goal clear to stop.{_RST}" + ) + return + # Extract the agent's final response for this turn. last_response = "" try: @@ -7558,6 +7588,13 @@ class HermesCLI: except Exception: last_response = "" + # Skip judging on empty/whitespace-only responses. These are almost + # always transient failures (API error, empty stream) where the + # judge would say "continue" and trip the consecutive-parse-failures + # backstop unnecessarily. Mirrors the gateway guard. + if not last_response.strip(): + return + decision = mgr.evaluate_after_turn(last_response, user_initiated=True) msg = decision.get("message") or "" if msg: @@ -9432,6 +9469,12 @@ class HermesCLI: # register secure secret capture here as well. set_secret_capture_callback(self._secret_capture_callback) + # Reset the per-turn interrupt flag. Any subsequent path that + # discovers an interrupt (below, after run_conversation) will flip + # this to True. Early returns (credential refresh failure, etc.) + # leave it False, which is correct — those aren't user interrupts. + self._last_turn_interrupted = False + # Refresh provider credentials if needed (handles key rotation transparently) if not self._ensure_runtime_credentials(): return None @@ -9855,7 +9898,11 @@ class HermesCLI: # Handle interrupt - check if we were interrupted pending_message = None - if result and result.get("interrupted"): + _interrupted_this_turn = bool(result and result.get("interrupted")) + # Expose the flag for post-turn hooks (e.g. goal continuation) + # so they can skip themselves when the turn was user-cancelled. + self._last_turn_interrupted = _interrupted_this_turn + if _interrupted_this_turn: pending_message = result.get("interrupt_message") or interrupt_msg # Add indicator that we were interrupted if response and pending_message: @@ -10335,6 +10382,9 @@ class HermesCLI: self._agent_running = False self._pending_input = queue.Queue() # For normal input (commands + new queries) self._interrupt_queue = queue.Queue() # For messages typed while agent is running + # See constructor note. Mirrored here for the run() path that skips + # the earlier __init__ branch. + self._last_turn_interrupted = False self._should_exit = False self._last_ctrl_c_time = 0 # Track double Ctrl+C for force exit diff --git a/tests/cli/test_cli_goal_interrupt.py b/tests/cli/test_cli_goal_interrupt.py new file mode 100644 index 0000000000..851b87e856 --- /dev/null +++ b/tests/cli/test_cli_goal_interrupt.py @@ -0,0 +1,221 @@ +"""Tests for CLI goal-continuation interrupt handling. + +Covers: +- Ctrl+C during a /goal turn auto-pauses the goal (no more continuations). +- Empty/whitespace-only responses skip the judge (no phantom continuations). +- Clean response without interrupt still drives the judge + enqueues. + +These tests exercise ``_maybe_continue_goal_after_turn`` directly on a +minimal ``HermesCLI`` stub (pattern used elsewhere in tests/cli). +""" + +from __future__ import annotations + +import queue +import sys +import uuid +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest + + +# ────────────────────────────────────────────────────────────────────── +# Fixtures +# ────────────────────────────────────────────────────────────────────── + + +@pytest.fixture +def hermes_home(tmp_path, monkeypatch): + """Isolated HERMES_HOME so SessionDB.state_meta writes stay hermetic.""" + home = tmp_path / ".hermes" + home.mkdir() + monkeypatch.setattr(Path, "home", lambda: tmp_path) + monkeypatch.setenv("HERMES_HOME", str(home)) + + # Bust the goal module's DB cache so it re-resolves HERMES_HOME each test. + from hermes_cli import goals + goals._DB_CACHE.clear() + yield home + goals._DB_CACHE.clear() + + +def _make_cli_with_goal(session_id: str, goal_text: str = "build a thing"): + """Build a minimal HermesCLI stub with an active goal wired in.""" + from cli import HermesCLI + from hermes_cli.goals import GoalManager + + cli = HermesCLI.__new__(HermesCLI) + # State the hook + helpers touch directly. + cli._pending_input = queue.Queue() + cli._last_turn_interrupted = False + cli.conversation_history = [] + # `_get_goal_manager()` reads `self.session_id` directly, not + # `self.agent.session_id`. Match the production lookup. + cli.session_id = session_id + cli.agent = MagicMock() + cli.agent.session_id = session_id + + mgr = GoalManager(session_id=session_id, default_max_turns=5) + mgr.set(goal_text) + cli._goal_manager = mgr + return cli, mgr + + +# ────────────────────────────────────────────────────────────────────── +# Tests +# ────────────────────────────────────────────────────────────────────── + + +class TestInterruptAutoPause: + def test_interrupted_turn_pauses_goal_and_skips_continuation(self, hermes_home): + """Ctrl+C mid-turn must auto-pause the goal, not queue another round.""" + sid = f"sid-interrupt-{uuid.uuid4().hex}" + cli, mgr = _make_cli_with_goal(sid) + # Simulate an interrupted turn with a partial assistant reply. + cli._last_turn_interrupted = True + cli.conversation_history = [ + {"role": "user", "content": "kickoff"}, + {"role": "assistant", "content": "starting work..."}, + ] + + # Judge MUST NOT run on an interrupted turn. If it does, we've + # regressed — fail loudly instead of silently querying a mock. + with patch("hermes_cli.goals.judge_goal") as judge_mock: + judge_mock.side_effect = AssertionError( + "judge_goal called on an interrupted turn" + ) + cli._maybe_continue_goal_after_turn() + + # Pending input must NOT contain a continuation prompt. + assert cli._pending_input.empty(), ( + "Interrupted turn should not enqueue a continuation prompt" + ) + + # Goal should be paused, not active. + state = mgr.state + assert state is not None + assert state.status == "paused" + assert "interrupt" in (state.paused_reason or "").lower() + + def test_interrupted_turn_is_resumable(self, hermes_home): + """After auto-pause from Ctrl+C, /goal resume puts it back to active.""" + sid = f"sid-resume-{uuid.uuid4().hex}" + cli, mgr = _make_cli_with_goal(sid) + cli._last_turn_interrupted = True + cli.conversation_history = [ + {"role": "assistant", "content": "partial"}, + ] + with patch("hermes_cli.goals.judge_goal"): + cli._maybe_continue_goal_after_turn() + assert mgr.state.status == "paused" + + mgr.resume() + assert mgr.state.status == "active" + + +class TestEmptyResponseSkip: + def test_empty_response_does_not_invoke_judge(self, hermes_home): + """Whitespace-only replies skip judging (transient failure guard).""" + sid = f"sid-empty-{uuid.uuid4().hex}" + cli, mgr = _make_cli_with_goal(sid) + cli._last_turn_interrupted = False + cli.conversation_history = [ + {"role": "user", "content": "go"}, + {"role": "assistant", "content": " \n\n "}, + ] + + with patch("hermes_cli.goals.judge_goal") as judge_mock: + judge_mock.side_effect = AssertionError( + "judge_goal called on an empty response" + ) + cli._maybe_continue_goal_after_turn() + + # No continuation queued; goal still active (neither paused nor done). + assert cli._pending_input.empty() + assert mgr.state.status == "active" + + def test_no_assistant_message_skipped(self, hermes_home): + """Conversation with zero assistant replies must not trip the judge.""" + sid = f"sid-noassistant-{uuid.uuid4().hex}" + cli, mgr = _make_cli_with_goal(sid) + cli._last_turn_interrupted = False + cli.conversation_history = [ + {"role": "user", "content": "go"}, + ] + + with patch("hermes_cli.goals.judge_goal") as judge_mock: + judge_mock.side_effect = AssertionError( + "judge_goal called without an assistant response" + ) + cli._maybe_continue_goal_after_turn() + + assert cli._pending_input.empty() + assert mgr.state.status == "active" + + +class TestHealthyTurnStillRuns: + def test_clean_response_enqueues_continuation_when_judge_says_continue( + self, hermes_home, + ): + """Sanity check: the hook still works in the happy path.""" + sid = f"sid-healthy-{uuid.uuid4().hex}" + cli, mgr = _make_cli_with_goal(sid) + cli._last_turn_interrupted = False + cli.conversation_history = [ + {"role": "user", "content": "go"}, + {"role": "assistant", "content": "did some work, more to do"}, + ] + + # Force the judge to say "continue" without touching the network. + with patch( + "hermes_cli.goals.judge_goal", + return_value=("continue", "needs more steps", False), + ): + cli._maybe_continue_goal_after_turn() + + # Continuation prompt must be queued. + assert not cli._pending_input.empty() + queued = cli._pending_input.get_nowait() + assert "Continuing toward your standing goal" in queued + assert mgr.state.status == "active" + + def test_clean_response_marks_done_when_judge_says_done(self, hermes_home): + sid = f"sid-done-{uuid.uuid4().hex}" + cli, mgr = _make_cli_with_goal(sid) + cli._last_turn_interrupted = False + cli.conversation_history = [ + {"role": "assistant", "content": "all finished, here's the result"}, + ] + + with patch( + "hermes_cli.goals.judge_goal", + return_value=("done", "goal satisfied", False), + ): + cli._maybe_continue_goal_after_turn() + + assert cli._pending_input.empty() + assert mgr.state.status == "done" + + +class TestInterruptFlagLifecycle: + def test_chat_resets_flag_at_entry(self, hermes_home): + """chat() must reset _last_turn_interrupted at the top of each turn. + + This guards against stale flag state: if turn N was interrupted and + turn N+1 runs clean, the hook must not see True from N. + """ + # We can't run chat() end-to-end here, but we can assert the reset + # is the first thing after the secret-capture registration by + # inspecting the source shape. + from cli import HermesCLI + import inspect + + src = inspect.getsource(HermesCLI.chat) + # Look for an explicit reset near the top of chat(). + head = src.split("if not self._ensure_runtime_credentials", 1)[0] + assert "self._last_turn_interrupted = False" in head, ( + "chat() must reset _last_turn_interrupted before run_conversation " + "runs — otherwise a prior turn's interrupt state leaks into the " + "next turn's goal hook decision." + )