From 674fad14832006bfd742c5e3183f34c24018e43a Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Fri, 8 May 2026 06:53:13 -0700
Subject: [PATCH] fix(goals): Ctrl+C during /goal loop auto-pauses the goal
 (#21888)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reported: Ctrl+C during an active /goal loop felt like it did nothing —
the agent would interrupt the current turn, then immediately queue another
continuation and keep going until the session ended or the 20-turn budget
ran out.

Root cause: cli.py's _maybe_continue_goal_after_turn() ran in the finally:
block around self.chat(...) unconditionally. Whether the turn completed
normally, got interrupted, or returned an empty string, the judge ran on
whatever was in conversation_history and — because the judge is fail-open
— a "continue" verdict pushed another CONTINUATION_PROMPT onto
_pending_input. Ctrl+C was invisible to the hook.

Fix:
- chat() now captures result['interrupted'] onto self._last_turn_interrupted
  (resets to False at entry so early-returns don't leak prior state).
- _maybe_continue_goal_after_turn() checks the flag first: on interrupt,
  auto-pause via mgr.pause(reason='user-interrupted (Ctrl+C)') and print
  a one-liner pointing the user at /goal resume or /goal clear. No judge
  call, no continuation enqueued.
- Also added an empty-response guard that mirrors gateway/run.py's
  _handle_message logic (empty reply → transient failure → skip judging
  so we don't trip the consecutive-parse-failures backstop unnecessarily).

The goal stays in the DB as paused, so /goal resume recovers it after
the user has sorted out whatever made them cancel. /goal clear still
works as before for a full stop.

Tests: tests/cli/test_cli_goal_interrupt.py covers:
  - interrupted turn pauses + doesn't queue + judge is NOT called
  - paused goal is resumable
  - empty / whitespace / missing assistant reply skips judging
  - healthy turn still enqueues continuation / marks done
  - chat() resets _last_turn_interrupted at entry (anti-leak guard)

All 55 existing goal tests still pass.
---
 cli.py                               |  52 ++++++-
 tests/cli/test_cli_goal_interrupt.py | 221 +++++++++++++++++++++++++++
 2 files changed, 272 insertions(+), 1 deletion(-)
 create mode 100644 tests/cli/test_cli_goal_interrupt.py

diff --git a/cli.py b/cli.py
index 531716885b..c7c33bce32 100644
--- a/cli.py
+++ b/cli.py
@@ -2414,6 +2414,11 @@ class HermesCLI:
         self._agent_running = False
         self._pending_input = queue.Queue()
         self._interrupt_queue = queue.Queue()
+        # Tracks whether the turn that just finished was interrupted via
+        # Ctrl+C. Consumed by _maybe_continue_goal_after_turn so /goal loops
+        # don't auto-queue another continuation on top of a user-cancelled
+        # turn (which would make Ctrl+C feel like it did nothing).
+        self._last_turn_interrupted = False
         self._should_exit = False
         self._last_ctrl_c_time = 0
         self._clarify_state = None
@@ -7523,6 +7528,15 @@ class HermesCLI:
         priority and we'll re-judge after that turn). If judge says done,
         mark it done and tell the user. If judge says continue and we're
         under budget, push the continuation prompt onto the queue.
+
+        Interrupt handling: if the turn was user-cancelled (Ctrl+C), we
+        AUTO-PAUSE the goal instead of judging + re-queuing. Otherwise
+        Ctrl+C feels like it did nothing — the judge runs on whatever
+        partial output landed, almost always says "continue", and the
+        loop keeps going. Auto-pause keeps the goal recoverable via
+        ``/goal resume`` once the user has sorted out what they want.
+        The empty-response skip mirrors the gateway guard at
+        ``_handle_message`` in ``gateway/run.py``.
         """
         mgr = self._get_goal_manager()
         if mgr is None or not mgr.is_active():
@@ -7537,6 +7551,22 @@ class HermesCLI:
         except Exception:
             pass
 
+        # If the turn was user-interrupted (Ctrl+C), auto-pause the goal
+        # and bail. The judge call would almost always return "continue"
+        # on the partial output and immediately re-queue another turn,
+        # which is exactly what the user cancelled. Pausing (rather than
+        # silently skipping) is the observable, recoverable behavior.
+        if getattr(self, "_last_turn_interrupted", False):
+            try:
+                mgr.pause(reason="user-interrupted (Ctrl+C)")
+            except Exception as exc:
+                logging.debug("goal pause-on-interrupt failed: %s", exc)
+            _cprint(
+                f"  {_DIM}⏸ Goal paused — turn was interrupted. "
+                f"Use /goal resume to continue, or /goal clear to stop.{_RST}"
+            )
+            return
+
         # Extract the agent's final response for this turn.
         last_response = ""
         try:
@@ -7558,6 +7588,13 @@ class HermesCLI:
         except Exception:
             last_response = ""
 
+        # Skip judging on empty/whitespace-only responses. These are almost
+        # always transient failures (API error, empty stream) where the
+        # judge would say "continue" and trip the consecutive-parse-failures
+        # backstop unnecessarily. Mirrors the gateway guard.
+        if not last_response.strip():
+            return
+
         decision = mgr.evaluate_after_turn(last_response, user_initiated=True)
         msg = decision.get("message") or ""
         if msg:
@@ -9432,6 +9469,12 @@ class HermesCLI:
         # register secure secret capture here as well.
         set_secret_capture_callback(self._secret_capture_callback)
 
+        # Reset the per-turn interrupt flag. Any subsequent path that
+        # discovers an interrupt (below, after run_conversation) will flip
+        # this to True. Early returns (credential refresh failure, etc.)
+        # leave it False, which is correct — those aren't user interrupts.
+        self._last_turn_interrupted = False
+
         # Refresh provider credentials if needed (handles key rotation transparently)
         if not self._ensure_runtime_credentials():
             return None
@@ -9855,7 +9898,11 @@ class HermesCLI:
 
             # Handle interrupt - check if we were interrupted
             pending_message = None
-            if result and result.get("interrupted"):
+            _interrupted_this_turn = bool(result and result.get("interrupted"))
+            # Expose the flag for post-turn hooks (e.g. goal continuation)
+            # so they can skip themselves when the turn was user-cancelled.
+            self._last_turn_interrupted = _interrupted_this_turn
+            if _interrupted_this_turn:
                 pending_message = result.get("interrupt_message") or interrupt_msg
                 # Add indicator that we were interrupted
                 if response and pending_message:
@@ -10335,6 +10382,9 @@ class HermesCLI:
         self._agent_running = False
         self._pending_input = queue.Queue()     # For normal input (commands + new queries)
         self._interrupt_queue = queue.Queue()   # For messages typed while agent is running
+        # See constructor note. Mirrored here for the run() path that skips
+        # the earlier __init__ branch.
+        self._last_turn_interrupted = False
         self._should_exit = False
         self._last_ctrl_c_time = 0  # Track double Ctrl+C for force exit
 
diff --git a/tests/cli/test_cli_goal_interrupt.py b/tests/cli/test_cli_goal_interrupt.py
new file mode 100644
index 0000000000..851b87e856
--- /dev/null
+++ b/tests/cli/test_cli_goal_interrupt.py
@@ -0,0 +1,221 @@
+"""Tests for CLI goal-continuation interrupt handling.
+
+Covers:
+- Ctrl+C during a /goal turn auto-pauses the goal (no more continuations).
+- Empty/whitespace-only responses skip the judge (no phantom continuations).
+- Clean response without interrupt still drives the judge + enqueues.
+
+These tests exercise ``_maybe_continue_goal_after_turn`` directly on a
+minimal ``HermesCLI`` stub (pattern used elsewhere in tests/cli).
+"""
+
+from __future__ import annotations
+
+import queue
+import sys
+import uuid
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+
+# ──────────────────────────────────────────────────────────────────────
+# Fixtures
+# ──────────────────────────────────────────────────────────────────────
+
+
+@pytest.fixture
+def hermes_home(tmp_path, monkeypatch):
+    """Isolated HERMES_HOME so SessionDB.state_meta writes stay hermetic."""
+    home = tmp_path / ".hermes"
+    home.mkdir()
+    monkeypatch.setattr(Path, "home", lambda: tmp_path)
+    monkeypatch.setenv("HERMES_HOME", str(home))
+
+    # Bust the goal module's DB cache so it re-resolves HERMES_HOME each test.
+    from hermes_cli import goals
+    goals._DB_CACHE.clear()
+    yield home
+    goals._DB_CACHE.clear()
+
+
+def _make_cli_with_goal(session_id: str, goal_text: str = "build a thing"):
+    """Build a minimal HermesCLI stub with an active goal wired in."""
+    from cli import HermesCLI
+    from hermes_cli.goals import GoalManager
+
+    cli = HermesCLI.__new__(HermesCLI)
+    # State the hook + helpers touch directly.
+    cli._pending_input = queue.Queue()
+    cli._last_turn_interrupted = False
+    cli.conversation_history = []
+    # `_get_goal_manager()` reads `self.session_id` directly, not
+    # `self.agent.session_id`. Match the production lookup.
+    cli.session_id = session_id
+    cli.agent = MagicMock()
+    cli.agent.session_id = session_id
+
+    mgr = GoalManager(session_id=session_id, default_max_turns=5)
+    mgr.set(goal_text)
+    cli._goal_manager = mgr
+    return cli, mgr
+
+
+# ──────────────────────────────────────────────────────────────────────
+# Tests
+# ──────────────────────────────────────────────────────────────────────
+
+
+class TestInterruptAutoPause:
+    def test_interrupted_turn_pauses_goal_and_skips_continuation(self, hermes_home):
+        """Ctrl+C mid-turn must auto-pause the goal, not queue another round."""
+        sid = f"sid-interrupt-{uuid.uuid4().hex}"
+        cli, mgr = _make_cli_with_goal(sid)
+        # Simulate an interrupted turn with a partial assistant reply.
+        cli._last_turn_interrupted = True
+        cli.conversation_history = [
+            {"role": "user", "content": "kickoff"},
+            {"role": "assistant", "content": "starting work..."},
+        ]
+
+        # Judge MUST NOT run on an interrupted turn. If it does, we've
+        # regressed — fail loudly instead of silently querying a mock.
+        with patch("hermes_cli.goals.judge_goal") as judge_mock:
+            judge_mock.side_effect = AssertionError(
+                "judge_goal called on an interrupted turn"
+            )
+            cli._maybe_continue_goal_after_turn()
+
+        # Pending input must NOT contain a continuation prompt.
+        assert cli._pending_input.empty(), (
+            "Interrupted turn should not enqueue a continuation prompt"
+        )
+
+        # Goal should be paused, not active.
+        state = mgr.state
+        assert state is not None
+        assert state.status == "paused"
+        assert "interrupt" in (state.paused_reason or "").lower()
+
+    def test_interrupted_turn_is_resumable(self, hermes_home):
+        """After auto-pause from Ctrl+C, /goal resume puts it back to active."""
+        sid = f"sid-resume-{uuid.uuid4().hex}"
+        cli, mgr = _make_cli_with_goal(sid)
+        cli._last_turn_interrupted = True
+        cli.conversation_history = [
+            {"role": "assistant", "content": "partial"},
+        ]
+        with patch("hermes_cli.goals.judge_goal"):
+            cli._maybe_continue_goal_after_turn()
+        assert mgr.state.status == "paused"
+
+        mgr.resume()
+        assert mgr.state.status == "active"
+
+
+class TestEmptyResponseSkip:
+    def test_empty_response_does_not_invoke_judge(self, hermes_home):
+        """Whitespace-only replies skip judging (transient failure guard)."""
+        sid = f"sid-empty-{uuid.uuid4().hex}"
+        cli, mgr = _make_cli_with_goal(sid)
+        cli._last_turn_interrupted = False
+        cli.conversation_history = [
+            {"role": "user", "content": "go"},
+            {"role": "assistant", "content": "   \n\n   "},
+        ]
+
+        with patch("hermes_cli.goals.judge_goal") as judge_mock:
+            judge_mock.side_effect = AssertionError(
+                "judge_goal called on an empty response"
+            )
+            cli._maybe_continue_goal_after_turn()
+
+        # No continuation queued; goal still active (neither paused nor done).
+        assert cli._pending_input.empty()
+        assert mgr.state.status == "active"
+
+    def test_no_assistant_message_skipped(self, hermes_home):
+        """Conversation with zero assistant replies must not trip the judge."""
+        sid = f"sid-noassistant-{uuid.uuid4().hex}"
+        cli, mgr = _make_cli_with_goal(sid)
+        cli._last_turn_interrupted = False
+        cli.conversation_history = [
+            {"role": "user", "content": "go"},
+        ]
+
+        with patch("hermes_cli.goals.judge_goal") as judge_mock:
+            judge_mock.side_effect = AssertionError(
+                "judge_goal called without an assistant response"
+            )
+            cli._maybe_continue_goal_after_turn()
+
+        assert cli._pending_input.empty()
+        assert mgr.state.status == "active"
+
+
+class TestHealthyTurnStillRuns:
+    def test_clean_response_enqueues_continuation_when_judge_says_continue(
+        self, hermes_home,
+    ):
+        """Sanity check: the hook still works in the happy path."""
+        sid = f"sid-healthy-{uuid.uuid4().hex}"
+        cli, mgr = _make_cli_with_goal(sid)
+        cli._last_turn_interrupted = False
+        cli.conversation_history = [
+            {"role": "user", "content": "go"},
+            {"role": "assistant", "content": "did some work, more to do"},
+        ]
+
+        # Force the judge to say "continue" without touching the network.
+        with patch(
+            "hermes_cli.goals.judge_goal",
+            return_value=("continue", "needs more steps", False),
+        ):
+            cli._maybe_continue_goal_after_turn()
+
+        # Continuation prompt must be queued.
+        assert not cli._pending_input.empty()
+        queued = cli._pending_input.get_nowait()
+        assert "Continuing toward your standing goal" in queued
+        assert mgr.state.status == "active"
+
+    def test_clean_response_marks_done_when_judge_says_done(self, hermes_home):
+        sid = f"sid-done-{uuid.uuid4().hex}"
+        cli, mgr = _make_cli_with_goal(sid)
+        cli._last_turn_interrupted = False
+        cli.conversation_history = [
+            {"role": "assistant", "content": "all finished, here's the result"},
+        ]
+
+        with patch(
+            "hermes_cli.goals.judge_goal",
+            return_value=("done", "goal satisfied", False),
+        ):
+            cli._maybe_continue_goal_after_turn()
+
+        assert cli._pending_input.empty()
+        assert mgr.state.status == "done"
+
+
+class TestInterruptFlagLifecycle:
+    def test_chat_resets_flag_at_entry(self, hermes_home):
+        """chat() must reset _last_turn_interrupted at the top of each turn.
+
+        This guards against stale flag state: if turn N was interrupted and
+        turn N+1 runs clean, the hook must not see True from N.
+        """
+        # We can't run chat() end-to-end here, but we can assert the reset
+        # is the first thing after the secret-capture registration by
+        # inspecting the source shape.
+        from cli import HermesCLI
+        import inspect
+
+        src = inspect.getsource(HermesCLI.chat)
+        # Look for an explicit reset near the top of chat().
+        head = src.split("if not self._ensure_runtime_credentials", 1)[0]
+        assert "self._last_turn_interrupted = False" in head, (
+            "chat() must reset _last_turn_interrupted before run_conversation "
+            "runs — otherwise a prior turn's interrupt state leaks into the "
+            "next turn's goal hook decision."
+        )