diff --git a/hermes_cli/goals.py b/hermes_cli/goals.py
index 0f0f3abd9c..894cdddb01 100644
--- a/hermes_cli/goals.py
+++ b/hermes_cli/goals.py
@@ -47,6 +47,14 @@ DEFAULT_MAX_TURNS = 20
 DEFAULT_JUDGE_TIMEOUT = 30.0
 # Cap how much of the last response + recent messages we send to the judge.
 _JUDGE_RESPONSE_SNIPPET_CHARS = 4000
+# After this many consecutive judge *parse* failures (empty output / non-JSON),
+# the loop auto-pauses and points the user at the goal_judge config. API /
+# transport errors do NOT count toward this — those are transient. This guards
+# against small models (e.g. deepseek-v4-flash) that cannot follow the strict
+# JSON reply contract; without it the loop runs until the turn budget is
+# exhausted with every reply shaped like `judge returned empty response` or
+# `judge reply was not JSON`.
+DEFAULT_MAX_CONSECUTIVE_PARSE_FAILURES = 3
 
 
 CONTINUATION_PROMPT_TEMPLATE = (
@@ -99,6 +107,7 @@ class GoalState:
     last_verdict: Optional[str] = None  # "done" | "continue" | "skipped"
     last_reason: Optional[str] = None
     paused_reason: Optional[str] = None  # why we auto-paused (budget, etc.)
+    consecutive_parse_failures: int = 0  # judge-output parse failures in a row
 
     def to_json(self) -> str:
         return json.dumps(asdict(self), ensure_ascii=False)
@@ -116,6 +125,7 @@ class GoalState:
             last_verdict=data.get("last_verdict"),
             last_reason=data.get("last_reason"),
             paused_reason=data.get("paused_reason"),
+            consecutive_parse_failures=int(data.get("consecutive_parse_failures", 0) or 0),
         )
 
 
@@ -220,13 +230,17 @@ def _truncate(text: str, limit: int) -> str:
 _JSON_OBJECT_RE = re.compile(r"\{.*?\}", re.DOTALL)
 
 
-def _parse_judge_response(raw: str) -> Tuple[bool, str]:
-    """Parse the judge's reply. Fail-open to ``(False, "")``.
+def _parse_judge_response(raw: str) -> Tuple[bool, str, bool]:
+    """Parse the judge's reply. Fails open rather than raising.
 
-    Returns ``(done, reason)``.
+    Returns ``(done, reason, parse_failed)``. ``parse_failed`` is True when the
+    judge returned output that couldn't be interpreted as the expected JSON
+    verdict (empty body, prose, malformed JSON). Callers use that flag to
+    auto-pause after N consecutive parse failures so a weak judge model
+    doesn't silently burn the turn budget.
     """
     if not raw:
-        return False, "judge returned empty response"
+        return False, "judge returned empty response", True
 
     text = raw.strip()
 
@@ -252,7 +266,7 @@
         data = None
 
     if not isinstance(data, dict):
-        return False, f"judge reply was not JSON: {_truncate(raw, 200)!r}"
+        return False, f"judge reply was not JSON: {_truncate(raw, 200)!r}", True
 
     done_val = data.get("done")
     if isinstance(done_val, str):
@@ -262,7 +276,7 @@
     reason = str(data.get("reason") or "").strip()
     if not reason:
         reason = "no reason provided"
-    return done, reason
+    return done, reason, False
 
 
 def judge_goal(
@@ -270,36 +284,42 @@
     last_response: str,
     *,
     timeout: float = DEFAULT_JUDGE_TIMEOUT,
-) -> Tuple[str, str]:
+) -> Tuple[str, str, bool]:
     """Ask the auxiliary model whether the goal is satisfied.
 
-    Returns ``(verdict, reason)`` where verdict is ``"done"``, ``"continue"``,
-    or ``"skipped"`` (when the judge couldn't be reached).
+    Returns ``(verdict, reason, parse_failed)`` where verdict is ``"done"``,
+    ``"continue"``, or ``"skipped"`` (when the judge couldn't be reached).
 
-    This is deliberately fail-open: any error returns ``("continue", "...")``
-    so a broken judge doesn't wedge progress — the turn budget is the
-    backstop.
+    ``parse_failed`` is True only when the judge call succeeded but its output
+    was unusable (empty or non-JSON). API/transport errors report
+    ``parse_failed=False`` — they are transient and fail open silently. Callers
+    use this flag to auto-pause after N consecutive parse failures (see
+    ``DEFAULT_MAX_CONSECUTIVE_PARSE_FAILURES``).
+
+    This is deliberately fail-open: any error returns ``("continue", "...", False)``
+    so a broken judge doesn't wedge progress — the turn budget and the
+    consecutive-parse-failures auto-pause are the backstops.
     """
     if not goal.strip():
-        return "skipped", "empty goal"
+        return "skipped", "empty goal", False
     if not last_response.strip():
         # No substantive reply this turn — almost certainly not done yet.
-        return "continue", "empty response (nothing to evaluate)"
+        return "continue", "empty response (nothing to evaluate)", False
 
     try:
         from agent.auxiliary_client import get_text_auxiliary_client
     except Exception as exc:
         logger.debug("goal judge: auxiliary client import failed: %s", exc)
-        return "continue", "auxiliary client unavailable"
+        return "continue", "auxiliary client unavailable", False
 
     try:
         client, model = get_text_auxiliary_client("goal_judge")
     except Exception as exc:
         logger.debug("goal judge: get_text_auxiliary_client failed: %s", exc)
-        return "continue", "auxiliary client unavailable"
+        return "continue", "auxiliary client unavailable", False
 
     if client is None or not model:
-        return "continue", "no auxiliary client configured"
+        return "continue", "no auxiliary client configured", False
 
     prompt = JUDGE_USER_PROMPT_TEMPLATE.format(
         goal=_truncate(goal, 2000),
@@ -319,17 +339,17 @@
         )
     except Exception as exc:
         logger.info("goal judge: API call failed (%s) — falling through to continue", exc)
-        return "continue", f"judge error: {type(exc).__name__}"
+        return "continue", f"judge error: {type(exc).__name__}", False
 
     try:
         raw = resp.choices[0].message.content or ""
     except Exception:
         raw = ""
 
-    done, reason = _parse_judge_response(raw)
+    done, reason, parse_failed = _parse_judge_response(raw)
     verdict = "done" if done else "continue"
     logger.info("goal judge: verdict=%s reason=%s", verdict, _truncate(reason, 120))
-    return verdict, reason
+    return verdict, reason, parse_failed
 
 
 # ──────────────────────────────────────────────────────────────────────
@@ -473,10 +493,18 @@ class GoalManager:
         state.turns_used += 1
         state.last_turn_at = time.time()
 
-        verdict, reason = judge_goal(state.goal, last_response)
+        verdict, reason, parse_failed = judge_goal(state.goal, last_response)
         state.last_verdict = verdict
         state.last_reason = reason
 
+        # Track consecutive judge parse failures. Reset on anything that is
+        # not a parse failure — API / transport errors report parse_failed=False
+        # — so a flaky network doesn't trip the auto-pause meant for bad judges.
+        if parse_failed:
+            state.consecutive_parse_failures += 1
+        else:
+            state.consecutive_parse_failures = 0
+
         if verdict == "done":
             state.status = "done"
             save_goal(self.session_id, state)
@@ -489,6 +517,36 @@ class GoalManager:
                 "message": f"✓ Goal achieved: {reason}",
             }
 
+        # Auto-pause when the judge model can't produce the expected JSON
+        # verdict N turns in a row. Points the user at the goal_judge config
+        # so they can route this side task to a model that follows the
+        # contract (e.g. google/gemini-3-flash-preview). Without this guard,
+        # weak judge models burn the entire turn budget returning prose or
+        # empty strings.
+        if state.consecutive_parse_failures >= DEFAULT_MAX_CONSECUTIVE_PARSE_FAILURES:
+            state.status = "paused"
+            state.paused_reason = (
+                f"judge model returned unparseable output {state.consecutive_parse_failures} turns in a row"
+            )
+            save_goal(self.session_id, state)
+            return {
+                "status": "paused",
+                "should_continue": False,
+                "continuation_prompt": None,
+                "verdict": "continue",
+                "reason": reason,
+                "message": (
+                    "⏸ Goal paused — the judge model failed to return the required JSON "
+                    f"verdict {state.consecutive_parse_failures} turns in a row. Route the judge to a stricter "
+                    "model in ~/.hermes/config.yaml:\n"
+                    "  auxiliary:\n"
+                    "    goal_judge:\n"
+                    "      provider: openrouter\n"
+                    "      model: google/gemini-3-flash-preview\n"
+                    "Then /goal resume to continue."
+                ),
+            }
+
         if state.turns_used >= state.max_turns:
             state.status = "paused"
             state.paused_reason = f"turn budget exhausted ({state.turns_used}/{state.max_turns})"
diff --git a/scripts/release.py b/scripts/release.py
index 07e2a3a747..5259b01b7a 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -58,6 +58,7 @@ AUTHOR_MAP = {
     "223003280+Abd0r@users.noreply.github.com": "Abd0r",
     "abdielv@proton.me": "AJV20",
     "mason@growagainorchids.com": "masonjames",
+    "ytchen0719@gmail.com": "liquidchen",
     "am@studio1.tailb672fe.ts.net": "subtract0",
     "axmaiqiu@gmail.com": "qWaitCrypto",
     "159539633+MottledShadow@users.noreply.github.com": "MottledShadow",
diff --git a/tests/gateway/test_goal_verdict_send.py b/tests/gateway/test_goal_verdict_send.py
index bb66851608..14f536aa4f 100644
--- a/tests/gateway/test_goal_verdict_send.py
+++ b/tests/gateway/test_goal_verdict_send.py
@@ -61,8 +61,9 @@ class _RecordingAdapter:
         return _R()
 
 
-def _make_runner_with_adapter():
+def _make_runner_with_adapter(session_id: str | None = None):
     from gateway.run import GatewayRunner
+    import uuid
 
     runner = object.__new__(GatewayRunner)
     runner.config = GatewayConfig(
@@ -74,9 +75,12 @@
     runner._queued_events = {}
 
     src = _make_source()
+    # Default to a unique session_id so xdist parallel runs on the same worker
+    # don't see each other's GoalManager state (DEFAULT_DB_PATH gets frozen at
+    # module-import time, defeating per-test HERMES_HOME monkeypatches).
     session_entry = SessionEntry(
         session_key=build_session_key(src),
-        session_id="goal-sess-1",
+        session_id=session_id or f"goal-sess-{uuid.uuid4().hex[:8]}",
         created_at=datetime.now(),
         updated_at=datetime.now(),
         platform=Platform.TELEGRAM,
@@ -103,8 +107,8 @@ async def test_goal_verdict_done_sent_via_adapter_send(hermes_home):
     mgr = GoalManager(session_entry.session_id)
     mgr.set("ship the feature")
 
-    with patch("hermes_cli.goals.judge_goal", return_value=("done", "the feature shipped")):
-        runner._post_turn_goal_continuation(
+    with patch("hermes_cli.goals.judge_goal", return_value=("done", "the feature shipped", False)):
+        await runner._post_turn_goal_continuation(
             session_entry=session_entry,
             source=src,
             final_response="I shipped the feature.",
@@ -132,8 +136,8 @@ async def test_goal_verdict_continue_enqueues_continuation(hermes_home):
     mgr = GoalManager(session_entry.session_id)
     mgr.set("polish the docs")
 
-    with patch("hermes_cli.goals.judge_goal", return_value=("continue", "still needs work")):
-        runner._post_turn_goal_continuation(
+    with patch("hermes_cli.goals.judge_goal", return_value=("continue", "still needs work", False)):
+        await runner._post_turn_goal_continuation(
             session_entry=session_entry,
             source=src,
             final_response="here's a partial edit",
@@ -160,8 +164,8 @@ async def test_goal_verdict_budget_exhausted_sends_pause(hermes_home):
     state.turns_used = 2
     save_goal(session_entry.session_id, state)
 
-    with patch("hermes_cli.goals.judge_goal", return_value=("continue", "keep going")):
-        runner._post_turn_goal_continuation(
+    with patch("hermes_cli.goals.judge_goal", return_value=("continue", "keep going", False)):
+        await runner._post_turn_goal_continuation(
             session_entry=session_entry,
             source=src,
             final_response="still partial",
@@ -181,7 +185,7 @@ async def test_goal_verdict_skipped_when_no_active_goal(hermes_home):
     """No goal set → the hook is a no-op. Nothing is sent, nothing enqueued."""
     runner, adapter, session_entry, src = _make_runner_with_adapter()
 
-    runner._post_turn_goal_continuation(
+    await runner._post_turn_goal_continuation(
         session_entry=session_entry,
         source=src,
         final_response="anything",
@@ -207,9 +211,9 @@ async def test_goal_verdict_survives_adapter_without_send(hermes_home):
 
     runner.adapters[Platform.TELEGRAM] = _NoSendAdapter()
 
-    with patch("hermes_cli.goals.judge_goal", return_value=("done", "ok")):
+    with patch("hermes_cli.goals.judge_goal", return_value=("done", "ok", False)):
         # must not raise
-        runner._post_turn_goal_continuation(
+        await runner._post_turn_goal_continuation(
             session_entry=session_entry,
             source=src,
             final_response="whatever",
diff --git a/tests/hermes_cli/test_goals.py b/tests/hermes_cli/test_goals.py
index a21c5f4749..b5afd716c9 100644
--- a/tests/hermes_cli/test_goals.py
+++ b/tests/hermes_cli/test_goals.py
@@ -40,14 +40,14 @@ class TestParseJudgeResponse:
     def test_clean_json_done(self):
         from hermes_cli.goals import _parse_judge_response
 
-        done, reason = _parse_judge_response('{"done": true, "reason": "all good"}')
+        done, reason, _ = _parse_judge_response('{"done": true, "reason": "all good"}')
         assert done is True
         assert reason == "all good"
 
     def test_clean_json_continue(self):
         from hermes_cli.goals import _parse_judge_response
 
-        done, reason = _parse_judge_response('{"done": false, "reason": "more work needed"}')
+        done, reason, _ = _parse_judge_response('{"done": false, "reason": "more work needed"}')
         assert done is False
         assert reason == "more work needed"
 
@@ -55,7 +55,7 @@ class TestParseJudgeResponse:
         from hermes_cli.goals import _parse_judge_response
 
         raw = '```json\n{"done": true, "reason": "done"}\n```'
-        done, reason = _parse_judge_response(raw)
+        done, reason, _ = _parse_judge_response(raw)
         assert done is True
         assert "done" in reason
 
@@ -64,7 +64,7 @@ class TestParseJudgeResponse:
         from hermes_cli.goals import _parse_judge_response
 
         raw = 'Looking at this... the agent says X. Verdict: {"done": false, "reason": "partial"}'
-        done, reason = _parse_judge_response(raw)
+        done, reason, _ = _parse_judge_response(raw)
         assert done is False
         assert reason == "partial"
 
@@ -72,24 +72,24 @@ class TestParseJudgeResponse:
         from hermes_cli.goals import _parse_judge_response
 
         for s in ("true", "yes", "done", "1"):
-            done, _ = _parse_judge_response(f'{{"done": "{s}", "reason": "r"}}')
+            done, _, _ = _parse_judge_response(f'{{"done": "{s}", "reason": "r"}}')
             assert done is True
 
         for s in ("false", "no", "not yet"):
-            done, _ = _parse_judge_response(f'{{"done": "{s}", "reason": "r"}}')
+            done, _, _ = _parse_judge_response(f'{{"done": "{s}", "reason": "r"}}')
             assert done is False
 
     def test_malformed_json_fails_open(self):
         """Non-JSON → not done, with error-ish reason (so judge_goal can map to continue)."""
         from hermes_cli.goals import _parse_judge_response
 
-        done, reason = _parse_judge_response("this is not json at all")
+        done, reason, _ = _parse_judge_response("this is not json at all")
         assert done is False
         assert reason  # non-empty
 
     def test_empty_response(self):
         from hermes_cli.goals import _parse_judge_response
 
-        done, reason = _parse_judge_response("")
+        done, reason, _ = _parse_judge_response("")
         assert done is False
         assert reason
 
@@ -103,13 +103,13 @@ class TestJudgeGoal:
     def test_empty_goal_skipped(self):
         from hermes_cli.goals import judge_goal
 
-        verdict, _ = judge_goal("", "some response")
+        verdict, _, _ = judge_goal("", "some response")
         assert verdict == "skipped"
 
     def test_empty_response_continues(self):
         from hermes_cli.goals import judge_goal
 
-        verdict, _ = judge_goal("ship the thing", "")
+        verdict, _, _ = judge_goal("ship the thing", "")
         assert verdict == "continue"
 
     def test_no_aux_client_continues(self):
@@ -120,7 +120,7 @@
             "agent.auxiliary_client.get_text_auxiliary_client",
             return_value=(None, None),
         ):
-            verdict, _ = goals.judge_goal("my goal", "my response")
+            verdict, _, _ = goals.judge_goal("my goal", "my response")
         assert verdict == "continue"
 
     def test_api_error_continues(self):
@@ -133,7 +133,7 @@
             "agent.auxiliary_client.get_text_auxiliary_client",
             return_value=(fake_client, "judge-model"),
         ):
-            verdict, reason = goals.judge_goal("goal", "response")
+            verdict, reason, _ = goals.judge_goal("goal", "response")
         assert verdict == "continue"
         assert "judge error" in reason.lower()
 
@@ -152,7 +152,7 @@
             "agent.auxiliary_client.get_text_auxiliary_client",
             return_value=(fake_client, "judge-model"),
         ):
-            verdict, reason = goals.judge_goal("goal", "agent response")
+            verdict, reason, _ = goals.judge_goal("goal", "agent response")
         assert verdict == "done"
         assert reason == "achieved"
 
@@ -171,7 +171,7 @@
             "agent.auxiliary_client.get_text_auxiliary_client",
             return_value=(fake_client, "judge-model"),
        ):
-            verdict, reason = goals.judge_goal("goal", "agent response")
+            verdict, reason, _ = goals.judge_goal("goal", "agent response")
         assert verdict == "continue"
         assert reason == "not yet"
 
@@ -260,7 +260,7 @@ class TestGoalManager:
         mgr = GoalManager(session_id="eval-sid-1")
         mgr.set("ship it")
 
-        with patch.object(goals, "judge_goal", return_value=("done", "shipped")):
+        with patch.object(goals, "judge_goal", return_value=("done", "shipped", False)):
             decision = mgr.evaluate_after_turn("I shipped the feature.")
 
         assert decision["verdict"] == "done"
@@ -276,7 +276,7 @@ class TestGoalManager:
         mgr = GoalManager(session_id="eval-sid-2", default_max_turns=5)
         mgr.set("a long goal")
 
-        with patch.object(goals, "judge_goal", return_value=("continue", "more work")):
+        with patch.object(goals, "judge_goal", return_value=("continue", "more work", False)):
             decision = mgr.evaluate_after_turn("made some progress")
 
         assert decision["verdict"] == "continue"
@@ -294,7 +294,7 @@ class TestGoalManager:
         mgr = GoalManager(session_id="eval-sid-3", default_max_turns=2)
         mgr.set("hard goal")
 
-        with patch.object(goals, "judge_goal", return_value=("continue", "not yet")):
+        with patch.object(goals, "judge_goal", return_value=("continue", "not yet", False)):
             d1 = mgr.evaluate_after_turn("step 1")
             assert d1["should_continue"] is True
             assert mgr.state.turns_used == 1
@@ -356,3 +356,161 @@ def test_goal_command_dispatches_in_cli_registry_helpers():
     assert "/goal" in COMMANDS
     session_cmds = COMMANDS_BY_CATEGORY.get("Session", {})
     assert "/goal" in session_cmds
+
+
+# ──────────────────────────────────────────────────────────────────────
+# Auto-pause on consecutive judge parse failures
+# ──────────────────────────────────────────────────────────────────────
+
+
+class TestJudgeParseFailureAutoPause:
+    """Regression: weak judge models (e.g. deepseek-v4-flash) that return
+    empty strings or non-JSON prose must auto-pause the loop after N turns
+    instead of burning the whole turn budget."""
+
+    def test_parse_response_flags_empty_as_parse_failure(self):
+        from hermes_cli.goals import _parse_judge_response
+
+        done, reason, parse_failed = _parse_judge_response("")
+        assert done is False
+        assert parse_failed is True
+        assert "empty" in reason.lower()
+
+    def test_parse_response_flags_non_json_as_parse_failure(self):
+        from hermes_cli.goals import _parse_judge_response
+
+        done, reason, parse_failed = _parse_judge_response(
+            "Let me analyze whether the goal is fully satisfied based on the agent's response..."
+        )
+        assert done is False
+        assert parse_failed is True
+        assert "not json" in reason.lower()
+
+    def test_parse_response_clean_json_is_not_parse_failure(self):
+        from hermes_cli.goals import _parse_judge_response
+
+        done, _, parse_failed = _parse_judge_response(
+            '{"done": false, "reason": "more work"}'
+        )
+        assert done is False
+        assert parse_failed is False
+
+    def test_api_error_does_not_count_as_parse_failure(self):
+        """Transient network/API errors must not trip the auto-pause guard."""
+        from hermes_cli import goals
+
+        fake_client = MagicMock()
+        fake_client.chat.completions.create.side_effect = RuntimeError("connection reset")
+        with patch(
+            "agent.auxiliary_client.get_text_auxiliary_client",
+            return_value=(fake_client, "judge-model"),
+        ):
+            verdict, _, parse_failed = goals.judge_goal("goal", "response")
+        assert verdict == "continue"
+        assert parse_failed is False
+
+    def test_empty_judge_reply_flagged_as_parse_failure(self):
+        """End-to-end: judge returns empty content → parse_failed=True."""
+        from hermes_cli import goals
+
+        fake_client = MagicMock()
+        fake_client.chat.completions.create.return_value = MagicMock(
+            choices=[MagicMock(message=MagicMock(content=""))]
+        )
+        with patch(
+            "agent.auxiliary_client.get_text_auxiliary_client",
+            return_value=(fake_client, "judge-model"),
+        ):
+            verdict, _, parse_failed = goals.judge_goal("goal", "response")
+        assert verdict == "continue"
+        assert parse_failed is True
+
+    def test_auto_pause_after_three_consecutive_parse_failures(self, hermes_home):
+        """N=3 consecutive parse failures → auto-pause with config pointer."""
+        from hermes_cli import goals
+        from hermes_cli.goals import GoalManager, DEFAULT_MAX_CONSECUTIVE_PARSE_FAILURES
+
+        assert DEFAULT_MAX_CONSECUTIVE_PARSE_FAILURES == 3
+        mgr = GoalManager(session_id="parse-fail-sid-1", default_max_turns=20)
+        mgr.set("do a thing")
+
+        with patch.object(
+            goals, "judge_goal", return_value=("continue", "judge returned empty response", True)
+        ):
+            d1 = mgr.evaluate_after_turn("step 1")
+            assert d1["should_continue"] is True
+            assert mgr.state.consecutive_parse_failures == 1
+
+            d2 = mgr.evaluate_after_turn("step 2")
+            assert d2["should_continue"] is True
+            assert mgr.state.consecutive_parse_failures == 2
+
+            d3 = mgr.evaluate_after_turn("step 3")
+            assert d3["should_continue"] is False
+            assert d3["status"] == "paused"
+            assert mgr.state.consecutive_parse_failures == 3
+            # Message points at the config surface so the user can fix it.
+            assert "auxiliary" in d3["message"]
+            assert "goal_judge" in d3["message"]
+            assert "config.yaml" in d3["message"]
+
+    def test_parse_failure_counter_resets_on_good_reply(self, hermes_home):
+        """A single good judge reply resets the counter — transient flakes don't pause."""
+        from hermes_cli import goals
+        from hermes_cli.goals import GoalManager
+
+        mgr = GoalManager(session_id="parse-fail-sid-2", default_max_turns=20)
+        mgr.set("another goal")
+
+        # Two parse failures…
+        with patch.object(
+            goals, "judge_goal", return_value=("continue", "not json", True)
+        ):
+            mgr.evaluate_after_turn("step 1")
+            mgr.evaluate_after_turn("step 2")
+        assert mgr.state.consecutive_parse_failures == 2
+
+        # …then one clean reply resets the counter.
+        with patch.object(
+            goals, "judge_goal", return_value=("continue", "making progress", False)
+        ):
+            d = mgr.evaluate_after_turn("step 3")
+        assert d["should_continue"] is True
+        assert mgr.state.consecutive_parse_failures == 0
+
+    def test_parse_failure_counter_not_incremented_by_api_errors(self, hermes_home):
+        """API/transport errors must NOT count toward the auto-pause threshold."""
+        from hermes_cli import goals
+        from hermes_cli.goals import GoalManager
+
+        mgr = GoalManager(session_id="parse-fail-sid-3", default_max_turns=20)
+        mgr.set("goal")
+
+        with patch.object(
+            goals, "judge_goal", return_value=("continue", "judge error: RuntimeError", False)
+        ):
+            for _ in range(5):
+                d = mgr.evaluate_after_turn("still going")
+                assert d["should_continue"] is True
+        assert mgr.state.consecutive_parse_failures == 0
+        assert mgr.state.status == "active"
+
+    def test_consecutive_parse_failures_persists_across_goalmanager_reloads(
+        self, hermes_home
+    ):
+        """The counter must be durable so cross-session resumes see it."""
+        from hermes_cli import goals
+        from hermes_cli.goals import GoalManager, load_goal
+
+        mgr = GoalManager(session_id="parse-fail-sid-4", default_max_turns=20)
+        mgr.set("persistent goal")
+
+        with patch.object(
+            goals, "judge_goal", return_value=("continue", "empty", True)
+        ):
+            mgr.evaluate_after_turn("r")
+            mgr.evaluate_after_turn("r")
+
+        reloaded = load_goal("parse-fail-sid-4")
+        assert reloaded is not None
+        assert reloaded.consecutive_parse_failures == 2
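
For anyone reviewing the counter semantics in isolation, here is a minimal standalone sketch of the contract `GoalManager.evaluate_after_turn` implements above. This is a toy re-statement, not project code: `PauseTracker` and `record()` are hypothetical names, and only the threshold of 3 mirrors `DEFAULT_MAX_CONSECUTIVE_PARSE_FAILURES` from the patch.

from dataclasses import dataclass


@dataclass
class PauseTracker:
    # Mirrors GoalState.consecutive_parse_failures / status from the patch.
    consecutive_parse_failures: int = 0
    status: str = "active"

    def record(self, parse_failed: bool, threshold: int = 3) -> None:
        """Apply one judge outcome: parse failures accumulate, anything else resets."""
        if parse_failed:
            self.consecutive_parse_failures += 1
        else:
            # Clean verdicts and transient API errors both report
            # parse_failed=False, so both reset the counter.
            self.consecutive_parse_failures = 0
        if self.consecutive_parse_failures >= threshold:
            self.status = "paused"


tracker = PauseTracker()
for parse_failed in (True, True, False, True, True, True):
    tracker.record(parse_failed)
assert tracker.status == "paused"  # the mid-run reset delays the pause to turn 6

The reset-on-anything-usable rule is what keeps a flaky network from masquerading as a broken judge model: only an unbroken run of unparseable replies trips the pause.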