diff --git a/agent/agent_init.py b/agent/agent_init.py
index 71b04e3e540..9b89028e3fa 100644
--- a/agent/agent_init.py
+++ b/agent/agent_init.py
@@ -1105,6 +1105,9 @@ def init_agent(
     compression_protect_first = max(
         0, int(_compression_cfg.get("protect_first_n", 3))
     )
+    compression_abort_on_summary_failure = str(
+        _compression_cfg.get("abort_on_summary_failure", False)
+    ).lower() in {"true", "1", "yes"}
 
     # Read optional explicit context_length override for the auxiliary
     # compression model. Custom endpoints often cannot report this via
@@ -1319,6 +1322,7 @@ def init_agent(
             config_context_length=_config_context_length,
             provider=agent.provider,
             api_mode=agent.api_mode,
+            abort_on_summary_failure=compression_abort_on_summary_failure,
         )
     agent.compression_enabled = compression_enabled
 
diff --git a/agent/context_compressor.py b/agent/context_compressor.py
index 8ef9796df7f..62636809094 100644
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@@ -523,6 +523,7 @@ class ContextCompressor(ContextEngine):
         config_context_length: int | None = None,
         provider: str = "",
         api_mode: str = "",
+        abort_on_summary_failure: bool = False,
     ):
         self.model = model
         self.base_url = base_url
@@ -534,6 +535,11 @@ class ContextCompressor(ContextEngine):
         self.protect_last_n = protect_last_n
         self.summary_target_ratio = max(0.10, min(summary_target_ratio, 0.80))
         self.quiet_mode = quiet_mode
+        # When True, a summary-generation failure aborts compression entirely:
+        # messages are returned unchanged and _last_compress_aborted is set.
+        # When False (the default, matching historical behavior), a static
+        # "summary unavailable" placeholder replaces the dropped middle window.
+        self.abort_on_summary_failure = abort_on_summary_failure
 
         self.context_length = get_model_context_length(
             model, base_url=base_url, api_key=api_key,
@@ -1596,24 +1602,26 @@ The user has requested that this compaction PRIORITISE preserving all informatio
         # Phase 3: Generate structured summary
         summary = self._generate_summary(turns_to_summarize, focus_topic=focus_topic)
 
-        # If summary generation failed, ABORT compression entirely.  Returning
-        # the original messages unchanged preserves the full conversation
-        # context.  Previously this branch dropped every middle message and
-        # replaced them with a static "summary unavailable" placeholder,
-        # which silently lost N turns of work whenever the aux LLM hiccuped.
-        # Auto-compress callers detect the no-op (post-compress length ==
-        # pre-compress length) and stop looping.  The next call to
-        # _generate_summary is gated by _summary_failure_cooldown_until, so
-        # we don't burn the aux model every turn.  Users can force a retry
-        # via /compress (which passes force=True to clear the cooldown).
-        if not summary:
+        # If summary generation failed, behavior splits on
+        # ``abort_on_summary_failure`` (config: compression.abort_on_summary_failure):
+        #   True  → ABORT compression entirely. Return messages unchanged
+        #           and set _last_compress_aborted=True so callers can warn
+        #           the user and stop the auto-compress retry loop.
+        #   False → Fall through to the legacy fallback path below: insert
+        #           a static "summary unavailable" placeholder and drop the
+        #           middle window.  Records _last_summary_fallback_used /
+        #           _last_summary_dropped_count for gateway hygiene to
+        #           surface a warning.
+        # Default is False (historical behavior).
+        if not summary and self.abort_on_summary_failure:
             n_skipped = compress_end - compress_start
             self._last_summary_dropped_count = 0  # nothing actually dropped
             self._last_summary_fallback_used = False
             self._last_compress_aborted = True
             if not self.quiet_mode:
                 logger.warning(
-                    "Summary generation failed — aborting compression. "
+                    "Summary generation failed — aborting compression "
+                    "(compression.abort_on_summary_failure=true). "
                     "%d message(s) preserved unchanged. Conversation is "
                     "frozen until the next /compress or /new.",
                     n_skipped,
@@ -1634,6 +1642,23 @@ The user has requested that this compaction PRIORITISE preserving all informatio
                     )
             compressed.append(msg)
 
+        # Legacy fallback path: LLM summary failed and abort_on_summary_failure
+        # is False (the default).  Insert a static placeholder so the model
+        # knows context was lost rather than silently dropping everything.
+        if not summary:
+            if not self.quiet_mode:
+                logger.warning("Summary generation failed — inserting static fallback context marker")
+            n_dropped = compress_end - compress_start
+            self._last_summary_dropped_count = n_dropped
+            self._last_summary_fallback_used = True
+            summary = (
+                f"{SUMMARY_PREFIX}\n"
+                f"Summary generation was unavailable. {n_dropped} message(s) were "
+                f"removed to free context space but could not be summarized. The removed "
+                f"messages contained earlier work in this session. Continue based on the "
+                f"recent messages below and the current state of any files or resources."
+            )
+
         _merge_summary_into_tail = False
         last_head_role = messages[compress_start - 1].get("role", "user") if compress_start > 0 else "user"
         first_tail_role = messages[compress_end].get("role", "user") if compress_end < n_messages else "user"
diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index e69c51a4d3b..ce3ddd54108 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -803,6 +803,17 @@ DEFAULT_CONFIG = {
                                       # 0 for long-running rolling-compaction sessions
                                       # where you want nothing pinned except the
                                       # system prompt + rolling summary + recent tail.
+        "abort_on_summary_failure": False,  # When True, auto-compression that fails
+                                      # to generate a summary (aux LLM errored / returned
+                                      # empty content / timed out) aborts entirely instead of
+                                      # dropping the middle window with a static
+                                      # "summary unavailable" placeholder.  Messages are
+                                      # preserved unchanged and the session "freezes" at
+                                      # its current size until the user runs /compress
+                                      # (which bypasses the failure cooldown) or /new.
+                                      # Default False matches historical behavior; set to
+                                      # True if you'd rather pause than silently lose
+                                      # context turns when your aux model is flaky.
     },
 
     # Anthropic prompt caching (Claude via OpenRouter or native Anthropic API).
diff --git a/tests/agent/test_context_compressor.py b/tests/agent/test_context_compressor.py
index e952732075e..d8691fdf87c 100644
--- a/tests/agent/test_context_compressor.py
+++ b/tests/agent/test_context_compressor.py
@@ -64,31 +64,28 @@ class TestCompress:
         result = compressor.compress(msgs)
         assert result == msgs
 
-    def test_no_client_aborts_compression_with_messages_preserved(self, compressor):
-        """compressor has no provider configured, so _generate_summary returns
-        None → compression aborts entirely.  Messages must be returned
-        unchanged (no placeholder, no drop) and _last_compress_aborted set."""
+    def test_truncation_fallback_no_client(self, compressor):
+        # compressor has client=None and abort_on_summary_failure=False (default),
+        # so the LEGACY fallback path inserts a static "summary unavailable"
+        # placeholder and the middle window is dropped.
         msgs = [{"role": "system", "content": "System prompt"}] + self._make_messages(10)
         result = compressor.compress(msgs)
-        # Abort path: messages preserved byte-for-byte
-        assert result == msgs
-        assert compressor._last_compress_aborted is True
-        # Compression count NOT incremented on abort — nothing was compressed.
-        assert compressor.compression_count == 0
+        assert len(result) < len(msgs)
+        # Should keep system message and last N
+        assert result[0]["role"] == "system"
+        assert compressor.compression_count == 1
+        # Abort flag must NOT fire under the default config.
+        assert compressor._last_compress_aborted is False
+        assert compressor._last_summary_fallback_used is True
 
     def test_compression_increments_count(self, compressor):
         msgs = self._make_messages(10)
-        mock_resp = MagicMock()
-        mock_resp.choices = [MagicMock()]
-        mock_resp.choices[0].message.content = "summary text"
-        with patch("agent.context_compressor.call_llm", return_value=mock_resp):
-            compressor.compress(msgs)
-            assert compressor.compression_count == 1
-            # Reset cooldown isn't needed (no prior failure) but reset
-            # iterative-summary state so the next call follows the same
-            # path as the first.
-            compressor.compress(msgs)
-            assert compressor.compression_count == 2
+        # No LLM is configured, so summary generation fails.  Under the default
+        # config (abort_on_summary_failure=False) the fallback path still counts.
+        compressor.compress(msgs)
+        assert compressor.compression_count == 1
+        compressor.compress(msgs)
+        assert compressor.compression_count == 2
 
     def test_protects_first_and_last(self, compressor):
         msgs = self._make_messages(10)
@@ -138,11 +135,7 @@ class TestGenerateSummaryNoneContent:
             {"role": "user" if i % 2 == 0 else "assistant", "content": f"msg {i}"}
             for i in range(10)
         ]
-        mock_resp = MagicMock()
-        mock_resp.choices = [MagicMock()]
-        mock_resp.choices[0].message.content = "summary text"
-        with patch("agent.context_compressor.call_llm", return_value=mock_resp):
-            result = c.compress(msgs)
+        result = c.compress(msgs)
         assert len(result) < len(msgs)
 
 
@@ -730,14 +723,12 @@ class TestAuxModelFallbackSurfacedToCallers:
 
 
 class TestSummaryFailureTrackingForGatewayWarning:
-    """When summary generation fails, the compressor must ABORT compression
-    entirely (return the original messages unchanged) and set the abort flag
-    so gateway hygiene & /compress can surface a visible warning.  Previous
-    behavior of inserting a static "summary unavailable" placeholder while
-    silently dropping the middle window has been removed — losing N turns
-    of context is worse than freezing the chat until the user retries."""
+    """Default behavior (compression.abort_on_summary_failure=False):
+    summary-generation failure inserts a static fallback placeholder and
+    records dropped count + fallback flag so gateway hygiene & /compress
+    can surface a visible warning."""
 
-    def test_compress_aborts_and_preserves_messages_on_summary_failure(self):
+    def test_compress_records_fallback_and_dropped_count_on_summary_failure(self):
         with patch("agent.context_compressor.get_model_context_length", return_value=100000):
             c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=2)
 
@@ -752,28 +743,20 @@ class TestSummaryFailureTrackingForGatewayWarning:
             {"role": "user", "content": "msg 7"},
         ]
 
-        # Simulate summary LLM call failing — covers the 404 / model-not-found
-        # case from issue (auxiliary compression model misconfigured).
         with patch("agent.context_compressor.call_llm", side_effect=Exception("404 model not found")):
             result = c.compress(msgs)
 
-        # Abort flag set, error recorded
-        assert c._last_compress_aborted is True
+        assert c._last_summary_fallback_used is True
+        assert c._last_summary_dropped_count > 0
         assert c._last_summary_error is not None
-        # No fallback inserted, no messages dropped
-        assert c._last_summary_fallback_used is False
-        assert c._last_summary_dropped_count == 0
-        # Original messages preserved byte-for-byte — the agent loop's
-        # "did compression help?" check (len(after) < len(before)) sees a
-        # no-op and stops looping.
-        assert result == msgs
-        # No "Summary generation was unavailable" placeholder leaked in.
-        assert not any(
+        # Default mode: abort flag must NOT fire.
+        assert c._last_compress_aborted is False
+        assert any(
             isinstance(m.get("content"), str) and "Summary generation was unavailable" in m["content"]
             for m in result
         )
 
-    def test_compress_clears_abort_flag_on_subsequent_success(self):
+    def test_compress_clears_fallback_flag_on_subsequent_success(self):
         mock_response = MagicMock()
         mock_response.choices = [MagicMock()]
         mock_response.choices[0].message.content = "summary text"
@@ -792,12 +775,76 @@ class TestSummaryFailureTrackingForGatewayWarning:
             {"role": "user", "content": "msg 7"},
         ]
 
-        # First call fails, second succeeds — abort flag must reset on second compress.
+        with patch("agent.context_compressor.call_llm", side_effect=Exception("boom")):
+            c.compress(msgs)
+        assert c._last_summary_fallback_used is True
+
+        c._summary_failure_cooldown_until = 0.0
+        with patch("agent.context_compressor.call_llm", return_value=mock_response):
+            c.compress(msgs)
+        assert c._last_summary_fallback_used is False
+        assert c._last_summary_dropped_count == 0
+
+
+class TestAbortOnSummaryFailure:
+    """Opt-in behavior (compression.abort_on_summary_failure=True):
+    summary-generation failure ABORTS compression entirely — returns the
+    original messages unchanged and sets _last_compress_aborted=True so
+    gateway hygiene & /compress can surface a visible warning."""
+
+    def _make_msgs(self):
+        return [
+            {"role": "system", "content": "sys"},
+            {"role": "user", "content": "msg 1"},
+            {"role": "assistant", "content": "msg 2"},
+            {"role": "user", "content": "msg 3"},
+            {"role": "assistant", "content": "msg 4"},
+            {"role": "user", "content": "msg 5"},
+            {"role": "assistant", "content": "msg 6"},
+            {"role": "user", "content": "msg 7"},
+        ]
+
+    def _make_compressor(self):
+        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
+            return ContextCompressor(
+                model="test",
+                quiet_mode=True,
+                protect_first_n=2,
+                protect_last_n=2,
+                abort_on_summary_failure=True,
+            )
+
+    def test_compress_aborts_and_preserves_messages_on_summary_failure(self):
+        c = self._make_compressor()
+        msgs = self._make_msgs()
+        with patch("agent.context_compressor.call_llm", side_effect=Exception("404 model not found")):
+            result = c.compress(msgs)
+
+        assert c._last_compress_aborted is True
+        assert c._last_summary_error is not None
+        # No fallback inserted, no messages dropped
+        assert c._last_summary_fallback_used is False
+        assert c._last_summary_dropped_count == 0
+        # Original messages preserved byte-for-byte.
+        assert result == msgs
+        # No "Summary generation was unavailable" placeholder leaked in.
+        assert not any(
+            isinstance(m.get("content"), str) and "Summary generation was unavailable" in m["content"]
+            for m in result
+        )
+
+    def test_compress_clears_abort_flag_on_subsequent_success(self):
+        mock_response = MagicMock()
+        mock_response.choices = [MagicMock()]
+        mock_response.choices[0].message.content = "summary text"
+
+        c = self._make_compressor()
+        msgs = self._make_msgs()
+
         with patch("agent.context_compressor.call_llm", side_effect=Exception("boom")):
             c.compress(msgs)
         assert c._last_compress_aborted is True
 
-        # Reset cooldown to allow retry on second compress
         c._summary_failure_cooldown_until = 0.0
         with patch("agent.context_compressor.call_llm", return_value=mock_response):
             c.compress(msgs)
@@ -813,34 +860,17 @@ class TestSummaryFailureTrackingForGatewayWarning:
         mock_response.choices = [MagicMock()]
         mock_response.choices[0].message.content = "summary text"
 
-        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
-            c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=2)
+        c = self._make_compressor()
+        msgs = self._make_msgs()
 
-        msgs = [
-            {"role": "system", "content": "sys"},
-            {"role": "user", "content": "msg 1"},
-            {"role": "assistant", "content": "msg 2"},
-            {"role": "user", "content": "msg 3"},
-            {"role": "assistant", "content": "msg 4"},
-            {"role": "user", "content": "msg 5"},
-            {"role": "assistant", "content": "msg 6"},
-            {"role": "user", "content": "msg 7"},
-        ]
-
-        # Pre-populate an active cooldown (as if a prior auto-compress aborted).
         import time as _time
         c._summary_failure_cooldown_until = _time.monotonic() + 999.0
 
-        # Without force, _generate_summary would short-circuit on cooldown
-        # and return None → abort.  With force=True the cooldown is cleared
-        # and the call goes through.
         with patch("agent.context_compressor.call_llm", return_value=mock_response):
             result = c.compress(msgs, force=True)
 
         assert c._last_compress_aborted is False
-        # Cooldown was cleared and a real summary attempt was made.
         assert c._summary_failure_cooldown_until == 0.0
-        # Result is actually compressed (shorter than input).
         assert len(result) < len(msgs)
 
 
@@ -1401,11 +1431,7 @@ class TestSummaryTargetRatio:
             + [{"role": "user" if i % 2 == 0 else "assistant", "content": f"msg {i}"}
                for i in range(8)]
         )
-        mock_resp = MagicMock()
-        mock_resp.choices = [MagicMock()]
-        mock_resp.choices[0].message.content = "summary text"
-        with patch("agent.context_compressor.call_llm", return_value=mock_resp):
-            result = c.compress(msgs)
+        result = c.compress(msgs)
         # System prompt (msg[0]) survives as head
         assert result[0]["role"] == "system"
         assert result[0]["content"].startswith("System prompt")