fix(compress): make abort-on-summary-failure opt-in via config flag (#28117)

PR #28102 made the summary-failure abort path the unconditional default, changing established behavior. Gate it behind config.yaml flag `compression.abort_on_summary_failure` (default False = historical fallback-placeholder behavior). - hermes_cli/config.py: new `compression.abort_on_summary_failure` key, default False, documented inline. - agent/agent_init.py: read the flag from compression config and pass to ContextCompressor. - agent/context_compressor.py: `__init__` accepts `abort_on_summary_failure` (default False). `compress()` failure branch gates the abort on the flag; when False, falls through to the restored legacy fallback path (static "summary unavailable" placeholder + drop middle window). - tests: restore original fallback expectations as default; add new TestAbortOnSummaryFailure class for the opt-in mode. Gateway/CLI plumbing (force=True on /compress, hygiene/handler abort detection, locale `gateway.compress.aborted` key) from PR #28102 stays intact — those paths only fire when `_last_compress_aborted` is True, which now only happens when the flag is enabled.
2026-06-05 07:41:39 +00:00 · 2026-05-18 10:28:20 -07:00 · 2026-05-18 10:28:20 -07:00 · 9aae59feab
commit 9aae59feab
parent 5e40f83cb7
4 changed files with 150 additions and 84 deletions
--- a/tests/agent/test_context_compressor.py
+++ b/tests/agent/test_context_compressor.py
@ -64,31 +64,28 @@ class TestCompress:
        result = compressor.compress(msgs)
        assert result == msgs

-    def test_no_client_aborts_compression_with_messages_preserved(self, compressor):
-        """compressor has no provider configured, so _generate_summary returns
-        None → compression aborts entirely.  Messages must be returned
-        unchanged (no placeholder, no drop) and _last_compress_aborted set."""
+    def test_truncation_fallback_no_client(self, compressor):
+        # compressor has client=None and abort_on_summary_failure=False (default),
+        # so the LEGACY fallback path inserts a static "summary unavailable"
+        # placeholder and the middle window is dropped.
        msgs = [{"role": "system", "content": "System prompt"}] + self._make_messages(10)
        result = compressor.compress(msgs)
-        # Abort path: messages preserved byte-for-byte
-        assert result == msgs
-        assert compressor._last_compress_aborted is True
-        # Compression count NOT incremented on abort — nothing was compressed.
-        assert compressor.compression_count == 0
+        assert len(result) < len(msgs)
+        # Should keep system message and last N
+        assert result[0]["role"] == "system"
+        assert compressor.compression_count == 1
+        # Abort flag must NOT fire under the default config.
+        assert compressor._last_compress_aborted is False
+        assert compressor._last_summary_fallback_used is True

    def test_compression_increments_count(self, compressor):
        msgs = self._make_messages(10)
-        mock_resp = MagicMock()
-        mock_resp.choices = [MagicMock()]
-        mock_resp.choices[0].message.content = "summary text"
-        with patch("agent.context_compressor.call_llm", return_value=mock_resp):
-            compressor.compress(msgs)
-            assert compressor.compression_count == 1
-            # Reset cooldown isn't needed (no prior failure) but reset
-            # iterative-summary state so the next call follows the same
-            # path as the first.
-            compressor.compress(msgs)
-            assert compressor.compression_count == 2
+        # Default config (abort_on_summary_failure=False) — fallback path
+        # increments the count even on summary failure.
+        compressor.compress(msgs)
+        assert compressor.compression_count == 1
+        compressor.compress(msgs)
+        assert compressor.compression_count == 2

    def test_protects_first_and_last(self, compressor):
        msgs = self._make_messages(10)
@ -138,11 +135,7 @@ class TestGenerateSummaryNoneContent:
            {"role": "user" if i % 2 == 0 else "assistant", "content": f"msg {i}"}
            for i in range(10)
        ]
-        mock_resp = MagicMock()
-        mock_resp.choices = [MagicMock()]
-        mock_resp.choices[0].message.content = "summary text"
-        with patch("agent.context_compressor.call_llm", return_value=mock_resp):
-            result = c.compress(msgs)
+        result = c.compress(msgs)
        assert len(result) < len(msgs)


@ -730,14 +723,12 @@ class TestAuxModelFallbackSurfacedToCallers:


 class TestSummaryFailureTrackingForGatewayWarning:
-    """When summary generation fails, the compressor must ABORT compression
-    entirely (return the original messages unchanged) and set the abort flag
-    so gateway hygiene & /compress can surface a visible warning.  Previous
-    behavior of inserting a static "summary unavailable" placeholder while
-    silently dropping the middle window has been removed — losing N turns
-    of context is worse than freezing the chat until the user retries."""
+    """Default behavior (compression.abort_on_summary_failure=False):
+    summary-generation failure inserts a static fallback placeholder and
+    records dropped count + fallback flag so gateway hygiene & /compress
+    can surface a visible warning."""

-    def test_compress_aborts_and_preserves_messages_on_summary_failure(self):
+    def test_compress_records_fallback_and_dropped_count_on_summary_failure(self):
        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
            c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=2)

@ -752,28 +743,20 @@ class TestSummaryFailureTrackingForGatewayWarning:
            {"role": "user", "content": "msg 7"},
        ]

-        # Simulate summary LLM call failing — covers the 404 / model-not-found
-        # case from issue (auxiliary compression model misconfigured).
        with patch("agent.context_compressor.call_llm", side_effect=Exception("404 model not found")):
            result = c.compress(msgs)

-        # Abort flag set, error recorded
-        assert c._last_compress_aborted is True
+        assert c._last_summary_fallback_used is True
+        assert c._last_summary_dropped_count > 0
        assert c._last_summary_error is not None
-        # No fallback inserted, no messages dropped
-        assert c._last_summary_fallback_used is False
-        assert c._last_summary_dropped_count == 0
-        # Original messages preserved byte-for-byte — the agent loop's
-        # "did compression help?" check (len(after) < len(before)) sees a
-        # no-op and stops looping.
-        assert result == msgs
-        # No "Summary generation was unavailable" placeholder leaked in.
-        assert not any(
+        # Default mode: abort flag must NOT fire.
+        assert c._last_compress_aborted is False
+        assert any(
            isinstance(m.get("content"), str) and "Summary generation was unavailable" in m["content"]
            for m in result
        )

-    def test_compress_clears_abort_flag_on_subsequent_success(self):
+    def test_compress_clears_fallback_flag_on_subsequent_success(self):
        mock_response = MagicMock()
        mock_response.choices = [MagicMock()]
        mock_response.choices[0].message.content = "summary text"
@ -792,12 +775,76 @@ class TestSummaryFailureTrackingForGatewayWarning:
            {"role": "user", "content": "msg 7"},
        ]

-        # First call fails, second succeeds — abort flag must reset on second compress.
+        with patch("agent.context_compressor.call_llm", side_effect=Exception("boom")):
+            c.compress(msgs)
+        assert c._last_summary_fallback_used is True
+
+        c._summary_failure_cooldown_until = 0.0
+        with patch("agent.context_compressor.call_llm", return_value=mock_response):
+            c.compress(msgs)
+        assert c._last_summary_fallback_used is False
+        assert c._last_summary_dropped_count == 0
+
+
+class TestAbortOnSummaryFailure:
+    """Opt-in behavior (compression.abort_on_summary_failure=True):
+    summary-generation failure ABORTS compression entirely — returns the
+    original messages unchanged and sets _last_compress_aborted=True so
+    gateway hygiene & /compress can surface a visible warning."""
+
+    def _make_msgs(self):
+        return [
+            {"role": "system", "content": "sys"},
+            {"role": "user", "content": "msg 1"},
+            {"role": "assistant", "content": "msg 2"},
+            {"role": "user", "content": "msg 3"},
+            {"role": "assistant", "content": "msg 4"},
+            {"role": "user", "content": "msg 5"},
+            {"role": "assistant", "content": "msg 6"},
+            {"role": "user", "content": "msg 7"},
+        ]
+
+    def _make_compressor(self):
+        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
+            return ContextCompressor(
+                model="test",
+                quiet_mode=True,
+                protect_first_n=2,
+                protect_last_n=2,
+                abort_on_summary_failure=True,
+            )
+
+    def test_compress_aborts_and_preserves_messages_on_summary_failure(self):
+        c = self._make_compressor()
+        msgs = self._make_msgs()
+        with patch("agent.context_compressor.call_llm", side_effect=Exception("404 model not found")):
+            result = c.compress(msgs)
+
+        assert c._last_compress_aborted is True
+        assert c._last_summary_error is not None
+        # No fallback inserted, no messages dropped
+        assert c._last_summary_fallback_used is False
+        assert c._last_summary_dropped_count == 0
+        # Original messages preserved byte-for-byte.
+        assert result == msgs
+        # No "Summary generation was unavailable" placeholder leaked in.
+        assert not any(
+            isinstance(m.get("content"), str) and "Summary generation was unavailable" in m["content"]
+            for m in result
+        )
+
+    def test_compress_clears_abort_flag_on_subsequent_success(self):
+        mock_response = MagicMock()
+        mock_response.choices = [MagicMock()]
+        mock_response.choices[0].message.content = "summary text"
+
+        c = self._make_compressor()
+        msgs = self._make_msgs()
+
        with patch("agent.context_compressor.call_llm", side_effect=Exception("boom")):
            c.compress(msgs)
        assert c._last_compress_aborted is True

-        # Reset cooldown to allow retry on second compress
        c._summary_failure_cooldown_until = 0.0
        with patch("agent.context_compressor.call_llm", return_value=mock_response):
            c.compress(msgs)
@ -813,34 +860,17 @@ class TestSummaryFailureTrackingForGatewayWarning:
        mock_response.choices = [MagicMock()]
        mock_response.choices[0].message.content = "summary text"

-        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
-            c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=2)
+        c = self._make_compressor()
+        msgs = self._make_msgs()

-        msgs = [
-            {"role": "system", "content": "sys"},
-            {"role": "user", "content": "msg 1"},
-            {"role": "assistant", "content": "msg 2"},
-            {"role": "user", "content": "msg 3"},
-            {"role": "assistant", "content": "msg 4"},
-            {"role": "user", "content": "msg 5"},
-            {"role": "assistant", "content": "msg 6"},
-            {"role": "user", "content": "msg 7"},
-        ]
-
-        # Pre-populate an active cooldown (as if a prior auto-compress aborted).
        import time as _time
        c._summary_failure_cooldown_until = _time.monotonic() + 999.0

-        # Without force, _generate_summary would short-circuit on cooldown
-        # and return None → abort.  With force=True the cooldown is cleared
-        # and the call goes through.
        with patch("agent.context_compressor.call_llm", return_value=mock_response):
            result = c.compress(msgs, force=True)

        assert c._last_compress_aborted is False
-        # Cooldown was cleared and a real summary attempt was made.
        assert c._summary_failure_cooldown_until == 0.0
-        # Result is actually compressed (shorter than input).
        assert len(result) < len(msgs)


@ -1401,11 +1431,7 @@ class TestSummaryTargetRatio:
            + [{"role": "user" if i % 2 == 0 else "assistant", "content": f"msg {i}"}
               for i in range(8)]
        )
-        mock_resp = MagicMock()
-        mock_resp.choices = [MagicMock()]
-        mock_resp.choices[0].message.content = "summary text"
-        with patch("agent.context_compressor.call_llm", return_value=mock_resp):
-            result = c.compress(msgs)
+        result = c.compress(msgs)
        # System prompt (msg[0]) survives as head
        assert result[0]["role"] == "system"
        assert result[0]["content"].startswith("System prompt")