diff --git a/agent/context_compressor.py b/agent/context_compressor.py
index 2eb896a9934..19bc0e5f0f1 100644
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@@ -1591,6 +1591,22 @@ This compaction should PRIORITISE preserving all information related to the focu
             # Handle cases where content is not a string (e.g., dict from llama.cpp)
             if not isinstance(content, str):
                 content = str(content) if content else ""
+            # Some OpenAI-compatible proxies (e.g. cmkey.cn, one-api channels)
+            # return a well-formed HTTP 200 with an empty or whitespace-only
+            # ``content`` instead of an error or empty ``choices``. That payload
+            # passes ``_validate_llm_response`` (a ``message`` exists), so it
+            # reaches here and would otherwise be stored as a prefix-only
+            # summary with no body — silently wiping the compacted turns and
+            # making the model forget the in-progress task (#11978, #11914).
+            # Treat empty content as a failure so it routes through the same
+            # main-model fallback + cooldown machinery as a transport error,
+            # rather than replacing real context with an empty summary.
+            if not content.strip():
+                raise RuntimeError(
+                    "Context compression LLM returned empty content "
+                    f"(provider={self.provider or 'auto'} "
+                    f"model={self.summary_model or self.model})"
+                )
             # Redact the summary output as well — the summarizer LLM may
             # ignore prompt instructions and echo back secrets verbatim.
             summary = redact_sensitive_text(content.strip())
@@ -1601,16 +1617,27 @@ This compaction should PRIORITISE preserving all information related to the focu
             self._last_summary_error = None
             self._last_summary_auth_failure = False
             return self._with_summary_prefix(summary)
-        except RuntimeError:
-            # No provider configured — long cooldown, unlikely to self-resolve
-            self._summary_failure_cooldown_until = time.monotonic() + _SUMMARY_FAILURE_COOLDOWN_SECONDS
-            self._last_summary_error = "no auxiliary LLM provider configured"
-            logger.warning("Context compression: no provider available for "
-                            "summary. Middle turns will be dropped without summary "
-                            "for %d seconds.",
-                            _SUMMARY_FAILURE_COOLDOWN_SECONDS)
-            return None
         except Exception as e:
+            # ``call_llm`` raises ``RuntimeError`` for two very different cases:
+            #   1. No provider configured ("No LLM provider configured ...") —
+            #      a permanent misconfiguration, long cooldown is correct.
+            #   2. An empty/invalid response from a configured provider
+            #      (``_validate_llm_response`` empty-``choices``/``None``, or our
+            #      empty-``content`` guard above) — a transient/proxy fault that
+            #      should fall back to the main model first, exactly like the
+            #      transport errors handled below.
+            # Only (1) belongs in the long no-provider cooldown; (2) and every
+            # other exception flow into the generic fallback logic so they get
+            # a main-model retry before any cooldown. (#11978, #11914)
+            if isinstance(e, RuntimeError) and "no llm provider configured" in str(e).lower():
+                # No provider configured — long cooldown, unlikely to self-resolve
+                self._summary_failure_cooldown_until = time.monotonic() + _SUMMARY_FAILURE_COOLDOWN_SECONDS
+                self._last_summary_error = "no auxiliary LLM provider configured"
+                logger.warning("Context compression: no provider available for "
+                                "summary. Middle turns will be dropped without summary "
+                                "for %d seconds.",
+                                _SUMMARY_FAILURE_COOLDOWN_SECONDS)
+                return None
             # If the summary model is different from the main model and the
             # error looks permanent (model not found, 503, 404), fall back to
             # using the main model instead of entering cooldown that leaves
diff --git a/tests/agent/test_context_compressor.py b/tests/agent/test_context_compressor.py
index 084cb446b4d..cef5f66da81 100644
--- a/tests/agent/test_context_compressor.py
+++ b/tests/agent/test_context_compressor.py
@@ -357,11 +357,41 @@ class TestNonStringContent:
         assert isinstance(summary, str)
         assert summary.startswith(SUMMARY_PREFIX)
 
-    def test_none_content_coerced_to_empty(self):
+    def test_none_content_treated_as_failure_not_empty_summary(self):
+        """Regression #11978/#11914: a well-formed response with ``content=None``
+        (some OpenAI-compatible proxies, e.g. cmkey.cn, return HTTP 200 with
+        null/empty content) must NOT be stored as a prefix-only summary that
+        silently wipes the compacted turns. It is treated as a summary failure
+        and routed through cooldown so the turns are dropped without a summary
+        rather than replaced by an empty one."""
         mock_response = MagicMock()
         mock_response.choices = [MagicMock()]
         mock_response.choices[0].message.content = None
 
+        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
+            # summary_model == model here, so no fallback path: straight to cooldown.
+            c = ContextCompressor(model="test", quiet_mode=True)
+
+        messages = [
+            {"role": "user", "content": "do something"},
+            {"role": "assistant", "content": "ok"},
+        ]
+
+        with patch("agent.context_compressor.call_llm", return_value=mock_response):
+            summary = c._generate_summary(messages)
+        # Empty content → failure → None (drop turns), NOT a prefix-only summary.
+        assert summary is None
+        assert summary != SUMMARY_PREFIX
+        # Transient cooldown engaged so we don't immediately retry the bad proxy.
+        assert c._summary_failure_cooldown_until > 0
+
+    def test_empty_string_content_treated_as_failure(self):
+        """An empty-string (or whitespace-only) ``content`` is handled the same
+        as ``None`` — failure, not an empty summary (#11978)."""
+        mock_response = MagicMock()
+        mock_response.choices = [MagicMock()]
+        mock_response.choices[0].message.content = "   \n  "
+
         with patch("agent.context_compressor.get_model_context_length", return_value=100000):
             c = ContextCompressor(model="test", quiet_mode=True)
 
@@ -372,9 +402,36 @@ class TestNonStringContent:
 
         with patch("agent.context_compressor.call_llm", return_value=mock_response):
             summary = c._generate_summary(messages)
-        # None content → empty string → standardized compaction handoff prefix added
-        assert summary is not None
-        assert summary == SUMMARY_PREFIX
+        assert summary is None
+        assert c._summary_failure_cooldown_until > 0
+
+    def test_empty_content_falls_back_to_main_model(self):
+        """When the auxiliary summary model returns empty content and a distinct
+        main model is configured, compression falls back to the main model
+        before entering cooldown (#11978 glm-5.1 → glm-5 path)."""
+        mock_response = MagicMock()
+        mock_response.choices = [MagicMock()]
+        mock_response.choices[0].message.content = ""
+
+        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
+            c = ContextCompressor(
+                model="glm-5",
+                summary_model_override="glm-5.1",
+                quiet_mode=True,
+            )
+
+        messages = [
+            {"role": "user", "content": "do something"},
+            {"role": "assistant", "content": "ok"},
+        ]
+
+        with patch("agent.context_compressor.call_llm", return_value=mock_response) as mock_call:
+            summary = c._generate_summary(messages)
+        # Two calls: aux model (glm-5.1) then fallback to main (glm-5).
+        assert mock_call.call_count == 2
+        assert c._summary_model_fallen_back is True
+        assert summary is None
+        assert c._summary_failure_cooldown_until > 0
 
     def test_summary_call_does_not_force_temperature(self):
         mock_response = MagicMock()