diff --git a/agent/context_compressor.py b/agent/context_compressor.py
index 5cbb133d78..4b08f3c11d 100644
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@@ -911,6 +911,30 @@ The user has requested that this compaction PRIORITISE preserving all informatio
                 self._summary_failure_cooldown_until = 0.0  # no cooldown
                 return self._generate_summary(turns_to_summarize, focus_topic=focus_topic)  # retry immediately
 
+            # Unknown-error best-effort retry on main model.  Losing N turns of
+            # context is almost always worse than one extra summary attempt, so
+            # if we haven't already fallen back and the summary model differs
+            # from the main model, try once more on main before entering
+            # cooldown.  Errors that DID match _is_model_not_found above are
+            # already handled by the fast-path retry; this branch catches
+            # everything else (400s, provider-specific "no route" strings,
+            # aggregator rejections, etc.) where auto-retry is still safer
+            # than dropping the turns.
+            if (
+                self.summary_model
+                and self.summary_model != self.model
+                and not getattr(self, "_summary_model_fallen_back", False)
+            ):
+                self._summary_model_fallen_back = True
+                logging.warning(
+                    "Summary model '%s' failed (%s). "
+                    "Retrying on main model '%s' before giving up.",
+                    self.summary_model, e, self.model,
+                )
+                self.summary_model = ""  # empty = use main model
+                self._summary_failure_cooldown_until = 0.0
+                return self._generate_summary(turns_to_summarize, focus_topic=focus_topic)
+
             # Transient errors (timeout, rate limit, network) — shorter cooldown
             _transient_cooldown = 60
             self._summary_failure_cooldown_until = time.monotonic() + _transient_cooldown
diff --git a/tests/agent/test_context_compressor.py b/tests/agent/test_context_compressor.py
index 582b517867..797c9edc3f 100644
--- a/tests/agent/test_context_compressor.py
+++ b/tests/agent/test_context_compressor.py
@@ -242,6 +242,131 @@ class TestSummaryFailureCooldown:
         assert mock_call.call_count == 1
 
 
+class TestSummaryFallbackToMainModel:
+    """When ``summary_model`` differs from the main model and the summary LLM
+    call fails, the compressor should retry once on the main model before
+    giving up — losing N turns of context is almost always worse than one
+    extra summary attempt.  Covers both the fast-path (explicit
+    model-not-found errors) and the unknown-error best-effort retry."""
+
+    def _msgs(self):
+        return [
+            {"role": "user", "content": "do something"},
+            {"role": "assistant", "content": "ok"},
+        ]
+
+    def test_model_not_found_404_falls_back_to_main_and_succeeds(self):
+        """Classic misconfiguration: ``auxiliary.compression.model`` points at
+        a model the main provider doesn't serve → 404 → retry on main."""
+        mock_ok = MagicMock()
+        mock_ok.choices = [MagicMock()]
+        mock_ok.choices[0].message.content = "summary via main model"
+
+        err_404 = Exception("404 model_not_found: no such model")
+        err_404.status_code = 404
+
+        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
+            c = ContextCompressor(
+                model="main-model",
+                summary_model_override="broken-aux-model",
+                quiet_mode=True,
+            )
+
+        with patch(
+            "agent.context_compressor.call_llm",
+            side_effect=[err_404, mock_ok],
+        ) as mock_call:
+            result = c._generate_summary(self._msgs())
+
+        assert mock_call.call_count == 2
+        # First call used the misconfigured aux model
+        assert mock_call.call_args_list[0].kwargs.get("model") == "broken-aux-model"
+        # Second call used the main model (no model kwarg → call_llm uses main)
+        assert "model" not in mock_call.call_args_list[1].kwargs
+        assert result is not None
+        assert "summary via main model" in result
+
+    def test_unknown_error_falls_back_to_main_and_succeeds(self):
+        """Errors that don't match the 404/503/model_not_found fast-path
+        (400s, provider-specific 'no route', aggregator rejections) should
+        ALSO trigger a best-effort retry on main before entering cooldown."""
+        mock_ok = MagicMock()
+        mock_ok.choices = [MagicMock()]
+        mock_ok.choices[0].message.content = "summary via main model"
+
+        # A 400 from OpenRouter / Nous portal with an opaque message — does
+        # NOT match _is_model_not_found, but still an unrecoverable misconfig.
+        err_400 = Exception("400 Bad Request: provider rejected model")
+        err_400.status_code = 400
+
+        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
+            c = ContextCompressor(
+                model="main-model",
+                summary_model_override="broken-aux-model",
+                quiet_mode=True,
+            )
+
+        with patch(
+            "agent.context_compressor.call_llm",
+            side_effect=[err_400, mock_ok],
+        ) as mock_call:
+            result = c._generate_summary(self._msgs())
+
+        assert mock_call.call_count == 2
+        assert mock_call.call_args_list[0].kwargs.get("model") == "broken-aux-model"
+        assert "model" not in mock_call.call_args_list[1].kwargs
+        assert result is not None
+        assert "summary via main model" in result
+
+    def test_no_fallback_when_summary_model_equals_main_model(self):
+        """If the aux model IS the main model, there's nowhere to fall back
+        to — go straight to cooldown, don't loop retrying the same call."""
+        err = Exception("500 internal error")
+
+        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
+            c = ContextCompressor(
+                model="main-model",
+                summary_model_override="main-model",  # same as main
+                quiet_mode=True,
+            )
+
+        with patch(
+            "agent.context_compressor.call_llm",
+            side_effect=err,
+        ) as mock_call:
+            result = c._generate_summary(self._msgs())
+
+        # Only one attempt — retry gate blocks fallback when models match
+        assert mock_call.call_count == 1
+        assert result is None
+        # Not flagged as fallen back — the retry condition was never met
+        assert getattr(c, "_summary_model_fallen_back", False) is False
+
+    def test_fallback_only_happens_once_per_compressor(self):
+        """If the retry-on-main ALSO fails, don't loop forever — enter
+        cooldown like the normal failure path."""
+        err1 = Exception("400 aux model rejected")
+        err2 = Exception("500 main model also exploded")
+
+        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
+            c = ContextCompressor(
+                model="main-model",
+                summary_model_override="broken-aux-model",
+                quiet_mode=True,
+            )
+
+        with patch(
+            "agent.context_compressor.call_llm",
+            side_effect=[err1, err2],
+        ) as mock_call:
+            result = c._generate_summary(self._msgs())
+
+        # Exactly 2 calls: initial + one retry on main.  No further retries.
+        assert mock_call.call_count == 2
+        assert result is None
+        assert c._summary_model_fallen_back is True
+
+
 class TestSummaryFailureTrackingForGatewayWarning:
     """When summary generation fails, the compressor must record dropped count
     + fallback flag so gateway hygiene & /compress can surface a visible