fix(compress): abort instead of dropping messages when summary LLM fails (#28102)

When auxiliary compression's summary generation returns None (aux model errored, returned non-JSON, timed out, etc.), the compressor previously still dropped every middle message between compress_start..compress_end and replaced them with a static 'Summary generation was unavailable' placeholder. The session kept going but the user silently lost N turns of context for nothing.

New behavior: on summary failure, compress() aborts entirely — returns the input messages unchanged and sets _last_compress_aborted=True. The existing _summary_failure_cooldown_until gate (30-60s) keeps the aux model from being burned on every turn. Auto-compress callers detect the no-op (len(after) == len(before)) and stop looping. The chat is 'frozen' at its current size until the next /compress or /new.

Manual /compress (CLI + gateway) now passes force=True, which clears the cooldown so users can retry immediately after an auto-abort. If the manual retry also fails, the user gets a visible warning telling them nothing was dropped and how to retry.

- agent/context_compressor.py: compress() gains force= kwarg; failure branch sets _last_compress_aborted and returns messages unchanged instead of inserting placeholder.
- agent/conversation_compression.py: compress_context() gains force= kwarg, detects abort, surfaces warning, skips session-rotation entirely, returns messages unchanged.
- run_agent.py: _compress_context() forwards force= to compress_context().
- cli.py + gateway/run.py: manual /compress paths pass force=True.
- gateway/run.py: hygiene + /compress handlers detect _last_compress_aborted and emit the new 'Compression aborted' warning (gateway.compress.aborted) instead of the old 'N historical messages were removed' message.
- locales/*.yaml: new gateway.compress.aborted key in all 16 locales.
- tests: updated to assert the abort contract (messages preserved, compression_count not incremented, abort flag set, no placeholder leaked). New test_force_true_bypasses_failure_cooldown covers the manual-retry path.
2026-07-21 16:18:55 +00:00 · 2026-05-18 10:19:40 -07:00 · 2026-05-18 10:19:40 -07:00 · 1634397ddb
commit 1634397ddb
parent 65e0c49b77
24 changed files with 249 additions and 103 deletions
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@ -586,6 +586,12 @@ class ContextCompressor(ContextEngine):
        # (gateway hygiene, /compress) can surface a visible warning.
        self._last_summary_dropped_count: int = 0
        self._last_summary_fallback_used: bool = False
+        # When summary generation fails we now ABORT compression entirely
+        # and return the original messages unchanged instead of dropping
+        # the middle window with a static placeholder.  Callers inspect
+        # this flag to know "compression was attempted but aborted, freeze
+        # the chat until the user manually retries via /compress".
+        self._last_compress_aborted: bool = False
        # When a user-configured summary model fails and we recover by
        # retrying on the main model, record the failure so gateway /
        # CLI callers can still warn the user even though compression
@ -1479,7 +1485,7 @@ The user has requested that this compaction PRIORITISE preserving all informatio
    # Main compression entry point
    # ------------------------------------------------------------------

-    def compress(self, messages: List[Dict[str, Any]], current_tokens: int = None, focus_topic: str = None) -> List[Dict[str, Any]]:
+    def compress(self, messages: List[Dict[str, Any]], current_tokens: int = None, focus_topic: str = None, force: bool = False) -> List[Dict[str, Any]]:
        """Compress conversation messages by summarizing middle turns.

        Algorithm:
@ -1497,6 +1503,9 @@ The user has requested that this compaction PRIORITISE preserving all informatio
                provided, the summariser will prioritise preserving information
                related to this topic and be more aggressive about compressing
                everything else.  Inspired by Claude Code's ``/compact``.
+            force: If True, clear any active summary-failure cooldown before
+                running so a manual ``/compress`` can retry immediately after
+                an auto-compression abort.  Auto-compress callers pass False.
        """
        # Reset per-call summary failure state — callers inspect these fields
        # after compress() returns to decide whether to surface a warning.
@ -1505,6 +1514,13 @@ The user has requested that this compaction PRIORITISE preserving all informatio
        self._last_summary_error = None
        self._last_aux_model_failure_error = None
        self._last_aux_model_failure_model = None
+        self._last_compress_aborted = False
+
+        # Manual /compress (force=True) bypasses the failure cooldown so the
+        # user can retry immediately after an auto-compress abort.  Without
+        # this, /compress would silently no-op for 30-60s after a failure.
+        if force and self._summary_failure_cooldown_until > 0.0:
+            self._summary_failure_cooldown_until = 0.0
        n_messages = len(messages)
        # Only need head + 3 tail messages minimum (token budget decides the real tail size)
        _min_for_compress = self._protect_head_size(messages) + 3 + 1
@ -1580,6 +1596,30 @@ The user has requested that this compaction PRIORITISE preserving all informatio
        # Phase 3: Generate structured summary
        summary = self._generate_summary(turns_to_summarize, focus_topic=focus_topic)

+        # If summary generation failed, ABORT compression entirely.  Returning
+        # the original messages unchanged preserves the full conversation
+        # context.  Previously this branch dropped every middle message and
+        # replaced them with a static "summary unavailable" placeholder,
+        # which silently lost N turns of work whenever the aux LLM hiccuped.
+        # Auto-compress callers detect the no-op (post-compress length ==
+        # pre-compress length) and stop looping.  The next call to
+        # _generate_summary is gated by _summary_failure_cooldown_until, so
+        # we don't burn the aux model every turn.  Users can force a retry
+        # via /compress (which passes force=True to clear the cooldown).
+        if not summary:
+            n_skipped = compress_end - compress_start
+            self._last_summary_dropped_count = 0  # nothing actually dropped
+            self._last_summary_fallback_used = False
+            self._last_compress_aborted = True
+            if not self.quiet_mode:
+                logger.warning(
+                    "Summary generation failed — aborting compression. "
+                    "%d message(s) preserved unchanged. Conversation is "
+                    "frozen until the next /compress or /new.",
+                    n_skipped,
+                )
+            return messages
+
        # Phase 4: Assemble compressed message list
        compressed = []
        for i in range(compress_start):
@ -1594,22 +1634,6 @@ The user has requested that this compaction PRIORITISE preserving all informatio
                    )
            compressed.append(msg)

-        # If LLM summary failed, insert a static fallback so the model
-        # knows context was lost rather than silently dropping everything.
-        if not summary:
-            if not self.quiet_mode:
-                logger.warning("Summary generation failed — inserting static fallback context marker")
-            n_dropped = compress_end - compress_start
-            self._last_summary_dropped_count = n_dropped
-            self._last_summary_fallback_used = True
-            summary = (
-                f"{SUMMARY_PREFIX}\n"
-                f"Summary generation was unavailable. {n_dropped} message(s) were "
-                f"removed to free context space but could not be summarized. The removed "
-                f"messages contained earlier work in this session. Continue based on the "
-                f"recent messages below and the current state of any files or resources."
-            )
-
        _merge_summary_into_tail = False
        last_head_role = messages[compress_start - 1].get("role", "user") if compress_start > 0 else "user"
        first_tail_role = messages[compress_end].get("role", "user") if compress_end < n_messages else "user"
--- a/agent/conversation_compression.py
+++ b/agent/conversation_compression.py
@ -256,6 +256,7 @@ def compress_context(
    approx_tokens: Optional[int] = None,
    task_id: str = "default",
    focus_topic: Optional[str] = None,
+    force: bool = False,
 ) -> Tuple[list, str]:
    """Compress conversation context and split the session in SQLite.

@ -268,9 +269,17 @@ def compress_context(
        focus_topic: Optional focus string for guided compression — the
            summariser will prioritise preserving information related to
            this topic.  Inspired by Claude Code's ``/compact <focus>``.
+        force: If True, bypass any active summary-failure cooldown.  Set
+            by the manual ``/compress`` slash command so users can retry
+            immediately after an auto-compress abort.  Auto-compress
+            callers use the default ``False``.

    Returns:
-        ``(compressed_messages, new_system_prompt)`` tuple.
+        ``(compressed_messages, new_system_prompt)`` tuple.  When
+        compression aborts (aux LLM failed to produce a usable summary),
+        returns the original messages unchanged and the existing system
+        prompt — the session is NOT rotated.  Callers should detect the
+        no-op via ``len(returned) == len(input)`` and stop the retry loop.
    """
    _pre_msg_count = len(messages)
    logger.info(
@ -291,12 +300,31 @@ def compress_context(
            pass

    try:
-        compressed = agent.context_compressor.compress(messages, current_tokens=approx_tokens, focus_topic=focus_topic)
+        compressed = agent.context_compressor.compress(messages, current_tokens=approx_tokens, focus_topic=focus_topic, force=force)
    except TypeError:
        # Plugin context engine with strict signature that doesn't accept
-        # focus_topic — fall back to calling without it.
+        # focus_topic / force — fall back to calling without them.
        compressed = agent.context_compressor.compress(messages, current_tokens=approx_tokens)

+    # If compression aborted (aux LLM failed to produce a usable summary)
+    # the compressor returns the input messages unchanged.  Surface the
+    # error to the user, skip the session-rotation work entirely (no
+    # session has logically ended), and let auto-compress callers detect
+    # the no-op via len(returned) == len(input).
+    if getattr(agent.context_compressor, "_last_compress_aborted", False):
+        _err = getattr(agent.context_compressor, "_last_summary_error", None) or "unknown error"
+        if getattr(agent, "_last_compression_summary_warning", None) != _err:
+            agent._last_compression_summary_warning = _err
+            agent._emit_warning(
+                f"⚠ Compression aborted: {_err}. "
+                "No messages were dropped — conversation continues unchanged. "
+                "Run /compress to retry, or /new to start a fresh session."
+            )
+        _existing_sp = getattr(agent, "_cached_system_prompt", None)
+        if not _existing_sp:
+            _existing_sp = agent._build_system_prompt(system_message)
+        return messages, _existing_sp
+
    summary_error = getattr(agent.context_compressor, "_last_summary_error", None)
    if summary_error:
        if getattr(agent, "_last_compression_summary_warning", None) != summary_error:
--- a/cli.py
+++ b/cli.py
@ -9183,6 +9183,7 @@ class HermesCLI:
                    None,
                    approx_tokens=approx_tokens,
                    focus_topic=focus_topic or None,
+                    force=True,
                )
                self.conversation_history = compressed
                # _compress_context ends the old session and creates a new child
--- a/gateway/run.py
+++ b/gateway/run.py
@ -7778,22 +7778,24 @@ class GatewayRunner:
                                        )

                                    # If summary generation failed, the
-                                    # compressor inserted a static fallback
-                                    # placeholder and the dropped turns are
-                                    # gone for good.  Surface a visible
-                                    # warning to the gateway user — agent.log
-                                    # alone is invisible on TG/Discord/etc.
+                                    # compressor aborts entirely and returns
+                                    # messages unchanged — nothing is dropped.
+                                    # Surface a visible warning to the gateway
+                                    # user — agent.log alone is invisible on
+                                    # TG/Discord/etc. — so they know the chat
+                                    # is "frozen" at the current size and can
+                                    # /compress to retry or /reset to start
+                                    # fresh.
                                    _comp = getattr(_hyg_agent, "context_compressor", None)
-                                    if _comp is not None and getattr(_comp, "_last_summary_fallback_used", False):
-                                        _dropped = getattr(_comp, "_last_summary_dropped_count", 0)
+                                    if _comp is not None and getattr(_comp, "_last_compress_aborted", False):
                                        _err = getattr(_comp, "_last_summary_error", None) or "unknown error"
                                        _warn_msg = (
-                                            "⚠️ Context compression summary failed "
-                                            f"({_err}). {_dropped} historical message(s) "
-                                            "were removed and replaced with a placeholder. "
-                                            "Earlier context is no longer recoverable. "
-                                            "Consider /reset for a clean session, or check "
-                                            "your auxiliary.compression model configuration."
+                                            "⚠️ Context compression aborted "
+                                            f"({_err}). No messages were dropped — "
+                                            "conversation is unchanged. Run /compress "
+                                            "to retry, /reset for a clean session, or "
+                                            "check your auxiliary.compression model "
+                                            "configuration."
                                        )
                                        try:
                                            _adapter = self.adapters.get(source.platform)
@ -11404,7 +11406,7 @@ class GatewayRunner:
                loop = asyncio.get_running_loop()
                compressed, _ = await loop.run_in_executor(
                    None,
-                    lambda: tmp_agent._compress_context(msgs, "", approx_tokens=approx_tokens, focus_topic=focus_topic)
+                    lambda: tmp_agent._compress_context(msgs, "", approx_tokens=approx_tokens, focus_topic=focus_topic, force=True)
                )

                # _compress_context already calls end_session() on the old session
@ -11433,8 +11435,11 @@ class GatewayRunner:
                # Detect summary-generation failure so we can surface a
                # visible warning to the user even on the manual /compress
                # path (otherwise the failure is silently logged).
-                _summary_failed = bool(getattr(compressor, "_last_summary_fallback_used", False))
-                _dropped_count = int(getattr(compressor, "_last_summary_dropped_count", 0) or 0)
+                # _last_compress_aborted means the aux LLM returned no
+                # usable summary and the compressor preserved messages
+                # unchanged (no drop, no placeholder).  force=True was
+                # passed above so any active cooldown is bypassed.
+                _summary_aborted = bool(getattr(compressor, "_last_compress_aborted", False))
                _summary_err = getattr(compressor, "_last_summary_error", None)
                # Separately: did the user's CONFIGURED aux model fail
                # and we recovered via main?  Surface that as an info
@ -11452,12 +11457,11 @@ class GatewayRunner:
            lines.append(summary["token_line"])
            if summary["note"]:
                lines.append(summary["note"])
-            if _summary_failed:
+            if _summary_aborted:
                lines.append(
                    t(
-                        "gateway.compress.summary_failed",
+                        "gateway.compress.aborted",
                        error=(_summary_err or "unknown error"),
-                        count=_dropped_count,
                    )
                )
            elif _aux_fail_model:
--- a/locales/af.yaml
+++ b/locales/af.yaml
@ -90,6 +90,7 @@ gateway:
    nothing_to_do:         "Niks om saam te pers nie (die transkripsie is steeds heeltemal beskermde konteks)."
    focus_line:            "Fokus: \"{topic}\""
    summary_failed:        "⚠️ Opsomming kon nie gegenereer word nie ({error}). {count} historiese boodskap(pe) is verwyder en met 'n plekhouer vervang; vroeëre konteks kan nie meer herstel word nie. Oorweeg om jou auxiliary.compression-modelopstelling na te gaan."
+    aborted:               "⚠️ Kompressie gestaak ({error}). Geen boodskappe is laat val nie — die gesprek is onveranderd. Voer /compress uit om weer te probeer, /reset vir 'n skoon sessie, of kyk na jou auxiliary.compression-modelkonfigurasie."
    aux_failed:            "ℹ️ Opgestelde saamperseringsmodel `{model}` het misluk ({error}). Herstel met jou hoofmodel — konteks is intakt — maar jy mag dalk `auxiliary.compression.model` in config.yaml wil nagaan."
    failed:                "Saampersing het misluk: {error}"

--- a/locales/de.yaml
+++ b/locales/de.yaml
@ -90,6 +90,7 @@ gateway:
    nothing_to_do:         "Noch nichts zu komprimieren (das Transkript ist weiterhin vollständig geschützter Kontext)."
    focus_line:            "Fokus: \"{topic}\""
    summary_failed:        "⚠️ Zusammenfassungsgenerierung fehlgeschlagen ({error}). {count} historische Nachricht(en) wurden entfernt und durch einen Platzhalter ersetzt; früherer Kontext ist nicht mehr wiederherstellbar. Überprüfen Sie die Konfiguration des auxiliary.compression-Modells."
+    aborted:               "⚠️ Komprimierung abgebrochen ({error}). Keine Nachrichten wurden entfernt — die Konversation ist unverändert. Führe /compress aus, um es erneut zu versuchen, /reset für eine neue Sitzung, oder prüfe deine auxiliary.compression-Modellkonfiguration."
    aux_failed:            "ℹ️ Das konfigurierte Komprimierungsmodell `{model}` ist fehlgeschlagen ({error}). Wiederherstellung mit Ihrem Hauptmodell — Kontext ist intakt — Sie sollten jedoch `auxiliary.compression.model` in config.yaml überprüfen."
    failed:                "Komprimierung fehlgeschlagen: {error}"

--- a/locales/en.yaml
+++ b/locales/en.yaml
@ -105,6 +105,7 @@ gateway:
    nothing_to_do:         "Nothing to compress yet (the transcript is still all protected context)."
    focus_line:            "Focus: \"{topic}\""
    summary_failed:        "⚠️ Summary generation failed ({error}). {count} historical message(s) were removed and replaced with a placeholder; earlier context is no longer recoverable. Consider checking your auxiliary.compression model configuration."
+    aborted:               "⚠️ Compression aborted ({error}). No messages were dropped — conversation is unchanged. Run /compress to retry, /reset for a clean session, or check your auxiliary.compression model configuration."
    aux_failed:            "ℹ️ Configured compression model `{model}` failed ({error}). Recovered using your main model — context is intact — but you may want to check `auxiliary.compression.model` in config.yaml."
    failed:                "Compression failed: {error}"

--- a/locales/es.yaml
+++ b/locales/es.yaml
@ -90,6 +90,7 @@ gateway:
    nothing_to_do:         "Aún no hay nada que comprimir (la transcripción sigue siendo todo contexto protegido)."
    focus_line:            "Enfoque: \"{topic}\""
    summary_failed:        "⚠️ Falló la generación del resumen ({error}). Se eliminaron {count} mensaje(s) históricos y se reemplazaron por un marcador; el contexto anterior ya no se puede recuperar. Considera revisar la configuración del modelo auxiliary.compression."
+    aborted:               "⚠️ Compresión abortada ({error}). No se eliminó ningún mensaje — la conversación está intacta. Ejecuta /compress para reintentar, /reset para una sesión limpia, o revisa la configuración de tu modelo auxiliary.compression."
    aux_failed:            "ℹ️ El modelo de compresión configurado `{model}` falló ({error}). Recuperado con tu modelo principal — el contexto está intacto — pero quizá quieras revisar `auxiliary.compression.model` en config.yaml."
    failed:                "Compresión fallida: {error}"

--- a/locales/fr.yaml
+++ b/locales/fr.yaml
@ -90,6 +90,7 @@ gateway:
    nothing_to_do:         "Rien à compresser pour l'instant (la transcription est encore entièrement du contexte protégé)."
    focus_line:            "Focus : \"{topic}\""
    summary_failed:        "⚠️ Échec de la génération du résumé ({error}). {count} message(s) historique(s) ont été supprimés et remplacés par un espace réservé ; le contexte antérieur n'est plus récupérable. Vérifiez la configuration du modèle auxiliary.compression."
+    aborted:               "⚠️ Compression interrompue ({error}). Aucun message n'a été supprimé — la conversation est inchangée. Lancez /compress pour réessayer, /reset pour une nouvelle session, ou vérifiez la configuration de votre modèle auxiliary.compression."
    aux_failed:            "ℹ️ Le modèle de compression configuré `{model}` a échoué ({error}). Récupéré avec votre modèle principal — le contexte est intact — mais vous pouvez vérifier `auxiliary.compression.model` dans config.yaml."
    failed:                "Échec de la compression : {error}"

--- a/locales/ga.yaml
+++ b/locales/ga.yaml
@ -94,6 +94,7 @@ gateway:
    nothing_to_do:         "Níl aon rud le dlúthú fós (tá an traschríbhinn fós uile mar chomhthéacs cosanta)."
    focus_line:            "Fócas: \"{topic}\""
    summary_failed:        "⚠️ Theip ar ghiniúint achoimre ({error}). Baineadh {count} teachtaireacht stairiúil agus cuireadh ionadaí ina n-áit; níl an comhthéacs roimhe seo in-aisghabhála a thuilleadh. Smaoinigh ar an gcumraíocht auxiliary.compression a sheiceáil."
+    aborted:               "⚠️ Cuireadh deireadh leis an dlúthú ({error}). Níor baineadh aon teachtaireacht — tá an comhrá gan athrú. Rith /compress chun é a thriail arís, /reset le haghaidh seisiún glan, nó seiceáil do chumraíocht samhla auxiliary.compression."
    aux_failed:            "ℹ️ Theip ar an tsamhail dlúthúcháin chumraithe `{model}` ({error}). Aisghafa ag baint úsáide as do phríomhshamhail — tá an comhthéacs slán — ach b'fhéidir gur mhaith leat `auxiliary.compression.model` i config.yaml a sheiceáil."
    failed:                "Theip ar dhlúthú: {error}"

--- a/locales/hu.yaml
+++ b/locales/hu.yaml
@ -90,6 +90,7 @@ gateway:
    nothing_to_do:         "Még nincs mit tömöríteni (a teljes átirat még védett kontextus)."
    focus_line:            "Fókusz: \"{topic}\""
    summary_failed:        "⚠️ Az összefoglaló generálása sikertelen ({error}). {count} korábbi üzenet eltávolítva és helykitöltővel helyettesítve; a korábbi kontextus már nem helyreállítható. Érdemes ellenőrizni az auxiliary.compression modell konfigurációját."
+    aborted:               "⚠️ Tömörítés megszakítva ({error}). Egyetlen üzenet sem lett eldobva — a beszélgetés változatlan. Futtass /compress parancsot az újrapróbálkozáshoz, /reset egy új munkamenethez, vagy ellenőrizd az auxiliary.compression modell konfigurációt."
    aux_failed:            "ℹ️ A beállított tömörítőmodell (`{model}`) hibát adott ({error}). A főmodellel helyreállítva — a kontextus érintetlen — de érdemes ellenőrizni az `auxiliary.compression.model` beállítást a config.yaml fájlban."
    failed:                "Tömörítés sikertelen: {error}"

--- a/locales/it.yaml
+++ b/locales/it.yaml
@ -90,6 +90,7 @@ gateway:
    nothing_to_do:         "Niente da comprimere per ora (la trascrizione è ancora tutta contesto protetto)."
    focus_line:            "Focus: \"{topic}\""
    summary_failed:        "⚠️ Generazione del riepilogo non riuscita ({error}). {count} messaggio/i storico/i sono stati rimossi e sostituiti con un segnaposto; il contesto precedente non è più recuperabile. Considera di controllare la configurazione del modello auxiliary.compression."
+    aborted:               "⚠️ Compressione interrotta ({error}). Nessun messaggio è stato eliminato — la conversazione è invariata. Esegui /compress per riprovare, /reset per una nuova sessione, o controlla la configurazione del modello auxiliary.compression."
    aux_failed:            "ℹ️ Il modello di compressione configurato `{model}` non è riuscito ({error}). Recupero effettuato usando il modello principale — il contesto è intatto — ma potresti voler controllare `auxiliary.compression.model` in config.yaml."
    failed:                "Compressione non riuscita: {error}"

--- a/locales/ja.yaml
+++ b/locales/ja.yaml
@ -90,6 +90,7 @@ gateway:
    nothing_to_do:         "まだ圧縮するものがありません (トランスクリプトはすべて保護されたコンテキストのままです)。"
    focus_line:            "フォーカス: \"{topic}\""
    summary_failed:        "⚠️ 要約の生成に失敗しました ({error})。{count} 件の履歴メッセージが削除され、プレースホルダーに置き換えられました。以前のコンテキストは復元できません。auxiliary.compression モデルの設定を確認してください。"
+    aborted:               "⚠️ 圧縮が中止されました ({error})。メッセージは削除されていません — 会話はそのままです。再試行するには /compress、新しいセッションを開始するには /reset を実行するか、auxiliary.compression モデル設定を確認してください。"
    aux_failed:            "ℹ️ 構成された圧縮モデル `{model}` が失敗しました ({error})。メインモデルで復旧しました — コンテキストは無傷です — config.yaml の `auxiliary.compression.model` を確認するとよいでしょう。"
    failed:                "圧縮に失敗しました: {error}"

--- a/locales/ko.yaml
+++ b/locales/ko.yaml
@ -90,6 +90,7 @@ gateway:
    nothing_to_do:         "아직 압축할 내용이 없습니다 (대화 내용이 모두 보호된 컨텍스트입니다)."
    focus_line:            "초점: \"{topic}\""
    summary_failed:        "⚠️ 요약 생성에 실패했습니다 ({error}). 과거 메시지 {count}개가 제거되어 자리표시자로 대체되었으며, 이전 컨텍스트는 더 이상 복구할 수 없습니다. auxiliary.compression 모델 설정을 확인해 보세요."
+    aborted:               "⚠️ 압축이 중단되었습니다 ({error}). 메시지가 삭제되지 않았으며 대화는 그대로 유지됩니다. 다시 시도하려면 /compress를 실행하거나, 새 세션을 시작하려면 /reset을 사용하거나, auxiliary.compression 모델 설정을 확인하세요."
    aux_failed:            "ℹ️ 구성된 압축 모델 `{model}`이(가) 실패했습니다 ({error}). 메인 모델로 복구되어 컨텍스트는 보존되었지만, config.yaml의 `auxiliary.compression.model` 설정을 확인하는 것이 좋습니다."
    failed:                "압축 실패: {error}"

--- a/locales/pt.yaml
+++ b/locales/pt.yaml
@ -90,6 +90,7 @@ gateway:
    nothing_to_do:         "Ainda não há nada para comprimir (a transcrição continua a ser todo o contexto protegido)."
    focus_line:            "Foco: \"{topic}\""
    summary_failed:        "⚠️ Falha ao gerar o resumo ({error}). {count} mensagem(ns) histórica(s) foram removidas e substituídas por um marcador; o contexto anterior já não pode ser recuperado. Considera verificar a configuração do modelo auxiliary.compression."
+    aborted:               "⚠️ Compressão abortada ({error}). Nenhuma mensagem foi removida — a conversa está inalterada. Executa /compress para tentar de novo, /reset para uma sessão nova, ou verifica a configuração do modelo auxiliary.compression."
    aux_failed:            "ℹ️ O modelo de compressão configurado `{model}` falhou ({error}). Recuperado com o teu modelo principal — o contexto está intacto — mas talvez queiras verificar `auxiliary.compression.model` em config.yaml."
    failed:                "Compressão falhou: {error}"

--- a/locales/ru.yaml
+++ b/locales/ru.yaml
@ -90,6 +90,7 @@ gateway:
    nothing_to_do:         "Пока нечего сжимать (стенограмма всё ещё полностью является защищённым контекстом)."
    focus_line:            "Фокус: \"{topic}\""
    summary_failed:        "⚠️ Не удалось сгенерировать сводку ({error}). {count} историч. сообщений было удалено и заменено заполнителем; предыдущий контекст больше нельзя восстановить. Проверьте конфигурацию модели auxiliary.compression."
+    aborted:               "⚠️ Сжатие прервано ({error}). Сообщения не были удалены — разговор не изменился. Запустите /compress для повторной попытки, /reset для новой сессии или проверьте конфигурацию модели auxiliary.compression."
    aux_failed:            "ℹ️ Настроенная модель сжатия `{model}` дала сбой ({error}). Восстановлено с помощью основной модели — контекст не повреждён — но рекомендуется проверить `auxiliary.compression.model` в config.yaml."
    failed:                "Сжатие не удалось: {error}"

--- a/locales/tr.yaml
+++ b/locales/tr.yaml
@ -90,6 +90,7 @@ gateway:
    nothing_to_do:         "Henüz sıkıştırılacak bir şey yok (transkript hâlâ tamamen korunan bağlam)."
    focus_line:            "Odak: \"{topic}\""
    summary_failed:        "⚠️ Özet oluşturma başarısız ({error}). {count} geçmiş mesaj kaldırılıp yer tutucuyla değiştirildi; önceki bağlam artık kurtarılamaz. auxiliary.compression model yapılandırmanızı kontrol edin."
+    aborted:               "⚠️ Sıkıştırma iptal edildi ({error}). Hiçbir mesaj silinmedi — konuşma değişmedi. Tekrar denemek için /compress, temiz bir oturum için /reset komutunu çalıştırın veya auxiliary.compression model yapılandırmanızı kontrol edin."
    aux_failed:            "ℹ️ Yapılandırılmış sıkıştırma modeli `{model}` başarısız oldu ({error}). Ana modelinizle kurtarıldı — bağlam sağlam — ancak config.yaml içindeki `auxiliary.compression.model` öğesini kontrol etmek isteyebilirsiniz."
    failed:                "Sıkıştırma başarısız: {error}"

--- a/locales/uk.yaml
+++ b/locales/uk.yaml
@ -90,6 +90,7 @@ gateway:
    nothing_to_do:         "Поки що немає що стискати (стенограма все ще є повністю захищеним контекстом)."
    focus_line:            "Фокус: \"{topic}\""
    summary_failed:        "⚠️ Не вдалося згенерувати зведення ({error}). {count} історичних повідомлень було видалено та замінено заповнювачем; попередній контекст більше не можна відновити. Перевірте конфігурацію моделі auxiliary.compression."
+    aborted:               "⚠️ Стиснення скасовано ({error}). Жодне повідомлення не було видалено — розмова не змінилася. Виконайте /compress, щоб повторити спробу, /reset для нової сесії, або перевірте конфігурацію моделі auxiliary.compression."
    aux_failed:            "ℹ️ Налаштована модель стиснення `{model}` зазнала збою ({error}). Відновлено за допомогою основної моделі — контекст не пошкоджений — але варто перевірити `auxiliary.compression.model` у config.yaml."
    failed:                "Стиснення не вдалося: {error}"

--- a/locales/zh-hant.yaml
+++ b/locales/zh-hant.yaml
@ -90,6 +90,7 @@ gateway:
    nothing_to_do:         "目前沒有可壓縮的內容（對話記錄仍全部為受保護的上下文）。"
    focus_line:            "聚焦：\"{topic}\""
    summary_failed:        "⚠️ 摘要產生失敗（{error}）。{count} 則歷史訊息已被移除並以佔位符取代；先前的上下文已無法復原。建議檢查 auxiliary.compression 模型設定。"
+    aborted:               "⚠️ 壓縮已中止 ({error})。未刪除任何訊息 — 對話保持不變。執行 /compress 重試，執行 /reset 開始新工作階段，或檢查你的 auxiliary.compression 模型設定。"
    aux_failed:            "ℹ️ 設定的壓縮模型 `{model}` 失敗（{error}）。已使用主要模型復原 — 上下文完整 — 但您可能想檢查 config.yaml 中的 `auxiliary.compression.model`。"
    failed:                "壓縮失敗：{error}"

--- a/locales/zh.yaml
+++ b/locales/zh.yaml
@ -90,6 +90,7 @@ gateway:
    nothing_to_do:         "暂无可压缩内容（对话记录仍全部为受保护上下文）。"
    focus_line:            "聚焦：\"{topic}\""
    summary_failed:        "⚠️ 摘要生成失败（{error}）。{count} 条历史消息已被移除并替换为占位符；之前的上下文已无法恢复。建议检查 auxiliary.compression 模型配置。"
+    aborted:               "⚠️ 压缩已中止（{error}）。未删除任何消息 — 对话保持不变。运行 /compress 重试，运行 /reset 开始新会话，或检查您的 auxiliary.compression 模型配置。"
    aux_failed:            "ℹ️ 配置的压缩模型 `{model}` 失败（{error}）。已使用主模型恢复 — 上下文完好 — 但您可能想检查 config.yaml 中的 `auxiliary.compression.model`。"
    failed:                "压缩失败：{error}"

--- a/run_agent.py
+++ b/run_agent.py
@ -3714,12 +3714,19 @@ class AIAgent:
        """
        return self.api_mode != "codex_responses"

-    def _compress_context(self, messages: list, system_message: str, *, approx_tokens: int = None, task_id: str = "default", focus_topic: str = None) -> tuple:
-        """Forwarder — see ``agent.conversation_compression.compress_context``."""
+    def _compress_context(self, messages: list, system_message: str, *, approx_tokens: int = None, task_id: str = "default", focus_topic: str = None, force: bool = False) -> tuple:
+        """Forwarder — see ``agent.conversation_compression.compress_context``.
+
+        ``force=True`` is passed by the manual ``/compress`` slash command
+        so users can bypass the summary-failure cooldown after an
+        auto-compress abort.  Auto-compress callers use the default
+        ``force=False``.
+        """
        from agent.conversation_compression import compress_context
        return compress_context(
            self, messages, system_message,
            approx_tokens=approx_tokens, task_id=task_id, focus_topic=focus_topic,
+            force=force,
        )

    def _set_tool_guardrail_halt(self, decision: ToolGuardrailDecision) -> None:
--- a/tests/agent/test_context_compressor.py
+++ b/tests/agent/test_context_compressor.py
@ -64,21 +64,31 @@ class TestCompress:
        result = compressor.compress(msgs)
        assert result == msgs

-    def test_truncation_fallback_no_client(self, compressor):
-        # compressor has client=None, so should use truncation fallback
+    def test_no_client_aborts_compression_with_messages_preserved(self, compressor):
+        """compressor has no provider configured, so _generate_summary returns
+        None → compression aborts entirely.  Messages must be returned
+        unchanged (no placeholder, no drop) and _last_compress_aborted set."""
        msgs = [{"role": "system", "content": "System prompt"}] + self._make_messages(10)
        result = compressor.compress(msgs)
-        assert len(result) < len(msgs)
-        # Should keep system message and last N
-        assert result[0]["role"] == "system"
-        assert compressor.compression_count == 1
+        # Abort path: messages preserved byte-for-byte
+        assert result == msgs
+        assert compressor._last_compress_aborted is True
+        # Compression count NOT incremented on abort — nothing was compressed.
+        assert compressor.compression_count == 0

    def test_compression_increments_count(self, compressor):
        msgs = self._make_messages(10)
-        compressor.compress(msgs)
-        assert compressor.compression_count == 1
-        compressor.compress(msgs)
-        assert compressor.compression_count == 2
+        mock_resp = MagicMock()
+        mock_resp.choices = [MagicMock()]
+        mock_resp.choices[0].message.content = "summary text"
+        with patch("agent.context_compressor.call_llm", return_value=mock_resp):
+            compressor.compress(msgs)
+            assert compressor.compression_count == 1
+            # No cooldown reset is needed (there was no prior failure);
+            # the second call reuses the same mocked summary response
+            # and must increment the counter a second time.
+            compressor.compress(msgs)
+            assert compressor.compression_count == 2

    def test_protects_first_and_last(self, compressor):
        msgs = self._make_messages(10)
@ -128,7 +138,11 @@ class TestGenerateSummaryNoneContent:
            {"role": "user" if i % 2 == 0 else "assistant", "content": f"msg {i}"}
            for i in range(10)
        ]
-        result = c.compress(msgs)
+        mock_resp = MagicMock()
+        mock_resp.choices = [MagicMock()]
+        mock_resp.choices[0].message.content = "summary text"
+        with patch("agent.context_compressor.call_llm", return_value=mock_resp):
+            result = c.compress(msgs)
        assert len(result) < len(msgs)


@ -716,11 +730,14 @@ class TestAuxModelFallbackSurfacedToCallers:


 class TestSummaryFailureTrackingForGatewayWarning:
-    """When summary generation fails, the compressor must record dropped count
-    + fallback flag so gateway hygiene & /compress can surface a visible
-    warning instead of silently dropping context."""
+    """When summary generation fails, the compressor must ABORT compression
+    entirely (return the original messages unchanged) and set the abort flag
+    so gateway hygiene & /compress can surface a visible warning.  Previous
+    behavior of inserting a static "summary unavailable" placeholder while
+    silently dropping the middle window has been removed — losing N turns
+    of context is worse than freezing the chat until the user retries."""

-    def test_compress_records_fallback_and_dropped_count_on_summary_failure(self):
+    def test_compress_aborts_and_preserves_messages_on_summary_failure(self):
        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
            c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=2)

@ -740,16 +757,23 @@ class TestSummaryFailureTrackingForGatewayWarning:
        with patch("agent.context_compressor.call_llm", side_effect=Exception("404 model not found")):
            result = c.compress(msgs)

-        assert c._last_summary_fallback_used is True
-        assert c._last_summary_dropped_count > 0
+        # Abort flag set, error recorded
+        assert c._last_compress_aborted is True
        assert c._last_summary_error is not None
-        # Result must still be well-formed (fallback summary present).
-        assert any(
+        # No fallback inserted, no messages dropped
+        assert c._last_summary_fallback_used is False
+        assert c._last_summary_dropped_count == 0
+        # Original messages preserved byte-for-byte — the agent loop's
+        # "did compression help?" check (len(after) < len(before)) sees a
+        # no-op and stops looping.
+        assert result == msgs
+        # No "Summary generation was unavailable" placeholder leaked in.
+        assert not any(
            isinstance(m.get("content"), str) and "Summary generation was unavailable" in m["content"]
            for m in result
        )

-    def test_compress_clears_fallback_flag_on_subsequent_success(self):
+    def test_compress_clears_abort_flag_on_subsequent_success(self):
        mock_response = MagicMock()
        mock_response.choices = [MagicMock()]
        mock_response.choices[0].message.content = "summary text"
@ -768,18 +792,57 @@ class TestSummaryFailureTrackingForGatewayWarning:
            {"role": "user", "content": "msg 7"},
        ]

-        # First call fails, second succeeds — flag must reset on second compress.
+        # First call fails, second succeeds — abort flag must reset on second compress.
        with patch("agent.context_compressor.call_llm", side_effect=Exception("boom")):
            c.compress(msgs)
-        assert c._last_summary_fallback_used is True
+        assert c._last_compress_aborted is True

        # Reset cooldown to allow retry on second compress
        c._summary_failure_cooldown_until = 0.0
        with patch("agent.context_compressor.call_llm", return_value=mock_response):
            c.compress(msgs)
+        assert c._last_compress_aborted is False
        assert c._last_summary_fallback_used is False
        assert c._last_summary_dropped_count == 0

+    def test_force_true_bypasses_failure_cooldown(self):
+        """Manual /compress passes force=True so it can retry immediately
+        after an auto-compress abort instead of waiting out the 30-60s
+        cooldown."""
+        mock_response = MagicMock()
+        mock_response.choices = [MagicMock()]
+        mock_response.choices[0].message.content = "summary text"
+
+        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
+            c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=2)
+
+        msgs = [
+            {"role": "system", "content": "sys"},
+            {"role": "user", "content": "msg 1"},
+            {"role": "assistant", "content": "msg 2"},
+            {"role": "user", "content": "msg 3"},
+            {"role": "assistant", "content": "msg 4"},
+            {"role": "user", "content": "msg 5"},
+            {"role": "assistant", "content": "msg 6"},
+            {"role": "user", "content": "msg 7"},
+        ]
+
+        # Pre-populate an active cooldown (as if a prior auto-compress aborted).
+        import time as _time
+        c._summary_failure_cooldown_until = _time.monotonic() + 999.0
+
+        # Without force, _generate_summary would short-circuit on cooldown
+        # and return None → abort.  With force=True the cooldown is cleared
+        # and the call goes through.
+        with patch("agent.context_compressor.call_llm", return_value=mock_response):
+            result = c.compress(msgs, force=True)
+
+        assert c._last_compress_aborted is False
+        # Cooldown was cleared and a real summary attempt was made.
+        assert c._summary_failure_cooldown_until == 0.0
+        # Result is actually compressed (shorter than input).
+        assert len(result) < len(msgs)
+

 class TestSummaryPrefixNormalization:
    def test_legacy_prefix_is_replaced(self):
@ -1338,7 +1401,11 @@ class TestSummaryTargetRatio:
            + [{"role": "user" if i % 2 == 0 else "assistant", "content": f"msg {i}"}
               for i in range(8)]
        )
-        result = c.compress(msgs)
+        mock_resp = MagicMock()
+        mock_resp.choices = [MagicMock()]
+        mock_resp.choices[0].message.content = "summary text"
+        with patch("agent.context_compressor.call_llm", return_value=mock_resp):
+            result = c.compress(msgs)
        # System prompt (msg[0]) survives as head
        assert result[0]["role"] == "system"
        assert result[0]["content"].startswith("System prompt")
--- a/tests/gateway/test_compress_command.py
+++ b/tests/gateway/test_compress_command.py
@ -130,19 +130,15 @@ async def test_compress_command_explains_when_token_estimate_rises():


@pytest.mark.asyncio
-async def test_compress_command_appends_warning_when_summary_generation_fails():
-    """When the auxiliary summariser fails and the compressor inserts a static
-    fallback placeholder, /compress must append a visible ⚠️ warning to its
-    reply. Otherwise the failure is silently logged and the user has no idea
-    earlier context is unrecoverable."""
+async def test_compress_command_appends_warning_when_compression_aborts():
+    """When the auxiliary summariser fails and the compressor ABORTS (returns
+    messages unchanged), /compress must append a visible ⚠️ warning to its
+    reply telling the user nothing was dropped and how to retry. Otherwise
+    the failure is silently logged and the user has no idea why nothing
+    happened."""
    history = _make_history()
-    # Compressed shape is irrelevant for this test — we only care that the
-    # warning surfaces. Drop one message so the headline is non-noop.
-    compressed = [
-        history[0],
-        {"role": "assistant", "content": "[fallback placeholder]"},
-        history[-1],
-    ]
+    # Abort path: compressor returns the input messages unchanged.
+    compressed = list(history)
    runner = _make_runner(history)
    agent_instance = MagicMock()
    agent_instance.shutdown_memory_provider = MagicMock()
@ -150,10 +146,11 @@ async def test_compress_command_appends_warning_when_summary_generation_fails():
    agent_instance._cached_system_prompt = ""
    agent_instance.tools = None
    agent_instance.context_compressor.has_content_to_compress.return_value = True
-    # Simulate summary-generation failure: fallback flag set, dropped count
-    # populated, error string captured.
-    agent_instance.context_compressor._last_summary_fallback_used = True
-    agent_instance.context_compressor._last_summary_dropped_count = 7
+    # Simulate compression aborting (force=True bypassed cooldown but the
+    # aux LLM is genuinely broken).
+    agent_instance.context_compressor._last_compress_aborted = True
+    agent_instance.context_compressor._last_summary_fallback_used = False
+    agent_instance.context_compressor._last_summary_dropped_count = 0
    agent_instance.context_compressor._last_summary_error = (
        "404 model not found: gemini-3-flash-preview"
    )
@ -164,7 +161,7 @@ async def test_compress_command_appends_warning_when_summary_generation_fails():
        if messages == history:
            return 100
        if messages == compressed:
-            return 60
+            return 100
        raise AssertionError(f"unexpected transcript: {messages!r}")

    with (
@ -175,16 +172,14 @@ async def test_compress_command_appends_warning_when_summary_generation_fails():
    ):
        result = await runner._handle_compress_command(_make_event())

-    # The compress reply itself still goes through (the transcript was rewritten).
-    assert "Compressed:" in result
-    # ...but a clearly-marked warning must be appended.
+    # A clearly-marked warning must be appended.
    assert "⚠️" in result
-    assert "Summary generation failed" in result
+    assert "Compression aborted" in result
    # Underlying error must surface so users can fix their config.
    assert "404 model not found" in result
-    # Dropped count must be visible — silently losing N messages is the bug.
-    assert "7" in result
-    assert "historical message(s) were removed" in result
+    # User must be told nothing was dropped — the whole point of the
+    # new behavior is no silent data loss.
+    assert "No messages were dropped" in result
    agent_instance.shutdown_memory_provider.assert_called_once()
    agent_instance.close.assert_called_once()

@ -210,6 +205,7 @@ async def test_compress_command_surfaces_aux_model_failure_even_when_recovered()
    agent_instance.tools = None
    agent_instance.context_compressor.has_content_to_compress.return_value = True
    # Fallback placeholder was NOT used — recovery succeeded.
+    agent_instance.context_compressor._last_compress_aborted = False
    agent_instance.context_compressor._last_summary_fallback_used = False
    agent_instance.context_compressor._last_summary_dropped_count = 0
    agent_instance.context_compressor._last_summary_error = None
--- a/tests/gateway/test_session_hygiene.py
+++ b/tests/gateway/test_session_hygiene.py
@ -396,11 +396,12 @@ async def test_session_hygiene_messages_stay_in_originating_topic(monkeypatch, t


@pytest.mark.asyncio
-async def test_session_hygiene_warns_user_when_summary_generation_fails(monkeypatch, tmp_path):
+async def test_session_hygiene_warns_user_when_compression_aborts(monkeypatch, tmp_path):
    """When auxiliary compression's summary LLM call fails, the compressor
-    inserts a static fallback and the dropped turns are unrecoverable.
-    Gateway must surface a visible ⚠️ warning to the user, including
-    thread_id metadata so it lands in the originating topic/thread."""
+    ABORTS — returns messages unchanged, sets _last_compress_aborted=True,
+    and drops nothing.  Gateway must surface a visible ⚠️ warning to the
+    user (including thread_id metadata so it lands in the originating
+    topic/thread) saying the conversation is unchanged and how to retry."""
    fake_dotenv = types.ModuleType("dotenv")
    fake_dotenv.load_dotenv = lambda *args, **kwargs: None
    monkeypatch.setitem(sys.modules, "dotenv", fake_dotenv)
@ -415,17 +416,18 @@ async def test_session_hygiene_warns_user_when_summary_generation_fails(monkeypa
            self.shutdown_memory_provider = MagicMock()
            self.close = MagicMock()
            # Simulate a compressor that hit summary-generation failure
-            # and inserted the static fallback placeholder.
+            # and ABORTED — no fallback inserted, no messages dropped.
            self.context_compressor = SimpleNamespace(
-                _last_summary_fallback_used=True,
-                _last_summary_dropped_count=42,
+                _last_compress_aborted=True,
+                _last_summary_fallback_used=False,
+                _last_summary_dropped_count=0,
                _last_summary_error="404 model not found: gemini-3-flash-preview",
            )
            type(self).last_instance = self

        def _compress_context(self, messages, *_args, **_kwargs):
-            self.session_id = f"{self.session_id}_compressed"
-            return ([{"role": "assistant", "content": "compressed"}], None)
+            # Abort path: messages preserved unchanged, session NOT rotated.
+            return (messages, None)

    fake_run_agent = types.ModuleType("run_agent")
    fake_run_agent.AIAgent = FakeCompressAgentWithSummaryFailure
@ -494,16 +496,17 @@ async def test_session_hygiene_warns_user_when_summary_generation_fails(monkeypa
    result = await runner._handle_message(event)

    assert result == "ok"
-    # The compressor reported summary-failure → exactly one warning
-    # message must have been delivered to the user.
-    warning_messages = [s for s in adapter.sent if "Context compression summary failed" in s["content"]]
+    # The compressor reported abort → exactly one warning message must
+    # have been delivered to the user.
+    warning_messages = [s for s in adapter.sent if "Context compression aborted" in s["content"]]
    assert len(warning_messages) == 1, (
-        f"Expected 1 compression-failure warning, got {len(warning_messages)}: {adapter.sent}"
+        f"Expected 1 compression-aborted warning, got {len(warning_messages)}: {adapter.sent}"
    )
    warn = warning_messages[0]
-    # Warning must include the dropped count and the underlying error.
-    assert "42" in warn["content"]
+    # Warning must include the underlying error and tell the user nothing
+    # was dropped.
    assert "404" in warn["content"]
+    assert "No messages were dropped" in warn["content"]
    # Warning must land in the originating topic/thread, not the main channel.
    assert warn["chat_id"] == "-1001"
    assert warn["metadata"] == {"thread_id": "17585"}