mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-02 07:11:49 +00:00
fix(compress): abort instead of dropping messages when summary LLM fails (#28102)
When auxiliary compression's summary generation returns None (aux model errored, returned non-JSON, timed out, etc.), the compressor previously still dropped every middle message between compress_start..compress_end and replaced them with a static 'Summary generation was unavailable' placeholder. The session kept going, but the user silently lost N turns of context for nothing.

New behavior: on summary failure, compress() aborts entirely — it returns the input messages unchanged and sets _last_compress_aborted=True. The existing _summary_failure_cooldown_until gate (30-60s) keeps the aux model from being burned on every turn. Auto-compress callers detect the no-op (len(after) == len(before)) and stop looping. The chat is 'frozen' at its current size until the next /compress or /new.

Manual /compress (CLI + gateway) now passes force=True, which clears the cooldown so users can retry immediately after an auto-abort. If the manual retry also fails, the user gets a visible warning telling them nothing was dropped and how to retry.

- agent/context_compressor.py: compress() gains a force= kwarg; the failure branch sets _last_compress_aborted and returns messages unchanged instead of inserting a placeholder.
- run_agent.py: _compress_context() forwards force= to agent.conversation_compression.compress_context(), which detects the abort, surfaces a warning, skips session rotation entirely, and returns messages unchanged.
- cli.py + gateway/run.py: manual /compress paths pass force=True.
- gateway/run.py: hygiene + /compress handlers detect _last_compress_aborted and emit the new 'Compression aborted' warning (gateway.compress.aborted) instead of the old 'N historical messages were removed' message.
- locales/*.yaml: new gateway.compress.aborted key in all 16 locales.
- tests: updated to assert the abort contract (messages preserved, compression_count not incremented, abort flag set, no placeholder leaked). New test_force_true_bypasses_failure_cooldown covers the manual-retry path.
This commit is contained in:
parent
65e0c49b77
commit
1634397ddb
24 changed files with 249 additions and 103 deletions
|
|
@ -586,6 +586,12 @@ class ContextCompressor(ContextEngine):
|
|||
# (gateway hygiene, /compress) can surface a visible warning.
|
||||
self._last_summary_dropped_count: int = 0
|
||||
self._last_summary_fallback_used: bool = False
|
||||
# When summary generation fails we now ABORT compression entirely
|
||||
# and return the original messages unchanged instead of dropping
|
||||
# the middle window with a static placeholder. Callers inspect
|
||||
# this flag to know "compression was attempted but aborted, freeze
|
||||
# the chat until the user manually retries via /compress".
|
||||
self._last_compress_aborted: bool = False
|
||||
# When a user-configured summary model fails and we recover by
|
||||
# retrying on the main model, record the failure so gateway /
|
||||
# CLI callers can still warn the user even though compression
|
||||
|
|
@ -1479,7 +1485,7 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
|||
# Main compression entry point
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def compress(self, messages: List[Dict[str, Any]], current_tokens: int = None, focus_topic: str = None) -> List[Dict[str, Any]]:
|
||||
def compress(self, messages: List[Dict[str, Any]], current_tokens: int = None, focus_topic: str = None, force: bool = False) -> List[Dict[str, Any]]:
|
||||
"""Compress conversation messages by summarizing middle turns.
|
||||
|
||||
Algorithm:
|
||||
|
|
@ -1497,6 +1503,9 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
|||
provided, the summariser will prioritise preserving information
|
||||
related to this topic and be more aggressive about compressing
|
||||
everything else. Inspired by Claude Code's ``/compact``.
|
||||
force: If True, clear any active summary-failure cooldown before
|
||||
running so a manual ``/compress`` can retry immediately after
|
||||
an auto-compression abort. Auto-compress callers pass False.
|
||||
"""
|
||||
# Reset per-call summary failure state — callers inspect these fields
|
||||
# after compress() returns to decide whether to surface a warning.
|
||||
|
|
@ -1505,6 +1514,13 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
|||
self._last_summary_error = None
|
||||
self._last_aux_model_failure_error = None
|
||||
self._last_aux_model_failure_model = None
|
||||
self._last_compress_aborted = False
|
||||
|
||||
# Manual /compress (force=True) bypasses the failure cooldown so the
|
||||
# user can retry immediately after an auto-compress abort. Without
|
||||
# this, /compress would silently no-op for 30-60s after a failure.
|
||||
if force and self._summary_failure_cooldown_until > 0.0:
|
||||
self._summary_failure_cooldown_until = 0.0
|
||||
n_messages = len(messages)
|
||||
# Only need head + 3 tail messages minimum (token budget decides the real tail size)
|
||||
_min_for_compress = self._protect_head_size(messages) + 3 + 1
|
||||
|
|
@ -1580,6 +1596,30 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
|||
# Phase 3: Generate structured summary
|
||||
summary = self._generate_summary(turns_to_summarize, focus_topic=focus_topic)
|
||||
|
||||
# If summary generation failed, ABORT compression entirely. Returning
|
||||
# the original messages unchanged preserves the full conversation
|
||||
# context. Previously this branch dropped every middle message and
|
||||
# replaced them with a static "summary unavailable" placeholder,
|
||||
# which silently lost N turns of work whenever the aux LLM hiccuped.
|
||||
# Auto-compress callers detect the no-op (post-compress length ==
|
||||
# pre-compress length) and stop looping. The next call to
|
||||
# _generate_summary is gated by _summary_failure_cooldown_until, so
|
||||
# we don't burn the aux model every turn. Users can force a retry
|
||||
# via /compress (which passes force=True to clear the cooldown).
|
||||
if not summary:
|
||||
n_skipped = compress_end - compress_start
|
||||
self._last_summary_dropped_count = 0 # nothing actually dropped
|
||||
self._last_summary_fallback_used = False
|
||||
self._last_compress_aborted = True
|
||||
if not self.quiet_mode:
|
||||
logger.warning(
|
||||
"Summary generation failed — aborting compression. "
|
||||
"%d message(s) preserved unchanged. Conversation is "
|
||||
"frozen until the next /compress or /new.",
|
||||
n_skipped,
|
||||
)
|
||||
return messages
|
||||
|
||||
# Phase 4: Assemble compressed message list
|
||||
compressed = []
|
||||
for i in range(compress_start):
|
||||
|
|
@ -1594,22 +1634,6 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
|||
)
|
||||
compressed.append(msg)
|
||||
|
||||
# If LLM summary failed, insert a static fallback so the model
|
||||
# knows context was lost rather than silently dropping everything.
|
||||
if not summary:
|
||||
if not self.quiet_mode:
|
||||
logger.warning("Summary generation failed — inserting static fallback context marker")
|
||||
n_dropped = compress_end - compress_start
|
||||
self._last_summary_dropped_count = n_dropped
|
||||
self._last_summary_fallback_used = True
|
||||
summary = (
|
||||
f"{SUMMARY_PREFIX}\n"
|
||||
f"Summary generation was unavailable. {n_dropped} message(s) were "
|
||||
f"removed to free context space but could not be summarized. The removed "
|
||||
f"messages contained earlier work in this session. Continue based on the "
|
||||
f"recent messages below and the current state of any files or resources."
|
||||
)
|
||||
|
||||
_merge_summary_into_tail = False
|
||||
last_head_role = messages[compress_start - 1].get("role", "user") if compress_start > 0 else "user"
|
||||
first_tail_role = messages[compress_end].get("role", "user") if compress_end < n_messages else "user"
|
||||
|
|
|
|||
|
|
@ -256,6 +256,7 @@ def compress_context(
|
|||
approx_tokens: Optional[int] = None,
|
||||
task_id: str = "default",
|
||||
focus_topic: Optional[str] = None,
|
||||
force: bool = False,
|
||||
) -> Tuple[list, str]:
|
||||
"""Compress conversation context and split the session in SQLite.
|
||||
|
||||
|
|
@ -268,9 +269,17 @@ def compress_context(
|
|||
focus_topic: Optional focus string for guided compression — the
|
||||
summariser will prioritise preserving information related to
|
||||
this topic. Inspired by Claude Code's ``/compact <focus>``.
|
||||
force: If True, bypass any active summary-failure cooldown. Set
|
||||
by the manual ``/compress`` slash command so users can retry
|
||||
immediately after an auto-compress abort. Auto-compress
|
||||
callers use the default ``False``.
|
||||
|
||||
Returns:
|
||||
``(compressed_messages, new_system_prompt)`` tuple.
|
||||
``(compressed_messages, new_system_prompt)`` tuple. When
|
||||
compression aborts (aux LLM failed to produce a usable summary),
|
||||
returns the original messages unchanged and the existing system
|
||||
prompt — the session is NOT rotated. Callers should detect the
|
||||
no-op via ``len(returned) == len(input)`` and stop the retry loop.
|
||||
"""
|
||||
_pre_msg_count = len(messages)
|
||||
logger.info(
|
||||
|
|
@ -291,12 +300,31 @@ def compress_context(
|
|||
pass
|
||||
|
||||
try:
|
||||
compressed = agent.context_compressor.compress(messages, current_tokens=approx_tokens, focus_topic=focus_topic)
|
||||
compressed = agent.context_compressor.compress(messages, current_tokens=approx_tokens, focus_topic=focus_topic, force=force)
|
||||
except TypeError:
|
||||
# Plugin context engine with strict signature that doesn't accept
|
||||
# focus_topic — fall back to calling without it.
|
||||
# focus_topic / force — fall back to calling without them.
|
||||
compressed = agent.context_compressor.compress(messages, current_tokens=approx_tokens)
|
||||
|
||||
# If compression aborted (aux LLM failed to produce a usable summary)
|
||||
# the compressor returns the input messages unchanged. Surface the
|
||||
# error to the user, skip the session-rotation work entirely (no
|
||||
# session has logically ended), and let auto-compress callers detect
|
||||
# the no-op via len(returned) == len(input).
|
||||
if getattr(agent.context_compressor, "_last_compress_aborted", False):
|
||||
_err = getattr(agent.context_compressor, "_last_summary_error", None) or "unknown error"
|
||||
if getattr(agent, "_last_compression_summary_warning", None) != _err:
|
||||
agent._last_compression_summary_warning = _err
|
||||
agent._emit_warning(
|
||||
f"⚠ Compression aborted: {_err}. "
|
||||
"No messages were dropped — conversation continues unchanged. "
|
||||
"Run /compress to retry, or /new to start a fresh session."
|
||||
)
|
||||
_existing_sp = getattr(agent, "_cached_system_prompt", None)
|
||||
if not _existing_sp:
|
||||
_existing_sp = agent._build_system_prompt(system_message)
|
||||
return messages, _existing_sp
|
||||
|
||||
summary_error = getattr(agent.context_compressor, "_last_summary_error", None)
|
||||
if summary_error:
|
||||
if getattr(agent, "_last_compression_summary_warning", None) != summary_error:
|
||||
|
|
|
|||
1
cli.py
1
cli.py
|
|
@ -9183,6 +9183,7 @@ class HermesCLI:
|
|||
None,
|
||||
approx_tokens=approx_tokens,
|
||||
focus_topic=focus_topic or None,
|
||||
force=True,
|
||||
)
|
||||
self.conversation_history = compressed
|
||||
# _compress_context ends the old session and creates a new child
|
||||
|
|
|
|||
|
|
@ -7778,22 +7778,24 @@ class GatewayRunner:
|
|||
)
|
||||
|
||||
# If summary generation failed, the
|
||||
# compressor inserted a static fallback
|
||||
# placeholder and the dropped turns are
|
||||
# gone for good. Surface a visible
|
||||
# warning to the gateway user — agent.log
|
||||
# alone is invisible on TG/Discord/etc.
|
||||
# compressor aborts entirely and returns
|
||||
# messages unchanged — nothing is dropped.
|
||||
# Surface a visible warning to the gateway
|
||||
# user — agent.log alone is invisible on
|
||||
# TG/Discord/etc. — so they know the chat
|
||||
# is "frozen" at the current size and can
|
||||
# /compress to retry or /reset to start
|
||||
# fresh.
|
||||
_comp = getattr(_hyg_agent, "context_compressor", None)
|
||||
if _comp is not None and getattr(_comp, "_last_summary_fallback_used", False):
|
||||
_dropped = getattr(_comp, "_last_summary_dropped_count", 0)
|
||||
if _comp is not None and getattr(_comp, "_last_compress_aborted", False):
|
||||
_err = getattr(_comp, "_last_summary_error", None) or "unknown error"
|
||||
_warn_msg = (
|
||||
"⚠️ Context compression summary failed "
|
||||
f"({_err}). {_dropped} historical message(s) "
|
||||
"were removed and replaced with a placeholder. "
|
||||
"Earlier context is no longer recoverable. "
|
||||
"Consider /reset for a clean session, or check "
|
||||
"your auxiliary.compression model configuration."
|
||||
"⚠️ Context compression aborted "
|
||||
f"({_err}). No messages were dropped — "
|
||||
"conversation is unchanged. Run /compress "
|
||||
"to retry, /reset for a clean session, or "
|
||||
"check your auxiliary.compression model "
|
||||
"configuration."
|
||||
)
|
||||
try:
|
||||
_adapter = self.adapters.get(source.platform)
|
||||
|
|
@ -11404,7 +11406,7 @@ class GatewayRunner:
|
|||
loop = asyncio.get_running_loop()
|
||||
compressed, _ = await loop.run_in_executor(
|
||||
None,
|
||||
lambda: tmp_agent._compress_context(msgs, "", approx_tokens=approx_tokens, focus_topic=focus_topic)
|
||||
lambda: tmp_agent._compress_context(msgs, "", approx_tokens=approx_tokens, focus_topic=focus_topic, force=True)
|
||||
)
|
||||
|
||||
# _compress_context already calls end_session() on the old session
|
||||
|
|
@ -11433,8 +11435,11 @@ class GatewayRunner:
|
|||
# Detect summary-generation failure so we can surface a
|
||||
# visible warning to the user even on the manual /compress
|
||||
# path (otherwise the failure is silently logged).
|
||||
_summary_failed = bool(getattr(compressor, "_last_summary_fallback_used", False))
|
||||
_dropped_count = int(getattr(compressor, "_last_summary_dropped_count", 0) or 0)
|
||||
# _last_compress_aborted means the aux LLM returned no
|
||||
# usable summary and the compressor preserved messages
|
||||
# unchanged (no drop, no placeholder). force=True was
|
||||
# passed above so any active cooldown is bypassed.
|
||||
_summary_aborted = bool(getattr(compressor, "_last_compress_aborted", False))
|
||||
_summary_err = getattr(compressor, "_last_summary_error", None)
|
||||
# Separately: did the user's CONFIGURED aux model fail
|
||||
# and we recovered via main? Surface that as an info
|
||||
|
|
@ -11452,12 +11457,11 @@ class GatewayRunner:
|
|||
lines.append(summary["token_line"])
|
||||
if summary["note"]:
|
||||
lines.append(summary["note"])
|
||||
if _summary_failed:
|
||||
if _summary_aborted:
|
||||
lines.append(
|
||||
t(
|
||||
"gateway.compress.summary_failed",
|
||||
"gateway.compress.aborted",
|
||||
error=(_summary_err or "unknown error"),
|
||||
count=_dropped_count,
|
||||
)
|
||||
)
|
||||
elif _aux_fail_model:
|
||||
|
|
|
|||
|
|
@ -90,6 +90,7 @@ gateway:
|
|||
nothing_to_do: "Niks om saam te pers nie (die transkripsie is steeds heeltemal beskermde konteks)."
|
||||
focus_line: "Fokus: \"{topic}\""
|
||||
summary_failed: "⚠️ Opsomming kon nie gegenereer word nie ({error}). {count} historiese boodskap(pe) is verwyder en met 'n plekhouer vervang; vroeëre konteks kan nie meer herstel word nie. Oorweeg om jou auxiliary.compression-modelopstelling na te gaan."
|
||||
aborted: "⚠️ Kompressie gestaak ({error}). Geen boodskappe is laat val nie — die gesprek is onveranderd. Voer /compress uit om weer te probeer, /reset vir 'n skoon sessie, of kyk na jou auxiliary.compression-modelkonfigurasie."
|
||||
aux_failed: "ℹ️ Opgestelde saamperseringsmodel `{model}` het misluk ({error}). Herstel met jou hoofmodel — konteks is intakt — maar jy mag dalk `auxiliary.compression.model` in config.yaml wil nagaan."
|
||||
failed: "Saampersing het misluk: {error}"
|
||||
|
||||
|
|
|
|||
|
|
@ -90,6 +90,7 @@ gateway:
|
|||
nothing_to_do: "Noch nichts zu komprimieren (das Transkript ist weiterhin vollständig geschützter Kontext)."
|
||||
focus_line: "Fokus: \"{topic}\""
|
||||
summary_failed: "⚠️ Zusammenfassungsgenerierung fehlgeschlagen ({error}). {count} historische Nachricht(en) wurden entfernt und durch einen Platzhalter ersetzt; früherer Kontext ist nicht mehr wiederherstellbar. Überprüfen Sie die Konfiguration des auxiliary.compression-Modells."
|
||||
aborted: "⚠️ Komprimierung abgebrochen ({error}). Keine Nachrichten wurden entfernt — die Konversation ist unverändert. Führe /compress aus, um es erneut zu versuchen, /reset für eine neue Sitzung, oder prüfe deine auxiliary.compression-Modellkonfiguration."
|
||||
aux_failed: "ℹ️ Das konfigurierte Komprimierungsmodell `{model}` ist fehlgeschlagen ({error}). Wiederherstellung mit Ihrem Hauptmodell — Kontext ist intakt — Sie sollten jedoch `auxiliary.compression.model` in config.yaml überprüfen."
|
||||
failed: "Komprimierung fehlgeschlagen: {error}"
|
||||
|
||||
|
|
|
|||
|
|
@ -105,6 +105,7 @@ gateway:
|
|||
nothing_to_do: "Nothing to compress yet (the transcript is still all protected context)."
|
||||
focus_line: "Focus: \"{topic}\""
|
||||
summary_failed: "⚠️ Summary generation failed ({error}). {count} historical message(s) were removed and replaced with a placeholder; earlier context is no longer recoverable. Consider checking your auxiliary.compression model configuration."
|
||||
aborted: "⚠️ Compression aborted ({error}). No messages were dropped — conversation is unchanged. Run /compress to retry, /reset for a clean session, or check your auxiliary.compression model configuration."
|
||||
aux_failed: "ℹ️ Configured compression model `{model}` failed ({error}). Recovered using your main model — context is intact — but you may want to check `auxiliary.compression.model` in config.yaml."
|
||||
failed: "Compression failed: {error}"
|
||||
|
||||
|
|
|
|||
|
|
@ -90,6 +90,7 @@ gateway:
|
|||
nothing_to_do: "Aún no hay nada que comprimir (la transcripción sigue siendo todo contexto protegido)."
|
||||
focus_line: "Enfoque: \"{topic}\""
|
||||
summary_failed: "⚠️ Falló la generación del resumen ({error}). Se eliminaron {count} mensaje(s) históricos y se reemplazaron por un marcador; el contexto anterior ya no se puede recuperar. Considera revisar la configuración del modelo auxiliary.compression."
|
||||
aborted: "⚠️ Compresión abortada ({error}). No se eliminó ningún mensaje — la conversación está intacta. Ejecuta /compress para reintentar, /reset para una sesión limpia, o revisa la configuración de tu modelo auxiliary.compression."
|
||||
aux_failed: "ℹ️ El modelo de compresión configurado `{model}` falló ({error}). Recuperado con tu modelo principal — el contexto está intacto — pero quizá quieras revisar `auxiliary.compression.model` en config.yaml."
|
||||
failed: "Compresión fallida: {error}"
|
||||
|
||||
|
|
|
|||
|
|
@ -90,6 +90,7 @@ gateway:
|
|||
nothing_to_do: "Rien à compresser pour l'instant (la transcription est encore entièrement du contexte protégé)."
|
||||
focus_line: "Focus : \"{topic}\""
|
||||
summary_failed: "⚠️ Échec de la génération du résumé ({error}). {count} message(s) historique(s) ont été supprimés et remplacés par un espace réservé ; le contexte antérieur n'est plus récupérable. Vérifiez la configuration du modèle auxiliary.compression."
|
||||
aborted: "⚠️ Compression interrompue ({error}). Aucun message n'a été supprimé — la conversation est inchangée. Lancez /compress pour réessayer, /reset pour une nouvelle session, ou vérifiez la configuration de votre modèle auxiliary.compression."
|
||||
aux_failed: "ℹ️ Le modèle de compression configuré `{model}` a échoué ({error}). Récupéré avec votre modèle principal — le contexte est intact — mais vous pouvez vérifier `auxiliary.compression.model` dans config.yaml."
|
||||
failed: "Échec de la compression : {error}"
|
||||
|
||||
|
|
|
|||
|
|
@ -94,6 +94,7 @@ gateway:
|
|||
nothing_to_do: "Níl aon rud le dlúthú fós (tá an traschríbhinn fós uile mar chomhthéacs cosanta)."
|
||||
focus_line: "Fócas: \"{topic}\""
|
||||
summary_failed: "⚠️ Theip ar ghiniúint achoimre ({error}). Baineadh {count} teachtaireacht stairiúil agus cuireadh ionadaí ina n-áit; níl an comhthéacs roimhe seo in-aisghabhála a thuilleadh. Smaoinigh ar an gcumraíocht auxiliary.compression a sheiceáil."
|
||||
aborted: "⚠️ Cuireadh deireadh leis an dlúthú ({error}). Níor baineadh aon teachtaireacht — tá an comhrá gan athrú. Rith /compress chun é a thriail arís, /reset le haghaidh seisiún glan, nó seiceáil do chumraíocht samhla auxiliary.compression."
|
||||
aux_failed: "ℹ️ Theip ar an tsamhail dlúthúcháin chumraithe `{model}` ({error}). Aisghafa ag baint úsáide as do phríomhshamhail — tá an comhthéacs slán — ach b'fhéidir gur mhaith leat `auxiliary.compression.model` i config.yaml a sheiceáil."
|
||||
failed: "Theip ar dhlúthú: {error}"
|
||||
|
||||
|
|
|
|||
|
|
@ -90,6 +90,7 @@ gateway:
|
|||
nothing_to_do: "Még nincs mit tömöríteni (a teljes átirat még védett kontextus)."
|
||||
focus_line: "Fókusz: \"{topic}\""
|
||||
summary_failed: "⚠️ Az összefoglaló generálása sikertelen ({error}). {count} korábbi üzenet eltávolítva és helykitöltővel helyettesítve; a korábbi kontextus már nem helyreállítható. Érdemes ellenőrizni az auxiliary.compression modell konfigurációját."
|
||||
aborted: "⚠️ Tömörítés megszakítva ({error}). Egyetlen üzenet sem lett eldobva — a beszélgetés változatlan. Futtass /compress parancsot az újrapróbálkozáshoz, /reset egy új munkamenethez, vagy ellenőrizd az auxiliary.compression modell konfigurációt."
|
||||
aux_failed: "ℹ️ A beállított tömörítőmodell (`{model}`) hibát adott ({error}). A főmodellel helyreállítva — a kontextus érintetlen — de érdemes ellenőrizni az `auxiliary.compression.model` beállítást a config.yaml fájlban."
|
||||
failed: "Tömörítés sikertelen: {error}"
|
||||
|
||||
|
|
|
|||
|
|
@ -90,6 +90,7 @@ gateway:
|
|||
nothing_to_do: "Niente da comprimere per ora (la trascrizione è ancora tutta contesto protetto)."
|
||||
focus_line: "Focus: \"{topic}\""
|
||||
summary_failed: "⚠️ Generazione del riepilogo non riuscita ({error}). {count} messaggio/i storico/i sono stati rimossi e sostituiti con un segnaposto; il contesto precedente non è più recuperabile. Considera di controllare la configurazione del modello auxiliary.compression."
|
||||
aborted: "⚠️ Compressione interrotta ({error}). Nessun messaggio è stato eliminato — la conversazione è invariata. Esegui /compress per riprovare, /reset per una nuova sessione, o controlla la configurazione del modello auxiliary.compression."
|
||||
aux_failed: "ℹ️ Il modello di compressione configurato `{model}` non è riuscito ({error}). Recupero effettuato usando il modello principale — il contesto è intatto — ma potresti voler controllare `auxiliary.compression.model` in config.yaml."
|
||||
failed: "Compressione non riuscita: {error}"
|
||||
|
||||
|
|
|
|||
|
|
@ -90,6 +90,7 @@ gateway:
|
|||
nothing_to_do: "まだ圧縮するものがありません (トランスクリプトはすべて保護されたコンテキストのままです)。"
|
||||
focus_line: "フォーカス: \"{topic}\""
|
||||
summary_failed: "⚠️ 要約の生成に失敗しました ({error})。{count} 件の履歴メッセージが削除され、プレースホルダーに置き換えられました。以前のコンテキストは復元できません。auxiliary.compression モデルの設定を確認してください。"
|
||||
aborted: "⚠️ 圧縮が中止されました ({error})。メッセージは削除されていません — 会話はそのままです。再試行するには /compress、新しいセッションを開始するには /reset を実行するか、auxiliary.compression モデル設定を確認してください。"
|
||||
aux_failed: "ℹ️ 構成された圧縮モデル `{model}` が失敗しました ({error})。メインモデルで復旧しました — コンテキストは無傷です — config.yaml の `auxiliary.compression.model` を確認するとよいでしょう。"
|
||||
failed: "圧縮に失敗しました: {error}"
|
||||
|
||||
|
|
|
|||
|
|
@ -90,6 +90,7 @@ gateway:
|
|||
nothing_to_do: "아직 압축할 내용이 없습니다 (대화 내용이 모두 보호된 컨텍스트입니다)."
|
||||
focus_line: "초점: \"{topic}\""
|
||||
summary_failed: "⚠️ 요약 생성에 실패했습니다 ({error}). 과거 메시지 {count}개가 제거되어 자리표시자로 대체되었으며, 이전 컨텍스트는 더 이상 복구할 수 없습니다. auxiliary.compression 모델 설정을 확인해 보세요."
|
||||
aborted: "⚠️ 압축이 중단되었습니다 ({error}). 메시지가 삭제되지 않았으며 대화는 그대로 유지됩니다. 다시 시도하려면 /compress를 실행하거나, 새 세션을 시작하려면 /reset을 사용하거나, auxiliary.compression 모델 설정을 확인하세요."
|
||||
aux_failed: "ℹ️ 구성된 압축 모델 `{model}`이(가) 실패했습니다 ({error}). 메인 모델로 복구되어 컨텍스트는 보존되었지만, config.yaml의 `auxiliary.compression.model` 설정을 확인하는 것이 좋습니다."
|
||||
failed: "압축 실패: {error}"
|
||||
|
||||
|
|
|
|||
|
|
@ -90,6 +90,7 @@ gateway:
|
|||
nothing_to_do: "Ainda não há nada para comprimir (a transcrição continua a ser todo o contexto protegido)."
|
||||
focus_line: "Foco: \"{topic}\""
|
||||
summary_failed: "⚠️ Falha ao gerar o resumo ({error}). {count} mensagem(ns) histórica(s) foram removidas e substituídas por um marcador; o contexto anterior já não pode ser recuperado. Considera verificar a configuração do modelo auxiliary.compression."
|
||||
aborted: "⚠️ Compressão abortada ({error}). Nenhuma mensagem foi removida — a conversa está inalterada. Executa /compress para tentar de novo, /reset para uma sessão nova, ou verifica a configuração do modelo auxiliary.compression."
|
||||
aux_failed: "ℹ️ O modelo de compressão configurado `{model}` falhou ({error}). Recuperado com o teu modelo principal — o contexto está intacto — mas talvez queiras verificar `auxiliary.compression.model` em config.yaml."
|
||||
failed: "Compressão falhou: {error}"
|
||||
|
||||
|
|
|
|||
|
|
@ -90,6 +90,7 @@ gateway:
|
|||
nothing_to_do: "Пока нечего сжимать (стенограмма всё ещё полностью является защищённым контекстом)."
|
||||
focus_line: "Фокус: \"{topic}\""
|
||||
summary_failed: "⚠️ Не удалось сгенерировать сводку ({error}). {count} историч. сообщений было удалено и заменено заполнителем; предыдущий контекст больше нельзя восстановить. Проверьте конфигурацию модели auxiliary.compression."
|
||||
aborted: "⚠️ Сжатие прервано ({error}). Сообщения не были удалены — разговор не изменился. Запустите /compress для повторной попытки, /reset для новой сессии или проверьте конфигурацию модели auxiliary.compression."
|
||||
aux_failed: "ℹ️ Настроенная модель сжатия `{model}` дала сбой ({error}). Восстановлено с помощью основной модели — контекст не повреждён — но рекомендуется проверить `auxiliary.compression.model` в config.yaml."
|
||||
failed: "Сжатие не удалось: {error}"
|
||||
|
||||
|
|
|
|||
|
|
@ -90,6 +90,7 @@ gateway:
|
|||
nothing_to_do: "Henüz sıkıştırılacak bir şey yok (transkript hâlâ tamamen korunan bağlam)."
|
||||
focus_line: "Odak: \"{topic}\""
|
||||
summary_failed: "⚠️ Özet oluşturma başarısız ({error}). {count} geçmiş mesaj kaldırılıp yer tutucuyla değiştirildi; önceki bağlam artık kurtarılamaz. auxiliary.compression model yapılandırmanızı kontrol edin."
|
||||
aborted: "⚠️ Sıkıştırma iptal edildi ({error}). Hiçbir mesaj silinmedi — konuşma değişmedi. Tekrar denemek için /compress, temiz bir oturum için /reset komutunu çalıştırın veya auxiliary.compression model yapılandırmanızı kontrol edin."
|
||||
aux_failed: "ℹ️ Yapılandırılmış sıkıştırma modeli `{model}` başarısız oldu ({error}). Ana modelinizle kurtarıldı — bağlam sağlam — ancak config.yaml içindeki `auxiliary.compression.model` öğesini kontrol etmek isteyebilirsiniz."
|
||||
failed: "Sıkıştırma başarısız: {error}"
|
||||
|
||||
|
|
|
|||
|
|
@ -90,6 +90,7 @@ gateway:
|
|||
nothing_to_do: "Поки що немає що стискати (стенограма все ще є повністю захищеним контекстом)."
|
||||
focus_line: "Фокус: \"{topic}\""
|
||||
summary_failed: "⚠️ Не вдалося згенерувати зведення ({error}). {count} історичних повідомлень було видалено та замінено заповнювачем; попередній контекст більше не можна відновити. Перевірте конфігурацію моделі auxiliary.compression."
|
||||
aborted: "⚠️ Стиснення скасовано ({error}). Жодне повідомлення не було видалено — розмова не змінилася. Виконайте /compress, щоб повторити спробу, /reset для нової сесії, або перевірте конфігурацію моделі auxiliary.compression."
|
||||
aux_failed: "ℹ️ Налаштована модель стиснення `{model}` зазнала збою ({error}). Відновлено за допомогою основної моделі — контекст не пошкоджений — але варто перевірити `auxiliary.compression.model` у config.yaml."
|
||||
failed: "Стиснення не вдалося: {error}"
|
||||
|
||||
|
|
|
|||
|
|
@ -90,6 +90,7 @@ gateway:
|
|||
nothing_to_do: "目前沒有可壓縮的內容(對話記錄仍全部為受保護的上下文)。"
|
||||
focus_line: "聚焦:\"{topic}\""
|
||||
summary_failed: "⚠️ 摘要產生失敗({error})。{count} 則歷史訊息已被移除並以佔位符取代;先前的上下文已無法復原。建議檢查 auxiliary.compression 模型設定。"
|
||||
aborted: "⚠️ 壓縮已中止 ({error})。未刪除任何訊息 — 對話保持不變。執行 /compress 重試,執行 /reset 開始新工作階段,或檢查你的 auxiliary.compression 模型設定。"
|
||||
aux_failed: "ℹ️ 設定的壓縮模型 `{model}` 失敗({error})。已使用主要模型復原 — 上下文完整 — 但您可能想檢查 config.yaml 中的 `auxiliary.compression.model`。"
|
||||
failed: "壓縮失敗:{error}"
|
||||
|
||||
|
|
|
|||
|
|
@ -90,6 +90,7 @@ gateway:
|
|||
nothing_to_do: "暂无可压缩内容(对话记录仍全部为受保护上下文)。"
|
||||
focus_line: "聚焦:\"{topic}\""
|
||||
summary_failed: "⚠️ 摘要生成失败({error})。{count} 条历史消息已被移除并替换为占位符;之前的上下文已无法恢复。建议检查 auxiliary.compression 模型配置。"
|
||||
aborted: "⚠️ 压缩已中止 ({error})。未删除任何消息 — 对话保持不变。运行 /compress 重试,运行 /reset 开始新会话,或检查你的 auxiliary.compression 模型配置。"
|
||||
aux_failed: "ℹ️ 配置的压缩模型 `{model}` 失败({error})。已使用主模型恢复 — 上下文完好 — 但您可能想检查 config.yaml 中的 `auxiliary.compression.model`。"
|
||||
failed: "压缩失败:{error}"
|
||||
|
||||
|
|
|
|||
11
run_agent.py
11
run_agent.py
|
|
@ -3714,12 +3714,19 @@ class AIAgent:
|
|||
"""
|
||||
return self.api_mode != "codex_responses"
|
||||
|
||||
def _compress_context(self, messages: list, system_message: str, *, approx_tokens: int = None, task_id: str = "default", focus_topic: str = None) -> tuple:
|
||||
"""Forwarder — see ``agent.conversation_compression.compress_context``."""
|
||||
def _compress_context(self, messages: list, system_message: str, *, approx_tokens: int = None, task_id: str = "default", focus_topic: str = None, force: bool = False) -> tuple:
|
||||
"""Forwarder — see ``agent.conversation_compression.compress_context``.
|
||||
|
||||
``force=True`` is passed by the manual ``/compress`` slash command
|
||||
so users can bypass the summary-failure cooldown after an
|
||||
auto-compress abort. Auto-compress callers use the default
|
||||
``force=False``.
|
||||
"""
|
||||
from agent.conversation_compression import compress_context
|
||||
return compress_context(
|
||||
self, messages, system_message,
|
||||
approx_tokens=approx_tokens, task_id=task_id, focus_topic=focus_topic,
|
||||
force=force,
|
||||
)
|
||||
|
||||
def _set_tool_guardrail_halt(self, decision: ToolGuardrailDecision) -> None:
|
||||
|
|
|
|||
|
|
@ -64,21 +64,31 @@ class TestCompress:
|
|||
result = compressor.compress(msgs)
|
||||
assert result == msgs
|
||||
|
||||
def test_truncation_fallback_no_client(self, compressor):
|
||||
# compressor has client=None, so should use truncation fallback
|
||||
def test_no_client_aborts_compression_with_messages_preserved(self, compressor):
|
||||
"""compressor has no provider configured, so _generate_summary returns
|
||||
None → compression aborts entirely. Messages must be returned
|
||||
unchanged (no placeholder, no drop) and _last_compress_aborted set."""
|
||||
msgs = [{"role": "system", "content": "System prompt"}] + self._make_messages(10)
|
||||
result = compressor.compress(msgs)
|
||||
assert len(result) < len(msgs)
|
||||
# Should keep system message and last N
|
||||
assert result[0]["role"] == "system"
|
||||
assert compressor.compression_count == 1
|
||||
# Abort path: messages preserved byte-for-byte
|
||||
assert result == msgs
|
||||
assert compressor._last_compress_aborted is True
|
||||
# Compression count NOT incremented on abort — nothing was compressed.
|
||||
assert compressor.compression_count == 0
|
||||
|
||||
def test_compression_increments_count(self, compressor):
|
||||
msgs = self._make_messages(10)
|
||||
compressor.compress(msgs)
|
||||
assert compressor.compression_count == 1
|
||||
compressor.compress(msgs)
|
||||
assert compressor.compression_count == 2
|
||||
mock_resp = MagicMock()
|
||||
mock_resp.choices = [MagicMock()]
|
||||
mock_resp.choices[0].message.content = "summary text"
|
||||
with patch("agent.context_compressor.call_llm", return_value=mock_resp):
|
||||
compressor.compress(msgs)
|
||||
assert compressor.compression_count == 1
|
||||
# Reset cooldown isn't needed (no prior failure) but reset
|
||||
# iterative-summary state so the next call follows the same
|
||||
# path as the first.
|
||||
compressor.compress(msgs)
|
||||
assert compressor.compression_count == 2
|
||||
|
||||
def test_protects_first_and_last(self, compressor):
|
||||
msgs = self._make_messages(10)
|
||||
|
|
@ -128,7 +138,11 @@ class TestGenerateSummaryNoneContent:
|
|||
{"role": "user" if i % 2 == 0 else "assistant", "content": f"msg {i}"}
|
||||
for i in range(10)
|
||||
]
|
||||
result = c.compress(msgs)
|
||||
mock_resp = MagicMock()
|
||||
mock_resp.choices = [MagicMock()]
|
||||
mock_resp.choices[0].message.content = "summary text"
|
||||
with patch("agent.context_compressor.call_llm", return_value=mock_resp):
|
||||
result = c.compress(msgs)
|
||||
assert len(result) < len(msgs)
|
||||
|
||||
|
||||
|
|
@ -716,11 +730,14 @@ class TestAuxModelFallbackSurfacedToCallers:
|
|||
|
||||
|
||||
class TestSummaryFailureTrackingForGatewayWarning:
|
||||
"""When summary generation fails, the compressor must record dropped count
|
||||
+ fallback flag so gateway hygiene & /compress can surface a visible
|
||||
warning instead of silently dropping context."""
|
||||
"""When summary generation fails, the compressor must ABORT compression
|
||||
entirely (return the original messages unchanged) and set the abort flag
|
||||
so gateway hygiene & /compress can surface a visible warning. Previous
|
||||
behavior of inserting a static "summary unavailable" placeholder while
|
||||
silently dropping the middle window has been removed — losing N turns
|
||||
of context is worse than freezing the chat until the user retries."""
|
||||
|
||||
def test_compress_records_fallback_and_dropped_count_on_summary_failure(self):
|
||||
def test_compress_aborts_and_preserves_messages_on_summary_failure(self):
|
||||
with patch("agent.context_compressor.get_model_context_length", return_value=100000):
|
||||
c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=2)
|
||||
|
||||
|
|
@ -740,16 +757,23 @@ class TestSummaryFailureTrackingForGatewayWarning:
|
|||
with patch("agent.context_compressor.call_llm", side_effect=Exception("404 model not found")):
|
||||
result = c.compress(msgs)
|
||||
|
||||
assert c._last_summary_fallback_used is True
|
||||
assert c._last_summary_dropped_count > 0
|
||||
# Abort flag set, error recorded
|
||||
assert c._last_compress_aborted is True
|
||||
assert c._last_summary_error is not None
|
||||
# Result must still be well-formed (fallback summary present).
|
||||
assert any(
|
||||
# No fallback inserted, no messages dropped
|
||||
assert c._last_summary_fallback_used is False
|
||||
assert c._last_summary_dropped_count == 0
|
||||
# Original messages preserved byte-for-byte — the agent loop's
|
||||
# "did compression help?" check (len(after) < len(before)) sees a
|
||||
# no-op and stops looping.
|
||||
assert result == msgs
|
||||
# No "Summary generation was unavailable" placeholder leaked in.
|
||||
assert not any(
|
||||
isinstance(m.get("content"), str) and "Summary generation was unavailable" in m["content"]
|
||||
for m in result
|
||||
)
|
||||
|
||||
def test_compress_clears_fallback_flag_on_subsequent_success(self):
|
||||
def test_compress_clears_abort_flag_on_subsequent_success(self):
|
||||
mock_response = MagicMock()
|
||||
mock_response.choices = [MagicMock()]
|
||||
mock_response.choices[0].message.content = "summary text"
|
||||
|
|
@ -768,18 +792,57 @@ class TestSummaryFailureTrackingForGatewayWarning:
|
|||
{"role": "user", "content": "msg 7"},
|
||||
]
|
||||
|
||||
# First call fails, second succeeds — flag must reset on second compress.
|
||||
# First call fails, second succeeds — abort flag must reset on second compress.
|
||||
with patch("agent.context_compressor.call_llm", side_effect=Exception("boom")):
|
||||
c.compress(msgs)
|
||||
assert c._last_summary_fallback_used is True
|
||||
assert c._last_compress_aborted is True
|
||||
|
||||
# Reset cooldown to allow retry on second compress
|
||||
c._summary_failure_cooldown_until = 0.0
|
||||
with patch("agent.context_compressor.call_llm", return_value=mock_response):
|
||||
c.compress(msgs)
|
||||
assert c._last_compress_aborted is False
|
||||
assert c._last_summary_fallback_used is False
|
||||
assert c._last_summary_dropped_count == 0
|
||||
|
||||
def test_force_true_bypasses_failure_cooldown(self):
    """``compress(..., force=True)`` must not be blocked by an active cooldown.

    Manual /compress passes force=True so it can retry immediately after
    an auto-compress abort instead of waiting out the 30-60s cooldown.
    """
    import time as _time

    # Canned successful reply for the patched summary call.
    choice = MagicMock()
    choice.message.content = "summary text"
    mock_response = MagicMock()
    mock_response.choices = [choice]

    with patch("agent.context_compressor.get_model_context_length", return_value=100000):
        compressor = ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=2)

    # One system message followed by seven alternating turns:
    # odd-numbered messages are "user", even-numbered are "assistant".
    transcript = [{"role": "system", "content": "sys"}]
    transcript += [
        {"role": "user" if i % 2 else "assistant", "content": f"msg {i}"}
        for i in range(1, 8)
    ]

    # Pre-populate an active cooldown (as if a prior auto-compress aborted).
    compressor._summary_failure_cooldown_until = _time.monotonic() + 999.0

    # Without force, _generate_summary would short-circuit on cooldown
    # and return None → abort. With force=True the cooldown is cleared
    # and the call goes through.
    with patch("agent.context_compressor.call_llm", return_value=mock_response):
        compacted = compressor.compress(transcript, force=True)

    assert compressor._last_compress_aborted is False
    # Cooldown was cleared and a real summary attempt was made.
    assert compressor._summary_failure_cooldown_until == 0.0
    # Result is actually compressed (shorter than input).
    assert len(compacted) < len(transcript)
|
||||
|
||||
|
||||
class TestSummaryPrefixNormalization:
|
||||
def test_legacy_prefix_is_replaced(self):
|
||||
|
|
@ -1338,7 +1401,11 @@ class TestSummaryTargetRatio:
|
|||
+ [{"role": "user" if i % 2 == 0 else "assistant", "content": f"msg {i}"}
|
||||
for i in range(8)]
|
||||
)
|
||||
result = c.compress(msgs)
|
||||
mock_resp = MagicMock()
|
||||
mock_resp.choices = [MagicMock()]
|
||||
mock_resp.choices[0].message.content = "summary text"
|
||||
with patch("agent.context_compressor.call_llm", return_value=mock_resp):
|
||||
result = c.compress(msgs)
|
||||
# System prompt (msg[0]) survives as head
|
||||
assert result[0]["role"] == "system"
|
||||
assert result[0]["content"].startswith("System prompt")
|
||||
|
|
|
|||
|
|
@ -130,19 +130,15 @@ async def test_compress_command_explains_when_token_estimate_rises():
|
|||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_compress_command_appends_warning_when_summary_generation_fails():
|
||||
"""When the auxiliary summariser fails and the compressor inserts a static
|
||||
fallback placeholder, /compress must append a visible ⚠️ warning to its
|
||||
reply. Otherwise the failure is silently logged and the user has no idea
|
||||
earlier context is unrecoverable."""
|
||||
async def test_compress_command_appends_warning_when_compression_aborts():
|
||||
"""When the auxiliary summariser fails and the compressor ABORTS (returns
|
||||
messages unchanged), /compress must append a visible ⚠️ warning to its
|
||||
reply telling the user nothing was dropped and how to retry. Otherwise
|
||||
the failure is silently logged and the user has no idea why nothing
|
||||
happened."""
|
||||
history = _make_history()
|
||||
# Compressed shape is irrelevant for this test — we only care that the
|
||||
# warning surfaces. Drop one message so the headline is non-noop.
|
||||
compressed = [
|
||||
history[0],
|
||||
{"role": "assistant", "content": "[fallback placeholder]"},
|
||||
history[-1],
|
||||
]
|
||||
# Abort path: compressor returns the input messages unchanged.
|
||||
compressed = list(history)
|
||||
runner = _make_runner(history)
|
||||
agent_instance = MagicMock()
|
||||
agent_instance.shutdown_memory_provider = MagicMock()
|
||||
|
|
@ -150,10 +146,11 @@ async def test_compress_command_appends_warning_when_summary_generation_fails():
|
|||
agent_instance._cached_system_prompt = ""
|
||||
agent_instance.tools = None
|
||||
agent_instance.context_compressor.has_content_to_compress.return_value = True
|
||||
# Simulate summary-generation failure: fallback flag set, dropped count
|
||||
# populated, error string captured.
|
||||
agent_instance.context_compressor._last_summary_fallback_used = True
|
||||
agent_instance.context_compressor._last_summary_dropped_count = 7
|
||||
# Simulate compression aborting (force=True bypassed cooldown but the
|
||||
# aux LLM is genuinely broken).
|
||||
agent_instance.context_compressor._last_compress_aborted = True
|
||||
agent_instance.context_compressor._last_summary_fallback_used = False
|
||||
agent_instance.context_compressor._last_summary_dropped_count = 0
|
||||
agent_instance.context_compressor._last_summary_error = (
|
||||
"404 model not found: gemini-3-flash-preview"
|
||||
)
|
||||
|
|
@ -164,7 +161,7 @@ async def test_compress_command_appends_warning_when_summary_generation_fails():
|
|||
if messages == history:
|
||||
return 100
|
||||
if messages == compressed:
|
||||
return 60
|
||||
return 100
|
||||
raise AssertionError(f"unexpected transcript: {messages!r}")
|
||||
|
||||
with (
|
||||
|
|
@ -175,16 +172,14 @@ async def test_compress_command_appends_warning_when_summary_generation_fails():
|
|||
):
|
||||
result = await runner._handle_compress_command(_make_event())
|
||||
|
||||
# The compress reply itself still goes through (the transcript was rewritten).
|
||||
assert "Compressed:" in result
|
||||
# ...but a clearly-marked warning must be appended.
|
||||
# A clearly-marked warning must be appended.
|
||||
assert "⚠️" in result
|
||||
assert "Summary generation failed" in result
|
||||
assert "Compression aborted" in result
|
||||
# Underlying error must surface so users can fix their config.
|
||||
assert "404 model not found" in result
|
||||
# Dropped count must be visible — silently losing N messages is the bug.
|
||||
assert "7" in result
|
||||
assert "historical message(s) were removed" in result
|
||||
# User must be told nothing was dropped — the whole point of the
|
||||
# new behavior is no silent data loss.
|
||||
assert "No messages were dropped" in result
|
||||
agent_instance.shutdown_memory_provider.assert_called_once()
|
||||
agent_instance.close.assert_called_once()
|
||||
|
||||
|
|
@ -210,6 +205,7 @@ async def test_compress_command_surfaces_aux_model_failure_even_when_recovered()
|
|||
agent_instance.tools = None
|
||||
agent_instance.context_compressor.has_content_to_compress.return_value = True
|
||||
# Fallback placeholder was NOT used — recovery succeeded.
|
||||
agent_instance.context_compressor._last_compress_aborted = False
|
||||
agent_instance.context_compressor._last_summary_fallback_used = False
|
||||
agent_instance.context_compressor._last_summary_dropped_count = 0
|
||||
agent_instance.context_compressor._last_summary_error = None
|
||||
|
|
|
|||
|
|
@ -396,11 +396,12 @@ async def test_session_hygiene_messages_stay_in_originating_topic(monkeypatch, t
|
|||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_session_hygiene_warns_user_when_summary_generation_fails(monkeypatch, tmp_path):
|
||||
async def test_session_hygiene_warns_user_when_compression_aborts(monkeypatch, tmp_path):
|
||||
"""When auxiliary compression's summary LLM call fails, the compressor
|
||||
inserts a static fallback and the dropped turns are unrecoverable.
|
||||
Gateway must surface a visible ⚠️ warning to the user, including
|
||||
thread_id metadata so it lands in the originating topic/thread."""
|
||||
ABORTS — returns messages unchanged, sets _last_compress_aborted=True,
|
||||
and drops nothing. Gateway must surface a visible ⚠️ warning to the
|
||||
user (including thread_id metadata so it lands in the originating
|
||||
topic/thread) saying the conversation is unchanged and how to retry."""
|
||||
fake_dotenv = types.ModuleType("dotenv")
|
||||
fake_dotenv.load_dotenv = lambda *args, **kwargs: None
|
||||
monkeypatch.setitem(sys.modules, "dotenv", fake_dotenv)
|
||||
|
|
@ -415,17 +416,18 @@ async def test_session_hygiene_warns_user_when_summary_generation_fails(monkeypa
|
|||
self.shutdown_memory_provider = MagicMock()
|
||||
self.close = MagicMock()
|
||||
# Simulate a compressor that hit summary-generation failure
|
||||
# and inserted the static fallback placeholder.
|
||||
# and ABORTED — no fallback inserted, no messages dropped.
|
||||
self.context_compressor = SimpleNamespace(
|
||||
_last_summary_fallback_used=True,
|
||||
_last_summary_dropped_count=42,
|
||||
_last_compress_aborted=True,
|
||||
_last_summary_fallback_used=False,
|
||||
_last_summary_dropped_count=0,
|
||||
_last_summary_error="404 model not found: gemini-3-flash-preview",
|
||||
)
|
||||
type(self).last_instance = self
|
||||
|
||||
def _compress_context(self, messages, *_args, **_kwargs):
|
||||
self.session_id = f"{self.session_id}_compressed"
|
||||
return ([{"role": "assistant", "content": "compressed"}], None)
|
||||
# Abort path: messages preserved unchanged, session NOT rotated.
|
||||
return (messages, None)
|
||||
|
||||
fake_run_agent = types.ModuleType("run_agent")
|
||||
fake_run_agent.AIAgent = FakeCompressAgentWithSummaryFailure
|
||||
|
|
@ -494,16 +496,17 @@ async def test_session_hygiene_warns_user_when_summary_generation_fails(monkeypa
|
|||
result = await runner._handle_message(event)
|
||||
|
||||
assert result == "ok"
|
||||
# The compressor reported summary-failure → exactly one warning
|
||||
# message must have been delivered to the user.
|
||||
warning_messages = [s for s in adapter.sent if "Context compression summary failed" in s["content"]]
|
||||
# The compressor reported abort → exactly one warning message must
|
||||
# have been delivered to the user.
|
||||
warning_messages = [s for s in adapter.sent if "Context compression aborted" in s["content"]]
|
||||
assert len(warning_messages) == 1, (
|
||||
f"Expected 1 compression-failure warning, got {len(warning_messages)}: {adapter.sent}"
|
||||
f"Expected 1 compression-aborted warning, got {len(warning_messages)}: {adapter.sent}"
|
||||
)
|
||||
warn = warning_messages[0]
|
||||
# Warning must include the dropped count and the underlying error.
|
||||
assert "42" in warn["content"]
|
||||
# Warning must include the underlying error and tell the user nothing
|
||||
# was dropped.
|
||||
assert "404" in warn["content"]
|
||||
assert "No messages were dropped" in warn["content"]
|
||||
# Warning must land in the originating topic/thread, not the main channel.
|
||||
assert warn["chat_id"] == "-1001"
|
||||
assert warn["metadata"] == {"thread_id": "17585"}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue