refactor(context_compressor): improve summary generation logic and error handling

Updated the _generate_summary method to attempt summary generation using the auxiliary model first, with a fallback to the main model. If both attempts fail, the method now returns None instead of a placeholder, allowing the caller to handle missing summaries appropriately. This change enhances the robustness of context compression and improves logging for failure scenarios.
This commit is contained in:
teknium1 2026-03-07 11:54:51 -08:00
parent 5baae0df88
commit 306d92a9d7

View file

@ -7,7 +7,7 @@ protecting head and tail context.
import logging import logging
import os import os
from typing import Any, Dict, List from typing import Any, Dict, List, Optional
from agent.auxiliary_client import get_text_auxiliary_client from agent.auxiliary_client import get_text_auxiliary_client
from agent.model_metadata import ( from agent.model_metadata import (
@ -82,11 +82,14 @@ class ContextCompressor:
"compression_count": self.compression_count, "compression_count": self.compression_count,
} }
def _generate_summary(self, turns_to_summarize: List[Dict[str, Any]]) -> str: def _generate_summary(self, turns_to_summarize: List[Dict[str, Any]]) -> Optional[str]:
"""Generate a concise summary of conversation turns using a fast model.""" """Generate a concise summary of conversation turns.
if not self.client:
return "[CONTEXT SUMMARY]: Previous conversation turns have been compressed to save space. The assistant performed various actions and received responses."
Tries the auxiliary model first, then falls back to the user's main
model. Returns None if all attempts fail — the caller should drop
the middle turns without a summary rather than inject a useless
placeholder.
"""
parts = [] parts = []
for msg in turns_to_summarize: for msg in turns_to_summarize:
role = msg.get("role", "unknown") role = msg.get("role", "unknown")
@ -117,28 +120,28 @@ TURNS TO SUMMARIZE:
Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix.""" Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
try: # 1. Try the auxiliary model (cheap/fast)
return self._call_summary_model(self.client, self.summary_model, prompt) if self.client:
except Exception as e: try:
logging.warning(f"Failed to generate context summary with auxiliary model: {e}") return self._call_summary_model(self.client, self.summary_model, prompt)
except Exception as e:
logging.warning(f"Failed to generate context summary with auxiliary model: {e}")
# Fallback: try the main model's endpoint. This handles the common # 2. Fallback: try the user's main model endpoint
# case where the user switched providers (e.g. OpenRouter → local LLM) fallback_client, fallback_model = self._get_fallback_client()
# but a stale API key causes the auxiliary client to pick the old if fallback_client is not None:
# provider which then fails (402, auth error, etc.). try:
fallback_client, fallback_model = self._get_fallback_client() logger.info("Retrying context summary with main model (%s)", fallback_model)
if fallback_client is not None: summary = self._call_summary_model(fallback_client, fallback_model, prompt)
try: self.client = fallback_client
logger.info("Retrying context summary with fallback client (%s)", fallback_model) self.summary_model = fallback_model
summary = self._call_summary_model(fallback_client, fallback_model, prompt) return summary
# Success — swap in the working client for future compressions except Exception as fallback_err:
self.client = fallback_client logging.warning(f"Main model summary also failed: {fallback_err}")
self.summary_model = fallback_model
return summary
except Exception as fallback_err:
logging.warning(f"Fallback summary model also failed: {fallback_err}")
return "[CONTEXT SUMMARY]: Previous conversation turns have been compressed. The assistant performed tool calls and received responses." # 3. All models failed — return None so the caller drops turns without a summary
logging.warning("Context compression: no model available for summary. Middle turns will be dropped without summary.")
return None
def _call_summary_model(self, client, model: str, prompt: str) -> str: def _call_summary_model(self, client, model: str, prompt: str) -> str:
"""Make the actual LLM call to generate a summary. Raises on failure.""" """Make the actual LLM call to generate a summary. Raises on failure."""
@ -326,25 +329,6 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
print(f"\n📦 Context compression triggered ({display_tokens:,} tokens ≥ {self.threshold_tokens:,} threshold)") print(f"\n📦 Context compression triggered ({display_tokens:,} tokens ≥ {self.threshold_tokens:,} threshold)")
print(f" 📊 Model context limit: {self.context_length:,} tokens ({self.threshold_percent*100:.0f}% = {self.threshold_tokens:,})") print(f" 📊 Model context limit: {self.context_length:,} tokens ({self.threshold_percent*100:.0f}% = {self.threshold_tokens:,})")
# Truncation fallback when no auxiliary model is available
if self.client is None:
print("⚠️ Context compression: no auxiliary model available. Falling back to message truncation.")
# Keep system message(s) at the front and the protected tail;
# simply drop the oldest non-system messages until under threshold.
kept = []
for msg in messages:
if msg.get("role") == "system":
kept.append(msg.copy())
else:
break
tail = messages[-self.protect_last_n:]
kept.extend(m.copy() for m in tail)
self.compression_count += 1
kept = self._sanitize_tool_pairs(kept)
if not self.quiet_mode:
print(f" ✂️ Truncated: {len(messages)}{len(kept)} messages (dropped middle turns)")
return kept
if not self.quiet_mode: if not self.quiet_mode:
print(f" 🗜️ Summarizing turns {compress_start+1}-{compress_end} ({len(turns_to_summarize)} turns)") print(f" 🗜️ Summarizing turns {compress_start+1}-{compress_end} ({len(turns_to_summarize)} turns)")
@ -357,7 +341,11 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
msg["content"] = (msg.get("content") or "") + "\n\n[Note: Some earlier conversation turns may be summarized to preserve context space.]" msg["content"] = (msg.get("content") or "") + "\n\n[Note: Some earlier conversation turns may be summarized to preserve context space.]"
compressed.append(msg) compressed.append(msg)
compressed.append({"role": "user", "content": summary}) if summary:
compressed.append({"role": "user", "content": summary})
else:
if not self.quiet_mode:
print(" ⚠️ No summary model available — middle turns dropped without summary")
for i in range(compress_end, n_messages): for i in range(compress_end, n_messages):
compressed.append(messages[i].copy()) compressed.append(messages[i].copy())