diff --git a/run_agent.py b/run_agent.py
index ce8954d6d..b16738d96 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -7458,7 +7458,7 @@ class AIAgent:
                         or 'error code: 413' in error_msg
                     )
 
-                    if is_payload_too_large:
+                    if is_payload_too_large and self.compression_enabled:
                         compression_attempts += 1
                         if compression_attempts > max_compression_attempts:
                             self._vprint(f"{self.log_prefix}❌ Max compression attempts ({max_compression_attempts}) reached for payload-too-large error.", force=True)
@@ -7473,30 +7473,14 @@ class AIAgent:
                                 "partial": True
                             }
                         self._emit_status(f"⚠️ Request payload too large (413) — compression attempt {compression_attempts}/{max_compression_attempts}...")
-
-                        original_len = len(messages)
-                        messages, active_system_prompt = self._compress_context(
-                            messages, system_message, approx_tokens=approx_tokens,
-                            task_id=effective_task_id,
-                        )
-
-                        if len(messages) < original_len:
-                            self._emit_status(f"🗜️ Compressed {original_len} → {len(messages)} messages, retrying...")
-                            time.sleep(2)  # Brief pause between compression retries
-                            restart_with_compressed_messages = True
-                            break
-                        else:
-                            self._vprint(f"{self.log_prefix}❌ Payload too large and cannot compress further.", force=True)
-                            self._vprint(f"{self.log_prefix} 💡 Try /new to start a fresh conversation, or /compress to retry compression.", force=True)
-                            logging.error(f"{self.log_prefix}413 payload too large. Cannot compress further.")
-                            self._persist_session(messages, conversation_history)
-                            return {
-                                "messages": messages,
-                                "completed": False,
-                                "api_calls": api_call_count,
-                                "error": "Request payload too large (413). Cannot compress further.",
-                                "partial": True
-                            }
+                    elif is_payload_too_large and not self.compression_enabled:
+                        return {
+                            "messages": messages,
+                            "completed": False,
+                            "api_calls": api_call_count,
+                            "error": "Request payload too large (413). Compression is disabled.",
+                            "partial": True
+                        }
 
                     # Check for context-length errors BEFORE generic 4xx handler.
                     # Local backends (LM Studio, Ollama, llama.cpp) often return