import { storagePut } from "server/storage";

/** Options accepted by {@link generateImage}. */
export type GenerateImageOptions = {
  /** Text prompt sent to the image provider. */
  prompt: string;
  /** Optional reference images, given either as a URL or as base64 data. */
  originalImages?: Array<{
    url?: string;
    b64Json?: string;
    mimeType?: string;
  }>;
  /** Mask URL for localized editing (white=edit zone, black=preserve) */
  maskUrl?: string;
  /** Target aspect ratio "W:H" (e.g. "16:9") - used to pick best size and crop output */
  targetAspectRatio?: string;
  /**
   * Explicit output dimensions. When both are set, the cropped image is resized
   * to exactly this size; they are also used to derive the aspect ratio when
   * `targetAspectRatio` is missing or cannot be parsed.
   */
  targetWidth?: number;
  targetHeight?: number;
};

/** Result of {@link generateImage}. */
export type GenerateImageResponse = {
  /** URL returned by `storagePut` for the stored PNG. */
  url?: string;
  /** Provider that produced the image. */
  provider?: "gemini" | "openai";
  /** Not set by the provider functions in this file. */
  width?: number;
  height?: number;
};

/** One entry of a Gemini `contents[].parts` array as built by this module. */
type GeminiPart =
  | { text: string }
  | { inlineData: { mimeType: string; data: string } };

/** The subset of the Gemini `generateContent` response that this module reads. */
type GeminiResponse = {
  candidates?: Array<{
    content?: { parts?: Array<{ inlineData?: { data?: string } }> };
  }>;
};

/** The subset of the OpenAI images response that this module reads. */
type OpenAIImageResponse = {
  data?: Array<{ b64_json?: string; url?: string }>;
};

/** Extract a printable message from an unknown thrown value. */
function errorMessage(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}

/**
 * Build the storage key for a generated image.
 * A random suffix is appended to the timestamp so that two images generated in
 * the same millisecond do not overwrite each other.
 */
function generatedImageKey(): string {
  const suffix = Math.random().toString(36).slice(2, 10);
  return `generated/${Date.now()}-${suffix}.png`;
}

/**
 * Parse aspect ratio string "W:H" to number.
 *
 * @returns width / height, or `null` when the string is missing, malformed, or
 *   does not describe a finite, strictly positive ratio (e.g. "0:9", "16:0").
 */
function parseAspectRatio(ratio?: string): number | null {
  if (!ratio) return null;
  const m = ratio.match(/^(\d+(?:\.\d+)?):(\d+(?:\.\d+)?)$/);
  if (!m) return null;
  const w = parseFloat(m[1]);
  const h = parseFloat(m[2]);
  const value = w / h;
  // Rejects h === 0 (Infinity/NaN), w === 0 (ratio 0) and overflowing inputs.
  return Number.isFinite(value) && value > 0 ? value : null;
}

/**
 * Pick the best OpenAI gpt-image-1 size for a target aspect ratio.
 * Available: 1024x1024 (1.0), 1536x1024 (1.5), 1024x1536 (0.667)
 *
 * Distance is measured on a log scale so that "twice as wide" and "twice as
 * tall" count as equally far away. Falls back to the square size when the
 * ratio cannot be compared (0, NaN, Infinity).
 */
function pickOpenAISize(aspectRatio: number): "1024x1024" | "1536x1024" | "1024x1536" {
  const options = [
    { size: "1024x1024" as const, ratio: 1.0 },
    { size: "1536x1024" as const, ratio: 1.5 },
    { size: "1024x1536" as const, ratio: 1 / 1.5 },
  ];
  let best = options[0];
  let bestDiff = Math.abs(Math.log(aspectRatio / best.ratio));
  for (const opt of options) {
    const diff = Math.abs(Math.log(aspectRatio / opt.ratio));
    if (diff < bestDiff) {
      best = opt;
      bestDiff = diff;
    }
  }
  return best.size;
}

/**
 * Convert our mask format (white=edit, black=preserve) to OpenAI format.
 * OpenAI expects: alpha=0 in edit zones, opaque in preserve zones.
 * Also ensures the mask matches the reference image dimensions.
 *
 * @param maskUrl Absolute http(s) URL, or a path that is resolved against
 *   `http://localhost:3000`.
 * @param refImageBuffer When given, the mask is stretched to this image's size.
 * @returns PNG bytes of an RGBA mask whose alpha is `255 - grey`.
 * @throws Error when the mask cannot be fetched.
 */
async function convertMaskForOpenAI(maskUrl: string, refImageBuffer?: Buffer): Promise<Buffer> {
  const sharp = (await import("sharp")).default;

  // NOTE(review): relative mask paths are resolved against a hard-coded local origin.
  const resp = await fetch(maskUrl.startsWith("http") ? maskUrl : `http://localhost:3000${maskUrl}`);
  if (!resp.ok) throw new Error(`Failed to fetch mask: ${resp.status}`);
  let maskBuffer: Buffer = Buffer.from(await resp.arrayBuffer());

  // If we have a reference image, resize mask to match
  if (refImageBuffer) {
    const refMeta = await sharp(refImageBuffer).metadata();
    if (refMeta.width && refMeta.height) {
      maskBuffer = Buffer.from(
        await sharp(maskBuffer)
          .resize(refMeta.width, refMeta.height, { fit: "fill" })
          .toBuffer()
      );
    }
  }

  // Convert: white pixels → alpha=0 (will be edited), black → opaque (preserved)
  const { data, info } = await sharp(maskBuffer)
    .greyscale()
    .raw()
    .toBuffer({ resolveWithObject: true });

  // A mask that carries an alpha channel comes back with more than one channel
  // per pixel, so step through the raw data by the reported channel count and
  // read the first (grey) channel of each pixel.
  const stride = info.channels;
  const pixelCount = info.width * info.height;

  // Build RGBA: RGB irrelevant (left at 255), alpha = 255 - greyValue
  const rgba = Buffer.alloc(pixelCount * 4, 255);
  for (let i = 0; i < pixelCount; i++) {
    rgba[i * 4 + 3] = 255 - data[i * stride]; // white→0 (edit), black→255 (preserve)
  }

  return sharp(rgba, { raw: { width: info.width, height: info.height, channels: 4 } })
    .png()
    .toBuffer();
}

/**
 * Crop a generated image to the target aspect ratio, keeping the center, and
 * optionally resize the result to exact dimensions.
 *
 * @param buffer Encoded image bytes.
 * @param targetAspect Desired width / height.
 * @param targetWidth Final width; applied only together with `targetHeight`.
 * @param targetHeight Final height; applied only together with `targetWidth`.
 * @returns PNG bytes, or the input unchanged when its dimensions cannot be read
 *   or it is already within 0.02 of the target ratio and no resize is requested.
 */
async function cropToAspectRatio(
  buffer: Buffer,
  targetAspect: number,
  targetWidth?: number,
  targetHeight?: number
): Promise<Buffer> {
  const sharp = (await import("sharp")).default;
  const img = sharp(buffer);
  const meta = await img.metadata();
  if (!meta.width || !meta.height) return buffer;

  const currentAspect = meta.width / meta.height;

  // If already close enough, skip crop
  if (Math.abs(currentAspect - targetAspect) < 0.02) {
    if (targetWidth && targetHeight) {
      return img.resize(targetWidth, targetHeight, { fit: "fill" }).png().toBuffer();
    }
    return buffer;
  }

  let cropW = meta.width;
  let cropH = meta.height;
  if (currentAspect > targetAspect) {
    // too wide, crop width (never below 1px, which extract() would reject)
    cropW = Math.max(1, Math.round(meta.height * targetAspect));
  } else {
    // too tall, crop height (never below 1px, which extract() would reject)
    cropH = Math.max(1, Math.round(meta.width / targetAspect));
  }

  const left = Math.round((meta.width - cropW) / 2);
  const top = Math.round((meta.height - cropH) / 2);

  let pipeline = img.extract({ left, top, width: cropW, height: cropH });
  if (targetWidth && targetHeight) {
    pipeline = pipeline.resize(targetWidth, targetHeight, { fit: "fill" });
  }
  return pipeline.png().toBuffer();
}

/**
 * Try Gemini first, fallback to OpenAI gpt-image-1 if Gemini fails.
 *
 * Providers are enabled by the `GEMINI_API_KEY` and `OPENAI_API_KEY`
 * environment variables.
 *
 * @returns The stored image URL together with the provider that produced it.
 * @throws Error when no provider is configured, when Gemini fails and no OpenAI
 *   key is available, or when the OpenAI fallback itself fails.
 */
export async function generateImage(
  options: GenerateImageOptions
): Promise<GenerateImageResponse> {
  // Compute target aspect ratio
  let targetAspect: number | null = parseAspectRatio(options.targetAspectRatio);
  if (!targetAspect && options.targetWidth && options.targetHeight) {
    targetAspect = options.targetWidth / options.targetHeight;
  }

  // Try Gemini first
  let geminiFailure: string | undefined;
  if (process.env.GEMINI_API_KEY) {
    try {
      const result = await generateWithGemini(options, targetAspect);
      if (result.url) return { ...result, provider: "gemini" };
      geminiFailure = "no URL returned for the stored image";
    } catch (err: unknown) {
      geminiFailure = errorMessage(err);
      console.warn(`[ImageGen] Gemini failed (${geminiFailure.slice(0, 100)}), falling back to OpenAI...`);
    }
  }

  // Fallback to OpenAI
  if (process.env.OPENAI_API_KEY) {
    const result = await generateWithOpenAI(options, targetAspect);
    return { ...result, provider: "openai" };
  }

  // Gemini was configured but did not deliver: report that, not "nothing configured".
  if (geminiFailure !== undefined) {
    throw new Error(`Gemini failed and no OPENAI_API_KEY fallback is configured: ${geminiFailure}`);
  }
  throw new Error("No image generation provider configured (need GEMINI_API_KEY or OPENAI_API_KEY)");
}

/**
 * Generate an image with Gemini and store it through `storagePut`.
 *
 * Reference images that fail to download are skipped with a warning. The first
 * inline image found in the response is cropped (when `targetAspect` is set)
 * and stored as PNG.
 *
 * @throws Error when the key is missing, the API responds with a non-2xx
 *   status, or the response contains no image.
 */
async function generateWithGemini(
  options: GenerateImageOptions,
  targetAspect: number | null
): Promise<GenerateImageResponse> {
  const apiKey = process.env.GEMINI_API_KEY;
  if (!apiKey) throw new Error("Gemini: GEMINI_API_KEY is not set");
  const url = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-image:generateContent";

  const parts: GeminiPart[] = [{ text: options.prompt }];

  for (const img of options.originalImages ?? []) {
    if (img.url && img.url.startsWith("http")) {
      try {
        const resp = await fetch(img.url);
        if (resp.ok) {
          const buffer = Buffer.from(await resp.arrayBuffer());
          const mime = img.mimeType || resp.headers.get("content-type") || "image/jpeg";
          parts.push({
            inlineData: { mimeType: mime, data: buffer.toString("base64") },
          });
        }
      } catch (e) {
        console.warn("[Gemini] Failed to download reference image:", e);
      }
    } else if (img.b64Json) {
      parts.push({
        inlineData: { mimeType: img.mimeType || "image/png", data: img.b64Json },
      });
    }
  }

  const payload = {
    contents: [{ parts }],
    generationConfig: { responseModalities: ["TEXT", "IMAGE"] },
  };

  // The key travels in a header rather than the query string so it does not
  // end up in URLs that get logged or echoed in error messages.
  const response = await fetch(url, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      "x-goog-api-key": apiKey,
    },
    body: JSON.stringify(payload),
  });

  if (!response.ok) {
    const detail = await response.text().catch(() => "");
    throw new Error(`Gemini ${response.status}: ${detail.slice(0, 150)}`);
  }

  const result = (await response.json()) as GeminiResponse;
  for (const candidate of result.candidates || []) {
    for (const part of candidate.content?.parts || []) {
      if (part.inlineData?.data) {
        let buffer: Buffer = Buffer.from(part.inlineData.data, "base64");

        // Crop to target aspect if specified
        if (targetAspect) {
          buffer = Buffer.from(
            await cropToAspectRatio(buffer, targetAspect, options.targetWidth, options.targetHeight)
          );
        }

        const { url: storageUrl } = await storagePut(generatedImageKey(), buffer, "image/png");
        return { url: storageUrl };
      }
    }
  }

  throw new Error("Gemini: no image in response");
}

/**
 * Generate an image with OpenAI gpt-image-1 and store it through `storagePut`.
 *
 * Uses the edits endpoint when the first reference image has a URL (attaching
 * a converted mask when `maskUrl` is set and the first image downloaded), and
 * the generations endpoint otherwise.
 *
 * @throws Error when the key is missing, no reference image could be attached
 *   to an edit request, the API responds with a non-2xx status, or the response
 *   carries no usable image.
 */
async function generateWithOpenAI(
  options: GenerateImageOptions,
  targetAspect: number | null
): Promise<GenerateImageResponse> {
  const apiKey = process.env.OPENAI_API_KEY;
  if (!apiKey) throw new Error("OpenAI: OPENAI_API_KEY is not set");

  const references = options.originalImages ?? [];
  const hasReferenceImage = references.length > 0 && Boolean(references[0].url);
  const maskUrl = options.maskUrl;
  const isInpainting = !!maskUrl;

  // Pick best size based on aspect ratio (default to landscape 16:9 → 1536x1024)
  const size = pickOpenAISize(targetAspect ?? 1.78);

  let response: Response;

  if (hasReferenceImage) {
    const formData = new FormData();
    formData.append("model", "gpt-image-1");
    formData.append("prompt", options.prompt);
    formData.append("n", "1");
    formData.append("size", size);

    // Download reference images and (for inpainting) attach the first one as image
    let firstImageBuffer: Buffer | undefined;
    let attachedCount = 0;
    for (let i = 0; i < references.length; i++) {
      const img = references[i];
      if (!img.url || !img.url.startsWith("http")) continue;
      try {
        const resp = await fetch(img.url);
        if (!resp.ok) continue;
        const buffer = Buffer.from(await resp.arrayBuffer());
        if (i === 0) firstImageBuffer = buffer;
        const mime = img.mimeType || resp.headers.get("content-type") || "image/png";
        const ext = mime.includes("png") ? "png" : mime.includes("jpeg") ? "jpg" : "png";
        const blob = new Blob([buffer], { type: mime });
        // For inpainting, OpenAI wants "image" (singular), for edits with refs use "image[]"
        const fieldName = isInpainting && i === 0 ? "image" : "image[]";
        formData.append(fieldName, blob, `reference_${i}.${ext}`);
        attachedCount++;
      } catch (e) {
        console.warn("[OpenAI] Failed to download reference:", e);
      }
    }

    // An edit request without any image is certain to be rejected; fail here
    // with a clear message instead of spending an API round trip on it.
    if (attachedCount === 0) {
      throw new Error("OpenAI: no reference image could be downloaded for the edit request");
    }

    // Attach mask for inpainting
    if (maskUrl && firstImageBuffer) {
      try {
        const openaiMask = await convertMaskForOpenAI(maskUrl, firstImageBuffer);
        const maskBytes = new Uint8Array(openaiMask);
        const maskBlob = new Blob([maskBytes], { type: "image/png" });
        formData.append("mask", maskBlob, "mask.png");
      } catch (e) {
        console.warn("[OpenAI] Failed to prepare mask, falling back to full edit:", e);
      }
    }

    response = await fetch("https://api.openai.com/v1/images/edits", {
      method: "POST",
      headers: { Authorization: `Bearer ${apiKey}` },
      body: formData,
    });
  } else {
    response = await fetch("https://api.openai.com/v1/images/generations", {
      method: "POST",
      headers: {
        Authorization: `Bearer ${apiKey}`,
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        model: "gpt-image-1",
        prompt: options.prompt,
        n: 1,
        size,
      }),
    });
  }

  if (!response.ok) {
    const detail = await response.text().catch(() => "");
    throw new Error(`OpenAI ${response.status}: ${detail.slice(0, 200)}`);
  }

  const result = (await response.json()) as OpenAIImageResponse;
  const img = result.data?.[0];
  if (!img) throw new Error("OpenAI: no image in response");

  let buffer: Buffer;
  if (img.b64_json) {
    buffer = Buffer.from(img.b64_json, "base64");
  } else if (img.url) {
    const downloadResp = await fetch(img.url);
    if (!downloadResp.ok) throw new Error("OpenAI: failed to download generated image");
    buffer = Buffer.from(await downloadResp.arrayBuffer());
  } else {
    throw new Error("OpenAI: no b64_json or url in response");
  }

  // Crop to target aspect if specified (OpenAI gives 3:2, we want exact source ratio)
  if (targetAspect) {
    buffer = Buffer.from(
      await cropToAspectRatio(buffer, targetAspect, options.targetWidth, options.targetHeight)
    );
  }

  const { url: storageUrl } = await storagePut(generatedImageKey(), buffer, "image/png");
  return { url: storageUrl };
}