retrotoon-studio/server/_core/imageGeneration.ts
Ubuntu 6a875ad0d5 feat(M3): Édition par zone - inpainting localisé via masque
L'outil d'annotation existant ne servait qu'à créer un masque global.
Maintenant chaque masque peut déclencher un inpainting IA qui
modifie UNIQUEMENT la zone sélectionnée.

Backend:
- convertMaskForOpenAI(): convertit notre format (blanc=édit/noir=préserve)
  vers format OpenAI (alpha=0=édit/opaque=préserve)
- Auto-redimensionne le mask aux dims de l'image source
- generateImage() accepte maintenant un paramètre maskUrl
- OpenAI images.edits utilise le param "mask" + champ "image" (singulier)
  pour le mode inpainting
- Nouveau endpoint generation.inpaintZone(frameId, maskUrl, prompt, sourceType)
- sourceType: original / bg (regen actif) / fg (perso actif) / composite
- Crée une nouvelle variante du type approprié (Module 1)
- Synchronise les champs legacy

Frontend (AnnotationCanvas):
- Nouveau bouton "Inpainter zone" dans la toolbar
- Form dropdown avec sélecteur de source (original/composite/bg/fg)
  et prompt textarea
- handleInpaint: upload du masque + appel inpaintZone + new variant
- Sauve masque (bouton existant renommé "Sauver masque") séparé de l'inpainting
- AnnotationCanvas reçoit projectId + frameIndex pour pouvoir appeler les routes

Workflow utilisateur:
1. Mode "Annoter" dans le viewport (sur frame originale)
2. Dessine au pinceau/rectangle/lasso la zone à modifier
3. Click "Inpainter zone"
4. Choisit source (original/composite/etc.) + écrit le prompt
5. Click "Lancer inpainting"
6. OpenAI génère uniquement la zone masquée
7. Nouvelle variante créée et visible dans la galerie M1

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-21 06:48:21 +00:00

340 lines
12 KiB
TypeScript

import { storagePut } from "server/storage";
/**
 * Input accepted by generateImage().
 */
export type GenerateImageOptions = {
/** Text prompt sent to the image provider. */
prompt: string;
/**
 * Optional reference images. Each entry supplies the image either as a URL
 * or as inline base64 data; `mimeType`, when given, takes precedence over
 * the Content-Type reported when the URL is downloaded.
 */
originalImages?: Array<{
url?: string;
b64Json?: string;
mimeType?: string;
}>;
/**
 * Mask URL for localized editing (white=edit zone, black=preserve).
 * It is read by the OpenAI path only; a value that does not start with
 * "http" is fetched from http://localhost:3000.
 */
maskUrl?: string;
/** Target aspect ratio "W:H" (e.g. "16:9") - used to pick best size and crop output */
targetAspectRatio?: string;
/**
 * Explicit output dimensions. When both are set, the final image is resized
 * to exactly this size after cropping. They also provide the aspect ratio,
 * but only when targetAspectRatio is missing or cannot be parsed.
 */
targetWidth?: number;
targetHeight?: number;
};
/**
 * Result returned by generateImage().
 */
export type GenerateImageResponse = {
/** URL of the stored image, as returned by storagePut. */
url?: string;
/** Provider that produced the image; filled in by generateImage(). */
provider?: "gemini" | "openai";
// NOTE(review): width/height are declared but not populated by any code
// visible in this file — confirm whether callers rely on them.
width?: number;
height?: number;
};
/**
 * Parse an aspect-ratio string of the form "W:H" (e.g. "16:9", "2.35:1")
 * into the numeric ratio W / H.
 *
 * Leading/trailing whitespace and whitespace around the colon are tolerated.
 *
 * @param ratio - Aspect ratio string, or undefined.
 * @returns The ratio as a positive, finite number, or null when the input is
 *          missing, malformed, or describes a degenerate ratio ("0:9", "16:0").
 */
function parseAspectRatio(ratio?: string): number | null {
  if (!ratio) return null;
  const m = ratio.trim().match(/^(\d+(?:\.\d+)?)\s*:\s*(\d+(?:\.\d+)?)$/);
  if (!m) return null;
  const w = parseFloat(m[1]);
  const h = parseFloat(m[2]);
  const value = w / h;
  // A zero ratio is not nullish, so it would slip past `?? default` fallbacks
  // downstream; Infinity/NaN (zero height, overflowing operands) are equally
  // unusable. Treat all of them as "no ratio".
  if (!Number.isFinite(value) || value <= 0) return null;
  return value;
}
/**
 * Choose the gpt-image-1 output size whose aspect ratio is closest to the
 * requested one.
 *
 * Candidates: 1024x1024 (1.0), 1536x1024 (1.5), 1024x1536 (~0.667).
 * Closeness is measured on a log scale, so "twice as wide" and "twice as
 * tall" count as equally far. On a tie (or when no distance can be compared,
 * e.g. NaN input) the earlier candidate wins, starting from the square size.
 *
 * @param aspectRatio - Desired width / height.
 * @returns The size string to pass to the OpenAI API.
 */
function pickOpenAISize(aspectRatio: number): "1024x1024" | "1536x1024" | "1024x1536" {
  type Size = "1024x1024" | "1536x1024" | "1024x1536";
  type Leader = { size: Size; distance: number };

  const logDistance = (ratio: number): number => Math.abs(Math.log(aspectRatio / ratio));

  // The square format is the starting leader; the others must be strictly closer.
  const initial: Leader = { size: "1024x1024", distance: logDistance(1.0) };
  const challengers: Array<{ size: Size; ratio: number }> = [
    { size: "1536x1024", ratio: 1.5 },
    { size: "1024x1536", ratio: 1 / 1.5 },
  ];

  const winner = challengers.reduce<Leader>((leader, challenger) => {
    const distance = logDistance(challenger.ratio);
    return distance < leader.distance ? { size: challenger.size, distance } : leader;
  }, initial);

  return winner.size;
}
/**
 * Translate a mask from this app's convention to the one used by the OpenAI
 * image edit endpoint.
 *
 * App convention:    white = zone to edit, black = zone to preserve.
 * OpenAI convention: alpha 0 = zone to edit, opaque = zone to preserve.
 *
 * The mask is downloaded, optionally stretched to the reference image's
 * dimensions, reduced to grey levels, and re-encoded as an RGBA PNG whose
 * alpha channel is the inverted grey level (so grey edges become partially
 * transparent).
 *
 * @param maskUrl - Absolute URL starting with "http", or a path that is
 *                  appended to http://localhost:3000.
 * @param refImageBuffer - Encoded reference image; when its dimensions can be
 *                         read, the mask is resized to match them exactly.
 * @returns PNG-encoded RGBA mask.
 * @throws Error when the mask download returns a non-OK status.
 */
async function convertMaskForOpenAI(maskUrl: string, refImageBuffer?: Buffer): Promise<Buffer> {
  const { default: sharp } = await import("sharp");

  // NOTE(review): relative paths are resolved against a hard-coded
  // localhost:3000 origin — confirm this matches the port the app listens on.
  const absoluteUrl = maskUrl.startsWith("http") ? maskUrl : `http://localhost:3000${maskUrl}`;
  const maskResponse = await fetch(absoluteUrl);
  if (!maskResponse.ok) throw new Error(`Failed to fetch mask: ${maskResponse.status}`);
  let encodedMask: Buffer = Buffer.from(await maskResponse.arrayBuffer());

  // Stretch (no letterboxing) so mask and reference image line up pixel for pixel.
  if (refImageBuffer) {
    const { width: refWidth, height: refHeight } = await sharp(refImageBuffer).metadata();
    if (refWidth && refHeight) {
      const stretched = await sharp(encodedMask)
        .resize(refWidth, refHeight, { fit: "fill" })
        .toBuffer();
      encodedMask = Buffer.from(stretched);
    }
  }

  // Decode to raw grey samples, indexed as one byte per pixel.
  // NOTE(review): this indexing assumes the raw greyscale output is
  // single-channel — confirm against sharp's raw output behaviour.
  const { data: greyLevels, info } = await sharp(encodedMask)
    .greyscale()
    .raw()
    .toBuffer({ resolveWithObject: true });
  const { width, height } = info;
  const pixelCount = width * height;

  // Start from opaque white everywhere (RGB is irrelevant), then write only
  // the alpha byte of each pixel: white -> 0 (edit), black -> 255 (preserve).
  const rgba = Buffer.alloc(pixelCount * 4, 255);
  for (let px = 0, alphaAt = 3; px < pixelCount; px++, alphaAt += 4) {
    rgba[alphaAt] = 255 - greyLevels[px];
  }

  return sharp(rgba, { raw: { width, height, channels: 4 } }).png().toBuffer();
}
/**
 * Center-crop an encoded image to a target aspect ratio and, optionally,
 * resize it to exact dimensions.
 *
 * - If the image dimensions cannot be read, the input buffer is returned as is.
 * - If the aspect ratio is already within 0.02 of the target, no crop is done:
 *   the input buffer is returned untouched unless both target dimensions are
 *   given, in which case it is resized and re-encoded as PNG.
 * - Otherwise the excess width or height is removed symmetrically around the
 *   center and the result is encoded as PNG.
 *
 * @param buffer - Encoded source image.
 * @param targetAspect - Desired width / height.
 * @param targetWidth - Optional exact output width (used only with targetHeight).
 * @param targetHeight - Optional exact output height (used only with targetWidth).
 * @returns The processed image, or the original buffer when nothing had to change.
 */
async function cropToAspectRatio(buffer: Buffer, targetAspect: number, targetWidth?: number, targetHeight?: number): Promise<Buffer> {
  const { default: sharp } = await import("sharp");
  const image = sharp(buffer);
  const { width: sourceWidth, height: sourceHeight } = await image.metadata();
  if (!sourceWidth || !sourceHeight) return buffer;

  const sourceAspect = sourceWidth / sourceHeight;

  // Close enough: skip the crop, resize only if exact dimensions were requested.
  if (Math.abs(sourceAspect - targetAspect) < 0.02) {
    return targetWidth && targetHeight
      ? image.resize(targetWidth, targetHeight, { fit: "fill" }).png().toBuffer()
      : buffer;
  }

  // Trim whichever dimension is in excess; the other one is kept whole.
  const tooWide = sourceAspect > targetAspect;
  const width = tooWide ? Math.round(sourceHeight * targetAspect) : sourceWidth;
  const height = tooWide ? sourceHeight : Math.round(sourceWidth / targetAspect);
  const region = {
    left: Math.round((sourceWidth - width) / 2),
    top: Math.round((sourceHeight - height) / 2),
    width,
    height,
  };

  const cropped = image.extract(region);
  const finalImage =
    targetWidth && targetHeight
      ? cropped.resize(targetWidth, targetHeight, { fit: "fill" })
      : cropped;
  return finalImage.png().toBuffer();
}
/**
 * Generate (or edit) an image with whichever provider is configured.
 *
 * Provider selection:
 * - Requests carrying a maskUrl (localized inpainting) go straight to OpenAI
 *   when OPENAI_API_KEY is set, because the Gemini path never reads the mask
 *   and would regenerate the whole image. OpenAI errors are propagated in
 *   that case rather than retried on Gemini.
 * - Otherwise Gemini is tried first (when GEMINI_API_KEY is set) and OpenAI
 *   gpt-image-1 is used as the fallback.
 *
 * The target aspect ratio comes from targetAspectRatio when it parses, else
 * from targetWidth / targetHeight when both are set, else it is left null.
 *
 * @param options - Prompt, reference images, optional mask and target size.
 * @returns The stored image URL plus the provider that produced it.
 * @throws Error when no provider is configured, or when every configured
 *         provider fails (the provider's own error is rethrown).
 */
export async function generateImage(
  options: GenerateImageOptions
): Promise<GenerateImageResponse> {
  // Compute target aspect ratio
  let targetAspect: number | null = parseAspectRatio(options.targetAspectRatio);
  if (!targetAspect && options.targetWidth && options.targetHeight) {
    targetAspect = options.targetWidth / options.targetHeight;
  }

  const hasGemini = Boolean(process.env.GEMINI_API_KEY);
  const hasOpenAI = Boolean(process.env.OPENAI_API_KEY);
  if (!hasGemini && !hasOpenAI) {
    throw new Error("No image generation provider configured (need GEMINI_API_KEY or OPENAI_API_KEY)");
  }

  // Inpainting must honor the mask, and only the OpenAI path applies it.
  if (options.maskUrl) {
    if (hasOpenAI) {
      const result = await generateWithOpenAI(options, targetAspect);
      return { ...result, provider: "openai" };
    }
    console.warn("[ImageGen] maskUrl provided but OPENAI_API_KEY is not set; Gemini will ignore the mask");
  }

  // Try Gemini first
  let geminiError: unknown;
  if (hasGemini) {
    try {
      const result = await generateWithGemini(options, targetAspect);
      if (result.url) return { ...result, provider: "gemini" };
    } catch (err: unknown) {
      geminiError = err;
      const message = err instanceof Error ? err.message : String(err);
      const nextStep = hasOpenAI ? "falling back to OpenAI..." : "no fallback provider configured";
      console.warn(`[ImageGen] Gemini failed (${message.slice(0, 100)}), ${nextStep}`);
    }
  }

  // Fallback to OpenAI
  if (hasOpenAI) {
    const result = await generateWithOpenAI(options, targetAspect);
    return { ...result, provider: "openai" };
  }

  // Gemini was the only provider and did not deliver: surface the real cause
  // instead of claiming that nothing is configured.
  if (geminiError instanceof Error) throw geminiError;
  throw new Error("Gemini produced no image and no fallback provider is configured (set OPENAI_API_KEY)");
}
/**
 * Generate an image with Gemini (gemini-2.5-flash-image) and store the result.
 *
 * The prompt and every usable reference image are sent as parts of a single
 * generateContent request. The first inline image found in the response is
 * optionally cropped/resized, uploaded through storagePut, and its URL returned.
 *
 * Reference images: an http(s) `url` is downloaded; if that is absent or the
 * download fails, inline `b64Json` is used when present. Images that cannot
 * be loaded are skipped with a warning.
 *
 * Note: options.maskUrl is not read here, so this provider cannot restrict an
 * edit to a masked zone.
 *
 * @param options - Prompt, reference images and target dimensions.
 * @param targetAspect - Aspect ratio to crop the output to, or null to keep it.
 * @returns An object holding the storage URL of the generated image.
 * @throws Error when the API key is missing, the API answers with a non-OK
 *         status, or the response contains no image.
 */
async function generateWithGemini(options: GenerateImageOptions, targetAspect: number | null): Promise<GenerateImageResponse> {
  const apiKey = process.env.GEMINI_API_KEY;
  if (!apiKey) throw new Error("Gemini: GEMINI_API_KEY is not set");
  const url = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-image:generateContent";

  type InlineData = { mimeType: string; data: string };
  type GeminiPart = { text: string } | { inlineData: InlineData };
  type GeminiResponse = {
    candidates?: Array<{
      content?: { parts?: Array<{ inlineData?: { data?: string } }> };
    }>;
  };

  const parts: GeminiPart[] = [{ text: options.prompt }];
  for (const img of options.originalImages ?? []) {
    let inlineData: InlineData | undefined;
    if (img.url && img.url.startsWith("http")) {
      try {
        const resp = await fetch(img.url);
        if (resp.ok) {
          const buffer = Buffer.from(await resp.arrayBuffer());
          inlineData = {
            mimeType: img.mimeType || resp.headers.get("content-type") || "image/jpeg",
            data: buffer.toString("base64"),
          };
        } else {
          console.warn(`[Gemini] Failed to download reference image: HTTP ${resp.status}`);
        }
      } catch (e) {
        console.warn("[Gemini] Failed to download reference image:", e);
      }
    }
    // Inline data is the fallback when there is no usable URL.
    if (!inlineData && img.b64Json) {
      inlineData = { mimeType: img.mimeType || "image/png", data: img.b64Json };
    }
    if (inlineData) parts.push({ inlineData });
  }

  const payload = {
    contents: [{ parts }],
    generationConfig: { responseModalities: ["TEXT", "IMAGE"] },
  };
  // The key travels in a header rather than the query string so it does not
  // end up in URLs captured by logs or proxies.
  const response = await fetch(url, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      "x-goog-api-key": apiKey,
    },
    body: JSON.stringify(payload),
  });
  if (!response.ok) {
    const detail = await response.text().catch(() => "");
    throw new Error(`Gemini ${response.status}: ${detail.slice(0, 150)}`);
  }

  const result = (await response.json()) as GeminiResponse;
  for (const candidate of result.candidates ?? []) {
    for (const part of candidate.content?.parts ?? []) {
      const encoded = part.inlineData?.data;
      if (!encoded) continue;
      let buffer: Buffer = Buffer.from(encoded, "base64");
      // Crop to target aspect if specified
      if (targetAspect) {
        buffer = Buffer.from(await cropToAspectRatio(buffer, targetAspect, options.targetWidth, options.targetHeight));
      }
      const { url: storageUrl } = await storagePut(
        `generated/${Date.now()}.png`,
        buffer,
        "image/png"
      );
      return { url: storageUrl };
    }
  }
  throw new Error("Gemini: no image in response");
}
/**
 * Load one reference image as raw bytes for the OpenAI edit endpoint.
 * An http(s) `url` is downloaded; when it is absent or the download fails,
 * inline `b64Json` is decoded instead. Returns null when neither works.
 */
async function loadOpenAIReference(
  img: { url?: string; b64Json?: string; mimeType?: string }
): Promise<{ buffer: Buffer; mime: string } | null> {
  if (img.url && img.url.startsWith("http")) {
    try {
      const resp = await fetch(img.url);
      if (resp.ok) {
        return {
          buffer: Buffer.from(await resp.arrayBuffer()),
          mime: img.mimeType || resp.headers.get("content-type") || "image/png",
        };
      }
      console.warn(`[OpenAI] Failed to download reference: HTTP ${resp.status}`);
    } catch (e) {
      console.warn("[OpenAI] Failed to download reference:", e);
    }
  }
  if (img.b64Json) {
    return { buffer: Buffer.from(img.b64Json, "base64"), mime: img.mimeType || "image/png" };
  }
  return null;
}

/**
 * Generate or edit an image with OpenAI gpt-image-1 and store the result.
 *
 * - With reference images: calls /v1/images/edits with the images attached
 *   as multipart fields. When options.maskUrl is set (inpainting), the first
 *   image is sent as "image" and the converted mask as "mask"; all other
 *   images are sent as "image[]".
 * - Without reference images: calls /v1/images/generations.
 *
 * The output size is chosen from the target aspect ratio (landscape when none
 * is given), and the returned image is cropped/resized to the target before
 * being uploaded through storagePut.
 *
 * If the mask cannot be prepared, the request deliberately degrades to a
 * full-image edit and a warning is logged.
 *
 * @param options - Prompt, reference images, optional mask and target size.
 * @param targetAspect - Aspect ratio to crop the output to, or null to keep it.
 * @returns An object holding the storage URL of the generated image.
 * @throws Error when the API key is missing, reference images were supplied
 *         but none could be loaded, the API answers with a non-OK status, or
 *         the response contains no image.
 */
async function generateWithOpenAI(options: GenerateImageOptions, targetAspect: number | null): Promise<GenerateImageResponse> {
  const apiKey = process.env.OPENAI_API_KEY;
  if (!apiKey) throw new Error("OpenAI: OPENAI_API_KEY is not set");

  type OpenAIImageResponse = { data?: Array<{ b64_json?: string; url?: string }> };

  const references = options.originalImages ?? [];
  // Any entry carrying a URL or inline data counts as a reference image.
  const hasReferenceImage = references.some((img) => Boolean(img.url || img.b64Json));
  const isInpainting = !!options.maskUrl;
  // Pick best size based on aspect ratio (default to landscape 16:9 → 1536x1024)
  const size = pickOpenAISize(targetAspect ?? 1.78);

  let response: Response;
  if (hasReferenceImage) {
    const formData = new FormData();
    formData.append("model", "gpt-image-1");
    formData.append("prompt", options.prompt);
    formData.append("n", "1");
    formData.append("size", size);

    // Load reference images and (for inpainting) attach the first one as image
    let firstImageBuffer: Buffer | undefined;
    let attachedCount = 0;
    for (const [i, img] of references.entries()) {
      const loaded = await loadOpenAIReference(img);
      if (!loaded) continue;
      if (i === 0) firstImageBuffer = loaded.buffer;
      const { mime } = loaded;
      const ext = mime.includes("png")
        ? "png"
        : mime.includes("jpeg") || mime.includes("jpg")
          ? "jpg"
          : mime.includes("webp")
            ? "webp"
            : "png";
      const blob = new Blob([new Uint8Array(loaded.buffer)], { type: mime });
      // For inpainting, OpenAI wants "image" (singular), for edits with refs use "image[]"
      const fieldName = isInpainting && i === 0 ? "image" : "image[]";
      formData.append(fieldName, blob, `reference_${i}.${ext}`);
      attachedCount++;
    }
    // The edit endpoint requires at least one image; fail early with a clear
    // message instead of spending a request that is bound to be rejected.
    if (attachedCount === 0) {
      throw new Error("OpenAI: none of the reference images could be loaded");
    }

    // Attach mask for inpainting
    if (isInpainting) {
      if (firstImageBuffer) {
        try {
          const openaiMask = await convertMaskForOpenAI(options.maskUrl!, firstImageBuffer);
          const maskBytes = new Uint8Array(openaiMask);
          const maskBlob = new Blob([maskBytes], { type: "image/png" });
          formData.append("mask", maskBlob, "mask.png");
        } catch (e) {
          console.warn("[OpenAI] Failed to prepare mask, falling back to full edit:", e);
        }
      } else {
        console.warn("[OpenAI] First reference image unavailable, mask ignored (full edit)");
      }
    }

    response = await fetch("https://api.openai.com/v1/images/edits", {
      method: "POST",
      headers: { Authorization: `Bearer ${apiKey}` },
      body: formData,
    });
  } else {
    response = await fetch("https://api.openai.com/v1/images/generations", {
      method: "POST",
      headers: {
        Authorization: `Bearer ${apiKey}`,
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        model: "gpt-image-1",
        prompt: options.prompt,
        n: 1,
        size,
      }),
    });
  }

  if (!response.ok) {
    const detail = await response.text().catch(() => "");
    throw new Error(`OpenAI ${response.status}: ${detail.slice(0, 200)}`);
  }

  const result = (await response.json()) as OpenAIImageResponse;
  const img = result.data?.[0];
  if (!img) throw new Error("OpenAI: no image in response");

  let buffer: Buffer;
  if (img.b64_json) {
    buffer = Buffer.from(img.b64_json, "base64");
  } else if (img.url) {
    const downloadResp = await fetch(img.url);
    if (!downloadResp.ok) throw new Error("OpenAI: failed to download generated image");
    buffer = Buffer.from(await downloadResp.arrayBuffer());
  } else {
    throw new Error("OpenAI: no b64_json or url in response");
  }

  // Crop to target aspect if specified (OpenAI gives 3:2, we want exact source ratio)
  if (targetAspect) {
    buffer = Buffer.from(await cropToAspectRatio(buffer, targetAspect, options.targetWidth, options.targetHeight));
  }
  const { url: storageUrl } = await storagePut(
    `generated/${Date.now()}.png`,
    buffer,
    "image/png"
  );
  return { url: storageUrl };
}