L'outil d'annotation existant ne servait qu'à créer un masque global. Maintenant chaque masque peut déclencher un inpainting IA qui modifie UNIQUEMENT la zone sélectionnée. Backend: - convertMaskForOpenAI(): convertit notre format (blanc=édit/noir=préserve) vers format OpenAI (alpha=0=édit/opaque=préserve) - Auto-redimensionne le mask aux dims de l'image source - generateImage() accepte maintenant un paramètre maskUrl - OpenAI images.edits utilise le param "mask" + champ "image" (singulier) pour le mode inpainting - Nouveau endpoint generation.inpaintZone(frameId, maskUrl, prompt, sourceType) - sourceType: original / bg (regen actif) / fg (perso actif) / composite - Crée une nouvelle variante du type approprié (Module 1) - Synchronise les champs legacy Frontend (AnnotationCanvas): - Nouveau bouton "Inpainter zone" dans la toolbar - Form dropdown avec sélecteur de source (original/composite/bg/fg) et prompt textarea - handleInpaint: upload du masque + appel inpaintZone + new variant - Sauve masque (bouton existant renommé "Sauver masque") séparé de l'inpainting - AnnotationCanvas reçoit projectId + frameIndex pour pouvoir appeler les routes Workflow utilisateur: 1. Mode "Annoter" dans le viewport (sur frame originale) 2. Dessine au pinceau/rectangle/lasso la zone à modifier 3. Click "Inpainter zone" 4. Choisit source (original/composite/etc.) + écrit le prompt 5. Click "Lancer inpainting" 6. OpenAI génère uniquement la zone masquée 7. Nouvelle variante créée et visible dans la galerie M1 Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
340 lines
12 KiB
TypeScript
340 lines
12 KiB
TypeScript
import { storagePut } from "server/storage";
|
|
|
|
/**
 * Input accepted by `generateImage`.
 */
export type GenerateImageOptions = {
  /** Text prompt sent to the image provider. */
  prompt: string;

  /**
   * Optional reference images. Each entry is either a remote `url` or inline
   * base64 data (`b64Json`); `mimeType` overrides the detected content type.
   */
  originalImages?: {
    url?: string;
    b64Json?: string;
    mimeType?: string;
  }[];

  /** Mask URL for localized editing (white = zone to edit, black = zone to preserve). */
  maskUrl?: string;

  /** Target aspect ratio as "W:H" (e.g. "16:9"); used to pick the best provider size and to crop the output. */
  targetAspectRatio?: string;

  /** Explicit width of the final image; together with `targetHeight` it is used for the final crop/resize. */
  targetWidth?: number;

  /** Explicit height of the final image; together with `targetWidth` it is used for the final crop/resize. */
  targetHeight?: number;
};
|
|
|
|
/**
 * Result returned by `generateImage`.
 */
export type GenerateImageResponse = {
  /** URL of the stored generated image. */
  url?: string;

  /** Which backend produced the image. */
  provider?: "gemini" | "openai";

  /** Image width in pixels. NOTE(review): not populated by the code visible in this file — confirm with callers. */
  width?: number;

  /** Image height in pixels. NOTE(review): not populated by the code visible in this file — confirm with callers. */
  height?: number;
};
|
|
|
|
/**
 * Parse an aspect ratio string "W:H" (e.g. "16:9" or "1.85:1") into the
 * numeric ratio W / H.
 *
 * @param ratio - Aspect ratio text; surrounding whitespace is ignored.
 * @returns The ratio as a positive finite number, or `null` when the input is
 *   missing, malformed, or describes a degenerate ratio (zero width or height).
 */
function parseAspectRatio(ratio?: string): number | null {
  if (!ratio) return null;

  const m = ratio.trim().match(/^(\d+(?:\.\d+)?):(\d+(?:\.\d+)?)$/);
  if (!m) return null;

  const w = parseFloat(m[1] ?? "");
  const h = parseFloat(m[2] ?? "");
  const value = w / h;

  // Reject 0 (zero width), Infinity (zero height) and NaN (0/0 or overflow):
  // none of them is a usable aspect ratio for size selection or cropping.
  if (!Number.isFinite(value) || value <= 0) return null;

  return value;
}
|
|
|
|
/**
 * Choose the gpt-image-1 output size whose aspect ratio is closest to the
 * requested one.
 *
 * Supported sizes: 1024x1024 (1.0), 1536x1024 (1.5), 1024x1536 (~0.667).
 * Closeness is measured on a log scale so that "twice as wide" and "twice as
 * tall" are equally far from square. Ties (and non-comparable inputs such as
 * NaN) resolve to the earliest candidate in the list.
 */
function pickOpenAISize(aspectRatio: number): "1024x1024" | "1536x1024" | "1024x1536" {
  type Candidate = { size: "1024x1024" | "1536x1024" | "1024x1536"; ratio: number };

  const candidates: Candidate[] = [
    { size: "1024x1024", ratio: 1.0 },
    { size: "1536x1024", ratio: 1.5 },
    { size: "1024x1536", ratio: 1 / 1.5 },
  ];

  const distanceTo = (ratio: number): number => Math.abs(Math.log(aspectRatio / ratio));

  // Strict "<" keeps the current winner on ties, so the first candidate wins.
  const closest = candidates.reduce((winner, candidate) =>
    distanceTo(candidate.ratio) < distanceTo(winner.ratio) ? candidate : winner
  );

  return closest.size;
}
|
|
|
|
/**
 * Convert a mask from our format (white = edit, black = preserve) to the
 * format OpenAI expects (alpha = 0 in edit zones, opaque in preserve zones).
 *
 * When a reference image is supplied, the mask is first stretched to that
 * image's exact dimensions.
 *
 * @param maskUrl - Absolute http(s) URL, or a path that is resolved against
 *   `http://localhost:3000`.
 * @param refImageBuffer - Optional encoded image whose dimensions the mask must match.
 * @returns PNG-encoded RGBA mask.
 * @throws Error when the mask cannot be fetched (non-2xx response).
 */
async function convertMaskForOpenAI(maskUrl: string, refImageBuffer?: Buffer): Promise<Buffer> {
  const sharp = (await import("sharp")).default;

  const resp = await fetch(maskUrl.startsWith("http") ? maskUrl : `http://localhost:3000${maskUrl}`);
  if (!resp.ok) throw new Error(`Failed to fetch mask: ${resp.status}`);
  let maskBuffer: Buffer = Buffer.from(await resp.arrayBuffer());

  // If we have a reference image, resize the mask to match it exactly.
  if (refImageBuffer) {
    const refMeta = await sharp(refImageBuffer).metadata();
    if (refMeta.width && refMeta.height) {
      maskBuffer = Buffer.from(
        await sharp(maskBuffer)
          .resize(refMeta.width, refMeta.height, { fit: "fill" })
          .toBuffer()
      );
    }
  }

  // Decode to raw greyscale samples.
  const { data, info } = await sharp(maskBuffer)
    .greyscale()
    .raw()
    .toBuffer({ resolveWithObject: true });

  // The raw buffer is interleaved with `info.channels` samples per pixel
  // (more than one when the mask carries an alpha channel, for example).
  // The first sample of each pixel is the grey value, so step by the real
  // channel count instead of assuming a single-channel layout.
  const stride = info.channels;
  const pixelCount = info.width * info.height;

  // Build RGBA: RGB is irrelevant (filled with 255), alpha = 255 - grey.
  const rgba = Buffer.alloc(pixelCount * 4, 255);
  for (let i = 0; i < pixelCount; i++) {
    rgba[i * 4 + 3] = 255 - data[i * stride]; // white → 0 (edit), black → 255 (preserve)
  }

  return sharp(rgba, { raw: { width: info.width, height: info.height, channels: 4 } })
    .png()
    .toBuffer();
}
|
|
|
|
/**
 * Center-crop an encoded image to a target aspect ratio and return it as PNG.
 *
 * - If the image dimensions cannot be read, the input buffer is returned untouched.
 * - If the image is already within 0.02 of the target ratio, no crop happens;
 *   the image is only resized when both `targetWidth` and `targetHeight` are given
 *   (otherwise the input buffer is returned as-is).
 * - Otherwise the excess width or height is trimmed equally from both sides,
 *   then the result is optionally resized to `targetWidth` x `targetHeight`.
 */
async function cropToAspectRatio(buffer: Buffer, targetAspect: number, targetWidth?: number, targetHeight?: number): Promise<Buffer> {
  const { default: sharp } = await import("sharp");
  const image = sharp(buffer);

  const { width: srcWidth, height: srcHeight } = await image.metadata();
  if (!srcWidth || !srcHeight) return buffer;

  const sourceAspect = srcWidth / srcHeight;

  // Close enough already: skip the crop step entirely.
  if (Math.abs(sourceAspect - targetAspect) < 0.02) {
    return targetWidth && targetHeight
      ? image.resize(targetWidth, targetHeight, { fit: "fill" }).png().toBuffer()
      : buffer;
  }

  // Too wide → trim width; too tall → trim height.
  const tooWide = sourceAspect > targetAspect;
  const cropWidth = tooWide ? Math.round(srcHeight * targetAspect) : srcWidth;
  const cropHeight = tooWide ? srcHeight : Math.round(srcWidth / targetAspect);

  const region = {
    left: Math.round((srcWidth - cropWidth) / 2),
    top: Math.round((srcHeight - cropHeight) / 2),
    width: cropWidth,
    height: cropHeight,
  };

  const cropped = image.extract(region);
  const finalImage =
    targetWidth && targetHeight
      ? cropped.resize(targetWidth, targetHeight, { fit: "fill" })
      : cropped;

  return finalImage.png().toBuffer();
}
|
|
|
|
/**
 * Generate (or edit) an image, trying Gemini first and falling back to
 * OpenAI gpt-image-1 if Gemini fails.
 *
 * Masked (inpainting) requests are the exception: only the OpenAI path reads
 * `options.maskUrl`, so when a mask is supplied and OpenAI is configured the
 * request goes straight to OpenAI. Sending it to Gemini would ignore the mask
 * and regenerate the whole image.
 *
 * @param options - Prompt, optional reference images / mask, and target geometry.
 * @returns The stored image URL plus the provider that produced it.
 * @throws Error when neither GEMINI_API_KEY nor OPENAI_API_KEY is configured,
 *   or when the OpenAI fallback itself fails.
 */
export async function generateImage(
  options: GenerateImageOptions
): Promise<GenerateImageResponse> {
  // Compute target aspect ratio: the explicit "W:H" string wins, otherwise
  // derive it from the explicit target dimensions.
  let targetAspect: number | null = parseAspectRatio(options.targetAspectRatio);
  if (!targetAspect && options.targetWidth && options.targetHeight) {
    targetAspect = options.targetWidth / options.targetHeight;
  }

  const geminiConfigured = Boolean(process.env.GEMINI_API_KEY);
  const openaiConfigured = Boolean(process.env.OPENAI_API_KEY);
  const hasMask = Boolean(options.maskUrl);

  // Only OpenAI honours the mask; prefer it for inpainting when available.
  const preferOpenAI = hasMask && openaiConfigured;

  if (geminiConfigured && !preferOpenAI) {
    if (hasMask) {
      console.warn("[ImageGen] maskUrl provided but OPENAI_API_KEY is not configured; Gemini will ignore the mask");
    }
    try {
      const result = await generateWithGemini(options, targetAspect);
      if (result.url) return { ...result, provider: "gemini" };
    } catch (err: unknown) {
      const reason = (err instanceof Error ? err.message : String(err)).slice(0, 100);
      console.warn(`[ImageGen] Gemini failed (${reason}), falling back to OpenAI...`);
    }
  }

  // Fallback (or direct route for masked requests) to OpenAI.
  if (openaiConfigured) {
    const result = await generateWithOpenAI(options, targetAspect);
    return { ...result, provider: "openai" };
  }

  throw new Error("No image generation provider configured (need GEMINI_API_KEY or OPENAI_API_KEY)");
}
|
|
|
|
/**
 * Generate an image with the Gemini `gemini-2.5-flash-image` model.
 *
 * The prompt and any reference images (downloaded from http(s) URLs or taken
 * from inline base64) are sent as parts of a single `generateContent` request.
 * The first inline image found in the response is optionally cropped to the
 * target aspect ratio, stored through `storagePut`, and its URL returned.
 *
 * Note: `options.maskUrl` is not used by this provider.
 *
 * @param options - Generation options (prompt, reference images, target size).
 * @param targetAspect - Numeric aspect ratio to crop to, or `null` for no crop.
 * @returns An object containing the stored image URL.
 * @throws Error on a non-2xx Gemini response or when the response has no image.
 */
async function generateWithGemini(options: GenerateImageOptions, targetAspect: number | null): Promise<GenerateImageResponse> {
  type GeminiPart =
    | { text: string }
    | { inlineData: { mimeType: string; data: string } };
  type GeminiResponse = {
    candidates?: Array<{
      content?: { parts?: Array<{ inlineData?: { data?: string } }> };
    }>;
  };

  const apiKey = process.env.GEMINI_API_KEY!;
  const url = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-image:generateContent";

  const parts: GeminiPart[] = [{ text: options.prompt }];

  for (const img of options.originalImages ?? []) {
    if (img.url && img.url.startsWith("http")) {
      // Best effort: a reference that cannot be downloaded is skipped, not fatal.
      try {
        const resp = await fetch(img.url);
        if (resp.ok) {
          const buffer = Buffer.from(await resp.arrayBuffer());
          const mime = img.mimeType || resp.headers.get("content-type") || "image/jpeg";
          parts.push({
            inlineData: { mimeType: mime, data: buffer.toString("base64") },
          });
        } else {
          console.warn(`[Gemini] Reference image download returned ${resp.status}, skipping`);
        }
      } catch (e) {
        console.warn("[Gemini] Failed to download reference image:", e);
      }
    } else if (img.b64Json) {
      parts.push({
        inlineData: { mimeType: img.mimeType || "image/png", data: img.b64Json },
      });
    }
  }

  const payload = {
    contents: [{ parts }],
    generationConfig: { responseModalities: ["TEXT", "IMAGE"] },
  };

  // Send the API key in a header rather than the query string so it does not
  // end up in URLs captured by logs, proxies or error reports.
  const response = await fetch(url, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      "x-goog-api-key": apiKey,
    },
    body: JSON.stringify(payload),
  });

  if (!response.ok) {
    const detail = await response.text().catch(() => "");
    throw new Error(`Gemini ${response.status}: ${detail.slice(0, 150)}`);
  }

  const result = (await response.json()) as GeminiResponse;

  for (const candidate of result.candidates || []) {
    for (const part of candidate.content?.parts || []) {
      if (part.inlineData?.data) {
        let buffer: Buffer = Buffer.from(part.inlineData.data, "base64");

        // Crop to target aspect if specified
        if (targetAspect) {
          buffer = Buffer.from(await cropToAspectRatio(buffer, targetAspect, options.targetWidth, options.targetHeight));
        }

        const { url: storageUrl } = await storagePut(
          `generated/${Date.now()}.png`,
          buffer,
          "image/png"
        );
        return { url: storageUrl };
      }
    }
  }

  throw new Error("Gemini: no image in response");
}
|
|
|
|
/**
 * Generate or edit an image with OpenAI `gpt-image-1`.
 *
 * - With reference images: calls `/v1/images/edits` (multipart). When
 *   `options.maskUrl` is set and the first reference image loaded, the mask is
 *   converted to OpenAI's alpha format and attached for localized inpainting.
 * - Without reference images: calls `/v1/images/generations` (JSON).
 *
 * The resulting image is optionally cropped to the target aspect ratio, stored
 * through `storagePut`, and its URL returned.
 *
 * @param options - Generation options (prompt, reference images, mask, target size).
 * @param targetAspect - Numeric aspect ratio to crop to, or `null` (size then
 *   defaults to the landscape preset).
 * @returns An object containing the stored image URL.
 * @throws Error when no reference image could be loaded for an edit, on a
 *   non-2xx OpenAI response, or when the response carries no image.
 */
async function generateWithOpenAI(options: GenerateImageOptions, targetAspect: number | null): Promise<GenerateImageResponse> {
  type OpenAIImageResponse = {
    data?: Array<{ b64_json?: string; url?: string }>;
  };

  const apiKey = process.env.OPENAI_API_KEY!;
  const sourceImages = options.originalImages ?? [];
  // A reference is usable if it has a URL or inline base64 data.
  const hasReferenceImage = sourceImages.some((img) => Boolean(img.url || img.b64Json));
  const isInpainting = !!options.maskUrl;

  // Pick best size based on aspect ratio (default to landscape 16:9 → 1536x1024)
  const size = pickOpenAISize(targetAspect ?? 1.78);

  let response: Response;

  if (hasReferenceImage) {
    const formData = new FormData();
    formData.append("model", "gpt-image-1");
    formData.append("prompt", options.prompt);
    formData.append("n", "1");
    formData.append("size", size);

    // Load reference images; the first one (index 0) is the inpainting target.
    let firstImageBuffer: Buffer | undefined;
    let attachedCount = 0;

    for (let i = 0; i < sourceImages.length; i++) {
      const img = sourceImages[i];
      if (!img) continue;

      let buffer: Buffer | undefined;
      let mime = img.mimeType || "image/png";

      if (img.url && img.url.startsWith("http")) {
        // Best effort: a reference that cannot be downloaded is skipped.
        try {
          const resp = await fetch(img.url);
          if (!resp.ok) {
            console.warn(`[OpenAI] Reference download returned ${resp.status}, skipping`);
            continue;
          }
          buffer = Buffer.from(await resp.arrayBuffer());
          mime = img.mimeType || resp.headers.get("content-type") || "image/png";
        } catch (e) {
          console.warn("[OpenAI] Failed to download reference:", e);
          continue;
        }
      } else if (img.b64Json) {
        // Inline base64 references are supported too (same as the Gemini path).
        buffer = Buffer.from(img.b64Json, "base64");
      }

      if (!buffer) continue;
      if (i === 0) firstImageBuffer = buffer;

      const ext = mime.includes("png")
        ? "png"
        : mime.includes("jpeg") || mime.includes("jpg")
          ? "jpg"
          : mime.includes("webp")
            ? "webp"
            : "png";
      const blob = new Blob([new Uint8Array(buffer)], { type: mime });
      // For inpainting, OpenAI wants "image" (singular), for edits with refs use "image[]"
      const fieldName = isInpainting && i === 0 ? "image" : "image[]";
      formData.append(fieldName, blob, `reference_${i}.${ext}`);
      attachedCount++;
    }

    // The edits endpoint requires at least one image; fail with a clear
    // message instead of sending a request that is guaranteed to be rejected.
    if (attachedCount === 0) {
      throw new Error("OpenAI: no reference image could be loaded for image edit");
    }

    // Attach mask for inpainting
    if (isInpainting && firstImageBuffer) {
      try {
        const openaiMask = await convertMaskForOpenAI(options.maskUrl!, firstImageBuffer);
        const maskBytes = new Uint8Array(openaiMask);
        const maskBlob = new Blob([maskBytes], { type: "image/png" });
        formData.append("mask", maskBlob, "mask.png");
      } catch (e) {
        console.warn("[OpenAI] Failed to prepare mask, falling back to full edit:", e);
      }
    } else if (isInpainting) {
      console.warn("[OpenAI] Mask ignored: the first reference image could not be loaded, falling back to full edit");
    }

    response = await fetch("https://api.openai.com/v1/images/edits", {
      method: "POST",
      headers: { Authorization: `Bearer ${apiKey}` },
      body: formData,
    });
  } else {
    response = await fetch("https://api.openai.com/v1/images/generations", {
      method: "POST",
      headers: {
        Authorization: `Bearer ${apiKey}`,
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        model: "gpt-image-1",
        prompt: options.prompt,
        n: 1,
        size,
      }),
    });
  }

  if (!response.ok) {
    const detail = await response.text().catch(() => "");
    throw new Error(`OpenAI ${response.status}: ${detail.slice(0, 200)}`);
  }

  const result = (await response.json()) as OpenAIImageResponse;
  const img = result.data?.[0];
  if (!img) throw new Error("OpenAI: no image in response");

  let buffer: Buffer;
  if (img.b64_json) {
    buffer = Buffer.from(img.b64_json, "base64");
  } else if (img.url) {
    const downloadResp = await fetch(img.url);
    if (!downloadResp.ok) throw new Error("OpenAI: failed to download generated image");
    buffer = Buffer.from(await downloadResp.arrayBuffer());
  } else {
    throw new Error("OpenAI: no b64_json or url in response");
  }

  // Crop to target aspect if specified (OpenAI gives 3:2, we want exact source ratio)
  if (targetAspect) {
    buffer = Buffer.from(await cropToAspectRatio(buffer, targetAspect, options.targetWidth, options.targetHeight));
  }

  const { url: storageUrl } = await storagePut(
    `generated/${Date.now()}.png`,
    buffer,
    "image/png"
  );
  return { url: storageUrl };
}
|