/**
 * Segmentation Service
 * Handles separation of foreground (characters/objects) from background
 *
 * Integrates with:
 * - SAM 2 (Segment Anything Model 2) for automatic segmentation
 * - Built-in image generation for inpainting (with mask guidance)
 * - LLM for intelligent mask refinement
 * - Temporal propagation for video consistency
 */

import { generateImage } from "./_core/imageGeneration";
import { invokeLLM } from "./_core/llm";
import { storageGetSignedUrl } from "./storage";
import {
  getServicesConfig,
  callExternalSAM2,
  getCharacterGenerationStrategy,
  buildPoseConstraints,
} from "./servicesConfig";

/** Matches URLs that already carry an explicit http(s) scheme. */
const ABSOLUTE_HTTP_URL = /^https?:\/\//i;

/**
 * Turn a storage-relative path into a URL that can be fetched directly.
 *
 * URLs that already start with `http://` or `https://` are returned untouched.
 * Anything else has a leading `/storage/` or `/manus-storage/` prefix stripped
 * and the remainder is handed to `storageGetSignedUrl` as the storage key.
 *
 * @param url Absolute URL or storage-relative path.
 * @returns The URL to fetch.
 */
async function resolveToAbsoluteUrl(url: string): Promise<string> {
  if (ABSOLUTE_HTTP_URL.test(url)) return url;
  const key = url.replace(/^\/(manus-)?storage\//, "");
  return storageGetSignedUrl(key);
}

/** Result of segmenting a single frame. */
export interface SegmentationResult {
  maskUrl: string;
  foregroundUrl: string;
  backgroundUrl: string;
  confidence: number;
  segments: SegmentInfo[];
}

/** One segment detected in a frame. */
export interface SegmentInfo {
  id: string;
  label: string;
  type: "character" | "object" | "background";
  boundingBox: { x: number; y: number; width: number; height: number };
  area: number;
  trackingId?: string; // For temporal consistency
}

/** Mask produced for one target frame during propagation. */
export interface PropagationResult {
  frameIndex: number;
  maskUrl: string;
  confidence: number;
  drift: number; // How much the mask has shifted from the reference
}

/**
 * Segment a frame into foreground and background layers
 * Uses the configured segmentation engine:
 * - "external" mode: calls SAM 2 API for pixel-perfect segmentation
 * - "simulated" mode: returns synthetic mask URLs (for development/testing)
 *
 * If the external call throws, or its response has no `maskUrl`, the
 * simulated result is returned instead (a thrown error is logged as a warning).
 *
 * @param frameUrl URL of the frame to segment.
 * @param options Segmentation mode plus optional point/box prompts and a
 *   previous mask; all of them are forwarded as-is to the external service.
 * @returns The segmentation result (real or simulated).
 */
export async function segmentFrame(
  frameUrl: string,
  options: {
    mode: "auto" | "point" | "box";
    points?: Array<{ x: number; y: number; label: 0 | 1 }>;
    boxes?: Array<{ x: number; y: number; width: number; height: number }>;
    previousMask?: string; // For temporal guidance
  } = { mode: "auto" }
): Promise<SegmentationResult> {
  const config = await getServicesConfig();

  // Try external SAM 2 service first
  if (config.sam2Mode === "external" && config.sam2Endpoint) {
    try {
      const result = await callExternalSAM2(config.sam2Endpoint, "segment", {
        imageUrl: frameUrl,
        mode: options.mode,
        points: options.points,
        boxes: options.boxes,
        previousMask: options.previousMask,
      }) as any;

      if (result.maskUrl) {
        // One timestamp for the whole response; the index keeps fallback ids
        // unique when several segments arrive without an id.
        const fallbackStamp = Date.now();
        return {
          maskUrl: result.maskUrl,
          foregroundUrl: result.foregroundUrl || result.maskUrl,
          backgroundUrl: result.backgroundUrl || frameUrl,
          // `??` so that a genuine confidence of 0 is not replaced by the default.
          confidence: result.confidence ?? 0.95,
          segments: (result.segments || []).map((s: any, index: number) => ({
            id: s.id || `seg_${fallbackStamp}_${index}`,
            label: s.label || "Unknown",
            type: s.type || "object",
            boundingBox: s.boundingBox || { x: 0, y: 0, width: 100, height: 100 },
            area: s.area || 0,
            trackingId: s.trackingId,
          })),
        };
      }
    } catch (error) {
      console.warn("[Segmentation] External SAM 2 failed, falling back to simulated:", error);
    }
  }

  // Simulated fallback
  const timestamp = Date.now();
  const maskKey = `masks/mask_${timestamp}`;
  const fgKey = `fg/fg_${timestamp}`;
  const bgKey = `bg/bg_${timestamp}`;

  return {
    maskUrl: `/storage/${maskKey}.png`,
    foregroundUrl: `/storage/${fgKey}.png`,
    backgroundUrl: `/storage/${bgKey}.png`,
    confidence: 0.92,
    segments: [
      {
        id: `seg_${timestamp}_1`,
        label: "Personnage principal",
        type: "character",
        boundingBox: { x: 200, y: 100, width: 150, height: 300 },
        area: 45000,
        trackingId: "track_main_char",
      },
    ],
  };
}

/**
 * Inpaint the background where characters were removed
 * Uses the mask to guide the inpainting - only regenerates masked areas
 *
 * The maskUrl is passed as a reference image to guide the generation:
 * - White areas in the mask = areas to inpaint (where characters were)
 * - Black areas = areas to preserve (existing background)
 *
 * @param frameUrl Frame to clean up (absolute URL or storage path).
 * @param maskUrl Mask image (absolute URL or storage path).
 * @param prompt Optional replacement for the default leading instruction.
 * @returns URL of the generated image, or `frameUrl` when the generator
 *   returned no URL.
 */
export async function inpaintBackground(
  frameUrl: string,
  maskUrl: string,
  prompt?: string
): Promise<string> {
  const absoluteFrameUrl = await resolveToAbsoluteUrl(frameUrl);
  const absoluteMaskUrl = await resolveToAbsoluteUrl(maskUrl);

  const inpaintPrompt = [
    prompt || "Clean background plate, seamlessly fill the masked areas",
    "Maintain original art style, color palette, and perspective.",
    "The second reference image is the mask: white areas should be inpainted,",
    "black areas should remain unchanged. Produce a complete background without characters.",
  ].join(" ");

  const result = await generateImage({
    prompt: inpaintPrompt,
    // Order matters: the prompt refers to the mask as "the second reference image".
    originalImages: [
      { url: absoluteFrameUrl, mimeType: "image/jpeg" },
      { url: absoluteMaskUrl, mimeType: "image/jpeg" },
    ],
  });

  return result.url || frameUrl;
}

/**
 * Regenerate background with a new style based on user prompt
 * Preserves perspective and composition while changing the visual style
 *
 * @param referenceFrameUrl Reference frame (absolute URL or storage path).
 * @param prompt User description of the desired background.
 * @param style Style phrase inserted into the prompt.
 * @param targetDimensions Optional output size; also used to derive the
 *   `width:height` aspect-ratio string passed to the generator.
 * @returns URL of the generated image, or `referenceFrameUrl` when the
 *   generator returned no URL.
 */
export async function regenerateBackground(
  referenceFrameUrl: string,
  prompt: string,
  style: string = "same art style",
  targetDimensions?: { width: number; height: number }
): Promise<string> {
  const absoluteUrl = await resolveToAbsoluteUrl(referenceFrameUrl);

  const fullPrompt = [
    prompt,
    `Maintain exact same perspective, composition, and spatial layout.`,
    `Style: ${style}.`,
    `This is a background for animation - no characters should be present.`,
    `Keep the same camera angle and depth of field as the reference.`,
  ].join(" ");

  const aspectRatio = targetDimensions
    ? `${targetDimensions.width}:${targetDimensions.height}`
    : undefined;

  const result = await generateImage({
    prompt: fullPrompt,
    originalImages: [
      {
        url: absoluteUrl,
        mimeType: "image/jpeg",
      },
    ],
    targetAspectRatio: aspectRatio,
    targetWidth: targetDimensions?.width,
    targetHeight: targetDimensions?.height,
  });

  return result.url || referenceFrameUrl;
}

/**
 * Regenerate a character with a new style while preserving pose and proportions
 * Uses the character sheet as style reference for consistency
 * Integrates with LoRA/IP-Adapter strategy based on character modelType
 *
 * Reference images are sent in this order: the character frame, then either
 * the strategy's reference images (when `characterConfig` is given) or the
 * character sheet, then the mask.
 *
 * @param characterFrameUrl Frame containing the character.
 * @param prompt User description of the desired result.
 * @param characterSheet Optional character reference sheet.
 * @param maskUrl Optional mask of the character silhouette.
 * @param characterConfig Optional name/model type used to select a generation
 *   strategy via `getCharacterGenerationStrategy`.
 * @param targetDimensions Optional output size.
 * @returns URL of the generated image, or `characterFrameUrl` when the
 *   generator returned no URL.
 */
export async function regenerateCharacter(
  characterFrameUrl: string,
  prompt: string,
  characterSheet?: string,
  maskUrl?: string,
  characterConfig?: { name: string; modelType: "lora" | "ip_adapter" | "none" },
  targetDimensions?: { width: number; height: number }
): Promise<string> {
  const absoluteFrameUrl = await resolveToAbsoluteUrl(characterFrameUrl);

  const images: Array<{ url: string; mimeType: "image/png" | "image/jpeg" }> = [
    { url: absoluteFrameUrl, mimeType: "image/jpeg" },
  ];

  let strategyPrefix = "";
  if (characterConfig) {
    const strategy = getCharacterGenerationStrategy(
      characterConfig.modelType,
      characterConfig.name,
      characterSheet
    );
    strategyPrefix = strategy.promptPrefix;
    for (const ref of strategy.referenceImages) {
      const absRef = await resolveToAbsoluteUrl(ref.url);
      // Skip references that resolve to a URL already in the list.
      if (!images.some(img => img.url === absRef)) {
        images.push({ url: absRef, mimeType: ref.mimeType });
      }
    }
  } else if (characterSheet) {
    const absSheet = await resolveToAbsoluteUrl(characterSheet);
    images.push({ url: absSheet, mimeType: "image/jpeg" });
  }

  if (maskUrl) {
    const absMask = await resolveToAbsoluteUrl(maskUrl);
    images.push({ url: absMask, mimeType: "image/jpeg" });
  }

  // Empty fragments are dropped by filter(Boolean) before joining.
  const basePrompt = [
    strategyPrefix,
    prompt,
    characterSheet && !characterConfig ? "Match the character reference sheet style exactly." : "",
    maskUrl ? "The mask image indicates the character silhouette to preserve." : "",
    "Output only the character on a transparent background.",
  ].filter(Boolean).join(" ");

  const fullPrompt = buildPoseConstraints(basePrompt, {
    preservePose: true,
    preserveProportions: true,
    preservePosition: true,
  });

  const aspectRatio = targetDimensions
    ? `${targetDimensions.width}:${targetDimensions.height}`
    : undefined;

  const result = await generateImage({
    prompt: fullPrompt,
    originalImages: images,
    targetAspectRatio: aspectRatio,
    targetWidth: targetDimensions?.width,
    targetHeight: targetDimensions?.height,
  });

  return result.url || characterFrameUrl;
}

/**
 * Propagate segmentation mask across a sequence of frames
 * Uses temporal consistency to track objects across frames
 *
 * When SAM 2 external service is configured, uses the /propagate endpoint.
 * Otherwise, uses simulated drift model.
 *
 * The simulated model adds a random 0.01-0.03 drift per frame, lowers
 * confidence as drift accumulates (never below 0.5), and resets the
 * accumulated drift to 0 once it exceeds 0.3. Its mask URLs are synthetic.
 *
 * @param startFrameUrl Reference frame the start mask belongs to.
 * @param startMaskUrl Mask to propagate.
 * @param targetFrameUrls Frames to propagate the mask onto, in order.
 * @returns One result per target frame (external results are indexed by
 *   their position in the service response).
 */
export async function propagateMask(
  startFrameUrl: string,
  startMaskUrl: string,
  targetFrameUrls: string[]
): Promise<PropagationResult[]> {
  const config = await getServicesConfig();

  // Try external SAM 2 propagation
  if (config.sam2Mode === "external" && config.sam2Endpoint) {
    try {
      const result = await callExternalSAM2(config.sam2Endpoint, "propagate", {
        startFrameUrl,
        startMaskUrl,
        targetFrameUrls,
      }) as any;

      if (result.results && Array.isArray(result.results)) {
        return result.results.map((r: any, i: number) => ({
          frameIndex: i,
          maskUrl: r.maskUrl,
          // `??` so that a genuine confidence of 0 is not replaced by the default.
          confidence: r.confidence ?? 0.9,
          drift: r.drift || 0,
        }));
      }
    } catch (error) {
      console.warn("[Segmentation] External SAM 2 propagation failed, falling back:", error);
    }
  }

  // Simulated fallback with drift model
  const results: PropagationResult[] = [];
  let totalDrift = 0;

  for (let i = 0; i < targetFrameUrls.length; i++) {
    totalDrift += 0.01 + Math.random() * 0.02;

    // Confidence is taken before a possible reset, so the frame that triggers
    // the reset still reports the degraded confidence.
    const confidence = Math.max(0.5, 0.95 - totalDrift * 0.5);

    if (totalDrift > 0.3) {
      totalDrift = 0;
    }

    results.push({
      frameIndex: i,
      maskUrl: `/storage/masks/propagated_${i}_${Date.now()}.png`,
      confidence,
      drift: totalDrift,
    });
  }

  return results;
}

/** Geometric adjustments applied to a layer before it is placed on the canvas. */
export interface LayerTransform {
  /** Translation X relative to canvas width: -1..1 (0 = no shift) */
  x?: number;
  /** Translation Y relative to canvas height: -1..1 */
  y?: number;
  /** Scale multiplier (1.0 = original size) */
  scale?: number;
  /** Rotation in degrees */
  rotation?: number;
  /** Horizontal flip */
  flipH?: boolean;
  /** Vertical flip */
  flipV?: boolean;
}

/**
 * One layer of a composite.
 *
 * `opacity` is a percentage (100 = fully opaque). When `maskUrl` is set, the
 * mask defines the alpha channel:
 * - White = foreground visible (character)
 * - Black = background visible
 * - Gray = partial transparency (edges, anti-aliasing)
 */
export interface CompositeLayer {
  imageUrl: string;
  opacity: number;
  blendMode?: "normal" | "multiply" | "screen" | "overlay";
  maskUrl?: string;
  transform?: LayerTransform;
}

type SharpBlendMode = "over" | "multiply" | "screen" | "overlay";

/** Maps this module's blend-mode names onto the names sharp's composite expects. */
const blendModeMap: Record<string, SharpBlendMode> = {
  normal: "over",
  multiply: "multiply",
  screen: "screen",
  overlay: "overlay",
};

/**
 * Download an image into memory.
 *
 * @param url Absolute URL or storage path.
 * @returns The response body as a Buffer.
 * @throws Error when the HTTP response status is not OK.
 */
async function fetchImageBuffer(url: string): Promise<Buffer> {
  // resolveToAbsoluteUrl already passes absolute http(s) URLs through unchanged.
  const absoluteUrl = await resolveToAbsoluteUrl(url);
  const resp = await fetch(absoluteUrl);
  if (!resp.ok) throw new Error(`Failed to fetch image: ${absoluteUrl} (${resp.status})`);
  return Buffer.from(await resp.arrayBuffer());
}

/**
 * Composite multiple layers together using sharp
 * Layers should be provided in render order (bottom to top)
 *
 * The first layer is the base and fixes the canvas size (1920x1080 is assumed
 * when its metadata has no dimensions). Every other layer is resized to the
 * canvas size times its scale, optionally masked and faded, flipped/rotated,
 * centred, shifted by its x/y offsets, and cropped to the visible part of the
 * canvas. The result is uploaded as a PNG through `storagePut`.
 *
 * @param layers Layers in render order; must contain at least one.
 * @param options `featherRadius` blurs each mask by that radius; `outputKey`
 *   overrides the default timestamp-based storage key.
 * @returns URL of the stored composite.
 * @throws Error when `layers` is empty or an image cannot be fetched.
 */
export async function compositeLayers(
  layers: CompositeLayer[],
  options: { featherRadius?: number; outputKey?: string } = {}
): Promise<string> {
  const sharp = (await import("sharp")).default;
  const { storagePut } = await import("./storage");

  const [baseLayer, ...overlayLayers] = layers;
  if (!baseLayer) throw new Error("No layers to composite");

  // First layer = base
  const baseBuffer = await fetchImageBuffer(baseLayer.imageUrl);
  let pipeline = sharp(baseBuffer);
  const metadata = await pipeline.metadata();
  const width = metadata.width || 1920;
  const height = metadata.height || 1080;

  // Apply opacity to base if < 100
  if (baseLayer.opacity < 100) {
    pipeline = pipeline.ensureAlpha(baseLayer.opacity / 100);
  }

  // Composite remaining layers on top
  const composites: Array<{ input: Buffer; blend: SharpBlendMode; top: number; left: number }> = [];

  for (const layer of overlayLayers) {
    const t = layer.transform;
    const blend = blendModeMap[layer.blendMode || "normal"] || "over";

    // Compute target layer dimensions
    const scale = t?.scale ?? 1;
    const layerW = Math.max(1, Math.round(width * scale));
    const layerH = Math.max(1, Math.round(height * scale));

    const sourceBuffer = await fetchImageBuffer(layer.imageUrl);
    let layerPipeline = sharp(sourceBuffer).resize(layerW, layerH, {
      fit: "contain",
      background: { r: 0, g: 0, b: 0, alpha: 0 },
    });

    // Apply mask if provided (mask is matched to layer pre-transform)
    if (layer.maskUrl) {
      const maskBuffer = await fetchImageBuffer(layer.maskUrl);
      let maskPipeline = sharp(maskBuffer).resize(layerW, layerH, { fit: "contain" }).greyscale();
      if (options.featherRadius && options.featherRadius > 0) {
        maskPipeline = maskPipeline.blur(options.featherRadius);
      }
      const maskRaw = await maskPipeline.raw().toBuffer();
      layerPipeline = layerPipeline
        .ensureAlpha()
        .joinChannel(maskRaw, { raw: { width: layerW, height: layerH, channels: 1 } });
    } else {
      layerPipeline = layerPipeline.ensureAlpha();
    }

    // Apply opacity
    let layerBuffer: Buffer;
    if (layer.opacity < 100) {
      // Clamp at 0: a negative multiplier would wrap around in the byte buffer.
      const alphaMultiplier = Math.max(0, layer.opacity) / 100;
      const rawData = await layerPipeline.raw().toBuffer({ resolveWithObject: true });
      const channels = rawData.info.channels;
      // The last channel of every pixel is treated as its alpha value.
      for (let p = channels - 1; p < rawData.data.length; p += channels) {
        rawData.data[p] = Math.round(rawData.data[p] * alphaMultiplier);
      }
      layerBuffer = await sharp(rawData.data, { raw: { width: layerW, height: layerH, channels } })
        .png()
        .toBuffer();
    } else {
      layerBuffer = await layerPipeline.png().toBuffer();
    }

    // Apply flip + rotation (translate/scale are handled by resize and placement)
    if (t && (t.flipH || t.flipV || t.rotation)) {
      let transformedPipeline = sharp(layerBuffer);
      if (t.flipH) transformedPipeline = transformedPipeline.flop();
      if (t.flipV) transformedPipeline = transformedPipeline.flip();
      if (t.rotation) {
        transformedPipeline = transformedPipeline.rotate(t.rotation, {
          background: { r: 0, g: 0, b: 0, alpha: 0 },
        });
      }
      layerBuffer = await transformedPipeline.png().toBuffer();
    }

    // Compute placement on the base canvas
    const finalDims = await sharp(layerBuffer).metadata();
    const fW = finalDims.width || layerW;
    const fH = finalDims.height || layerH;
    const placeLeft = Math.round((width - fW) / 2) + Math.round((t?.x ?? 0) * width);
    const placeTop = Math.round((height - fH) / 2) + Math.round((t?.y ?? 0) * height);

    // Sharp's composite requires top/left >= 0 AND layer to fit within base.
    // For transformed layers that overflow or have negative placement, we
    // need to extract the visible portion from the layer before compositing.
    const overflowsCanvas =
      placeLeft < 0 || placeTop < 0 || placeLeft + fW > width || placeTop + fH > height;

    if (!overflowsCanvas) {
      composites.push({ input: layerBuffer, blend, top: placeTop, left: placeLeft });
      continue;
    }

    // Compute visible crop from layer
    const cropLeft = Math.max(0, -placeLeft);
    const cropTop = Math.max(0, -placeTop);
    const cropRight = Math.min(fW, width - placeLeft);
    const cropBottom = Math.min(fH, height - placeTop);
    const cropW = Math.max(0, cropRight - cropLeft);
    const cropH = Math.max(0, cropBottom - cropTop);

    // A zero-sized crop means the layer is fully out of bounds: skip it.
    if (cropW > 0 && cropH > 0) {
      const croppedLayer = await sharp(layerBuffer)
        .extract({ left: cropLeft, top: cropTop, width: cropW, height: cropH })
        .png()
        .toBuffer();
      composites.push({
        input: croppedLayer,
        blend,
        top: Math.max(0, placeTop),
        left: Math.max(0, placeLeft),
      });
    }
  }

  const finalBuffer = await pipeline.composite(composites).png().toBuffer();
  const outputKey = options.outputKey || `composited/comp_${Date.now()}.png`;
  const { url } = await storagePut(outputKey, finalBuffer, "image/png");

  console.log(`[Compositor] Composited ${layers.length} layers -> ${url} (${(finalBuffer.length / 1024).toFixed(1)}KB)`);
  return url;
}

/**
 * Composite a frame from individual background + foreground + mask URLs
 * Backwards-compatible signature
 *
 * @param backgroundUrl Base layer.
 * @param foregroundUrl Layer drawn on top, masked by `maskUrl`.
 * @param maskUrl Mask applied to the foreground.
 * @param options `featherRadius` is forwarded to `compositeLayers`; `opacity`
 *   is a 0..1 fraction (default 1.0) converted to a percentage for the
 *   foreground layer.
 * @returns URL of the stored composite.
 */
export async function compositeFrame(
  backgroundUrl: string,
  foregroundUrl: string,
  maskUrl: string,
  options: {
    featherRadius?: number;
    opacity?: number;
  } = {}
): Promise<string> {
  const layers: CompositeLayer[] = [
    { imageUrl: backgroundUrl, opacity: 100, blendMode: "normal" },
    { imageUrl: foregroundUrl, opacity: (options.opacity ?? 1.0) * 100, blendMode: "normal", maskUrl },
  ];

  return compositeLayers(layers, { featherRadius: options.featherRadius });
}

/**
 * Batch composite an entire sequence
 * Applies the same background to all frames with per-frame foreground masks
 *
 * @param backgroundUrl Background shared by every frame.
 * @param frames Per-frame foreground and mask URLs, in sequence order.
 * @param options Forwarded unchanged to `compositeFrame` for every frame.
 * @returns Composite URLs in the same order as `frames`.
 */
export async function compositeSequence(
  backgroundUrl: string,
  frames: Array<{ foregroundUrl: string; maskUrl: string }>,
  options: { featherRadius?: number; opacity?: number } = {}
): Promise<string[]> {
  const results: string[] = [];

  // Deliberately sequential: the default output key is derived from Date.now(),
  // so compositing frames concurrently could produce colliding storage keys.
  for (const frame of frames) {
    results.push(await compositeFrame(backgroundUrl, frame.foregroundUrl, frame.maskUrl, options));
  }

  return results;
}