retrotoon-studio/server/segmentationService.ts
Ubuntu d18424a416 feat(M2): Manipulation spatiale des calques personnage
L'utilisateur peut désormais déplacer, redimensionner, rotater et
flipper le personnage généré sans devoir tout régénérer.

DB:
- Nouveau champ transform JSON sur frameVariants
- Format: {x, y, scale, rotation, flipH, flipV} avec coords relatives

Backend (sharp):
- compositeLayers applique transform avant le blend:
  * scale: resize layer (peut être >100% ou <100%)
  * rotation: sharp.rotate avec fond transparent
  * flipH/flipV: flop/flip
  * x/y: offset en pourcentage de la base (centré + delta)
- Gestion intelligente des layers qui dépassent: extract crop
  (sharp interdit top/left négatifs et inputs plus grands que la base)
- compositing.composeFrame récupère le transform de la variant
  character active automatiquement
- Nouveau endpoint frameVariants.updateTransform

Frontend (LayerManipulator):
- Composant overlay avec bounding box pointillée + 8 handles
- Handles coins = scale, handle haut = rotation, area centrale = move
- CSS transform live (translate/scale/rotate/scaleX(-1) pour flip)
- Toolbar flottante: flip H/V, position/scale/rotation affichés en live
- Reset button quand transformé
- Bouton "Recomposer" déclenche composeFrame avec le nouveau transform
- Save backend automatique au release de souris

ViewportPanel:
- Bouton "Manipuler" dans toolbar (visible uniquement mode composite)
- Active LayerManipulator overlay, mutuellement exclusif avec Annoter/Loupe
- Désactivé si pas de variant character actif (toast warn)

Workflow:
1. Mode composite dans viewport
2. Click "Manipuler" → handles apparaissent
3. Drag pour déplacer / corners pour scale / handle haut pour rotation
4. Sauvegarde auto au release (en DB)
5. Click "Recomposer" → sharp régénère avec transform appliqué
6. Nouvelle variante composite créée (Module 1)
7. La galerie M1 montre l'avant/après

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-21 07:06:26 +00:00

550 lines
18 KiB
TypeScript

/**
* Segmentation Service
* Handles separation of foreground (characters/objects) from background
*
* Integrates with:
* - SAM 2 (Segment Anything Model 2) for automatic segmentation
* - Built-in image generation for inpainting (with mask guidance)
* - LLM for intelligent mask refinement
* - Temporal propagation for video consistency
*/
import { generateImage } from "./_core/imageGeneration";
import { invokeLLM } from "./_core/llm";
import { storageGetSignedUrl } from "./storage";
import { getServicesConfig, callExternalSAM2, getCharacterGenerationStrategy, buildPoseConstraints } from "./servicesConfig";
/**
 * Turn an asset reference into a URL that can be fetched directly.
 *
 * A value that already starts with "http" is returned untouched. Anything else is
 * treated as a storage path: a leading `/storage/` or `/manus-storage/` prefix is
 * stripped and the remainder is passed to `storageGetSignedUrl` as the key.
 *
 * @param url Absolute URL or storage-relative path.
 * @returns The URL to fetch.
 */
async function resolveToAbsoluteUrl(url: string): Promise<string> {
  const needsSigning = !url.startsWith("http");
  if (needsSigning) {
    const storageKey = url.replace(/^\/(manus-)?storage\//, "");
    return storageGetSignedUrl(storageKey);
  }
  return url;
}
/**
 * Outcome of segmenting a single frame, as returned by `segmentFrame`.
 */
export interface SegmentationResult {
/** URL of the segmentation mask image. */
maskUrl: string;
/** URL of the foreground layer; `segmentFrame` falls back to the mask URL when the external service supplies none. */
foregroundUrl: string;
/** URL of the background layer; `segmentFrame` falls back to the input frame URL when the external service supplies none. */
backgroundUrl: string;
/** Confidence score of the segmentation. Presumably in the 0..1 range (this file uses 0.92 and 0.95) — confirm with the SAM 2 service. */
confidence: number;
/** Segments detected in the frame. */
segments: SegmentInfo[];
}
/**
 * One segmented region of a frame.
 */
export interface SegmentInfo {
/** Identifier of the segment. */
id: string;
/** Human-readable label ("Unknown" when the external service supplies none). */
label: string;
/** Kind of content covered by the segment ("object" when the external service supplies none). */
type: "character" | "object" | "background";
/** Bounding rectangle of the segment. NOTE(review): units are not established in this file — presumably pixels in frame coordinates; confirm. */
boundingBox: { x: number; y: number; width: number; height: number };
/** Area covered by the segment. NOTE(review): presumably a pixel count; confirm with the segmentation service. */
area: number;
trackingId?: string; // For temporal consistency
}
/**
 * Mask propagated onto one target frame, as returned by `propagateMask`.
 */
export interface PropagationResult {
/** Position of the entry in the returned list (0-based), as assigned by `propagateMask`. */
frameIndex: number;
/** URL of the propagated mask for this frame. */
maskUrl: string;
/** Confidence score of the propagated mask (0.9 when the external service supplies none). */
confidence: number;
drift: number; // How much the mask has shifted from the reference
}
/**
 * Segment a frame into foreground and background layers.
 *
 * Engine selection, based on `getServicesConfig()`:
 * - `sam2Mode === "external"` with an endpoint configured: calls the SAM 2
 *   "segment" action through `callExternalSAM2`.
 * - otherwise, or when the external call throws or returns no `maskUrl`: returns a
 *   simulated result made of synthetic `/storage/...` URLs and a single character
 *   segment (for development/testing).
 *
 * @param frameUrl URL of the frame to segment.
 * @param options Segmentation mode, optional point/box prompts and an optional
 *   previous mask for temporal guidance. All are forwarded to the external service.
 * @returns Mask, foreground and background URLs, a confidence score and the segments.
 */
export async function segmentFrame(
  frameUrl: string,
  options: {
    mode: "auto" | "point" | "box";
    points?: Array<{ x: number; y: number; label: 0 | 1 }>;
    boxes?: Array<{ x: number; y: number; width: number; height: number }>;
    previousMask?: string; // For temporal guidance
  } = { mode: "auto" }
): Promise<SegmentationResult> {
  const config = await getServicesConfig();

  // Try external SAM 2 service first
  if (config.sam2Mode === "external" && config.sam2Endpoint) {
    try {
      // The response shape is not declared in this file, hence the loose typing.
      const result = (await callExternalSAM2(config.sam2Endpoint, "segment", {
        imageUrl: frameUrl,
        mode: options.mode,
        points: options.points,
        boxes: options.boxes,
        previousMask: options.previousMask,
      })) as any;

      if (result.maskUrl) {
        const receivedAt = Date.now();
        // A malformed `segments` payload should not discard an otherwise valid mask.
        const rawSegments: any[] = Array.isArray(result.segments) ? result.segments : [];
        return {
          maskUrl: result.maskUrl,
          foregroundUrl: result.foregroundUrl || result.maskUrl,
          backgroundUrl: result.backgroundUrl || frameUrl,
          // `??` rather than `||`: a reported confidence of 0 must not become 0.95.
          confidence: result.confidence ?? 0.95,
          segments: rawSegments.map((s: any, index: number) => ({
            // The index keeps generated ids unique when several segments lack one
            // (a bare timestamp would be identical for all of them).
            id: s.id || `seg_${receivedAt}_${index + 1}`,
            label: s.label || "Unknown",
            type: s.type || "object",
            boundingBox: s.boundingBox || { x: 0, y: 0, width: 100, height: 100 },
            area: s.area ?? 0,
            trackingId: s.trackingId,
          })),
        };
      }
    } catch (error) {
      console.warn("[Segmentation] External SAM 2 failed, falling back to simulated:", error);
    }
  }

  // Simulated fallback
  const timestamp = Date.now();
  const maskKey = `masks/mask_${timestamp}`;
  const fgKey = `fg/fg_${timestamp}`;
  const bgKey = `bg/bg_${timestamp}`;
  return {
    maskUrl: `/storage/${maskKey}.png`,
    foregroundUrl: `/storage/${fgKey}.png`,
    backgroundUrl: `/storage/${bgKey}.png`,
    confidence: 0.92,
    segments: [
      {
        id: `seg_${timestamp}_1`,
        label: "Personnage principal",
        type: "character",
        boundingBox: { x: 200, y: 100, width: 150, height: 300 },
        area: 45000,
        trackingId: "track_main_char",
      },
    ],
  };
}
/**
 * Fill in the background where characters were removed.
 *
 * Two reference images are sent to the image generator, in this order:
 * 1. the frame itself,
 * 2. the mask — white marks the areas to inpaint (where characters were),
 *    black marks the areas to keep.
 *
 * @param frameUrl Frame to clean up (absolute URL or storage path).
 * @param maskUrl Mask guiding the inpainting (absolute URL or storage path).
 * @param prompt Optional instruction replacing the default "clean plate" sentence.
 * @returns URL of the generated image, or `frameUrl` when the generator returns no URL.
 */
export async function inpaintBackground(
  frameUrl: string,
  maskUrl: string,
  prompt?: string
): Promise<string> {
  const frameRef = await resolveToAbsoluteUrl(frameUrl);
  const maskRef = await resolveToAbsoluteUrl(maskUrl);

  const leadSentence = prompt || "Clean background plate, seamlessly fill the masked areas";
  const fixedGuidance = [
    "Maintain original art style, color palette, and perspective.",
    "The second reference image is the mask: white areas should be inpainted,",
    "black areas should remain unchanged. Produce a complete background without characters.",
  ];
  const inpaintPrompt = [leadSentence, ...fixedGuidance].join(" ");

  const generated = await generateImage({
    prompt: inpaintPrompt,
    originalImages: [
      { url: frameRef, mimeType: "image/jpeg" },
      { url: maskRef, mimeType: "image/jpeg" },
    ],
  });

  return generated.url ? generated.url : frameUrl;
}
/**
 * Regenerate a background in a new visual style.
 *
 * The reference frame is sent along with a prompt that asks the generator to keep
 * perspective, composition, camera angle and depth of field, and to leave out characters.
 *
 * @param referenceFrameUrl Frame used as the layout reference (absolute URL or storage path).
 * @param prompt User description of the desired background.
 * @param style Style phrase inserted in the prompt; defaults to "same art style".
 * @param targetDimensions Optional output size; also sent as a "width:height" aspect ratio.
 * @returns URL of the generated image, or `referenceFrameUrl` when the generator returns no URL.
 */
export async function regenerateBackground(
  referenceFrameUrl: string,
  prompt: string,
  style: string = "same art style",
  targetDimensions?: { width: number; height: number }
): Promise<string> {
  const referenceUrl = await resolveToAbsoluteUrl(referenceFrameUrl);

  const constraints = [
    `Maintain exact same perspective, composition, and spatial layout.`,
    `Style: ${style}.`,
    `This is a background for animation - no characters should be present.`,
    `Keep the same camera angle and depth of field as the reference.`,
  ];
  const fullPrompt = [prompt, ...constraints].join(" ");

  let targetAspectRatio: string | undefined;
  if (targetDimensions) {
    const { width, height } = targetDimensions;
    targetAspectRatio = `${width}:${height}`;
  }

  const generated = await generateImage({
    prompt: fullPrompt,
    originalImages: [{ url: referenceUrl, mimeType: "image/jpeg" }],
    targetAspectRatio,
    targetWidth: targetDimensions?.width,
    targetHeight: targetDimensions?.height,
  });

  return generated.url || referenceFrameUrl;
}
/**
 * Regenerate a character in a new style while asking the generator to keep its
 * pose, proportions and position.
 *
 * Reference images are sent in this order:
 * 1. the character frame;
 * 2. with `characterConfig`: the reference images returned by
 *    `getCharacterGenerationStrategy` (duplicates of already-listed URLs are skipped);
 *    without it but with `characterSheet`: the character sheet;
 * 3. the mask, when provided.
 *
 * @param characterFrameUrl Frame containing the character (absolute URL or storage path).
 * @param prompt User description of the desired result.
 * @param characterSheet Optional character sheet used as a style reference.
 * @param maskUrl Optional mask marking the character silhouette to preserve.
 * @param characterConfig Optional character name and model type used to pick a generation strategy.
 * @param targetDimensions Optional output size; also sent as a "width:height" aspect ratio.
 * @returns URL of the generated image, or `characterFrameUrl` when the generator returns no URL.
 */
export async function regenerateCharacter(
  characterFrameUrl: string,
  prompt: string,
  characterSheet?: string,
  maskUrl?: string,
  characterConfig?: { name: string; modelType: "lora" | "ip_adapter" | "none" },
  targetDimensions?: { width: number; height: number }
): Promise<string> {
  const frameRef = await resolveToAbsoluteUrl(characterFrameUrl);
  const references: Array<{ url: string; mimeType: "image/png" | "image/jpeg" }> = [
    { url: frameRef, mimeType: "image/jpeg" },
  ];

  let strategyPrefix = "";
  let sheetHint = "";

  if (characterConfig) {
    const strategy = getCharacterGenerationStrategy(
      characterConfig.modelType,
      characterConfig.name,
      characterSheet
    );
    strategyPrefix = strategy.promptPrefix;

    // Track URLs already queued so a strategy reference is never sent twice.
    const queuedUrls = new Set<string>([frameRef]);
    for (const reference of strategy.referenceImages) {
      const resolved = await resolveToAbsoluteUrl(reference.url);
      if (queuedUrls.has(resolved)) continue;
      queuedUrls.add(resolved);
      references.push({ url: resolved, mimeType: reference.mimeType });
    }
  } else if (characterSheet) {
    const sheetRef = await resolveToAbsoluteUrl(characterSheet);
    references.push({ url: sheetRef, mimeType: "image/jpeg" });
    // The sheet hint is only used when no generation strategy supplies its own prefix.
    sheetHint = "Match the character reference sheet style exactly.";
  }

  let maskHint = "";
  if (maskUrl) {
    const maskRef = await resolveToAbsoluteUrl(maskUrl);
    references.push({ url: maskRef, mimeType: "image/jpeg" });
    maskHint = "The mask image indicates the character silhouette to preserve.";
  }

  // Empty pieces (including an empty user prompt) are dropped before joining.
  const basePrompt = [
    strategyPrefix,
    prompt,
    sheetHint,
    maskHint,
    "Output only the character on a transparent background.",
  ]
    .filter((piece) => Boolean(piece))
    .join(" ");

  const fullPrompt = buildPoseConstraints(basePrompt, {
    preservePose: true,
    preserveProportions: true,
    preservePosition: true,
  });

  let targetAspectRatio: string | undefined;
  if (targetDimensions) {
    targetAspectRatio = `${targetDimensions.width}:${targetDimensions.height}`;
  }

  const generated = await generateImage({
    prompt: fullPrompt,
    originalImages: references,
    targetAspectRatio,
    targetWidth: targetDimensions?.width,
    targetHeight: targetDimensions?.height,
  });

  return generated.url || characterFrameUrl;
}
/**
 * Propagate a segmentation mask from a start frame across a list of target frames.
 *
 * When the SAM 2 external service is configured (`sam2Mode === "external"` with an
 * endpoint), its "propagate" action is used. If it is not configured, throws, or
 * returns no `results` array, a simulated drift model is used instead:
 * - each frame adds a random drift between 0.01 and 0.03 to a running total;
 * - confidence is `max(0.5, 0.95 - totalDrift * 0.5)`;
 * - once the running total exceeds 0.3 it is reset to 0 (a simulated re-key), and
 *   that frame reports a drift of 0.
 *
 * @param startFrameUrl URL of the reference frame (only used by the external service).
 * @param startMaskUrl URL of the mask on the reference frame (only used by the external service).
 * @param targetFrameUrls Frames to propagate the mask to.
 * @returns One entry per target frame in the simulated path; one per returned result
 *   in the external path.
 */
export async function propagateMask(
  startFrameUrl: string,
  startMaskUrl: string,
  targetFrameUrls: string[]
): Promise<PropagationResult[]> {
  const config = await getServicesConfig();

  // Try external SAM 2 propagation
  if (config.sam2Mode === "external" && config.sam2Endpoint) {
    try {
      // The response shape is not declared in this file, hence the loose typing.
      const result = (await callExternalSAM2(config.sam2Endpoint, "propagate", {
        startFrameUrl,
        startMaskUrl,
        targetFrameUrls,
      })) as any;

      if (Array.isArray(result.results)) {
        return result.results.map((r: any, i: number) => ({
          frameIndex: i,
          maskUrl: r.maskUrl,
          // `??` rather than `||`: a reported confidence of 0 must not become 0.9.
          confidence: r.confidence ?? 0.9,
          drift: r.drift ?? 0,
        }));
      }
    } catch (error) {
      console.warn("[Segmentation] External SAM 2 propagation failed, falling back:", error);
    }
  }

  // Simulated fallback with drift model
  const results: PropagationResult[] = [];
  let totalDrift = 0;
  for (let i = 0; i < targetFrameUrls.length; i++) {
    totalDrift += 0.01 + Math.random() * 0.02;
    // Confidence is computed before a possible reset, so the re-keyed frame still
    // reports the low confidence that triggered the re-key.
    const confidence = Math.max(0.5, 0.95 - totalDrift * 0.5);
    if (totalDrift > 0.3) {
      totalDrift = 0;
    }
    results.push({
      frameIndex: i,
      maskUrl: `/storage/masks/propagated_${i}_${Date.now()}.png`,
      confidence,
      drift: totalDrift,
    });
  }
  return results;
}
/**
 * Compositing types.
 *
 * Layers are composited back together with alpha blending: a regenerated background
 * is combined with the original/regenerated foreground, optionally through a mask.
 *
 * When a layer has a mask, the mask defines its alpha channel:
 * - White = foreground visible (character)
 * - Black = background visible
 * - Gray = partial transparency (edges, anti-aliasing)
 */

/**
 * Spatial transform applied to a layer by `compositeLayers` before blending.
 * Every field is optional; an omitted field leaves that aspect unchanged.
 */
export interface LayerTransform {
/** Horizontal shift from the centered position, as a fraction of the base canvas width (0 = no shift; intended range -1..1, not enforced by the compositor). */
x?: number;
/** Vertical shift from the centered position, as a fraction of the base canvas height (0 = no shift; intended range -1..1, not enforced by the compositor). */
y?: number;
/** Scale multiplier relative to the base canvas: the layer is fitted inside (canvas width x scale) by (canvas height x scale). 1.0 = fitted to the full canvas. */
scale?: number;
/** Rotation in degrees, passed to sharp's `rotate()`. */
rotation?: number;
/** Horizontal flip (applied with sharp's `flop()`). */
flipH?: boolean;
/** Vertical flip (applied with sharp's `flip()`). */
flipV?: boolean;
}
/**
 * One layer handed to `compositeLayers`.
 *
 * The first layer of the list is the base canvas: only its `imageUrl` and `opacity`
 * are read; its `blendMode`, `maskUrl` and `transform` are not used.
 */
export interface CompositeLayer {
/** Image to draw (absolute URL or storage path). */
imageUrl: string;
/** Opacity as a percentage: 100 = fully opaque, values below 100 scale the layer's alpha. */
opacity: number;
/** How the layer is blended onto the layers below it; treated as "normal" when omitted. */
blendMode?: "normal" | "multiply" | "screen" | "overlay";
/** Optional greyscale mask joined to the layer as an additional (alpha) channel. */
maskUrl?: string;
/** Optional move/scale/rotate/flip applied before blending. */
transform?: LayerTransform;
}
/** Blend names passed to sharp's `composite()` by this module. */
type SharpBlendMode = "over" | "multiply" | "screen" | "overlay";
/**
 * Translates a `CompositeLayer.blendMode` value into the blend name handed to sharp.
 * Only "normal" is renamed (to "over"); the other three names pass through unchanged.
 */
const blendModeMap: Record<string, SharpBlendMode> = {
normal: "over",
multiply: "multiply",
screen: "screen",
overlay: "overlay",
};
/**
 * Download an image and return its raw bytes.
 *
 * Storage-relative paths are first converted with `resolveToAbsoluteUrl`.
 *
 * @param url Absolute URL or storage path of the image.
 * @returns The response body as a Buffer.
 * @throws Error when the HTTP response status is not OK.
 */
async function fetchImageBuffer(url: string): Promise<Buffer> {
  let absoluteUrl = url;
  if (!url.startsWith("http")) {
    absoluteUrl = await resolveToAbsoluteUrl(url);
  }

  const response = await fetch(absoluteUrl);
  if (!response.ok) {
    throw new Error(`Failed to fetch image: ${absoluteUrl} (${response.status})`);
  }

  const bytes = await response.arrayBuffer();
  return Buffer.from(bytes);
}
/** Where a layer lands on the base canvas once clipped to the canvas bounds. */
interface LayerPlacement {
  left: number;
  top: number;
  /** Region of the layer to keep; absent when the whole layer fits on the canvas. */
  crop?: { left: number; top: number; width: number; height: number };
}

/** Return `value` when it is a finite number, `fallback` otherwise (undefined, null, NaN, Infinity). */
function finiteOr(value: number | null | undefined, fallback: number): number {
  return typeof value === "number" && Number.isFinite(value) ? value : fallback;
}

/** Clamp an opacity percentage to 0..100; a non-finite value is treated as fully opaque. */
function clampOpacityPercent(opacity: number): number {
  if (!Number.isFinite(opacity)) return 100;
  return Math.min(100, Math.max(0, opacity));
}

/**
 * Center a layer on the canvas, shift it by the given pixel offsets and clip it
 * to the canvas. Returns `null` when nothing of the layer remains visible.
 */
function computeLayerPlacement(
  canvasWidth: number,
  canvasHeight: number,
  layerWidth: number,
  layerHeight: number,
  offsetX: number,
  offsetY: number
): LayerPlacement | null {
  const left = Math.round((canvasWidth - layerWidth) / 2) + offsetX;
  const top = Math.round((canvasHeight - layerHeight) / 2) + offsetY;

  const fitsEntirely =
    left >= 0 &&
    top >= 0 &&
    left + layerWidth <= canvasWidth &&
    top + layerHeight <= canvasHeight;
  if (fitsEntirely) return { left, top };

  // Keep only the part of the layer that overlaps the canvas.
  const cropLeft = Math.max(0, -left);
  const cropTop = Math.max(0, -top);
  const cropWidth = Math.min(layerWidth, canvasWidth - left) - cropLeft;
  const cropHeight = Math.min(layerHeight, canvasHeight - top) - cropTop;
  if (cropWidth <= 0 || cropHeight <= 0) return null;

  return {
    left: Math.max(0, left),
    top: Math.max(0, top),
    crop: { left: cropLeft, top: cropTop, width: cropWidth, height: cropHeight },
  };
}

/**
 * Composite multiple layers together using sharp.
 * Layers should be provided in render order (bottom to top).
 *
 * The first layer is the base canvas and fixes the output size (1920x1080 is
 * assumed when its dimensions cannot be read). Every following layer is:
 * 1. resized to the canvas size multiplied by `transform.scale` (fit "contain",
 *    transparent padding);
 * 2. masked with `maskUrl` when present (optionally blurred by `featherRadius`);
 * 3. faded according to `opacity` (clamped to 0..100);
 * 4. flipped and rotated;
 * 5. centered, shifted by `transform.x` / `transform.y` (fractions of the canvas
 *    size) and clipped to the canvas — a layer entirely outside is skipped.
 *
 * Non-finite transform values (NaN, Infinity) are treated as "no transform" for
 * that field.
 *
 * @param layers Layers in render order; must contain at least one.
 * @param options `featherRadius`: blur applied to masks; `outputKey`: storage key
 *   of the result (defaults to `composited/comp_<timestamp>.png`).
 * @returns URL of the stored PNG.
 * @throws Error when `layers` is empty or an image cannot be fetched.
 */
export async function compositeLayers(
  layers: CompositeLayer[],
  options: { featherRadius?: number; outputKey?: string } = {}
): Promise<string> {
  const baseLayer = layers[0];
  if (!baseLayer) throw new Error("No layers to composite");

  const sharp = (await import("sharp")).default;
  const { storagePut } = await import("./storage");
  const transparent = { r: 0, g: 0, b: 0, alpha: 0 };

  // First layer = base
  const baseBuffer = await fetchImageBuffer(baseLayer.imageUrl);
  let pipeline = sharp(baseBuffer);
  const metadata = await pipeline.metadata();
  const width = metadata.width || 1920;
  const height = metadata.height || 1080;

  const baseOpacity = clampOpacityPercent(baseLayer.opacity);
  if (baseOpacity < 100) {
    // NOTE(review): per sharp's documentation ensureAlpha() only adds a channel when
    // the image has none, so a base image that already carries alpha presumably
    // keeps it unchanged here — confirm whether that is intended.
    pipeline = pipeline.ensureAlpha(baseOpacity / 100);
  }

  // Composite remaining layers on top
  const composites: Array<{ input: Buffer; blend: SharpBlendMode; top: number; left: number }> = [];
  for (const layer of layers.slice(1)) {
    let layerBuffer = await fetchImageBuffer(layer.imageUrl);
    const t: LayerTransform = layer.transform ?? {};

    // Target layer dimensions: the canvas size scaled by the transform.
    const scale = finiteOr(t.scale, 1);
    const layerW = Math.max(1, Math.round(width * scale));
    const layerH = Math.max(1, Math.round(height * scale));
    let layerPipeline = sharp(layerBuffer).resize(layerW, layerH, {
      fit: "contain",
      background: transparent,
    });

    // Apply mask if provided (mask is matched to layer pre-transform)
    if (layer.maskUrl) {
      const maskBuffer = await fetchImageBuffer(layer.maskUrl);
      let maskPipeline = sharp(maskBuffer).resize(layerW, layerH, { fit: "contain" }).greyscale();
      if (options.featherRadius && options.featherRadius > 0) {
        maskPipeline = maskPipeline.blur(options.featherRadius);
      }
      const maskRaw = await maskPipeline.raw().toBuffer();
      // NOTE(review): if the layer image already has an alpha channel, joining the
      // mask presumably yields a fifth channel rather than replacing the alpha —
      // verify against sharp's joinChannel behavior with RGBA character layers.
      layerPipeline = layerPipeline
        .ensureAlpha()
        .joinChannel(maskRaw, { raw: { width: layerW, height: layerH, channels: 1 } });
    } else {
      layerPipeline = layerPipeline.ensureAlpha();
    }

    // Apply opacity by scaling the last (alpha) channel of every pixel.
    // The value is clamped first: a negative opacity would otherwise wrap around
    // when written into the byte buffer and produce a mostly opaque layer.
    const opacity = clampOpacityPercent(layer.opacity);
    if (opacity < 100) {
      const alphaMultiplier = opacity / 100;
      const rawData = await layerPipeline.raw().toBuffer({ resolveWithObject: true });
      const channels = rawData.info.channels;
      for (let p = channels - 1; p < rawData.data.length; p += channels) {
        rawData.data[p] = Math.round(rawData.data[p] * alphaMultiplier);
      }
      layerBuffer = await sharp(rawData.data, {
        raw: { width: rawData.info.width, height: rawData.info.height, channels },
      })
        .png()
        .toBuffer();
    } else {
      layerBuffer = await layerPipeline.png().toBuffer();
    }

    // Apply flip + rotation. Translation and scale are handled elsewhere, so the
    // extra PNG round-trip only happens when there is something to flip or rotate.
    const rotation = finiteOr(t.rotation, 0);
    if (t.flipH || t.flipV || rotation !== 0) {
      let transformedPipeline = sharp(layerBuffer);
      if (t.flipH) transformedPipeline = transformedPipeline.flop();
      if (t.flipV) transformedPipeline = transformedPipeline.flip();
      if (rotation !== 0) {
        transformedPipeline = transformedPipeline.rotate(rotation, { background: transparent });
      }
      layerBuffer = await transformedPipeline.png().toBuffer();
    }

    // Compute placement on the base canvas (rotation may have enlarged the layer).
    const finalDims = await sharp(layerBuffer).metadata();
    const placement = computeLayerPlacement(
      width,
      height,
      finalDims.width || layerW,
      finalDims.height || layerH,
      Math.round(finiteOr(t.x, 0) * width),
      Math.round(finiteOr(t.y, 0) * height)
    );
    // Layer is fully out of bounds: skip it.
    if (!placement) continue;

    // Sharp's composite requires top/left >= 0 AND the layer to fit within the base,
    // so an overflowing layer is reduced to its visible portion first.
    const input = placement.crop
      ? await sharp(layerBuffer).extract(placement.crop).png().toBuffer()
      : layerBuffer;

    composites.push({
      input,
      blend: blendModeMap[layer.blendMode || "normal"] || "over",
      top: placement.top,
      left: placement.left,
    });
  }

  const finalBuffer = await pipeline.composite(composites).png().toBuffer();
  const outputKey = options.outputKey || `composited/comp_${Date.now()}.png`;
  const { url } = await storagePut(outputKey, finalBuffer, "image/png");
  console.log(`[Compositor] Composited ${layers.length} layers -> ${url} (${(finalBuffer.length / 1024).toFixed(1)}KB)`);
  return url;
}
/**
 * Composite one frame from a background, a foreground and a mask.
 * Backwards-compatible wrapper around `compositeLayers`.
 *
 * @param backgroundUrl Background image; used as the fully opaque base layer.
 * @param foregroundUrl Foreground image drawn on top, through the mask.
 * @param maskUrl Mask applied to the foreground.
 * @param options `featherRadius` is forwarded to `compositeLayers`; `opacity` is a
 *   0..1 fraction for the foreground (default 1.0), converted to a percentage here.
 * @returns URL of the composited image.
 */
export async function compositeFrame(
  backgroundUrl: string,
  foregroundUrl: string,
  maskUrl: string,
  options: {
    featherRadius?: number;
    opacity?: number;
  } = {}
): Promise<string> {
  const { featherRadius, opacity } = options;
  const foregroundOpacity = (opacity ?? 1.0) * 100;

  const background: CompositeLayer = {
    imageUrl: backgroundUrl,
    opacity: 100,
    blendMode: "normal",
  };
  const foreground: CompositeLayer = {
    imageUrl: foregroundUrl,
    opacity: foregroundOpacity,
    blendMode: "normal",
    maskUrl,
  };

  return compositeLayers([background, foreground], { featherRadius });
}
/**
 * Composite a whole sequence against a single background.
 *
 * Each frame supplies its own foreground and mask; the same options apply to all.
 * Frames are processed one after another, and results keep the input order.
 *
 * @param backgroundUrl Background shared by every frame.
 * @param frames Per-frame foreground and mask URLs.
 * @param options Forwarded unchanged to `compositeFrame` for every frame.
 * @returns URLs of the composited frames, in input order.
 */
export async function compositeSequence(
  backgroundUrl: string,
  frames: Array<{ foregroundUrl: string; maskUrl: string }>,
  options: { featherRadius?: number; opacity?: number } = {}
): Promise<string[]> {
  const compositedUrls: string[] = [];
  // Deliberately sequential: each frame is awaited before the next one starts.
  for (const { foregroundUrl, maskUrl } of frames) {
    const compositedUrl = await compositeFrame(backgroundUrl, foregroundUrl, maskUrl, options);
    compositedUrls.push(compositedUrl);
  }
  return compositedUrls;
}