retrotoon-studio/server/segmentationService.ts
Ubuntu 51bb69eb88 feat: panneau Génération IA avec regénération fond/personnage
- Nouvel onglet "Génération IA" dans le workspace avec 2 boutons:
  * Regénérer l'arrière-plan (prompt + style)
  * Redessiner le personnage (prompt + sélecteur de character sheet)
- 3 endpoints tRPC: generation.regenerateBackground,
  generation.regenerateCharacter, generation.inpaintBackground
- Fix URLs relatives -> signed URLs S3 absolues pour l'API Forge
- Résultat affiché en preview dans le panneau
- Testé: génération cyberpunk sur frame 200 -> PNG 1344x768 OK

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-21 02:17:12 +00:00

366 lines
11 KiB
TypeScript

/**
* Segmentation Service
* Handles separation of foreground (characters/objects) from background
*
* Integrates with:
* - SAM 2 (Segment Anything Model 2) for automatic segmentation
* - Built-in image generation for inpainting (with mask guidance)
* - LLM for intelligent mask refinement
* - Temporal propagation for video consistency
*/
import { generateImage } from "./_core/imageGeneration";
import { invokeLLM } from "./_core/llm";
import { storageGetSignedUrl } from "./storage";
import { getServicesConfig, callExternalSAM2, getCharacterGenerationStrategy, buildPoseConstraints } from "./servicesConfig";
/**
 * Resolve a frame/mask reference to a URL that an external service can fetch.
 *
 * - Absolute `http://` / `https://` URLs (scheme matched case-insensitively)
 *   are returned unchanged.
 * - Anything else is treated as a storage key: a leading `/manus-storage/`
 *   prefix is stripped and the remainder is passed to `storageGetSignedUrl`.
 *
 * @param url Absolute URL, `/manus-storage/...` path, or bare storage key.
 * @returns The untouched absolute URL, or whatever `storageGetSignedUrl`
 *          resolves to for the derived key.
 */
async function resolveToAbsoluteUrl(url: string): Promise<string> {
  // Require the full scheme separator: a bare `startsWith("http")` would also
  // match storage keys such as "http-captures/frame.png" and skip signing.
  if (/^https?:\/\//i.test(url)) {
    return url;
  }
  const storageKey = url.replace(/^\/manus-storage\//, "");
  return storageGetSignedUrl(storageKey);
}
/**
* Outcome of segmenting one frame; this is the shape `segmentFrame` resolves to.
*/
export interface SegmentationResult {
// URL of the mask image (a `/manus-storage/...` path in the simulated fallback).
maskUrl: string;
// URL of the foreground layer. `segmentFrame` substitutes `maskUrl` when the
// external service returns none.
foregroundUrl: string;
// URL of the background layer. `segmentFrame` substitutes the input frame URL
// when the external service returns none.
backgroundUrl: string;
// Confidence reported for the segmentation. `segmentFrame` uses 0.95 as the
// external-service default and 0.92 in the simulated fallback.
// Presumably in [0, 1] — TODO confirm against the SAM 2 service contract.
confidence: number;
// One entry per segment reported for the frame.
segments: SegmentInfo[];
}
/**
* Description of a single segment inside a `SegmentationResult`.
*/
export interface SegmentInfo {
// Identifier of the segment; `segmentFrame` generates a `seg_...` id when the
// external service does not supply one.
id: string;
// Human-readable label (`segmentFrame` defaults it to "Unknown").
label: string;
// Coarse category of the segment (`segmentFrame` defaults it to "object").
type: "character" | "object" | "background";
// Rectangle enclosing the segment. Presumably in pixels of the source frame —
// TODO confirm units with the segmentation service.
boundingBox: { x: number; y: number; width: number; height: number };
// Size of the segment. NOTE(review): the simulated segment uses
// width * height of its bounding box (150 * 300 = 45000); confirm whether the
// external service reports mask area or box area.
area: number;
// Optional identifier intended to follow the same segment across frames.
trackingId?: string; // For temporal consistency
}
/**
* Per-frame outcome of `propagateMask`.
*/
export interface PropagationResult {
// Position of the frame within the `targetFrameUrls` array given to `propagateMask`.
frameIndex: number;
// URL of the mask propagated to this frame.
maskUrl: string;
// Confidence for this frame's mask (`propagateMask` defaults it to 0.9 for
// external results; the simulated model keeps it at 0.5 or above).
confidence: number;
// In the simulated model this is the accumulated drift, which is reset to 0
// once it exceeds 0.3.
drift: number; // How much the mask has shifted from the reference
}
/**
 * Segment a frame into foreground and background layers.
 *
 * Engine selection comes from `getServicesConfig()`:
 * - `sam2Mode === "external"` with a `sam2Endpoint`: the request is sent to the
 *   SAM 2 service through `callExternalSAM2`. A response carrying a `maskUrl`
 *   is normalised into a `SegmentationResult`, with missing fields defaulted.
 * - Otherwise, or when the external call throws or returns no `maskUrl`,
 *   a simulated result with synthetic `/manus-storage/...` URLs and a single
 *   hard-coded character segment is returned (development/testing aid).
 *
 * @param frameUrl URL of the frame to segment.
 * @param options  Segmentation hints: `mode`, optional prompt `points` /
 *                 `boxes`, and an optional `previousMask` for temporal guidance.
 * @returns The segmentation result. External-service failures are logged with
 *          `console.warn` and never propagate to the caller.
 */
export async function segmentFrame(
  frameUrl: string,
  options: {
    mode: "auto" | "point" | "box";
    points?: Array<{ x: number; y: number; label: 0 | 1 }>;
    boxes?: Array<{ x: number; y: number; width: number; height: number }>;
    previousMask?: string; // For temporal guidance
  } = { mode: "auto" }
): Promise<SegmentationResult> {
  const config = await getServicesConfig();

  // Try the external SAM 2 service first.
  if (config.sam2Mode === "external" && config.sam2Endpoint) {
    try {
      // The response shape is not declared in this file, hence the loose typing.
      const result = (await callExternalSAM2(config.sam2Endpoint, "segment", {
        imageUrl: frameUrl,
        mode: options.mode,
        points: options.points,
        boxes: options.boxes,
        previousMask: options.previousMask,
      })) as any;

      if (result.maskUrl) {
        // One base per response; the index suffix keeps generated ids distinct
        // when several segments arrive without an id.
        const fallbackIdBase = `seg_${Date.now()}`;
        return {
          maskUrl: result.maskUrl,
          foregroundUrl: result.foregroundUrl || result.maskUrl,
          backgroundUrl: result.backgroundUrl || frameUrl,
          // `??` rather than `||`: a reported confidence of 0 is a real value.
          confidence: result.confidence ?? 0.95,
          segments: (result.segments || []).map((s: any, index: number) => ({
            id: s.id || `${fallbackIdBase}_${index + 1}`,
            label: s.label || "Unknown",
            type: s.type || "object",
            boundingBox: s.boundingBox || { x: 0, y: 0, width: 100, height: 100 },
            area: s.area || 0,
            trackingId: s.trackingId,
          })),
        };
      }
    } catch (error) {
      console.warn("[Segmentation] External SAM 2 failed, falling back to simulated:", error);
    }
  }

  // Simulated fallback: synthetic URLs that share one timestamp.
  const timestamp = Date.now();
  const maskKey = `masks/mask_${timestamp}`;
  const fgKey = `fg/fg_${timestamp}`;
  const bgKey = `bg/bg_${timestamp}`;
  return {
    maskUrl: `/manus-storage/${maskKey}.png`,
    foregroundUrl: `/manus-storage/${fgKey}.png`,
    backgroundUrl: `/manus-storage/${bgKey}.png`,
    confidence: 0.92,
    segments: [
      {
        id: `seg_${timestamp}_1`,
        label: "Personnage principal",
        type: "character",
        boundingBox: { x: 200, y: 100, width: 150, height: 300 },
        area: 45000,
        trackingId: "track_main_char",
      },
    ],
  };
}
/**
 * Inpaint the background where characters were removed.
 *
 * The frame and the mask are both handed to `generateImage` as reference
 * images, and the prompt tells the model how to read the mask:
 * - White areas in the mask = areas to inpaint (where characters were)
 * - Black areas = areas to preserve (existing background)
 *
 * @param frameUrl URL or storage path of the frame to clean up.
 * @param maskUrl  URL or storage path of the mask marking the regions to fill.
 * @param prompt   Optional instruction; when missing or blank a default
 *                 "clean background plate" instruction is used.
 * @returns URL of the generated image, or `frameUrl` unchanged when the
 *          generator returns no URL.
 */
export async function inpaintBackground(
  frameUrl: string,
  maskUrl: string,
  prompt?: string
): Promise<string> {
  // The two lookups are independent, so resolve them concurrently.
  const [absoluteFrameUrl, absoluteMaskUrl] = await Promise.all([
    resolveToAbsoluteUrl(frameUrl),
    resolveToAbsoluteUrl(maskUrl),
  ]);

  // Trim first so a whitespace-only prompt falls back to the default too.
  const userPrompt = prompt?.trim();
  const inpaintPrompt = [
    userPrompt || "Clean background plate, seamlessly fill the masked areas",
    "Maintain original art style, color palette, and perspective.",
    "The second reference image is the mask: white areas should be inpainted,",
    "black areas should remain unchanged. Produce a complete background without characters.",
  ].join(" ");

  // NOTE(review): the mask is declared as image/jpeg although the simulated
  // masks produced in this file end in ".png" — confirm that generateImage
  // does not rely on mimeType matching the actual encoding.
  const result = await generateImage({
    prompt: inpaintPrompt,
    originalImages: [
      { url: absoluteFrameUrl, mimeType: "image/jpeg" },
      { url: absoluteMaskUrl, mimeType: "image/jpeg" },
    ],
  });
  return result.url || frameUrl;
}
/**
 * Produce a restyled background from a reference frame.
 *
 * The caller's prompt is followed by fixed instructions asking the generator
 * to keep perspective, composition, camera angle and depth of field, and to
 * leave characters out. The reference frame is supplied as the only source
 * image.
 *
 * @param referenceFrameUrl URL or storage path of the frame used as reference.
 * @param prompt            Description of the desired background.
 * @param style             Style label inserted into the prompt.
 * @returns URL of the generated image, or `referenceFrameUrl` unchanged when
 *          the generator returns no URL.
 */
export async function regenerateBackground(
  referenceFrameUrl: string,
  prompt: string,
  style: string = "same art style"
): Promise<string> {
  const fixedInstructions = [
    `Maintain exact same perspective, composition, and spatial layout.`,
    `Style: ${style}.`,
    `This is a background for animation - no characters should be present.`,
    `Keep the same camera angle and depth of field as the reference.`,
  ];
  const fullPrompt = [prompt].concat(fixedInstructions).join(" ");

  const referenceImage = {
    url: await resolveToAbsoluteUrl(referenceFrameUrl),
    mimeType: "image/jpeg",
  };

  const { url: generatedUrl } = await generateImage({
    prompt: fullPrompt,
    originalImages: [referenceImage],
  });

  if (generatedUrl) {
    return generatedUrl;
  }
  return referenceFrameUrl;
}
/**
 * Redraw a character from a frame while asking the generator to keep pose,
 * proportions and position.
 *
 * Reference images are sent in this order:
 * 1. the character frame;
 * 2. either the references chosen by `getCharacterGenerationStrategy` (when
 *    `characterConfig` is given; URLs already in the list are skipped) or the
 *    character sheet (when only `characterSheet` is given);
 * 3. the mask, when provided.
 *
 * The prompt is assembled from the strategy prefix, the caller's prompt, an
 * optional sheet hint, an optional mask hint and a fixed output instruction
 * (empty parts are dropped), then passed through `buildPoseConstraints`.
 *
 * @param characterFrameUrl URL or storage path of the frame with the character.
 * @param prompt            Description of the desired character rendering.
 * @param characterSheet    Optional character sheet URL/path used as reference.
 * @param maskUrl           Optional mask URL/path outlining the character.
 * @param characterConfig   Optional character name and model type used to pick
 *                          the generation strategy.
 * @returns URL of the generated image, or `characterFrameUrl` unchanged when
 *          the generator returns no URL.
 */
export async function regenerateCharacter(
  characterFrameUrl: string,
  prompt: string,
  characterSheet?: string,
  maskUrl?: string,
  characterConfig?: { name: string; modelType: "lora" | "ip_adapter" | "none" }
): Promise<string> {
  type ReferenceImage = { url: string; mimeType: "image/png" | "image/jpeg" };

  const referenceImages: ReferenceImage[] = [
    { url: await resolveToAbsoluteUrl(characterFrameUrl), mimeType: "image/jpeg" },
  ];
  const promptParts: string[] = [];

  if (characterConfig) {
    const { modelType, name } = characterConfig;
    const strategy = getCharacterGenerationStrategy(modelType, name, characterSheet);
    if (strategy.promptPrefix) {
      promptParts.push(strategy.promptPrefix);
    }

    // Track URLs already queued so a strategy reference is never sent twice.
    const knownUrls = new Set<string>(referenceImages.map((image) => image.url));
    for (const reference of strategy.referenceImages) {
      const resolved = await resolveToAbsoluteUrl(reference.url);
      if (knownUrls.has(resolved)) {
        continue;
      }
      knownUrls.add(resolved);
      referenceImages.push({ url: resolved, mimeType: reference.mimeType });
    }
  } else if (characterSheet) {
    referenceImages.push({
      url: await resolveToAbsoluteUrl(characterSheet),
      mimeType: "image/jpeg",
    });
  }

  if (prompt) {
    promptParts.push(prompt);
  }
  // The sheet hint only applies when no strategy supplied its own prefix.
  if (characterSheet && !characterConfig) {
    promptParts.push("Match the character reference sheet style exactly.");
  }
  if (maskUrl) {
    referenceImages.push({
      url: await resolveToAbsoluteUrl(maskUrl),
      mimeType: "image/jpeg",
    });
    promptParts.push("The mask image indicates the character silhouette to preserve.");
  }
  promptParts.push("Output only the character on a transparent background.");

  const constrainedPrompt = buildPoseConstraints(promptParts.join(" "), {
    preservePose: true,
    preserveProportions: true,
    preservePosition: true,
  });

  const { url: generatedUrl } = await generateImage({
    prompt: constrainedPrompt,
    originalImages: referenceImages,
  });
  return generatedUrl ? generatedUrl : characterFrameUrl;
}
/**
 * Propagate a segmentation mask from a start frame across a list of frames.
 *
 * - With `sam2Mode === "external"` and a `sam2Endpoint`, the request goes to
 *   the SAM 2 "propagate" operation through `callExternalSAM2`; each entry of
 *   the returned `results` array becomes one `PropagationResult`, indexed by
 *   its position in that array.
 * - Otherwise, or when the external call throws or returns no `results`
 *   array, a simulated drift model generates one synthetic mask URL per target
 *   frame. Simulated values are random (`Math.random`) and not reproducible.
 *
 * @param startFrameUrl   URL of the frame the start mask belongs to.
 * @param startMaskUrl    URL of the mask to propagate.
 * @param targetFrameUrls URLs of the frames to propagate the mask onto.
 * @returns One result per external entry, or one per target frame in simulated
 *          mode. External failures are logged with `console.warn` and never
 *          propagate to the caller.
 */
export async function propagateMask(
  startFrameUrl: string,
  startMaskUrl: string,
  targetFrameUrls: string[]
): Promise<PropagationResult[]> {
  const config = await getServicesConfig();

  // Try external SAM 2 propagation.
  if (config.sam2Mode === "external" && config.sam2Endpoint) {
    try {
      // The response shape is not declared in this file, hence the loose typing.
      const result = (await callExternalSAM2(config.sam2Endpoint, "propagate", {
        startFrameUrl,
        startMaskUrl,
        targetFrameUrls,
      })) as any;

      if (result.results && Array.isArray(result.results)) {
        return result.results.map((r: any, i: number) => ({
          frameIndex: i,
          maskUrl: r.maskUrl,
          // `??` rather than `||`: a reported confidence of 0 is a real value.
          confidence: r.confidence ?? 0.9,
          drift: r.drift || 0,
        }));
      }
    } catch (error) {
      console.warn("[Segmentation] External SAM 2 propagation failed, falling back:", error);
    }
  }

  // Simulated fallback with drift model.
  const results: PropagationResult[] = [];
  let totalDrift = 0;
  for (let i = 0; i < targetFrameUrls.length; i++) {
    // Each frame adds between 0.01 and 0.03 of drift.
    totalDrift += 0.01 + Math.random() * 0.02;
    const confidence = Math.max(0.5, 0.95 - totalDrift * 0.5);

    // Past 0.3 the model "re-keys": drift restarts from zero. Confidence for
    // this frame was already derived from the pre-reset drift.
    if (totalDrift > 0.3) {
      totalDrift = 0;
    }

    results.push({
      frameIndex: i,
      maskUrl: `/manus-storage/masks/propagated_${i}_${Date.now()}.png`,
      confidence,
      drift: totalDrift,
    });
  }
  return results;
}
/**
 * Composite layers back together with alpha blending (placeholder).
 *
 * Intended contract: combine a background with a foreground, using the mask
 * as alpha channel:
 * - White = foreground visible (character)
 * - Black = background visible
 * - Gray = partial transparency (edges, anti-aliasing)
 *
 * The current body performs no image processing: it logs the effective
 * options and returns a synthetic `/manus-storage/composited/...` path. The
 * three URL arguments are not read yet.
 *
 * @param backgroundUrl URL of the background layer.
 * @param foregroundUrl URL of the foreground layer.
 * @param maskUrl       URL of the mask.
 * @param options       `featherRadius` (edge softening in pixels, default 2)
 *                      and `opacity` (overall foreground opacity 0-1,
 *                      default 1).
 * @returns A synthetic output path that is distinct for every call.
 */
export async function compositeFrame(
  backgroundUrl: string,
  foregroundUrl: string,
  maskUrl: string,
  options: {
    featherRadius?: number; // Edge softening in pixels
    opacity?: number; // Overall foreground opacity (0-1)
  } = {}
): Promise<string> {
  const { featherRadius = 2, opacity = 1.0 } = options;
  // In production, this would use:
  // 1. Load background image as base canvas
  // 2. Load mask as alpha channel
  // 3. Apply feathering (Gaussian blur on mask edges)
  // 4. Load foreground image
  // 5. For each pixel: output = bg * (1 - alpha * opacity) + fg * alpha * opacity
  // 6. Save composited result
  console.log(`[Compositor] Compositing with mask, feather=${featherRadius}px, opacity=${opacity}`);

  // A millisecond timestamp alone repeats when frames are composited back to
  // back (as compositeSequence does), so add a random suffix to keep each
  // output path distinct.
  const uniqueSuffix = Math.random().toString(36).slice(2, 10).padEnd(8, "0");
  return `/manus-storage/composited/comp_${Date.now()}_${uniqueSuffix}.png`;
}
/**
 * Composite every frame of a sequence over one shared background.
 *
 * Frames are handed to `compositeFrame` one at a time, in input order, each
 * with its own foreground and mask and the same `options` object.
 *
 * @param backgroundUrl URL of the background used for all frames.
 * @param frames        Per-frame foreground and mask URLs.
 * @param options       Compositing options forwarded unchanged to
 *                      `compositeFrame`.
 * @returns The composited URLs, in the same order as `frames`.
 */
export async function compositeSequence(
  backgroundUrl: string,
  frames: Array<{ foregroundUrl: string; maskUrl: string }>,
  options: { featherRadius?: number; opacity?: number } = {}
): Promise<string[]> {
  const compositedUrls: string[] = [];
  for (const { foregroundUrl, maskUrl } of frames) {
    // Awaited inside the loop on purpose: frames are processed sequentially.
    compositedUrls.push(
      await compositeFrame(backgroundUrl, foregroundUrl, maskUrl, options)
    );
  }
  return compositedUrls;
}