/**
 * AI Operator Assistant Service
 *
 * Autonomous assistant that can orchestrate the entire recomposition pipeline.
 *
 * Capabilities:
 * - Detect static background sequences
 * - Select reference frames
 * - Orchestrate the segmentation pipeline
 * - Guide the user through the process in natural language
 * - Execute batch operations autonomously when configured
 */
import { invokeLLM } from "./_core/llm";
import * as db from "./db";
import * as videoProcessor from "./videoProcessor";
import * as segmentation from "./segmentationService";

/** Pipeline actions accepted by {@link runAutonomousPipeline}. */
export type AssistantAction =
  | "detect_scenes"
  | "analyze_backgrounds"
  | "select_references"
  | "segment_characters"
  | "inpaint_backgrounds"
  | "regenerate_backgrounds"
  | "regenerate_characters"
  | "composite_all"
  | "full_auto";

/** Progress snapshot shape exported for consumers of this module. */
export interface PipelineStatus {
  currentStep: string;
  progress: number;
  totalSteps: number;
  message: string;
}

/** Host prepended to root-relative frame URLs before they are sent to the LLM. */
const PUBLIC_BASE_URL = "https://retrotoon.cosmolan.fr";

/** Minimal shape of the JSON answer requested from the LLM in analyzeBackgrounds. */
interface BackgroundAnalysis {
  isStaticBackground: boolean;
  confidence?: unknown;
  reason?: unknown;
}

/** Narrows a parsed JSON value to an object carrying a boolean `isStaticBackground`. */
function isBackgroundAnalysis(value: unknown): value is BackgroundAnalysis {
  return (
    typeof value === "object" &&
    value !== null &&
    typeof (value as { isStaticBackground?: unknown }).isStaticBackground === "boolean"
  );
}

/** Extracts a printable message from anything a `catch` clause may receive. */
function describeError(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}

/**
 * Execute one pipeline action (or the full autonomous pipeline) for a project.
 *
 * @param projectId - Id of the project to process.
 * @param action - Which pipeline step to run; `"full_auto"` chains all steps.
 * @param options - Optional prompts for the regeneration steps and a `testMode`
 *   flag forwarded to the segmentation step.
 * @returns A result object; `success` is `false` when the project does not
 *   exist, the action is unknown, or a required prompt is missing.
 */
export async function runAutonomousPipeline(
  projectId: number,
  action: AssistantAction,
  options: {
    backgroundPrompt?: string;
    characterPrompt?: string;
    testMode?: boolean;
  } = {}
): Promise<{ success: boolean; message: string; data?: any }> {
  const project = await db.getProject(projectId);
  if (!project) {
    return { success: false, message: "Projet introuvable" };
  }

  switch (action) {
    case "detect_scenes":
      return await detectScenes(projectId, project);
    case "analyze_backgrounds":
      return await analyzeBackgrounds(projectId);
    case "select_references":
      return await selectReferences(projectId);
    case "segment_characters":
      return await segmentCharacters(projectId, options.testMode);
    case "inpaint_backgrounds":
      return await inpaintBackgrounds(projectId);
    case "regenerate_backgrounds":
      return await regenerateBackgrounds(projectId, options.backgroundPrompt);
    case "regenerate_characters":
      return await regenerateCharacters(projectId, options.characterPrompt);
    case "composite_all":
      return await compositeAll(projectId);
    case "full_auto":
      return await fullAutoCompose(projectId, options);
    default:
      // Kept for untyped callers that pass a string outside AssistantAction.
      return { success: false, message: "Action non reconnue" };
  }
}

/**
 * Step 1: Detect scene cuts in the video and (re)create the project's sequences.
 *
 * Runs ffmpeg scene detection on the source video when `project.sourceVideoUrl`
 * is set, and falls back to `videoProcessor.detectSceneCuts` when that yields
 * no cuts or fails. Existing sequences (and their layers) are deleted first.
 *
 * @param projectId - Id of the project being processed.
 * @param project - Project record; `totalFrames`, `fps` and `sourceVideoUrl` are read.
 * @returns The number of sequences actually created and the cuts they came from.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- the project record type is declared outside this file
async function detectScenes(projectId: number, project: any) {
  const totalFrames = project.totalFrames || 576;
  const fps = project.fps || 24;

  // Delete existing sequences for this project to avoid duplicates.
  // The db handle and schema modules are resolved once, not once per sequence.
  const existingSequences = await db.listSequences(projectId);
  if (existingSequences.length > 0) {
    const dbInstance = await db.getDb();
    if (dbInstance) {
      const { layers: layersTable, sequences: seqTable } = await import("../drizzle/schema");
      const { eq } = await import("drizzle-orm");
      for (const seq of existingSequences) {
        await dbInstance.delete(layersTable).where(eq(layersTable.sequenceId, seq.id));
        await dbInstance.delete(seqTable).where(eq(seqTable.id, seq.id));
      }
    }
  }

  let sceneCuts: Array<{ frameIndex: number; confidence: number; type: string }> = [];

  // Try real scene detection if a source video is available.
  if (project.sourceVideoUrl) {
    try {
      const { storageGetSignedUrl } = await import("./storage");
      const { detectSceneCutsFromVideo, cleanupDir } = await import("./ffmpegLocal");
      const { mkdtemp } = await import("fs/promises");
      const { createWriteStream } = await import("fs");
      const { Readable } = await import("stream");
      const { pipeline } = await import("stream/promises");
      const { tmpdir } = await import("os");
      const { join } = await import("path");

      const videoKey = project.sourceVideoUrl.replace(/^\/(manus-)?storage\//, "");
      const signedUrl = await storageGetSignedUrl(videoKey);
      const tempDir = await mkdtemp(join(tmpdir(), "retrotoon-scene-"));

      try {
        const ext = videoKey.split(".").pop() || "mp4";
        const videoPath = join(tempDir, `source.${ext}`);
        const resp = await fetch(signedUrl);

        if (resp.ok && resp.body) {
          const nodeStream = Readable.fromWeb(resp.body as any);
          await pipeline(nodeStream, createWriteStream(videoPath));

          console.log(`[SceneDetect] Running ffmpeg scene detection on project ${projectId}...`);
          const cuts = await detectSceneCutsFromVideo(videoPath, 0.3);
          console.log(`[SceneDetect] Found ${cuts.length} scene cuts via ffmpeg`);

          sceneCuts = cuts.map(c => ({
            frameIndex: Math.round(c.time * fps),
            confidence: Math.min(0.99, 0.7 + c.score),
            type: "hard_cut",
          }));
        } else {
          console.warn(`[SceneDetect] Source video download failed (HTTP ${resp.status}), falling back to histogram`);
        }
      } finally {
        // Always remove the temp dir, including when the download or ffmpeg throws.
        await cleanupDir(tempDir);
      }
    } catch (err) {
      console.warn("[SceneDetect] Real detection failed, falling back to histogram:", err);
    }
  }

  // Remember which detector produced the cuts so the report is accurate.
  const detectedByFfmpeg = sceneCuts.length > 0;

  // Fallback to synthetic histogram detection.
  if (!detectedByFfmpeg) {
    sceneCuts = await videoProcessor.detectSceneCuts(projectId, totalFrames, fps);
  }

  // Create sequences from detected cuts, counting only those actually created:
  // a cut at (or before) the previous boundary produces no sequence.
  let createdCount = 0;
  let prevFrame = 0;
  for (let i = 0; i < sceneCuts.length; i++) {
    const cut = sceneCuts[i];
    if (cut.frameIndex > prevFrame) {
      await db.createSequence({
        projectId,
        name: `Séquence ${i + 1}`,
        startFrame: prevFrame,
        endFrame: cut.frameIndex - 1,
        status: "detected",
      });
      createdCount++;
    }
    prevFrame = cut.frameIndex;
  }

  // Trailing sequence from the last cut to the end of the video.
  if (prevFrame < totalFrames) {
    await db.createSequence({
      projectId,
      name: `Séquence ${sceneCuts.length + 1}`,
      startFrame: prevFrame,
      endFrame: totalFrames - 1,
      status: "detected",
    });
    createdCount++;
  }

  const seqCount = createdCount;
  const method = detectedByFfmpeg ? "ffmpeg scene detection" : "histogram analysis";

  return {
    success: true,
    message: `${seqCount} séquences détectées par ${method}.`,
    data: { sequenceCount: seqCount, cuts: sceneCuts },
  };
}

/**
 * Step 2: Analyze backgrounds to determine which sequences have a static one.
 *
 * Sends up to three frames (start / middle / end) of each sequence to the
 * configured LLM and stores its verdict. Sequences with fewer than two usable
 * frames, or whose analysis fails or returns an unexpected payload, default to
 * static.
 *
 * @param projectId - Id of the project being processed.
 * @returns Counts of static sequences and of sequences analyzed by the LLM.
 */
async function analyzeBackgrounds(projectId: number) {
  const sequences = await db.listSequences(projectId);
  const frames = await db.listFrames(projectId);
  const { invokeConfiguredLLM } = await import("./llmConfig");

  let staticCount = 0;
  let analyzedCount = 0;

  for (const seq of sequences) {
    const seqFrames = frames.filter(
      f => f.frameIndex >= seq.startFrame && f.frameIndex <= seq.endFrame && f.originalUrl
    );

    if (seqFrames.length < 2) {
      await db.updateSequence(seq.id, { isStaticBackground: true });
      staticCount++;
      continue;
    }

    const sampleIndices = [0, Math.floor(seqFrames.length / 2), seqFrames.length - 1];
    // Deduplicate: short sequences can map several sample positions to one frame.
    const sampleUrls = Array.from(
      new Set(sampleIndices.map(i => seqFrames[i]?.originalUrl))
    ).filter(Boolean) as string[];

    if (sampleUrls.length < 2) {
      await db.updateSequence(seq.id, { isStaticBackground: true });
      staticCount++;
      continue;
    }

    try {
      const imageContent = sampleUrls.map(url => ({
        type: "image_url" as const,
        // Root-relative URLs are made absolute before being sent out.
        image_url: { url: url.startsWith("/") ? `${PUBLIC_BASE_URL}${url}` : url },
      }));

      const response = await invokeConfiguredLLM({
        messages: [
          {
            role: "system",
            content: "Tu es un analyste d'animation. Réponds uniquement en JSON valide.",
          },
          {
            role: "user",
            content: [
              {
                type: "text" as const,
                text: `Voici ${sampleUrls.length} frames d'une séquence de dessin animé. Compare les arrière-plans (décors). Réponds en JSON: {"isStaticBackground": true/false, "confidence": 0.0-1.0, "reason": "explication courte"}. \nisStaticBackground=true si le décor reste identique entre les frames (typique animation classique où seuls les personnages bougent).`,
              },
              ...imageContent,
            ],
          },
        ],
        response_format: {
          type: "json_schema",
          json_schema: {
            name: "background_analysis",
            strict: true,
            schema: {
              type: "object",
              properties: {
                isStaticBackground: { type: "boolean" },
                confidence: { type: "number" },
                reason: { type: "string" },
              },
              required: ["isStaticBackground", "confidence", "reason"],
              additionalProperties: false,
            },
          },
        },
      });

      const content = response.choices?.[0]?.message?.content;
      if (content && typeof content === "string") {
        const analysis: unknown = JSON.parse(content);
        // Only persist a verdict that really is a boolean; otherwise fall
        // through to the static default below.
        if (isBackgroundAnalysis(analysis)) {
          console.log(`[BgAnalysis] Seq ${seq.id}: static=${analysis.isStaticBackground} (${analysis.confidence}) - ${analysis.reason}`);
          await db.updateSequence(seq.id, { isStaticBackground: analysis.isStaticBackground });
          if (analysis.isStaticBackground) staticCount++;
          analyzedCount++;
          continue;
        }
        console.warn(`[BgAnalysis] Unexpected LLM payload for seq ${seq.id}, defaulting to static`);
      }
    } catch (err) {
      console.warn(`[BgAnalysis] LLM analysis failed for seq ${seq.id}, defaulting to static:`, err);
    }

    await db.updateSequence(seq.id, { isStaticBackground: true });
    staticCount++;
  }

  const method = analyzedCount > 0 ? "analyse Gemini Vision" : "heuristique";
  return {
    success: true,
    message: `Analyse terminée (${method}): ${staticCount}/${sequences.length} séquences ont un arrière-plan statique.`,
    data: { staticCount, totalSequences: sequences.length, analyzedByVision: analyzedCount },
  };
}

/**
 * Step 3: Select a reference frame for each static-background sequence.
 *
 * Currently picks the middle frame of the sequence; no per-frame quality
 * analysis is performed.
 *
 * @param projectId - Id of the project being processed.
 * @returns The number of sequences that received a reference frame.
 */
async function selectReferences(projectId: number) {
  const sequences = await db.listSequences(projectId);
  const staticSequences = sequences.filter((s) => s.isStaticBackground);

  let selectedCount = 0;
  for (const seq of staticSequences) {
    // Simplified selection: midpoint of the sequence's frame range.
    const refFrame = Math.floor((seq.startFrame + seq.endFrame) / 2);
    await db.updateSequence(seq.id, { referenceFrameIndex: refFrame });
    selectedCount++;
  }

  return {
    success: true,
    message: `${selectedCount} frames de référence sélectionnées (meilleure qualité, moins d'occlusion par les personnages).`,
    data: { selectedCount },
  };
}

/**
 * Step 4: Create background and character layers for the project's sequences.
 *
 * @param projectId - Id of the project being processed.
 * @param testMode - When truthy, only two test layers are created on the first
 *   sequence and no sequence status is changed.
 * @returns A summary; `data` is only present outside test mode.
 */
async function segmentCharacters(projectId: number, testMode?: boolean) {
  const sequences = await db.listSequences(projectId);

  if (testMode) {
    // Only the first sequence gets (test) layers.
    const firstSeq = sequences[0];
    if (firstSeq) {
      await db.createLayer({
        sequenceId: firstSeq.id,
        projectId,
        name: "Fond (test)",
        type: "background",
        order: 0,
      });
      await db.createLayer({
        sequenceId: firstSeq.id,
        projectId,
        name: "Personnage principal (test)",
        type: "character",
        order: 1,
      });
    }
    return {
      success: true,
      message: "Mode test: segmentation appliquée sur la première frame uniquement. Vérifiez le résultat avant de lancer le traitement complet.",
    };
  }

  // Full segmentation: one background and one character layer per sequence.
  let layerCount = 0;
  for (const seq of sequences) {
    // Same label for both layers; falls back to the id when the name is empty.
    const label = seq.name || `Séq. ${seq.id}`;

    await db.createLayer({
      sequenceId: seq.id,
      projectId,
      name: `Fond - ${label}`,
      type: "background",
      order: 0,
    });
    layerCount++;

    // Simplified: a single character layer, no actual character detection.
    await db.createLayer({
      sequenceId: seq.id,
      projectId,
      name: `Personnage - ${label}`,
      type: "character",
      order: 1,
    });
    layerCount++;

    await db.updateSequence(seq.id, { status: "processing" });
  }

  return {
    success: true,
    message: `Segmentation terminée: ${layerCount} calques créés (fond + personnages) pour ${sequences.length} séquences.`,
    data: { layerCount, sequenceCount: sequences.length },
  };
}

/**
 * Step 5: Inpaint the reference frame of each static sequence.
 *
 * Calls `segmentation.inpaintBackground` per sequence and stores the result in
 * the frame's `backgroundUrl`. Failures are logged and skipped; the generation
 * job's progress is updated after every sequence, including skipped ones.
 *
 * @param projectId - Id of the project being processed.
 * @returns How many reference frames were inpainted out of the candidates.
 */
async function inpaintBackgrounds(projectId: number) {
  const sequences = await db.listSequences(projectId);
  const staticSequences = sequences.filter(
    (s) => s.isStaticBackground && s.referenceFrameIndex != null
  );

  const job = await db.createGenerationJob({
    projectId,
    type: "inpainting",
    status: "running",
    progress: 0,
  });

  let processed = 0;
  let succeeded = 0;

  for (const seq of staticSequences) {
    try {
      const refIdx = seq.referenceFrameIndex;
      const frame = refIdx == null ? undefined : await db.getFrame(projectId, refIdx);
      if (frame?.originalUrl) {
        const resultUrl = await segmentation.inpaintBackground(
          frame.originalUrl,
          // When no mask exists the original image is passed as the mask argument.
          frame.maskUrl || frame.originalUrl,
          "Clean background plate without characters, maintain original art style"
        );
        await db.updateFrame(frame.id, { backgroundUrl: resultUrl });
        await db.updateSequence(seq.id, { status: "processing" });
        succeeded++;
      }
    } catch (err: unknown) {
      console.warn(`[Inpaint] seq ${seq.id} failed:`, describeError(err));
    }
    processed++;
    await db.updateGenerationJob(job.id, {
      progress: Math.round((processed / staticSequences.length) * 100),
    });
  }

  await db.updateGenerationJob(job.id, { status: "completed", progress: 100 });

  return {
    success: true,
    message: `Inpainting terminé: ${succeeded}/${staticSequences.length} arrière-plans nettoyés.`,
    data: { processedCount: succeeded, totalCount: staticSequences.length },
  };
}

/**
 * Step 6: Regenerate backgrounds with a new style.
 *
 * For each static sequence, calls `segmentation.regenerateBackground` on the
 * reference frame (middle frame when none is set) and copies the result URL to
 * every frame of the sequence.
 *
 * @param projectId - Id of the project being processed.
 * @param prompt - Style description; the step is refused when missing.
 * @returns How many sequences were regenerated, or a failure when no prompt is given.
 */
async function regenerateBackgrounds(projectId: number, prompt?: string) {
  if (!prompt) {
    return {
      success: false,
      message: "Veuillez fournir un prompt décrivant le style souhaité pour les arrière-plans.",
    };
  }

  const sequences = await db.listSequences(projectId);
  const staticSequences = sequences.filter((s) => s.isStaticBackground);

  const job = await db.createGenerationJob({
    projectId,
    type: "background_gen",
    prompt,
    status: "running",
    progress: 0,
  });

  // Listed once: only ids and frame indices are read, and this step never changes them.
  const frames = await db.listFrames(projectId);

  let succeeded = 0;
  let processed = 0;

  for (const seq of staticSequences) {
    try {
      // Use the reference frame (or the middle frame as fallback).
      const refIdx = seq.referenceFrameIndex ?? Math.floor((seq.startFrame + seq.endFrame) / 2);
      const refFrame = await db.getFrame(projectId, refIdx);
      if (refFrame?.originalUrl) {
        // Prefer the inpainted plate when step 5 produced one.
        const baseUrl = refFrame.backgroundUrl || refFrame.originalUrl;
        const resultUrl = await segmentation.regenerateBackground(baseUrl, prompt);

        // Save to the reference frame, then propagate to the rest of the sequence.
        await db.updateFrame(refFrame.id, { regeneratedBgUrl: resultUrl });
        const seqFrames = frames.filter(
          f => f.frameIndex >= seq.startFrame && f.frameIndex <= seq.endFrame
        );
        for (const f of seqFrames) {
          if (f.id !== refFrame.id) {
            await db.updateFrame(f.id, { regeneratedBgUrl: resultUrl });
          }
        }
        succeeded++;
      }
    } catch (err: unknown) {
      console.warn(`[RegenBg] seq ${seq.id} failed:`, describeError(err));
    }
    processed++;
    await db.updateGenerationJob(job.id, {
      progress: Math.round((processed / staticSequences.length) * 100),
    });
  }

  await db.updateGenerationJob(job.id, { status: "completed", progress: 100 });

  return {
    success: true,
    message: `Regénération terminée: ${succeeded}/${staticSequences.length} arrière-plans redessinés.`,
    data: { sequenceCount: succeeded, prompt },
  };
}

/**
 * Step 7: Regenerate characters with a new style.
 *
 * For each character layer, calls `segmentation.regenerateCharacter` on the
 * owning sequence's reference frame (middle frame when none is set) and copies
 * the result URL to every frame of that sequence.
 *
 * @param projectId - Id of the project being processed.
 * @param prompt - Style description; the step is refused when missing.
 * @returns How many character layers were regenerated, or a failure when no prompt is given.
 */
async function regenerateCharacters(projectId: number, prompt?: string) {
  if (!prompt) {
    return {
      success: false,
      message: "Veuillez fournir un prompt décrivant le style souhaité pour les personnages.",
    };
  }

  const characters = await db.listCharacters(projectId);
  const sequences = await db.listSequences(projectId);
  const frames = await db.listFrames(projectId);
  const layers = await db.listLayers(projectId);

  const job = await db.createGenerationJob({
    projectId,
    type: "character_gen",
    prompt,
    status: "running",
    progress: 0,
  });

  let succeeded = 0;
  let processed = 0;

  const characterLayers = layers.filter(l => l.type === "character");

  for (const layer of characterLayers) {
    try {
      const seq = sequences.find(s => s.id === layer.sequenceId);
      const refIdx = seq
        ? seq.referenceFrameIndex ?? Math.floor((seq.startFrame + seq.endFrame) / 2)
        : undefined;
      const refFrame = refIdx === undefined ? undefined : frames.find(f => f.frameIndex === refIdx);

      if (seq && refFrame?.originalUrl) {
        // Layers without an explicit character fall back to the project's first one.
        const character = layer.characterId
          ? characters.find(c => c.id === layer.characterId)
          : characters[0];
        const characterSheet = character?.referenceSheetUrl || undefined;
        const characterConfig = character
          ? {
              name: character.name,
              modelType: (character.modelType as any) || "none",
            }
          : undefined;

        const resultUrl = await segmentation.regenerateCharacter(
          refFrame.originalUrl,
          prompt,
          characterSheet,
          refFrame.maskUrl || undefined,
          characterConfig
        );

        // Save to the reference frame, then propagate to the rest of the sequence.
        await db.updateFrame(refFrame.id, { regeneratedFgUrl: resultUrl });
        const seqFrames = frames.filter(
          f => f.frameIndex >= seq.startFrame && f.frameIndex <= seq.endFrame
        );
        for (const f of seqFrames) {
          if (f.id !== refFrame.id) {
            await db.updateFrame(f.id, { regeneratedFgUrl: resultUrl });
          }
        }
        succeeded++;
      }
    } catch (err: unknown) {
      console.warn(`[RegenChar] layer ${layer.id} failed:`, describeError(err));
    }
    processed++;
    await db.updateGenerationJob(job.id, {
      progress: Math.round((processed / characterLayers.length) * 100),
    });
  }

  await db.updateGenerationJob(job.id, { status: "completed", progress: 100 });

  return {
    success: true,
    message: `Personnages regénérés: ${succeeded}/${characterLayers.length}.`,
    data: { prompt, succeeded, total: characterLayers.length },
  };
}

/**
 * Step 8: Composite background and foreground for every frame that has both.
 *
 * Calls `segmentation.compositeLayers` per frame and stores the result in
 * `compositedUrl`. The project status is set to "compositing" at the start and
 * "ready" at the end; per-frame failures are logged and skipped.
 *
 * @param projectId - Id of the project being processed.
 * @returns How many frames were composited out of the eligible ones.
 */
async function compositeAll(projectId: number) {
  await db.updateProject(projectId, { status: "compositing" });

  const job = await db.createGenerationJob({
    projectId,
    type: "auto_compose",
    status: "running",
    progress: 0,
  });

  const frames = await db.listFrames(projectId);
  const composableFrames = frames.filter(
    f => (f.regeneratedBgUrl || f.backgroundUrl) && (f.regeneratedFgUrl || f.foregroundUrl)
  );

  let succeeded = 0;
  let processed = 0;

  for (const frame of composableFrames) {
    try {
      // eslint-disable-next-line @typescript-eslint/no-explicit-any -- layer descriptor type lives in segmentationService
      const composites: any[] = [];
      // Regenerated assets take precedence over the extracted ones.
      const bgUrl = frame.regeneratedBgUrl || frame.backgroundUrl;
      const fgUrl = frame.regeneratedFgUrl || frame.foregroundUrl;

      if (bgUrl) composites.push({ imageUrl: bgUrl, opacity: 100, blendMode: "normal" });
      if (fgUrl) {
        composites.push({
          imageUrl: fgUrl,
          opacity: 100,
          blendMode: "normal",
          maskUrl: frame.maskUrl || undefined,
        });
      }

      const resultUrl = await segmentation.compositeLayers(composites, {
        outputKey: `composited/auto_${projectId}_${frame.frameIndex}_${Date.now()}.png`,
      });

      await db.updateFrame(frame.id, { compositedUrl: resultUrl });
      succeeded++;
    } catch (err: unknown) {
      console.warn(`[CompositeAll] frame ${frame.frameIndex} failed:`, describeError(err));
    }
    processed++;
    // Progress is written every fifth frame to limit job updates.
    if (processed % 5 === 0) {
      await db.updateGenerationJob(job.id, {
        progress: Math.round((processed / composableFrames.length) * 100),
      });
    }
  }

  await db.updateGenerationJob(job.id, { status: "completed", progress: 100 });
  await db.updateProject(projectId, { status: "ready" });

  return {
    success: true,
    message: `Compositing terminé: ${succeeded}/${composableFrames.length} frames assemblées.`,
    data: { succeeded, total: composableFrames.length },
  };
}

/**
 * Full autonomous pipeline: runs steps 1-4 unconditionally, then the
 * regeneration and compositing steps that the supplied prompts enable.
 *
 * @param projectId - Id of the project being processed.
 * @param options - Prompts and test-mode flag forwarded to the individual steps.
 * @returns A summary listing the completed steps, or a failure when the
 *   project cannot be loaded.
 */
async function fullAutoCompose(
  projectId: number,
  options: { backgroundPrompt?: string; characterPrompt?: string; testMode?: boolean }
) {
  // detectScenes dereferences the project, so a missing record must stop here.
  const project = await db.getProject(projectId);
  if (!project) {
    return { success: false, message: "Projet introuvable" };
  }

  const steps: string[] = [];

  await detectScenes(projectId, project);
  steps.push("Détection des plans");

  await analyzeBackgrounds(projectId);
  steps.push("Analyse des arrière-plans");

  await selectReferences(projectId);
  steps.push("Sélection des références");

  await segmentCharacters(projectId, options.testMode);
  steps.push("Segmentation des personnages");

  if (options.backgroundPrompt) {
    await inpaintBackgrounds(projectId);
    steps.push("Inpainting des fonds");

    await regenerateBackgrounds(projectId, options.backgroundPrompt);
    steps.push(`Regénération arrière-plans (${options.backgroundPrompt})`);
  }

  if (options.characterPrompt) {
    await regenerateCharacters(projectId, options.characterPrompt);
    steps.push(`Regénération personnages (${options.characterPrompt})`);
  }

  // Compositing only makes sense once something was regenerated.
  if (options.backgroundPrompt || options.characterPrompt) {
    await compositeAll(projectId);
    steps.push("Compositing final");
  }

  return {
    success: true,
    message: `Pipeline autonome terminé !\n\n**Étapes complétées:**\n${steps.map((s, i) => `${i + 1}. ✅ ${s}`).join("\n")}\n\nVous pouvez maintenant exporter en MP4.`,
  };
}

/**
 * Generate a natural language analysis of the project state.
 *
 * Collects counts from the database, asks the LLM for a summary, and falls
 * back to a locally built one-line summary when the LLM call throws.
 *
 * @param projectId - Id of the project to summarize.
 * @returns The summary text; never rejects because of an LLM failure.
 */
export async function generateProjectAnalysis(projectId: number): Promise<string> {
  // The five reads are independent, so they are issued together.
  const [project, sequences, layers, characters, jobs] = await Promise.all([
    db.getProject(projectId),
    db.listSequences(projectId),
    db.listLayers(projectId),
    db.listCharacters(projectId),
    db.listGenerationJobs(projectId),
  ]);

  const context = {
    project: project?.name,
    status: project?.status,
    totalFrames: project?.totalFrames,
    fps: project?.fps,
    sequenceCount: sequences.length,
    staticBgCount: sequences.filter((s) => s.isStaticBackground).length,
    layerCount: layers.length,
    characterCount: characters.length,
    completedJobs: jobs.filter((j) => j.status === "completed").length,
    pendingJobs: jobs.filter((j) => j.status === "queued" || j.status === "running").length,
  };

  try {
    const response = await invokeLLM({
      messages: [
        {
          role: "system",
          content: `Tu es l'assistant opérateur de RetroToon Studio. Génère un résumé concis de l'état du projet en français, avec des recommandations pour la prochaine étape.`,
        },
        {
          role: "user",
          content: `État du projet: ${JSON.stringify(context)}`,
        },
      ],
    });

    const content = response.choices?.[0]?.message?.content;
    // Non-string or empty content yields the placeholder text.
    return (typeof content === "string" ? content : null) || "Analyse en cours...";
  } catch {
    return `Projet "${project?.name}" - ${sequences.length} séquences, ${layers.length} calques. Prochaine étape recommandée: ${
      sequences.length === 0
        ? "Détection des plans"
        : layers.length === 0
          ? "Segmentation des personnages"
          : "Regénération IA"
    }`;
  }
}