Update deformes4D_engine.py
deformes4D_engine.py  +14 -37  (CHANGED)
@@ -2,14 +2,12 @@
 #
 # Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos
 #
-# Version: 2.0
+# Version: 2.1.0
 #
 # This file contains the Deformes4D Engine, which acts as the primary "Editor" or
-# "Film Crew" specialist within the ADUC-SDR architecture. It
-#
-#
-# the enhancer specialist), and final rendering/post-production tasks like HD mastering
-# and audio generation. It executes the specific commands delegated by the AducOrchestrator.
+# "Film Crew" specialist within the ADUC-SDR architecture. It has been refactored
+# to delegate all VAE operations to the dedicated VaeManager, cleaning up its own
+# logic and adhering to the specialist-based architecture.
 
 import os
 import time
@@ -26,11 +24,11 @@ import shutil
 from pathlib import Path
 from typing import List, Tuple, Generator, Dict, Any, Optional
 
-from
+from managers.ltx_manager import ltx_manager_singleton
+from managers.latent_enhancer_manager import latent_enhancer_specialist_singleton
+from managers.vae_manager import vae_manager_singleton
 from gemini_helpers import gemini_singleton
-from latent_enhancer_specialist import latent_enhancer_specialist_singleton
 from hd_specialist import hd_specialist_singleton
-from ltx_video.models.autoencoders.vae_encode import vae_encode, vae_decode
 from audio_specialist import audio_specialist_singleton
 from tools.video_encode_tool import video_encode_tool_singleton
 
@@ -51,35 +49,12 @@ class Deformes4DEngine:
     def __init__(self, ltx_manager, workspace_dir="deformes_workspace"):
         self.ltx_manager = ltx_manager
         self.workspace_dir = workspace_dir
-        self._vae = None
         self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
         logger.info("Deformes4D Specialist (ADUC-SDR Executor) initialized.")
         os.makedirs(self.workspace_dir, exist_ok=True)
 
-
-    @property
-    def vae(self):
-        if self._vae is None:
-            # Assumes the VAE from the first LTX worker is representative
-            self._vae = self.ltx_manager.workers[0].pipeline.vae
-            self._vae.to(self.device); self._vae.eval()
-        return self._vae
-
     # --- HELPER METHODS ---
 
-    @torch.no_grad()
-    def pixels_to_latents(self, tensor: torch.Tensor) -> torch.Tensor:
-        """Encodes a pixel-space tensor to the latent space using the VAE."""
-        tensor = tensor.to(self.device, dtype=self.vae.dtype)
-        return vae_encode(tensor, self.vae, vae_per_channel_normalize=True)
-
-    @torch.no_grad()
-    def latents_to_pixels(self, latent_tensor: torch.Tensor, decode_timestep: float = 0.05) -> torch.Tensor:
-        """Decodes a latent-space tensor to pixels using the VAE."""
-        latent_tensor = latent_tensor.to(self.device, dtype=self.vae.dtype)
-        timestep_tensor = torch.tensor([decode_timestep] * latent_tensor.shape[0], device=self.device, dtype=latent_tensor.dtype)
-        return vae_decode(latent_tensor, self.vae, is_video=True, timestep=timestep_tensor, vae_per_channel_normalize=True)
-
     def save_video_from_tensor(self, video_tensor: torch.Tensor, path: str, fps: int = 24):
         """Saves a pixel-space tensor as an MP4 video file."""
         if video_tensor is None or video_tensor.ndim != 5 or video_tensor.shape[2] == 0: return
@@ -96,11 +71,11 @@ class Deformes4DEngine:
         return image
 
     def pil_to_latent(self, pil_image: Image.Image) -> torch.Tensor:
-        """Converts a PIL Image to a latent tensor."""
+        """Converts a PIL Image to a latent tensor by calling the VaeManager."""
         image_np = np.array(pil_image).astype(np.float32) / 255.0
         tensor = torch.from_numpy(image_np).permute(2, 0, 1).unsqueeze(0).unsqueeze(2)
         tensor = (tensor * 2.0) - 1.0
-        return
+        return vae_manager_singleton.encode(tensor)
 
     # --- CORE ADUC-SDR LOGIC ---
 
@@ -196,7 +171,8 @@ class Deformes4DEngine:
             logger.info(f"Batch {i+1} concatenated. Latent shape: {sub_group_latent.shape}")
             base_name = f"clip_{i:04d}_{run_timestamp}"
             current_clip_path = os.path.join(temp_video_clips_dir, f"{base_name}.mp4")
-
+
+            pixel_tensor = vae_manager_singleton.decode(sub_group_latent)
             self.save_video_from_tensor(pixel_tensor, current_clip_path, fps=FPS)
             del pixel_tensor, sub_group_latent; gc.collect(); torch.cuda.empty_cache()
             final_video_clip_paths.append(current_clip_path)
@@ -234,7 +210,7 @@ class Deformes4DEngine:
             upscaled_latent_chunk = latent_enhancer_specialist_singleton.upscale(sub_group_latent)
             del sub_group_latent; gc.collect(); torch.cuda.empty_cache()
             logger.info(f"Batch {i+1} upscaled. New latent shape: {upscaled_latent_chunk.shape}")
-            pixel_tensor =
+            pixel_tensor = vae_manager_singleton.decode(upscaled_latent_chunk)
             del upscaled_latent_chunk; gc.collect(); torch.cuda.empty_cache()
             base_name = f"upscaled_clip_{i:04d}_{run_timestamp}"
             current_clip_path = os.path.join(temp_upscaled_clips_dir, f"{base_name}.mp4")
@@ -277,6 +253,8 @@ class Deformes4DEngine:
         logger.info(f"--- STARTING POST-PRODUCTION: Audio Generation ---")
        progress(0.1, desc="Preparing for audio generation...")
         run_timestamp = int(time.time())
+        source_name = Path(source_video_path).stem
+        output_path = os.path.join(self.workspace_dir, f"{source_name}_with_audio_{run_timestamp}.mp4")
         try:
             result = subprocess.run(
                 ["ffprobe", "-v", "error", "-show_entries", "format=duration", "-of", "default=noprint_wrappers=1:nokey=1", source_video_path],
@@ -284,7 +262,6 @@ class Deformes4DEngine:
             duration = float(result.stdout.strip())
             logger.info(f"Source video duration: {duration:.2f} seconds.")
             progress(0.5, desc="Generating audio track...")
-            output_path = os.path.join(self.workspace_dir, f"final_movie_with_audio_{run_timestamp}.mp4")
             final_path = audio_specialist_singleton.generate_audio_for_video(
                 video_path=source_video_path,
                 prompt=audio_prompt,