Update deformes4D_engine.py
deformes4D_engine.py  +14 -37  (CHANGED)
@@ -2,14 +2,12 @@
 #
 # Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos
 #
-# Version: 2.0
+# Version: 2.1.0
 #
 # This file contains the Deformes4D Engine, which acts as the primary "Editor" or
-# "Film Crew" specialist within the ADUC-SDR architecture. It
-#
-#
-# the enhancer specialist), and final rendering/post-production tasks like HD mastering
-# and audio generation. It executes the specific commands delegated by the AducOrchestrator.
+# "Film Crew" specialist within the ADUC-SDR architecture. It has been refactored
+# to delegate all VAE operations to the dedicated VaeManager, cleaning up its own
+# logic and adhering to the specialist-based architecture.
 
 import os
 import time
@@ -26,11 +24,11 @@ import shutil
 from pathlib import Path
 from typing import List, Tuple, Generator, Dict, Any, Optional
 
-from
+from managers.ltx_manager import ltx_manager_singleton
+from managers.latent_enhancer_manager import latent_enhancer_specialist_singleton
+from managers.vae_manager import vae_manager_singleton
 from gemini_helpers import gemini_singleton
-from latent_enhancer_specialist import latent_enhancer_specialist_singleton
 from hd_specialist import hd_specialist_singleton
-from ltx_video.models.autoencoders.vae_encode import vae_encode, vae_decode
 from audio_specialist import audio_specialist_singleton
 from tools.video_encode_tool import video_encode_tool_singleton
 
@@ -51,35 +49,12 @@ class Deformes4DEngine:
     def __init__(self, ltx_manager, workspace_dir="deformes_workspace"):
         self.ltx_manager = ltx_manager
         self.workspace_dir = workspace_dir
-        self._vae = None
         self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
         logger.info("Deformes4D Specialist (ADUC-SDR Executor) initialized.")
         os.makedirs(self.workspace_dir, exist_ok=True)
 
-
-    @property
-    def vae(self):
-        if self._vae is None:
-            # Assumes the VAE from the first LTX worker is representative
-            self._vae = self.ltx_manager.workers[0].pipeline.vae
-            self._vae.to(self.device); self._vae.eval()
-        return self._vae
-
     # --- HELPER METHODS ---
 
-    @torch.no_grad()
-    def pixels_to_latents(self, tensor: torch.Tensor) -> torch.Tensor:
-        """Encodes a pixel-space tensor to the latent space using the VAE."""
-        tensor = tensor.to(self.device, dtype=self.vae.dtype)
-        return vae_encode(tensor, self.vae, vae_per_channel_normalize=True)
-
-    @torch.no_grad()
-    def latents_to_pixels(self, latent_tensor: torch.Tensor, decode_timestep: float = 0.05) -> torch.Tensor:
-        """Decodes a latent-space tensor to pixels using the VAE."""
-        latent_tensor = latent_tensor.to(self.device, dtype=self.vae.dtype)
-        timestep_tensor = torch.tensor([decode_timestep] * latent_tensor.shape[0], device=self.device, dtype=latent_tensor.dtype)
-        return vae_decode(latent_tensor, self.vae, is_video=True, timestep=timestep_tensor, vae_per_channel_normalize=True)
-
     def save_video_from_tensor(self, video_tensor: torch.Tensor, path: str, fps: int = 24):
         """Saves a pixel-space tensor as an MP4 video file."""
         if video_tensor is None or video_tensor.ndim != 5 or video_tensor.shape[2] == 0: return
@@ -96,11 +71,11 @@ class Deformes4DEngine:
         return image
 
     def pil_to_latent(self, pil_image: Image.Image) -> torch.Tensor:
-        """Converts a PIL Image to a latent tensor."""
+        """Converts a PIL Image to a latent tensor by calling the VaeManager."""
         image_np = np.array(pil_image).astype(np.float32) / 255.0
         tensor = torch.from_numpy(image_np).permute(2, 0, 1).unsqueeze(0).unsqueeze(2)
         tensor = (tensor * 2.0) - 1.0
-        return
+        return vae_manager_singleton.encode(tensor)
 
     # --- CORE ADUC-SDR LOGIC ---
 
@@ -196,7 +171,8 @@ class Deformes4DEngine:
             logger.info(f"Batch {i+1} concatenated. Latent shape: {sub_group_latent.shape}")
             base_name = f"clip_{i:04d}_{run_timestamp}"
             current_clip_path = os.path.join(temp_video_clips_dir, f"{base_name}.mp4")
-
+
+            pixel_tensor = vae_manager_singleton.decode(sub_group_latent)
             self.save_video_from_tensor(pixel_tensor, current_clip_path, fps=FPS)
             del pixel_tensor, sub_group_latent; gc.collect(); torch.cuda.empty_cache()
             final_video_clip_paths.append(current_clip_path)
@@ -234,7 +210,7 @@ class Deformes4DEngine:
             upscaled_latent_chunk = latent_enhancer_specialist_singleton.upscale(sub_group_latent)
             del sub_group_latent; gc.collect(); torch.cuda.empty_cache()
             logger.info(f"Batch {i+1} upscaled. New latent shape: {upscaled_latent_chunk.shape}")
-            pixel_tensor =
+            pixel_tensor = vae_manager_singleton.decode(upscaled_latent_chunk)
             del upscaled_latent_chunk; gc.collect(); torch.cuda.empty_cache()
             base_name = f"upscaled_clip_{i:04d}_{run_timestamp}"
             current_clip_path = os.path.join(temp_upscaled_clips_dir, f"{base_name}.mp4")
@@ -277,6 +253,8 @@ class Deformes4DEngine:
         logger.info(f"--- STARTING POST-PRODUCTION: Audio Generation ---")
        progress(0.1, desc="Preparing for audio generation...")
         run_timestamp = int(time.time())
+        source_name = Path(source_video_path).stem
+        output_path = os.path.join(self.workspace_dir, f"{source_name}_with_audio_{run_timestamp}.mp4")
         try:
             result = subprocess.run(
                 ["ffprobe", "-v", "error", "-show_entries", "format=duration", "-of", "default=noprint_wrappers=1:nokey=1", source_video_path],
@@ -284,7 +262,6 @@ class Deformes4DEngine:
             duration = float(result.stdout.strip())
             logger.info(f"Source video duration: {duration:.2f} seconds.")
             progress(0.5, desc="Generating audio track...")
-            output_path = os.path.join(self.workspace_dir, f"final_movie_with_audio_{run_timestamp}.mp4")
             final_path = audio_specialist_singleton.generate_audio_for_video(
                 video_path=source_video_path,
                 prompt=audio_prompt,