euiia committed
Commit d79a09b · verified · 1 Parent(s): 1b8c94a

Update deformes4D_engine.py

Files changed (1):
  1. deformes4D_engine.py  +14 -37
deformes4D_engine.py CHANGED

@@ -2,14 +2,12 @@
 #
 # Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos
 #
-# Version: 2.0.2
+# Version: 2.1.0
 #
 # This file contains the Deformes4D Engine, which acts as the primary "Editor" or
-# "Film Crew" specialist within the ADUC-SDR architecture. It implements the Camera (Ψ)
-# and Distiller (Δ) concepts. Its core responsibilities include the low-level orchestration
-# of video fragment generation (calling the LTX specialist), latent manipulation (calling
-# the enhancer specialist), and final rendering/post-production tasks like HD mastering
-# and audio generation. It executes the specific commands delegated by the AducOrchestrator.
+# "Film Crew" specialist within the ADUC-SDR architecture. It has been refactored
+# to delegate all VAE operations to the dedicated VaeManager, cleaning up its own
+# logic and adhering to the specialist-based architecture.
 
 import os
 import time
@@ -26,11 +24,11 @@ import shutil
 from pathlib import Path
 from typing import List, Tuple, Generator, Dict, Any, Optional
 
-from ltx_manager_helpers import ltx_manager_singleton
+from managers.ltx_manager import ltx_manager_singleton
+from managers.latent_enhancer_manager import latent_enhancer_specialist_singleton
+from managers.vae_manager import vae_manager_singleton
 from gemini_helpers import gemini_singleton
-from latent_enhancer_specialist import latent_enhancer_specialist_singleton
 from hd_specialist import hd_specialist_singleton
-from ltx_video.models.autoencoders.vae_encode import vae_encode, vae_decode
 from audio_specialist import audio_specialist_singleton
 from tools.video_encode_tool import video_encode_tool_singleton
 
@@ -51,35 +49,12 @@ class Deformes4DEngine:
     def __init__(self, ltx_manager, workspace_dir="deformes_workspace"):
         self.ltx_manager = ltx_manager
         self.workspace_dir = workspace_dir
-        self._vae = None
         self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
         logger.info("Deformes4D Specialist (ADUC-SDR Executor) initialized.")
         os.makedirs(self.workspace_dir, exist_ok=True)
 
-
-    @property
-    def vae(self):
-        if self._vae is None:
-            # Assumes the VAE from the first LTX worker is representative
-            self._vae = self.ltx_manager.workers[0].pipeline.vae
-            self._vae.to(self.device); self._vae.eval()
-        return self._vae
-
     # --- HELPER METHODS ---
 
-    @torch.no_grad()
-    def pixels_to_latents(self, tensor: torch.Tensor) -> torch.Tensor:
-        """Encodes a pixel-space tensor to the latent space using the VAE."""
-        tensor = tensor.to(self.device, dtype=self.vae.dtype)
-        return vae_encode(tensor, self.vae, vae_per_channel_normalize=True)
-
-    @torch.no_grad()
-    def latents_to_pixels(self, latent_tensor: torch.Tensor, decode_timestep: float = 0.05) -> torch.Tensor:
-        """Decodes a latent-space tensor to pixels using the VAE."""
-        latent_tensor = latent_tensor.to(self.device, dtype=self.vae.dtype)
-        timestep_tensor = torch.tensor([decode_timestep] * latent_tensor.shape[0], device=self.device, dtype=latent_tensor.dtype)
-        return vae_decode(latent_tensor, self.vae, is_video=True, timestep=timestep_tensor, vae_per_channel_normalize=True)
-
     def save_video_from_tensor(self, video_tensor: torch.Tensor, path: str, fps: int = 24):
         """Saves a pixel-space tensor as an MP4 video file."""
         if video_tensor is None or video_tensor.ndim != 5 or video_tensor.shape[2] == 0: return
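The VAE property and the pixels_to_latents / latents_to_pixels helpers removed above now sit behind the vae_manager_singleton imported from managers.vae_manager; the engine only calls its encode() and decode() methods. The sketch below is a minimal guess at that manager's shape, assuming it wraps the same vae_encode / vae_decode utilities and borrows the VAE from the first LTX worker exactly as the deleted code did; the class body and the wiring to ltx_manager_singleton are assumptions, not the actual contents of managers/vae_manager.py.

    # Hypothetical sketch of managers/vae_manager.py (assumed, not this repository's file):
    # it centralizes the encode/decode logic that this commit removes from the engine.
    import torch
    from ltx_video.models.autoencoders.vae_encode import vae_encode, vae_decode
    from managers.ltx_manager import ltx_manager_singleton

    class VaeManager:
        def __init__(self, ltx_manager):
            self._ltx_manager = ltx_manager
            self._vae = None
            self.device = 'cuda' if torch.cuda.is_available() else 'cpu'

        @property
        def vae(self):
            # Lazily borrow the VAE from the first LTX worker, as the old engine property did.
            if self._vae is None:
                self._vae = self._ltx_manager.workers[0].pipeline.vae
                self._vae.to(self.device); self._vae.eval()
            return self._vae

        @torch.no_grad()
        def encode(self, pixel_tensor: torch.Tensor) -> torch.Tensor:
            # Pixel space -> latent space, matching the removed pixels_to_latents helper.
            pixel_tensor = pixel_tensor.to(self.device, dtype=self.vae.dtype)
            return vae_encode(pixel_tensor, self.vae, vae_per_channel_normalize=True)

        @torch.no_grad()
        def decode(self, latent_tensor: torch.Tensor, decode_timestep: float = 0.05) -> torch.Tensor:
            # Latent space -> pixel space, matching the removed latents_to_pixels helper.
            latent_tensor = latent_tensor.to(self.device, dtype=self.vae.dtype)
            timestep = torch.tensor([decode_timestep] * latent_tensor.shape[0],
                                    device=self.device, dtype=latent_tensor.dtype)
            return vae_decode(latent_tensor, self.vae, is_video=True,
                              timestep=timestep, vae_per_channel_normalize=True)

    # Assumed module-level singleton, mirroring how the engine imports and calls it.
    vae_manager_singleton = VaeManager(ltx_manager_singleton)

Keeping this logic in one manager gives every specialist a single shared VAE instance and a single place to change normalization or decode timesteps.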
@@ -96,11 +71,11 @@ class Deformes4DEngine:
         return image
 
     def pil_to_latent(self, pil_image: Image.Image) -> torch.Tensor:
-        """Converts a PIL Image to a latent tensor."""
+        """Converts a PIL Image to a latent tensor by calling the VaeManager."""
         image_np = np.array(pil_image).astype(np.float32) / 255.0
         tensor = torch.from_numpy(image_np).permute(2, 0, 1).unsqueeze(0).unsqueeze(2)
         tensor = (tensor * 2.0) - 1.0
-        return self.pixels_to_latents(tensor)
+        return vae_manager_singleton.encode(tensor)
 
     # --- CORE ADUC-SDR LOGIC ---
 
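For reference, the unchanged tensor preparation in pil_to_latent reshapes an RGB PIL image into the 5-D (batch, channels, frames, height, width) layout that the engine treats as a single-frame video before handing it to the encoder. A standalone shape check (the 512x512 stand-in image is arbitrary):

    import numpy as np
    import torch
    from PIL import Image

    pil_image = Image.new("RGB", (512, 512))                    # stand-in input
    image_np = np.array(pil_image).astype(np.float32) / 255.0   # (H, W, C) in [0, 1]
    tensor = torch.from_numpy(image_np).permute(2, 0, 1)        # (C, H, W)
    tensor = tensor.unsqueeze(0).unsqueeze(2)                   # (B=1, C, F=1, H, W)
    tensor = (tensor * 2.0) - 1.0                               # rescale to [-1, 1]
    print(tensor.shape)                                         # torch.Size([1, 3, 1, 512, 512])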
 
@@ -196,7 +171,8 @@
             logger.info(f"Batch {i+1} concatenated. Latent shape: {sub_group_latent.shape}")
             base_name = f"clip_{i:04d}_{run_timestamp}"
             current_clip_path = os.path.join(temp_video_clips_dir, f"{base_name}.mp4")
-            pixel_tensor = self.latents_to_pixels(sub_group_latent)
+
+            pixel_tensor = vae_manager_singleton.decode(sub_group_latent)
             self.save_video_from_tensor(pixel_tensor, current_clip_path, fps=FPS)
             del pixel_tensor, sub_group_latent; gc.collect(); torch.cuda.empty_cache()
             final_video_clip_paths.append(current_clip_path)
@@ -234,7 +210,7 @@
             upscaled_latent_chunk = latent_enhancer_specialist_singleton.upscale(sub_group_latent)
             del sub_group_latent; gc.collect(); torch.cuda.empty_cache()
             logger.info(f"Batch {i+1} upscaled. New latent shape: {upscaled_latent_chunk.shape}")
-            pixel_tensor = self.latents_to_pixels(upscaled_latent_chunk)
+            pixel_tensor = vae_manager_singleton.decode(upscaled_latent_chunk)
             del upscaled_latent_chunk; gc.collect(); torch.cuda.empty_cache()
             base_name = f"upscaled_clip_{i:04d}_{run_timestamp}"
             current_clip_path = os.path.join(temp_upscaled_clips_dir, f"{base_name}.mp4")
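This hunk and the previous one repeat the same decode-then-release pattern: decode a latent chunk through the VaeManager, write it with save_video_from_tensor, then drop references and clear the CUDA cache. If the pattern keeps spreading, it could be folded into a small helper on the engine; the method below is a hypothetical sketch, not code from this commit.

    import gc
    import torch
    from managers.vae_manager import vae_manager_singleton

    # Hypothetical helper that could live on Deformes4DEngine (not part of this commit).
    def _decode_and_save_clip(self, latent_chunk: torch.Tensor, clip_path: str, fps: int = 24) -> str:
        """Decode one latent chunk to pixels, write it to disk, then free GPU memory."""
        pixel_tensor = vae_manager_singleton.decode(latent_chunk)
        self.save_video_from_tensor(pixel_tensor, clip_path, fps=fps)
        del pixel_tensor, latent_chunk
        gc.collect()
        torch.cuda.empty_cache()
        return clip_path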
@@ -277,6 +253,8 @@
         logger.info(f"--- STARTING POST-PRODUCTION: Audio Generation ---")
         progress(0.1, desc="Preparing for audio generation...")
         run_timestamp = int(time.time())
+        source_name = Path(source_video_path).stem
+        output_path = os.path.join(self.workspace_dir, f"{source_name}_with_audio_{run_timestamp}.mp4")
         try:
             result = subprocess.run(
                 ["ffprobe", "-v", "error", "-show_entries", "format=duration", "-of", "default=noprint_wrappers=1:nokey=1", source_video_path],
@@ -284,7 +262,6 @@
             duration = float(result.stdout.strip())
             logger.info(f"Source video duration: {duration:.2f} seconds.")
             progress(0.5, desc="Generating audio track...")
-            output_path = os.path.join(self.workspace_dir, f"final_movie_with_audio_{run_timestamp}.mp4")
             final_path = audio_specialist_singleton.generate_audio_for_video(
                 video_path=source_video_path,
                 prompt=audio_prompt,
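The duration probe relies on ffprobe printing only the container duration (log level error, no key names or section wrappers). The hunk cuts the subprocess.run call off mid-argument, so the keyword arguments in this self-contained version are assumptions consistent with the later result.stdout.strip() usage:

    import subprocess

    def probe_duration(video_path: str) -> float:
        """Return a video's duration in seconds using ffprobe's quiet, key-less output."""
        result = subprocess.run(
            ["ffprobe", "-v", "error", "-show_entries", "format=duration",
             "-of", "default=noprint_wrappers=1:nokey=1", video_path],
            capture_output=True, text=True, check=True,  # assumed flags; the diff truncates the call
        )
        return float(result.stdout.strip())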
 