x2XcarleX2x committed on
Commit b8b5372 · verified · 1 Parent(s): 253a5a5

Update Dockerfile

Files changed (1)
  1. Dockerfile +307 -200
Dockerfile CHANGED
@@ -1,201 +1,308 @@
- # =============================================================================
- # DOCKERFILE - ADUC-SDR: AI Video Suite v4.1.0
- # Base: CUDA 12.8.0 | PyTorch 2.8.0+cu128
- # Optimized for Hugging Face Spaces with 8x NVIDIA L40S GPUs
- # =============================================================================
-
- # CUDA 12.8.0 base image
- FROM nvidia/cuda:12.8.0-devel-ubuntu22.04
-
- # =============================================================================
- # METADATA
- # =============================================================================
- LABEL maintainer="Carlos Rodrigues dos Santos & Development Partner"
- LABEL description="ADUC-SDR: Production-Ready Multi-GPU AI Video Generation Suite with Wan2.2, SeedVR, LTX, MMAudio"
- LABEL version="4.1.0"
- LABEL cuda_version="12.8.0"
- LABEL python_version="3.10"
- LABEL pytorch_version="2.8.0+cu128"
- LABEL gpu_optimized_for="8x_NVIDIA_L40S"
-
- # System-specific optimizations
- ENV OMP_NUM_THREADS=8
- ENV MKL_NUM_THREADS=8
- ENV CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
- ENV CUDA_DEVICE_MAX_CONNECTIONS=32
-
- # Cache and memory management
- # Keep PYTORCH_CUDA_ALLOC_CONF unified on a single line
- ENV PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512,garbage_collection_threshold:0.8
- ENV CUDA_MODULE_LOADING=LAZY
- ENV CUDA_DEVICE_MAX_CONNECTIONS=32
- ENV CUDA_DEVICE_ORDER=PCI_BUS_ID
-
- # Build-time performance constants
- ENV CUDA_CACHE_MAXSIZE=2147483648
- ENV CUDA_CACHE_DISABLE=0
- ENV TORCH_HOME=/app/.cache/torch
- ENV HF_HOME=/app/.cache/huggingface
- ENV HF_DATASETS_CACHE=/app/.cache/datasets
-
- # Memory-management constants
- ENV PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512,garbage_collection_threshold:0.8
- ENV CUDA_LAUNCH_BLOCKING=0
- ENV CUDA_DEVICE_ORDER=PCI_BUS_ID
-
- # Model-loading constants
- ENV TRANSFORMERS_CACHE=/app/.cache/transformers
- ENV DIFFUSERS_CACHE=/app/.cache/diffusers
- ENV MODEL_CACHE_STRATEGY=aggressive
-
- # Cache/download improvements
- ENV HF_HOME=/app/.cache/huggingface
- ENV TRANSFORMERS_CACHE=/app/.cache/transformers
- ENV DIFFUSERS_CACHE=/app/.cache/diffusers
- ENV HF_DATASETS_CACHE=/app/.cache/datasets
- ENV HF_HUB_ENABLE_HF_TRANSFER=1
- ENV TOKENIZERS_PARALLELISM=false
-
- # =============================================================================
- # GLOBAL ENVIRONMENT VARIABLES
- # =============================================================================
- ENV DEBIAN_FRONTEND=noninteractive
- ENV TZ=UTC
- ENV LANG=C.UTF-8
- ENV LC_ALL=C.UTF-8
- ENV PYTHONUNBUFFERED=1
- ENV PYTHONDONTWRITEBYTECODE=1
- ENV PIP_NO_CACHE_DIR=1
- ENV PIP_DISABLE_PIP_VERSION_CHECK=1
-
- # CUDA and build optimizations
- ENV NVIDIA_VISIBLE_DEVICES=all
- ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
- ENV TORCH_CUDA_ARCH_LIST="8.9"
- ENV MAX_JOBS=90
-
- # Application paths
- ENV APP_HOME=/app
- WORKDIR $APP_HOME
-
- # =============================================================================
- # SYSTEM PACKAGES AND PYTHON 3.10
- # =============================================================================
- RUN apt-get update && \
-     apt-get install -y --no-install-recommends \
-     build-essential cmake git git-lfs curl wget ffmpeg ninja-build \
-     python3.10 python3.10-dev python3.10-distutils python3-pip \
-     && apt-get clean && rm -rf /var/lib/apt/lists/*
-
- RUN ln -sf /usr/bin/python3.10 /usr/bin/python3 && \
-     ln -sf /usr/bin/python3.10 /usr/bin/python && \
-     python3 -m pip install --upgrade pip
-
- # =============================================================================
- # HIGH-PERFORMANCE LIBRARY INSTALLATION
- # =============================================================================
-
- # 1. Install PyTorch 2.8.0 and build tooling
- RUN pip -v install \
-     "torch>=2.8.0+cu128" \
-     torchvision \
-     torchaudio \
-     --index-url https://download.pytorch.org/whl/cu128

- RUN pip install \
-     packaging \
-     ninja \
-     cmake \
-     pybind11 \
-     scikit-build \
-     cython \
-     hf_transfer \
-     numpy==1.24.4
-
- # =============================================================================
- # CLONING AND INSTALLING THE APPLICATION REPOSITORIES
- # =============================================================================
-
- RUN git clone https://github.com/Wan-Video/Wan2.2.git && \
-     #cd Wan2.2 && pip install -v -r requirements.txt && pip install -v -r requirements_s2v.txt && cd .. && \
-     echo "Copying 'wan' to /app/wan..." && \
-     cp -r Wan2.2/wan /app/wan
-
- # VINCIE
- RUN git clone https://github.com/bytedance-seed/VINCIE.git && \
-     #cd VINCIE && pip install -v -r requirements.txt && cd .. && \
-     #echo "Copying VINCIE modules to /app/..." && \
-     #cp /VINCIE/generate.py /app/VINCIE/ 2>/dev/null || echo "vincie_service.py not found"
-     #cp -r VINCIE /app/VINCIE && \
-     #cp -r VINCIE/projects /app/projects && \
-     #cp -r VINCIE/data /app/data && \
-     cp -r VINCIE/configs/. /app/configs/
-
- # SeedVR
- RUN git clone https://github.com/bytedance-seed/SeedVR.git && \
-     #cd SeedVR && pip install -v -r requirements.txt && cd .. && \
-     #echo "Copying SeedVR modules to /app/..." && \
-     #cp -r SeedVR/common /app/common && \
-     #cp -r SeedVR/projects /app/projects && \
-     #cp -r SeedVR/data /app/data && \
-     cp -r SeedVR/configs_3b /app/configs_3b
-
- # MMAudio
- #RUN git clone https://github.com/hkchengrex/MMAudio.git && \
- #    cd MMAudio && pip install -v -e . && cd .. && \
- #    echo "Copying 'mmaudio' to /app/mmaudio..." && \
- #    cp -r MMAudio/mmaudio /app/mmaudio
-
- # LTX-Video
- RUN git clone https://github.com/Lightricks/LTX-Video.git && \
-     #cd LTX-Video && pip install -v -e .[inference] && cd .. && \
-     echo "Copying 'ltx_video' to /app/ltx_video..." && \
-     cp -r LTX-Video/ltx_video /app/ltx_video
-
- # Optional packages
- RUN pip uninstall -y bitsandbytes triton && \
-     pip install -v bitsandbytes --index-url https://pypi.org/simple/ && \
-     pip install -v triton
-
- # =============================================================================
- # INSTALLING THE REMAINING DEPENDENCIES
- # =============================================================================
- COPY requirements.txt .
-
- # Install the remaining packages from requirements.txt
- # The flash-attention line in that file is ignored if it is already installed, but it is best to remove it.
- RUN pip install -r requirements.txt
-
- # PyTorch cu128 (pin consistent versions from the same channel)
- RUN pip install --index-url https://download.pytorch.org/whl/cu128 \
-     torch==2.8.0+cu128 "torchvision>=0.19.0+cu128" "torchaudio>=2.8.0+cu128"
-
- # =============================================================================
- # COPY THE APPLICATION CODE AND SET PERMISSIONS
- # =============================================================================
- COPY . .
-
- RUN useradd -m -u 1000 -s /bin/bash appuser && \
-     chown -R appuser:appuser $APP_HOME && \
-     mkdir -p /app && chown -R appuser:appuser /app
-
- USER appuser
-
- # =============================================================================
- # ENTRYPOINT
- # =============================================================================
- RUN chmod +x ./start.sh
-
- ENTRYPOINT ["./start.sh"]
- CMD ["gradio"]
+ # aduc_framework/managers/wan_manager.py
+ # WanManager v1.5.3 (final device fix for `device_map="auto"`)
+
+ import os
+ import platform
+ import shutil
+ import subprocess
+ import tempfile
+ import random
+ from typing import Optional
+
+ import numpy as np
+ import torch
+ from PIL import Image
+ import imageio.v2 as imageio
+ from moviepy.editor import VideoFileClip, concatenate_videoclips
+
+ torch.backends.cuda.matmul.allow_tf32 = True
+
+ try:
+     from torch.nn.attention import sdpa_kernel, SDPBackend
+     _SDPA_NEW = True
+ except ImportError:
+     _SDPA_NEW = False
+
+ from diffusers import FlowMatchEulerDiscreteScheduler
+ from diffusers.pipelines.wan.pipeline_wan_i2v import WanImageToVideoPipeline
+ from diffusers.models.transformers.transformer_wan import WanTransformer3DModel
+ from diffusers.utils.export_utils import export_to_video
+ from aduc_framework.utils.callbacks import DenoiseStepLogger
+
+ class WanManager:
+     """
+     Production manager, v1.5.3:
+     - FIX: removes manual device management during tensor preparation so the
+       code is fully compatible with `accelerate` automation and
+       `device_map="auto"`. Resolves the "Cannot copy out of meta tensor" errors.
+     - Keeps the `yield`-based real-time updates, the I2V and V2V modes,
+       and all of the robust validation logic.
+     """
+     MODEL_ID = "Wan-AI/Wan2.2-I2V-A14B-Diffusers"
+     TRANSFORMER_ID = "cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers"
+     MIN_FRAMES_MODEL = 8
+     MAX_FRAMES_MODEL = 81
+     default_negative_prompt = (
+         "bright, overexposed, static, blurry details, text, subtitles, watermark, style, "
+         "artwork, painting, still image, gray scale, worst quality, low quality, jpeg artifacts, "
+         "ugly, deformed, disfigured, missing fingers, extra fingers, poorly drawn hands, "
+         "poorly drawn face, malformed limbs, fused fingers, messy background, three legs, "
+         "too many people, walking backwards."
+     )
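+
+     # NOTE: TRANSFORMER_ID points at a bf16 repack of the Wan2.2 transformers,
+     # presumably to cut download size and load time versus the full-precision
+     # weights. Frame counts requested via generate_video() are clamped to
+     # [MIN_FRAMES_MODEL, MAX_FRAMES_MODEL] and then snapped to a multiple of
+     # the VAE temporal scale factor plus 1 (4n+1 by default); see generate_video().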
+     def __init__(self) -> None:
+         self._print_env_banner()
+         print("Loading models into memory...")
+         # Budget ~43GiB per GPU, plus CPU offload headroom, for accelerate's
+         # device_map="auto" sharding.
+         n_gpus = torch.cuda.device_count()
+         max_memory = {i: "43GiB" for i in range(n_gpus)}
+         max_memory["cpu"] = "120GiB"
+         transformer = WanTransformer3DModel.from_pretrained(
+             self.TRANSFORMER_ID, subfolder="transformer", torch_dtype=torch.bfloat16,
+             device_map="auto", max_memory=max_memory
+         )
+         transformer_2 = WanTransformer3DModel.from_pretrained(
+             self.TRANSFORMER_ID, subfolder="transformer_2", torch_dtype=torch.bfloat16,
+             device_map="auto", max_memory=max_memory
+         )
+         self.pipe = WanImageToVideoPipeline.from_pretrained(
+             self.MODEL_ID, transformer=transformer, transformer_2=transformer_2, torch_dtype=torch.bfloat16
+         )
+         self.pipe.scheduler = FlowMatchEulerDiscreteScheduler.from_config(self.pipe.scheduler.config, shift=32.0)
+
+         print("Applying 8-step Lightning LoRA...")
+         try:
+             self.pipe.load_lora_weights("Kijai/WanVideo_comfy", weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors", adapter_name="lightx2v")
+             self.pipe.load_lora_weights("Kijai/WanVideo_comfy", weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors", adapter_name="lightx2v_2", load_into_transformer_2=True)
+             self.pipe.set_adapters(["lightx2v", "lightx2v_2"], adapter_weights=[1.0, 1.0])
+             print("Fusing LoRA weights into the main model...")
+             self.pipe.fuse_lora(adapter_names=["lightx2v"], lora_scale=3.0, components=["transformer"])
+             self.pipe.fuse_lora(adapter_names=["lightx2v_2"], lora_scale=1.0, components=["transformer_2"])
+             self.pipe.unload_lora_weights()
+             print("Lightning LoRA successfully fused.")
+         except Exception as e:
+             print(f"[WanManager] WARNING: failed to fuse the Lightning LoRA: {e}")
+         print("All models loaded. Service is ready.")
+
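+     # Fusing and then unloading the LoRA bakes the distilled weights into the
+     # base transformers, so inference needs no adapter bookkeeping afterwards.
+     # The try/except keeps the service usable if the fuse fails, though the
+     # 8-step schedule likely degrades without the distilled weights.
+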
+     def _print_env_banner(self) -> None:
+         def _safe_get(fn, default="n/a"):
+             try:
+                 return fn()
+             except Exception:
+                 return default
+
+         torch_ver = getattr(torch, "__version__", "unknown")
+         cuda_rt = getattr(torch.version, "cuda", "unknown")
+         cudnn_ver = _safe_get(lambda: torch.backends.cudnn.version())
+         cuda_ok = torch.cuda.is_available()
+         n_gpu = torch.cuda.device_count() if cuda_ok else 0
+         devs, total_vram, caps = [], [], []
+         if cuda_ok:
+             for i in range(n_gpu):
+                 props = torch.cuda.get_device_properties(i)
+                 devs.append(f"cuda:{i} {props.name}")
+                 total_vram.append(f"{props.total_memory/1024**3:.1f}GiB")
+                 caps.append(f"{props.major}.{props.minor}")
+
+         bf16_supported = _safe_get(torch.cuda.is_bf16_supported, default=False)
+
+         tf32_allowed = torch.backends.cuda.matmul.allow_tf32
+         if _SDPA_NEW:
+             sdpa_api = "torch.nn.attention (2.1+)"
+         elif hasattr(torch.backends.cuda, "sdp_kernel"):
+             sdpa_api = "torch.backends.cuda (2.0)"
+         else:
+             sdpa_api = "unavailable"
+
+         try:
+             import xformers
+             xformers_ok = True
+         except ImportError:
+             xformers_ok = False
+
+         alloc_conf = os.environ.get("PYTORCH_CUDA_ALLOC_CONF", "unset")
+         visible = os.environ.get("CUDA_VISIBLE_DEVICES", "unset")
+         python_ver = platform.python_version()
+         nvcc = shutil.which("nvcc")
+         nvcc_ver = "n/a"
+         if nvcc:
+             nvcc_ver = _safe_get(lambda: subprocess.check_output([nvcc, "--version"], text=True).strip().splitlines()[-1])
+
+         banner_lines = [
+             "================== WAN MANAGER • ENV ==================",
+             f"Python              : {python_ver}",
+             f"PyTorch             : {torch_ver}",
+             f"CUDA (torch)        : {cuda_rt}",
+             f"cuDNN               : {cudnn_ver}",
+             f"CUDA available      : {cuda_ok}",
+             f"GPU count           : {n_gpu}",
+             f"GPUs                : {', '.join(devs) if devs else 'n/a'}",
+             f"GPU VRAM            : {', '.join(total_vram) if total_vram else 'n/a'}",
+             f"Compute Capability  : {', '.join(caps) if caps else 'n/a'}",
+             f"BF16 supported      : {bf16_supported}",
+             f"TF32 allowed        : {tf32_allowed}",
+             f"SDPA API            : {sdpa_api}",
+             f"xFormers available  : {xformers_ok}",
+             f"CUDA_VISIBLE_DEVICES: {visible}",
+             f"PYTORCH_CUDA_ALLOC_CONF: {alloc_conf}",
+             f"nvcc                : {nvcc_ver}",
+             "=======================================================",
+         ]
+         print("\n".join(banner_lines))
+
+     def resize_and_crop_to_match(self, target: Image.Image, ref_w: int, ref_h: int) -> Image.Image:
+         tw, th = target.size
+         s = max(ref_w / tw, ref_h / th)
+         nw, nh = int(tw * s), int(th * s)
+         resized = target.resize((nw, nh), Image.Resampling.LANCZOS)
+         left, top = (nw - ref_w) // 2, (nh - ref_h) // 2
+         return resized.crop((left, top, left + ref_w, top + ref_h))
+
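+     # Worked example: matching a 1024x768 image to an 832x480 reference gives
+     # s = max(832/1024, 480/768) = 0.8125, so the image is resized to 832x624
+     # and center-cropped with top = (624 - 480) // 2 = 72, yielding exactly 832x480.
+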
+     def _preprocess_causal_video(self, video_path: str, target_fps: int, target_w: int, target_h: int) -> str:
+         print(f"[WanManager] Preprocessing video: conforming to {target_w}x{target_h} @ {target_fps}fps...")
+         clip = VideoFileClip(video_path)
+         # Scale so the frame covers the target, then center-crop: if the source
+         # is relatively narrower than the target aspect, match widths; otherwise
+         # match heights. (The original branches were swapped, which could make
+         # the crop exceed the resized frame.)
+         conformed_clip = clip.resize(width=target_w) if (clip.w / clip.h) < (target_w / target_h) else clip.resize(height=target_h)
+         conformed_clip = conformed_clip.crop(x_center=conformed_clip.w / 2, y_center=conformed_clip.h / 2, width=target_w, height=target_h)
+         conformed_clip = conformed_clip.set_fps(target_fps)
+         with tempfile.NamedTemporaryFile(suffix="_conformed.mp4", delete=False) as tmp:
+             conformed_video_path = tmp.name
+         conformed_clip.write_videofile(conformed_video_path, codec="libx264", audio=False, logger=None, threads=os.cpu_count() or 1)
+         clip.close()
+         print(f"[WanManager] Conformed video saved to: {conformed_video_path}")
+         return conformed_video_path
+
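+     # The conformed file is written with delete=False on purpose:
+     # generate_video() reads control frames from it and removes it with
+     # os.remove() after the final concatenation, so it must outlive this method.
+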
+     def generate_video(
+         self,
+         convergent_img: Image.Image,
+         causal_video_path: Optional[str] = None,
+         causal_img: Optional[Image.Image] = None,
+         handler_img: Optional[Image.Image] = None,
+         total_frames: Optional[int] = 33,
+         handler_frame: Optional[int] = 17,
+         handler_weight: float = 1.0,
+         causal_weight: float = 1.0,
+         fps: Optional[int] = 16,
+         resolution: Optional[str] = "480x832",
+         prompt: str = "",
+         negative_prompt: Optional[str] = None,
+         steps: int = 8,
+         guidance_scale: float = 1.0,
+         guidance_scale_2: float = 1.0,
+         seed: int = 42,
+         randomize_seed: bool = True,
+     ):
+ final_handler_img, final_causal_img = handler_img, causal_img
183
+ final_total_frames, final_fps, final_resolution = total_frames, fps, resolution
184
+ final_handler_frame, final_causal_weight, final_handler_weight = handler_frame, causal_weight, handler_weight
185
+ conformed_video_path = None
186
+
187
+ if causal_video_path and os.path.exists(causal_video_path):
188
+ print(f"[WanManager] INFO: Modo 'Causal Video' ativado com o arquivo: {causal_video_path}")
189
+ target_h, target_w = [int(x) for x in resolution.split('x')]
190
+ conformed_video_path = self._preprocess_causal_video(causal_video_path, fps, target_w, target_h)
191
+ reader = imageio.get_reader(conformed_video_path)
192
+ video_frame_count = reader.count_frames()
193
+ if video_frame_count < 25:
194
+ reader.close()
195
+ raise ValueError(f"O vídeo conformado deve ter pelo menos 25 frames. Tem apenas {video_frame_count}.")
196
+ print("[WanManager] INFO: Extraindo frames de controle do vídeo conformado...")
197
+ causal_img_from_video_np = reader.get_data(video_frame_count - 25)
198
+ final_causal_img = Image.fromarray(causal_img_from_video_np)
199
+ handler_img_from_video_np = reader.get_data(video_frame_count - 1)
200
+ final_handler_img = Image.fromarray(handler_img_from_video_np)
201
+ reader.close()
202
+ final_total_frames, final_fps, final_resolution = video_frame_count, fps, resolution
203
+ final_handler_frame, final_handler_weight, final_causal_weight = 24, 1.0, causal_weight
204
+ else:
205
+ print("[WanManager] INFO: Modo 'Image to Video' padrão ativado.")
206
+ if convergent_img is None or causal_img is None:
207
+ raise ValueError("A imagem convergente (inicial) e a imagem causal (final) são obrigatórias no modo I2V.")
208
+
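+         # Causal Video mode anchors generation on the tail of the input clip:
+         # the frame 25 from the end becomes the causal image, the last frame the
+         # handler image (pinned at index 24), and those same 25 frames are later
+         # trimmed from the input before the generated clip is concatenated, so
+         # the overlap is not duplicated in the final video.
+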
+         target_h, target_w = [int(x) for x in final_resolution.split('x')]
+
+         processed_convergent = self.resize_and_crop_to_match(convergent_img, target_w, target_h)
+         processed_causal = self.resize_and_crop_to_match(final_causal_img, target_w, target_h)
+         processed_handler = self.resize_and_crop_to_match(final_handler_img, target_w, target_h) if final_handler_img is not None else None
+
+         clamped_frames = int(np.clip(final_total_frames, self.MIN_FRAMES_MODEL, self.MAX_FRAMES_MODEL))
+         sf_t = getattr(self.pipe, "vae_scale_factor_temporal", 4)
+         num_frames = ((clamped_frames - 1) // sf_t * sf_t) + 1
+
+         print(f"[WanManager] INFO: Final frame count for the pipeline is {num_frames}.")
+
+         current_seed = random.randint(0, np.iinfo(np.int32).max) if randomize_seed else int(seed)
+
+         corrected_handler_index = None
+         if processed_handler is not None and final_handler_frame is not None:
+             min_safe_frame, max_safe_frame = 9, num_frames - 9
+             if causal_video_path:
+                 corrected_handler_index = max(min_safe_frame, min(final_handler_frame, max_safe_frame))
+             else:
+                 block_index = round(final_handler_frame / 8)
+                 aligned_frame = block_index * 8 + 1
+                 corrected_handler_index = max(min_safe_frame, min(aligned_frame, max_safe_frame))
+             print(f"[WanManager] INFO: Handler frame validated as {corrected_handler_index}.")
+
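+         # Worked example: total_frames=50 clamps to 50 and snaps to
+         # ((50 - 1) // 4) * 4 + 1 = 49 frames; the default 33 is already of the
+         # form 4n + 1 and passes through unchanged. In I2V mode a requested
+         # handler_frame=17 aligns to round(17 / 8) * 8 + 1 = 17, then is clamped
+         # to the safe band [9, num_frames - 9].
+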
+         print("[WanManager] Preparing tensors and timesteps for generation...")
+
+         transformer_dtype = self.pipe.transformer.dtype
+         # A CPU generator keeps seeding reproducible regardless of how
+         # accelerate has sharded the model across devices.
+         generator = torch.Generator(device="cpu").manual_seed(current_seed)
+
+         prompt_embeds, negative_prompt_embeds = self.pipe.encode_prompt(prompt=prompt, negative_prompt=negative_prompt or self.default_negative_prompt)
+         prompt_embeds = prompt_embeds.to(transformer_dtype)
+         if negative_prompt_embeds is not None:
+             negative_prompt_embeds = negative_prompt_embeds.to(transformer_dtype)
+
+         image_processed = self.pipe.video_processor.preprocess(processed_convergent, height=target_h, width=target_w)
+         causal_img_processed = self.pipe.video_processor.preprocess(processed_causal, height=target_h, width=target_w)
+         handler_img_processed = self.pipe.video_processor.preprocess(processed_handler, height=target_h, width=target_w) if processed_handler is not None else None
+
+         latents_outputs = self.pipe.prepare_latents(
+             image=image_processed, batch_size=1, num_channels_latents=self.pipe.vae.config.z_dim,
+             height=target_h, width=target_w, num_frames=num_frames, dtype=torch.float32, generator=generator,
+             causal_img=causal_img_processed, handler_img=handler_img_processed,
+             handler_frame_index=corrected_handler_index, handler_weight=final_handler_weight, causal_weight=final_causal_weight
+         )
+         latents, condition = latents_outputs
+
+         self.pipe.scheduler.set_timesteps(steps, device=latents.device)
+         timesteps = self.pipe.scheduler.timesteps
+
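+         # Manual classifier-free guidance: each step runs two transformer
+         # forwards (unconditional/negative and text-conditioned) and combines
+         # them as uncond + guidance_scale * (text - uncond). Only `transformer`
+         # is used here; `transformer_2` and `guidance_scale_2` do not
+         # participate in this hand-rolled loop.
+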
+         denoise_logger = DenoiseStepLogger(self.pipe)
+         denoising_step_videos = []
+
+         with torch.no_grad():
+             for i, t in enumerate(timesteps):
+                 print(f"[WanManager] Running denoising step {i + 1}/{steps}...")
+                 latent_model_input = torch.cat([latents, condition], dim=1).to(transformer_dtype)
+
+                 noise_pred_uncond = self.pipe.transformer(latent_model_input, t, encoder_hidden_states=negative_prompt_embeds).sample
+                 noise_pred_text = self.pipe.transformer(latent_model_input, t, encoder_hidden_states=prompt_embeds).sample
+
+                 noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
+                 latents = self.pipe.scheduler.step(noise_pred, t, latents).prev_sample
+
+                 video_frames_np = denoise_logger.decode_latents_to_video_tensor(latents)
+                 with tempfile.NamedTemporaryFile(suffix=f"_step_{i + 1}.mp4", delete=False) as tmp:
+                     step_video_path = tmp.name
+                 export_to_video(video_frames_np[0], step_video_path, fps=final_fps)
+                 denoising_step_videos.append(step_video_path)
+
+                 yield None, None, denoising_step_videos
+
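+         # Decoding the full latent video after every step is expensive; it
+         # exists solely to drive the live per-step previews yielded above.
+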
+         print("[WanManager] Denoising complete. Processing the final video...")
+         final_video_frames_np = denoise_logger.decode_latents_to_video_tensor(latents)
+
+         with tempfile.NamedTemporaryFile(suffix="_generated_clip.mp4", delete=False) as tmp:
+             generated_clip_path = tmp.name
+         export_to_video(final_video_frames_np[0], generated_clip_path, fps=final_fps)
+
+         final_video_path = generated_clip_path
+         if conformed_video_path:
+             print("[WanManager] INFO: Causal Video mode: starting final concatenation...")
+             input_clip = VideoFileClip(conformed_video_path)
+             generated_clip = VideoFileClip(generated_clip_path)
+             duration_to_cut = 25 / input_clip.fps
+             if input_clip.duration > duration_to_cut:
+                 prefix_clip = input_clip.subclip(0, input_clip.duration - duration_to_cut)
+                 final_clip = concatenate_videoclips([prefix_clip, generated_clip])
+             else:
+                 final_clip = generated_clip
+             with tempfile.NamedTemporaryFile(suffix="_final.mp4", delete=False) as tmp:
+                 final_video_path = tmp.name
+             final_clip.write_videofile(final_video_path, codec="libx264", audio=False, logger=None, threads=os.cpu_count() or 1)
+             input_clip.close()
+             generated_clip.close()
+             os.remove(conformed_video_path)
+             os.remove(generated_clip_path)
+             print(f"[WanManager] INFO: Final concatenated video saved to: {final_video_path}")
+
+         yield final_video_path, current_seed, denoising_step_videos
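
For reference, a minimal sketch of how a caller might consume this generator API, assuming a WanManager instance and a local start/end image pair (the file names here are hypothetical):

    from PIL import Image
    from aduc_framework.managers.wan_manager import WanManager

    manager = WanManager()  # loads and shards the models once, at startup
    stream = manager.generate_video(
        convergent_img=Image.open("start.png"),  # hypothetical input files
        causal_img=Image.open("end.png"),
        prompt="a slow cinematic pan across a harbor at dawn",
        total_frames=33, fps=16, resolution="480x832",
    )
    final_path, used_seed = None, None
    for video_path, seed, step_previews in stream:
        if video_path is None:
            # intermediate yield: a new per-step preview clip is available
            print(f"preview ready: {step_previews[-1]}")
        else:
            final_path, used_seed = video_path, seed
    print(f"final video: {final_path} (seed {used_seed})")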