diff --git a/aduc_framework/__init__.py b/aduc_framework/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ddec1af60b6f4123f871d847f5da4deaa0437843 --- /dev/null +++ b/aduc_framework/__init__.py @@ -0,0 +1,75 @@ +# aduc_framework/__init__.py +# +# Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos +# +# Versão 3.0.0 (Framework Entry Point) +# +# Este arquivo serve como o ponto de entrada principal para o Aduc Framework. +# Ele define a interface pública que os clientes (UIs, APIs, etc.) usarão +# para criar e interagir com o sistema de orquestração. +# +# A principal responsabilidade deste arquivo é expor uma função de fábrica +# ('create_aduc_instance') que encapsula a lógica de inicialização do +# orquestrador e seus componentes, garantindo que o framework seja fácil +# de consumir. + +import logging + +# Importa as classes e tipos que formarão a interface pública do framework +from .orchestrator import AducOrchestrator +from .types import ( + GenerationState, + PreProductionParams, + ProductionParams, + GenerationParameters, + MediaRef, + Ato, + KeyframeData, + VideoData +) + +# Configura um logger para o framework para que os clientes possam ver as mensagens de inicialização. +logger = logging.getLogger(__name__) + +def create_aduc_instance(workspace_dir: str) -> AducOrchestrator: + """ + Ponto de entrada de fábrica para criar uma instância totalmente funcional do Aduc Framework. + + Esta função abstrai a complexidade da inicialização do AducOrchestrator e de todos + os seus engenheiros e managers dependentes. Clientes do framework devem usar esta + função para garantir uma inicialização correta e consistente. + + Args: + workspace_dir (str): O caminho para o diretório onde todos os artefatos + (imagens, vídeos, latentes, logs) serão salvos. + + Returns: + AducOrchestrator: Uma instância pronta para uso do orquestrador principal. + """ + logger.info(f"Fábrica ADUC: Criando uma nova instância com workspace em '{workspace_dir}'...") + + # Futuramente, lógicas mais complexas de inicialização, como a verificação de + # dependências ou configuração de hardware, podem ser adicionadas aqui. + + instance = AducOrchestrator(workspace_dir=workspace_dir) + + logger.info("Fábrica ADUC: Instância do framework criada e pronta para uso.") + + return instance + +# Mensagem de log para confirmar que o pacote do framework foi importado com sucesso. +logger.info("Módulo 'aduc_framework' carregado. Use a função 'create_aduc_instance()' para começar.") + +# Opcional: Definir __all__ para controlar o que é importado com 'from aduc_framework import *' +__all__ = [ + "create_aduc_instance", + "AducOrchestrator", + "GenerationState", + "PreProductionParams", + "ProductionParams", + "GenerationParameters", + "MediaRef", + "Ato", + "KeyframeData", + "VideoData" +] \ No newline at end of file diff --git a/aduc_framework/director.py b/aduc_framework/director.py new file mode 100644 index 0000000000000000000000000000000000000000..59534da67158e0b62a99cbba533e252fe694450a --- /dev/null +++ b/aduc_framework/director.py @@ -0,0 +1,116 @@ +# aduc_framework/director.py +# +# Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos +# +# Versão 3.0.0 (Framework State Manager) +# +# Este arquivo contém a classe AducDirector. Sua única responsabilidade +# é gerenciar o objeto de estado da geração (GenerationState). Ele atua +# como o "score" da orquestra ou o "script" do filme, mantendo um registro +# preciso de todos os parâmetros e artefatos gerados. 
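Aside (illustrative, not part of the patch): a minimal consumption sketch for the factory exposed in aduc_framework/__init__.py above. It assumes only what the hunk shows (the create_aduc_instance() signature and the module-level logger); no other orchestrator methods are called because none are visible in this diff.

    import logging
    from aduc_framework import create_aduc_instance

    logging.basicConfig(level=logging.INFO)  # surface the framework's startup log messages

    # The workspace directory will hold every generated artifact (images, videos, latents, logs).
    orchestrator = create_aduc_instance(workspace_dir="./workspace")
    print(type(orchestrator).__name__)  # expected: "AducOrchestrator"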
+ +import logging +import os +from typing import List, Dict, Any + +# Importa os modelos de dados Pydantic que ele irá gerenciar +from .types import GenerationState, PreProductionParams, ProductionParams, Ato, MediaRef, KeyframeData, VideoData + +logger = logging.getLogger(__name__) + +class AducDirector: + """ + Representa o Diretor de Cena, responsável por gerenciar o estado da produção. + Atua como a fonte única da verdade para todos os dados relacionados a uma + única tarefa de geração de vídeo. + """ + def __init__(self, workspace_dir: str): + """ + Inicializa o Diretor. + + Args: + workspace_dir (str): O diretório onde os artefatos são salvos. + O Diretor usa isso para referenciar caminhos se necessário. + """ + self.workspace_dir = workspace_dir + self.state: GenerationState = self._initialize_state() + os.makedirs(self.workspace_dir, exist_ok=True) + logger.info(f"AducDirector inicializado. O estado de geração foi criado.") + + def _initialize_state(self) -> GenerationState: + """ + Cria uma instância vazia e válida do modelo GenerationState. + """ + return GenerationState() + + def get_full_state(self) -> GenerationState: + """ + Retorna o objeto de estado Pydantic completo. + + Returns: + GenerationState: O estado atual da geração. + """ + return self.state + + def get_full_state_as_dict(self) -> Dict[str, Any]: + """ + Retorna o estado completo serializado como um dicionário Python. + Útil para passar para bibliotecas que não suportam Pydantic diretamente. + + Returns: + Dict[str, Any]: O estado atual como um dicionário. + """ + return self.state.model_dump() + + def update_parameters(self, stage: str, params: Any): + """ + Atualiza o nó de parâmetros no estado de geração. + + Args: + stage (str): O estágio da produção ('pre_producao', 'producao', etc.). + params (BaseModel): O objeto Pydantic contendo os parâmetros para aquele estágio. + """ + if hasattr(self.state.parametros_geracao, stage): + setattr(self.state.parametros_geracao, stage, params) + logger.info(f"Parâmetros do estágio '{stage}' atualizados no estado.") + else: + logger.warning(f"Tentativa de atualizar parâmetros para um estágio desconhecido: '{stage}'") + + def update_pre_production_state(self, prompt: str, ref_paths: List[str], storyboard: List[str]): + """ + Popula as seções iniciais do estado após a geração do storyboard. + + Args: + prompt (str): O prompt geral. + ref_paths (List[str]): Lista de caminhos para as mídias de referência. + storyboard (List[str]): Lista de resumos dos atos. + """ + self.state.Promt_geral = prompt + self.state.midias_referencia = [MediaRef(id=i, caminho=path) for i, path in enumerate(ref_paths)] + self.state.Atos = [Ato(id=i, resumo_ato=ato) for i, ato in enumerate(storyboard)] + logger.info("Estado de pré-produção (prompt, referências, atos) atualizado.") + + def update_keyframes_state(self, keyframes_data: List[Dict[str, Any]]): + """ + Atualiza a lista de keyframes no estado. + + Args: + keyframes_data (List[Dict[str, Any]]): Uma lista de dicionários, cada um + representando os dados de um keyframe. + """ + # Converte os dicionários em modelos Pydantic KeyframeData + self.state.Keyframe_atos = [KeyframeData(**data) for data in keyframes_data] + logger.info(f"{len(keyframes_data)} keyframes adicionados ao estado.") + + def update_video_state(self, video_data_dict: Dict[str, Any]): + """ + Atualiza a lista de vídeos gerados no estado. + + Args: + video_data_dict (Dict[str, Any]): Um dicionário representando os dados do vídeo gerado. 
+ """ + # Converte o dicionário em um modelo Pydantic VideoData + video_model = VideoData(**video_data_dict) + # Atualmente, substituímos a lista, mas poderíamos adicionar a ela no futuro. + self.state.videos_atos = [video_model] + logger.info("Dados da produção de vídeo atualizados no estado.") \ No newline at end of file diff --git a/engineers/LICENSE b/aduc_framework/engineers/LICENSE similarity index 100% rename from engineers/LICENSE rename to aduc_framework/engineers/LICENSE diff --git a/engineers/NOTICE.md b/aduc_framework/engineers/NOTICE.md similarity index 100% rename from engineers/NOTICE.md rename to aduc_framework/engineers/NOTICE.md diff --git a/engineers/README.md b/aduc_framework/engineers/README.md similarity index 100% rename from engineers/README.md rename to aduc_framework/engineers/README.md diff --git a/aduc_framework/engineers/__init__.py b/aduc_framework/engineers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..476476ac2a24f18dd6aa3b6910e7d9cf446f32a7 --- /dev/null +++ b/aduc_framework/engineers/__init__.py @@ -0,0 +1,13 @@ +# aduc_framework/engineers/__init__.py + +# Expõe os singletons e classes principais do sub-pacote de engenheiros. + +from .deformes2D_thinker import deformes2d_thinker_singleton +from .deformes3D import deformes3d_engine_singleton +from .deformes4D import Deformes4DEngine + +__all__ = [ + "deformes2d_thinker_singleton", + "deformes3d_engine_singleton", + "Deformes4DEngine", +] \ No newline at end of file diff --git a/engineers/deformes2D_thinker.py b/aduc_framework/engineers/deformes2D_thinker.py similarity index 94% rename from engineers/deformes2D_thinker.py rename to aduc_framework/engineers/deformes2D_thinker.py index 9bfd800fad83f1b3ff94b59d986ae6d8e5c991d1..3c1da3a4cf7c44b473e3a1dc98566f38f5a1904b 100644 --- a/engineers/deformes2D_thinker.py +++ b/aduc_framework/engineers/deformes2D_thinker.py @@ -36,7 +36,7 @@ import gradio as gr from typing import List # It imports the communication layer, not the API directly -from managers.gemini_manager import gemini_manager_singleton +from ..managers.gemini_manager import gemini_manager_singleton logger = logging.getLogger(__name__) @@ -117,16 +117,16 @@ class Deformes2DThinker: prompt_parts = [ f"# CONTEXT:\n- Global Story Goal: {global_prompt}\n# VISUAL ASSETS:", "Current Base Image [IMG-BASE]:", - Image.open(last_image_path) + "",#Image.open(last_image_path) ] - ref_counter = 1 - for path in fixed_ref_paths: - if path != last_image_path: - prompt_parts.extend([f"General Reference Image [IMG-REF-{ref_counter}]:", Image.open(path)]) - ref_counter += 1 + #ref_counter = 1 + #for path in fixed_ref_paths: + # if path != last_image_path: + # prompt_parts.extend([f"General Reference Image [IMG-REF-{ref_counter}]:", Image.open(path)]) + # ref_counter += 1 - prompt_parts.append(director_prompt) + #prompt_parts.append(director_prompt) final_flux_prompt = gemini_manager_singleton.get_raw_text(prompt_parts) diff --git a/aduc_framework/engineers/deformes3D.py b/aduc_framework/engineers/deformes3D.py new file mode 100644 index 0000000000000000000000000000000000000000..b6c7f894274736b7d58bd40fcd684d1f0425dfca --- /dev/null +++ b/aduc_framework/engineers/deformes3D.py @@ -0,0 +1,183 @@ +# aduc_framework/engineers/deformes3D.py +# +# Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos +# +# Versão 3.1.2 (Com correção de import de 'typing') +# +# Este engenheiro é o "Diretor de Arte" do framework. 
Sua responsabilidade +# é ler o estado de geração (storyboard, parâmetros) e orquestrar a criação +# dos keyframes visuais, que servirão de âncora para a geração de vídeo. + +import os +import time +import logging +import yaml +import torch +import numpy as np +from PIL import Image, ImageOps + +# >>> INÍCIO DA CORREÇÃO <<< +from typing import List, Dict, Any, Callable, Optional +# >>> FIM DA CORREÇÃO <<< + +# --- Imports Relativos Corrigidos --- +from .deformes2D_thinker import deformes2d_thinker_singleton +from ..types import LatentConditioningItem +from ..managers.ltx_manager import ltx_manager_singleton +from ..managers.vae_manager import vae_manager_singleton +from ..managers.latent_enhancer_manager import latent_enhancer_specialist_singleton + +logger = logging.getLogger(__name__) + +# Define um tipo para o callback de progresso para clareza +ProgressCallback = Optional[Callable[[float, str], None]] + +class Deformes3DEngine: + """ + Especialista ADUC para a geração de imagens estáticas (keyframes). + """ + def __init__(self): + """O construtor é leve e não recebe argumentos.""" + self.workspace_dir: Optional[str] = None + logger.info("Deformes3DEngine instanciado (não inicializado).") + + def initialize(self, workspace_dir: str): + """Inicializa o engenheiro com as configurações necessárias.""" + if self.workspace_dir is not None: + return # Evita reinicialização + self.workspace_dir = workspace_dir + logger.info(f"3D Engine (Image Specialist) inicializado com workspace: {self.workspace_dir}.") + + def generate_keyframes_from_storyboard( + self, + generation_state: Dict[str, Any], + progress_callback: ProgressCallback = None + ) -> List[Dict[str, Any]]: + """ + Orquestra a geração de todos os keyframes com base no estado de geração completo. + Retorna uma lista de dicionários com dados detalhados de cada keyframe. + """ + if not self.workspace_dir: + raise RuntimeError("Deformes3DEngine não foi inicializado. Chame o método initialize() antes de usar.") + + # 1. 
Extrai todos os parâmetros necessários do estado + params = generation_state.get("parametros_geracao", {}).get("pre_producao", {}) + storyboard = [ato["resumo_ato"] for ato in generation_state.get("Atos", [])] + global_prompt = generation_state.get("Promt_geral", "") + general_ref_paths = [media["caminho"] for media in generation_state.get("midias_referencia", [])] + + keyframe_resolution = params.get('resolution', 480) + initial_ref_path = general_ref_paths[0] + num_keyframes_to_generate = len(storyboard) # um keyframe por ato do storyboard + + previous_prompt = "" + all_keyframes_data: List[Dict[str, Any]] = [] + width, height = keyframe_resolution, keyframe_resolution + target_resolution_tuple = (width, height) + + logger.info(f"IMAGE SPECIALIST: Ordem para gerar {num_keyframes_to_generate} keyframes (versões LTX).") + + # Condicionamento base: a referência inicial ancora o primeiro e o último frame do fragmento. + ltx_conditioning_items0 = [] + img_pil0 = Image.open(initial_ref_path).convert("RGB") + img_processed0 = self._preprocess_image_for_latent_conversion(img_pil0, target_resolution_tuple) + pixel_tensor0 = self._pil_to_pixel_tensor(img_processed0) + ltx_conditioning_items0.append(LatentConditioningItem(pixel_tensor0, 0, 0.05)) + ltx_conditioning_items0.append(LatentConditioningItem(pixel_tensor0, 23, 0.05)) + + latent_tensorY = pixel_tensor0 + latent_tensorX = latent_tensorY + + current_base_image_path = initial_ref_path + past_base_image_path = initial_ref_path + + for i in range(num_keyframes_to_generate): + scene_index = i + 1 + current_scene = storyboard[i] + future_scene = storyboard[i + 1] if (i + 1) < len(storyboard) else "A cena final." + logger.info(f"--> Gerando Keyframe {scene_index}/{num_keyframes_to_generate}...") + + img_prompt = deformes2d_thinker_singleton.get_anticipatory_keyframe_prompt( + global_prompt=global_prompt, + scene_history=previous_prompt, + current_scene_desc=current_scene, + future_scene_desc=future_scene, + last_image_path=past_base_image_path, + fixed_ref_paths=[current_base_image_path] + ) + past_base_image_path = current_base_image_path + + # Copia a lista base para não acumular itens de condicionamento entre iterações. + ltx_conditioning_items = list(ltx_conditioning_items0) + ltx_conditioning_items.append(LatentConditioningItem(latent_tensorX, 0, 0.4)) + ltx_conditioning_items.append(LatentConditioningItem(latent_tensorY, 8, 0.6)) + latent_tensorX = latent_tensorY + + ltx_base_params = {"guidance_scale": 1.0, "stg_scale": 0.001, "num_inference_steps": 25} + generated_latents, _ = ltx_manager_singleton.generate_latent_fragment( + height=height, width=width, + conditioning_items_data=ltx_conditioning_items, + motion_prompt=img_prompt, + video_total_frames=24, video_fps=24, + **ltx_base_params + ) + + final_latent = generated_latents[:, :, -1:, :, :] + #upscaled_latent = latent_enhancer_specialist_singleton.upscale(final_latent) + enriched_pixel_tensor = vae_manager_singleton.decode(final_latent) + + pixel_path = os.path.join(self.workspace_dir, f"keyframe_{scene_index:04d}_pixel.png") + latent_path = os.path.join(self.workspace_dir, f"keyframe_{scene_index:04d}_latent.pt") + self.save_image_from_tensor(enriched_pixel_tensor, pixel_path) + torch.save(final_latent.cpu(), latent_path) + + # Mantém o último latente em memória para condicionar o próximo keyframe. + latent_tensorY = final_latent + + keyframe_data = { + "id": scene_index, + "caminho_pixel": pixel_path, + "caminho_latent": latent_path, + "prompt_keyframe": img_prompt + } + all_keyframes_data.append(keyframe_data) + + current_base_image_path = pixel_path + previous_prompt = img_prompt + + logger.info("IMAGE SPECIALIST: Geração de todos os dados de keyframes completa.") + return all_keyframes_data + + # 
--- FUNÇÕES HELPER --- + + def _preprocess_image_for_latent_conversion(self, image: Image.Image, target_resolution: tuple) -> Image.Image: + if image.size != target_resolution: + return ImageOps.fit(image, target_resolution, Image.Resampling.LANCZOS) + return image + + def _pil_to_pixel_tensor(self, pil_image: Image.Image) -> torch.Tensor: + image_np = np.array(pil_image).astype(np.float32) / 255.0 + tensor = torch.from_numpy(image_np).permute(2, 0, 1).unsqueeze(0).unsqueeze(2) + return (tensor * 2.0) - 1.0 + + def save_image_from_tensor(self, pixel_tensor: torch.Tensor, path: str): + tensor_chw = pixel_tensor.squeeze(0).squeeze(1) + tensor_hwc = tensor_chw.permute(1, 2, 0) + tensor_hwc = (tensor_hwc.clamp(-1, 1) + 1) / 2.0 + image_np = (tensor_hwc.cpu().float().numpy() * 255).astype(np.uint8) + Image.fromarray(image_np).save(path) + +# --- Instanciação Singleton --- +# A instância é criada, mas não configurada ainda. O Orchestrator fará isso. +deformes3d_engine_singleton = Deformes3DEngine() \ No newline at end of file diff --git a/aduc_framework/engineers/deformes4D.py b/aduc_framework/engineers/deformes4D.py new file mode 100644 index 0000000000000000000000000000000000000000..800422e51ec8b6c05e459b616c8ce9e03dc3625f --- /dev/null +++ b/aduc_framework/engineers/deformes4D.py @@ -0,0 +1,235 @@ +# aduc_framework/engineers/deformes4D.py +# +# Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos +# +# Versão 3.1.1 (Com correção de limpeza de arquivos) +# +# Este engenheiro implementa a Câmera (Ψ) e o Destilador (Δ) da arquitetura +# ADUC-SDR. Sua única responsabilidade é a geração sequencial de fragmentos de +# vídeo com base em um conjunto de keyframes pré-definido. + +import os +import time +import imageio +import numpy as np +import torch +import logging +from PIL import Image, ImageOps +import gc +import shutil +from pathlib import Path +from typing import List, Tuple, Dict, Any, Callable, Optional + +# --- Imports Relativos Corrigidos --- +from ..types import LatentConditioningItem +from ..managers.ltx_manager import ltx_manager_singleton +from ..managers.vae_manager import vae_manager_singleton +from .deformes2D_thinker import deformes2d_thinker_singleton +from ..tools.video_encode_tool import video_encode_tool_singleton + +logger = logging.getLogger(__name__) + +ProgressCallback = Optional[Callable[[float, str], None]] + +class Deformes4DEngine: + """ + Orquestra a geração e concatenação de fragmentos de vídeo. + """ + def __init__(self): + """O construtor é leve e não recebe argumentos.""" + self.workspace_dir: Optional[str] = None + self.device = 'cuda' if torch.cuda.is_available() else 'cpu' + logger.info("Deformes4DEngine instanciado (não inicializado).") + + def initialize(self, workspace_dir: str): + """Inicializa o engenheiro com as configurações necessárias.""" + if self.workspace_dir is not None: + return # Evita reinicialização + self.workspace_dir = workspace_dir + os.makedirs(self.workspace_dir, exist_ok=True) + logger.info(f"Deformes4D Specialist (Executor) inicializado com workspace: {self.workspace_dir}.") + + def generate_original_movie( + self, + full_generation_state: Dict[str, Any], + progress_callback: ProgressCallback = None + ) -> Dict[str, Any]: + """ + Gera o filme principal lendo todos os parâmetros do estado de geração. + """ + if not self.workspace_dir: + raise RuntimeError("Deformes4DEngine não foi inicializado. Chame o método initialize() antes de usar.") + + # 1. 
Extrai todos os parâmetros do estado de geração + pre_prod_params = full_generation_state.get("parametros_geracao", {}).get("pre_producao", {}) + prod_params = full_generation_state.get("parametros_geracao", {}).get("producao", {}) + + keyframes_data = full_generation_state.get("Keyframe_atos", []) + global_prompt = full_generation_state.get("Promt_geral", "") + storyboard = [ato["resumo_ato"] for ato in full_generation_state.get("Atos", [])] + keyframe_paths = [kf["caminho_pixel"] for kf in keyframes_data] + + seconds_per_fragment = pre_prod_params.get('duration_per_fragment', 4.0) + video_resolution = pre_prod_params.get('resolution', 480) + + trim_percent = prod_params.get('trim_percent', 50) + handler_strength = prod_params.get('handler_strength', 0.5) + destination_convergence_strength = prod_params.get('destination_convergence_strength', 0.75) + guidance_scale = prod_params.get('guidance_scale', 2.0) + stg_scale = prod_params.get('stg_scale', 0.025) + num_inference_steps = prod_params.get('inference_steps', 20) + + # 2. Inicia o processo de geração + FPS = 24 + FRAMES_PER_LATENT_CHUNK = 8 + LATENT_PROCESSING_CHUNK_SIZE = 4 + + run_timestamp = int(time.time()) + temp_latent_dir = os.path.join(self.workspace_dir, f"temp_latents_{run_timestamp}") + temp_video_clips_dir = os.path.join(self.workspace_dir, f"temp_clips_{run_timestamp}") + os.makedirs(temp_latent_dir, exist_ok=True) + os.makedirs(temp_video_clips_dir, exist_ok=True) + + total_frames_brutos = self._quantize_to_multiple(int(seconds_per_fragment * FPS), FRAMES_PER_LATENT_CHUNK) + frames_a_podar = self._quantize_to_multiple(int(total_frames_brutos * (trim_percent / 100)), FRAMES_PER_LATENT_CHUNK) + latents_a_podar = frames_a_podar // FRAMES_PER_LATENT_CHUNK + DEJAVU_FRAME_TARGET = frames_a_podar - 1 if frames_a_podar > 0 else 0 + DESTINATION_FRAME_TARGET = total_frames_brutos - 1 + + base_ltx_params = {"guidance_scale": guidance_scale, "stg_scale": stg_scale, "num_inference_steps": num_inference_steps} + story_history = "" + target_resolution_tuple = (video_resolution, video_resolution) + eco_latent_for_next_loop, dejavu_latent_for_next_loop = None, None + latent_fragment_paths = [] + video_fragments_data = [] + + if len(keyframe_paths) < 2: + raise ValueError(f"A geração requer pelo menos 2 keyframes. Fornecidos: {len(keyframe_paths)}.") + num_transitions_to_generate = len(keyframe_paths) - 1 + + logger.info("--- ESTÁGIO 1: Geração de Fragmentos Latentes ---") + for i in range(num_transitions_to_generate): + fragment_index = i + 1 + if progress_callback: + progress_fraction = (i / num_transitions_to_generate) * 0.7 + progress_callback(progress_fraction, f"Gerando Latente {fragment_index}/{num_transitions_to_generate}") + + past_keyframe_path = keyframe_paths[i - 1] if i > 0 else keyframe_paths[i] + start_keyframe_path = keyframe_paths[i] + destination_keyframe_path = keyframe_paths[i + 1] + future_story_prompt = storyboard[i + 1] if (i + 1) < len(storyboard) else "A cena final." 
+ decision = deformes2d_thinker_singleton.get_cinematic_decision( + global_prompt, story_history, past_keyframe_path, start_keyframe_path, + destination_keyframe_path, storyboard[i - 1] if i > 0 else "O início.", + storyboard[i], future_story_prompt + ) + motion_prompt = decision["motion_prompt"] + story_history += f"\n- Ato {fragment_index}: {motion_prompt}" + + conditioning_items = [] + if eco_latent_for_next_loop is None: + img_start = self._preprocess_image_for_latent_conversion(Image.open(start_keyframe_path).convert("RGB"), target_resolution_tuple) + conditioning_items.append(LatentConditioningItem(self._pil_to_latent(img_start), 0, 1.0)) + else: + conditioning_items.append(LatentConditioningItem(eco_latent_for_next_loop, 0, 1.0)) + conditioning_items.append(LatentConditioningItem(dejavu_latent_for_next_loop, DEJAVU_FRAME_TARGET, handler_strength)) + + img_dest = self._preprocess_image_for_latent_conversion(Image.open(destination_keyframe_path).convert("RGB"), target_resolution_tuple) + conditioning_items.append(LatentConditioningItem(self._pil_to_latent(img_dest), DESTINATION_FRAME_TARGET, destination_convergence_strength)) + + latents_brutos, _ = ltx_manager_singleton.generate_latent_fragment( + height=video_resolution, width=video_resolution, + conditioning_items_data=conditioning_items, motion_prompt=motion_prompt, + video_total_frames=total_frames_brutos, video_fps=FPS, + **base_ltx_params + ) + + last_trim = latents_brutos[:, :, -(latents_a_podar+1):, :, :].clone() + eco_latent_for_next_loop = last_trim[:, :, :2, :, :].clone() + dejavu_latent_for_next_loop = last_trim[:, :, -1:, :, :].clone() + latents_video = latents_brutos[:, :, :-(latents_a_podar-1), :, :].clone() + del last_trim, latents_brutos; gc.collect(); torch.cuda.empty_cache() + + cpu_latent = latents_video.cpu() + latent_path = os.path.join(temp_latent_dir, f"latent_fragment_{i:04d}.pt") + torch.save(cpu_latent, latent_path) + latent_fragment_paths.append(latent_path) + + video_fragments_data.append({"id": i, "prompt_video": motion_prompt}) + del latents_video, cpu_latent; gc.collect() + + del eco_latent_for_next_loop, dejavu_latent_for_next_loop; gc.collect(); torch.cuda.empty_cache() + + logger.info(f"--- ESTÁGIO 2: Processando {len(latent_fragment_paths)} latentes ---") + final_video_clip_paths = [] + num_chunks = -(-len(latent_fragment_paths) // LATENT_PROCESSING_CHUNK_SIZE) if LATENT_PROCESSING_CHUNK_SIZE > 0 else 0 + for i in range(num_chunks): + chunk_start_index = i * LATENT_PROCESSING_CHUNK_SIZE + chunk_end_index = chunk_start_index + LATENT_PROCESSING_CHUNK_SIZE + chunk_paths = latent_fragment_paths[chunk_start_index:chunk_end_index] + + if progress_callback: + progress_fraction = 0.7 + (i / num_chunks * 0.28) + progress_callback(progress_fraction, f"Processando & Decodificando Lote {i+1}/{num_chunks}") + + tensors_in_chunk = [torch.load(p, map_location=self.device) for p in chunk_paths] + sub_group_latent = torch.cat(tensors_in_chunk, dim=2) + del tensors_in_chunk; gc.collect(); torch.cuda.empty_cache() + + pixel_tensor = vae_manager_singleton.decode(sub_group_latent) + del sub_group_latent; gc.collect(); torch.cuda.empty_cache() + + base_name = f"clip_{i:04d}_{run_timestamp}" + current_clip_path = os.path.join(temp_video_clips_dir, f"{base_name}.mp4") + self.save_video_from_tensor(pixel_tensor, current_clip_path, fps=FPS) + final_video_clip_paths.append(current_clip_path) + del pixel_tensor; gc.collect(); torch.cuda.empty_cache() + + if progress_callback: progress_callback(0.98, "Montando o filme 
final...") + final_video_path = os.path.join(self.workspace_dir, f"original_movie_{run_timestamp}.mp4") + video_encode_tool_singleton.concatenate_videos(final_video_clip_paths, final_video_path, self.workspace_dir) + + try: + shutil.rmtree(temp_video_clips_dir) + # A linha que apagava 'temp_latent_dir' foi removida para persistir os latentes. + except OSError as e: + logger.warning(f"Não foi possível remover o diretório de clipes temporários: {e}") + + logger.info(f"Processo completo! Vídeo original salvo em: {final_video_path}") + + final_video_data_for_state = { + "id": 0, "caminho_pixel": final_video_path, + "caminhos_latentes_fragmentos": latent_fragment_paths, + "fragmentos_componentes": video_fragments_data + } + + return { + "final_path": final_video_path, + "latent_paths": latent_fragment_paths, + "video_data": final_video_data_for_state + } + + # --- FUNÇÕES HELPER --- + def save_video_from_tensor(self, video_tensor: torch.Tensor, path: str, fps: int = 24): + if video_tensor is None or video_tensor.ndim != 5 or video_tensor.shape[2] == 0: return + video_tensor = video_tensor.squeeze(0).permute(1, 2, 3, 0) + video_tensor = (video_tensor.clamp(-1, 1) + 1) / 2.0 + video_np = (video_tensor.detach().cpu().float().numpy() * 255).astype(np.uint8) + with imageio.get_writer(path, fps=fps, codec='libx264', quality=8, output_params=['-pix_fmt', 'yuv420p']) as writer: + for frame in video_np: writer.append_data(frame) + + def _preprocess_image_for_latent_conversion(self, image: Image.Image, target_resolution: tuple) -> Image.Image: + if image.size != target_resolution: + return ImageOps.fit(image, target_resolution, Image.Resampling.LANCZOS) + return image + + def _pil_to_latent(self, pil_image: Image.Image) -> torch.Tensor: + image_np = np.array(pil_image).astype(np.float32) / 255.0 + tensor = torch.from_numpy(image_np).permute(2, 0, 1).unsqueeze(0).unsqueeze(2) + tensor = (tensor * 2.0) - 1.0 + return vae_manager_singleton.encode(tensor) + + def _quantize_to_multiple(self, n: int, m: int) -> int: + if m == 0: return n + quantized = int(round(n / m) * m) + return m if n > 0 and quantized == 0 else quantized \ No newline at end of file diff --git a/managers/LICENSE b/aduc_framework/managers/LICENSE similarity index 100% rename from managers/LICENSE rename to aduc_framework/managers/LICENSE diff --git a/managers/LICENSE.txt b/aduc_framework/managers/LICENSE.txt similarity index 100% rename from managers/LICENSE.txt rename to aduc_framework/managers/LICENSE.txt diff --git a/managers/NOTICE.md b/aduc_framework/managers/NOTICE.md similarity index 100% rename from managers/NOTICE.md rename to aduc_framework/managers/NOTICE.md diff --git a/managers/README.md b/aduc_framework/managers/README.md similarity index 100% rename from managers/README.md rename to aduc_framework/managers/README.md diff --git a/aduc_framework/managers/__init__.py b/aduc_framework/managers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a2be08eb433ed90c34ed24685f7e6e3b5a344a01 --- /dev/null +++ b/aduc_framework/managers/__init__.py @@ -0,0 +1,19 @@ +# aduc_framework/managers/__init__.py + +# Expõe os singletons e classes principais do sub-pacote de managers. 
+ +from .gemini_manager import gemini_manager_singleton +from .ltx_manager import ltx_manager_singleton +from .vae_manager import vae_manager_singleton +from .latent_enhancer_manager import latent_enhancer_specialist_singleton +from .mmaudio_manager import mmaudio_manager_singleton +from .seedvr_manager import seedvr_manager_singleton + +__all__ = [ + "gemini_manager_singleton", + "ltx_manager_singleton", + "vae_manager_singleton", + "latent_enhancer_specialist_singleton", + "mmaudio_manager_singleton", + "seedvr_manager_singleton", +] \ No newline at end of file diff --git a/managers/config.yaml b/aduc_framework/managers/config.yaml similarity index 100% rename from managers/config.yaml rename to aduc_framework/managers/config.yaml diff --git a/managers/flux_kontext_manager.py b/aduc_framework/managers/flux_kontext_manager.py similarity index 99% rename from managers/flux_kontext_manager.py rename to aduc_framework/managers/flux_kontext_manager.py index 34838c991f91c4385ae67a0dd5e3a266c2d81812..72ac80ede9e2103f97913b60c375c33eb1222ec9 100644 --- a/managers/flux_kontext_manager.py +++ b/aduc_framework/managers/flux_kontext_manager.py @@ -25,7 +25,7 @@ import threading import yaml import logging -from tools.hardware_manager import hardware_manager +from ..tools.hardware_manager import hardware_manager logger = logging.getLogger(__name__) diff --git a/managers/gemini_manager.py b/aduc_framework/managers/gemini_manager.py similarity index 98% rename from managers/gemini_manager.py rename to aduc_framework/managers/gemini_manager.py index 0d0f2b50c51c05e43a3b089df1f5694a7760959e..abb9a95c828587cad6e28b1f4adc3d28fd9d7290 100644 --- a/managers/gemini_manager.py +++ b/aduc_framework/managers/gemini_manager.py @@ -63,7 +63,7 @@ class GeminiManager: self.api_key = os.environ.get("GEMINI_API_KEY") if self.api_key: genai.configure(api_key=self.api_key) - self.model = genai.GenerativeModel('gemini-2.5-flash') + self.model = genai.GenerativeModel('gemini-2.0-flash') logger.info("GeminiManager (Communication Layer) initialized successfully.") else: self.model = None diff --git a/managers/latent_enhancer_manager.py b/aduc_framework/managers/latent_enhancer_manager.py similarity index 98% rename from managers/latent_enhancer_manager.py rename to aduc_framework/managers/latent_enhancer_manager.py index 00bf8055e2f4d4101d9e7500d30530bd8b204197..dc020c0bcc8a794ec4869c9916390aeb9dbd01d0 100644 --- a/managers/latent_enhancer_manager.py +++ b/aduc_framework/managers/latent_enhancer_manager.py @@ -19,7 +19,7 @@ import torch import logging import time from diffusers import LTXLatentUpsamplePipeline -from managers.ltx_manager import ltx_manager_singleton +from ..managers.ltx_manager import ltx_manager_singleton logger = logging.getLogger(__name__) diff --git a/managers/ltx_manager.py b/aduc_framework/managers/ltx_manager.py similarity index 54% rename from managers/ltx_manager.py rename to aduc_framework/managers/ltx_manager.py index f35ab418c00f39ba807b4e2b202efc2e4efab9f5..a261485ddd0a8dc053bbba30520659900630f611 100644 --- a/managers/ltx_manager.py +++ b/aduc_framework/managers/ltx_manager.py @@ -1,22 +1,13 @@ -# managers/ltx_manager.py -# AducSdr: Uma implementação aberta e funcional da arquitetura ADUC-SDR -# Copyright (C) 4 de Agosto de 2025 Carlos Rodrigues dos Santos +# aduc_framework/managers/ltx_manager.py # -# Contato: -# Carlos Rodrigues dos Santos -# carlex22@gmail.com -# Rua Eduardo Carlos Pereira, 4125, B1 Ap32, Curitiba, PR, Brazil, CEP 8102025 +# Copyright (C) August 4, 2025 Carlos Rodrigues dos 
Santos # -# Repositórios e Projetos Relacionados: -# GitHub: https://github.com/carlex22/Aduc-sdr +# Versão 2.3.2 (Com correção de manipulação de dataclass) # -# PENDING PATENT NOTICE: Please see NOTICE.md. -# -# Version: 2.2.2 -# -# This file manages the LTX-Video specialist pool. It has been refactored to be -# self-contained by automatically cloning its own dependencies and using a local -# utility module for pipeline creation, fully decoupling it from external scripts. +# Este manager é responsável por controlar a pipeline LTX-Video. Ele gerencia +# um pool de workers para otimizar o uso de múltiplas GPUs, lida com a inicialização +# e o setup de dependências complexas, e expõe uma interface de alto nível para a +# geração de fragmentos de vídeo no espaço latente. import torch import gc @@ -31,18 +22,19 @@ import subprocess from pathlib import Path from typing import Optional, List, Tuple, Union -from tools.optimization import optimize_ltx_worker, can_optimize_fp8 -from tools.hardware_manager import hardware_manager -from aduc_types import LatentConditioningItem +# --- Imports Relativos Corrigidos --- +from ..types import LatentConditioningItem +from ..tools.optimization import optimize_ltx_worker, can_optimize_fp8 +from ..tools.hardware_manager import hardware_manager logger = logging.getLogger(__name__) -# --- Dependency Management --- +# --- Gerenciamento de Dependências e Placeholders --- DEPS_DIR = Path("./deps") LTX_VIDEO_REPO_DIR = DEPS_DIR / "LTX-Video" LTX_VIDEO_REPO_URL = "https://github.com/Lightricks/LTX-Video.git" -# --- Placeholders for lazy-loaded modules --- +# Placeholders para módulos importados tardiamente (lazy-loaded) create_ltx_video_pipeline = None calculate_padding = None LTXVideoPipeline = None @@ -54,11 +46,10 @@ randn_tensor = None class LtxPoolManager: """ - Manages a pool of LtxWorkers for optimized multi-GPU usage. - Handles its own code dependencies by cloning the LTX-Video repository. + Gerencia um pool de LtxWorkers e expõe a pipeline de aprimoramento de prompt. """ - def __init__(self, device_ids, ltx_config_file_name): - logger.info(f"LTX POOL MANAGER: Creating workers for devices: {device_ids}") + def __init__(self, device_ids: List[str], ltx_config_file_name: str): + logger.info(f"LTX POOL MANAGER: Criando workers para os dispositivos: {device_ids}") self._ltx_modules_loaded = False self._setup_dependencies() self._lazy_load_ltx_modules() @@ -69,61 +60,65 @@ class LtxPoolManager: self.current_worker_index = 0 self.lock = threading.Lock() + self.prompt_enhancement_pipeline = self.workers[0].pipeline if self.workers else None + if self.prompt_enhancement_pipeline: + logger.info("LTX POOL MANAGER: Pipeline de aprimoramento de prompt exposta para outros especialistas.") + self._apply_ltx_pipeline_patches() if all(w.device.type == 'cuda' for w in self.workers): - logger.info("LTX POOL MANAGER: HOT START MODE ENABLED. Pre-warming all GPUs...") + logger.info("LTX POOL MANAGER: MODO HOT START ATIVADO. Pré-aquecendo todas as GPUs...") for worker in self.workers: worker.to_gpu() - logger.info("LTX POOL MANAGER: All GPUs are hot and ready.") + logger.info("LTX POOL MANAGER: Todas as GPUs estão prontas.") else: - logger.info("LTX POOL MANAGER: Operating in CPU or mixed mode. GPU pre-warming skipped.") + logger.info("LTX POOL MANAGER: Operando em modo CPU ou misto. 
Pré-aquecimento de GPU pulado.") def _setup_dependencies(self): - """Clones the LTX-Video repo if not found and adds it to the system path.""" + """Clona o repositório LTX-Video se não encontrado e o adiciona ao sys.path.""" if not LTX_VIDEO_REPO_DIR.exists(): - logger.info(f"LTX-Video repository not found at '{LTX_VIDEO_REPO_DIR}'. Cloning from GitHub...") + logger.info(f"Repositório LTX-Video não encontrado em '{LTX_VIDEO_REPO_DIR}'. Clonando do GitHub...") try: DEPS_DIR.mkdir(exist_ok=True) subprocess.run( - ["git", "clone", LTX_VIDEO_REPO_URL, str(LTX_VIDEO_REPO_DIR)], + ["git", "clone", "--depth", "1", LTX_VIDEO_REPO_URL, str(LTX_VIDEO_REPO_DIR)], check=True, capture_output=True, text=True ) - logger.info("LTX-Video repository cloned successfully.") + logger.info("Repositório LTX-Video clonado com sucesso.") except subprocess.CalledProcessError as e: - logger.error(f"Failed to clone LTX-Video repository. Git stderr: {e.stderr}") - raise RuntimeError("Could not clone the required LTX-Video dependency from GitHub.") + logger.error(f"Falha ao clonar o repositório LTX-Video. Git stderr: {e.stderr}") + raise RuntimeError("Não foi possível clonar a dependência LTX-Video do GitHub.") else: - logger.info("Found local LTX-Video repository.") + logger.info("Repositório LTX-Video local encontrado.") if str(LTX_VIDEO_REPO_DIR.resolve()) not in sys.path: sys.path.insert(0, str(LTX_VIDEO_REPO_DIR.resolve())) - logger.info(f"Added '{LTX_VIDEO_REPO_DIR.resolve()}' to sys.path.") - + logger.info(f"Adicionado '{LTX_VIDEO_REPO_DIR.resolve()}' ao sys.path.") + def _lazy_load_ltx_modules(self): - """Dynamically imports LTX-Video modules after ensuring the repo exists.""" + """Importa dinamicamente os módulos do LTX-Video após garantir que o repositório existe.""" if self._ltx_modules_loaded: return global create_ltx_video_pipeline, calculate_padding, LTXVideoPipeline, ConditioningItem, LTXMultiScalePipeline global vae_encode, latent_to_pixel_coords, randn_tensor - from managers.ltx_pipeline_utils import create_ltx_video_pipeline, calculate_padding + from .ltx_pipeline_utils import create_ltx_video_pipeline, calculate_padding from ltx_video.pipelines.pipeline_ltx_video import LTXVideoPipeline, ConditioningItem, LTXMultiScalePipeline from ltx_video.models.autoencoders.vae_encode import vae_encode, latent_to_pixel_coords from diffusers.utils.torch_utils import randn_tensor self._ltx_modules_loaded = True - logger.info("LTX-Video modules have been dynamically loaded.") + logger.info("Módulos do LTX-Video foram carregados dinamicamente.") def _apply_ltx_pipeline_patches(self): - """Applies runtime patches to the LTX pipeline for ADUC-SDR compatibility.""" - logger.info("LTX POOL MANAGER: Applying ADUC-SDR patches to LTX pipeline...") + """Aplica patches em tempo de execução na pipeline LTX para compatibilidade com ADUC-SDR.""" + logger.info("LTX POOL MANAGER: Aplicando patches ADUC-SDR na pipeline LTX...") for worker in self.workers: worker.pipeline.prepare_conditioning = _aduc_prepare_conditioning_patch.__get__(worker.pipeline, LTXVideoPipeline) - logger.info("LTX POOL MANAGER: All pipeline instances have been patched successfully.") + logger.info("LTX POOL MANAGER: Todas as instâncias da pipeline foram corrigidas com sucesso.") - def _get_next_worker(self): + def _get_next_worker(self) -> 'LtxWorker': with self.lock: worker = self.workers[self.current_worker_index] self.current_worker_index = (self.current_worker_index + 1) % len(self.workers) @@ -144,63 +139,72 @@ class LtxPoolManager: 
pipeline_params["latents"] = kwargs['latents'].to(worker.device, dtype=worker.pipeline.transformer.dtype) if 'strength' in kwargs: pipeline_params["strength"] = kwargs['strength'] + if 'conditioning_items_data' in kwargs: final_conditioning_items = [] for item in kwargs['conditioning_items_data']: + # CORREÇÃO: Como LatentConditioningItem é uma dataclass mutável, + # nós modificamos o atributo diretamente no dispositivo do worker. item.latent_tensor = item.latent_tensor.to(worker.device) final_conditioning_items.append(item) pipeline_params["conditioning_items"] = final_conditioning_items + if worker.is_distilled: - logger.info(f"Worker {worker.device} is using a distilled model. Using fixed timesteps.") fixed_timesteps = worker.config.get("first_pass", {}).get("timesteps") - pipeline_params["timesteps"] = fixed_timesteps if fixed_timesteps: + pipeline_params["timesteps"] = fixed_timesteps pipeline_params["num_inference_steps"] = len(fixed_timesteps) + + callback = kwargs.get('callback') + if callback: + pipeline_params["callback_on_step_end"] = callback + pipeline_params["callback_on_step_end_tensor_inputs"] = ["latents"] + return pipeline_params - def generate_latent_fragment(self, **kwargs) -> (torch.Tensor, tuple): + def generate_latent_fragment(self, **kwargs) -> Tuple[torch.Tensor, tuple]: worker_to_use = self._get_next_worker() try: height, width = kwargs['height'], kwargs['width'] padded_h, padded_w = ((height - 1) // 32 + 1) * 32, ((width - 1) // 32 + 1) * 32 padding_vals = calculate_padding(height, width, padded_h, padded_w) kwargs['height'], kwargs['width'] = padded_h, padded_w + pipeline_params = self._prepare_pipeline_params(worker_to_use, **kwargs) - logger.info(f"Initiating GENERATION on {worker_to_use.device} with shape {padded_w}x{padded_h}") + + logger.info(f"Iniciando GERAÇÃO em {worker_to_use.device} com shape {padded_w}x{padded_h}") + if isinstance(worker_to_use.pipeline, LTXMultiScalePipeline): result = worker_to_use.pipeline.video_pipeline(**pipeline_params).images else: result = worker_to_use.generate_video_fragment_internal(**pipeline_params) return result, padding_vals except Exception as e: - logger.error(f"LTX POOL MANAGER: Error during generation on {worker_to_use.device}: {e}", exc_info=True) + logger.error(f"LTX POOL MANAGER: Erro durante a geração em {worker_to_use.device}: {e}", exc_info=True) raise e finally: if worker_to_use and worker_to_use.device.type == 'cuda': with torch.cuda.device(worker_to_use.device): - gc.collect(); torch.cuda.empty_cache() + gc.collect() + torch.cuda.empty_cache() - def refine_latents(self, latents_to_refine: torch.Tensor, **kwargs) -> (torch.Tensor, tuple): - # This function can be expanded later if needed. - pass + def refine_latents(self, latents_to_refine: torch.Tensor, **kwargs) -> Tuple[torch.Tensor, tuple]: + pass # Placeholder class LtxWorker: - """ - Represents a single instance of the LTX-Video pipeline on a specific device. 
- """ + """Representa uma única instância da pipeline LTX-Video em um dispositivo específico.""" def __init__(self, device_id, ltx_config_file): self.cpu_device = torch.device('cpu') self.device = torch.device(device_id if torch.cuda.is_available() else 'cpu') - logger.info(f"LTX Worker ({self.device}): Initializing with config '{ltx_config_file}'...") + logger.info(f"LTX Worker ({self.device}): Inicializando com config '{ltx_config_file}'...") with open(ltx_config_file, "r") as file: self.config = yaml.safe_load(file) self.is_distilled = "distilled" in self.config.get("checkpoint_path", "") - models_dir = LTX_VIDEO_REPO_DIR / "models_downloaded" - logger.info(f"LTX Worker ({self.device}): Preparing to load model...") + logger.info(f"LTX Worker ({self.device}): Preparando para carregar modelo...") model_filename = self.config["checkpoint_path"] model_path = huggingface_hub.hf_hub_download( repo_id="Lightricks/LTX-Video", filename=model_filename, @@ -214,22 +218,20 @@ class LtxWorker: sampler=self.config["sampler"], device='cpu' ) - logger.info(f"LTX Worker ({self.device}): Model ready on CPU. Is distilled model? {self.is_distilled}") + logger.info(f"LTX Worker ({self.device}): Modelo pronto na CPU. É um modelo distilled? {self.is_distilled}") def to_gpu(self): if self.device.type == 'cpu': return - logger.info(f"LTX Worker: Moving pipeline to GPU {self.device}...") + logger.info(f"LTX Worker: Movendo pipeline para a GPU {self.device}...") self.pipeline.to(self.device) if self.device.type == 'cuda' and can_optimize_fp8(): - logger.info(f"LTX Worker ({self.device}): FP8 supported GPU detected. Optimizing...") + logger.info(f"LTX Worker ({self.device}): GPU com suporte a FP8 detectada. Otimizando...") optimize_ltx_worker(self) - logger.info(f"LTX Worker ({self.device}): Optimization complete.") - elif self.device.type == 'cuda': - logger.info(f"LTX Worker ({self.device}): FP8 optimization not supported or disabled.") - + logger.info(f"LTX Worker ({self.device}): Otimização completa.") + def to_cpu(self): if self.device.type == 'cpu': return - logger.info(f"LTX Worker: Unloading pipeline from GPU {self.device}...") + logger.info(f"LTX Worker: Descarregando pipeline da GPU {self.device}...") self.pipeline.to('cpu') gc.collect() if torch.cuda.is_available(): torch.cuda.empty_cache() @@ -237,10 +239,9 @@ class LtxWorker: def generate_video_fragment_internal(self, **kwargs): return self.pipeline(**kwargs).images - def _aduc_prepare_conditioning_patch( - self: LTXVideoPipeline, - conditioning_items: Optional[List[Union[ConditioningItem, "LatentConditioningItem"]]], + self: "LTXVideoPipeline", + conditioning_items: Optional[List[Union["ConditioningItem", "LatentConditioningItem"]]], init_latents: torch.Tensor, num_frames: int, height: int, @@ -252,62 +253,52 @@ def _aduc_prepare_conditioning_patch( init_latents, init_latent_coords = self.patchifier.patchify(latents=init_latents) init_pixel_coords = latent_to_pixel_coords(init_latent_coords, self.vae, causal_fix=self.transformer.config.causal_temporal_positioning) return init_latents, init_pixel_coords, None, 0 - init_conditioning_mask = torch.zeros(init_latents[:, 0, :, :, :].shape, dtype=torch.float32, device=init_latents.device) + + init_conditioning_mask = torch.zeros_like(init_latents[:, 0, ...], dtype=torch.float32, device=init_latents.device) extra_conditioning_latents, extra_conditioning_pixel_coords, extra_conditioning_mask = [], [], [] extra_conditioning_num_latents = 0 - is_latent_mode = hasattr(conditioning_items[0], 'latent_tensor') - if 
is_latent_mode: - for item in conditioning_items: - media_item_latents = item.latent_tensor.to(dtype=init_latents.dtype, device=init_latents.device) - media_frame_number, strength = item.media_frame_number, item.conditioning_strength - if media_frame_number == 0: - f_l, h_l, w_l = media_item_latents.shape[-3:] - init_latents[:, :, :f_l, :h_l, :w_l] = torch.lerp(init_latents[:, :, :f_l, :h_l, :w_l], media_item_latents, strength) - init_conditioning_mask[:, :f_l, :h_l, :w_l] = strength - else: - noise = randn_tensor(media_item_latents.shape, generator=generator, device=media_item_latents.device, dtype=media_item_latents.dtype) - media_item_latents = torch.lerp(noise, media_item_latents, strength) - patched_latents, latent_coords = self.patchifier.patchify(latents=media_item_latents) - pixel_coords = latent_to_pixel_coords(latent_coords, self.vae, causal_fix=self.transformer.config.causal_temporal_positioning) - pixel_coords[:, 0] += media_frame_number - extra_conditioning_num_latents += patched_latents.shape[1] - new_mask = torch.full(patched_latents.shape[:2], strength, dtype=torch.float32, device=init_latents.device) - extra_conditioning_latents.append(patched_latents) - extra_conditioning_pixel_coords.append(pixel_coords) - extra_conditioning_mask.append(new_mask) - else: - for item in conditioning_items: - if not isinstance(item, ConditioningItem): continue - item = self._resize_conditioning_item(item, height, width) - media_item_latents = vae_encode(item.media_item.to(dtype=self.vae.dtype, device=self.vae.device), self.vae, vae_per_channel_normalize=vae_per_channel_normalize).to(dtype=init_latents.dtype) - if item.media_frame_number == 0: - media_item_latents, l_x, l_y = self._get_latent_spatial_position(media_item_latents, item, height, width, strip_latent_border=True) - f_l, h_l, w_l = media_item_latents.shape[-3:] - init_latents[:, :, :f_l, l_y:l_y+h_l, l_x:l_x+w_l] = torch.lerp(init_latents[:, :, :f_l, l_y:l_y+h_l, l_x:l_x+w_l], media_item_latents, item.conditioning_strength) - init_conditioning_mask[:, :f_l, l_y:l_y+h_l, l_x:l_x+w_l] = item.conditioning_strength - else: - logger.warning("Pixel-based conditioning for non-zero frames is not fully implemented in this patch.") + for item in conditioning_items: + if not isinstance(item, LatentConditioningItem): + logger.warning("Patch ADUC: Item de condicionamento não é um LatentConditioningItem e será ignorado.") + continue + + media_item_latents = item.latent_tensor.to(dtype=init_latents.dtype, device=init_latents.device) + media_frame_number, strength = item.media_frame_number, item.conditioning_strength + + if media_frame_number == 0: + f_l, h_l, w_l = media_item_latents.shape[-3:] + init_latents[..., :f_l, :h_l, :w_l] = torch.lerp(init_latents[..., :f_l, :h_l, :w_l], media_item_latents, strength) + init_conditioning_mask[..., :f_l, :h_l, :w_l] = strength + else: + noise = randn_tensor(media_item_latents.shape, generator=generator, device=media_item_latents.device, dtype=media_item_latents.dtype) + media_item_latents = torch.lerp(noise, media_item_latents, strength) + patched_latents, latent_coords = self.patchifier.patchify(latents=media_item_latents) + pixel_coords = latent_to_pixel_coords(latent_coords, self.vae, causal_fix=self.transformer.config.causal_temporal_positioning) + pixel_coords[:, 0] += media_frame_number + extra_conditioning_num_latents += patched_latents.shape[1] + new_mask = torch.full(patched_latents.shape[:2], strength, dtype=torch.float32, device=init_latents.device) + 
extra_conditioning_latents.append(patched_latents) + extra_conditioning_pixel_coords.append(pixel_coords) + extra_conditioning_mask.append(new_mask) + init_latents, init_latent_coords = self.patchifier.patchify(latents=init_latents) init_pixel_coords = latent_to_pixel_coords(init_latent_coords, self.vae, causal_fix=self.transformer.config.causal_temporal_positioning) init_conditioning_mask, _ = self.patchifier.patchify(latents=init_conditioning_mask.unsqueeze(1)) init_conditioning_mask = init_conditioning_mask.squeeze(-1) + if extra_conditioning_latents: init_latents = torch.cat([*extra_conditioning_latents, init_latents], dim=1) init_pixel_coords = torch.cat([*extra_conditioning_pixel_coords, init_pixel_coords], dim=2) init_conditioning_mask = torch.cat([*extra_conditioning_mask, init_conditioning_mask], dim=1) - if self.transformer.use_tpu_flash_attention: - init_latents = init_latents[:, :-extra_conditioning_num_latents] - init_pixel_coords = init_pixel_coords[:, :, :-extra_conditioning_num_latents] - init_conditioning_mask = init_conditioning_mask[:, :-extra_conditioning_num_latents] + return init_latents, init_pixel_coords, init_conditioning_mask, extra_conditioning_num_latents - -# --- Singleton Instantiation --- +# --- Instanciação Singleton --- with open("config.yaml", 'r') as f: config = yaml.safe_load(f) ltx_gpus_required = config['specialists']['ltx']['gpus_required'] ltx_device_ids = hardware_manager.allocate_gpus('LTX', ltx_gpus_required) ltx_config_filename = config['specialists']['ltx']['config_file'] ltx_manager_singleton = LtxPoolManager(device_ids=ltx_device_ids, ltx_config_file_name=ltx_config_filename) -logger.info("Video Specialist (LTX) ready.") \ No newline at end of file +logger.info("Especialista de Vídeo (LTX) pronto.") \ No newline at end of file diff --git a/managers/ltx_pipeline_utils.py b/aduc_framework/managers/ltx_pipeline_utils.py similarity index 100% rename from managers/ltx_pipeline_utils.py rename to aduc_framework/managers/ltx_pipeline_utils.py diff --git a/aduc_framework/managers/mmaudio_manager.py b/aduc_framework/managers/mmaudio_manager.py new file mode 100644 index 0000000000000000000000000000000000000000..b89f94598d37f1445b13d4451f4df20e77d7a7a0 --- /dev/null +++ b/aduc_framework/managers/mmaudio_manager.py @@ -0,0 +1,226 @@ +# managers/mmaudio_manager.py +# +# Copyright (C) 2025 Carlos Rodrigues dos Santos +# +# Version: 3.0.0 (GPU Pool Manager) +# +# Esta versão refatora o MMAudioManager para um modelo de Pool com Workers, +# permitindo o uso de múltiplas GPUs dedicadas para a geração de áudio +# com um sistema de rodízio para gerenciamento eficiente de VRAM. 
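Aside (illustrative, not part of the patch): the worker-pool model described in the header above is the same round-robin scheme already used by LtxPoolManager._get_next_worker(). A minimal standalone sketch of that rotation, with hypothetical worker objects:

    import threading

    class RoundRobinPool:
        def __init__(self, workers):
            self.workers = workers          # one worker object per dedicated GPU
            self.index = 0
            self.lock = threading.Lock()

        def next_worker(self):
            # Rotate so consecutive requests land on different GPUs.
            with self.lock:
                worker = self.workers[self.index]
                self.index = (self.index + 1) % len(self.workers)
                return worker

The real MMAudioPoolManager below adds one refinement: before serving a request it unloads the previously used worker's models in a background thread to keep VRAM pressure low.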
+ +import torch +import logging +import subprocess +import os +import time +import yaml +import gc +import threading +from pathlib import Path +import gradio as gr +import sys + +# Imports relativos para o hardware_manager +from ..tools.hardware_manager import hardware_manager + +logger = logging.getLogger(__name__) + +# --- Gerenciamento de Dependências --- +DEPS_DIR = Path("./deps") +MMAUDIO_REPO_DIR = DEPS_DIR / "MMAudio" +MMAUDIO_REPO_URL = "https://github.com/hkchengrex/MMAudio.git" + +# Lazy-loaded imports +ModelConfig, all_model_cfg, mmaudio_generate, load_video, make_video = None, None, None, None, None +MMAudio, get_my_mmaudio = None, None +FeaturesUtils = None +SequenceConfig = None +FlowMatching = None + +class MMAudioWorker: + """Representa uma única instância do pipeline MMAudio em um dispositivo.""" + def __init__(self, device_id: str): + self.device = torch.device(device_id) + self.cpu_device = torch.device("cpu") + self.dtype = torch.bfloat16 if 'cuda' in self.device.type else torch.float32 + + self.net: 'MMAudio' = None + self.feature_utils: 'FeaturesUtils' = None + self.seq_cfg: 'SequenceConfig' = None + self.model_config: 'ModelConfig' = None + + self._check_and_run_global_setup() + self._lazy_load_mmaudio_modules() + logger.info(f"MMAudio Worker inicializado para o dispositivo {self.device}.") + + def _lazy_load_mmaudio_modules(self): + """Importa dinamicamente os módulos do MMAudio.""" + global ModelConfig, all_model_cfg, mmaudio_generate, load_video, make_video, MMAudio, get_my_mmaudio, FeaturesUtils, SequenceConfig, FlowMatching + if MMAudio is not None: return + + from mmaudio.eval_utils import ModelConfig, all_model_cfg, generate as mmaudio_generate, load_video, make_video + from mmaudio.model.flow_matching import FlowMatching + from mmaudio.model.networks import MMAudio, get_my_mmaudio + from mmaudio.model.utils.features_utils import FeaturesUtils + from mmaudio.model.sequence_config import SequenceConfig + logger.info("Módulos do MMAudio foram carregados dinamicamente.") + + @staticmethod + def _check_and_run_global_setup(): + """Executa o setup de clonagem do repositório e download de modelos uma única vez.""" + setup_flag = DEPS_DIR / "mmaudio.setup.complete" + if setup_flag.exists(): + return True + + logger.info("--- Iniciando Setup Global do MMAudio (primeira execução) ---") + if not MMAUDIO_REPO_DIR.exists(): + DEPS_DIR.mkdir(exist_ok=True) + subprocess.run(["git", "clone", "--depth", "1", MMAUDIO_REPO_URL, str(MMAUDIO_REPO_DIR)], check=True) + + if str(MMAUDIO_REPO_DIR.resolve()) not in sys.path: + sys.path.insert(0, str(MMAUDIO_REPO_DIR.resolve())) + + # Importar após adicionar ao path + from mmaudio.eval_utils import all_model_cfg as cfg + + # Ajustar caminhos e baixar modelos + for cfg_key in cfg: + config = cfg[cfg_key] + config.model_path = MMAUDIO_REPO_DIR / config.model_path + config.vae_path = MMAUDIO_REPO_DIR / config.vae_path + if config.bigvgan_16k_path: + config.bigvgan_16k_path = MMAUDIO_REPO_DIR / config.bigvgan_16k_path + config.synchformer_ckpt = MMAUDIO_REPO_DIR / config.synchformer_ckpt + config.download_if_needed() + + setup_flag.touch() + logger.info("--- Setup Global do MMAudio Concluído ---") + return True + + def initialize_models(self): + """Carrega os modelos do worker para a CPU e depois para a GPU designada.""" + if self.net is not None: return + + self.model_config = all_model_cfg['large_44k_v2'] + self.seq_cfg = self.model_config.seq_cfg + + logger.info(f"Worker {self.device}: Carregando modelo MMAudio para a CPU...") + 
self.net = get_my_mmaudio(self.model_config.model_name).eval() + self.net.load_weights(torch.load(self.model_config.model_path, map_location=self.cpu_device, weights_only=True)) + + self.feature_utils = FeaturesUtils( + tod_vae_ckpt=self.model_config.vae_path, + synchformer_ckpt=self.model_config.synchformer_ckpt, + enable_conditions=True, mode=self.model_config.mode, + bigvgan_vocoder_ckpt=self.model_config.bigvgan_16k_path, + need_vae_encoder=False + ).eval() + + self.net.to(self.device, self.dtype) + self.feature_utils.to(self.device, self.dtype) + logger.info(f"Worker {self.device}: Modelos MMAudio prontos na VRAM.") + + def unload_models(self): + """Descarrega os modelos da VRAM, movendo-os para a CPU.""" + if self.net is None: return + logger.info(f"Worker {self.device}: Descarregando modelos MMAudio da VRAM...") + self.net.to(self.cpu_device) + self.feature_utils.to(self.cpu_device) + del self.net, self.feature_utils, self.seq_cfg, self.model_config + self.net, self.feature_utils, self.seq_cfg, self.model_config = None, None, None, None + gc.collect() + if torch.cuda.is_available(): torch.cuda.empty_cache() + + def generate_audio_internal(self, video_path: str, prompt: str, duration_seconds: float, output_path: str) -> str: + """Lógica de geração de áudio que roda na GPU do worker.""" + negative_prompt = "human voice, speech, talking, singing, narration" + rng = torch.Generator(device=self.device).manual_seed(int(time.time())) + fm = FlowMatching(min_sigma=0, inference_mode='euler', num_steps=25) + + video_info = load_video(Path(video_path), duration_seconds) + self.seq_cfg.duration = video_info.duration_sec + self.net.update_seq_lengths(self.seq_cfg.latent_seq_len, self.seq_cfg.clip_seq_len, self.seq_cfg.sync_seq_len) + + with torch.no_grad(): + audios = mmaudio_generate( + clip_video=video_info.clip_frames.unsqueeze(0).to(self.device, self.dtype), + sync_video=video_info.sync_frames.unsqueeze(0).to(self.device, self.dtype), + text=[prompt], negative_text=[negative_prompt], + feature_utils=self.feature_utils, net=self.net, fm=fm, rng=rng, cfg_strength=4.5 + ) + audio_waveform = audios.float().cpu()[0] + + make_video(video_info, Path(output_path), audio_waveform, sampling_rate=self.seq_cfg.sampling_rate) + return output_path + +class MMAudioPoolManager: + def __init__(self, device_ids: list[str], workspace_dir: str): + logger.info(f"MMAUDIO POOL MANAGER: Criando workers para os dispositivos: {device_ids}") + self.workspace_dir = workspace_dir + if not device_ids or 'cpu' in device_ids: + raise ValueError("MMAudioPoolManager requer GPUs dedicadas.") + self.workers = [MMAudioWorker(device_id) for device_id in device_ids] + self.current_worker_index = 0 + self.lock = threading.Lock() + self.last_cleanup_thread = None + + def _cleanup_worker_thread(self, worker: MMAudioWorker): + logger.info(f"MMAUDIO CLEANUP THREAD: Iniciando limpeza de {worker.device} em background...") + worker.unload_models() + + def generate_audio_for_video(self, video_path: str, prompt: str, duration_seconds: float, output_path_override: str = None) -> str: + if duration_seconds < 1: + logger.warning(f"Vídeo muito curto ({duration_seconds:.2f}s). 
Pulando geração de áudio.") + return video_path + + worker_to_use = None + try: + with self.lock: + if self.last_cleanup_thread and self.last_cleanup_thread.is_alive(): + self.last_cleanup_thread.join() + + worker_to_use = self.workers[self.current_worker_index] + previous_worker_index = (self.current_worker_index - 1 + len(self.workers)) % len(self.workers) + worker_to_cleanup = self.workers[previous_worker_index] + + cleanup_thread = threading.Thread(target=self._cleanup_worker_thread, args=(worker_to_cleanup,)) + cleanup_thread.start() + self.last_cleanup_thread = cleanup_thread + + worker_to_use.initialize_models() + self.current_worker_index = (self.current_worker_index + 1) % len(self.workers) + + logger.info(f"MMAUDIO POOL MANAGER: Gerando áudio em {worker_to_use.device}...") + + output_path = output_path_override or os.path.join(self.workspace_dir, f"{Path(video_path).stem}_with_audio.mp4") + + return worker_to_use.generate_audio_internal( + video_path=video_path, prompt=prompt, duration_seconds=duration_seconds, output_path=output_path + ) + except Exception as e: + logger.error(f"MMAUDIO POOL MANAGER: Erro durante a geração de áudio: {e}", exc_info=True) + raise gr.Error(f"Falha na geração de áudio: {e}") + +# --- Instanciação Singleton --- +class MMAudioPlaceholder: + def generate_audio_for_video(self, video_path, *args, **kwargs): + logger.error("MMAudio não foi inicializado pois nenhuma GPU foi alocada. Pulando etapa de áudio.") + return video_path + +try: + with open("config.yaml", 'r') as f: + config = yaml.safe_load(f) + WORKSPACE_DIR = config['application']['workspace_dir'] + + mmaudio_gpus_required = config['specialists'].get('mmaudio', {}).get('gpus_required', 0) + mmaudio_device_ids = hardware_manager.allocate_gpus('MMAudio', mmaudio_gpus_required) + + if mmaudio_gpus_required > 0 and 'cpu' not in mmaudio_device_ids: + mmaudio_manager_singleton = MMAudioPoolManager(device_ids=mmaudio_device_ids, workspace_dir=WORKSPACE_DIR) + logger.info("Especialista de Áudio (MMAudio Pool) pronto.") + else: + mmaudio_manager_singleton = MMAudioPlaceholder() + logger.warning("MMAudio Pool Manager não foi inicializado. Nenhuma GPU foi requisitada na config.yaml.") +except Exception as e: + logger.critical(f"Falha CRÍTICA ao inicializar o MMAudioManager: {e}", exc_info=True) + mmaudio_manager_singleton = MMAudioPlaceholder() \ No newline at end of file diff --git a/aduc_framework/managers/seedvr_manager.py b/aduc_framework/managers/seedvr_manager.py new file mode 100644 index 0000000000000000000000000000000000000000..c2937dc0b85627928ed78e4b51af89edc3a4d8a5 --- /dev/null +++ b/aduc_framework/managers/seedvr_manager.py @@ -0,0 +1,229 @@ +# managers/seedvr_manager.py +# +# Copyright (C) 2025 Carlos Rodrigues dos Santos +# +# Version: 10.0.0 (Definitive Monkey Patch / Single Instance) +# +# Esta é a arquitetura final e mais robusta. O paralelismo problemático +# é desativado programaticamente via "monkey patching" no decorador `master_only`. +# Isso elimina a necessidade de gerenciar `torch.distributed`, simplificando +# o código e resolvendo a causa raiz de todos os erros de paralelismo. +# A isolação de GPU com CUDA_VISIBLE_DEVICES é mantida como a melhor +# prática para o gerenciamento de hardware. 
+ +import torch +import os +import gc +import logging +import sys +import subprocess +from pathlib import Path +from urllib.parse import urlparse +from torch.hub import download_url_to_file +import mediapy +from einops import rearrange +import shutil +from omegaconf import OmegaConf +import yaml + +from ..tools.hardware_manager import hardware_manager + +logger = logging.getLogger(__name__) + +APP_ROOT = Path("/home/user/app") +DEPS_DIR = APP_ROOT / "deps" +SEEDVR_SPACE_DIR = DEPS_DIR / "SeedVR_Space" +SEEDVR_SPACE_URL = "https://huggingface.co/spaces/ByteDance-Seed/SeedVR2-3B" + +class SeedVrManager: + """Gerencia uma única instância do pipeline SeedVR em uma GPU dedicada e isolada.""" + def __init__(self, device_id: str): + self.global_device_id = device_id + self.local_device_name = 'cuda:0' # O que o processo enxergará + self.gpu_index = self.global_device_id.split(':')[-1] + + self.runner = None + self._check_and_run_global_setup() + logger.info(f"SeedVR Manager (Single Instance) inicializado para operar na GPU {self.global_device_id}.") + + @staticmethod + def _check_and_run_global_setup(): + """Executa o setup de arquivos e aplica o patch para desativar o paralelismo.""" + setup_flag = DEPS_DIR / "seedvr.setup.complete" + if str(APP_ROOT) not in sys.path: sys.path.insert(0, str(APP_ROOT)) + + # O patch deve ser aplicado toda vez que o código roda. + try: + from common import decorators + import functools + + def _passthrough_decorator(func): + @functools.wraps(func) + def wrapped(*args, **kwargs): + return func(*args, **kwargs) + return wrapped + + decorators.master_only = _passthrough_decorator + logger.info("Monkey patch aplicado com sucesso em 'common.decorators.master_only' para desativar o paralelismo.") + except Exception as e: + logger.error(f"Falha ao aplicar o monkey patch: {e}", exc_info=True) + + if setup_flag.exists(): return True + + logger.info("--- Iniciando Setup Global do SeedVR (primeira execução) ---") + if not SEEDVR_SPACE_DIR.exists(): + DEPS_DIR.mkdir(exist_ok=True, parents=True) + subprocess.run(["git", "clone", "--depth", "1", SEEDVR_SPACE_URL, str(SEEDVR_SPACE_DIR)], check=True) + + required_dirs = ["projects", "common", "models", "configs_3b", "configs_7b", "data"] + for dirname in required_dirs: + source, target = SEEDVR_SPACE_DIR / dirname, APP_ROOT / dirname + if not target.exists(): shutil.copytree(source, target) + + try: + import apex + except ImportError: + apex_url = 'https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/apex-0.1-cp310-cp310-linux_x86_64.whl' + apex_wheel_path = _load_file_from_url(url=apex_url, model_dir=str(DEPS_DIR)) + subprocess.run(f"pip install {apex_wheel_path}", check=True, shell=True) + + ckpt_dir = APP_ROOT / 'ckpts' + ckpt_dir.mkdir(exist_ok=True) + model_urls = { + 'vae': 'https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/ema_vae.pth', + 'dit_3b': 'https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/seedvr2_ema_3b.pth', + #'dit_7b': 'https://huggingface.co/ByteDance-Seed/SeedVR2-7B/resolve/main/seedvr2_ema_7b.pth', + 'pos_emb': 'https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/pos_emb.pt', + 'neg_emb': 'https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/neg_emb.pt' + } + for name, url in model_urls.items(): + _load_file_from_url(url=url, model_dir=str(ckpt_dir)) + + setup_flag.touch() + logger.info("--- Setup Global do SeedVR Concluído ---") + + def _initialize_runner(self): + """Carrega o modelo 3B em um ambiente de GPU isolado.""" + if self.runner is not None: 
return + + os.environ['CUDA_VISIBLE_DEVICES'] = self.gpu_index + + from projects.video_diffusion_sr.infer import VideoDiffusionInfer + from common.config import load_config + + logger.info(f"Manager na GPU {self.global_device_id}: Inicializando runner SeedVR 3B...") + + config_path = APP_ROOT / 'configs_3b' / 'main.yaml' + checkpoint_path = APP_ROOT / 'ckpts' / 'seedvr2_ema_3b.pth' + + config = load_config(str(config_path)) + self.runner = VideoDiffusionInfer(config) + OmegaConf.set_readonly(self.runner.config, False) + + self.runner.configure_dit_model(device=self.local_device_name, checkpoint=str(checkpoint_path)) + self.runner.configure_vae_model() + + logger.info(f"Manager na GPU {self.global_device_id}: Runner 3B pronto na VRAM.") + + def _unload_runner(self): + """Descarrega os modelos da VRAM e limpa o ambiente.""" + if self.runner is not None: + del self.runner; self.runner = None + gc.collect(); torch.cuda.empty_cache() + logger.info(f"Manager na GPU {self.global_device_id}: Runner descarregado da VRAM.") + + if 'CUDA_VISIBLE_DEVICES' in os.environ: + del os.environ['CUDA_VISIBLE_DEVICES'] + + def process_video(self, input_video_path: str, output_video_path: str, prompt: str, + steps: int = 100, seed: int = 666) -> str: + """Ciclo completo de carga, processamento e descarga para uma única tarefa.""" + try: + self._initialize_runner() + + device = torch.device(self.local_device_name) + + from common.seed import set_seed + from data.image.transforms.divisible_crop import DivisibleCrop + from data.image.transforms.na_resize import NaResize + from data.video.transforms.rearrange import Rearrange + from projects.video_diffusion_sr.color_fix import wavelet_reconstruction + from torchvision.transforms import Compose, Lambda, Normalize + from torchvision.io.video import read_video + + set_seed(seed, same_across_ranks=True) + self.runner.config.diffusion.timesteps.sampling.steps = steps + self.runner.configure_diffusion() + + video_tensor = read_video(input_video_path, output_format="TCHW")[0] / 255.0 + res_h, res_w = video_tensor.shape[-2:] + video_transform = Compose([ + NaResize(resolution=(res_h * res_w) ** 0.5, mode="area", downsample_only=False), + Lambda(lambda x: torch.clamp(x, 0.0, 1.0)), + DivisibleCrop((16, 16)), Normalize(0.5, 0.5), Rearrange("t c h w -> c t h w"), + ]) + cond_latents = [video_transform(video_tensor.to(device))] + self.runner.dit.to("cpu"); self.runner.vae.to(device) + cond_latents = self.runner.vae_encode(cond_latents) + self.runner.vae.to("cpu"); gc.collect(); torch.cuda.empty_cache(); self.runner.dit.to(device) + + pos_emb = torch.load(APP_ROOT / 'ckpts' / 'pos_emb.pt').to(device) + neg_emb = torch.load(APP_ROOT / 'ckpts' / 'neg_emb.pt').to(device) + text_embeds_dict = {"texts_pos": [pos_emb], "texts_neg": [neg_emb]} + + noises = [torch.randn_like(latent) for latent in cond_latents] + conditions = [self.runner.get_condition(noise, latent_blur=latent, task="sr") for noise, latent in zip(noises, cond_latents)] + + with torch.no_grad(), torch.autocast("cuda", torch.bfloat16, enabled=True): + video_tensors = self.runner.inference(noises=noises, conditions=conditions, dit_offload=True, **text_embeds_dict) + + self.runner.dit.to("cpu"); gc.collect(); torch.cuda.empty_cache(); self.runner.vae.to(device) + samples = self.runner.vae_decode(video_tensors) + final_sample, input_video_sample = samples[0], cond_latents[0] + if final_sample.shape[1] < input_video_sample.shape[1]: + input_video_sample = input_video_sample[:, :final_sample.shape[1]] + + final_sample = 
wavelet_reconstruction(rearrange(final_sample, "c t h w -> t c h w"), rearrange(input_video_sample, "c t h w -> t c h w")) + final_sample = rearrange(final_sample, "t c h w -> t h w c") + final_sample = final_sample.clip(-1, 1).mul_(0.5).add_(0.5).mul_(255).round() + final_sample_np = final_sample.to(torch.uint8).cpu().numpy() + + mediapy.write_video(output_video_path, final_sample_np, fps=24) + return output_video_path + finally: + self._unload_runner() + + +def _load_file_from_url(url, model_dir='./', file_name=None): + os.makedirs(model_dir, exist_ok=True) + filename = file_name or os.path.basename(urlparse(url).path) + cached_file = os.path.abspath(os.path.join(model_dir, filename)) + if not os.path.exists(cached_file): + download_url_to_file(url, cached_file, hash_prefix=None, progress=True) + return cached_file + +# --- Instanciação Singleton --- +class SeedVrPlaceholder: + def process_video(self, input_video_path, *args, **kwargs): + logger.warning("SeedVR está desabilitado (gpus_required: 0). Pulando etapa de masterização HD.") + return input_video_path + +try: + with open("config.yaml", 'r') as f: config = yaml.safe_load(f) + seedvr_gpus_required = config['specialists'].get('seedvr', {}).get('gpus_required', 2) + + if seedvr_gpus_required > 0: + seedvr_device_ids = hardware_manager.allocate_gpus('SeedVR', seedvr_gpus_required) + if seedvr_device_ids and 'cpu' not in seedvr_device_ids: + device_to_use = seedvr_device_ids[0] + seedvr_manager_singleton = SeedVrManager(device_id=device_to_use) + logger.info(f"Especialista de Masterização HD (SeedVR Single Instance) pronto para usar a GPU {device_to_use}.") + else: + seedvr_manager_singleton = SeedVrPlaceholder() + logger.warning("SeedVR não foi inicializado porque nenhuma GPU pôde ser alocada.") + else: + seedvr_manager_singleton = SeedVrPlaceholder() + logger.warning("SeedVR Manager não foi inicializado (gpus_required: 0 na config).") +except Exception as e: + logger.critical(f"Falha CRÍTICA ao inicializar o SeedVrManager: {e}", exc_info=True) + seedvr_manager_singleton = SeedVrPlaceholder() \ No newline at end of file diff --git a/managers/upscaler_specialist.py b/aduc_framework/managers/upscaler_specialist.py similarity index 98% rename from managers/upscaler_specialist.py rename to aduc_framework/managers/upscaler_specialist.py index f3336ea79b53e7f5da27c5bc30e4b9b39e44b820..8981fe1d13ab87f0e8d81d30d66797758ff9f5dc 100644 --- a/managers/upscaler_specialist.py +++ b/aduc_framework/managers/upscaler_specialist.py @@ -5,7 +5,7 @@ import torch import logging from diffusers import LTXLatentUpsamplePipeline -from managers.ltx_manager import ltx_manager_singleton +from ..managers.ltx_manager import ltx_manager_singleton logger = logging.getLogger(__name__) diff --git a/managers/vae_manager.py b/aduc_framework/managers/vae_manager.py similarity index 98% rename from managers/vae_manager.py rename to aduc_framework/managers/vae_manager.py index 214e43e5e36c4630ba9da5d173b37523a4bd5bf2..07aa62fe8461d836bfa3aaa3a94b11c46590428f 100644 --- a/managers/vae_manager.py +++ b/aduc_framework/managers/vae_manager.py @@ -28,7 +28,7 @@ import gc from typing import Generator # Import the source of the VAE model and the low-level functions -from managers.ltx_manager import ltx_manager_singleton +from ..managers.ltx_manager import ltx_manager_singleton from ltx_video.models.autoencoders.vae_encode import vae_encode, vae_decode logger = logging.getLogger(__name__) diff --git a/aduc_framework/orchestrator.py b/aduc_framework/orchestrator.py new file mode 
100644 index 0000000000000000000000000000000000000000..6533860ef500aee1026338343cf92b7a6f1d1e66 --- /dev/null +++ b/aduc_framework/orchestrator.py @@ -0,0 +1,194 @@ +# aduc_framework/orchestrator.py +# +# Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos +# +# Version: 4.1.0 (Multi-Pool with Forced SeedVR 3B) +# +# Esta versão do orquestrador opera com uma arquitetura de múltiplos pools de +# especialistas e simplifica a tarefa de masterização HD, fixando o uso do +# modelo SeedVR 3B e removendo a necessidade de selecionar a versão do modelo. + +import logging +from typing import List, Dict, Any, Tuple, Callable, Optional, Generator +from PIL import Image, ImageOps +import os +import subprocess +import shutil +from pathlib import Path +import time +import gc +import torch + +# Componentes internos do framework +from .director import AducDirector +from .types import GenerationState, PreProductionParams, ProductionParams + +# Engenheiros de alto nível que definem a lógica do fluxo +from .engineers import deformes2d_thinker_singleton, deformes3d_engine_singleton, Deformes4DEngine + +# Managers (Pools) de especialistas que executam as tarefas em hardware dedicado +from .managers.latent_enhancer_manager import latent_enhancer_specialist_singleton +from .managers.seedvr_manager import seedvr_manager_singleton +from .managers.mmaudio_manager import mmaudio_manager_singleton +from .managers.vae_manager import vae_manager_singleton + +# Ferramentas de utilidade +from .tools.video_encode_tool import video_encode_tool_singleton + +logger = logging.getLogger(__name__) + +ProgressCallback = Optional[Callable[[float, str], None]] + +class AducOrchestrator: + """ + Implementa o Maestro (Γ), a camada de orquestração central do Aduc Framework. + Ele recebe solicitações, atualiza o estado de geração, delega tarefas para os + engenheiros e seus pools de especialistas, e retorna o estado atualizado. 
+ """ + def __init__(self, workspace_dir: str): + self.director = AducDirector(workspace_dir) + self.editor = Deformes4DEngine() + self.editor.initialize(workspace_dir) + self.painter = deformes3d_engine_singleton + self.painter.initialize(workspace_dir) + self.device = 'cuda' if torch.cuda.is_available() else 'cpu' + logger.info("ADUC Maestro (Framework Core) pronto para reger a orquestra de especialistas.") + + def get_current_state(self) -> GenerationState: + """Retorna o estado de geração atual.""" + return self.director.get_full_state() + + def process_image_for_story(self, image_path: str, size: int, filename: str) -> str: + """Processa uma imagem de referência para o formato quadrado padrão.""" + img = Image.open(image_path).convert("RGB") + img_square = ImageOps.fit(img, (size, size), Image.Resampling.LANCZOS) + processed_path = os.path.join(self.director.workspace_dir, filename) + img_square.save(processed_path) + logger.info(f"Imagem de referência processada e salva em: {processed_path}") + return processed_path + + # --- ETAPA 1: PRÉ-PRODUÇÃO --- + def task_pre_production(self, params: PreProductionParams, progress_callback: ProgressCallback = None) -> Tuple[List[str], List[str], GenerationState]: + """Orquestra a criação do storyboard e dos keyframes visuais.""" + logger.info("Maestro: Iniciando tarefa de Pré-Produção.") + self.director.update_parameters("pre_producao", params) + + if progress_callback: progress_callback(0.1, "Gerando storyboard...") + storyboard_list = deformes2d_thinker_singleton.generate_storyboard(prompt=params.prompt, num_keyframes=params.num_keyframes, ref_image_paths=params.ref_paths) + self.director.update_pre_production_state(params.prompt, params.ref_paths, storyboard_list) + + if progress_callback: progress_callback(0.2, "Iniciando geração de keyframes...") + keyframes_detailed_data = self.painter.generate_keyframes_from_storyboard(generation_state=self.director.get_full_state_as_dict(), progress_callback=progress_callback) + self.director.update_keyframes_state(keyframes_detailed_data) + + final_keyframe_paths = [kf["caminho_pixel"] for kf in keyframes_detailed_data] + final_state = self.director.get_full_state() + logger.info("Maestro: Tarefa de Pré-Produção concluída.") + return storyboard_list, final_keyframe_paths, final_state + + # --- ETAPA 2: PRODUÇÃO --- + def task_produce_original_movie(self, params: ProductionParams, progress_callback: ProgressCallback = None) -> Tuple[str, List[str], GenerationState]: + """Orquestra a geração do vídeo principal a partir dos keyframes.""" + logger.info("Maestro: Iniciando tarefa de Produção do Filme Original.") + self.director.update_parameters("producao", params) + + result_data = self.editor.generate_original_movie(full_generation_state=self.director.get_full_state_as_dict(), progress_callback=progress_callback) + self.director.update_video_state(result_data["video_data"]) + + final_video_path = result_data["final_path"] + latent_paths = result_data["latent_paths"] + final_state = self.director.get_full_state() + logger.info("Maestro: Tarefa de Produção do Filme Original concluída.") + return final_video_path, latent_paths, final_state + + # --- ETAPA 3: PÓS-PRODUÇÃO (Cadeia de Efeitos) --- + + def task_run_latent_upscaler(self, latent_paths: List[str], chunk_size: int, progress_callback: ProgressCallback = None) -> Generator[Dict[str, Any], None, None]: + """Aplica upscale 2x nos latentes e os decodifica para um novo vídeo.""" + if not self.director.workspace_dir: raise RuntimeError("Orchestrator não 
inicializado.") + if not latent_paths: raise ValueError("Nenhum caminho de latente fornecido para o upscale.") + + logger.info("--- ORQUESTRADOR: Tarefa de Upscaling de Latentes ---") + run_timestamp = int(time.time()) + temp_dir = os.path.join(self.director.workspace_dir, f"temp_upscaled_clips_{run_timestamp}") + os.makedirs(temp_dir, exist_ok=True) + + final_upscaled_clip_paths = [] + num_chunks = -(-len(latent_paths) // chunk_size) + + for i in range(num_chunks): + chunk_paths = latent_paths[i * chunk_size:(i + 1) * chunk_size] + if progress_callback: progress_callback(i / num_chunks, f"Upscalando & Decodificando Lote {i+1}/{num_chunks}") + + tensors_in_chunk = [torch.load(p, map_location=self.device) for p in chunk_paths] + sub_group_latent = torch.cat(tensors_in_chunk, dim=2) + + upscaled_latent_chunk = latent_enhancer_specialist_singleton.upscale(sub_group_latent) + pixel_tensor = vae_manager_singleton.decode(upscaled_latent_chunk) + + current_clip_path = os.path.join(temp_dir, f"upscaled_clip_{i:04d}.mp4") + self.editor.save_video_from_tensor(pixel_tensor, current_clip_path, fps=24) + final_upscaled_clip_paths.append(current_clip_path) + + del tensors_in_chunk, sub_group_latent, upscaled_latent_chunk, pixel_tensor + gc.collect(); torch.cuda.empty_cache() + yield {"progress": (i + 1) / num_chunks} + + final_video_path = os.path.join(self.director.workspace_dir, f"upscaled_movie_{run_timestamp}.mp4") + video_encode_tool_singleton.concatenate_videos(final_upscaled_clip_paths, final_video_path, self.director.workspace_dir) + + shutil.rmtree(temp_dir) + logger.info(f"Upscaling de latentes completo! Vídeo final em: {final_video_path}") + yield {"final_path": final_video_path} + + def task_run_hd_mastering(self, source_video_path: str, steps: int, prompt: str, progress_callback: ProgressCallback = None) -> Generator[Dict[str, Any], None, None]: + """Aplica masterização em HD usando o pool de GPUs do SeedVR com o modelo 3B.""" + if not self.director.workspace_dir: raise RuntimeError("Orchestrator não inicializado.") + logger.info(f"--- ORQUESTRADOR: Tarefa de Masterização HD com SeedVR 3B ---") + + run_timestamp = int(time.time()) + output_path = os.path.join(self.director.workspace_dir, f"hd_mastered_movie_3B_{run_timestamp}.mp4") + + final_path = seedvr_manager_singleton.process_video( + input_video_path=source_video_path, + output_video_path=output_path, + prompt=prompt, + steps=steps + ) + logger.info(f"Masterização HD completa! 
Vídeo final em: {final_path}") + yield {"final_path": final_path} + + def task_run_audio_generation(self, source_video_path: str, audio_prompt: str, progress_callback: ProgressCallback = None) -> Generator[Dict[str, Any], None, None]: + """Gera e adiciona áudio ao vídeo usando o pool de GPUs do MMAudio.""" + if not self.director.workspace_dir: raise RuntimeError("Orchestrator não inicializado.") + logger.info(f"--- ORQUESTRADOR: Tarefa de Geração de Áudio ---") + + if progress_callback: progress_callback(0.1, "Preparando para geração de áudio...") + + run_timestamp = int(time.time()) + source_name = Path(source_video_path).stem + output_path = os.path.join(self.director.workspace_dir, f"{source_name}_with_audio_{run_timestamp}.mp4") + + try: + result = subprocess.run( + ["ffprobe", "-v", "error", "-show_entries", "format=duration", "-of", "default=noprint_wrappers=1:nokey=1", source_video_path], + capture_output=True, text=True, check=True + ) + duration = float(result.stdout.strip()) + except Exception as e: + logger.error(f"Não foi possível obter a duração do vídeo '{source_video_path}': {e}", exc_info=True) + yield {"error": "Falha ao obter duração do vídeo."} + return + + if progress_callback: progress_callback(0.5, "Gerando trilha de áudio...") + + final_path = mmaudio_manager_singleton.generate_audio_for_video( + video_path=source_video_path, + prompt=audio_prompt, + duration_seconds=duration, + output_path_override=output_path + ) + + logger.info(f"Geração de áudio completa! Vídeo com áudio em: {final_path}") + if progress_callback: progress_callback(1.0, "Geração de áudio completa!") + yield {"final_path": final_path} \ No newline at end of file diff --git a/prompts/LICENSE b/aduc_framework/prompts/LICENSE similarity index 100% rename from prompts/LICENSE rename to aduc_framework/prompts/LICENSE diff --git a/prompts/NOTICE.md b/aduc_framework/prompts/NOTICE.md similarity index 100% rename from prompts/NOTICE.md rename to aduc_framework/prompts/NOTICE.md diff --git a/prompts/README.md b/aduc_framework/prompts/README.md similarity index 100% rename from prompts/README.md rename to aduc_framework/prompts/README.md diff --git a/prompts/anticipatory_keyframe_prompt.txt b/aduc_framework/prompts/anticipatory_keyframe_prompt.txt similarity index 100% rename from prompts/anticipatory_keyframe_prompt.txt rename to aduc_framework/prompts/anticipatory_keyframe_prompt.txt diff --git a/prompts/audio_director_prompt.txt b/aduc_framework/prompts/audio_director_prompt.txt similarity index 100% rename from prompts/audio_director_prompt.txt rename to aduc_framework/prompts/audio_director_prompt.txt diff --git a/aduc_framework/prompts/cinematic_director_prompt.txt b/aduc_framework/prompts/cinematic_director_prompt.txt new file mode 100644 index 0000000000000000000000000000000000000000..8e9535fd1625aad6262ccbae4f713b4c05b2fbff --- /dev/null +++ b/aduc_framework/prompts/cinematic_director_prompt.txt @@ -0,0 +1,27 @@ +# ROLE: AI Cinematic Scenarist + +# GOAL: +# Your single, crucial task is to write a rich, cinematic motion prompt. +# This prompt must describe the most logical and compelling action that +# connects the PRESENT visual state to the FUTURE visual state, considering +# the context of the PAST. 
+ +# CONTEXT FOR YOUR PROMPT: +- Global Story Goal: {global_prompt} +- Creative History: {story_history} +- The Past: "{past_scene_desc}" -> [PAST_IMAGE] +- The Present: "{present_scene_desc}" -> [PRESENT_IMAGE] +- The Future: "{future_scene_desc}" -> [FUTURE_IMAGE] + +# CRITICAL PROMPT DIRECTIVES: +# 1. ALWAYS DESCRIBE MOTION: The scene must not be static. +# 2. STYLE: Be descriptive, cinematic, and direct. +# 3. STRUCTURE: In a single paragraph (under 150 words), describe the scene's +# motion, prioritizing in this EXACT order: +# a. Actors/Animals: What are they doing? +# b. Objects: How do they interact? +# c. Camera: How is it moving? +# d. Scenery/Environment: What details add to the mood? + +# RESPONSE FORMAT: +# You MUST respond with ONLY the raw, single-line string for the motion prompt. diff --git a/prompts/director_composition_prompt.txt b/aduc_framework/prompts/director_composition_prompt.txt similarity index 100% rename from prompts/director_composition_prompt.txt rename to aduc_framework/prompts/director_composition_prompt.txt diff --git a/prompts/flux_composition_wrapper_prompt.txt b/aduc_framework/prompts/flux_composition_wrapper_prompt.txt similarity index 100% rename from prompts/flux_composition_wrapper_prompt.txt rename to aduc_framework/prompts/flux_composition_wrapper_prompt.txt diff --git a/prompts/initial_motion_prompt.txt b/aduc_framework/prompts/initial_motion_prompt.txt similarity index 100% rename from prompts/initial_motion_prompt.txt rename to aduc_framework/prompts/initial_motion_prompt.txt diff --git a/prompts/keyframe_selection_prompt.txt b/aduc_framework/prompts/keyframe_selection_prompt.txt similarity index 100% rename from prompts/keyframe_selection_prompt.txt rename to aduc_framework/prompts/keyframe_selection_prompt.txt diff --git a/prompts/sound_director_prompt.txt b/aduc_framework/prompts/sound_director_prompt.txt similarity index 100% rename from prompts/sound_director_prompt.txt rename to aduc_framework/prompts/sound_director_prompt.txt diff --git a/prompts/sound_director_prompt.txt.txt b/aduc_framework/prompts/sound_director_prompt.txt.txt similarity index 100% rename from prompts/sound_director_prompt.txt.txt rename to aduc_framework/prompts/sound_director_prompt.txt.txt diff --git a/prompts/transition_decision_prompt.txt b/aduc_framework/prompts/transition_decision_prompt.txt similarity index 100% rename from prompts/transition_decision_prompt.txt rename to aduc_framework/prompts/transition_decision_prompt.txt diff --git a/prompts/unified_cinematographer_prompt.txt b/aduc_framework/prompts/unified_cinematographer_prompt.txt similarity index 100% rename from prompts/unified_cinematographer_prompt.txt rename to aduc_framework/prompts/unified_cinematographer_prompt.txt diff --git a/prompts/unified_storyboard_prompt.txt b/aduc_framework/prompts/unified_storyboard_prompt.txt similarity index 100% rename from prompts/unified_storyboard_prompt.txt rename to aduc_framework/prompts/unified_storyboard_prompt.txt diff --git a/tools/LICENSE b/aduc_framework/tools/LICENSE similarity index 100% rename from tools/LICENSE rename to aduc_framework/tools/LICENSE diff --git a/tools/NOTICE.md b/aduc_framework/tools/NOTICE.md similarity index 100% rename from tools/NOTICE.md rename to aduc_framework/tools/NOTICE.md diff --git a/tools/README.md b/aduc_framework/tools/README.md similarity index 100% rename from tools/README.md rename to aduc_framework/tools/README.md diff --git a/aduc_framework/tools/__init__.py b/aduc_framework/tools/__init__.py new file mode 100644 
index 0000000000000000000000000000000000000000..9ba18ddfc00bcb0180fbd08c0699852dbb222e19 --- /dev/null +++ b/aduc_framework/tools/__init__.py @@ -0,0 +1,15 @@ +# aduc_framework/tools/__init__.py + +# Expõe os singletons e classes principais do sub-pacote de ferramentas. + +from .hardware_manager import hardware_manager +from .video_encode_tool import video_encode_tool_singleton +from . import optimization +from . import tensor_utils + +__all__ = [ + "hardware_manager", + "video_encode_tool_singleton", + "optimization", + "tensor_utils", +] \ No newline at end of file diff --git a/tools/hardware_manager.py b/aduc_framework/tools/hardware_manager.py similarity index 100% rename from tools/hardware_manager.py rename to aduc_framework/tools/hardware_manager.py diff --git a/tools/optimization.py b/aduc_framework/tools/optimization.py similarity index 100% rename from tools/optimization.py rename to aduc_framework/tools/optimization.py diff --git a/tools/tensor_utils.py b/aduc_framework/tools/tensor_utils.py similarity index 100% rename from tools/tensor_utils.py rename to aduc_framework/tools/tensor_utils.py diff --git a/tools/video_encode_tool.py b/aduc_framework/tools/video_encode_tool.py similarity index 100% rename from tools/video_encode_tool.py rename to aduc_framework/tools/video_encode_tool.py diff --git a/aduc_framework/types.py b/aduc_framework/types.py new file mode 100644 index 0000000000000000000000000000000000000000..308016a296c67dffce62b158ad760ca8bc191288 --- /dev/null +++ b/aduc_framework/types.py @@ -0,0 +1,100 @@ +# aduc_framework/types.py +# +# Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos +# +# Versão 3.1.0 (Framework Data Models with Core Types) +# +# Este arquivo define as estruturas de dados centrais para o Aduc Framework +# usando Pydantic. Ele também inclui tipos de dados de baixo nível, como dataclasses, +# que são usados internamente pelos managers e engineers. + +from pydantic import BaseModel, Field +from typing import List, Dict, Any, Optional +from dataclasses import dataclass +import torch + +# --- Modelos de Parâmetros de Entrada (Pydantic) --- +# Representam os dados que o usuário fornece através de uma interface. 
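+#
+# Exemplo ilustrativo (valores hipotéticos) de como uma interface poderia
+# construir e serializar os parâmetros definidos logo abaixo; a validação
+# dos campos fica a cargo do Pydantic:
+#
+#     params = PreProductionParams(
+#         prompt="Um leão na savana",
+#         num_keyframes=5,
+#         ref_paths=["refs/leao.png"],
+#         resolution=480,
+#         duration_per_fragment=4.0,
+#     )
+#     payload = params.model_dump()  # dict pronto para logs ou persistência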
+ +class PreProductionParams(BaseModel): + """Parâmetros para a etapa de Roteiro e Keyframes.""" + prompt: str = Field(..., description="A ideia geral do filme ou cena.") + num_keyframes: int = Field(..., gt=0, description="O número de keyframes a serem gerados.") + ref_paths: List[str] = Field(..., description="Lista de caminhos para as imagens de referência iniciais.") + resolution: int = Field(..., description="A resolução base (largura/altura) para a geração.") + duration_per_fragment: float = Field(..., gt=0, description="A duração alvo em segundos para cada fragmento de vídeo.") + +class ProductionParams(BaseModel): + """Parâmetros para a etapa de Geração de Vídeo.""" + trim_percent: int = Field(..., ge=0, le=100, description="Poda causal para o mecanismo Déjà-Vu.") + handler_strength: float = Field(..., ge=0.0, le=1.0, description="Força do guia de trajetória (Déjà-Vu).") + destination_convergence_strength: float = Field(..., ge=0.0, le=1.0, description="Força da âncora final (destino).") + guidance_scale: float = Field(..., ge=0.0, description="Escala de orientação do prompt de movimento.") + stg_scale: float = Field(..., ge=0.0, description="Escala de continuidade temporal (STG).") + inference_steps: int = Field(..., gt=0, description="Número de passos de inferência para a geração de vídeo.") + +class GenerationParameters(BaseModel): + """Agrega todos os parâmetros de configuração da geração.""" + pre_producao: Optional[PreProductionParams] = None + producao: Optional[ProductionParams] = None + pos_producao: Optional[Dict[str, Any]] = None + + +# --- Modelos de Artefatos Gerados (Pydantic) --- +# Representam os dados e metadados dos resultados criados pelo framework. + +class MediaRef(BaseModel): + """Representa uma mídia de referência fornecida pelo usuário.""" + id: int + caminho: str + +class Ato(BaseModel): + """Representa uma unidade narrativa (sub-tarefa) do storyboard.""" + id: int + resumo_ato: str + +class KeyframeData(BaseModel): + """Estrutura de dados completa para um único keyframe gerado.""" + id: int + caminho_pixel: str + caminho_latent: str + prompt_keyframe: str + +class VideoFragmentData(BaseModel): + """Metadados sobre a geração de um único fragmento de vídeo entre dois keyframes.""" + id: int + prompt_video: str + +class VideoData(BaseModel): + """Estrutura de dados completa para o vídeo final (ou um grande clipe).""" + id: int + caminho_pixel: str + caminhos_latentes_fragmentos: List[str] + fragmentos_componentes: List[VideoFragmentData] + + +# --- O Modelo de Estado Principal (Pydantic) --- + +class GenerationState(BaseModel): + """ + O "DNA Digital" completo de uma geração. + Este é o objeto de estado central que flui através do framework. + """ + parametros_geracao: GenerationParameters = Field(default_factory=GenerationParameters) + Promt_geral: str = "" + midias_referencia: List[MediaRef] = Field(default_factory=list) + Atos: List[Ato] = Field(default_factory=list) + Keyframe_atos: List[KeyframeData] = Field(default_factory=list) + videos_atos: List[VideoData] = Field(default_factory=list) + + +# --- Tipos de Dados Internos (Dataclass) --- +# Usado para passar dados complexos (como tensores) que não são facilmente +# serializáveis em JSON, entre os componentes internos do framework. 
+ +@dataclass +class LatentConditioningItem: + """Representa uma âncora de condicionamento no espaço latente para o LTX.""" + latent_tensor: torch.Tensor + media_frame_number: int + conditioning_strength: float \ No newline at end of file diff --git a/aduc_orchestrator.py b/aduc_orchestrator.py deleted file mode 100644 index 594a2d8c35854f380346248c8845c7e06b367326..0000000000000000000000000000000000000000 --- a/aduc_orchestrator.py +++ /dev/null @@ -1,199 +0,0 @@ -# aduc_orchestrator.py -# -# Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos -# -# Version: 2.2.0 -# -# This file contains the core ADUC (Automated Discovery and Orchestration of Complex tasks) -# orchestrator, known as the "Maestro" (Γ). Its responsibility is to manage the high-level -# creative workflow of film production. This version is updated to reflect the final -# refactored project structure with `engineers` and `managers`. - -import os -import logging -from typing import List, Dict, Any, Generator, Tuple - -import gradio as gr -from PIL import Image, ImageOps - -from engineers.deformes4D import Deformes4DEngine -from engineers.deformes2D_thinker import deformes2d_thinker_singleton -from engineers.deformes3D import deformes3d_engine_singleton - -# The logger is configured in app.py; here we just get the instance. -logger = logging.getLogger(__name__) - -class AducDirector: - """ - Represents the Scene Director, responsible for managing the production state. - Acts as the "score" for the orchestra, keeping track of all generated artifacts - (script, keyframes, etc.) during the creative process. - """ - def __init__(self, workspace_dir: str): - self.workspace_dir = workspace_dir - os.makedirs(self.workspace_dir, exist_ok=True) - self.state: Dict[str, Any] = {} - logger.info(f"The stage is set. Workspace at '{self.workspace_dir}'.") - - def update_state(self, key: str, value: Any) -> None: - logger.info(f"Notating on the score: State '{key}' updated.") - self.state[key] = value - - def get_state(self, key: str, default: Any = None) -> Any: - return self.state.get(key, default) - -class AducOrchestrator: - """ - Implements the Maestro (Γ), the central orchestration layer of the ADUC architecture. - It does not execute AI tasks directly but delegates each step of the creative - process (scriptwriting, art direction, cinematography) to the appropriate Specialists. - """ - def __init__(self, workspace_dir: str): - self.director = AducDirector(workspace_dir) - self.editor = Deformes4DEngine(workspace_dir) - self.painter = deformes3d_engine_singleton - logger.info("ADUC Maestro is on the podium. Musicians (specialists) are ready.") - - def process_image_for_story(self, image_path: str, size: int, filename: str) -> str: - """ - Pre-processes a reference image, standardizing it for use by the Specialists. - """ - img = Image.open(image_path).convert("RGB") - img_square = ImageOps.fit(img, (size, size), Image.Resampling.LANCZOS) - processed_path = os.path.join(self.director.workspace_dir, filename) - img_square.save(processed_path) - logger.info(f"Reference image processed and saved to: {processed_path}") - return processed_path - - # --- PRE-PRODUCTION TASKS --- - - def task_generate_storyboard(self, prompt: str, num_keyframes: int, ref_image_paths: List[str], - progress: gr.Progress) -> Tuple[List[str], str, Any]: - """ - Delegates the task of creating the storyboard to the Scriptwriter (deformes2D_thinker). - """ - logger.info(f"Act 1, Scene 1: Script. 
Instructing Scriptwriter to create {num_keyframes} scenes.") - progress(0.2, desc="Consulting AI Scriptwriter...") - - storyboard = deformes2d_thinker_singleton.generate_storyboard(prompt, num_keyframes, ref_image_paths) - - logger.info(f"Scriptwriter returned the score: {storyboard}") - self.director.update_state("storyboard", storyboard) - self.director.update_state("processed_ref_paths", ref_image_paths) - return storyboard, ref_image_paths[0], gr.update(visible=True, open=True) - - def task_select_keyframes(self, storyboard: List[str], base_ref_paths: List[str], - pool_ref_paths: List[str]) -> List[str]: - """ - Delegates to the Photographer (deformes2D_thinker) the task of selecting keyframes. - """ - logger.info(f"Act 1, Scene 2 (Photographer Mode): Instructing Photographer to select {len(storyboard)} keyframes.") - selected_paths = deformes2d_thinker_singleton.select_keyframes_from_pool(storyboard, base_ref_paths, pool_ref_paths) - logger.info(f"Photographer selected the following scenes: {[os.path.basename(p) for p in selected_paths]}") - self.director.update_state("keyframes", selected_paths) - return selected_paths - - def task_generate_keyframes(self, storyboard: List[str], initial_ref_path: str, global_prompt: str, - keyframe_resolution: int, progress_callback_factory=None) -> List[str]: - """ - Delegates to the Art Director (Deformes3DEngine) the task of generating keyframes. - """ - logger.info("Act 1, Scene 2 (Art Director Mode): Delegating to Art Director.") - general_ref_paths = self.director.get_state("processed_ref_paths", []) - - final_keyframes = self.painter.generate_keyframes_from_storyboard( - storyboard=storyboard, - initial_ref_path=initial_ref_path, - global_prompt=global_prompt, - keyframe_resolution=keyframe_resolution, - general_ref_paths=general_ref_paths, - progress_callback_factory=progress_callback_factory - ) - self.director.update_state("keyframes", final_keyframes) - logger.info("Maestro: Art Director has completed keyframe generation.") - return final_keyframes - - # --- PRODUCTION & POST-PRODUCTION TASKS --- - - def task_produce_original_movie(self, keyframes: List[str], global_prompt: str, seconds_per_fragment: float, - trim_percent: int, handler_strength: float, - destination_convergence_strength: float, - guidance_scale: float, stg_scale: float, inference_steps: int, - video_resolution: int, use_continuity_director: bool, - progress: gr.Progress) -> Dict[str, Any]: - """ - Delegates the production of the original master video to the Deformes4DEngine. 
- """ - logger.info("Maestro: Delegating production of the original movie to Deformes4DEngine.") - storyboard = self.director.get_state("storyboard", []) - - result = self.editor.generate_original_movie( - keyframes=keyframes, - global_prompt=global_prompt, - storyboard=storyboard, - seconds_per_fragment=seconds_per_fragment, - trim_percent=trim_percent, - handler_strength=handler_strength, - destination_convergence_strength=destination_convergence_strength, - video_resolution=video_resolution, - use_continuity_director=use_continuity_director, - guidance_scale=guidance_scale, - stg_scale=stg_scale, - num_inference_steps=inference_steps, - progress=progress - ) - - self.director.update_state("final_video_path", result["final_path"]) - self.director.update_state("latent_paths", result["latent_paths"]) - logger.info("Maestro: Original movie production complete.") - return result - - def task_run_latent_upscaler(self, latent_paths: List[str], chunk_size: int, progress: gr.Progress) -> Generator[Dict[str, Any], None, None]: - """ - Orchestrates the latent upscaling task. - """ - logger.info(f"Maestro: Delegating latent upscaling task for {len(latent_paths)} fragments.") - for update in self.editor.upscale_latents_and_create_video( - latent_paths=latent_paths, - chunk_size=chunk_size, - progress=progress - ): - if "final_path" in update and update["final_path"]: - self.director.update_state("final_video_path", update["final_path"]) - yield update - break - logger.info("Maestro: Latent upscaling complete.") - - def task_run_hd_mastering(self, source_video_path: str, model_version: str, steps: int, prompt: str, progress: gr.Progress) -> Generator[Dict[str, Any], None, None]: - """ - Orchestrates the HD mastering task. - """ - logger.info(f"Maestro: Delegating HD mastering task using SeedVR {model_version}.") - for update in self.editor.master_video_hd( - source_video_path=source_video_path, - model_version=model_version, - steps=steps, - prompt=prompt, - progress=progress - ): - if "final_path" in update and update["final_path"]: - self.director.update_state("final_video_path", update["final_path"]) - yield update - break - logger.info("Maestro: HD mastering complete.") - - def task_run_audio_generation(self, source_video_path: str, audio_prompt: str, progress: gr.Progress) -> Generator[Dict[str, Any], None, None]: - """ - Orchestrates the audio generation task. 
- """ - logger.info(f"Maestro: Delegating audio generation task.") - for update in self.editor.generate_audio_for_final_video( - source_video_path=source_video_path, - audio_prompt=audio_prompt, - progress=progress - ): - if "final_path" in update and update["final_path"]: - self.director.update_state("final_video_path", update["final_path"]) - yield update - break - logger.info("Maestro: Audio generation complete.") \ No newline at end of file diff --git a/aduc_types.py b/aduc_types.py deleted file mode 100644 index e830e159063a8599b3e6e181c9c1449878ee9feb..0000000000000000000000000000000000000000 --- a/aduc_types.py +++ /dev/null @@ -1,43 +0,0 @@ -# aduc_types.py -# AducSdr: Uma implementação aberta e funcional da arquitetura ADUC-SDR -# Copyright (C) 4 de Agosto de 2025 Carlos Rodrigues dos Santos -# -# Contato: -# Carlos Rodrigues dos Santos -# carlex22@gmail.com -# Rua Eduardo Carlos Pereira, 4125, B1 Ap32, Curitiba, PR, Brazil, CEP 8102025 -# -# Repositórios e Projetos Relacionados: -# GitHub: https://github.com/carlex22/Aduc-sdr -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License... -# PENDING PATENT NOTICE: Please see NOTICE.md. -# -# Version: 1.0.0 -# -# This file defines common data structures and types used across the ADUC-SDR -# framework to ensure consistent data contracts between modules. - -from dataclasses import dataclass -import torch - -@dataclass -class LatentConditioningItem: - """Represents a conditioning anchor in the latent space for the Camera (Ψ).""" - latent_tensor: torch.Tensor - media_frame_number: int - conditioning_strength: float \ No newline at end of file diff --git a/app.py b/app.py index 75b5a15c7174e72b7203390ed46d2b0bb1221e43..7216a5aefcc7b2ab5b8825fb03e26368c0d247f2 100644 --- a/app.py +++ b/app.py @@ -2,31 +2,11 @@ # # Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos # -# Version: 2.3.0 +# Versão 4.0.0 (UI for Forced SeedVR 3B) # -# Contact: -# Carlos Rodrigues dos Santos -# carlex22@gmail.com -# -# Related Repositories and Projects: -# GitHub: https://github.com/carlex22/Aduc-sdr -# YouTube (Results): https://m.youtube.com/channel/UC3EgoJi_Fv7yuDpvfYNtoIQ -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by the -# Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . 
-# -# PENDING PATENT NOTICE: The ADUC method and system implemented in this -# software is in the process of being patented. Please see NOTICE.md for details. +# Esta versão da interface de usuário foi atualizada para refletir as mudanças +# no backend, onde o SeedVR agora usa exclusivamente o modelo 3B. O seletor +# de modelo foi removido para simplificar a experiência do usuário. import gradio as gr import yaml @@ -37,47 +17,27 @@ import shutil import time import json -from aduc_orchestrator import AducOrchestrator +# --- 1. IMPORTAÇÃO DO FRAMEWORK E SEUS TIPOS --- +import aduc_framework +from aduc_framework.types import PreProductionParams, ProductionParams -# --- CUSTOM UI THEME DEFINITION --- -# This theme provides a professional, dark-mode look and feel, suitable for creative tools. +# --- CUSTOM UI THEME E CONFIGURAÇÃO INICIAL --- cinematic_theme = gr.themes.Base( primary_hue=gr.themes.colors.indigo, secondary_hue=gr.themes.colors.purple, neutral_hue=gr.themes.colors.slate, font=(gr.themes.GoogleFont("Inter"), "ui-sans-serif", "system-ui", "sans-serif"), ).set( - # -- Colors -- - body_background_fill="#111827", # Slate 900 - body_text_color="#E5E7EB", # Slate 200 - - # -- Buttons -- - button_primary_background_fill="linear-gradient(90deg, #4F46E5, #8B5CF6)", # Gradient Indigo -> Purple - button_primary_text_color="#FFFFFF", - button_secondary_background_fill="#374151", # Slate 700 - button_secondary_border_color="#4B5563", - button_secondary_text_color="#E5E7EB", - - # -- Blocks and Containers -- - block_background_fill="#1F2937", # Slate 800 - block_border_width="1px", - block_border_color="#374151", # Slate 700 - block_label_background_fill="#374151", - block_label_text_color="#E5E7EB", - block_title_text_color="#FFFFFF", - - # -- Input Fields -- - input_background_fill="#374151", - input_border_color="#4B5563", - input_placeholder_color="#9CA3AF", - - # -- Spacing and Radius -- - #block_radius_size="lg", - #spacing_size="lg", - #layout_gap="lg", + body_background_fill="#111827", body_text_color="#E5E7EB", + button_primary_background_fill="linear-gradient(90deg, #4F46E5, #8B5CF6)", + button_primary_text_color="#FFFFFF", button_secondary_background_fill="#374151", + button_secondary_border_color="#4B5563", button_secondary_text_color="#E5E7EB", + block_background_fill="#1F2937", block_border_width="1px", block_border_color="#374151", + block_label_background_fill="#374151", block_label_text_color="#E5E7EB", + block_title_text_color="#FFFFFF", input_background_fill="#374151", + input_border_color="#4B5563", input_placeholder_color="#9CA3AF", ) -# --- 1. 
CONFIGURATION AND INITIALIZATION --- LOG_FILE_PATH = "aduc_log.txt" if os.path.exists(LOG_FILE_PATH): os.remove(LOG_FILE_PATH) @@ -87,204 +47,149 @@ root_logger = logging.getLogger() root_logger.setLevel(logging.INFO) root_logger.handlers.clear() stream_handler = logging.StreamHandler(sys.stdout) -stream_handler.setLevel(logging.INFO) stream_handler.setFormatter(logging.Formatter(log_format)) root_logger.addHandler(stream_handler) file_handler = logging.FileHandler(LOG_FILE_PATH, mode='w', encoding='utf-8') -file_handler.setLevel(logging.INFO) file_handler.setFormatter(logging.Formatter(log_format)) root_logger.addHandler(file_handler) logger = logging.getLogger(__name__) -i18n = {} -try: - with open("i18n.json", "r", encoding="utf-8") as f: i18n = json.load(f) -except Exception as e: - logger.error(f"Error loading i18n.json: {e}") - i18n = {"pt": {}, "en": {}, "zh": {}} -if 'pt' not in i18n: i18n['pt'] = i18n.get('en', {}) -if 'en' not in i18n: i18n['en'] = {} -if 'zh' not in i18n: i18n['zh'] = i18n.get('en', {}) - try: with open("config.yaml", 'r') as f: config = yaml.safe_load(f) WORKSPACE_DIR = config['application']['workspace_dir'] - aduc = AducOrchestrator(workspace_dir=WORKSPACE_DIR) - logger.info("ADUC Orchestrator and Specialists initialized successfully.") + aduc = aduc_framework.create_aduc_instance(workspace_dir=WORKSPACE_DIR) + logger.info("Interface Gradio inicializada e conectada ao Aduc Framework.") except Exception as e: - logger.error(f"CRITICAL ERROR during initialization: {e}", exc_info=True) + logger.critical(f"ERRO CRÍTICO durante a inicialização: {e}", exc_info=True) + # Em caso de erro crítico, exibe a mensagem na interface do Gradio antes de sair + with gr.Blocks() as demo: + gr.Markdown("# ERRO CRÍTICO NA INICIALIZAÇÃO") + gr.Markdown("Não foi possível iniciar o Aduc Framework. Verifique os logs para mais detalhes.") + gr.Textbox(value=str(e), label="Detalhes do Erro", lines=10) + demo.launch() exit() -# --- 2. UI WRAPPER FUNCTIONS --- +# --- 2. 
FUNÇÕES WRAPPER (CAMADA DE TRADUÇÃO UI <-> FRAMEWORK) --- + def run_pre_production_wrapper(prompt, num_keyframes, ref_files, resolution_str, duration_per_fragment, progress=gr.Progress()): - if not ref_files: raise gr.Error("Please provide at least one reference image.") + if not ref_files: raise gr.Error("Por favor, forneça pelo menos uma imagem de referência.") ref_paths = [aduc.process_image_for_story(f.name, 480, f"ref_processed_{i}.png") for i, f in enumerate(ref_files)] - progress(0.1, desc="Generating storyboard...") - storyboard, initial_ref_path, _ = aduc.task_generate_storyboard(prompt, num_keyframes, ref_paths, progress) - resolution = int(resolution_str.split('x')[0]) - def cb_factory(scene_index, total_scenes): - start_time = time.time() - total_steps = 12 - def callback(pipe_self, step, timestep, callback_kwargs): - elapsed, current_step = time.time() - start_time, step + 1 - if current_step > 0: - it_per_sec = current_step / elapsed - eta = (total_steps - current_step) / it_per_sec if it_per_sec > 0 else 0 - desc = f"Keyframe {scene_index}/{total_scenes}: {int((current_step/total_steps)*100)}% | {current_step}/{total_steps} [{elapsed:.0f}s<{eta:.0f}s, {it_per_sec:.2f}it/s]" - base_progress = 0.2 + (scene_index - 1) * (0.8 / total_scenes) - step_progress = (current_step / total_steps) * (0.8 / total_scenes) - progress(base_progress + step_progress, desc=desc) - return {} - return callback - final_keyframes = aduc.task_generate_keyframes(storyboard, initial_ref_path, prompt, resolution, cb_factory) - return gr.update(value=storyboard), gr.update(value=final_keyframes), gr.update(visible=True, open=True) - -def run_pre_production_photo_wrapper(prompt, num_keyframes, ref_files, progress=gr.Progress()): - if not ref_files or len(ref_files) < 2: raise gr.Error("Photographer Mode requires at least 2 images: one base and one for the scene pool.") - base_ref_paths = [aduc.process_image_for_story(ref_files[0].name, 480, "base_ref_processed_0.png")] - pool_ref_paths = [aduc.process_image_for_story(f.name, 480, f"pool_ref_{i+1}.png") for i, f in enumerate(ref_files[1:])] - progress(0.1, desc="Generating storyboard...") - storyboard, _, _ = aduc.task_generate_storyboard(prompt, num_keyframes, base_ref_paths, progress) - progress(0.5, desc="AI Photographer is selecting the best scenes...") - selected_keyframes = aduc.task_select_keyframes(storyboard, base_ref_paths, pool_ref_paths) - return gr.update(value=storyboard), gr.update(value=selected_keyframes), gr.update(visible=True, open=True) + params = PreProductionParams(prompt=prompt, num_keyframes=int(num_keyframes), ref_paths=ref_paths, resolution=int(resolution_str.split('x')[0]), duration_per_fragment=duration_per_fragment) + storyboard, final_keyframes, updated_state = aduc.task_pre_production(params, progress) + return updated_state.model_dump(), storyboard, final_keyframes, gr.update(visible=True, open=True) -def run_original_production_wrapper(keyframes, prompt, duration, trim_percent, handler_strength, dest_strength, guidance_scale, stg_scale, steps, resolution, progress=gr.Progress()): - yield {original_video_output: gr.update(value=None, visible=True, label="🎬 Producing your original master video... 
Please wait."), final_video_output: gr.update(value=None, visible=True, label="🎬 Production in progress..."), step4_accordion: gr.update(visible=False)} - res = int(resolution.split('x')[0]) - result = aduc.task_produce_original_movie(keyframes, prompt, duration, int(trim_percent), handler_strength, dest_strength, guidance_scale, stg_scale, int(steps), res, use_continuity_director=True, progress=progress) - yield {original_video_output: gr.update(value=result["final_path"], label="✅ Original Master Video"), final_video_output: gr.update(value=result["final_path"], label="Final Film (Result of the Last Step)"), step4_accordion: gr.update(visible=True, open=True), original_latents_paths_state: result["latent_paths"], original_video_path_state: result["final_path"], current_source_video_state: result["final_path"]} +def run_original_production_wrapper(current_state_dict, trim_percent, handler_strength, dest_strength, guidance_scale, stg_scale, steps, progress=gr.Progress()): + yield {original_video_output: gr.update(value=None, visible=True, label="🎬 Produzindo seu filme..."), final_video_output: gr.update(value=None, visible=True, label="🎬 Produção em progresso..."), step4_accordion: gr.update(visible=False)} + production_params = ProductionParams(trim_percent=int(trim_percent), handler_strength=handler_strength, destination_convergence_strength=dest_strength, guidance_scale=guidance_scale, stg_scale=stg_scale, inference_steps=int(steps)) + final_video_path, latent_paths, updated_state = aduc.task_produce_original_movie(params=production_params, progress_callback=progress) + updated_state_dict = updated_state.model_dump() + yield {original_video_output: gr.update(value=final_video_path, label="✅ Filme Original Master"), final_video_output: gr.update(value=final_video_path), step4_accordion: gr.update(visible=True, open=True), original_latents_paths_state: latent_paths, original_video_path_state: final_video_path, current_source_video_state: final_video_path, generation_state_holder: updated_state_dict, generation_data_output: updated_state_dict} def run_upscaler_wrapper(latent_paths, chunk_size, progress=gr.Progress()): - if not latent_paths: raise gr.Error("Cannot run Upscaler. No original latents found. Please complete Step 3 first.") - yield {upscaler_video_output: gr.update(value=None, visible=True, label="Upscaling latents and decoding video..."), final_video_output: gr.update(label="Post-Production in progress: Latent Upscaling...")} + if not latent_paths: raise gr.Error("Não é possível executar o Upscaler. 
Nenhum latente original encontrado.") + yield {upscaler_video_output: gr.update(value=None, visible=True, label="Fazendo upscale dos latentes..."), final_video_output: gr.update(label="Pós-Produção: Upscaler Latente...")} final_path = None - for update in aduc.task_run_latent_upscaler(latent_paths, int(chunk_size), progress=progress): final_path = update['final_path'] - yield {upscaler_video_output: gr.update(value=final_path, label="✅ Latent Upscale Complete"), final_video_output: gr.update(value=final_path), upscaled_video_path_state: final_path, current_source_video_state: final_path} + for update in aduc.task_run_latent_upscaler(latent_paths, int(chunk_size), progress): + if "final_path" in update: final_path = update['final_path'] + yield {upscaler_video_output: gr.update(value=final_path, label="✅ Upscale Latente Concluído"), final_video_output: gr.update(value=final_path), upscaled_video_path_state: final_path, current_source_video_state: final_path} -def run_hd_wrapper(source_video, model_version, steps, global_prompt, progress=gr.Progress()): - if not source_video: raise gr.Error("Cannot run HD Mastering. No source video found. Please complete a previous step first.") - yield {hd_video_output: gr.update(value=None, visible=True, label="Applying HD mastering... This may take a while."), final_video_output: gr.update(label="Post-Production in progress: HD Mastering...")} +def run_hd_wrapper(source_video, steps, global_prompt, progress=gr.Progress()): + if not source_video: raise gr.Error("Não é possível executar a Masterização HD.") + yield {hd_video_output: gr.update(value=None, visible=True, label="Aplicando masterização HD..."), final_video_output: gr.update(label="Pós-Produção: Masterização HD...")} final_path = None - for update in aduc.task_run_hd_mastering(source_video, model_version, int(steps), global_prompt, progress=progress): final_path = update['final_path'] - yield {hd_video_output: gr.update(value=final_path, label="✅ HD Mastering Complete"), final_video_output: gr.update(value=final_path), hd_video_path_state: final_path, current_source_video_state: final_path} + for update in aduc.task_run_hd_mastering(source_video, int(steps), global_prompt, progress): + if "final_path" in update: final_path = update['final_path'] + yield {hd_video_output: gr.update(value=final_path, label="✅ Masterização HD Concluída"), final_video_output: gr.update(value=final_path), hd_video_path_state: final_path, current_source_video_state: final_path} def run_audio_wrapper(source_video, audio_prompt, global_prompt, progress=gr.Progress()): - if not source_video: raise gr.Error("Cannot run Audio Generation. No source video found. 
Please complete a previous step first.") - yield {audio_video_output: gr.update(value=None, visible=True, label="Generating audio and muxing..."), final_video_output: gr.update(label="Post-Production in progress: Audio Generation...")} + if not source_video: raise gr.Error("Não é possível executar a Geração de Áudio.") + yield {audio_video_output: gr.update(value=None, visible=True, label="Gerando áudio e unindo..."), final_video_output: gr.update(label="Pós-Produção: Geração de Áudio...")} final_audio_prompt = audio_prompt if audio_prompt and audio_prompt.strip() else global_prompt final_path = None - for update in aduc.task_run_audio_generation(source_video, final_audio_prompt, progress=progress): final_path = update['final_path'] - yield {audio_video_output: gr.update(value=final_path, label="✅ Audio Generation Complete"), final_video_output: gr.update(value=final_path)} + for update in aduc.task_run_audio_generation(source_video, final_audio_prompt, progress): + if "final_path" in update: final_path = update['final_path'] + yield {audio_video_output: gr.update(value=final_path, label="✅ Geração de Áudio Concluída"), final_video_output: gr.update(value=final_path)} def get_log_content(): try: with open(LOG_FILE_PATH, "r", encoding="utf-8") as f: return f.read() - except FileNotFoundError: - return "Log file not yet created. Start a generation." + except FileNotFoundError: return "Arquivo de log ainda não criado." -def update_ui_language(lang_emoji): - lang_code_map = {"🇧🇷": "pt", "🇺🇸": "en", "🇨🇳": "zh"} - lang_code = lang_code_map.get(lang_emoji, "en") - lang_map = i18n.get(lang_code, i18n.get('en', {})) - # ... This dictionary mapping will be long, so it's defined once in the main block - -# --- 3. GRADIO UI DEFINITION --- +# --- 3. DEFINIÇÃO DA UI GRADIO --- with gr.Blocks(theme=cinematic_theme, css="style.css") as demo: - default_lang = i18n.get('pt', {}) - - original_latents_paths_state = gr.State(value=None) + generation_state_holder = gr.State(value={}) + original_latents_paths_state = gr.State(value=[]) original_video_path_state = gr.State(value=None) + current_source_video_state = gr.State(value=None) upscaled_video_path_state = gr.State(value=None) hd_video_path_state = gr.State(value=None) - current_source_video_state = gr.State(value=None) - title_md = gr.Markdown(f"
{default_lang.get('app_title')}") - subtitle_md = gr.Markdown(f"{default_lang.get('app_subtitle')}") + gr.Markdown("ADUC-SDR 🎬 - O Diretor de Cinema IA") + gr.Markdown("Crie um filme completo com vídeo e áudio, orquestrado por uma equipe de IAs especialistas.
") + with gr.Row(): - lang_selector = gr.Radio(["🇧🇷", "🇺🇸", "🇨🇳"], value="🇧🇷", label=default_lang.get('lang_selector_label')) - resolution_selector = gr.Radio(["480x480", "720x720", "960x960"], value="480x480", label="Base Resolution") + lang_selector = gr.Radio(["🇧🇷", "🇺🇸", "🇨🇳"], value="🇧🇷", label="Idioma / Language") + resolution_selector = gr.Radio(["480x480", "720x720", "960x960"], value="480x480", label="Resolução Base") - with gr.Accordion(default_lang.get('step1_accordion'), open=True) as step1_accordion: - prompt_input = gr.Textbox(label=default_lang.get('prompt_label'), value="A majestic lion walks across the savanna, sits down, and then roars at the setting sun.") - ref_image_input = gr.File(label=default_lang.get('ref_images_label'), file_count="multiple", file_types=["image"]) - with gr.Row(): - num_keyframes_slider = gr.Slider(minimum=3, maximum=42, value=5, step=1, label=default_lang.get('keyframes_label')) - duration_per_fragment_slider = gr.Slider(label=default_lang.get('duration_label'), info=default_lang.get('duration_info'), minimum=2.0, maximum=10.0, value=4.0, step=0.1) + with gr.Accordion("Etapa 1: Roteiro e Cenas-Chave (Pré-Produção)", open=True) as step1_accordion: + prompt_input = gr.Textbox(label="Ideia Geral do Filme", value="Um leão majestoso caminha pela savana, senta-se e ruge para o sol poente.") + ref_image_input = gr.File(label="Imagens de Referência", file_count="multiple", file_types=["image"]) with gr.Row(): - storyboard_and_keyframes_button = gr.Button(default_lang.get('storyboard_and_keyframes_button'), variant="primary") - storyboard_from_photos_button = gr.Button(default_lang.get('storyboard_from_photos_button'), variant="secondary") - step1_mode_b_info_md = gr.Markdown(f"*{default_lang.get('step1_mode_b_info')}*") - storyboard_output = gr.JSON(label=default_lang.get('storyboard_output_label')) - keyframe_gallery = gr.Gallery(label=default_lang.get('keyframes_gallery_label'), visible=True, object_fit="contain", height="auto", type="filepath") - - with gr.Accordion(default_lang.get('step3_accordion'), open=False, visible=False) as step3_accordion: - step3_description_md = gr.Markdown(default_lang.get('step3_description')) - with gr.Accordion(default_lang.get('ltx_advanced_options'), open=False) as ltx_advanced_options_accordion: - with gr.Accordion(default_lang.get('causality_controls_title'), open=True) as causality_accordion: - trim_percent_slider = gr.Slider(minimum=10, maximum=90, value=50, step=5, label=default_lang.get('trim_percent_label'), info=default_lang.get('trim_percent_info')) - with gr.Row(): - forca_guia_slider = gr.Slider(label=default_lang.get('forca_guia_label'), minimum=0.0, maximum=1.0, value=0.5, step=0.05, info=default_lang.get('forca_guia_info')) - convergencia_destino_slider = gr.Slider(label=default_lang.get('convergencia_final_label'), minimum=0.0, maximum=1.0, value=0.75, step=0.05, info=default_lang.get('convergencia_final_info')) - with gr.Accordion(default_lang.get('ltx_pipeline_options'), open=True) as ltx_pipeline_accordion: - with gr.Row(): - guidance_scale_slider = gr.Slider(minimum=1.0, maximum=10.0, value=2.0, step=0.1, label=default_lang.get('guidance_scale_label'), info=default_lang.get('guidance_scale_info')) - stg_scale_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.025, step=0.005, label=default_lang.get('stg_scale_label'), info=default_lang.get('stg_scale_info')) - inference_steps_slider = gr.Slider(minimum=10, maximum=50, value=20, step=1, label=default_lang.get('steps_label'), 
info=default_lang.get('steps_info')) - produce_original_button = gr.Button(default_lang.get('produce_original_button'), variant="primary") - original_video_output = gr.Video(label="Original Master Video", visible=False, interactive=False) - - with gr.Accordion(default_lang.get('step4_accordion'), open=False, visible=False) as step4_accordion: - step4_description_md = gr.Markdown(default_lang.get('step4_description')) - with gr.Accordion(default_lang.get('sub_step_a_upscaler'), open=True) as sub_step_a_accordion: - upscaler_description_md = gr.Markdown(default_lang.get('upscaler_description')) - with gr.Accordion(default_lang.get('upscaler_options'), open=False) as upscaler_options_accordion: - upscaler_chunk_size_slider = gr.Slider(minimum=1, maximum=10, value=2, step=1, label=default_lang.get('upscaler_chunk_size_label'), info=default_lang.get('upscaler_chunk_size_info')) - run_upscaler_button = gr.Button(default_lang.get('run_upscaler_button'), variant="secondary") - upscaler_video_output = gr.Video(label="Upscaled Video", visible=False, interactive=False) - with gr.Accordion(default_lang.get('sub_step_b_hd'), open=True) as sub_step_b_accordion: - hd_description_md = gr.Markdown(default_lang.get('hd_description')) - with gr.Accordion(default_lang.get('hd_options'), open=False) as hd_options_accordion: - hd_model_radio = gr.Radio(["3B", "7B"], value="7B", label=default_lang.get('hd_model_label')) - hd_steps_slider = gr.Slider(minimum=20, maximum=150, value=100, step=5, label=default_lang.get('hd_steps_label'), info=default_lang.get('hd_steps_info')) - run_hd_button = gr.Button(default_lang.get('run_hd_button'), variant="secondary") - hd_video_output = gr.Video(label="HD Mastered Video", visible=False, interactive=False) - with gr.Accordion(default_lang.get('sub_step_c_audio'), open=True) as sub_step_c_accordion: - audio_description_md = gr.Markdown(default_lang.get('audio_description')) - with gr.Accordion(default_lang.get('audio_options'), open=False) as audio_options_accordion: - audio_prompt_input = gr.Textbox(label=default_lang.get('audio_prompt_label'), info=default_lang.get('audio_prompt_info'), lines=3) - run_audio_button = gr.Button(default_lang.get('run_audio_button'), variant="secondary") - audio_video_output = gr.Video(label="Video with Audio", visible=False, interactive=False) - - final_video_output = gr.Video(label=default_lang.get('final_video_label'), visible=False, interactive=False) - with gr.Accordion(default_lang.get('log_accordion_label'), open=False) as log_accordion: - log_display = gr.Textbox(label=default_lang.get('log_display_label'), lines=20, interactive=False, autoscroll=True) - update_log_button = gr.Button(default_lang.get('update_log_button')) - - # --- 4. 
UI EVENT CONNECTIONS --- - all_ui_components = [title_md, subtitle_md, lang_selector, step1_accordion, prompt_input, ref_image_input, num_keyframes_slider, duration_per_fragment_slider, storyboard_and_keyframes_button, storyboard_from_photos_button, step1_mode_b_info_md, storyboard_output, keyframe_gallery, step3_accordion, step3_description_md, produce_original_button, ltx_advanced_options_accordion, causality_accordion, trim_percent_slider, forca_guia_slider, convergencia_destino_slider, ltx_pipeline_accordion, guidance_scale_slider, stg_scale_slider, inference_steps_slider, step4_accordion, step4_description_md, sub_step_a_accordion, upscaler_description_md, upscaler_options_accordion, upscaler_chunk_size_slider, run_upscaler_button, sub_step_b_accordion, hd_description_md, hd_options_accordion, hd_model_radio, hd_steps_slider, run_hd_button, sub_step_c_accordion, audio_description_md, audio_options_accordion, audio_prompt_input, run_audio_button, final_video_output, log_accordion, log_display, update_log_button] - def create_lang_update_fn(): - def update_lang(lang_emoji): - lang_code_map = {"🇧🇷": "pt", "🇺🇸": "en", "🇨🇳": "zh"} - lang_code = lang_code_map.get(lang_emoji, "en") - lang_map = i18n.get(lang_code, i18n.get('en', {})) - return [gr.update(value=f"
{lang_map.get('app_title')}"),gr.update(value=f"{lang_map.get('app_subtitle')}
"),gr.update(label=lang_map.get('lang_selector_label')),gr.update(label=lang_map.get('step1_accordion')),gr.update(label=lang_map.get('prompt_label')),gr.update(label=lang_map.get('ref_images_label')),gr.update(label=lang_map.get('keyframes_label')),gr.update(label=lang_map.get('duration_label'), info=lang_map.get('duration_info')),gr.update(value=lang_map.get('storyboard_and_keyframes_button')),gr.update(value=lang_map.get('storyboard_from_photos_button')),gr.update(value=f"*{lang_map.get('step1_mode_b_info')}*"),gr.update(label=lang_map.get('storyboard_output_label')),gr.update(label=lang_map.get('keyframes_gallery_label')),gr.update(label=lang_map.get('step3_accordion')),gr.update(value=lang_map.get('step3_description')),gr.update(value=lang_map.get('produce_original_button')),gr.update(label=lang_map.get('ltx_advanced_options')),gr.update(label=lang_map.get('causality_controls_title')),gr.update(label=lang_map.get('trim_percent_label'), info=lang_map.get('trim_percent_info')),gr.update(label=lang_map.get('forca_guia_label'), info=lang_map.get('forca_guia_info')),gr.update(label=lang_map.get('convergencia_final_label'), info=lang_map.get('convergencia_final_info')),gr.update(label=lang_map.get('ltx_pipeline_options')),gr.update(label=lang_map.get('guidance_scale_label'), info=lang_map.get('guidance_scale_info')),gr.update(label=lang_map.get('stg_scale_label'), info=lang_map.get('stg_scale_info')),gr.update(label=lang_map.get('steps_label'), info=lang_map.get('steps_info')),gr.update(label=lang_map.get('step4_accordion')),gr.update(value=lang_map.get('step4_description')),gr.update(label=lang_map.get('sub_step_a_upscaler')),gr.update(value=lang_map.get('upscaler_description')),gr.update(label=lang_map.get('upscaler_options')),gr.update(label=lang_map.get('upscaler_chunk_size_label'), info=lang_map.get('upscaler_chunk_size_info')),gr.update(value=lang_map.get('run_upscaler_button')),gr.update(label=lang_map.get('sub_step_b_hd')),gr.update(value=lang_map.get('hd_description')),gr.update(label=lang_map.get('hd_options')),gr.update(label=lang_map.get('hd_model_label')),gr.update(label=lang_map.get('hd_steps_label'), info=lang_map.get('hd_steps_info')),gr.update(value=lang_map.get('run_hd_button')),gr.update(label=lang_map.get('sub_step_c_audio')),gr.update(value=lang_map.get('audio_description')),gr.update(label=lang_map.get('audio_options')),gr.update(label=lang_map.get('audio_prompt_label'), info=lang_map.get('audio_prompt_info')),gr.update(value=lang_map.get('run_audio_button')),gr.update(label=lang_map.get('final_video_label')),gr.update(label=lang_map.get('log_accordion_label')),gr.update(label=lang_map.get('log_display_label')),gr.update(value=lang_map.get('update_log_button'))] - return update_lang - lang_selector.change(fn=create_lang_update_fn(), inputs=lang_selector, outputs=all_ui_components) + num_keyframes_slider = gr.Slider(minimum=3, maximum=42, value=5, step=1, label="Número de Cenas-Chave") + duration_per_fragment_slider = gr.Slider(label="Duração de cada Clipe (s)", info="Duração alvo para cada fragmento de vídeo.", minimum=2.0, maximum=10.0, value=4.0, step=0.1) + storyboard_and_keyframes_button = gr.Button("Gerar Roteiro e Keyframes", variant="primary") + storyboard_output = gr.JSON(label="Roteiro Gerado (Storyboard)") + keyframe_gallery = gr.Gallery(label="Galeria de Cenas-Chave (Keyframes)", visible=True, object_fit="contain", height="auto", type="filepath") + + with gr.Accordion("Etapa 3: Produção do Vídeo Original", open=False, visible=False) as step3_accordion: + 
trim_percent_slider = gr.Slider(minimum=10, maximum=90, value=50, step=5, label="Poda Causal (%)") + handler_strength = gr.Slider(label="Força do Déjà-Vu", minimum=0.0, maximum=1.0, value=0.5, step=0.05) + dest_strength = gr.Slider(label="Força da Âncora Final", minimum=0.0, maximum=1.0, value=0.75, step=0.05) + guidance_scale_slider = gr.Slider(minimum=1.0, maximum=10.0, value=2.0, step=0.1, label="Escala de Orientação") + stg_scale_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.025, step=0.005, label="Escala STG") + inference_steps_slider = gr.Slider(minimum=10, maximum=50, value=20, step=1, label="Passos de Inferência") + produce_original_button = gr.Button("🎬 Produzir Vídeo Original", variant="primary") + original_video_output = gr.Video(label="Filme Original Master", visible=False, interactive=False) + + with gr.Accordion("Etapa 4: Pós-Produção (Opcional)", open=False, visible=False) as step4_accordion: + gr.Markdown("Aplique efeitos de melhoria ao vídeo mais recente. Cada etapa usa o resultado da anterior como fonte.") + with gr.Accordion("A. Upscaler Latente 2x", open=True): + upscaler_chunk_size_slider = gr.Slider(minimum=1, maximum=10, value=2, step=1, label="Fragmentos por Lote") + run_upscaler_button = gr.Button("Executar Upscaler Latente", variant="secondary") + upscaler_video_output = gr.Video(label="Vídeo com Upscale", visible=False, interactive=False) + with gr.Accordion("B. Masterização HD (SeedVR)", open=True): + hd_steps_slider = gr.Slider(minimum=20, maximum=150, value=100, step=5, label="Passos de Inferência HD") + run_hd_button = gr.Button("Executar Masterização HD (Modelo 3B)", variant="secondary") + hd_video_output = gr.Video(label="Vídeo Masterizado em HD", visible=False, interactive=False) + with gr.Accordion("C. Geração de Áudio", open=True): + audio_prompt_input = gr.Textbox(label="Prompt de Áudio Detalhado (Opcional)", lines=3, placeholder="Descreva os sons, efeitos e música desejados. 
Se vazio, usará o prompt geral do filme.") + run_audio_button = gr.Button("Gerar Áudio", variant="secondary") + audio_video_output = gr.Video(label="Vídeo com Áudio", visible=False, interactive=False) + + with gr.Accordion("🧬 DNA Digital da Geração (JSON)", open=False) as data_accordion: + generation_data_output = gr.JSON(label="Estado de Geração Completo") + + final_video_output = gr.Video(label="Filme Final (Resultado da Última Etapa)", visible=False, interactive=False) + + with gr.Accordion("📝 Log de Geração (Detalhado)", open=False) as log_accordion: + log_display = gr.Textbox(label="Log da Sessão", lines=20, interactive=False, autoscroll=True) + update_log_button = gr.Button("Atualizar Log") - storyboard_and_keyframes_button.click(fn=run_pre_production_wrapper, inputs=[prompt_input, num_keyframes_slider, ref_image_input, resolution_selector, duration_per_fragment_slider], outputs=[storyboard_output, keyframe_gallery, step3_accordion]) - storyboard_from_photos_button.click(fn=run_pre_production_photo_wrapper, inputs=[prompt_input, num_keyframes_slider, ref_image_input], outputs=[storyboard_output, keyframe_gallery, step3_accordion]) - produce_original_button.click(fn=run_original_production_wrapper, inputs=[keyframe_gallery, prompt_input, duration_per_fragment_slider, trim_percent_slider, forca_guia_slider, convergencia_destino_slider, guidance_scale_slider, stg_scale_slider, inference_steps_slider, resolution_selector], outputs=[original_video_output, final_video_output, step4_accordion, original_latents_paths_state, original_video_path_state, current_source_video_state]) + # --- 4. CONEXÕES DE EVENTOS DA UI --- + storyboard_and_keyframes_button.click(fn=run_pre_production_wrapper, inputs=[prompt_input, num_keyframes_slider, ref_image_input, resolution_selector, duration_per_fragment_slider], outputs=[generation_state_holder, storyboard_output, keyframe_gallery, step3_accordion]) + produce_original_button.click(fn=run_original_production_wrapper, inputs=[generation_state_holder, trim_percent_slider, handler_strength, dest_strength, guidance_scale_slider, stg_scale_slider, inference_steps_slider], outputs=[original_video_output, final_video_output, step4_accordion, original_latents_paths_state, original_video_path_state, current_source_video_state, generation_state_holder, generation_data_output]) + run_upscaler_button.click(fn=run_upscaler_wrapper, inputs=[original_latents_paths_state, upscaler_chunk_size_slider], outputs=[upscaler_video_output, final_video_output, upscaled_video_path_state, current_source_video_state]) - run_hd_button.click(fn=run_hd_wrapper, inputs=[current_source_video_state, hd_model_radio, hd_steps_slider, prompt_input], outputs=[hd_video_output, final_video_output, hd_video_path_state, current_source_video_state]) + run_hd_button.click(fn=run_hd_wrapper, inputs=[current_source_video_state, hd_steps_slider, prompt_input], outputs=[hd_video_output, final_video_output, hd_video_path_state, current_source_video_state]) run_audio_button.click(fn=run_audio_wrapper, inputs=[current_source_video_state, audio_prompt_input, prompt_input], outputs=[audio_video_output, final_video_output]) + + generation_state_holder.change(fn=lambda state: state, inputs=generation_state_holder, outputs=generation_data_output) update_log_button.click(fn=get_log_content, inputs=[], outputs=[log_display]) -# --- 5. APPLICATION LAUNCH --- +# --- 5. 
INICIALIZAÇÃO DA APLICAÇÃO --- if __name__ == "__main__": if os.path.exists(WORKSPACE_DIR): - logger.info(f"Clearing previous workspace at: {WORKSPACE_DIR}") shutil.rmtree(WORKSPACE_DIR) os.makedirs(WORKSPACE_DIR) - logger.info(f"Application started. Launching Gradio interface...") + logger.info("Aplicação Gradio iniciada. Lançando interface...") demo.queue().launch() \ No newline at end of file diff --git a/app_api.py b/app_api.py new file mode 100644 index 0000000000000000000000000000000000000000..36156ac9b6eec8305c12ca91c2892779f44636a0 --- /dev/null +++ b/app_api.py @@ -0,0 +1,127 @@ +# app_api.py +# +# Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos +# +# Versão 3.0.0 (API Head for Aduc Framework) +# +# Este arquivo implementa um servidor de API usando FastAPI para expor as +# funcionalidades do Aduc Framework. Ele permite o controle programático +# do processo de geração de vídeo. + +import yaml +import logging +import uuid +from typing import Dict + +from fastapi import FastAPI, BackgroundTasks, HTTPException + +# --- 1. IMPORTAÇÃO DO FRAMEWORK E SEUS TIPOS --- +import aduc_framework +from aduc_framework.types import GenerationState, PreProductionParams, ProductionParams + +# --- CONFIGURAÇÃO INICIAL --- +logger = logging.getLogger(__name__) + +# Cria a aplicação FastAPI +app = FastAPI( + title="ADUC-SDR Framework API", + description="API para orquestração de geração de vídeo coerente com IA.", + version="3.0.0" +) + +# Carrega a configuração e inicializa uma instância SINGLETON do framework. +# O framework é pesado e deve ser carregado apenas uma vez na inicialização da API. +try: + with open("config.yaml", 'r') as f: config = yaml.safe_load(f) + WORKSPACE_DIR = config['application']['workspace_dir'] + + aduc = aduc_framework.create_aduc_instance(workspace_dir=WORKSPACE_DIR) + + logger.info("API FastAPI inicializada e conectada ao Aduc Framework.") +except Exception as e: + logger.critical(f"ERRO CRÍTICO durante a inicialização da API: {e}", exc_info=True) + # A API não pode funcionar sem o framework, então saímos se falhar. + exit() + +# --- ARMAZENAMENTO DE TAREFAS EM MEMÓRIA --- +# Em um ambiente de produção real, isso seria substituído por um banco de dados +# ou um cache como Redis para persistir o estado das tarefas. +tasks_state: Dict[str, GenerationState] = {} + + +# --- FUNÇÕES DE BACKGROUND --- + +def run_production_in_background(task_id: str, params: ProductionParams): + """ + Função que executa a tarefa de produção demorada em segundo plano. + Ela opera na instância global 'aduc' para modificar seu estado interno. + """ + logger.info(f"Background task {task_id}: Iniciando produção de vídeo...") + try: + # A tarefa do framework modifica o estado interno da instância 'aduc' + _, _, final_state = aduc.task_produce_original_movie(params=params) + + # Armazena o estado final e completo no nosso "banco de dados" de tarefas + tasks_state[task_id] = final_state + logger.info(f"Background task {task_id}: Produção de vídeo concluída com sucesso.") + except Exception as e: + logger.error(f"Background task {task_id}: Falha na produção. Erro: {e}", exc_info=True) + # Opcional: Atualizar o estado da tarefa com uma mensagem de erro. + + +# --- ENDPOINTS DA API --- + +@app.post("/v1/pre-production", response_model=GenerationState, tags=["Workflow"]) +async def start_pre_production(params: PreProductionParams): + """ + Inicia e executa a etapa de pré-produção (storyboard e keyframes). + + Esta é uma chamada síncrona, pois a pré-produção é relativamente rápida. 
+ Ela retorna o estado de geração completo após a conclusão. + """ + logger.info(f"API: Recebida solicitação de pré-produção com prompt: '{params.prompt[:30]}...'") + try: + _, _, updated_state = aduc.task_pre_production(params=params) + return updated_state + except Exception as e: + logger.error(f"API: Erro na pré-produção: {e}", exc_info=True) + raise HTTPException(status_code=500, detail=f"Erro interno durante a pré-produção: {e}") + +@app.post("/v1/production", status_code=202, tags=["Workflow"]) +async def start_production(params: ProductionParams, background_tasks: BackgroundTasks): + """ + Inicia a tarefa de produção de vídeo principal em segundo plano. + + Esta chamada retorna imediatamente com um `task_id`. Use o endpoint + `/v1/status/{task_id}` para verificar o progresso e obter o resultado final. + """ + task_id = str(uuid.uuid4()) + logger.info(f"API: Recebida solicitação de produção. Criando tarefa de background com ID: {task_id}") + + # Armazena o estado atual (pré-produção) antes de iniciar a nova tarefa + tasks_state[task_id] = aduc.get_current_state() + + # Adiciona a função demorada para ser executada em segundo plano + background_tasks.add_task(run_production_in_background, task_id, params) + + return {"message": "Produção de vídeo iniciada em segundo plano.", "task_id": task_id} + +@app.get("/v1/status/{task_id}", response_model=GenerationState, tags=["Workflow"]) +async def get_task_status(task_id: str): + """ + Verifica o estado de uma tarefa de geração em andamento ou concluída. + """ + logger.info(f"API: Verificando status da tarefa {task_id}") + state = tasks_state.get(task_id) + if not state: + raise HTTPException(status_code=404, detail="ID de tarefa não encontrado.") + + # Retorna o estado mais recente que temos para essa tarefa + return state + +@app.get("/health", tags=["Infra"]) +async def health_check(): + """ + Endpoint simples para verificar se a API está online. + """ + return {"status": "ok"} \ No newline at end of file diff --git a/config.yaml b/config.yaml index 7aab03c427150e2f8091f5f20ba466f1af81239e..4b9c5f70b80b25e4aacbf2200c22641d36a61b86 100644 --- a/config.yaml +++ b/config.yaml @@ -9,11 +9,16 @@ sdk: gradio app_file: app.py specialists: + + seedvr: + gpus_required: 2 + + flux: # Define quantas GPUs o pool do Flux deve tentar alocar. # Se não houver GPUs suficientes, o hardware_manager lançará um erro. # Se 0, usará a CPU. - gpus_required: 4 + gpus_required: 0 ltx: # Define quantas GPUs o pool do LTX deve tentar alocar. @@ -21,4 +26,15 @@ specialists: # Aponta para o arquivo de configuração específico do modelo LTX. # Alterado para usar o modelo 0.9.8-dev. 
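[Editorial aside, not part of the patch: the 'specialists' block being extended in config.yaml above defines per-specialist GPU allocation (gpus_required, where 0 means CPU and an insufficient pool makes the hardware_manager raise an error, per the comments) plus the LTX model config file. A minimal, illustrative sketch of how a consumer might read this block, assuming only PyYAML as already used in app_api.py; no framework internals are called here:]

import yaml

# Load the same config.yaml that app_api.py reads at startup.
with open("config.yaml", "r") as f:
    config = yaml.safe_load(f)

for name, spec in config.get("specialists", {}).items():
    gpus = spec.get("gpus_required", 0)
    # Per the config.yaml comments: 0 -> run on CPU; otherwise the pool
    # tries to allocate this many GPUs (hardware_manager errors if it can't).
    device = "cpu" if gpus == 0 else f"{gpus} GPU(s)"
    print(f"{name}: {device}")

# The LTX specialist additionally points to its own model config file:
print("LTX model config:", config["specialists"]["ltx"].get("config_file"))

[End of aside; the config.yaml hunk continues below.]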
- config_file: "ltxv-13b-0.9.8-distilled.yaml" \ No newline at end of file + config_file: "ltxv-13b-0.9.8-distilled.yaml" + enable_prompt_enhancement: false + + + mmaudio: + gpus_required: 2 + + + prompt_enhancer: + image_caption_model: "MiaoshouAI/Florence-2-large-PromptGen-v2.0" + llm_model: "unsloth/Llama-3.2-3B-Instruct" + prompt_file: "prompts/cinematic_director_prompt.txt" \ No newline at end of file diff --git a/engineers/__init__.py b/engineers/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/engineers/deformes3D.py b/engineers/deformes3D.py deleted file mode 100644 index 6f9dda89b06101fbef4f3797420b1599dbce9f18..0000000000000000000000000000000000000000 --- a/engineers/deformes3D.py +++ /dev/null @@ -1,171 +0,0 @@ -# engineers/deformes3D.py -# -# Copyright (C) 2025 Carlos Rodrigues dos Santos -# -# Version: 1.5.1 -# -# This version maintains the core FLUX-based keyframe generation and adds the -# LTX-based "enrichment" as a secondary, experimental step for each keyframe, -# allowing for direct comparison without altering the primary workflow. - -from PIL import Image, ImageOps -import os -import time -import logging -import gradio as gr -import yaml -import torch -import numpy as np - -from managers.flux_kontext_manager import flux_kontext_singleton -from engineers.deformes2D_thinker import deformes2d_thinker_singleton -from aduc_types import LatentConditioningItem -from managers.ltx_manager import ltx_manager_singleton -from managers.vae_manager import vae_manager_singleton -from managers.latent_enhancer_manager import latent_enhancer_specialist_singleton - -logger = logging.getLogger(__name__) - -class Deformes3DEngine: - """ - ADUC Specialist for static image (keyframe) generation. - """ - def __init__(self, workspace_dir): - self.workspace_dir = workspace_dir - self.image_generation_helper = flux_kontext_singleton - logger.info("3D Engine (Image Specialist) ready to receive orders from the Maestro.") - - def _generate_single_keyframe(self, prompt: str, reference_images: list[Image.Image], output_filename: str, width: int, height: int, callback: callable = None) -> str: - """ - Low-level function that generates a single image using the LTX helper. - """ - logger.info(f"Generating keyframe '{output_filename}' with prompt: '{prompt}'") - generated_image = self.image_generation_helper.generate_image( - reference_images=reference_images, prompt=prompt, width=width, - height=height, seed=int(time.time()), callback=callback - ) - final_path = os.path.join(self.workspace_dir, output_filename) - generated_image.save(final_path) - logger.info(f"Keyframe successfully saved to: {final_path}") - return final_path - - def generate_keyframes_from_storyboard(self, storyboard: list, initial_ref_path: str, global_prompt: str, keyframe_resolution: int, general_ref_paths: list, progress_callback_factory: callable = None): - """ - Orchestrates the generation of all keyframes. 
- """ - current_base_image_path = initial_ref_path - previous_prompt = "N/A (initial reference image)" - final_keyframes_gallery = [] #[current_base_image_path] - width, height = keyframe_resolution, keyframe_resolution - target_resolution_tuple = (width, height) - - num_keyframes_to_generate = len(storyboard) - 1 - logger.info(f"IMAGE SPECIALIST: Received order to generate {num_keyframes_to_generate} keyframes (LTX versions).") - - for i in range(num_keyframes_to_generate): - scene_index = i + 1 - current_scene = storyboard[i] - future_scene = storyboard[i+1] - progress_callback_flux = progress_callback_factory(scene_index, num_keyframes_to_generate) if progress_callback_factory else None - - logger.info(f"--> Generating Keyframe {scene_index}/{num_keyframes_to_generate}...") - - # --- STEP A: Generate with FLUX (Primary Method) --- - logger.info(f" - Step A: Generating with keyframe...") - - img_prompt = deformes2d_thinker_singleton.get_anticipatory_keyframe_prompt( - global_prompt=global_prompt, scene_history=previous_prompt, - current_scene_desc=current_scene, future_scene_desc=future_scene, - last_image_path=current_base_image_path, fixed_ref_paths=general_ref_paths - ) - - #flux_ref_paths = list(set([current_base_image_path] + general_ref_paths)) - #flux_ref_images = [Image.open(p) for p in flux_ref_paths] - - #flux_keyframe_path = self._generate_single_keyframe( - # prompt=img_prompt, reference_images=flux_ref_images, - # output_filename=f"keyframe_{scene_index}_flux.png", width=width, height=height, - # callback=progress_callback_flux - #) - #final_keyframes_gallery.append(flux_keyframe_path) - - # --- STEP B: LTX Enrichment Experiment --- - #logger.info(f" - Step B: Generating enrichment with LTX...") - - ltx_context_paths = [] - context_paths = [] - context_paths = [current_base_image_path] + [p for p in general_ref_paths if p != current_base_image_path][:3] - - ltx_context_paths = list(reversed(context_paths)) - logger.info(f" - LTX Context Order (Reversed): {[os.path.basename(p) for p in ltx_context_paths]}") - - ltx_conditioning_items = [] - - weight = 0.6 - for idx, path in enumerate(ltx_context_paths): - img_pil = Image.open(path).convert("RGB") - img_processed = self._preprocess_image_for_latent_conversion(img_pil, target_resolution_tuple) - pixel_tensor = self._pil_to_pixel_tensor(img_processed) - latent_tensor = vae_manager_singleton.encode(pixel_tensor) - - ltx_conditioning_items.append(LatentConditioningItem(latent_tensor, 0, weight)) - - if idx >= 0: - weight -= 0.1 - - ltx_base_params = {"guidance_scale": 1.0, "stg_scale": 0.001, "num_inference_steps": 25} - generated_latents, _ = ltx_manager_singleton.generate_latent_fragment( - height=height, width=width, - conditioning_items_data=ltx_conditioning_items, - motion_prompt=img_prompt, - video_total_frames=48, - video_fps=24, - **ltx_base_params - ) - - final_latent = generated_latents[:, :, -1:, :, :] - upscaled_latent = latent_enhancer_specialist_singleton.upscale(final_latent) - enriched_pixel_tensor = vae_manager_singleton.decode(upscaled_latent) - - ltx_keyframe_path = os.path.join(self.workspace_dir, f"keyframe_{scene_index}_ltx.png") - self.save_image_from_tensor(enriched_pixel_tensor, ltx_keyframe_path) - final_keyframes_gallery.append(ltx_keyframe_path) - - # Use the FLUX keyframe as the base for the next iteration to maintain the primary narrative path - current_base_image_path = ltx_keyframe_path #flux_keyframe_path - previous_prompt = img_prompt - - logger.info(f"IMAGE SPECIALIST: Generation of all keyframe 
versions (LTX) complete.") - return final_keyframes_gallery - - # --- HELPER FUNCTIONS --- - - def _preprocess_image_for_latent_conversion(self, image: Image.Image, target_resolution: tuple) -> Image.Image: - """Resizes and fits an image to the target resolution for VAE encoding.""" - if image.size != target_resolution: - return ImageOps.fit(image, target_resolution, Image.Resampling.LANCZOS) - return image - - def _pil_to_pixel_tensor(self, pil_image: Image.Image) -> torch.Tensor: - """Helper to convert PIL to the 5D pixel tensor the VAE expects.""" - image_np = np.array(pil_image).astype(np.float32) / 255.0 - tensor = torch.from_numpy(image_np).permute(2, 0, 1).unsqueeze(0).unsqueeze(2) - return (tensor * 2.0) - 1.0 - - def save_image_from_tensor(self, pixel_tensor: torch.Tensor, path: str): - """Helper to save a 1-frame pixel tensor as an image.""" - tensor_chw = pixel_tensor.squeeze(0).squeeze(1) - tensor_hwc = tensor_chw.permute(1, 2, 0) - tensor_hwc = (tensor_hwc.clamp(-1, 1) + 1) / 2.0 - image_np = (tensor_hwc.cpu().float().numpy() * 255).astype(np.uint8) - Image.fromarray(image_np).save(path) - -# --- Singleton Instantiation --- -try: - with open("config.yaml", 'r') as f: - config = yaml.safe_load(f) - WORKSPACE_DIR = config['application']['workspace_dir'] - deformes3d_engine_singleton = Deformes3DEngine(workspace_dir=WORKSPACE_DIR) -except Exception as e: - logger.error(f"Could not initialize Deformes3DEngine: {e}", exc_info=True) - deformes3d_engine_singleton = None \ No newline at end of file diff --git a/engineers/deformes4D.py b/engineers/deformes4D.py deleted file mode 100644 index 21e7cd96df16d6abd0ea1ac9fda5112c604d6e7a..0000000000000000000000000000000000000000 --- a/engineers/deformes4D.py +++ /dev/null @@ -1,338 +0,0 @@ -# engineers/deformes4D.py -# -# AducSdr: Uma implementação aberta e funcional da arquitetura ADUC-SDR -# Copyright (C) 4 de Agosto de 2025 Carlos Rodrigues dos Santos -# -# Contato: -# Carlos Rodrigues dos Santos -# carlex22@gmail.com -# Rua Eduardo Carlos Pereira, 4125, B1 Ap32, Curitiba, PR, Brazil, CEP 8102025 -# -# Repositórios e Projetos Relacionados: -# GitHub: https://github.com/carlex22/Aduc-sdr -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License... -# PENDING PATENT NOTICE: Please see NOTICE.md. 
-# -# Version 2.0.1 - -import os -import time -import imageio -import numpy as np -import torch -import logging -from PIL import Image, ImageOps -from dataclasses import dataclass -import gradio as gr -import subprocess -import gc -import shutil -from pathlib import Path -from typing import List, Tuple, Generator, Dict, Any - -from aduc_types import LatentConditioningItem -from managers.ltx_manager import ltx_manager_singleton -from managers.latent_enhancer_manager import latent_enhancer_specialist_singleton -from managers.vae_manager import vae_manager_singleton -from engineers.deformes2D_thinker import deformes2d_thinker_singleton -from managers.seedvr_manager import seedvr_manager_singleton -from managers.mmaudio_manager import mmaudio_manager_singleton -from tools.video_encode_tool import video_encode_tool_singleton - -logger = logging.getLogger(__name__) - -class Deformes4DEngine: - """ - Implements the Camera (Ψ) and Distiller (Δ) of the ADUC-SDR architecture. - Orchestrates the generation, latent post-production, and final rendering of video fragments. - """ - def __init__(self, workspace_dir="deformes_workspace"): - self.workspace_dir = workspace_dir - self.device = 'cuda' if torch.cuda.is_available() else 'cpu' - logger.info("Deformes4D Specialist (ADUC-SDR Executor) initialized.") - os.makedirs(self.workspace_dir, exist_ok=True) - - # --- HELPER METHODS --- - - def save_video_from_tensor(self, video_tensor: torch.Tensor, path: str, fps: int = 24): - """Saves a pixel-space tensor as an MP4 video file.""" - if video_tensor is None or video_tensor.ndim != 5 or video_tensor.shape[2] == 0: return - video_tensor = video_tensor.squeeze(0).permute(1, 2, 3, 0) - video_tensor = (video_tensor.clamp(-1, 1) + 1) / 2.0 - video_np = (video_tensor.detach().cpu().float().numpy() * 255).astype(np.uint8) - with imageio.get_writer(path, fps=fps, codec='libx264', quality=8, output_params=['-pix_fmt', 'yuv420p']) as writer: - for frame in video_np: writer.append_data(frame) - - def read_video_to_tensor(self, video_path: str) -> torch.Tensor: - """Reads a video file and converts it into a pixel-space tensor.""" - with imageio.get_reader(video_path, 'ffmpeg') as reader: - frames = [frame for frame in reader] - - frames_np = np.stack(frames, axis=0).astype(np.float32) / 255.0 - # (F, H, W, C) -> (C, F, H, W) - tensor = torch.from_numpy(frames_np).permute(3, 0, 1, 2) - tensor = tensor.unsqueeze(0) # (B, C, F, H, W) - tensor = (tensor * 2.0) - 1.0 # Normalize to [-1, 1] - return tensor.to(self.device) - - def _preprocess_image_for_latent_conversion(self, image: Image.Image, target_resolution: tuple) -> Image.Image: - """Resizes and fits an image to the target resolution for VAE encoding.""" - if image.size != target_resolution: - return ImageOps.fit(image, target_resolution, Image.Resampling.LANCZOS) - return image - - def pil_to_latent(self, pil_image: Image.Image) -> torch.Tensor: - """Converts a PIL Image to a latent tensor by calling the VaeManager.""" - image_np = np.array(pil_image).astype(np.float32) / 255.0 - tensor = torch.from_numpy(image_np).permute(2, 0, 1).unsqueeze(0).unsqueeze(2) - tensor = (tensor * 2.0) - 1.0 - return vae_manager_singleton.encode(tensor) - - # --- CORE ADUC-SDR LOGIC --- - - def generate_original_movie(self, keyframes: list, global_prompt: str, storyboard: list, - seconds_per_fragment: float, trim_percent: int, - handler_strength: float, destination_convergence_strength: float, - video_resolution: int, use_continuity_director: bool, - guidance_scale: float, stg_scale: 
float, num_inference_steps: int, - progress: gr.Progress = gr.Progress()): - FPS = 24 - FRAMES_PER_LATENT_CHUNK = 8 - LATENT_PROCESSING_CHUNK_SIZE = 4 - - run_timestamp = int(time.time()) - temp_latent_dir = os.path.join(self.workspace_dir, f"temp_latents_{run_timestamp}") - temp_video_clips_dir = os.path.join(self.workspace_dir, f"temp_clips_{run_timestamp}") - os.makedirs(temp_latent_dir, exist_ok=True) - os.makedirs(temp_video_clips_dir, exist_ok=True) - - total_frames_brutos = self._quantize_to_multiple(int(seconds_per_fragment * FPS), FRAMES_PER_LATENT_CHUNK) - frames_a_podar = self._quantize_to_multiple(int(total_frames_brutos * (trim_percent / 100)), FRAMES_PER_LATENT_CHUNK) - latents_a_podar = frames_a_podar // FRAMES_PER_LATENT_CHUNK - - #if frames_a_podar % 2 == 0: - # frames_a_podar = frames_a_podar-1 - - total_latent_frames = total_frames_brutos // FRAMES_PER_LATENT_CHUNK - - DEJAVU_FRAME_TARGET = frames_a_podar - 1 if frames_a_podar > 0 else 0 - DESTINATION_FRAME_TARGET = total_frames_brutos - 1 - - base_ltx_params = {"guidance_scale": guidance_scale, "stg_scale": stg_scale, "num_inference_steps": num_inference_steps, "rescaling_scale": 0.15, "image_cond_noise_scale": 0.00} - keyframe_paths = [item[0] if isinstance(item, tuple) else item for item in keyframes] - story_history = "" - target_resolution_tuple = (video_resolution, video_resolution) - eco_latent_for_next_loop, dejavu_latent_for_next_loop = None, None - latent_fragment_paths = [] - - if len(keyframe_paths) < 2: raise gr.Error(f"Generation requires at least 2 keyframes. You provided {len(keyframe_paths)}.") - num_transitions_to_generate = len(keyframe_paths) - 1 - - logger.info("--- STARTING STAGE 1: Latent Fragment Generation ---") - for i in range(num_transitions_to_generate): - fragment_index = i + 1 - progress(i / num_transitions_to_generate, desc=f"Generating Latent {fragment_index}/{num_transitions_to_generate}") - past_keyframe_path = keyframe_paths[i - 1] if i > 0 else keyframe_paths[i] - start_keyframe_path = keyframe_paths[i] - destination_keyframe_path = keyframe_paths[i + 1] - future_story_prompt = storyboard[i + 1] if (i + 1) < len(storyboard) else "The final scene." - logger.info(f"Calling deformes2D_thinker to generate cinematic decision for fragment {fragment_index}...") - decision = deformes2d_thinker_singleton.get_cinematic_decision(global_prompt, story_history, past_keyframe_path, start_keyframe_path, destination_keyframe_path, storyboard[i - 1] if i > 0 else "The beginning.", storyboard[i], future_story_prompt) - transition_type, motion_prompt = decision["transition_type"], decision["motion_prompt"] - story_history += f"\n- Act {fragment_index}: {motion_prompt}" - - conditioning_items = [] - if eco_latent_for_next_loop is None: - img_start = self._preprocess_image_for_latent_conversion(Image.open(start_keyframe_path).convert("RGB"), target_resolution_tuple) - conditioning_items.append(LatentConditioningItem(self.pil_to_latent(img_start), 0, 1.0)) - else: - conditioning_items.append(LatentConditioningItem(eco_latent_for_next_loop, 0, 1.0)) - conditioning_items.append(LatentConditioningItem(dejavu_latent_for_next_loop, DEJAVU_FRAME_TARGET, handler_strength)) - - if transition_type == "cutx": - logger.info(f"Cinematic Director chose a 'cut'. 
Creating FFmpeg transition bridge...") - bridge_duration_seconds = FRAMES_PER_LATENT_CHUNK / FPS - bridge_video_path = video_encode_tool_singleton.create_transition_bridge( - start_image_path=start_keyframe_path, end_image_path=destination_keyframe_path, - duration=bridge_duration_seconds, fps=FPS, target_resolution=target_resolution_tuple, - workspace_dir=self.workspace_dir - ) - bridge_pixel_tensor = self.read_video_to_tensor(bridge_video_path) - bridge_latent_tensor = vae_manager_singleton.encode(bridge_pixel_tensor) - final_fade_latent = bridge_latent_tensor[:, :, -2:, :, :] - conditioning_items.append(LatentConditioningItem(final_fade_latent, total_latent_frames - 16, 0.95)) - #img_dest = self._preprocess_image_for_latent_conversion(Image.open(destination_keyframe_path).convert("RGB"), target_resolution_tuple) - #conditioning_items.append(LatentConditioningItem(self.pil_to_latent(img_dest), DESTINATION_FRAME_TARGET, destination_convergence_strength * 0.5)) - del bridge_pixel_tensor, bridge_latent_tensor, final_fade_latent - if os.path.exists(bridge_video_path): os.remove(bridge_video_path) - else: - img_dest = self._preprocess_image_for_latent_conversion(Image.open(destination_keyframe_path).convert("RGB"), target_resolution_tuple) - conditioning_items.append(LatentConditioningItem(self.pil_to_latent(img_dest), DESTINATION_FRAME_TARGET, destination_convergence_strength)) - - current_ltx_params = {**base_ltx_params, "motion_prompt": motion_prompt} - logger.info(f"Calling LTX to generate video latents for fragment {fragment_index} ({total_frames_brutos} frames)...") - latents_brutos, _ = self._generate_latent_tensor_internal(conditioning_items, current_ltx_params, target_resolution_tuple, total_frames_brutos) - num_latent_frames = latents_brutos.shape[2] - logger.info(f"LTX responded with a latent tensor of shape {latents_brutos.shape}, representing ~{num_latent_frames * 8 + 1} video frames at {FPS} FPS.") - - last_trim = latents_brutos[:, :, -(latents_a_podar+1):, :, :].clone() - eco_latent_for_next_loop = last_trim[:, :, :2, :, :].clone() - dejavu_latent_for_next_loop = last_trim[:, :, -1:, :, :].clone() - latents_video = latents_brutos[:, :, :-(latents_a_podar-1), :, :].clone() - latents_video = latents_video[:, :, 1:, :, :] - del last_trim, latents_brutos; gc.collect(); torch.cuda.empty_cache() - - if transition_type == "cutx": - eco_latent_for_next_loop, dejavu_latent_for_next_loop = None, None - - - cpu_latent = latents_video.cpu() - latent_path = os.path.join(temp_latent_dir, f"latent_fragment_{i:04d}.pt") - torch.save(cpu_latent, latent_path) - latent_fragment_paths.append(latent_path) - del latents_video, cpu_latent; gc.collect() - del eco_latent_for_next_loop, dejavu_latent_for_next_loop; gc.collect(); torch.cuda.empty_cache() - - logger.info(f"--- STARTING STAGE 2: Processing {len(latent_fragment_paths)} latents in chunks of {LATENT_PROCESSING_CHUNK_SIZE} ---") - final_video_clip_paths = [] - num_chunks = -(-len(latent_fragment_paths) // LATENT_PROCESSING_CHUNK_SIZE) - for i in range(num_chunks): - chunk_start_index = i * LATENT_PROCESSING_CHUNK_SIZE - chunk_end_index = chunk_start_index + LATENT_PROCESSING_CHUNK_SIZE - chunk_paths = latent_fragment_paths[chunk_start_index:chunk_end_index] - progress(i / num_chunks, desc=f"Processing & Decoding Batch {i+1}/{num_chunks}") - tensors_in_chunk = [torch.load(p, map_location=self.device) for p in chunk_paths] - tensors_para_concatenar = [frag[:, :, :-1, :, :] if j < len(tensors_in_chunk) - 1 else frag for j, frag in 
enumerate(tensors_in_chunk)] - sub_group_latent = torch.cat(tensors_para_concatenar, dim=2) - del tensors_in_chunk, tensors_para_concatenar; gc.collect(); torch.cuda.empty_cache() - logger.info(f"Batch {i+1} concatenated. Latent shape: {sub_group_latent.shape}") - base_name = f"clip_{i:04d}_{run_timestamp}" - current_clip_path = os.path.join(temp_video_clips_dir, f"{base_name}.mp4") - pixel_tensor = vae_manager_singleton.decode(sub_group_latent) - self.save_video_from_tensor(pixel_tensor, current_clip_path, fps=FPS) - del pixel_tensor, sub_group_latent; gc.collect(); torch.cuda.empty_cache() - final_video_clip_paths.append(current_clip_path) - - progress(0.98, desc="Final assembly of clips...") - final_video_path = os.path.join(self.workspace_dir, f"original_movie_{run_timestamp}.mp4") - video_encode_tool_singleton.concatenate_videos(video_paths=final_video_clip_paths, output_path=final_video_path, workspace_dir=self.workspace_dir) - logger.info("Cleaning up temporary clip files...") - try: - shutil.rmtree(temp_video_clips_dir) - except OSError as e: - logger.warning(f"Could not remove temporary clip directory: {e}") - logger.info(f"Process complete! Original video saved to: {final_video_path}") - return {"final_path": final_video_path, "latent_paths": latent_fragment_paths} - - def upscale_latents_and_create_video(self, latent_paths: list, chunk_size: int, progress: gr.Progress): - if not latent_paths: - raise gr.Error("Cannot perform upscaling: no latent paths were provided.") - logger.info("--- STARTING POST-PRODUCTION: Latent Upscaling ---") - run_timestamp = int(time.time()) - temp_upscaled_clips_dir = os.path.join(self.workspace_dir, f"temp_upscaled_clips_{run_timestamp}") - os.makedirs(temp_upscaled_clips_dir, exist_ok=True) - final_upscaled_clip_paths = [] - num_chunks = -(-len(latent_paths) // chunk_size) - for i in range(num_chunks): - chunk_start_index = i * chunk_size - chunk_end_index = chunk_start_index + chunk_size - chunk_paths = latent_paths[chunk_start_index:chunk_end_index] - progress(i / num_chunks, desc=f"Upscaling & Decoding Batch {i+1}/{num_chunks}") - tensors_in_chunk = [torch.load(p, map_location=self.device) for p in chunk_paths] - tensors_para_concatenar = [frag if j < len(tensors_in_chunk) - 1 else frag for j, frag in enumerate(tensors_in_chunk)] - sub_group_latent = torch.cat(tensors_para_concatenar, dim=2) - del tensors_in_chunk, tensors_para_concatenar; gc.collect(); torch.cuda.empty_cache() - logger.info(f"Batch {i+1} loaded. Original latent shape: {sub_group_latent.shape}") - upscaled_latent_chunk = latent_enhancer_specialist_singleton.upscale(sub_group_latent) - del sub_group_latent; gc.collect(); torch.cuda.empty_cache() - logger.info(f"Batch {i+1} upscaled. 
New latent shape: {upscaled_latent_chunk.shape}") - pixel_tensor = vae_manager_singleton.decode(upscaled_latent_chunk) - del upscaled_latent_chunk; gc.collect(); torch.cuda.empty_cache() - base_name = f"upscaled_clip_{i:04d}_{run_timestamp}" - current_clip_path = os.path.join(temp_upscaled_clips_dir, f"{base_name}.mp4") - self.save_video_from_tensor(pixel_tensor, current_clip_path, fps=24) - final_upscaled_clip_paths.append(current_clip_path) - del pixel_tensor; gc.collect(); torch.cuda.empty_cache() - logger.info(f"Saved upscaled clip: {Path(current_clip_path).name}") - progress(0.98, desc="Assembling upscaled clips...") - final_video_path = os.path.join(self.workspace_dir, f"upscaled_movie_{run_timestamp}.mp4") - video_encode_tool_singleton.concatenate_videos(video_paths=final_upscaled_clip_paths, output_path=final_video_path, workspace_dir=self.workspace_dir) - logger.info("Cleaning up temporary upscaled clip files...") - try: - shutil.rmtree(temp_upscaled_clips_dir) - except OSError as e: - logger.warning(f"Could not remove temporary upscaled clip directory: {e}") - logger.info(f"Latent upscaling complete! Final video at: {final_video_path}") - yield {"final_path": final_video_path} - - def master_video_hd(self, source_video_path: str, model_version: str, steps: int, prompt: str, progress: gr.Progress): - logger.info(f"--- STARTING POST-PRODUCTION: HD Mastering with SeedVR {model_version} ---") - progress(0.1, desc=f"Preparing for HD Mastering with SeedVR {model_version}...") - run_timestamp = int(time.time()) - output_path = os.path.join(self.workspace_dir, f"hd_mastered_movie_{model_version}_{run_timestamp}.mp4") - try: - final_path = seedvr_manager_singleton.process_video( - input_video_path=source_video_path, - output_video_path=output_path, - prompt=prompt, - model_version=model_version, - steps=steps, - progress=progress - ) - logger.info(f"HD Mastering complete! Final video at: {final_path}") - yield {"final_path": final_path} - except Exception as e: - logger.error(f"HD Mastering failed: {e}", exc_info=True) - raise gr.Error(f"HD Mastering failed. Details: {e}") - - def generate_audio_for_final_video(self, source_video_path: str, audio_prompt: str, progress: gr.Progress): - logger.info(f"--- STARTING POST-PRODUCTION: Audio Generation ---") - progress(0.1, desc="Preparing for audio generation...") - run_timestamp = int(time.time()) - source_name = Path(source_video_path).stem - output_path = os.path.join(self.workspace_dir, f"{source_name}_with_audio_{run_timestamp}.mp4") - try: - result = subprocess.run( - ["ffprobe", "-v", "error", "-show_entries", "format=duration", "-of", "default=noprint_wrappers=1:nokey=1", source_video_path], - capture_output=True, text=True, check=True) - duration = float(result.stdout.strip()) - logger.info(f"Source video duration: {duration:.2f} seconds.") - progress(0.5, desc="Generating audio track...") - final_path = mmaudio_manager_singleton.generate_audio_for_video( - video_path=source_video_path, - prompt=audio_prompt, - duration_seconds=duration, - output_path_override=output_path - ) - logger.info(f"Audio generation complete! Final video with audio at: {final_path}") - progress(1.0, desc="Audio generation complete!") - yield {"final_path": final_path} - except Exception as e: - logger.error(f"Audio generation failed: {e}", exc_info=True) - raise gr.Error(f"Audio generation failed. 
Details: {e}") - - def _generate_latent_tensor_internal(self, conditioning_items, ltx_params, target_resolution, total_frames_to_generate): - """Internal helper to call the LTX manager.""" - final_ltx_params = {**ltx_params, 'width': target_resolution[0], 'height': target_resolution[1], 'video_total_frames': total_frames_to_generate, 'video_fps': 24, 'current_fragment_index': int(time.time()), 'conditioning_items_data': conditioning_items} - return ltx_manager_singleton.generate_latent_fragment(**final_ltx_params) - - def _quantize_to_multiple(self, n, m): - """Helper to round n to the nearest multiple of m.""" - if m == 0: return n - quantized = int(round(n / m) * m) - return m if n > 0 and quantized == 0 else quantized \ No newline at end of file diff --git a/managers/__init__.py b/managers/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/managers/mmaudio_manager.py b/managers/mmaudio_manager.py deleted file mode 100644 index 6d0cf66ccf1c1bb44ca5f29cf98166bb083fc9dd..0000000000000000000000000000000000000000 --- a/managers/mmaudio_manager.py +++ /dev/null @@ -1,208 +0,0 @@ -# managers/mmaudio_manager.py -# AducSdr: Uma implementação aberta e funcional da arquitetura ADUC-SDR -# Copyright (C) 4 de Agosto de 2025 Carlos Rodrigues dos Santos -# -# Contato: -# Carlos Rodrigues dos Santos -# carlex22@gmail.com -# Rua Eduardo Carlos Pereira, 4125, B1 Ap32, Curitiba, PR, Brazil, CEP 8102025 -# -# Repositórios e Projetos Relacionados: -# GitHub: https://github.com/carlex22/Aduc-sdr -# -# PENDING PATENT NOTICE: Please see NOTICE.md. -# -# Version: 2.3.0 -# -# This file defines the MMAudioManager for the ADUC-SDR framework. It is responsible -# for generating audio synchronized with video clips. This version has been refactored -# to be self-contained by automatically cloning the MMAudio dependency from its -# official repository, making the framework more portable and easier to set up. - -import torch -import logging -import subprocess -import os -import time -import yaml -import gc -from pathlib import Path -import gradio as gr -import sys - -logger = logging.getLogger(__name__) - -# --- Dependency Management --- -DEPS_DIR = Path("./deps") -MMAUDIO_REPO_DIR = DEPS_DIR / "MMAudio" -MMAUDIO_REPO_URL = "https://github.com/hkchengrex/MMAudio.git" - -def setup_mmaudio_dependencies(): - """ - Ensures the MMAudio repository is cloned and available in the sys.path. - This function is run once when the module is first imported. - """ - if not MMAUDIO_REPO_DIR.exists(): - logger.info(f"MMAudio repository not found at '{MMAUDIO_REPO_DIR}'. Cloning from GitHub...") - try: - DEPS_DIR.mkdir(exist_ok=True) - subprocess.run( - ["git", "clone", "--depth", "1", MMAUDIO_REPO_URL, str(MMAUDIO_REPO_DIR)], - check=True, capture_output=True, text=True - ) - logger.info("MMAudio repository cloned successfully.") - except subprocess.CalledProcessError as e: - logger.error(f"Failed to clone MMAudio repository. 
Git stderr: {e.stderr}") - raise RuntimeError("Could not clone the required MMAudio dependency from GitHub.") - else: - logger.info("Found local MMAudio repository.") - - if str(MMAUDIO_REPO_DIR.resolve()) not in sys.path: - sys.path.insert(0, str(MMAUDIO_REPO_DIR.resolve())) - logger.info(f"Added '{MMAUDIO_REPO_DIR.resolve()}' to sys.path.") - -setup_mmaudio_dependencies() - -from mmaudio.eval_utils import ModelConfig, all_model_cfg, generate as mmaudio_generate, load_video, make_video -from mmaudio.model.flow_matching import FlowMatching -from mmaudio.model.networks import MMAudio, get_my_mmaudio -from mmaudio.model.utils.features_utils import FeaturesUtils -from mmaudio.model.sequence_config import SequenceConfig - - -class MMAudioManager: - """ - Manages the MMAudio model for audio generation tasks. - """ - def __init__(self, workspace_dir): - self.device = "cuda" if torch.cuda.is_available() else "cpu" - self.cpu_device = torch.device("cpu") - self.dtype = torch.bfloat16 if self.device == "cuda" else torch.float32 - self.workspace_dir = workspace_dir - - self.all_model_cfg = all_model_cfg - self.model_config: 'ModelConfig' = self.all_model_cfg['large_44k_v2'] - self.net: 'MMAudio' = None - self.feature_utils: 'FeaturesUtils' = None - self.seq_cfg: 'SequenceConfig' = None - - self._load_models_to_cpu() - - def _adjust_paths_for_repo(self): - """Adjusts the checkpoint paths in the model config to point inside the cloned repo.""" - for cfg_key in self.all_model_cfg: - cfg = self.all_model_cfg[cfg_key] - # The paths in the original config are relative, so we join them with our repo path - cfg.model_path = MMAUDIO_REPO_DIR / cfg.model_path - cfg.vae_path = MMAUDIO_REPO_DIR / cfg.vae_path - if cfg.bigvgan_16k_path is not None: - cfg.bigvgan_16k_path = MMAUDIO_REPO_DIR / cfg.bigvgan_16k_path - cfg.synchformer_ckpt = MMAUDIO_REPO_DIR / cfg.synchformer_ckpt - - def _load_models_to_cpu(self): - """Loads the MMAudio models to CPU memory on initialization.""" - try: - self._adjust_paths_for_repo() - logger.info("Verifying and downloading MMAudio models, if necessary...") - self.model_config.download_if_needed() - - self.seq_cfg = self.model_config.seq_cfg - - logger.info(f"Loading MMAudio model: {self.model_config.model_name} to CPU...") - self.net = get_my_mmaudio(self.model_config.model_name).eval() - self.net.load_weights(torch.load(self.model_config.model_path, map_location=self.cpu_device, weights_only=True)) - - logger.info("Loading MMAudio feature utils to CPU...") - self.feature_utils = FeaturesUtils( - tod_vae_ckpt=self.model_config.vae_path, - synchformer_ckpt=self.model_config.synchformer_ckpt, - enable_conditions=True, - mode=self.model_config.mode, - bigvgan_vocoder_ckpt=self.model_config.bigvgan_16k_path, - need_vae_encoder=False - ) - self.feature_utils = self.feature_utils.eval() - self.net.to(self.cpu_device) - self.feature_utils.to(self.cpu_device) - logger.info("MMAudioManager ready on CPU.") - except Exception as e: - logger.error(f"Failed to load audio models: {e}", exc_info=True) - self.net = None - - def to_gpu(self): - """Moves the models and utilities to the GPU before inference.""" - if self.device == 'cpu': return - logger.info(f"Moving MMAudioManager to GPU ({self.device})...") - self.net.to(self.device, self.dtype) - self.feature_utils.to(self.device, self.dtype) - - def to_cpu(self): - """Moves the models back to CPU and clears VRAM after inference.""" - if self.device == 'cpu': return - logger.info("Unloading MMAudioManager from GPU...") - 
self.net.to(self.cpu_device) - self.feature_utils.to(self.cpu_device) - gc.collect() - if torch.cuda.is_available(): torch.cuda.empty_cache() - - def generate_audio_for_video(self, video_path: str, prompt: str, duration_seconds: float, output_path_override: str = None) -> str: - """ - Generates audio for a video file, applying a negative prompt to avoid speech. - """ - if self.net is None: - raise gr.Error("MMAudio model is not loaded. Cannot generate audio.") - - logger.info("--- Generating Audio for Video Fragment ---") - logger.info(f"--- Video: {os.path.basename(video_path)}") - logger.info(f"--- Duration: {duration_seconds:.2f}s") - - negative_prompt = "human voice, speech, talking, singing, narration" - logger.info(f"--- Prompt: '{prompt}' | Negative Prompt: '{negative_prompt}'") - - if duration_seconds < 1: - logger.warning("Fragment too short (<1s). Returning original video.") - return video_path - - if self.device == 'cpu': - logger.warning("Generating audio on CPU. This may be very slow.") - - try: - self.to_gpu() - with torch.no_grad(): - rng = torch.Generator(device=self.device).manual_seed(int(time.time())) - fm = FlowMatching(min_sigma=0, inference_mode='euler', num_steps=25) - - video_info = load_video(Path(video_path), duration_seconds) - self.seq_cfg.duration = video_info.duration_sec - self.net.update_seq_lengths(self.seq_cfg.latent_seq_len, self.seq_cfg.clip_seq_len, self.seq_cfg.sync_seq_len) - - audios = mmaudio_generate( - clip_video=video_info.clip_frames.unsqueeze(0), - sync_video=video_info.sync_frames.unsqueeze(0), - text=[prompt], - negative_text=[negative_prompt], - feature_utils=self.feature_utils, - net=self.net, - fm=fm, - rng=rng, - cfg_strength=4.5 - ) - audio_waveform = audios.float().cpu()[0] - - output_video_path = output_path_override if output_path_override else os.path.join(self.workspace_dir, f"{Path(video_path).stem}_with_audio.mp4") - - make_video(video_info, Path(output_video_path), audio_waveform, sampling_rate=self.seq_cfg.sampling_rate) - logger.info(f"--- Fragment with audio saved to: {os.path.basename(output_video_path)}") - return output_video_path - finally: - self.to_cpu() - -# --- Singleton Instantiation --- -try: - with open("config.yaml", 'r') as f: - config = yaml.safe_load(f) - WORKSPACE_DIR = config['application']['workspace_dir'] - mmaudio_manager_singleton = MMAudioManager(workspace_dir=WORKSPACE_DIR) -except Exception as e: - logger.error(f"Could not initialize MMAudioManager: {e}", exc_info=True) - mmaudio_manager_singleton = None \ No newline at end of file diff --git a/managers/seedvr_manager.py b/managers/seedvr_manager.py deleted file mode 100644 index 136976b7a703624890fe0efab1df515fee02aa35..0000000000000000000000000000000000000000 --- a/managers/seedvr_manager.py +++ /dev/null @@ -1,213 +0,0 @@ -# managers/seedvr_manager.py -# -# Copyright (C) 2025 Carlos Rodrigues dos Santos -# -# Version: 2.3.5 -# -# This version uses the optimal strategy of cloning the self-contained Hugging Face -# Space repository and uses the full, correct import paths to resolve all -# ModuleNotFoundErrors, while retaining necessary runtime patches. 
- -import torch -import torch.distributed as dist -import os -import gc -import logging -import sys -import subprocess -from pathlib import Path -from urllib.parse import urlparse -from torch.hub import download_url_to_file -import gradio as gr -import mediapy -from einops import rearrange - -from tools.tensor_utils import wavelet_reconstruction - -logger = logging.getLogger(__name__) - -# --- Dependency Management --- -DEPS_DIR = Path("./deps") -SEEDVR_SPACE_DIR = DEPS_DIR / "SeedVR_Space" -SEEDVR_SPACE_URL = "https://huggingface.co/spaces/ByteDance-Seed/SeedVR2-3B" -VAE_CONFIG_URL = "https://raw.githubusercontent.com/ByteDance-Seed/SeedVR/main/models/video_vae_v3/s8_c16_t4_inflation_sd3.yaml" - -def setup_seedvr_dependencies(): - """ - Ensures the SeedVR Space repository is cloned and available in the sys.path. - """ - if not SEEDVR_SPACE_DIR.exists(): - logger.info(f"SeedVR Space not found at '{SEEDVR_SPACE_DIR}'. Cloning from Hugging Face...") - try: - DEPS_DIR.mkdir(exist_ok=True) - subprocess.run( - ["git", "clone", SEEDVR_SPACE_URL, str(SEEDVR_SPACE_DIR)], - check=True, capture_output=True, text=True - ) - logger.info("SeedVR Space cloned successfully.") - except subprocess.CalledProcessError as e: - logger.error(f"Failed to clone SeedVR Space. Git stderr: {e.stderr}") - raise RuntimeError("Could not clone the required SeedVR dependency from Hugging Face.") - else: - logger.info("Found local SeedVR Space repository.") - - if str(SEEDVR_SPACE_DIR.resolve()) not in sys.path: - sys.path.insert(0, str(SEEDVR_SPACE_DIR.resolve())) - logger.info(f"Added '{SEEDVR_SPACE_DIR.resolve()}' to sys.path.") - -setup_seedvr_dependencies() - -# Use full import paths relative to the root of the cloned repository -from projects.video_diffusion_sr.infer import VideoDiffusionInfer -from common.config import load_config -from common.seed import set_seed -from data.image.transforms.divisible_crop import DivisibleCrop -from data.image.transforms.na_resize import NaResize -from data.video.transforms.rearrange import Rearrange -from torchvision.transforms import Compose, Lambda, Normalize -from torchvision.io.video import read_video -from omegaconf import OmegaConf - - -def _load_file_from_url(url, model_dir='./', file_name=None): - os.makedirs(model_dir, exist_ok=True) - filename = file_name or os.path.basename(urlparse(url).path) - cached_file = os.path.abspath(os.path.join(model_dir, filename)) - if not os.path.exists(cached_file): - logger.info(f'Downloading: "{url}" to {cached_file}') - download_url_to_file(url, cached_file, hash_prefix=None, progress=True) - return cached_file - -class SeedVrManager: - """Manages the SeedVR model for HD Mastering tasks.""" - def __init__(self, workspace_dir="deformes_workspace"): - self.device = 'cuda' if torch.cuda.is_available() else 'cpu' - self.runner = None - self.workspace_dir = workspace_dir - self.is_initialized = False - self._original_barrier = None - logger.info("SeedVrManager initialized. 
Model will be loaded on demand.") - - def _download_models_and_configs(self): - """Downloads the necessary checkpoints AND the missing VAE config file.""" - logger.info("Verifying and downloading SeedVR2 models and configs...") - ckpt_dir = SEEDVR_SPACE_DIR / 'ckpts' - config_dir = SEEDVR_SPACE_DIR / 'configs' / 'vae' - ckpt_dir.mkdir(exist_ok=True) - config_dir.mkdir(parents=True, exist_ok=True) - _load_file_from_url(url=VAE_CONFIG_URL, model_dir=str(config_dir)) - pretrain_model_urls = { - 'vae_ckpt': 'https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/ema_vae.pth', - 'dit_3b': 'https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/seedvr2_ema_3b.pth', - 'dit_7b': 'https://huggingface.co/ByteDance-Seed/SeedVR2-7B/resolve/main/seedvr2_ema_7b.pth', - 'pos_emb': 'https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/pos_emb.pt', - 'neg_emb': 'https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/neg_emb.pt' - } - for key, url in pretrain_model_urls.items(): - _load_file_from_url(url=url, model_dir=str(ckpt_dir)) - logger.info("SeedVR2 models and configs downloaded successfully.") - - def _initialize_runner(self, model_version: str): - """Loads and configures the SeedVR model, with patches for single-GPU inference.""" - if self.runner is not None: return - self._download_models_and_configs() - - if dist.is_available() and not dist.is_initialized(): - logger.info("Applying patch to disable torch.distributed.barrier for single-GPU inference.") - self._original_barrier = dist.barrier - dist.barrier = lambda *args, **kwargs: None - - logger.info(f"Initializing SeedVR2 {model_version} runner...") - if model_version == '3B': - config_path = SEEDVR_SPACE_DIR / 'configs_3b' / 'main.yaml' - checkpoint_path = SEEDVR_SPACE_DIR / 'ckpts' / 'seedvr2_ema_3b.pth' - elif model_version == '7B': - config_path = SEEDVR_SPACE_DIR / 'configs_7b' / 'main.yaml' - checkpoint_path = SEEDVR_SPACE_DIR / 'ckpts' / 'seedvr2_ema_7b.pth' - else: - raise ValueError(f"Unsupported SeedVR model version: {model_version}") - - try: - config = load_config(str(config_path)) - except FileNotFoundError: - logger.warning("Caught expected FileNotFoundError. 
Loading config manually.") - config = OmegaConf.load(str(config_path)) - correct_vae_config_path = SEEDVR_SPACE_DIR / 'configs' / 'vae' / 's8_c16_t4_inflation_sd3.yaml' - vae_config = OmegaConf.load(str(correct_vae_config_path)) - config.vae = vae_config - logger.info("Configuration loaded and patched manually.") - - self.runner = VideoDiffusionInfer(config) - OmegaConf.set_readonly(self.runner.config, False) - self.runner.configure_dit_model(device=self.device, checkpoint=str(checkpoint_path)) - self.runner.configure_vae_model() - if hasattr(self.runner.vae, "set_memory_limit"): - self.runner.vae.set_memory_limit(**self.runner.config.vae.memory_limit) - self.is_initialized = True - logger.info(f"Runner for SeedVR2 {model_version} initialized and ready.") - - def _unload_runner(self): - """Unloads the runner from VRAM and restores patches.""" - if self.runner is not None: - del self.runner; self.runner = None - gc.collect(); torch.cuda.empty_cache() - self.is_initialized = False - logger.info("SeedVR runner unloaded from VRAM.") - if self._original_barrier is not None: - logger.info("Restoring original torch.distributed.barrier function.") - dist.barrier = self._original_barrier - self._original_barrier = None - - def process_video(self, input_video_path: str, output_video_path: str, prompt: str, - model_version: str = '3B', steps: int = 50, seed: int = 666, - progress: gr.Progress = None) -> str: - """Applies HD enhancement to a video.""" - try: - self._initialize_runner(model_version) - set_seed(seed, same_across_ranks=True) - self.runner.config.diffusion.timesteps.sampling.steps = steps - self.runner.configure_diffusion() - video_tensor = read_video(input_video_path, output_format="TCHW")[0] / 255.0 - res_h, res_w = video_tensor.shape[-2:] - video_transform = Compose([ - NaResize(resolution=(res_h * res_w) ** 0.5, mode="area", downsample_only=False), - Lambda(lambda x: torch.clamp(x, 0.0, 1.0)), - DivisibleCrop((16, 16)), - Normalize(0.5, 0.5), - Rearrange("t c h w -> c t h w"), - ]) - cond_latents = [video_transform(video_tensor.to(self.device))] - input_videos = cond_latents - self.runner.dit.to("cpu") - self.runner.vae.to(self.device) - cond_latents = self.runner.vae_encode(cond_latents) - self.runner.vae.to("cpu"); gc.collect(); torch.cuda.empty_cache() - self.runner.dit.to(self.device) - pos_emb_path = SEEDVR_SPACE_DIR / 'ckpts' / 'pos_emb.pt' - neg_emb_path = SEEDVR_SPACE_DIR / 'ckpts' / 'neg_emb.pt' - text_pos_embeds = torch.load(pos_emb_path).to(self.device) - text_neg_embeds = torch.load(neg_emb_path).to(self.device) - text_embeds_dict = {"texts_pos": [text_pos_embeds], "texts_neg": [text_neg_embeds]} - noises = [torch.randn_like(latent) for latent in cond_latents] - conditions = [self.runner.get_condition(noise, latent_blur=latent, task="sr") for noise, latent in zip(noises, cond_latents)] - with torch.no_grad(), torch.autocast("cuda", torch.bfloat16, enabled=True): - video_tensors = self.runner.inference(noises=noises, conditions=conditions, dit_offload=True, **text_embeds_dict) - self.runner.dit.to("cpu"); gc.collect(); torch.cuda.empty_cache() - self.runner.vae.to(self.device) - samples = self.runner.vae_decode(video_tensors) - final_sample = samples[0] - input_video_sample = input_videos[0] - if final_sample.shape[1] < input_video_sample.shape[1]: - input_video_sample = input_video_sample[:, :final_sample.shape[1]] - final_sample = wavelet_reconstruction(rearrange(final_sample, "c t h w -> t c h w"), rearrange(input_video_sample, "c t h w -> t c h w")) - final_sample = 
rearrange(final_sample, "t c h w -> t h w c") - final_sample = final_sample.clip(-1, 1).mul_(0.5).add_(0.5).mul_(255).round() - final_sample_np = final_sample.to(torch.uint8).cpu().numpy() - mediapy.write_video(output_video_path, final_sample_np, fps=24) - logger.info(f"HD Mastered video saved to: {output_video_path}") - return output_video_path - finally: - self._unload_runner() - -# --- Singleton Instance --- -seedvr_manager_singleton = SeedVrManager() \ No newline at end of file diff --git a/prompts/cinematic_director_prompt.txt b/prompts/cinematic_director_prompt.txt deleted file mode 100644 index c5bb42e1ca423bd1358ca694233b71f4175df145..0000000000000000000000000000000000000000 --- a/prompts/cinematic_director_prompt.txt +++ /dev/null @@ -1,45 +0,0 @@ -# ROLE: AI Film Director & Cinematographer - -# GOAL: -Your primary task is to act as a film director. You will analyze the full context of a scene—its past, present, and future—to make a crucial editing decision ("continuous" or "cut"). After deciding, you must write a single, rich, cinematic motion prompt to generate the video for the PRESENT act. - -# CONTEXT FOR YOUR DECISION: -You will receive a complete picture of the narrative timeline. - -- **Global Story Goal:** The main theme of the entire film. - - `{global_prompt}` - -- **Creative History:** The sequence of motion prompts you have already created. This is your memory. - - `{story_history}` - -- **The Past (Where you came from):** - - Textual Past (Ato_n-1): "{past_scene_desc}" - - Visual Past (Keyframe k_n-1): [PAST_IMAGE] - -- **The Present (Where you are now):** - - Textual Present (Ato_n): "{present_scene_desc}" - - Visual Present (Keyframe k_n): [PRESENT_IMAGE] - -- **The Future (Where you are going):** - - Textual Future (Ato_n+1): "{future_scene_desc}" - - Visual Future (Keyframe k_n+1): [FUTURE_IMAGE] - -# --- TASK 1: THE EDITING DECISION --- -Analyze the transition from the PRESENT (`k_n`) to the FUTURE (`k_n+1`). -- If there is a major, non-continuous jump (e.g., scene changes from day to night, character teleports, location is completely different), you MUST decide this is a "cut". This is a critical break in the action. -- Otherwise, if the action can flow logically from the present to the future, decide it is "continuous". - -# --- TASK 2: THE CINEMATIC MOTION PROMPT --- -Based on your decision, write the `motion_prompt`. The prompt MUST describe the action that moves the story from the PRESENT visual (`k_n`) towards the FUTURE visual (`k_n+1`). - -**CRITICAL PROMPT DIRECTIVES:** -1. **ALWAYS DESCRIBE MOTION:** The scene must not be static. Something must always be moving. -2. **STYLE:** Be descriptive, cinematic, and direct. Use the user's `Global Story Goal` as a stylistic guide. -3. **STRUCTURE:** In a single paragraph (under 150 words), describe the scene's motion, prioritizing in this EXACT order: - a. **Actors/Animals:** What are they doing? Where did they come from, how are they moving, where are they going? Describe actions and expressions. - b. **Objects:** How do objects interact with the actors or the environment? - c. **Camera:** How is the camera moving? (e.g., "slow pan from left to right", "dolly zoom focusing on the character's face", "dynamic tracking shot following the action"). - d. **Scenery/Environment:** Describe environmental details that add to the motion and mood (e.g., "wind rustling the leaves", "rain streaks down the window"). 
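For readers reviewing this removed prompt, a hypothetical example of the two-key JSON object it requests (see the RESPONSE FORMAT section immediately below) may help; it is written here as the equivalent Python literal, and both values are invented for illustration only.

# Hypothetical illustration of the director response described below; the values are invented.
example_director_response = {
    "transition_type": "continuous",  # or "cut" when the present-to-future jump is non-continuous
    "motion_prompt": (
        "The rider urges the horse into a gallop toward the ridge while a tracking shot "
        "follows from the left; dust kicks up behind them as wind bends the tall grass."
    ),
}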
- -# RESPONSE FORMAT: -You MUST respond with a single, clean JSON object with two keys: "transition_type" and "motion_prompt". \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 7bc30c03ccb4a777bd9fe53f9c23314bfc54a4a1..c607e2581aefb22ca7170e4a612f24dfcd28861c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,9 @@ -torch +torch==2.6.0 +torchvision==0.21.0 +torchaudio==2.6.0 +opencv-python==4.9.0.80 torchao -torchvision -torchaudio -transformers<4.41 +transformers accelerate safetensors einops @@ -19,9 +20,7 @@ tiktoken transformers_stream_generator rotary-embedding-torch Pillow -numpy PyYAML -opencv-python imageio imageio-ffmpeg av @@ -42,4 +41,9 @@ isort pre-commit expecttest hypothesis - +numpy<2 +ninja +psutil +packaging +https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp310-cp310-linux_x86_64.whl +#https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/apex-0.1-cp310-cp310-linux_x86_64.whl diff --git a/run.py b/run.py new file mode 100644 index 0000000000000000000000000000000000000000..0f8e00bc1ecb3ba09fed1e28fd829dc49bbcafda --- /dev/null +++ b/run.py @@ -0,0 +1,19 @@ +import argparse +import uvicorn + +def main(): + parser = argparse.ArgumentParser(description="Aduc-Sdr launcher") + parser.add_argument("mode", choices=["gradio", "api"], help="Execution mode: 'gradio' for the UI, 'api' for the FastAPI server.") + args = parser.parse_args() + + if args.mode == "gradio": + print("Starting the Gradio interface...") + # Imports and runs the launch logic defined at the end of app_gradio.py + from app_gradio import demo + demo.queue().launch() + elif args.mode == "api": + print("Starting the FastAPI server at http://127.0.0.1:8000") + uvicorn.run("app_api:app", host="127.0.0.1", port=8000, reload=True) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/tools/__init__.py b/tools/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
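As a usage note for the new run.py entry point, here is a minimal sketch (not part of this patch) of driving the launcher from another Python process, assuming run.py sits at the repository root as added above; the "gradio" mode is invoked the same way, with "gradio" in place of "api".

# Minimal sketch: start the FastAPI server via the new launcher, equivalent to
# running `python run.py api` from a shell. check=True surfaces launch failures.
import subprocess
import sys

subprocess.run([sys.executable, "run.py", "api"], check=True)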