diff --git a/aduc_framework/__init__.py b/aduc_framework/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..ddec1af60b6f4123f871d847f5da4deaa0437843
--- /dev/null
+++ b/aduc_framework/__init__.py
@@ -0,0 +1,75 @@
+# aduc_framework/__init__.py
+#
+# Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos
+#
+# Versão 3.0.0 (Framework Entry Point)
+#
+# Este arquivo serve como o ponto de entrada principal para o Aduc Framework.
+# Ele define a interface pública que os clientes (UIs, APIs, etc.) usarão
+# para criar e interagir com o sistema de orquestração.
+#
+# A principal responsabilidade deste arquivo é expor uma função de fábrica
+# ('create_aduc_instance') que encapsula a lógica de inicialização do
+# orquestrador e seus componentes, garantindo que o framework seja fácil
+# de consumir.
+
+import logging
+
+# Importa as classes e tipos que formarão a interface pública do framework
+from .orchestrator import AducOrchestrator
+from .types import (
+ GenerationState,
+ PreProductionParams,
+ ProductionParams,
+ GenerationParameters,
+ MediaRef,
+ Ato,
+ KeyframeData,
+ VideoData
+)
+
+# Configura um logger para o framework para que os clientes possam ver as mensagens de inicialização.
+logger = logging.getLogger(__name__)
+
+def create_aduc_instance(workspace_dir: str) -> AducOrchestrator:
+ """
+ Ponto de entrada de fábrica para criar uma instância totalmente funcional do Aduc Framework.
+
+ Esta função abstrai a complexidade da inicialização do AducOrchestrator e de todos
+ os seus engenheiros e managers dependentes. Clientes do framework devem usar esta
+ função para garantir uma inicialização correta e consistente.
+
+ Args:
+ workspace_dir (str): O caminho para o diretório onde todos os artefatos
+ (imagens, vídeos, latentes, logs) serão salvos.
+
+ Returns:
+ AducOrchestrator: Uma instância pronta para uso do orquestrador principal.
+ """
+ logger.info(f"Fábrica ADUC: Criando uma nova instância com workspace em '{workspace_dir}'...")
+
+ # Futuramente, lógicas mais complexas de inicialização, como a verificação de
+ # dependências ou configuração de hardware, podem ser adicionadas aqui.
+
+ instance = AducOrchestrator(workspace_dir=workspace_dir)
+
+ logger.info("Fábrica ADUC: Instância do framework criada e pronta para uso.")
+
+ return instance
+
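+# Exemplo de uso (esboço ilustrativo; o caminho do workspace é apenas uma suposição):
+#
+#   from aduc_framework import create_aduc_instance
+#   aduc = create_aduc_instance(workspace_dir="./workspace")
+#   # 'aduc' é um AducOrchestrator pronto para orquestrar a pré-produção e a produção.
+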
+# Mensagem de log para confirmar que o pacote do framework foi importado com sucesso.
+logger.info("Módulo 'aduc_framework' carregado. Use a função 'create_aduc_instance()' para começar.")
+
+# Opcional: Definir __all__ para controlar o que é importado com 'from aduc_framework import *'
+__all__ = [
+ "create_aduc_instance",
+ "AducOrchestrator",
+ "GenerationState",
+ "PreProductionParams",
+ "ProductionParams",
+ "GenerationParameters",
+ "MediaRef",
+ "Ato",
+ "KeyframeData",
+ "VideoData"
+]
\ No newline at end of file
diff --git a/aduc_framework/director.py b/aduc_framework/director.py
new file mode 100644
index 0000000000000000000000000000000000000000..59534da67158e0b62a99cbba533e252fe694450a
--- /dev/null
+++ b/aduc_framework/director.py
@@ -0,0 +1,116 @@
+# aduc_framework/director.py
+#
+# Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos
+#
+# Versão 3.0.0 (Framework State Manager)
+#
+# Este arquivo contém a classe AducDirector. Sua única responsabilidade
+# é gerenciar o objeto de estado da geração (GenerationState). Ele atua
+# como o "score" da orquestra ou o "script" do filme, mantendo um registro
+# preciso de todos os parâmetros e artefatos gerados.
+
+import logging
+import os
+from typing import List, Dict, Any
+
+# Importa os modelos de dados Pydantic que ele irá gerenciar
+from .types import GenerationState, PreProductionParams, ProductionParams, Ato, MediaRef, KeyframeData, VideoData
+
+logger = logging.getLogger(__name__)
+
+class AducDirector:
+ """
+ Representa o Diretor de Cena, responsável por gerenciar o estado da produção.
+ Atua como a fonte única da verdade para todos os dados relacionados a uma
+ única tarefa de geração de vídeo.
+ """
+ def __init__(self, workspace_dir: str):
+ """
+ Inicializa o Diretor.
+
+ Args:
+ workspace_dir (str): O diretório onde os artefatos são salvos.
+ O Diretor usa isso para referenciar caminhos se necessário.
+ """
+ self.workspace_dir = workspace_dir
+ self.state: GenerationState = self._initialize_state()
+ os.makedirs(self.workspace_dir, exist_ok=True)
+ logger.info(f"AducDirector inicializado. O estado de geração foi criado.")
+
+ def _initialize_state(self) -> GenerationState:
+ """
+ Cria uma instância vazia e válida do modelo GenerationState.
+ """
+ return GenerationState()
+
+ def get_full_state(self) -> GenerationState:
+ """
+ Retorna o objeto de estado Pydantic completo.
+
+ Returns:
+ GenerationState: O estado atual da geração.
+ """
+ return self.state
+
+ def get_full_state_as_dict(self) -> Dict[str, Any]:
+ """
+ Retorna o estado completo serializado como um dicionário Python.
+ Útil para passar para bibliotecas que não suportam Pydantic diretamente.
+
+ Returns:
+ Dict[str, Any]: O estado atual como um dicionário.
+ """
+ return self.state.model_dump()
+
+ def update_parameters(self, stage: str, params: Any):
+ """
+ Atualiza o nó de parâmetros no estado de geração.
+
+ Args:
+ stage (str): O estágio da produção ('pre_producao', 'producao', etc.).
+            params (Any): O objeto Pydantic (BaseModel) com os parâmetros daquele estágio.
+ """
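+        # Exemplo de uso (ilustrativo): director.update_parameters("pre_producao", params),
+        # onde 'params' seria, por exemplo, um PreProductionParams já validado pelo cliente.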
+ if hasattr(self.state.parametros_geracao, stage):
+ setattr(self.state.parametros_geracao, stage, params)
+ logger.info(f"Parâmetros do estágio '{stage}' atualizados no estado.")
+ else:
+ logger.warning(f"Tentativa de atualizar parâmetros para um estágio desconhecido: '{stage}'")
+
+ def update_pre_production_state(self, prompt: str, ref_paths: List[str], storyboard: List[str]):
+ """
+ Popula as seções iniciais do estado após a geração do storyboard.
+
+ Args:
+ prompt (str): O prompt geral.
+ ref_paths (List[str]): Lista de caminhos para as mídias de referência.
+ storyboard (List[str]): Lista de resumos dos atos.
+ """
+ self.state.Promt_geral = prompt
+ self.state.midias_referencia = [MediaRef(id=i, caminho=path) for i, path in enumerate(ref_paths)]
+ self.state.Atos = [Ato(id=i, resumo_ato=ato) for i, ato in enumerate(storyboard)]
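+        # Exemplo (ilustrativo): para storyboard=["Herói acorda", "Herói parte"],
+        # self.state.Atos vira [Ato(id=0, resumo_ato="Herói acorda"), Ato(id=1, resumo_ato="Herói parte")].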
+ logger.info("Estado de pré-produção (prompt, referências, atos) atualizado.")
+
+ def update_keyframes_state(self, keyframes_data: List[Dict[str, Any]]):
+ """
+ Atualiza a lista de keyframes no estado.
+
+ Args:
+ keyframes_data (List[Dict[str, Any]]): Uma lista de dicionários, cada um
+ representando os dados de um keyframe.
+ """
+ # Converte os dicionários em modelos Pydantic KeyframeData
+ self.state.Keyframe_atos = [KeyframeData(**data) for data in keyframes_data]
+ logger.info(f"{len(keyframes_data)} keyframes adicionados ao estado.")
+
+ def update_video_state(self, video_data_dict: Dict[str, Any]):
+ """
+ Atualiza a lista de vídeos gerados no estado.
+
+ Args:
+ video_data_dict (Dict[str, Any]): Um dicionário representando os dados do vídeo gerado.
+ """
+ # Converte o dicionário em um modelo Pydantic VideoData
+ video_model = VideoData(**video_data_dict)
+ # Atualmente, substituímos a lista, mas poderíamos adicionar a ela no futuro.
+ self.state.videos_atos = [video_model]
+ logger.info("Dados da produção de vídeo atualizados no estado.")
\ No newline at end of file
diff --git a/engineers/LICENSE b/aduc_framework/engineers/LICENSE
similarity index 100%
rename from engineers/LICENSE
rename to aduc_framework/engineers/LICENSE
diff --git a/engineers/NOTICE.md b/aduc_framework/engineers/NOTICE.md
similarity index 100%
rename from engineers/NOTICE.md
rename to aduc_framework/engineers/NOTICE.md
diff --git a/engineers/README.md b/aduc_framework/engineers/README.md
similarity index 100%
rename from engineers/README.md
rename to aduc_framework/engineers/README.md
diff --git a/aduc_framework/engineers/__init__.py b/aduc_framework/engineers/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..476476ac2a24f18dd6aa3b6910e7d9cf446f32a7
--- /dev/null
+++ b/aduc_framework/engineers/__init__.py
@@ -0,0 +1,13 @@
+# aduc_framework/engineers/__init__.py
+
+# Expõe os singletons e classes principais do sub-pacote de engenheiros.
+
+from .deformes2D_thinker import deformes2d_thinker_singleton
+from .deformes3D import deformes3d_engine_singleton
+from .deformes4D import Deformes4DEngine
+
+__all__ = [
+ "deformes2d_thinker_singleton",
+ "deformes3d_engine_singleton",
+ "Deformes4DEngine",
+]
\ No newline at end of file
diff --git a/engineers/deformes2D_thinker.py b/aduc_framework/engineers/deformes2D_thinker.py
similarity index 94%
rename from engineers/deformes2D_thinker.py
rename to aduc_framework/engineers/deformes2D_thinker.py
index 9bfd800fad83f1b3ff94b59d986ae6d8e5c991d1..3c1da3a4cf7c44b473e3a1dc98566f38f5a1904b 100644
--- a/engineers/deformes2D_thinker.py
+++ b/aduc_framework/engineers/deformes2D_thinker.py
@@ -36,7 +36,7 @@ import gradio as gr
from typing import List
# It imports the communication layer, not the API directly
-from managers.gemini_manager import gemini_manager_singleton
+from ..managers.gemini_manager import gemini_manager_singleton
logger = logging.getLogger(__name__)
@@ -117,16 +117,16 @@ class Deformes2DThinker:
prompt_parts = [
f"# CONTEXT:\n- Global Story Goal: {global_prompt}\n# VISUAL ASSETS:",
"Current Base Image [IMG-BASE]:",
- Image.open(last_image_path)
+ "",#Image.open(last_image_path)
]
- ref_counter = 1
- for path in fixed_ref_paths:
- if path != last_image_path:
- prompt_parts.extend([f"General Reference Image [IMG-REF-{ref_counter}]:", Image.open(path)])
- ref_counter += 1
+ #ref_counter = 1
+ #for path in fixed_ref_paths:
+ # if path != last_image_path:
+ # prompt_parts.extend([f"General Reference Image [IMG-REF-{ref_counter}]:", Image.open(path)])
+ # ref_counter += 1
- prompt_parts.append(director_prompt)
+ #prompt_parts.append(director_prompt)
final_flux_prompt = gemini_manager_singleton.get_raw_text(prompt_parts)
diff --git a/aduc_framework/engineers/deformes3D.py b/aduc_framework/engineers/deformes3D.py
new file mode 100644
index 0000000000000000000000000000000000000000..b6c7f894274736b7d58bd40fcd684d1f0425dfca
--- /dev/null
+++ b/aduc_framework/engineers/deformes3D.py
@@ -0,0 +1,183 @@
+# aduc_framework/engineers/deformes3D.py
+#
+# Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos
+#
+# Versão 3.1.2 (Com correção de import de 'typing')
+#
+# Este engenheiro é o "Diretor de Arte" do framework. Sua responsabilidade
+# é ler o estado de geração (storyboard, parâmetros) e orquestrar a criação
+# dos keyframes visuais, que servirão de âncora para a geração de vídeo.
+
+import os
+import time
+import logging
+import yaml
+import torch
+import numpy as np
+from PIL import Image, ImageOps
+
+from typing import List, Dict, Any, Callable, Optional
+
+# --- Imports Relativos Corrigidos ---
+from .deformes2D_thinker import deformes2d_thinker_singleton
+from ..types import LatentConditioningItem
+from ..managers.ltx_manager import ltx_manager_singleton
+from ..managers.vae_manager import vae_manager_singleton
+from ..managers.latent_enhancer_manager import latent_enhancer_specialist_singleton
+
+logger = logging.getLogger(__name__)
+
+# Define um tipo para o callback de progresso para clareza
+ProgressCallback = Optional[Callable[[float, str], None]]
+
+class Deformes3DEngine:
+ """
+ Especialista ADUC para a geração de imagens estáticas (keyframes).
+ """
+ def __init__(self):
+ """O construtor é leve e não recebe argumentos."""
+ self.workspace_dir: Optional[str] = None
+ logger.info("Deformes3DEngine instanciado (não inicializado).")
+
+ def initialize(self, workspace_dir: str):
+ """Inicializa o engenheiro com as configurações necessárias."""
+ if self.workspace_dir is not None:
+ return # Evita reinicialização
+ self.workspace_dir = workspace_dir
+ logger.info(f"3D Engine (Image Specialist) inicializado com workspace: {self.workspace_dir}.")
+
+ def generate_keyframes_from_storyboard(
+ self,
+ generation_state: Dict[str, Any],
+ progress_callback: ProgressCallback = None
+ ) -> List[Dict[str, Any]]:
+ """
+ Orquestra a geração de todos os keyframes com base no estado de geração completo.
+ Retorna uma lista de dicionários com dados detalhados de cada keyframe.
+ """
+ if not self.workspace_dir:
+ raise RuntimeError("Deformes3DEngine não foi inicializado. Chame o método initialize() antes de usar.")
+
+ # 1. Extrai todos os parâmetros necessários do estado
+ params = generation_state.get("parametros_geracao", {}).get("pre_producao", {})
+ storyboard = [ato["resumo_ato"] for ato in generation_state.get("Atos", [])]
+ global_prompt = generation_state.get("Promt_geral", "")
+ general_ref_paths = [media["caminho"] for media in generation_state.get("midias_referencia", [])]
+
+ keyframe_resolution = params.get('resolution', 480)
+ initial_ref_path = general_ref_paths[0]
+
+ previous_prompt = ""
+        all_keyframes_data: List[Dict[str, Any]] = []
+        width, height = keyframe_resolution, keyframe_resolution
+        target_resolution_tuple = (width, height)
+
+        # Um keyframe é gerado para cada ato do storyboard (define o total a gerar).
+        num_keyframes_to_generate = len(storyboard)
+
+        logger.info(f"IMAGE SPECIALIST: Ordem para gerar {num_keyframes_to_generate} keyframes (versões LTX).")
+        ltx_conditioning_items0 = []
+
+ img_pil0 = Image.open(initial_ref_path).convert("RGB")
+ img_processed0 = self._preprocess_image_for_latent_conversion(img_pil0, target_resolution_tuple)
+ pixel_tensor0 = self._pil_to_pixel_tensor(img_processed0)
+
+ ltx_conditioning_items0.append(LatentConditioningItem(pixel_tensor0, 0, 0.05))
+ ltx_conditioning_items0.append(LatentConditioningItem(pixel_tensor0, 23, 0.05))
+
+        latent_tensorY = pixel_tensor0
+        latent_tensorX = latent_tensorY
+
+        current_base_image_path = initial_ref_path
+        past_base_image_path = initial_ref_path
+
+ for i in range(num_keyframes_to_generate):
+ scene_index = i + 1
+
+ current_scene = storyboard[i]
+ future_scene = storyboard[i + 1] if (i + 1) < len(storyboard) else "A cena final."
+ logger.info(f"--> Gerando Keyframe {scene_index}/{num_keyframes_to_generate}...")
+
+ img_prompt = deformes2d_thinker_singleton.get_anticipatory_keyframe_prompt(
+ global_prompt=global_prompt,
+ scene_history=previous_prompt,
+ current_scene_desc=current_scene,
+ future_scene_desc=future_scene,
+ last_image_path=past_base_image_path,
+ fixed_ref_paths=current_base_image_path
+ )
+
+ past_base_image_path = current_base_image_path
+
+            # Copia a lista base para não acumular itens de condicionamento entre as iterações.
+            ltx_conditioning_items = list(ltx_conditioning_items0)
+            ltx_conditioning_items.append(LatentConditioningItem(latent_tensorX, 0, 0.4))
+            ltx_conditioning_items.append(LatentConditioningItem(latent_tensorY, 8, 0.6))
+
+ latent_tensorX = latent_tensorY
+
+ ltx_base_params = {"guidance_scale": 1.0, "stg_scale": 0.001, "num_inference_steps": 25}
+ generated_latents, _ = ltx_manager_singleton.generate_latent_fragment(
+ height=height, width=width,
+ conditioning_items_data=ltx_conditioning_items,
+ motion_prompt=img_prompt,
+ video_total_frames=24, video_fps=24,
+ **ltx_base_params
+ )
+
+ final_latent = generated_latents[:, :, -1:, :, :]
+ #upscaled_latent = latent_enhancer_specialist_singleton.upscale(final_latent)
+ enriched_pixel_tensor = vae_manager_singleton.decode(final_latent)
+
+ pixel_path = os.path.join(self.workspace_dir, f"keyframe_{scene_index:04d}_pixel.png")
+ latent_path = os.path.join(self.workspace_dir, f"keyframe_{scene_index:04d}_latent.pt")
+ self.save_image_from_tensor(enriched_pixel_tensor, pixel_path)
+ torch.save(final_latent.cpu(), latent_path)
+
+            # Mantém o latente final em memória para condicionar o próximo keyframe;
+            # atribuir o caminho do arquivo aqui quebraria o LatentConditioningItem.
+            latent_tensorY = final_latent
+ past_base_image_path = current_base_image_path
+
+ keyframe_data = {
+ "id": scene_index,
+ "caminho_pixel": pixel_path,
+ "caminho_latent": latent_path,
+ "prompt_keyframe": img_prompt
+ }
+
+ all_keyframes_data.append(keyframe_data)
+
+
+ current_base_image_path = pixel_path
+ previous_prompt = img_prompt
+
+ logger.info("IMAGE SPECIALIST: Geração de todos os dados de keyframes completa.")
+ return all_keyframes_data
+
+ # --- FUNÇÕES HELPER ---
+
+ def _preprocess_image_for_latent_conversion(self, image: Image.Image, target_resolution: tuple) -> Image.Image:
+ if image.size != target_resolution:
+ return ImageOps.fit(image, target_resolution, Image.Resampling.LANCZOS)
+ return image
+
+ def _pil_to_pixel_tensor(self, pil_image: Image.Image) -> torch.Tensor:
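+        # Converte a imagem PIL (H, W, C em [0, 255]) para um tensor (1, C, 1, H, W) normalizado em [-1, 1].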
+ image_np = np.array(pil_image).astype(np.float32) / 255.0
+ tensor = torch.from_numpy(image_np).permute(2, 0, 1).unsqueeze(0).unsqueeze(2)
+ return (tensor * 2.0) - 1.0
+
+ def save_image_from_tensor(self, pixel_tensor: torch.Tensor, path: str):
+ tensor_chw = pixel_tensor.squeeze(0).squeeze(1)
+ tensor_hwc = tensor_chw.permute(1, 2, 0)
+ tensor_hwc = (tensor_hwc.clamp(-1, 1) + 1) / 2.0
+ image_np = (tensor_hwc.cpu().float().numpy() * 255).astype(np.uint8)
+ Image.fromarray(image_np).save(path)
+
+# --- Instanciação Singleton ---
+# A instância é criada, mas não configurada ainda. O Orchestrator fará isso.
+deformes3d_engine_singleton = Deformes3DEngine()
\ No newline at end of file
diff --git a/aduc_framework/engineers/deformes4D.py b/aduc_framework/engineers/deformes4D.py
new file mode 100644
index 0000000000000000000000000000000000000000..800422e51ec8b6c05e459b616c8ce9e03dc3625f
--- /dev/null
+++ b/aduc_framework/engineers/deformes4D.py
@@ -0,0 +1,235 @@
+# aduc_framework/engineers/deformes4D.py
+#
+# Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos
+#
+# Versão 3.1.1 (Com correção de limpeza de arquivos)
+#
+# Este engenheiro implementa a Câmera (Ψ) e o Destilador (Δ) da arquitetura
+# ADUC-SDR. Sua única responsabilidade é a geração sequencial de fragmentos de
+# vídeo com base em um conjunto de keyframes pré-definido.
+
+import os
+import time
+import imageio
+import numpy as np
+import torch
+import logging
+from PIL import Image, ImageOps
+import gc
+import shutil
+from pathlib import Path
+from typing import List, Tuple, Dict, Any, Callable, Optional
+
+# --- Imports Relativos Corrigidos ---
+from ..types import LatentConditioningItem
+from ..managers.ltx_manager import ltx_manager_singleton
+from ..managers.vae_manager import vae_manager_singleton
+from .deformes2D_thinker import deformes2d_thinker_singleton
+from ..tools.video_encode_tool import video_encode_tool_singleton
+
+logger = logging.getLogger(__name__)
+
+ProgressCallback = Optional[Callable[[float, str], None]]
+
+class Deformes4DEngine:
+ """
+ Orquestra a geração e concatenação de fragmentos de vídeo.
+ """
+ def __init__(self):
+ """O construtor é leve e não recebe argumentos."""
+ self.workspace_dir: Optional[str] = None
+ self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
+ logger.info("Deformes4DEngine instanciado (não inicializado).")
+
+ def initialize(self, workspace_dir: str):
+ """Inicializa o engenheiro com as configurações necessárias."""
+ if self.workspace_dir is not None:
+ return # Evita reinicialização
+ self.workspace_dir = workspace_dir
+ os.makedirs(self.workspace_dir, exist_ok=True)
+ logger.info(f"Deformes4D Specialist (Executor) inicializado com workspace: {self.workspace_dir}.")
+
+ def generate_original_movie(
+ self,
+ full_generation_state: Dict[str, Any],
+ progress_callback: ProgressCallback = None
+ ) -> Dict[str, Any]:
+ """
+ Gera o filme principal lendo todos os parâmetros do estado de geração.
+ """
+ if not self.workspace_dir:
+ raise RuntimeError("Deformes4DEngine não foi inicializado. Chame o método initialize() antes de usar.")
+
+ # 1. Extrai todos os parâmetros do estado de geração
+ pre_prod_params = full_generation_state.get("parametros_geracao", {}).get("pre_producao", {})
+ prod_params = full_generation_state.get("parametros_geracao", {}).get("producao", {})
+
+ keyframes_data = full_generation_state.get("Keyframe_atos", [])
+ global_prompt = full_generation_state.get("Promt_geral", "")
+ storyboard = [ato["resumo_ato"] for ato in full_generation_state.get("Atos", [])]
+ keyframe_paths = [kf["caminho_pixel"] for kf in keyframes_data]
+
+ seconds_per_fragment = pre_prod_params.get('duration_per_fragment', 4.0)
+ video_resolution = pre_prod_params.get('resolution', 480)
+
+ trim_percent = prod_params.get('trim_percent', 50)
+ handler_strength = prod_params.get('handler_strength', 0.5)
+ destination_convergence_strength = prod_params.get('destination_convergence_strength', 0.75)
+ guidance_scale = prod_params.get('guidance_scale', 2.0)
+ stg_scale = prod_params.get('stg_scale', 0.025)
+ num_inference_steps = prod_params.get('inference_steps', 20)
+
+ # 2. Inicia o processo de geração
+ FPS = 24
+ FRAMES_PER_LATENT_CHUNK = 8
+ LATENT_PROCESSING_CHUNK_SIZE = 4
+
+ run_timestamp = int(time.time())
+ temp_latent_dir = os.path.join(self.workspace_dir, f"temp_latents_{run_timestamp}")
+ temp_video_clips_dir = os.path.join(self.workspace_dir, f"temp_clips_{run_timestamp}")
+ os.makedirs(temp_latent_dir, exist_ok=True)
+ os.makedirs(temp_video_clips_dir, exist_ok=True)
+
+ total_frames_brutos = self._quantize_to_multiple(int(seconds_per_fragment * FPS), FRAMES_PER_LATENT_CHUNK)
+ frames_a_podar = self._quantize_to_multiple(int(total_frames_brutos * (trim_percent / 100)), FRAMES_PER_LATENT_CHUNK)
+ latents_a_podar = frames_a_podar // FRAMES_PER_LATENT_CHUNK
+ DEJAVU_FRAME_TARGET = frames_a_podar - 1 if frames_a_podar > 0 else 0
+ DESTINATION_FRAME_TARGET = total_frames_brutos - 1
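+        # Exemplo ilustrativo com os padrões (4.0 s, 24 FPS, trim de 50%): 96 frames brutos,
+        # 48 frames podados (6 chunks latentes), déjà-vu no frame 47 e destino no frame 95.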
+
+ base_ltx_params = {"guidance_scale": guidance_scale, "stg_scale": stg_scale, "num_inference_steps": num_inference_steps}
+ story_history = ""
+ target_resolution_tuple = (video_resolution, video_resolution)
+ eco_latent_for_next_loop, dejavu_latent_for_next_loop = None, None
+ latent_fragment_paths = []
+ video_fragments_data = []
+
+ if len(keyframe_paths) < 2:
+ raise ValueError(f"A geração requer pelo menos 2 keyframes. Fornecidos: {len(keyframe_paths)}.")
+ num_transitions_to_generate = len(keyframe_paths) - 1
+
+ logger.info("--- ESTÁGIO 1: Geração de Fragmentos Latentes ---")
+ for i in range(num_transitions_to_generate):
+ fragment_index = i + 1
+ if progress_callback:
+ progress_fraction = (i / num_transitions_to_generate) * 0.7
+ progress_callback(progress_fraction, f"Gerando Latente {fragment_index}/{num_transitions_to_generate}")
+
+ past_keyframe_path = keyframe_paths[i - 1] if i > 0 else keyframe_paths[i]
+ start_keyframe_path = keyframe_paths[i]
+ destination_keyframe_path = keyframe_paths[i + 1]
+ future_story_prompt = storyboard[i + 1] if (i + 1) < len(storyboard) else "A cena final."
+ decision = deformes2d_thinker_singleton.get_cinematic_decision(
+ global_prompt, story_history, past_keyframe_path, start_keyframe_path,
+ destination_keyframe_path, storyboard[i - 1] if i > 0 else "O início.",
+ storyboard[i], future_story_prompt
+ )
+ motion_prompt = decision["motion_prompt"]
+ story_history += f"\n- Ato {fragment_index}: {motion_prompt}"
+
+ conditioning_items = []
+ if eco_latent_for_next_loop is None:
+ img_start = self._preprocess_image_for_latent_conversion(Image.open(start_keyframe_path).convert("RGB"), target_resolution_tuple)
+ conditioning_items.append(LatentConditioningItem(self._pil_to_latent(img_start), 0, 1.0))
+ else:
+ conditioning_items.append(LatentConditioningItem(eco_latent_for_next_loop, 0, 1.0))
+ conditioning_items.append(LatentConditioningItem(dejavu_latent_for_next_loop, DEJAVU_FRAME_TARGET, handler_strength))
+
+ img_dest = self._preprocess_image_for_latent_conversion(Image.open(destination_keyframe_path).convert("RGB"), target_resolution_tuple)
+ conditioning_items.append(LatentConditioningItem(self._pil_to_latent(img_dest), DESTINATION_FRAME_TARGET, destination_convergence_strength))
+
+ latents_brutos, _ = ltx_manager_singleton.generate_latent_fragment(
+ height=video_resolution, width=video_resolution,
+ conditioning_items_data=conditioning_items, motion_prompt=motion_prompt,
+ video_total_frames=total_frames_brutos, video_fps=FPS,
+ **base_ltx_params
+ )
+
+ last_trim = latents_brutos[:, :, -(latents_a_podar+1):, :, :].clone()
+ eco_latent_for_next_loop = last_trim[:, :, :2, :, :].clone()
+ dejavu_latent_for_next_loop = last_trim[:, :, -1:, :, :].clone()
+ latents_video = latents_brutos[:, :, :-(latents_a_podar-1), :, :].clone()
+ del last_trim, latents_brutos; gc.collect(); torch.cuda.empty_cache()
+
+ cpu_latent = latents_video.cpu()
+ latent_path = os.path.join(temp_latent_dir, f"latent_fragment_{i:04d}.pt")
+ torch.save(cpu_latent, latent_path)
+ latent_fragment_paths.append(latent_path)
+
+ video_fragments_data.append({"id": i, "prompt_video": motion_prompt})
+ del latents_video, cpu_latent; gc.collect()
+
+ del eco_latent_for_next_loop, dejavu_latent_for_next_loop; gc.collect(); torch.cuda.empty_cache()
+
+ logger.info(f"--- ESTÁGIO 2: Processando {len(latent_fragment_paths)} latentes ---")
+ final_video_clip_paths = []
+ num_chunks = -(-len(latent_fragment_paths) // LATENT_PROCESSING_CHUNK_SIZE) if LATENT_PROCESSING_CHUNK_SIZE > 0 else 0
+ for i in range(num_chunks):
+ chunk_start_index = i * LATENT_PROCESSING_CHUNK_SIZE
+ chunk_end_index = chunk_start_index + LATENT_PROCESSING_CHUNK_SIZE
+ chunk_paths = latent_fragment_paths[chunk_start_index:chunk_end_index]
+
+ if progress_callback:
+ progress_fraction = 0.7 + (i / num_chunks * 0.28)
+ progress_callback(progress_fraction, f"Processando & Decodificando Lote {i+1}/{num_chunks}")
+
+ tensors_in_chunk = [torch.load(p, map_location=self.device) for p in chunk_paths]
+ sub_group_latent = torch.cat(tensors_in_chunk, dim=2)
+ del tensors_in_chunk; gc.collect(); torch.cuda.empty_cache()
+
+ pixel_tensor = vae_manager_singleton.decode(sub_group_latent)
+ del sub_group_latent; gc.collect(); torch.cuda.empty_cache()
+
+ base_name = f"clip_{i:04d}_{run_timestamp}"
+ current_clip_path = os.path.join(temp_video_clips_dir, f"{base_name}.mp4")
+ self.save_video_from_tensor(pixel_tensor, current_clip_path, fps=FPS)
+ final_video_clip_paths.append(current_clip_path)
+ del pixel_tensor; gc.collect(); torch.cuda.empty_cache()
+
+ if progress_callback: progress_callback(0.98, "Montando o filme final...")
+ final_video_path = os.path.join(self.workspace_dir, f"original_movie_{run_timestamp}.mp4")
+ video_encode_tool_singleton.concatenate_videos(final_video_clip_paths, final_video_path, self.workspace_dir)
+
+ try:
+ shutil.rmtree(temp_video_clips_dir)
+ # A linha que apagava 'temp_latent_dir' foi removida para persistir os latentes.
+ except OSError as e:
+ logger.warning(f"Não foi possível remover o diretório de clipes temporários: {e}")
+
+ logger.info(f"Processo completo! Vídeo original salvo em: {final_video_path}")
+
+ final_video_data_for_state = {
+ "id": 0, "caminho_pixel": final_video_path,
+ "caminhos_latentes_fragmentos": latent_fragment_paths,
+ "fragmentos_componentes": video_fragments_data
+ }
+
+ return {
+ "final_path": final_video_path,
+ "latent_paths": latent_fragment_paths,
+ "video_data": final_video_data_for_state
+ }
+
+ # --- FUNÇÕES HELPER ---
+ def save_video_from_tensor(self, video_tensor: torch.Tensor, path: str, fps: int = 24):
+ if video_tensor is None or video_tensor.ndim != 5 or video_tensor.shape[2] == 0: return
+ video_tensor = video_tensor.squeeze(0).permute(1, 2, 3, 0)
+ video_tensor = (video_tensor.clamp(-1, 1) + 1) / 2.0
+ video_np = (video_tensor.detach().cpu().float().numpy() * 255).astype(np.uint8)
+ with imageio.get_writer(path, fps=fps, codec='libx264', quality=8, output_params=['-pix_fmt', 'yuv420p']) as writer:
+ for frame in video_np: writer.append_data(frame)
+
+ def _preprocess_image_for_latent_conversion(self, image: Image.Image, target_resolution: tuple) -> Image.Image:
+ if image.size != target_resolution:
+ return ImageOps.fit(image, target_resolution, Image.Resampling.LANCZOS)
+ return image
+
+ def _pil_to_latent(self, pil_image: Image.Image) -> torch.Tensor:
+ image_np = np.array(pil_image).astype(np.float32) / 255.0
+ tensor = torch.from_numpy(image_np).permute(2, 0, 1).unsqueeze(0).unsqueeze(2)
+ tensor = (tensor * 2.0) - 1.0
+ return vae_manager_singleton.encode(tensor)
+
+ def _quantize_to_multiple(self, n: int, m: int) -> int:
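+        # Arredonda n para o múltiplo de m mais próximo, sem nunca devolver 0 para n > 0.
+        # Ex.: _quantize_to_multiple(90, 8) -> 88; _quantize_to_multiple(3, 8) -> 8.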
+ if m == 0: return n
+ quantized = int(round(n / m) * m)
+ return m if n > 0 and quantized == 0 else quantized
\ No newline at end of file
diff --git a/managers/LICENSE b/aduc_framework/managers/LICENSE
similarity index 100%
rename from managers/LICENSE
rename to aduc_framework/managers/LICENSE
diff --git a/managers/LICENSE.txt b/aduc_framework/managers/LICENSE.txt
similarity index 100%
rename from managers/LICENSE.txt
rename to aduc_framework/managers/LICENSE.txt
diff --git a/managers/NOTICE.md b/aduc_framework/managers/NOTICE.md
similarity index 100%
rename from managers/NOTICE.md
rename to aduc_framework/managers/NOTICE.md
diff --git a/managers/README.md b/aduc_framework/managers/README.md
similarity index 100%
rename from managers/README.md
rename to aduc_framework/managers/README.md
diff --git a/aduc_framework/managers/__init__.py b/aduc_framework/managers/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..a2be08eb433ed90c34ed24685f7e6e3b5a344a01
--- /dev/null
+++ b/aduc_framework/managers/__init__.py
@@ -0,0 +1,19 @@
+# aduc_framework/managers/__init__.py
+
+# Expõe os singletons e classes principais do sub-pacote de managers.
+
+from .gemini_manager import gemini_manager_singleton
+from .ltx_manager import ltx_manager_singleton
+from .vae_manager import vae_manager_singleton
+from .latent_enhancer_manager import latent_enhancer_specialist_singleton
+from .mmaudio_manager import mmaudio_manager_singleton
+from .seedvr_manager import seedvr_manager_singleton
+
+__all__ = [
+ "gemini_manager_singleton",
+ "ltx_manager_singleton",
+ "vae_manager_singleton",
+ "latent_enhancer_specialist_singleton",
+ "mmaudio_manager_singleton",
+ "seedvr_manager_singleton",
+]
\ No newline at end of file
diff --git a/managers/config.yaml b/aduc_framework/managers/config.yaml
similarity index 100%
rename from managers/config.yaml
rename to aduc_framework/managers/config.yaml
diff --git a/managers/flux_kontext_manager.py b/aduc_framework/managers/flux_kontext_manager.py
similarity index 99%
rename from managers/flux_kontext_manager.py
rename to aduc_framework/managers/flux_kontext_manager.py
index 34838c991f91c4385ae67a0dd5e3a266c2d81812..72ac80ede9e2103f97913b60c375c33eb1222ec9 100644
--- a/managers/flux_kontext_manager.py
+++ b/aduc_framework/managers/flux_kontext_manager.py
@@ -25,7 +25,7 @@ import threading
import yaml
import logging
-from tools.hardware_manager import hardware_manager
+from ..tools.hardware_manager import hardware_manager
logger = logging.getLogger(__name__)
diff --git a/managers/gemini_manager.py b/aduc_framework/managers/gemini_manager.py
similarity index 98%
rename from managers/gemini_manager.py
rename to aduc_framework/managers/gemini_manager.py
index 0d0f2b50c51c05e43a3b089df1f5694a7760959e..abb9a95c828587cad6e28b1f4adc3d28fd9d7290 100644
--- a/managers/gemini_manager.py
+++ b/aduc_framework/managers/gemini_manager.py
@@ -63,7 +63,7 @@ class GeminiManager:
self.api_key = os.environ.get("GEMINI_API_KEY")
if self.api_key:
genai.configure(api_key=self.api_key)
- self.model = genai.GenerativeModel('gemini-2.5-flash')
+ self.model = genai.GenerativeModel('gemini-2.0-flash')
logger.info("GeminiManager (Communication Layer) initialized successfully.")
else:
self.model = None
diff --git a/managers/latent_enhancer_manager.py b/aduc_framework/managers/latent_enhancer_manager.py
similarity index 98%
rename from managers/latent_enhancer_manager.py
rename to aduc_framework/managers/latent_enhancer_manager.py
index 00bf8055e2f4d4101d9e7500d30530bd8b204197..dc020c0bcc8a794ec4869c9916390aeb9dbd01d0 100644
--- a/managers/latent_enhancer_manager.py
+++ b/aduc_framework/managers/latent_enhancer_manager.py
@@ -19,7 +19,7 @@ import torch
import logging
import time
from diffusers import LTXLatentUpsamplePipeline
-from managers.ltx_manager import ltx_manager_singleton
+from ..managers.ltx_manager import ltx_manager_singleton
logger = logging.getLogger(__name__)
diff --git a/managers/ltx_manager.py b/aduc_framework/managers/ltx_manager.py
similarity index 54%
rename from managers/ltx_manager.py
rename to aduc_framework/managers/ltx_manager.py
index f35ab418c00f39ba807b4e2b202efc2e4efab9f5..a261485ddd0a8dc053bbba30520659900630f611 100644
--- a/managers/ltx_manager.py
+++ b/aduc_framework/managers/ltx_manager.py
@@ -1,22 +1,13 @@
-# managers/ltx_manager.py
-# AducSdr: Uma implementação aberta e funcional da arquitetura ADUC-SDR
-# Copyright (C) 4 de Agosto de 2025 Carlos Rodrigues dos Santos
+# aduc_framework/managers/ltx_manager.py
#
-# Contato:
-# Carlos Rodrigues dos Santos
-# carlex22@gmail.com
-# Rua Eduardo Carlos Pereira, 4125, B1 Ap32, Curitiba, PR, Brazil, CEP 8102025
+# Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos
#
-# Repositórios e Projetos Relacionados:
-# GitHub: https://github.com/carlex22/Aduc-sdr
+# Versão 2.3.2 (Com correção de manipulação de dataclass)
#
-# PENDING PATENT NOTICE: Please see NOTICE.md.
-#
-# Version: 2.2.2
-#
-# This file manages the LTX-Video specialist pool. It has been refactored to be
-# self-contained by automatically cloning its own dependencies and using a local
-# utility module for pipeline creation, fully decoupling it from external scripts.
+# Este manager é responsável por controlar a pipeline LTX-Video. Ele gerencia
+# um pool de workers para otimizar o uso de múltiplas GPUs, lida com a inicialização
+# e o setup de dependências complexas, e expõe uma interface de alto nível para a
+# geração de fragmentos de vídeo no espaço latente.
import torch
import gc
@@ -31,18 +22,19 @@ import subprocess
from pathlib import Path
from typing import Optional, List, Tuple, Union
-from tools.optimization import optimize_ltx_worker, can_optimize_fp8
-from tools.hardware_manager import hardware_manager
-from aduc_types import LatentConditioningItem
+# --- Imports Relativos Corrigidos ---
+from ..types import LatentConditioningItem
+from ..tools.optimization import optimize_ltx_worker, can_optimize_fp8
+from ..tools.hardware_manager import hardware_manager
logger = logging.getLogger(__name__)
-# --- Dependency Management ---
+# --- Gerenciamento de Dependências e Placeholders ---
DEPS_DIR = Path("./deps")
LTX_VIDEO_REPO_DIR = DEPS_DIR / "LTX-Video"
LTX_VIDEO_REPO_URL = "https://github.com/Lightricks/LTX-Video.git"
-# --- Placeholders for lazy-loaded modules ---
+# Placeholders para módulos importados tardiamente (lazy-loaded)
create_ltx_video_pipeline = None
calculate_padding = None
LTXVideoPipeline = None
@@ -54,11 +46,10 @@ randn_tensor = None
class LtxPoolManager:
"""
- Manages a pool of LtxWorkers for optimized multi-GPU usage.
- Handles its own code dependencies by cloning the LTX-Video repository.
+ Gerencia um pool de LtxWorkers e expõe a pipeline de aprimoramento de prompt.
"""
- def __init__(self, device_ids, ltx_config_file_name):
- logger.info(f"LTX POOL MANAGER: Creating workers for devices: {device_ids}")
+ def __init__(self, device_ids: List[str], ltx_config_file_name: str):
+ logger.info(f"LTX POOL MANAGER: Criando workers para os dispositivos: {device_ids}")
self._ltx_modules_loaded = False
self._setup_dependencies()
self._lazy_load_ltx_modules()
@@ -69,61 +60,65 @@ class LtxPoolManager:
self.current_worker_index = 0
self.lock = threading.Lock()
+ self.prompt_enhancement_pipeline = self.workers[0].pipeline if self.workers else None
+ if self.prompt_enhancement_pipeline:
+ logger.info("LTX POOL MANAGER: Pipeline de aprimoramento de prompt exposta para outros especialistas.")
+
self._apply_ltx_pipeline_patches()
if all(w.device.type == 'cuda' for w in self.workers):
- logger.info("LTX POOL MANAGER: HOT START MODE ENABLED. Pre-warming all GPUs...")
+ logger.info("LTX POOL MANAGER: MODO HOT START ATIVADO. Pré-aquecendo todas as GPUs...")
for worker in self.workers:
worker.to_gpu()
- logger.info("LTX POOL MANAGER: All GPUs are hot and ready.")
+ logger.info("LTX POOL MANAGER: Todas as GPUs estão prontas.")
else:
- logger.info("LTX POOL MANAGER: Operating in CPU or mixed mode. GPU pre-warming skipped.")
+ logger.info("LTX POOL MANAGER: Operando em modo CPU ou misto. Pré-aquecimento de GPU pulado.")
def _setup_dependencies(self):
- """Clones the LTX-Video repo if not found and adds it to the system path."""
+ """Clona o repositório LTX-Video se não encontrado e o adiciona ao sys.path."""
if not LTX_VIDEO_REPO_DIR.exists():
- logger.info(f"LTX-Video repository not found at '{LTX_VIDEO_REPO_DIR}'. Cloning from GitHub...")
+ logger.info(f"Repositório LTX-Video não encontrado em '{LTX_VIDEO_REPO_DIR}'. Clonando do GitHub...")
try:
DEPS_DIR.mkdir(exist_ok=True)
subprocess.run(
- ["git", "clone", LTX_VIDEO_REPO_URL, str(LTX_VIDEO_REPO_DIR)],
+ ["git", "clone", "--depth", "1", LTX_VIDEO_REPO_URL, str(LTX_VIDEO_REPO_DIR)],
check=True, capture_output=True, text=True
)
- logger.info("LTX-Video repository cloned successfully.")
+ logger.info("Repositório LTX-Video clonado com sucesso.")
except subprocess.CalledProcessError as e:
- logger.error(f"Failed to clone LTX-Video repository. Git stderr: {e.stderr}")
- raise RuntimeError("Could not clone the required LTX-Video dependency from GitHub.")
+ logger.error(f"Falha ao clonar o repositório LTX-Video. Git stderr: {e.stderr}")
+ raise RuntimeError("Não foi possível clonar a dependência LTX-Video do GitHub.")
else:
- logger.info("Found local LTX-Video repository.")
+ logger.info("Repositório LTX-Video local encontrado.")
if str(LTX_VIDEO_REPO_DIR.resolve()) not in sys.path:
sys.path.insert(0, str(LTX_VIDEO_REPO_DIR.resolve()))
- logger.info(f"Added '{LTX_VIDEO_REPO_DIR.resolve()}' to sys.path.")
-
+ logger.info(f"Adicionado '{LTX_VIDEO_REPO_DIR.resolve()}' ao sys.path.")
+
def _lazy_load_ltx_modules(self):
- """Dynamically imports LTX-Video modules after ensuring the repo exists."""
+ """Importa dinamicamente os módulos do LTX-Video após garantir que o repositório existe."""
if self._ltx_modules_loaded:
return
global create_ltx_video_pipeline, calculate_padding, LTXVideoPipeline, ConditioningItem, LTXMultiScalePipeline
global vae_encode, latent_to_pixel_coords, randn_tensor
- from managers.ltx_pipeline_utils import create_ltx_video_pipeline, calculate_padding
+ from .ltx_pipeline_utils import create_ltx_video_pipeline, calculate_padding
from ltx_video.pipelines.pipeline_ltx_video import LTXVideoPipeline, ConditioningItem, LTXMultiScalePipeline
from ltx_video.models.autoencoders.vae_encode import vae_encode, latent_to_pixel_coords
from diffusers.utils.torch_utils import randn_tensor
self._ltx_modules_loaded = True
- logger.info("LTX-Video modules have been dynamically loaded.")
+ logger.info("Módulos do LTX-Video foram carregados dinamicamente.")
def _apply_ltx_pipeline_patches(self):
- """Applies runtime patches to the LTX pipeline for ADUC-SDR compatibility."""
- logger.info("LTX POOL MANAGER: Applying ADUC-SDR patches to LTX pipeline...")
+ """Aplica patches em tempo de execução na pipeline LTX para compatibilidade com ADUC-SDR."""
+ logger.info("LTX POOL MANAGER: Aplicando patches ADUC-SDR na pipeline LTX...")
for worker in self.workers:
worker.pipeline.prepare_conditioning = _aduc_prepare_conditioning_patch.__get__(worker.pipeline, LTXVideoPipeline)
- logger.info("LTX POOL MANAGER: All pipeline instances have been patched successfully.")
+ logger.info("LTX POOL MANAGER: Todas as instâncias da pipeline foram corrigidas com sucesso.")
- def _get_next_worker(self):
+ def _get_next_worker(self) -> 'LtxWorker':
with self.lock:
worker = self.workers[self.current_worker_index]
self.current_worker_index = (self.current_worker_index + 1) % len(self.workers)
@@ -144,63 +139,72 @@ class LtxPoolManager:
pipeline_params["latents"] = kwargs['latents'].to(worker.device, dtype=worker.pipeline.transformer.dtype)
if 'strength' in kwargs:
pipeline_params["strength"] = kwargs['strength']
+
if 'conditioning_items_data' in kwargs:
final_conditioning_items = []
for item in kwargs['conditioning_items_data']:
+ # CORREÇÃO: Como LatentConditioningItem é uma dataclass mutável,
+ # nós modificamos o atributo diretamente no dispositivo do worker.
item.latent_tensor = item.latent_tensor.to(worker.device)
final_conditioning_items.append(item)
pipeline_params["conditioning_items"] = final_conditioning_items
+
if worker.is_distilled:
- logger.info(f"Worker {worker.device} is using a distilled model. Using fixed timesteps.")
fixed_timesteps = worker.config.get("first_pass", {}).get("timesteps")
- pipeline_params["timesteps"] = fixed_timesteps
if fixed_timesteps:
+ pipeline_params["timesteps"] = fixed_timesteps
pipeline_params["num_inference_steps"] = len(fixed_timesteps)
+
+ callback = kwargs.get('callback')
+ if callback:
+ pipeline_params["callback_on_step_end"] = callback
+ pipeline_params["callback_on_step_end_tensor_inputs"] = ["latents"]
+
return pipeline_params
- def generate_latent_fragment(self, **kwargs) -> (torch.Tensor, tuple):
+ def generate_latent_fragment(self, **kwargs) -> Tuple[torch.Tensor, tuple]:
worker_to_use = self._get_next_worker()
try:
height, width = kwargs['height'], kwargs['width']
padded_h, padded_w = ((height - 1) // 32 + 1) * 32, ((width - 1) // 32 + 1) * 32
padding_vals = calculate_padding(height, width, padded_h, padded_w)
kwargs['height'], kwargs['width'] = padded_h, padded_w
+
pipeline_params = self._prepare_pipeline_params(worker_to_use, **kwargs)
- logger.info(f"Initiating GENERATION on {worker_to_use.device} with shape {padded_w}x{padded_h}")
+
+ logger.info(f"Iniciando GERAÇÃO em {worker_to_use.device} com shape {padded_w}x{padded_h}")
+
if isinstance(worker_to_use.pipeline, LTXMultiScalePipeline):
result = worker_to_use.pipeline.video_pipeline(**pipeline_params).images
else:
result = worker_to_use.generate_video_fragment_internal(**pipeline_params)
return result, padding_vals
except Exception as e:
- logger.error(f"LTX POOL MANAGER: Error during generation on {worker_to_use.device}: {e}", exc_info=True)
+ logger.error(f"LTX POOL MANAGER: Erro durante a geração em {worker_to_use.device}: {e}", exc_info=True)
raise e
finally:
if worker_to_use and worker_to_use.device.type == 'cuda':
with torch.cuda.device(worker_to_use.device):
- gc.collect(); torch.cuda.empty_cache()
+ gc.collect()
+ torch.cuda.empty_cache()
- def refine_latents(self, latents_to_refine: torch.Tensor, **kwargs) -> (torch.Tensor, tuple):
- # This function can be expanded later if needed.
- pass
+ def refine_latents(self, latents_to_refine: torch.Tensor, **kwargs) -> Tuple[torch.Tensor, tuple]:
+ pass # Placeholder
class LtxWorker:
- """
- Represents a single instance of the LTX-Video pipeline on a specific device.
- """
+ """Representa uma única instância da pipeline LTX-Video em um dispositivo específico."""
def __init__(self, device_id, ltx_config_file):
self.cpu_device = torch.device('cpu')
self.device = torch.device(device_id if torch.cuda.is_available() else 'cpu')
- logger.info(f"LTX Worker ({self.device}): Initializing with config '{ltx_config_file}'...")
+ logger.info(f"LTX Worker ({self.device}): Inicializando com config '{ltx_config_file}'...")
with open(ltx_config_file, "r") as file:
self.config = yaml.safe_load(file)
self.is_distilled = "distilled" in self.config.get("checkpoint_path", "")
-
models_dir = LTX_VIDEO_REPO_DIR / "models_downloaded"
- logger.info(f"LTX Worker ({self.device}): Preparing to load model...")
+ logger.info(f"LTX Worker ({self.device}): Preparando para carregar modelo...")
model_filename = self.config["checkpoint_path"]
model_path = huggingface_hub.hf_hub_download(
repo_id="Lightricks/LTX-Video", filename=model_filename,
@@ -214,22 +218,20 @@ class LtxWorker:
sampler=self.config["sampler"],
device='cpu'
)
- logger.info(f"LTX Worker ({self.device}): Model ready on CPU. Is distilled model? {self.is_distilled}")
+ logger.info(f"LTX Worker ({self.device}): Modelo pronto na CPU. É um modelo distilled? {self.is_distilled}")
def to_gpu(self):
if self.device.type == 'cpu': return
- logger.info(f"LTX Worker: Moving pipeline to GPU {self.device}...")
+ logger.info(f"LTX Worker: Movendo pipeline para a GPU {self.device}...")
self.pipeline.to(self.device)
if self.device.type == 'cuda' and can_optimize_fp8():
- logger.info(f"LTX Worker ({self.device}): FP8 supported GPU detected. Optimizing...")
+ logger.info(f"LTX Worker ({self.device}): GPU com suporte a FP8 detectada. Otimizando...")
optimize_ltx_worker(self)
- logger.info(f"LTX Worker ({self.device}): Optimization complete.")
- elif self.device.type == 'cuda':
- logger.info(f"LTX Worker ({self.device}): FP8 optimization not supported or disabled.")
-
+ logger.info(f"LTX Worker ({self.device}): Otimização completa.")
+
def to_cpu(self):
if self.device.type == 'cpu': return
- logger.info(f"LTX Worker: Unloading pipeline from GPU {self.device}...")
+ logger.info(f"LTX Worker: Descarregando pipeline da GPU {self.device}...")
self.pipeline.to('cpu')
gc.collect()
if torch.cuda.is_available(): torch.cuda.empty_cache()
@@ -237,10 +239,9 @@ class LtxWorker:
def generate_video_fragment_internal(self, **kwargs):
return self.pipeline(**kwargs).images
-
def _aduc_prepare_conditioning_patch(
- self: LTXVideoPipeline,
- conditioning_items: Optional[List[Union[ConditioningItem, "LatentConditioningItem"]]],
+ self: "LTXVideoPipeline",
+ conditioning_items: Optional[List[Union["ConditioningItem", "LatentConditioningItem"]]],
init_latents: torch.Tensor,
num_frames: int,
height: int,
@@ -252,62 +253,52 @@ def _aduc_prepare_conditioning_patch(
init_latents, init_latent_coords = self.patchifier.patchify(latents=init_latents)
init_pixel_coords = latent_to_pixel_coords(init_latent_coords, self.vae, causal_fix=self.transformer.config.causal_temporal_positioning)
return init_latents, init_pixel_coords, None, 0
- init_conditioning_mask = torch.zeros(init_latents[:, 0, :, :, :].shape, dtype=torch.float32, device=init_latents.device)
+
+ init_conditioning_mask = torch.zeros_like(init_latents[:, 0, ...], dtype=torch.float32, device=init_latents.device)
extra_conditioning_latents, extra_conditioning_pixel_coords, extra_conditioning_mask = [], [], []
extra_conditioning_num_latents = 0
- is_latent_mode = hasattr(conditioning_items[0], 'latent_tensor')
- if is_latent_mode:
- for item in conditioning_items:
- media_item_latents = item.latent_tensor.to(dtype=init_latents.dtype, device=init_latents.device)
- media_frame_number, strength = item.media_frame_number, item.conditioning_strength
- if media_frame_number == 0:
- f_l, h_l, w_l = media_item_latents.shape[-3:]
- init_latents[:, :, :f_l, :h_l, :w_l] = torch.lerp(init_latents[:, :, :f_l, :h_l, :w_l], media_item_latents, strength)
- init_conditioning_mask[:, :f_l, :h_l, :w_l] = strength
- else:
- noise = randn_tensor(media_item_latents.shape, generator=generator, device=media_item_latents.device, dtype=media_item_latents.dtype)
- media_item_latents = torch.lerp(noise, media_item_latents, strength)
- patched_latents, latent_coords = self.patchifier.patchify(latents=media_item_latents)
- pixel_coords = latent_to_pixel_coords(latent_coords, self.vae, causal_fix=self.transformer.config.causal_temporal_positioning)
- pixel_coords[:, 0] += media_frame_number
- extra_conditioning_num_latents += patched_latents.shape[1]
- new_mask = torch.full(patched_latents.shape[:2], strength, dtype=torch.float32, device=init_latents.device)
- extra_conditioning_latents.append(patched_latents)
- extra_conditioning_pixel_coords.append(pixel_coords)
- extra_conditioning_mask.append(new_mask)
- else:
- for item in conditioning_items:
- if not isinstance(item, ConditioningItem): continue
- item = self._resize_conditioning_item(item, height, width)
- media_item_latents = vae_encode(item.media_item.to(dtype=self.vae.dtype, device=self.vae.device), self.vae, vae_per_channel_normalize=vae_per_channel_normalize).to(dtype=init_latents.dtype)
- if item.media_frame_number == 0:
- media_item_latents, l_x, l_y = self._get_latent_spatial_position(media_item_latents, item, height, width, strip_latent_border=True)
- f_l, h_l, w_l = media_item_latents.shape[-3:]
- init_latents[:, :, :f_l, l_y:l_y+h_l, l_x:l_x+w_l] = torch.lerp(init_latents[:, :, :f_l, l_y:l_y+h_l, l_x:l_x+w_l], media_item_latents, item.conditioning_strength)
- init_conditioning_mask[:, :f_l, l_y:l_y+h_l, l_x:l_x+w_l] = item.conditioning_strength
- else:
- logger.warning("Pixel-based conditioning for non-zero frames is not fully implemented in this patch.")
+ for item in conditioning_items:
+ if not isinstance(item, LatentConditioningItem):
+ logger.warning("Patch ADUC: Item de condicionamento não é um LatentConditioningItem e será ignorado.")
+ continue
+
+ media_item_latents = item.latent_tensor.to(dtype=init_latents.dtype, device=init_latents.device)
+ media_frame_number, strength = item.media_frame_number, item.conditioning_strength
+
+ if media_frame_number == 0:
+ f_l, h_l, w_l = media_item_latents.shape[-3:]
+ init_latents[..., :f_l, :h_l, :w_l] = torch.lerp(init_latents[..., :f_l, :h_l, :w_l], media_item_latents, strength)
+ init_conditioning_mask[..., :f_l, :h_l, :w_l] = strength
+ else:
+ noise = randn_tensor(media_item_latents.shape, generator=generator, device=media_item_latents.device, dtype=media_item_latents.dtype)
+ media_item_latents = torch.lerp(noise, media_item_latents, strength)
+ patched_latents, latent_coords = self.patchifier.patchify(latents=media_item_latents)
+ pixel_coords = latent_to_pixel_coords(latent_coords, self.vae, causal_fix=self.transformer.config.causal_temporal_positioning)
+ pixel_coords[:, 0] += media_frame_number
+ extra_conditioning_num_latents += patched_latents.shape[1]
+ new_mask = torch.full(patched_latents.shape[:2], strength, dtype=torch.float32, device=init_latents.device)
+ extra_conditioning_latents.append(patched_latents)
+ extra_conditioning_pixel_coords.append(pixel_coords)
+ extra_conditioning_mask.append(new_mask)
+
init_latents, init_latent_coords = self.patchifier.patchify(latents=init_latents)
init_pixel_coords = latent_to_pixel_coords(init_latent_coords, self.vae, causal_fix=self.transformer.config.causal_temporal_positioning)
init_conditioning_mask, _ = self.patchifier.patchify(latents=init_conditioning_mask.unsqueeze(1))
init_conditioning_mask = init_conditioning_mask.squeeze(-1)
+
if extra_conditioning_latents:
init_latents = torch.cat([*extra_conditioning_latents, init_latents], dim=1)
init_pixel_coords = torch.cat([*extra_conditioning_pixel_coords, init_pixel_coords], dim=2)
init_conditioning_mask = torch.cat([*extra_conditioning_mask, init_conditioning_mask], dim=1)
- if self.transformer.use_tpu_flash_attention:
- init_latents = init_latents[:, :-extra_conditioning_num_latents]
- init_pixel_coords = init_pixel_coords[:, :, :-extra_conditioning_num_latents]
- init_conditioning_mask = init_conditioning_mask[:, :-extra_conditioning_num_latents]
+
return init_latents, init_pixel_coords, init_conditioning_mask, extra_conditioning_num_latents
-
-# --- Singleton Instantiation ---
+# --- Instanciação Singleton ---
with open("config.yaml", 'r') as f:
config = yaml.safe_load(f)
ltx_gpus_required = config['specialists']['ltx']['gpus_required']
ltx_device_ids = hardware_manager.allocate_gpus('LTX', ltx_gpus_required)
ltx_config_filename = config['specialists']['ltx']['config_file']
ltx_manager_singleton = LtxPoolManager(device_ids=ltx_device_ids, ltx_config_file_name=ltx_config_filename)
-logger.info("Video Specialist (LTX) ready.")
\ No newline at end of file
+logger.info("Especialista de Vídeo (LTX) pronto.")
\ No newline at end of file
diff --git a/managers/ltx_pipeline_utils.py b/aduc_framework/managers/ltx_pipeline_utils.py
similarity index 100%
rename from managers/ltx_pipeline_utils.py
rename to aduc_framework/managers/ltx_pipeline_utils.py
diff --git a/aduc_framework/managers/mmaudio_manager.py b/aduc_framework/managers/mmaudio_manager.py
new file mode 100644
index 0000000000000000000000000000000000000000..b89f94598d37f1445b13d4451f4df20e77d7a7a0
--- /dev/null
+++ b/aduc_framework/managers/mmaudio_manager.py
@@ -0,0 +1,226 @@
+# aduc_framework/managers/mmaudio_manager.py
+#
+# Copyright (C) 2025 Carlos Rodrigues dos Santos
+#
+# Version: 3.0.0 (GPU Pool Manager)
+#
+# Esta versão refatora o MMAudioManager para um modelo de Pool com Workers,
+# permitindo o uso de múltiplas GPUs dedicadas para a geração de áudio
+# com um sistema de rodízio para gerenciamento eficiente de VRAM.
+
+import torch
+import logging
+import subprocess
+import os
+import time
+import yaml
+import gc
+import threading
+from pathlib import Path
+import gradio as gr
+import sys
+
+# Imports relativos para o hardware_manager
+from ..tools.hardware_manager import hardware_manager
+
+logger = logging.getLogger(__name__)
+
+# --- Gerenciamento de Dependências ---
+DEPS_DIR = Path("./deps")
+MMAUDIO_REPO_DIR = DEPS_DIR / "MMAudio"
+MMAUDIO_REPO_URL = "https://github.com/hkchengrex/MMAudio.git"
+
+# Lazy-loaded imports
+ModelConfig, all_model_cfg, mmaudio_generate, load_video, make_video = None, None, None, None, None
+MMAudio, get_my_mmaudio = None, None
+FeaturesUtils = None
+SequenceConfig = None
+FlowMatching = None
+
+class MMAudioWorker:
+ """Representa uma única instância do pipeline MMAudio em um dispositivo."""
+ def __init__(self, device_id: str):
+ self.device = torch.device(device_id)
+ self.cpu_device = torch.device("cpu")
+ self.dtype = torch.bfloat16 if 'cuda' in self.device.type else torch.float32
+
+ self.net: 'MMAudio' = None
+ self.feature_utils: 'FeaturesUtils' = None
+ self.seq_cfg: 'SequenceConfig' = None
+ self.model_config: 'ModelConfig' = None
+
+ self._check_and_run_global_setup()
+ self._lazy_load_mmaudio_modules()
+ logger.info(f"MMAudio Worker inicializado para o dispositivo {self.device}.")
+
+ def _lazy_load_mmaudio_modules(self):
+ """Importa dinamicamente os módulos do MMAudio."""
+ global ModelConfig, all_model_cfg, mmaudio_generate, load_video, make_video, MMAudio, get_my_mmaudio, FeaturesUtils, SequenceConfig, FlowMatching
+ if MMAudio is not None: return
+
+ from mmaudio.eval_utils import ModelConfig, all_model_cfg, generate as mmaudio_generate, load_video, make_video
+ from mmaudio.model.flow_matching import FlowMatching
+ from mmaudio.model.networks import MMAudio, get_my_mmaudio
+ from mmaudio.model.utils.features_utils import FeaturesUtils
+ from mmaudio.model.sequence_config import SequenceConfig
+ logger.info("Módulos do MMAudio foram carregados dinamicamente.")
+
+ @staticmethod
+ def _check_and_run_global_setup():
+ """Executa o setup de clonagem do repositório e download de modelos uma única vez."""
+ setup_flag = DEPS_DIR / "mmaudio.setup.complete"
+ if setup_flag.exists():
+ return True
+
+ logger.info("--- Iniciando Setup Global do MMAudio (primeira execução) ---")
+ if not MMAUDIO_REPO_DIR.exists():
+ DEPS_DIR.mkdir(exist_ok=True)
+ subprocess.run(["git", "clone", "--depth", "1", MMAUDIO_REPO_URL, str(MMAUDIO_REPO_DIR)], check=True)
+
+ if str(MMAUDIO_REPO_DIR.resolve()) not in sys.path:
+ sys.path.insert(0, str(MMAUDIO_REPO_DIR.resolve()))
+
+ # Importar após adicionar ao path
+ from mmaudio.eval_utils import all_model_cfg as cfg
+
+ # Ajustar caminhos e baixar modelos
+ for cfg_key in cfg:
+ config = cfg[cfg_key]
+ config.model_path = MMAUDIO_REPO_DIR / config.model_path
+ config.vae_path = MMAUDIO_REPO_DIR / config.vae_path
+ if config.bigvgan_16k_path:
+ config.bigvgan_16k_path = MMAUDIO_REPO_DIR / config.bigvgan_16k_path
+ config.synchformer_ckpt = MMAUDIO_REPO_DIR / config.synchformer_ckpt
+ config.download_if_needed()
+
+ setup_flag.touch()
+ logger.info("--- Setup Global do MMAudio Concluído ---")
+ return True
+
+ def initialize_models(self):
+ """Carrega os modelos do worker para a CPU e depois para a GPU designada."""
+ if self.net is not None: return
+
+ self.model_config = all_model_cfg['large_44k_v2']
+ self.seq_cfg = self.model_config.seq_cfg
+
+ logger.info(f"Worker {self.device}: Carregando modelo MMAudio para a CPU...")
+ self.net = get_my_mmaudio(self.model_config.model_name).eval()
+ self.net.load_weights(torch.load(self.model_config.model_path, map_location=self.cpu_device, weights_only=True))
+
+ self.feature_utils = FeaturesUtils(
+ tod_vae_ckpt=self.model_config.vae_path,
+ synchformer_ckpt=self.model_config.synchformer_ckpt,
+ enable_conditions=True, mode=self.model_config.mode,
+ bigvgan_vocoder_ckpt=self.model_config.bigvgan_16k_path,
+ need_vae_encoder=False
+ ).eval()
+
+ self.net.to(self.device, self.dtype)
+ self.feature_utils.to(self.device, self.dtype)
+ logger.info(f"Worker {self.device}: Modelos MMAudio prontos na VRAM.")
+
+ def unload_models(self):
+ """Descarrega os modelos da VRAM, movendo-os para a CPU."""
+ if self.net is None: return
+ logger.info(f"Worker {self.device}: Descarregando modelos MMAudio da VRAM...")
+ self.net.to(self.cpu_device)
+ self.feature_utils.to(self.cpu_device)
+ del self.net, self.feature_utils, self.seq_cfg, self.model_config
+ self.net, self.feature_utils, self.seq_cfg, self.model_config = None, None, None, None
+ gc.collect()
+ if torch.cuda.is_available(): torch.cuda.empty_cache()
+
+ def generate_audio_internal(self, video_path: str, prompt: str, duration_seconds: float, output_path: str) -> str:
+ """Lógica de geração de áudio que roda na GPU do worker."""
+ negative_prompt = "human voice, speech, talking, singing, narration"
+ rng = torch.Generator(device=self.device).manual_seed(int(time.time()))
+ fm = FlowMatching(min_sigma=0, inference_mode='euler', num_steps=25)
+
+ video_info = load_video(Path(video_path), duration_seconds)
+ self.seq_cfg.duration = video_info.duration_sec
+ self.net.update_seq_lengths(self.seq_cfg.latent_seq_len, self.seq_cfg.clip_seq_len, self.seq_cfg.sync_seq_len)
+
+ with torch.no_grad():
+ audios = mmaudio_generate(
+ clip_video=video_info.clip_frames.unsqueeze(0).to(self.device, self.dtype),
+ sync_video=video_info.sync_frames.unsqueeze(0).to(self.device, self.dtype),
+ text=[prompt], negative_text=[negative_prompt],
+ feature_utils=self.feature_utils, net=self.net, fm=fm, rng=rng, cfg_strength=4.5
+ )
+ audio_waveform = audios.float().cpu()[0]
+
+ make_video(video_info, Path(output_path), audio_waveform, sampling_rate=self.seq_cfg.sampling_rate)
+ return output_path
+
+class MMAudioPoolManager:
+ def __init__(self, device_ids: list[str], workspace_dir: str):
+ logger.info(f"MMAUDIO POOL MANAGER: Criando workers para os dispositivos: {device_ids}")
+ self.workspace_dir = workspace_dir
+ if not device_ids or 'cpu' in device_ids:
+ raise ValueError("MMAudioPoolManager requer GPUs dedicadas.")
+ self.workers = [MMAudioWorker(device_id) for device_id in device_ids]
+ self.current_worker_index = 0
+ self.lock = threading.Lock()
+ self.last_cleanup_thread = None
+
+ def _cleanup_worker_thread(self, worker: MMAudioWorker):
+ logger.info(f"MMAUDIO CLEANUP THREAD: Iniciando limpeza de {worker.device} em background...")
+ worker.unload_models()
+
+ def generate_audio_for_video(self, video_path: str, prompt: str, duration_seconds: float, output_path_override: str = None) -> str:
+ if duration_seconds < 1:
+ logger.warning(f"Vídeo muito curto ({duration_seconds:.2f}s). Pulando geração de áudio.")
+ return video_path
+
+ worker_to_use = None
+ try:
+ with self.lock:
+ if self.last_cleanup_thread and self.last_cleanup_thread.is_alive():
+ self.last_cleanup_thread.join()
+
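+ # Estratégia de revezamento ("ping-pong"): o próximo worker da fila atende a
+ # requisição atual, enquanto o worker usado na chamada anterior é descarregado
+ # em background, limitando a VRAM ocupada simultaneamente.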
+ worker_to_use = self.workers[self.current_worker_index]
+ previous_worker_index = (self.current_worker_index - 1 + len(self.workers)) % len(self.workers)
+ worker_to_cleanup = self.workers[previous_worker_index]
+
+ cleanup_thread = threading.Thread(target=self._cleanup_worker_thread, args=(worker_to_cleanup,))
+ cleanup_thread.start()
+ self.last_cleanup_thread = cleanup_thread
+
+ worker_to_use.initialize_models()
+ self.current_worker_index = (self.current_worker_index + 1) % len(self.workers)
+
+ logger.info(f"MMAUDIO POOL MANAGER: Gerando áudio em {worker_to_use.device}...")
+
+ output_path = output_path_override or os.path.join(self.workspace_dir, f"{Path(video_path).stem}_with_audio.mp4")
+
+ return worker_to_use.generate_audio_internal(
+ video_path=video_path, prompt=prompt, duration_seconds=duration_seconds, output_path=output_path
+ )
+ except Exception as e:
+ logger.error(f"MMAUDIO POOL MANAGER: Erro durante a geração de áudio: {e}", exc_info=True)
+ raise gr.Error(f"Falha na geração de áudio: {e}")
+
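+# Exemplo de uso do pool (esboço ilustrativo; caminho e prompt abaixo são hipotéticos):
+#
+#   from aduc_framework.managers.mmaudio_manager import mmaudio_manager_singleton
+#   video_com_audio = mmaudio_manager_singleton.generate_audio_for_video(
+#       video_path="workspace/filme_original.mp4",
+#       prompt="vento na savana, passos na grama seca, rugido distante",
+#       duration_seconds=8.0,
+#   )
+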
+# --- Instanciação Singleton ---
+class MMAudioPlaceholder:
+ def generate_audio_for_video(self, video_path, *args, **kwargs):
+ logger.error("MMAudio não foi inicializado pois nenhuma GPU foi alocada. Pulando etapa de áudio.")
+ return video_path
+
+try:
+ with open("config.yaml", 'r') as f:
+ config = yaml.safe_load(f)
+ WORKSPACE_DIR = config['application']['workspace_dir']
+
+ mmaudio_gpus_required = config['specialists'].get('mmaudio', {}).get('gpus_required', 0)
+ mmaudio_device_ids = hardware_manager.allocate_gpus('MMAudio', mmaudio_gpus_required)
+
+ if mmaudio_gpus_required > 0 and 'cpu' not in mmaudio_device_ids:
+ mmaudio_manager_singleton = MMAudioPoolManager(device_ids=mmaudio_device_ids, workspace_dir=WORKSPACE_DIR)
+ logger.info("Especialista de Áudio (MMAudio Pool) pronto.")
+ else:
+ mmaudio_manager_singleton = MMAudioPlaceholder()
+ logger.warning("MMAudio Pool Manager não foi inicializado. Nenhuma GPU foi requisitada na config.yaml.")
+except Exception as e:
+ logger.critical(f"Falha CRÍTICA ao inicializar o MMAudioManager: {e}", exc_info=True)
+ mmaudio_manager_singleton = MMAudioPlaceholder()
\ No newline at end of file
diff --git a/aduc_framework/managers/seedvr_manager.py b/aduc_framework/managers/seedvr_manager.py
new file mode 100644
index 0000000000000000000000000000000000000000..c2937dc0b85627928ed78e4b51af89edc3a4d8a5
--- /dev/null
+++ b/aduc_framework/managers/seedvr_manager.py
@@ -0,0 +1,229 @@
+# managers/seedvr_manager.py
+#
+# Copyright (C) 2025 Carlos Rodrigues dos Santos
+#
+# Version: 10.0.0 (Definitive Monkey Patch / Single Instance)
+#
+# Esta é a arquitetura final e mais robusta. O paralelismo problemático
+# é desativado programaticamente via "monkey patching" no decorador `master_only`.
+# Isso elimina a necessidade de gerenciar `torch.distributed`, simplificando
+# o código e resolvendo a causa raiz de todos os erros de paralelismo.
+# O isolamento de GPU via CUDA_VISIBLE_DEVICES é mantido como a melhor
+# prática para o gerenciamento de hardware.
+
+import torch
+import os
+import gc
+import logging
+import sys
+import subprocess
+from pathlib import Path
+from urllib.parse import urlparse
+from torch.hub import download_url_to_file
+import mediapy
+from einops import rearrange
+import shutil
+from omegaconf import OmegaConf
+import yaml
+
+from ..tools.hardware_manager import hardware_manager
+
+logger = logging.getLogger(__name__)
+
+APP_ROOT = Path("/home/user/app")
+DEPS_DIR = APP_ROOT / "deps"
+SEEDVR_SPACE_DIR = DEPS_DIR / "SeedVR_Space"
+SEEDVR_SPACE_URL = "https://huggingface.co/spaces/ByteDance-Seed/SeedVR2-3B"
+
+class SeedVrManager:
+ """Gerencia uma única instância do pipeline SeedVR em uma GPU dedicada e isolada."""
+ def __init__(self, device_id: str):
+ self.global_device_id = device_id
+ self.local_device_name = 'cuda:0' # O que o processo enxergará
+ self.gpu_index = self.global_device_id.split(':')[-1]
+
+ self.runner = None
+ self._check_and_run_global_setup()
+ logger.info(f"SeedVR Manager (Single Instance) inicializado para operar na GPU {self.global_device_id}.")
+
+ @staticmethod
+ def _check_and_run_global_setup():
+ """Executa o setup de arquivos e aplica o patch para desativar o paralelismo."""
+ setup_flag = DEPS_DIR / "seedvr.setup.complete"
+ if str(APP_ROOT) not in sys.path: sys.path.insert(0, str(APP_ROOT))
+
+ # O patch deve ser aplicado toda vez que o código roda.
+ try:
+ from common import decorators
+ import functools
+
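+ # O decorador substituto abaixo apenas repassa a chamada original; com isso,
+ # funções marcadas com @master_only passam a executar incondicionalmente em
+ # um único processo, sem depender de torch.distributed.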
+ def _passthrough_decorator(func):
+ @functools.wraps(func)
+ def wrapped(*args, **kwargs):
+ return func(*args, **kwargs)
+ return wrapped
+
+ decorators.master_only = _passthrough_decorator
+ logger.info("Monkey patch aplicado com sucesso em 'common.decorators.master_only' para desativar o paralelismo.")
+ except Exception as e:
+ logger.error(f"Falha ao aplicar o monkey patch: {e}", exc_info=True)
+
+ if setup_flag.exists(): return True
+
+ logger.info("--- Iniciando Setup Global do SeedVR (primeira execução) ---")
+ if not SEEDVR_SPACE_DIR.exists():
+ DEPS_DIR.mkdir(exist_ok=True, parents=True)
+ subprocess.run(["git", "clone", "--depth", "1", SEEDVR_SPACE_URL, str(SEEDVR_SPACE_DIR)], check=True)
+
+ required_dirs = ["projects", "common", "models", "configs_3b", "configs_7b", "data"]
+ for dirname in required_dirs:
+ source, target = SEEDVR_SPACE_DIR / dirname, APP_ROOT / dirname
+ if not target.exists(): shutil.copytree(source, target)
+
+ try:
+ import apex
+ except ImportError:
+ apex_url = 'https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/apex-0.1-cp310-cp310-linux_x86_64.whl'
+ apex_wheel_path = _load_file_from_url(url=apex_url, model_dir=str(DEPS_DIR))
+ subprocess.run(f"pip install {apex_wheel_path}", check=True, shell=True)
+
+ ckpt_dir = APP_ROOT / 'ckpts'
+ ckpt_dir.mkdir(exist_ok=True)
+ model_urls = {
+ 'vae': 'https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/ema_vae.pth',
+ 'dit_3b': 'https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/seedvr2_ema_3b.pth',
+ #'dit_7b': 'https://huggingface.co/ByteDance-Seed/SeedVR2-7B/resolve/main/seedvr2_ema_7b.pth',
+ 'pos_emb': 'https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/pos_emb.pt',
+ 'neg_emb': 'https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/neg_emb.pt'
+ }
+ for name, url in model_urls.items():
+ _load_file_from_url(url=url, model_dir=str(ckpt_dir))
+
+ setup_flag.touch()
+ logger.info("--- Setup Global do SeedVR Concluído ---")
+
+ def _initialize_runner(self):
+ """Carrega o modelo 3B em um ambiente de GPU isolado."""
+ if self.runner is not None: return
+
+ os.environ['CUDA_VISIBLE_DEVICES'] = self.gpu_index
+
+ from projects.video_diffusion_sr.infer import VideoDiffusionInfer
+ from common.config import load_config
+
+ logger.info(f"Manager na GPU {self.global_device_id}: Inicializando runner SeedVR 3B...")
+
+ config_path = APP_ROOT / 'configs_3b' / 'main.yaml'
+ checkpoint_path = APP_ROOT / 'ckpts' / 'seedvr2_ema_3b.pth'
+
+ config = load_config(str(config_path))
+ self.runner = VideoDiffusionInfer(config)
+ OmegaConf.set_readonly(self.runner.config, False)
+
+ self.runner.configure_dit_model(device=self.local_device_name, checkpoint=str(checkpoint_path))
+ self.runner.configure_vae_model()
+
+ logger.info(f"Manager na GPU {self.global_device_id}: Runner 3B pronto na VRAM.")
+
+ def _unload_runner(self):
+ """Descarrega os modelos da VRAM e limpa o ambiente."""
+ if self.runner is not None:
+ del self.runner; self.runner = None
+ gc.collect(); torch.cuda.empty_cache()
+ logger.info(f"Manager na GPU {self.global_device_id}: Runner descarregado da VRAM.")
+
+ if 'CUDA_VISIBLE_DEVICES' in os.environ:
+ del os.environ['CUDA_VISIBLE_DEVICES']
+
+ def process_video(self, input_video_path: str, output_video_path: str, prompt: str,
+ steps: int = 100, seed: int = 666) -> str:
+ """Ciclo completo de carga, processamento e descarga para uma única tarefa."""
+ try:
+ self._initialize_runner()
+
+ device = torch.device(self.local_device_name)
+
+ from common.seed import set_seed
+ from data.image.transforms.divisible_crop import DivisibleCrop
+ from data.image.transforms.na_resize import NaResize
+ from data.video.transforms.rearrange import Rearrange
+ from projects.video_diffusion_sr.color_fix import wavelet_reconstruction
+ from torchvision.transforms import Compose, Lambda, Normalize
+ from torchvision.io.video import read_video
+
+ set_seed(seed, same_across_ranks=True)
+ self.runner.config.diffusion.timesteps.sampling.steps = steps
+ self.runner.configure_diffusion()
+
+ video_tensor = read_video(input_video_path, output_format="TCHW")[0] / 255.0
+ res_h, res_w = video_tensor.shape[-2:]
+ video_transform = Compose([
+ NaResize(resolution=(res_h * res_w) ** 0.5, mode="area", downsample_only=False),
+ Lambda(lambda x: torch.clamp(x, 0.0, 1.0)),
+ DivisibleCrop((16, 16)), Normalize(0.5, 0.5), Rearrange("t c h w -> c t h w"),
+ ])
+ cond_latents = [video_transform(video_tensor.to(device))]
+ self.runner.dit.to("cpu"); self.runner.vae.to(device)
+ cond_latents = self.runner.vae_encode(cond_latents)
+ self.runner.vae.to("cpu"); gc.collect(); torch.cuda.empty_cache(); self.runner.dit.to(device)
+
+ pos_emb = torch.load(APP_ROOT / 'ckpts' / 'pos_emb.pt').to(device)
+ neg_emb = torch.load(APP_ROOT / 'ckpts' / 'neg_emb.pt').to(device)
+ text_embeds_dict = {"texts_pos": [pos_emb], "texts_neg": [neg_emb]}
+
+ noises = [torch.randn_like(latent) for latent in cond_latents]
+ conditions = [self.runner.get_condition(noise, latent_blur=latent, task="sr") for noise, latent in zip(noises, cond_latents)]
+
+ with torch.no_grad(), torch.autocast("cuda", torch.bfloat16, enabled=True):
+ video_tensors = self.runner.inference(noises=noises, conditions=conditions, dit_offload=True, **text_embeds_dict)
+
+ self.runner.dit.to("cpu"); gc.collect(); torch.cuda.empty_cache(); self.runner.vae.to(device)
+ samples = self.runner.vae_decode(video_tensors)
+ final_sample, input_video_sample = samples[0], cond_latents[0]
+ if final_sample.shape[1] < input_video_sample.shape[1]:
+ input_video_sample = input_video_sample[:, :final_sample.shape[1]]
+
+ final_sample = wavelet_reconstruction(rearrange(final_sample, "c t h w -> t c h w"), rearrange(input_video_sample, "c t h w -> t c h w"))
+ final_sample = rearrange(final_sample, "t c h w -> t h w c")
+ final_sample = final_sample.clip(-1, 1).mul_(0.5).add_(0.5).mul_(255).round()
+ final_sample_np = final_sample.to(torch.uint8).cpu().numpy()
+
+ mediapy.write_video(output_video_path, final_sample_np, fps=24)
+ return output_video_path
+ finally:
+ self._unload_runner()
+
+
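+# Exemplo de uso (esboço ilustrativo; caminhos abaixo são hipotéticos):
+#
+#   from aduc_framework.managers.seedvr_manager import seedvr_manager_singleton
+#   caminho_hd = seedvr_manager_singleton.process_video(
+#       input_video_path="workspace/filme_upscaled.mp4",
+#       output_video_path="workspace/filme_hd.mp4",
+#       prompt="cinematic, high detail",
+#       steps=100,
+#   )
+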
+def _load_file_from_url(url, model_dir='./', file_name=None):
+ os.makedirs(model_dir, exist_ok=True)
+ filename = file_name or os.path.basename(urlparse(url).path)
+ cached_file = os.path.abspath(os.path.join(model_dir, filename))
+ if not os.path.exists(cached_file):
+ download_url_to_file(url, cached_file, hash_prefix=None, progress=True)
+ return cached_file
+
+# --- Instanciação Singleton ---
+class SeedVrPlaceholder:
+ def process_video(self, input_video_path, *args, **kwargs):
+ logger.warning("SeedVR está desabilitado (gpus_required: 0). Pulando etapa de masterização HD.")
+ return input_video_path
+
+try:
+ with open("config.yaml", 'r') as f: config = yaml.safe_load(f)
+ seedvr_gpus_required = config['specialists'].get('seedvr', {}).get('gpus_required', 2)
+
+ if seedvr_gpus_required > 0:
+ seedvr_device_ids = hardware_manager.allocate_gpus('SeedVR', seedvr_gpus_required)
+ if seedvr_device_ids and 'cpu' not in seedvr_device_ids:
+ device_to_use = seedvr_device_ids[0]
+ seedvr_manager_singleton = SeedVrManager(device_id=device_to_use)
+ logger.info(f"Especialista de Masterização HD (SeedVR Single Instance) pronto para usar a GPU {device_to_use}.")
+ else:
+ seedvr_manager_singleton = SeedVrPlaceholder()
+ logger.warning("SeedVR não foi inicializado porque nenhuma GPU pôde ser alocada.")
+ else:
+ seedvr_manager_singleton = SeedVrPlaceholder()
+ logger.warning("SeedVR Manager não foi inicializado (gpus_required: 0 na config).")
+except Exception as e:
+ logger.critical(f"Falha CRÍTICA ao inicializar o SeedVrManager: {e}", exc_info=True)
+ seedvr_manager_singleton = SeedVrPlaceholder()
\ No newline at end of file
diff --git a/managers/upscaler_specialist.py b/aduc_framework/managers/upscaler_specialist.py
similarity index 98%
rename from managers/upscaler_specialist.py
rename to aduc_framework/managers/upscaler_specialist.py
index f3336ea79b53e7f5da27c5bc30e4b9b39e44b820..8981fe1d13ab87f0e8d81d30d66797758ff9f5dc 100644
--- a/managers/upscaler_specialist.py
+++ b/aduc_framework/managers/upscaler_specialist.py
@@ -5,7 +5,7 @@
import torch
import logging
from diffusers import LTXLatentUpsamplePipeline
-from managers.ltx_manager import ltx_manager_singleton
+from ..managers.ltx_manager import ltx_manager_singleton
logger = logging.getLogger(__name__)
diff --git a/managers/vae_manager.py b/aduc_framework/managers/vae_manager.py
similarity index 98%
rename from managers/vae_manager.py
rename to aduc_framework/managers/vae_manager.py
index 214e43e5e36c4630ba9da5d173b37523a4bd5bf2..07aa62fe8461d836bfa3aaa3a94b11c46590428f 100644
--- a/managers/vae_manager.py
+++ b/aduc_framework/managers/vae_manager.py
@@ -28,7 +28,7 @@ import gc
from typing import Generator
# Import the source of the VAE model and the low-level functions
-from managers.ltx_manager import ltx_manager_singleton
+from ..managers.ltx_manager import ltx_manager_singleton
from ltx_video.models.autoencoders.vae_encode import vae_encode, vae_decode
logger = logging.getLogger(__name__)
diff --git a/aduc_framework/orchestrator.py b/aduc_framework/orchestrator.py
new file mode 100644
index 0000000000000000000000000000000000000000..6533860ef500aee1026338343cf92b7a6f1d1e66
--- /dev/null
+++ b/aduc_framework/orchestrator.py
@@ -0,0 +1,194 @@
+# aduc_framework/orchestrator.py
+#
+# Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos
+#
+# Version: 4.1.0 (Multi-Pool with Forced SeedVR 3B)
+#
+# Esta versão do orquestrador opera com uma arquitetura de múltiplos pools de
+# especialistas e simplifica a tarefa de masterização HD, fixando o uso do
+# modelo SeedVR 3B e removendo a necessidade de selecionar a versão do modelo.
+
+import logging
+from typing import List, Dict, Any, Tuple, Callable, Optional, Generator
+from PIL import Image, ImageOps
+import os
+import subprocess
+import shutil
+from pathlib import Path
+import time
+import gc
+import torch
+
+# Componentes internos do framework
+from .director import AducDirector
+from .types import GenerationState, PreProductionParams, ProductionParams
+
+# Engenheiros de alto nível que definem a lógica do fluxo
+from .engineers import deformes2d_thinker_singleton, deformes3d_engine_singleton, Deformes4DEngine
+
+# Managers (Pools) de especialistas que executam as tarefas em hardware dedicado
+from .managers.latent_enhancer_manager import latent_enhancer_specialist_singleton
+from .managers.seedvr_manager import seedvr_manager_singleton
+from .managers.mmaudio_manager import mmaudio_manager_singleton
+from .managers.vae_manager import vae_manager_singleton
+
+# Ferramentas de utilidade
+from .tools.video_encode_tool import video_encode_tool_singleton
+
+logger = logging.getLogger(__name__)
+
+ProgressCallback = Optional[Callable[[float, str], None]]
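+# Exemplo de callback compatível (esboço): recebe a fração concluída (0.0 a 1.0) e uma mensagem.
+#   progress_cb: ProgressCallback = lambda fracao, msg: logger.info(f"[{fracao:.0%}] {msg}")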
+
+class AducOrchestrator:
+ """
+ Implementa o Maestro (Γ), a camada de orquestração central do Aduc Framework.
+ Ele recebe solicitações, atualiza o estado de geração, delega tarefas para os
+ engenheiros e seus pools de especialistas, e retorna o estado atualizado.
+ """
+ def __init__(self, workspace_dir: str):
+ self.director = AducDirector(workspace_dir)
+ self.editor = Deformes4DEngine()
+ self.editor.initialize(workspace_dir)
+ self.painter = deformes3d_engine_singleton
+ self.painter.initialize(workspace_dir)
+ self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
+ logger.info("ADUC Maestro (Framework Core) pronto para reger a orquestra de especialistas.")
+
+ def get_current_state(self) -> GenerationState:
+ """Retorna o estado de geração atual."""
+ return self.director.get_full_state()
+
+ def process_image_for_story(self, image_path: str, size: int, filename: str) -> str:
+ """Processa uma imagem de referência para o formato quadrado padrão."""
+ img = Image.open(image_path).convert("RGB")
+ img_square = ImageOps.fit(img, (size, size), Image.Resampling.LANCZOS)
+ processed_path = os.path.join(self.director.workspace_dir, filename)
+ img_square.save(processed_path)
+ logger.info(f"Imagem de referência processada e salva em: {processed_path}")
+ return processed_path
+
+ # --- ETAPA 1: PRÉ-PRODUÇÃO ---
+ def task_pre_production(self, params: PreProductionParams, progress_callback: ProgressCallback = None) -> Tuple[List[str], List[str], GenerationState]:
+ """Orquestra a criação do storyboard e dos keyframes visuais."""
+ logger.info("Maestro: Iniciando tarefa de Pré-Produção.")
+ self.director.update_parameters("pre_producao", params)
+
+ if progress_callback: progress_callback(0.1, "Gerando storyboard...")
+ storyboard_list = deformes2d_thinker_singleton.generate_storyboard(prompt=params.prompt, num_keyframes=params.num_keyframes, ref_image_paths=params.ref_paths)
+ self.director.update_pre_production_state(params.prompt, params.ref_paths, storyboard_list)
+
+ if progress_callback: progress_callback(0.2, "Iniciando geração de keyframes...")
+ keyframes_detailed_data = self.painter.generate_keyframes_from_storyboard(generation_state=self.director.get_full_state_as_dict(), progress_callback=progress_callback)
+ self.director.update_keyframes_state(keyframes_detailed_data)
+
+ final_keyframe_paths = [kf["caminho_pixel"] for kf in keyframes_detailed_data]
+ final_state = self.director.get_full_state()
+ logger.info("Maestro: Tarefa de Pré-Produção concluída.")
+ return storyboard_list, final_keyframe_paths, final_state
+
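+ # Exemplo de chamada (esboço; valores meramente ilustrativos):
+ #   params = PreProductionParams(prompt="Um leão na savana", num_keyframes=5,
+ #                                ref_paths=["ref_0.png"], resolution=480,
+ #                                duration_per_fragment=4.0)
+ #   atos, caminhos_keyframes, estado = orquestrador.task_pre_production(params)
+ #   (onde 'orquestrador' é uma instância criada via create_aduc_instance)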
+ # --- ETAPA 2: PRODUÇÃO ---
+ def task_produce_original_movie(self, params: ProductionParams, progress_callback: ProgressCallback = None) -> Tuple[str, List[str], GenerationState]:
+ """Orquestra a geração do vídeo principal a partir dos keyframes."""
+ logger.info("Maestro: Iniciando tarefa de Produção do Filme Original.")
+ self.director.update_parameters("producao", params)
+
+ result_data = self.editor.generate_original_movie(full_generation_state=self.director.get_full_state_as_dict(), progress_callback=progress_callback)
+ self.director.update_video_state(result_data["video_data"])
+
+ final_video_path = result_data["final_path"]
+ latent_paths = result_data["latent_paths"]
+ final_state = self.director.get_full_state()
+ logger.info("Maestro: Tarefa de Produção do Filme Original concluída.")
+ return final_video_path, latent_paths, final_state
+
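+ # Exemplo de chamada (esboço; valores ilustrativos, iguais aos padrões da UI):
+ #   params = ProductionParams(trim_percent=50, handler_strength=0.5,
+ #                             destination_convergence_strength=0.75,
+ #                             guidance_scale=2.0, stg_scale=0.025, inference_steps=20)
+ #   caminho_video, caminhos_latentes, estado = orquestrador.task_produce_original_movie(params)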
+ # --- ETAPA 3: PÓS-PRODUÇÃO (Cadeia de Efeitos) ---
+
+ def task_run_latent_upscaler(self, latent_paths: List[str], chunk_size: int, progress_callback: ProgressCallback = None) -> Generator[Dict[str, Any], None, None]:
+ """Aplica upscale 2x nos latentes e os decodifica para um novo vídeo."""
+ if not self.director.workspace_dir: raise RuntimeError("Orchestrator não inicializado.")
+ if not latent_paths: raise ValueError("Nenhum caminho de latente fornecido para o upscale.")
+
+ logger.info("--- ORQUESTRADOR: Tarefa de Upscaling de Latentes ---")
+ run_timestamp = int(time.time())
+ temp_dir = os.path.join(self.director.workspace_dir, f"temp_upscaled_clips_{run_timestamp}")
+ os.makedirs(temp_dir, exist_ok=True)
+
+ final_upscaled_clip_paths = []
+ num_chunks = -(-len(latent_paths) // chunk_size)
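+ # Divisão inteira com arredondamento para cima (ceil), sem precisar de math.ceil.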
+
+ for i in range(num_chunks):
+ chunk_paths = latent_paths[i * chunk_size:(i + 1) * chunk_size]
+ if progress_callback: progress_callback(i / num_chunks, f"Upscalando & Decodificando Lote {i+1}/{num_chunks}")
+
+ tensors_in_chunk = [torch.load(p, map_location=self.device) for p in chunk_paths]
+ sub_group_latent = torch.cat(tensors_in_chunk, dim=2)
+
+ upscaled_latent_chunk = latent_enhancer_specialist_singleton.upscale(sub_group_latent)
+ pixel_tensor = vae_manager_singleton.decode(upscaled_latent_chunk)
+
+ current_clip_path = os.path.join(temp_dir, f"upscaled_clip_{i:04d}.mp4")
+ self.editor.save_video_from_tensor(pixel_tensor, current_clip_path, fps=24)
+ final_upscaled_clip_paths.append(current_clip_path)
+
+ del tensors_in_chunk, sub_group_latent, upscaled_latent_chunk, pixel_tensor
+ gc.collect(); torch.cuda.empty_cache()
+ yield {"progress": (i + 1) / num_chunks}
+
+ final_video_path = os.path.join(self.director.workspace_dir, f"upscaled_movie_{run_timestamp}.mp4")
+ video_encode_tool_singleton.concatenate_videos(final_upscaled_clip_paths, final_video_path, self.director.workspace_dir)
+
+ shutil.rmtree(temp_dir)
+ logger.info(f"Upscaling de latentes completo! Vídeo final em: {final_video_path}")
+ yield {"final_path": final_video_path}
+
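+ # Consumo típico do gerador (esboço):
+ #   for update in orquestrador.task_run_latent_upscaler(caminhos_latentes, chunk_size=2):
+ #       if "final_path" in update:
+ #           caminho_upscalado = update["final_path"]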
+ def task_run_hd_mastering(self, source_video_path: str, steps: int, prompt: str, progress_callback: ProgressCallback = None) -> Generator[Dict[str, Any], None, None]:
+ """Aplica masterização em HD usando o pool de GPUs do SeedVR com o modelo 3B."""
+ if not self.director.workspace_dir: raise RuntimeError("Orchestrator não inicializado.")
+ logger.info(f"--- ORQUESTRADOR: Tarefa de Masterização HD com SeedVR 3B ---")
+
+ run_timestamp = int(time.time())
+ output_path = os.path.join(self.director.workspace_dir, f"hd_mastered_movie_3B_{run_timestamp}.mp4")
+
+ final_path = seedvr_manager_singleton.process_video(
+ input_video_path=source_video_path,
+ output_video_path=output_path,
+ prompt=prompt,
+ steps=steps
+ )
+ logger.info(f"Masterização HD completa! Vídeo final em: {final_path}")
+ yield {"final_path": final_path}
+
+ def task_run_audio_generation(self, source_video_path: str, audio_prompt: str, progress_callback: ProgressCallback = None) -> Generator[Dict[str, Any], None, None]:
+ """Gera e adiciona áudio ao vídeo usando o pool de GPUs do MMAudio."""
+ if not self.director.workspace_dir: raise RuntimeError("Orchestrator não inicializado.")
+ logger.info(f"--- ORQUESTRADOR: Tarefa de Geração de Áudio ---")
+
+ if progress_callback: progress_callback(0.1, "Preparando para geração de áudio...")
+
+ run_timestamp = int(time.time())
+ source_name = Path(source_video_path).stem
+ output_path = os.path.join(self.director.workspace_dir, f"{source_name}_with_audio_{run_timestamp}.mp4")
+
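+ # Usa o ffprobe para medir a duração do vídeo de origem; o valor dimensiona a
+ # trilha de áudio gerada pelo MMAudio.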
+ try:
+ result = subprocess.run(
+ ["ffprobe", "-v", "error", "-show_entries", "format=duration", "-of", "default=noprint_wrappers=1:nokey=1", source_video_path],
+ capture_output=True, text=True, check=True
+ )
+ duration = float(result.stdout.strip())
+ except Exception as e:
+ logger.error(f"Não foi possível obter a duração do vídeo '{source_video_path}': {e}", exc_info=True)
+ yield {"error": "Falha ao obter duração do vídeo."}
+ return
+
+ if progress_callback: progress_callback(0.5, "Gerando trilha de áudio...")
+
+ final_path = mmaudio_manager_singleton.generate_audio_for_video(
+ video_path=source_video_path,
+ prompt=audio_prompt,
+ duration_seconds=duration,
+ output_path_override=output_path
+ )
+
+ logger.info(f"Geração de áudio completa! Vídeo com áudio em: {final_path}")
+ if progress_callback: progress_callback(1.0, "Geração de áudio completa!")
+ yield {"final_path": final_path}
\ No newline at end of file
diff --git a/prompts/LICENSE b/aduc_framework/prompts/LICENSE
similarity index 100%
rename from prompts/LICENSE
rename to aduc_framework/prompts/LICENSE
diff --git a/prompts/NOTICE.md b/aduc_framework/prompts/NOTICE.md
similarity index 100%
rename from prompts/NOTICE.md
rename to aduc_framework/prompts/NOTICE.md
diff --git a/prompts/README.md b/aduc_framework/prompts/README.md
similarity index 100%
rename from prompts/README.md
rename to aduc_framework/prompts/README.md
diff --git a/prompts/anticipatory_keyframe_prompt.txt b/aduc_framework/prompts/anticipatory_keyframe_prompt.txt
similarity index 100%
rename from prompts/anticipatory_keyframe_prompt.txt
rename to aduc_framework/prompts/anticipatory_keyframe_prompt.txt
diff --git a/prompts/audio_director_prompt.txt b/aduc_framework/prompts/audio_director_prompt.txt
similarity index 100%
rename from prompts/audio_director_prompt.txt
rename to aduc_framework/prompts/audio_director_prompt.txt
diff --git a/aduc_framework/prompts/cinematic_director_prompt.txt b/aduc_framework/prompts/cinematic_director_prompt.txt
new file mode 100644
index 0000000000000000000000000000000000000000..8e9535fd1625aad6262ccbae4f713b4c05b2fbff
--- /dev/null
+++ b/aduc_framework/prompts/cinematic_director_prompt.txt
@@ -0,0 +1,27 @@
+# ROLE: AI Cinematic Scenarist
+
+# GOAL:
+# Your single, crucial task is to write a rich, cinematic motion prompt.
+# This prompt must describe the most logical and compelling action that
+# connects the PRESENT visual state to the FUTURE visual state, considering
+# the context of the PAST.
+
+# CONTEXT FOR YOUR PROMPT:
+- Global Story Goal: {global_prompt}
+- Creative History: {story_history}
+- The Past: "{past_scene_desc}" -> [PAST_IMAGE]
+- The Present: "{present_scene_desc}" -> [PRESENT_IMAGE]
+- The Future: "{future_scene_desc}" -> [FUTURE_IMAGE]
+
+# CRITICAL PROMPT DIRECTIVES:
+# 1. ALWAYS DESCRIBE MOTION: The scene must not be static.
+# 2. STYLE: Be descriptive, cinematic, and direct.
+# 3. STRUCTURE: In a single paragraph (under 150 words), describe the scene's
+# motion, prioritizing in this EXACT order:
+# a. Actors/Animals: What are they doing?
+# b. Objects: How do they interact?
+# c. Camera: How is it moving?
+# d. Scenery/Environment: What details add to the mood?
+
+# RESPONSE FORMAT:
+# You MUST respond with ONLY the raw, single-line string for the motion prompt.
diff --git a/prompts/director_composition_prompt.txt b/aduc_framework/prompts/director_composition_prompt.txt
similarity index 100%
rename from prompts/director_composition_prompt.txt
rename to aduc_framework/prompts/director_composition_prompt.txt
diff --git a/prompts/flux_composition_wrapper_prompt.txt b/aduc_framework/prompts/flux_composition_wrapper_prompt.txt
similarity index 100%
rename from prompts/flux_composition_wrapper_prompt.txt
rename to aduc_framework/prompts/flux_composition_wrapper_prompt.txt
diff --git a/prompts/initial_motion_prompt.txt b/aduc_framework/prompts/initial_motion_prompt.txt
similarity index 100%
rename from prompts/initial_motion_prompt.txt
rename to aduc_framework/prompts/initial_motion_prompt.txt
diff --git a/prompts/keyframe_selection_prompt.txt b/aduc_framework/prompts/keyframe_selection_prompt.txt
similarity index 100%
rename from prompts/keyframe_selection_prompt.txt
rename to aduc_framework/prompts/keyframe_selection_prompt.txt
diff --git a/prompts/sound_director_prompt.txt b/aduc_framework/prompts/sound_director_prompt.txt
similarity index 100%
rename from prompts/sound_director_prompt.txt
rename to aduc_framework/prompts/sound_director_prompt.txt
diff --git a/prompts/sound_director_prompt.txt.txt b/aduc_framework/prompts/sound_director_prompt.txt.txt
similarity index 100%
rename from prompts/sound_director_prompt.txt.txt
rename to aduc_framework/prompts/sound_director_prompt.txt.txt
diff --git a/prompts/transition_decision_prompt.txt b/aduc_framework/prompts/transition_decision_prompt.txt
similarity index 100%
rename from prompts/transition_decision_prompt.txt
rename to aduc_framework/prompts/transition_decision_prompt.txt
diff --git a/prompts/unified_cinematographer_prompt.txt b/aduc_framework/prompts/unified_cinematographer_prompt.txt
similarity index 100%
rename from prompts/unified_cinematographer_prompt.txt
rename to aduc_framework/prompts/unified_cinematographer_prompt.txt
diff --git a/prompts/unified_storyboard_prompt.txt b/aduc_framework/prompts/unified_storyboard_prompt.txt
similarity index 100%
rename from prompts/unified_storyboard_prompt.txt
rename to aduc_framework/prompts/unified_storyboard_prompt.txt
diff --git a/tools/LICENSE b/aduc_framework/tools/LICENSE
similarity index 100%
rename from tools/LICENSE
rename to aduc_framework/tools/LICENSE
diff --git a/tools/NOTICE.md b/aduc_framework/tools/NOTICE.md
similarity index 100%
rename from tools/NOTICE.md
rename to aduc_framework/tools/NOTICE.md
diff --git a/tools/README.md b/aduc_framework/tools/README.md
similarity index 100%
rename from tools/README.md
rename to aduc_framework/tools/README.md
diff --git a/aduc_framework/tools/__init__.py b/aduc_framework/tools/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..9ba18ddfc00bcb0180fbd08c0699852dbb222e19
--- /dev/null
+++ b/aduc_framework/tools/__init__.py
@@ -0,0 +1,15 @@
+# aduc_framework/tools/__init__.py
+
+# Expõe os singletons e classes principais do sub-pacote de ferramentas.
+
+from .hardware_manager import hardware_manager
+from .video_encode_tool import video_encode_tool_singleton
+from . import optimization
+from . import tensor_utils
+
+__all__ = [
+ "hardware_manager",
+ "video_encode_tool_singleton",
+ "optimization",
+ "tensor_utils",
+]
\ No newline at end of file
diff --git a/tools/hardware_manager.py b/aduc_framework/tools/hardware_manager.py
similarity index 100%
rename from tools/hardware_manager.py
rename to aduc_framework/tools/hardware_manager.py
diff --git a/tools/optimization.py b/aduc_framework/tools/optimization.py
similarity index 100%
rename from tools/optimization.py
rename to aduc_framework/tools/optimization.py
diff --git a/tools/tensor_utils.py b/aduc_framework/tools/tensor_utils.py
similarity index 100%
rename from tools/tensor_utils.py
rename to aduc_framework/tools/tensor_utils.py
diff --git a/tools/video_encode_tool.py b/aduc_framework/tools/video_encode_tool.py
similarity index 100%
rename from tools/video_encode_tool.py
rename to aduc_framework/tools/video_encode_tool.py
diff --git a/aduc_framework/types.py b/aduc_framework/types.py
new file mode 100644
index 0000000000000000000000000000000000000000..308016a296c67dffce62b158ad760ca8bc191288
--- /dev/null
+++ b/aduc_framework/types.py
@@ -0,0 +1,100 @@
+# aduc_framework/types.py
+#
+# Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos
+#
+# Versão 3.1.0 (Framework Data Models with Core Types)
+#
+# Este arquivo define as estruturas de dados centrais para o Aduc Framework
+# usando Pydantic. Ele também inclui tipos de dados de baixo nível, como dataclasses,
+# que são usados internamente pelos managers e engineers.
+
+from pydantic import BaseModel, Field
+from typing import List, Dict, Any, Optional
+from dataclasses import dataclass
+import torch
+
+# --- Modelos de Parâmetros de Entrada (Pydantic) ---
+# Representam os dados que o usuário fornece através de uma interface.
+
+class PreProductionParams(BaseModel):
+ """Parâmetros para a etapa de Roteiro e Keyframes."""
+ prompt: str = Field(..., description="A ideia geral do filme ou cena.")
+ num_keyframes: int = Field(..., gt=0, description="O número de keyframes a serem gerados.")
+ ref_paths: List[str] = Field(..., description="Lista de caminhos para as imagens de referência iniciais.")
+ resolution: int = Field(..., description="A resolução base (largura/altura) para a geração.")
+ duration_per_fragment: float = Field(..., gt=0, description="A duração alvo em segundos para cada fragmento de vídeo.")
+
+class ProductionParams(BaseModel):
+ """Parâmetros para a etapa de Geração de Vídeo."""
+ trim_percent: int = Field(..., ge=0, le=100, description="Poda causal para o mecanismo Déjà-Vu.")
+ handler_strength: float = Field(..., ge=0.0, le=1.0, description="Força do guia de trajetória (Déjà-Vu).")
+ destination_convergence_strength: float = Field(..., ge=0.0, le=1.0, description="Força da âncora final (destino).")
+ guidance_scale: float = Field(..., ge=0.0, description="Escala de orientação do prompt de movimento.")
+ stg_scale: float = Field(..., ge=0.0, description="Escala de continuidade temporal (STG).")
+ inference_steps: int = Field(..., gt=0, description="Número de passos de inferência para a geração de vídeo.")
+
+class GenerationParameters(BaseModel):
+ """Agrega todos os parâmetros de configuração da geração."""
+ pre_producao: Optional[PreProductionParams] = None
+ producao: Optional[ProductionParams] = None
+ pos_producao: Optional[Dict[str, Any]] = None
+
+
+# --- Modelos de Artefatos Gerados (Pydantic) ---
+# Representam os dados e metadados dos resultados criados pelo framework.
+
+class MediaRef(BaseModel):
+ """Representa uma mídia de referência fornecida pelo usuário."""
+ id: int
+ caminho: str
+
+class Ato(BaseModel):
+ """Representa uma unidade narrativa (sub-tarefa) do storyboard."""
+ id: int
+ resumo_ato: str
+
+class KeyframeData(BaseModel):
+ """Estrutura de dados completa para um único keyframe gerado."""
+ id: int
+ caminho_pixel: str
+ caminho_latent: str
+ prompt_keyframe: str
+
+class VideoFragmentData(BaseModel):
+ """Metadados sobre a geração de um único fragmento de vídeo entre dois keyframes."""
+ id: int
+ prompt_video: str
+
+class VideoData(BaseModel):
+ """Estrutura de dados completa para o vídeo final (ou um grande clipe)."""
+ id: int
+ caminho_pixel: str
+ caminhos_latentes_fragmentos: List[str]
+ fragmentos_componentes: List[VideoFragmentData]
+
+
+# --- O Modelo de Estado Principal (Pydantic) ---
+
+class GenerationState(BaseModel):
+ """
+ O "DNA Digital" completo de uma geração.
+ Este é o objeto de estado central que flui através do framework.
+ """
+ parametros_geracao: GenerationParameters = Field(default_factory=GenerationParameters)
+ Promt_geral: str = ""
+ midias_referencia: List[MediaRef] = Field(default_factory=list)
+ Atos: List[Ato] = Field(default_factory=list)
+ Keyframe_atos: List[KeyframeData] = Field(default_factory=list)
+ videos_atos: List[VideoData] = Field(default_factory=list)
+
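+# Exemplo (esboço) de criação e serialização do estado:
+#   estado = GenerationState(Promt_geral="Um leão na savana")
+#   estado.parametros_geracao.pre_producao = PreProductionParams(
+#       prompt="Um leão na savana", num_keyframes=5, ref_paths=["ref_0.png"],
+#       resolution=480, duration_per_fragment=4.0)
+#   dados = estado.model_dump()  # dict serializável (Pydantic v2; em v1, usar .dict())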
+
+# --- Tipos de Dados Internos (Dataclass) ---
+# Usado para passar dados complexos (como tensores) que não são facilmente
+# serializáveis em JSON, entre os componentes internos do framework.
+
+@dataclass
+class LatentConditioningItem:
+ """Representa uma âncora de condicionamento no espaço latente para o LTX."""
+ latent_tensor: torch.Tensor
+ media_frame_number: int
+ conditioning_strength: float
\ No newline at end of file
diff --git a/aduc_orchestrator.py b/aduc_orchestrator.py
deleted file mode 100644
index 594a2d8c35854f380346248c8845c7e06b367326..0000000000000000000000000000000000000000
--- a/aduc_orchestrator.py
+++ /dev/null
@@ -1,199 +0,0 @@
-# aduc_orchestrator.py
-#
-# Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos
-#
-# Version: 2.2.0
-#
-# This file contains the core ADUC (Automated Discovery and Orchestration of Complex tasks)
-# orchestrator, known as the "Maestro" (Γ). Its responsibility is to manage the high-level
-# creative workflow of film production. This version is updated to reflect the final
-# refactored project structure with `engineers` and `managers`.
-
-import os
-import logging
-from typing import List, Dict, Any, Generator, Tuple
-
-import gradio as gr
-from PIL import Image, ImageOps
-
-from engineers.deformes4D import Deformes4DEngine
-from engineers.deformes2D_thinker import deformes2d_thinker_singleton
-from engineers.deformes3D import deformes3d_engine_singleton
-
-# The logger is configured in app.py; here we just get the instance.
-logger = logging.getLogger(__name__)
-
-class AducDirector:
- """
- Represents the Scene Director, responsible for managing the production state.
- Acts as the "score" for the orchestra, keeping track of all generated artifacts
- (script, keyframes, etc.) during the creative process.
- """
- def __init__(self, workspace_dir: str):
- self.workspace_dir = workspace_dir
- os.makedirs(self.workspace_dir, exist_ok=True)
- self.state: Dict[str, Any] = {}
- logger.info(f"The stage is set. Workspace at '{self.workspace_dir}'.")
-
- def update_state(self, key: str, value: Any) -> None:
- logger.info(f"Notating on the score: State '{key}' updated.")
- self.state[key] = value
-
- def get_state(self, key: str, default: Any = None) -> Any:
- return self.state.get(key, default)
-
-class AducOrchestrator:
- """
- Implements the Maestro (Γ), the central orchestration layer of the ADUC architecture.
- It does not execute AI tasks directly but delegates each step of the creative
- process (scriptwriting, art direction, cinematography) to the appropriate Specialists.
- """
- def __init__(self, workspace_dir: str):
- self.director = AducDirector(workspace_dir)
- self.editor = Deformes4DEngine(workspace_dir)
- self.painter = deformes3d_engine_singleton
- logger.info("ADUC Maestro is on the podium. Musicians (specialists) are ready.")
-
- def process_image_for_story(self, image_path: str, size: int, filename: str) -> str:
- """
- Pre-processes a reference image, standardizing it for use by the Specialists.
- """
- img = Image.open(image_path).convert("RGB")
- img_square = ImageOps.fit(img, (size, size), Image.Resampling.LANCZOS)
- processed_path = os.path.join(self.director.workspace_dir, filename)
- img_square.save(processed_path)
- logger.info(f"Reference image processed and saved to: {processed_path}")
- return processed_path
-
- # --- PRE-PRODUCTION TASKS ---
-
- def task_generate_storyboard(self, prompt: str, num_keyframes: int, ref_image_paths: List[str],
- progress: gr.Progress) -> Tuple[List[str], str, Any]:
- """
- Delegates the task of creating the storyboard to the Scriptwriter (deformes2D_thinker).
- """
- logger.info(f"Act 1, Scene 1: Script. Instructing Scriptwriter to create {num_keyframes} scenes.")
- progress(0.2, desc="Consulting AI Scriptwriter...")
-
- storyboard = deformes2d_thinker_singleton.generate_storyboard(prompt, num_keyframes, ref_image_paths)
-
- logger.info(f"Scriptwriter returned the score: {storyboard}")
- self.director.update_state("storyboard", storyboard)
- self.director.update_state("processed_ref_paths", ref_image_paths)
- return storyboard, ref_image_paths[0], gr.update(visible=True, open=True)
-
- def task_select_keyframes(self, storyboard: List[str], base_ref_paths: List[str],
- pool_ref_paths: List[str]) -> List[str]:
- """
- Delegates to the Photographer (deformes2D_thinker) the task of selecting keyframes.
- """
- logger.info(f"Act 1, Scene 2 (Photographer Mode): Instructing Photographer to select {len(storyboard)} keyframes.")
- selected_paths = deformes2d_thinker_singleton.select_keyframes_from_pool(storyboard, base_ref_paths, pool_ref_paths)
- logger.info(f"Photographer selected the following scenes: {[os.path.basename(p) for p in selected_paths]}")
- self.director.update_state("keyframes", selected_paths)
- return selected_paths
-
- def task_generate_keyframes(self, storyboard: List[str], initial_ref_path: str, global_prompt: str,
- keyframe_resolution: int, progress_callback_factory=None) -> List[str]:
- """
- Delegates to the Art Director (Deformes3DEngine) the task of generating keyframes.
- """
- logger.info("Act 1, Scene 2 (Art Director Mode): Delegating to Art Director.")
- general_ref_paths = self.director.get_state("processed_ref_paths", [])
-
- final_keyframes = self.painter.generate_keyframes_from_storyboard(
- storyboard=storyboard,
- initial_ref_path=initial_ref_path,
- global_prompt=global_prompt,
- keyframe_resolution=keyframe_resolution,
- general_ref_paths=general_ref_paths,
- progress_callback_factory=progress_callback_factory
- )
- self.director.update_state("keyframes", final_keyframes)
- logger.info("Maestro: Art Director has completed keyframe generation.")
- return final_keyframes
-
- # --- PRODUCTION & POST-PRODUCTION TASKS ---
-
- def task_produce_original_movie(self, keyframes: List[str], global_prompt: str, seconds_per_fragment: float,
- trim_percent: int, handler_strength: float,
- destination_convergence_strength: float,
- guidance_scale: float, stg_scale: float, inference_steps: int,
- video_resolution: int, use_continuity_director: bool,
- progress: gr.Progress) -> Dict[str, Any]:
- """
- Delegates the production of the original master video to the Deformes4DEngine.
- """
- logger.info("Maestro: Delegating production of the original movie to Deformes4DEngine.")
- storyboard = self.director.get_state("storyboard", [])
-
- result = self.editor.generate_original_movie(
- keyframes=keyframes,
- global_prompt=global_prompt,
- storyboard=storyboard,
- seconds_per_fragment=seconds_per_fragment,
- trim_percent=trim_percent,
- handler_strength=handler_strength,
- destination_convergence_strength=destination_convergence_strength,
- video_resolution=video_resolution,
- use_continuity_director=use_continuity_director,
- guidance_scale=guidance_scale,
- stg_scale=stg_scale,
- num_inference_steps=inference_steps,
- progress=progress
- )
-
- self.director.update_state("final_video_path", result["final_path"])
- self.director.update_state("latent_paths", result["latent_paths"])
- logger.info("Maestro: Original movie production complete.")
- return result
-
- def task_run_latent_upscaler(self, latent_paths: List[str], chunk_size: int, progress: gr.Progress) -> Generator[Dict[str, Any], None, None]:
- """
- Orchestrates the latent upscaling task.
- """
- logger.info(f"Maestro: Delegating latent upscaling task for {len(latent_paths)} fragments.")
- for update in self.editor.upscale_latents_and_create_video(
- latent_paths=latent_paths,
- chunk_size=chunk_size,
- progress=progress
- ):
- if "final_path" in update and update["final_path"]:
- self.director.update_state("final_video_path", update["final_path"])
- yield update
- break
- logger.info("Maestro: Latent upscaling complete.")
-
- def task_run_hd_mastering(self, source_video_path: str, model_version: str, steps: int, prompt: str, progress: gr.Progress) -> Generator[Dict[str, Any], None, None]:
- """
- Orchestrates the HD mastering task.
- """
- logger.info(f"Maestro: Delegating HD mastering task using SeedVR {model_version}.")
- for update in self.editor.master_video_hd(
- source_video_path=source_video_path,
- model_version=model_version,
- steps=steps,
- prompt=prompt,
- progress=progress
- ):
- if "final_path" in update and update["final_path"]:
- self.director.update_state("final_video_path", update["final_path"])
- yield update
- break
- logger.info("Maestro: HD mastering complete.")
-
- def task_run_audio_generation(self, source_video_path: str, audio_prompt: str, progress: gr.Progress) -> Generator[Dict[str, Any], None, None]:
- """
- Orchestrates the audio generation task.
- """
- logger.info(f"Maestro: Delegating audio generation task.")
- for update in self.editor.generate_audio_for_final_video(
- source_video_path=source_video_path,
- audio_prompt=audio_prompt,
- progress=progress
- ):
- if "final_path" in update and update["final_path"]:
- self.director.update_state("final_video_path", update["final_path"])
- yield update
- break
- logger.info("Maestro: Audio generation complete.")
\ No newline at end of file
diff --git a/aduc_types.py b/aduc_types.py
deleted file mode 100644
index e830e159063a8599b3e6e181c9c1449878ee9feb..0000000000000000000000000000000000000000
--- a/aduc_types.py
+++ /dev/null
@@ -1,43 +0,0 @@
-# aduc_types.py
-# AducSdr: Uma implementação aberta e funcional da arquitetura ADUC-SDR
-# Copyright (C) 4 de Agosto de 2025 Carlos Rodrigues dos Santos
-#
-# Contato:
-# Carlos Rodrigues dos Santos
-# carlex22@gmail.com
-# Rua Eduardo Carlos Pereira, 4125, B1 Ap32, Curitiba, PR, Brazil, CEP 8102025
-#
-# Repositórios e Projetos Relacionados:
-# GitHub: https://github.com/carlex22/Aduc-sdr
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Affero General Public License for more details.
-#
-# You should have received a copy of the GNU Affero General Public License
-# along with this program. If not, see <https://www.gnu.org/licenses/>.
{default_lang.get('app_subtitle')}
") + gr.Markdown("Crie um filme completo com vídeo e áudio, orquestrado por uma equipe de IAs especialistas.
") + with gr.Row(): - lang_selector = gr.Radio(["🇧🇷", "🇺🇸", "🇨🇳"], value="🇧🇷", label=default_lang.get('lang_selector_label')) - resolution_selector = gr.Radio(["480x480", "720x720", "960x960"], value="480x480", label="Base Resolution") + lang_selector = gr.Radio(["🇧🇷", "🇺🇸", "🇨🇳"], value="🇧🇷", label="Idioma / Language") + resolution_selector = gr.Radio(["480x480", "720x720", "960x960"], value="480x480", label="Resolução Base") - with gr.Accordion(default_lang.get('step1_accordion'), open=True) as step1_accordion: - prompt_input = gr.Textbox(label=default_lang.get('prompt_label'), value="A majestic lion walks across the savanna, sits down, and then roars at the setting sun.") - ref_image_input = gr.File(label=default_lang.get('ref_images_label'), file_count="multiple", file_types=["image"]) - with gr.Row(): - num_keyframes_slider = gr.Slider(minimum=3, maximum=42, value=5, step=1, label=default_lang.get('keyframes_label')) - duration_per_fragment_slider = gr.Slider(label=default_lang.get('duration_label'), info=default_lang.get('duration_info'), minimum=2.0, maximum=10.0, value=4.0, step=0.1) + with gr.Accordion("Etapa 1: Roteiro e Cenas-Chave (Pré-Produção)", open=True) as step1_accordion: + prompt_input = gr.Textbox(label="Ideia Geral do Filme", value="Um leão majestoso caminha pela savana, senta-se e ruge para o sol poente.") + ref_image_input = gr.File(label="Imagens de Referência", file_count="multiple", file_types=["image"]) with gr.Row(): - storyboard_and_keyframes_button = gr.Button(default_lang.get('storyboard_and_keyframes_button'), variant="primary") - storyboard_from_photos_button = gr.Button(default_lang.get('storyboard_from_photos_button'), variant="secondary") - step1_mode_b_info_md = gr.Markdown(f"*{default_lang.get('step1_mode_b_info')}*") - storyboard_output = gr.JSON(label=default_lang.get('storyboard_output_label')) - keyframe_gallery = gr.Gallery(label=default_lang.get('keyframes_gallery_label'), visible=True, object_fit="contain", height="auto", type="filepath") - - with gr.Accordion(default_lang.get('step3_accordion'), open=False, visible=False) as step3_accordion: - step3_description_md = gr.Markdown(default_lang.get('step3_description')) - with gr.Accordion(default_lang.get('ltx_advanced_options'), open=False) as ltx_advanced_options_accordion: - with gr.Accordion(default_lang.get('causality_controls_title'), open=True) as causality_accordion: - trim_percent_slider = gr.Slider(minimum=10, maximum=90, value=50, step=5, label=default_lang.get('trim_percent_label'), info=default_lang.get('trim_percent_info')) - with gr.Row(): - forca_guia_slider = gr.Slider(label=default_lang.get('forca_guia_label'), minimum=0.0, maximum=1.0, value=0.5, step=0.05, info=default_lang.get('forca_guia_info')) - convergencia_destino_slider = gr.Slider(label=default_lang.get('convergencia_final_label'), minimum=0.0, maximum=1.0, value=0.75, step=0.05, info=default_lang.get('convergencia_final_info')) - with gr.Accordion(default_lang.get('ltx_pipeline_options'), open=True) as ltx_pipeline_accordion: - with gr.Row(): - guidance_scale_slider = gr.Slider(minimum=1.0, maximum=10.0, value=2.0, step=0.1, label=default_lang.get('guidance_scale_label'), info=default_lang.get('guidance_scale_info')) - stg_scale_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.025, step=0.005, label=default_lang.get('stg_scale_label'), info=default_lang.get('stg_scale_info')) - inference_steps_slider = gr.Slider(minimum=10, maximum=50, value=20, step=1, label=default_lang.get('steps_label'), 
info=default_lang.get('steps_info')) - produce_original_button = gr.Button(default_lang.get('produce_original_button'), variant="primary") - original_video_output = gr.Video(label="Original Master Video", visible=False, interactive=False) - - with gr.Accordion(default_lang.get('step4_accordion'), open=False, visible=False) as step4_accordion: - step4_description_md = gr.Markdown(default_lang.get('step4_description')) - with gr.Accordion(default_lang.get('sub_step_a_upscaler'), open=True) as sub_step_a_accordion: - upscaler_description_md = gr.Markdown(default_lang.get('upscaler_description')) - with gr.Accordion(default_lang.get('upscaler_options'), open=False) as upscaler_options_accordion: - upscaler_chunk_size_slider = gr.Slider(minimum=1, maximum=10, value=2, step=1, label=default_lang.get('upscaler_chunk_size_label'), info=default_lang.get('upscaler_chunk_size_info')) - run_upscaler_button = gr.Button(default_lang.get('run_upscaler_button'), variant="secondary") - upscaler_video_output = gr.Video(label="Upscaled Video", visible=False, interactive=False) - with gr.Accordion(default_lang.get('sub_step_b_hd'), open=True) as sub_step_b_accordion: - hd_description_md = gr.Markdown(default_lang.get('hd_description')) - with gr.Accordion(default_lang.get('hd_options'), open=False) as hd_options_accordion: - hd_model_radio = gr.Radio(["3B", "7B"], value="7B", label=default_lang.get('hd_model_label')) - hd_steps_slider = gr.Slider(minimum=20, maximum=150, value=100, step=5, label=default_lang.get('hd_steps_label'), info=default_lang.get('hd_steps_info')) - run_hd_button = gr.Button(default_lang.get('run_hd_button'), variant="secondary") - hd_video_output = gr.Video(label="HD Mastered Video", visible=False, interactive=False) - with gr.Accordion(default_lang.get('sub_step_c_audio'), open=True) as sub_step_c_accordion: - audio_description_md = gr.Markdown(default_lang.get('audio_description')) - with gr.Accordion(default_lang.get('audio_options'), open=False) as audio_options_accordion: - audio_prompt_input = gr.Textbox(label=default_lang.get('audio_prompt_label'), info=default_lang.get('audio_prompt_info'), lines=3) - run_audio_button = gr.Button(default_lang.get('run_audio_button'), variant="secondary") - audio_video_output = gr.Video(label="Video with Audio", visible=False, interactive=False) - - final_video_output = gr.Video(label=default_lang.get('final_video_label'), visible=False, interactive=False) - with gr.Accordion(default_lang.get('log_accordion_label'), open=False) as log_accordion: - log_display = gr.Textbox(label=default_lang.get('log_display_label'), lines=20, interactive=False, autoscroll=True) - update_log_button = gr.Button(default_lang.get('update_log_button')) - - # --- 4. 
UI EVENT CONNECTIONS --- - all_ui_components = [title_md, subtitle_md, lang_selector, step1_accordion, prompt_input, ref_image_input, num_keyframes_slider, duration_per_fragment_slider, storyboard_and_keyframes_button, storyboard_from_photos_button, step1_mode_b_info_md, storyboard_output, keyframe_gallery, step3_accordion, step3_description_md, produce_original_button, ltx_advanced_options_accordion, causality_accordion, trim_percent_slider, forca_guia_slider, convergencia_destino_slider, ltx_pipeline_accordion, guidance_scale_slider, stg_scale_slider, inference_steps_slider, step4_accordion, step4_description_md, sub_step_a_accordion, upscaler_description_md, upscaler_options_accordion, upscaler_chunk_size_slider, run_upscaler_button, sub_step_b_accordion, hd_description_md, hd_options_accordion, hd_model_radio, hd_steps_slider, run_hd_button, sub_step_c_accordion, audio_description_md, audio_options_accordion, audio_prompt_input, run_audio_button, final_video_output, log_accordion, log_display, update_log_button] - def create_lang_update_fn(): - def update_lang(lang_emoji): - lang_code_map = {"🇧🇷": "pt", "🇺🇸": "en", "🇨🇳": "zh"} - lang_code = lang_code_map.get(lang_emoji, "en") - lang_map = i18n.get(lang_code, i18n.get('en', {})) - return [gr.update(value=f"{lang_map.get('app_subtitle')}
"),gr.update(label=lang_map.get('lang_selector_label')),gr.update(label=lang_map.get('step1_accordion')),gr.update(label=lang_map.get('prompt_label')),gr.update(label=lang_map.get('ref_images_label')),gr.update(label=lang_map.get('keyframes_label')),gr.update(label=lang_map.get('duration_label'), info=lang_map.get('duration_info')),gr.update(value=lang_map.get('storyboard_and_keyframes_button')),gr.update(value=lang_map.get('storyboard_from_photos_button')),gr.update(value=f"*{lang_map.get('step1_mode_b_info')}*"),gr.update(label=lang_map.get('storyboard_output_label')),gr.update(label=lang_map.get('keyframes_gallery_label')),gr.update(label=lang_map.get('step3_accordion')),gr.update(value=lang_map.get('step3_description')),gr.update(value=lang_map.get('produce_original_button')),gr.update(label=lang_map.get('ltx_advanced_options')),gr.update(label=lang_map.get('causality_controls_title')),gr.update(label=lang_map.get('trim_percent_label'), info=lang_map.get('trim_percent_info')),gr.update(label=lang_map.get('forca_guia_label'), info=lang_map.get('forca_guia_info')),gr.update(label=lang_map.get('convergencia_final_label'), info=lang_map.get('convergencia_final_info')),gr.update(label=lang_map.get('ltx_pipeline_options')),gr.update(label=lang_map.get('guidance_scale_label'), info=lang_map.get('guidance_scale_info')),gr.update(label=lang_map.get('stg_scale_label'), info=lang_map.get('stg_scale_info')),gr.update(label=lang_map.get('steps_label'), info=lang_map.get('steps_info')),gr.update(label=lang_map.get('step4_accordion')),gr.update(value=lang_map.get('step4_description')),gr.update(label=lang_map.get('sub_step_a_upscaler')),gr.update(value=lang_map.get('upscaler_description')),gr.update(label=lang_map.get('upscaler_options')),gr.update(label=lang_map.get('upscaler_chunk_size_label'), info=lang_map.get('upscaler_chunk_size_info')),gr.update(value=lang_map.get('run_upscaler_button')),gr.update(label=lang_map.get('sub_step_b_hd')),gr.update(value=lang_map.get('hd_description')),gr.update(label=lang_map.get('hd_options')),gr.update(label=lang_map.get('hd_model_label')),gr.update(label=lang_map.get('hd_steps_label'), info=lang_map.get('hd_steps_info')),gr.update(value=lang_map.get('run_hd_button')),gr.update(label=lang_map.get('sub_step_c_audio')),gr.update(value=lang_map.get('audio_description')),gr.update(label=lang_map.get('audio_options')),gr.update(label=lang_map.get('audio_prompt_label'), info=lang_map.get('audio_prompt_info')),gr.update(value=lang_map.get('run_audio_button')),gr.update(label=lang_map.get('final_video_label')),gr.update(label=lang_map.get('log_accordion_label')),gr.update(label=lang_map.get('log_display_label')),gr.update(value=lang_map.get('update_log_button'))] - return update_lang - lang_selector.change(fn=create_lang_update_fn(), inputs=lang_selector, outputs=all_ui_components) + num_keyframes_slider = gr.Slider(minimum=3, maximum=42, value=5, step=1, label="Número de Cenas-Chave") + duration_per_fragment_slider = gr.Slider(label="Duração de cada Clipe (s)", info="Duração alvo para cada fragmento de vídeo.", minimum=2.0, maximum=10.0, value=4.0, step=0.1) + storyboard_and_keyframes_button = gr.Button("Gerar Roteiro e Keyframes", variant="primary") + storyboard_output = gr.JSON(label="Roteiro Gerado (Storyboard)") + keyframe_gallery = gr.Gallery(label="Galeria de Cenas-Chave (Keyframes)", visible=True, object_fit="contain", height="auto", type="filepath") + + with gr.Accordion("Etapa 3: Produção do Vídeo Original", open=False, visible=False) as step3_accordion: + 
+            trim_percent_slider = gr.Slider(minimum=10, maximum=90, value=50, step=5, label="Poda Causal (%)")
+            handler_strength = gr.Slider(label="Força do Déjà-Vu", minimum=0.0, maximum=1.0, value=0.5, step=0.05)
+            dest_strength = gr.Slider(label="Força da Âncora Final", minimum=0.0, maximum=1.0, value=0.75, step=0.05)
+            guidance_scale_slider = gr.Slider(minimum=1.0, maximum=10.0, value=2.0, step=0.1, label="Escala de Orientação")
+            stg_scale_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.025, step=0.005, label="Escala STG")
+            inference_steps_slider = gr.Slider(minimum=10, maximum=50, value=20, step=1, label="Passos de Inferência")
+            produce_original_button = gr.Button("🎬 Produzir Vídeo Original", variant="primary")
+            original_video_output = gr.Video(label="Filme Original Master", visible=False, interactive=False)
+
+        with gr.Accordion("Etapa 4: Pós-Produção (Opcional)", open=False, visible=False) as step4_accordion:
+            gr.Markdown("Aplique efeitos de melhoria ao vídeo mais recente. Cada etapa usa o resultado da anterior como fonte.")
+            with gr.Accordion("A. Upscaler Latente 2x", open=True):
+                upscaler_chunk_size_slider = gr.Slider(minimum=1, maximum=10, value=2, step=1, label="Fragmentos por Lote")
+                run_upscaler_button = gr.Button("Executar Upscaler Latente", variant="secondary")
+                upscaler_video_output = gr.Video(label="Vídeo com Upscale", visible=False, interactive=False)
+            with gr.Accordion("B. Masterização HD (SeedVR)", open=True):
+                hd_steps_slider = gr.Slider(minimum=20, maximum=150, value=100, step=5, label="Passos de Inferência HD")
+                run_hd_button = gr.Button("Executar Masterização HD (Modelo 3B)", variant="secondary")
+                hd_video_output = gr.Video(label="Vídeo Masterizado em HD", visible=False, interactive=False)
+            with gr.Accordion("C. Geração de Áudio", open=True):
+                audio_prompt_input = gr.Textbox(label="Prompt de Áudio Detalhado (Opcional)", lines=3, placeholder="Descreva os sons, efeitos e música desejados. Se vazio, usará o prompt geral do filme.")
+                run_audio_button = gr.Button("Gerar Áudio", variant="secondary")
+                audio_video_output = gr.Video(label="Vídeo com Áudio", visible=False, interactive=False)
+
+        with gr.Accordion("🧬 DNA Digital da Geração (JSON)", open=False) as data_accordion:
+            generation_data_output = gr.JSON(label="Estado de Geração Completo")
+
+        final_video_output = gr.Video(label="Filme Final (Resultado da Última Etapa)", visible=False, interactive=False)
+
+        with gr.Accordion("📝 Log de Geração (Detalhado)", open=False) as log_accordion:
+            log_display = gr.Textbox(label="Log da Sessão", lines=20, interactive=False, autoscroll=True)
+            update_log_button = gr.Button("Atualizar Log")
-    storyboard_and_keyframes_button.click(fn=run_pre_production_wrapper, inputs=[prompt_input, num_keyframes_slider, ref_image_input, resolution_selector, duration_per_fragment_slider], outputs=[storyboard_output, keyframe_gallery, step3_accordion])
-    storyboard_from_photos_button.click(fn=run_pre_production_photo_wrapper, inputs=[prompt_input, num_keyframes_slider, ref_image_input], outputs=[storyboard_output, keyframe_gallery, step3_accordion])
-    produce_original_button.click(fn=run_original_production_wrapper, inputs=[keyframe_gallery, prompt_input, duration_per_fragment_slider, trim_percent_slider, forca_guia_slider, convergencia_destino_slider, guidance_scale_slider, stg_scale_slider, inference_steps_slider, resolution_selector], outputs=[original_video_output, final_video_output, step4_accordion, original_latents_paths_state, original_video_path_state, current_source_video_state])
+    # --- 4. CONEXÕES DE EVENTOS DA UI ---
+    storyboard_and_keyframes_button.click(fn=run_pre_production_wrapper, inputs=[prompt_input, num_keyframes_slider, ref_image_input, resolution_selector, duration_per_fragment_slider], outputs=[generation_state_holder, storyboard_output, keyframe_gallery, step3_accordion])
+    produce_original_button.click(fn=run_original_production_wrapper, inputs=[generation_state_holder, trim_percent_slider, handler_strength, dest_strength, guidance_scale_slider, stg_scale_slider, inference_steps_slider], outputs=[original_video_output, final_video_output, step4_accordion, original_latents_paths_state, original_video_path_state, current_source_video_state, generation_state_holder, generation_data_output])
+    run_upscaler_button.click(fn=run_upscaler_wrapper, inputs=[original_latents_paths_state, upscaler_chunk_size_slider], outputs=[upscaler_video_output, final_video_output, upscaled_video_path_state, current_source_video_state])
-    run_hd_button.click(fn=run_hd_wrapper, inputs=[current_source_video_state, hd_model_radio, hd_steps_slider, prompt_input], outputs=[hd_video_output, final_video_output, hd_video_path_state, current_source_video_state])
+    run_hd_button.click(fn=run_hd_wrapper, inputs=[current_source_video_state, hd_steps_slider, prompt_input], outputs=[hd_video_output, final_video_output, hd_video_path_state, current_source_video_state])
     run_audio_button.click(fn=run_audio_wrapper, inputs=[current_source_video_state, audio_prompt_input, prompt_input], outputs=[audio_video_output, final_video_output])
+
+    generation_state_holder.change(fn=lambda state: state, inputs=generation_state_holder, outputs=generation_data_output)
     update_log_button.click(fn=get_log_content, inputs=[], outputs=[log_display])
-# --- 5. APPLICATION LAUNCH ---
+# --- 5. INICIALIZAÇÃO DA APLICAÇÃO ---
 if __name__ == "__main__":
     if os.path.exists(WORKSPACE_DIR):
-        logger.info(f"Clearing previous workspace at: {WORKSPACE_DIR}")
         shutil.rmtree(WORKSPACE_DIR)
     os.makedirs(WORKSPACE_DIR)
-    logger.info(f"Application started. Launching Gradio interface...")
+    logger.info("Aplicação Gradio iniciada. Lançando interface...")
     demo.queue().launch()
\ No newline at end of file
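Editor's note: the UI wiring above centralizes the workflow around a hidden gr.State ("generation_state_holder") whose .change event mirrors the full GenerationState into the gr.JSON viewer ("generation_data_output"). The snippet below is a minimal, self-contained sketch of that pattern, not part of the patch; only the two component names are taken from the diff, everything else (the toy step function, labels) is an illustrative assumption, and it presumes a Gradio version in which gr.State emits .change events, as the patch itself does.

# Minimal sketch of the state-holder / JSON-mirror pattern used in the wiring above.
import gradio as gr

with gr.Blocks() as sketch_demo:
    generation_state_holder = gr.State(value={})  # hidden state shared by all steps
    generation_data_output = gr.JSON(label="Estado de Geração Completo")
    step_button = gr.Button("Simular etapa")       # stands in for a real pipeline step

    def fake_step(state):
        # A real wrapper would call the framework and return its updated GenerationState.
        state = dict(state or {})
        state["steps_run"] = state.get("steps_run", 0) + 1
        return state

    # The step writes back into the hidden state...
    step_button.click(fn=fake_step, inputs=generation_state_holder, outputs=generation_state_holder)
    # ...and every change to the hidden state is mirrored into the JSON viewer.
    generation_state_holder.change(fn=lambda s: s, inputs=generation_state_holder, outputs=generation_data_output)

if __name__ == "__main__":
    sketch_demo.launch()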
diff --git a/app_api.py b/app_api.py
new file mode 100644
index 0000000000000000000000000000000000000000..36156ac9b6eec8305c12ca91c2892779f44636a0
--- /dev/null
+++ b/app_api.py
@@ -0,0 +1,127 @@
+# app_api.py
+#
+# Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos
+#
+# Versão 3.0.0 (API Head for Aduc Framework)
+#
+# Este arquivo implementa um servidor de API usando FastAPI para expor as
+# funcionalidades do Aduc Framework. Ele permite o controle programático
+# do processo de geração de vídeo.
+
+import yaml
+import logging
+import uuid
+from typing import Dict
+
+from fastapi import FastAPI, BackgroundTasks, HTTPException
+
+# --- 1. IMPORTAÇÃO DO FRAMEWORK E SEUS TIPOS ---
+import aduc_framework
+from aduc_framework.types import GenerationState, PreProductionParams, ProductionParams
+
+# --- CONFIGURAÇÃO INICIAL ---
+logger = logging.getLogger(__name__)
+
+# Cria a aplicação FastAPI
+app = FastAPI(
+    title="ADUC-SDR Framework API",
+    description="API para orquestração de geração de vídeo coerente com IA.",
+    version="3.0.0"
+)
+
+# Carrega a configuração e inicializa uma instância SINGLETON do framework.
+# O framework é pesado e deve ser carregado apenas uma vez na inicialização da API.
+try:
+    with open("config.yaml", 'r') as f: config = yaml.safe_load(f)
+    WORKSPACE_DIR = config['application']['workspace_dir']
+
+    aduc = aduc_framework.create_aduc_instance(workspace_dir=WORKSPACE_DIR)
+
+    logger.info("API FastAPI inicializada e conectada ao Aduc Framework.")
+except Exception as e:
+    logger.critical(f"ERRO CRÍTICO durante a inicialização da API: {e}", exc_info=True)
+    # A API não pode funcionar sem o framework, então saímos se falhar.
+    exit()
+
+# --- ARMAZENAMENTO DE TAREFAS EM MEMÓRIA ---
+# Em um ambiente de produção real, isso seria substituído por um banco de dados
+# ou um cache como Redis para persistir o estado das tarefas.
+tasks_state: Dict[str, GenerationState] = {}
+
+
+# --- FUNÇÕES DE BACKGROUND ---
+
+def run_production_in_background(task_id: str, params: ProductionParams):
+    """
+    Função que executa a tarefa de produção demorada em segundo plano.
+    Ela opera na instância global 'aduc' para modificar seu estado interno.
+    """
+    logger.info(f"Background task {task_id}: Iniciando produção de vídeo...")
+    try:
+        # A tarefa do framework modifica o estado interno da instância 'aduc'
+        _, _, final_state = aduc.task_produce_original_movie(params=params)
+
+        # Armazena o estado final e completo no nosso "banco de dados" de tarefas
+        tasks_state[task_id] = final_state
+        logger.info(f"Background task {task_id}: Produção de vídeo concluída com sucesso.")
+    except Exception as e:
+        logger.error(f"Background task {task_id}: Falha na produção. Erro: {e}", exc_info=True)
+        # Opcional: Atualizar o estado da tarefa com uma mensagem de erro.
+
+
+# --- ENDPOINTS DA API ---
+
+@app.post("/v1/pre-production", response_model=GenerationState, tags=["Workflow"])
+async def start_pre_production(params: PreProductionParams):
+    """
+    Inicia e executa a etapa de pré-produção (storyboard e keyframes).
+
+    Esta é uma chamada síncrona, pois a pré-produção é relativamente rápida.
+    Ela retorna o estado de geração completo após a conclusão.
+    """
+    logger.info(f"API: Recebida solicitação de pré-produção com prompt: '{params.prompt[:30]}...'")
+    try:
+        _, _, updated_state = aduc.task_pre_production(params=params)
+        return updated_state
+    except Exception as e:
+        logger.error(f"API: Erro na pré-produção: {e}", exc_info=True)
+        raise HTTPException(status_code=500, detail=f"Erro interno durante a pré-produção: {e}")
+
+@app.post("/v1/production", status_code=202, tags=["Workflow"])
+async def start_production(params: ProductionParams, background_tasks: BackgroundTasks):
+    """
+    Inicia a tarefa de produção de vídeo principal em segundo plano.
+
+    Esta chamada retorna imediatamente com um `task_id`. Use o endpoint
+    `/v1/status/{task_id}` para verificar o progresso e obter o resultado final.
+    """
+    task_id = str(uuid.uuid4())
+    logger.info(f"API: Recebida solicitação de produção. Criando tarefa de background com ID: {task_id}")
+
+    # Armazena o estado atual (pré-produção) antes de iniciar a nova tarefa
+    tasks_state[task_id] = aduc.get_current_state()
+
+    # Adiciona a função demorada para ser executada em segundo plano
+    background_tasks.add_task(run_production_in_background, task_id, params)
+
+    return {"message": "Produção de vídeo iniciada em segundo plano.", "task_id": task_id}
+
+@app.get("/v1/status/{task_id}", response_model=GenerationState, tags=["Workflow"])
+async def get_task_status(task_id: str):
+    """
+    Verifica o estado de uma tarefa de geração em andamento ou concluída.
+    """
+    logger.info(f"API: Verificando status da tarefa {task_id}")
+    state = tasks_state.get(task_id)
+    if not state:
+        raise HTTPException(status_code=404, detail="ID de tarefa não encontrado.")
+
+    # Retorna o estado mais recente que temos para essa tarefa
+    return state
+
+@app.get("/health", tags=["Infra"])
+async def health_check():
+    """
+    Endpoint simples para verificar se a API está online.
+    """
+    return {"status": "ok"}
\ No newline at end of file
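Editor's note: a possible client-side call sequence against the endpoints added in app_api.py is sketched below. The endpoint paths and the synchronous/asynchronous split come from the patch; the base URL, the use of the requests library, the payload field names, and the key checked on the returned state are assumptions and must be matched to the real PreProductionParams, ProductionParams, and GenerationState definitions.

# Illustrative client sketch for the API above (not part of the patch).
import time
import requests

BASE_URL = "http://localhost:8000"  # assumed uvicorn default

# 1. Synchronous pre-production (storyboard + keyframes); 'prompt' is used by the endpoint,
#    any other PreProductionParams fields would go in this payload as well.
pre_state = requests.post(f"{BASE_URL}/v1/pre-production",
                          json={"prompt": "a lighthouse in a storm"}).json()

# 2. Asynchronous production: returns a task_id immediately (HTTP 202).
prod_resp = requests.post(f"{BASE_URL}/v1/production", json={}).json()  # fill with ProductionParams fields
task_id = prod_resp["task_id"]

# 3. Poll the task until the background job stores its final GenerationState.
while True:
    state = requests.get(f"{BASE_URL}/v1/status/{task_id}").json()
    if state.get("final_video_path"):  # key name assumed, not defined in this patch
        break
    time.sleep(10)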
- config_file: "ltxv-13b-0.9.8-distilled.yaml" \ No newline at end of file + config_file: "ltxv-13b-0.9.8-distilled.yaml" + enable_prompt_enhancement: false + + + mmaudio: + gpus_required: 2 + + + prompt_enhancer: + image_caption_model: "MiaoshouAI/Florence-2-large-PromptGen-v2.0" + llm_model: "unsloth/Llama-3.2-3B-Instruct" + prompt_file: "prompts/cinematic_director_prompt.txt" \ No newline at end of file diff --git a/engineers/__init__.py b/engineers/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/engineers/deformes3D.py b/engineers/deformes3D.py deleted file mode 100644 index 6f9dda89b06101fbef4f3797420b1599dbce9f18..0000000000000000000000000000000000000000 --- a/engineers/deformes3D.py +++ /dev/null @@ -1,171 +0,0 @@ -# engineers/deformes3D.py -# -# Copyright (C) 2025 Carlos Rodrigues dos Santos -# -# Version: 1.5.1 -# -# This version maintains the core FLUX-based keyframe generation and adds the -# LTX-based "enrichment" as a secondary, experimental step for each keyframe, -# allowing for direct comparison without altering the primary workflow. - -from PIL import Image, ImageOps -import os -import time -import logging -import gradio as gr -import yaml -import torch -import numpy as np - -from managers.flux_kontext_manager import flux_kontext_singleton -from engineers.deformes2D_thinker import deformes2d_thinker_singleton -from aduc_types import LatentConditioningItem -from managers.ltx_manager import ltx_manager_singleton -from managers.vae_manager import vae_manager_singleton -from managers.latent_enhancer_manager import latent_enhancer_specialist_singleton - -logger = logging.getLogger(__name__) - -class Deformes3DEngine: - """ - ADUC Specialist for static image (keyframe) generation. - """ - def __init__(self, workspace_dir): - self.workspace_dir = workspace_dir - self.image_generation_helper = flux_kontext_singleton - logger.info("3D Engine (Image Specialist) ready to receive orders from the Maestro.") - - def _generate_single_keyframe(self, prompt: str, reference_images: list[Image.Image], output_filename: str, width: int, height: int, callback: callable = None) -> str: - """ - Low-level function that generates a single image using the LTX helper. - """ - logger.info(f"Generating keyframe '{output_filename}' with prompt: '{prompt}'") - generated_image = self.image_generation_helper.generate_image( - reference_images=reference_images, prompt=prompt, width=width, - height=height, seed=int(time.time()), callback=callback - ) - final_path = os.path.join(self.workspace_dir, output_filename) - generated_image.save(final_path) - logger.info(f"Keyframe successfully saved to: {final_path}") - return final_path - - def generate_keyframes_from_storyboard(self, storyboard: list, initial_ref_path: str, global_prompt: str, keyframe_resolution: int, general_ref_paths: list, progress_callback_factory: callable = None): - """ - Orchestrates the generation of all keyframes. 
- """ - current_base_image_path = initial_ref_path - previous_prompt = "N/A (initial reference image)" - final_keyframes_gallery = [] #[current_base_image_path] - width, height = keyframe_resolution, keyframe_resolution - target_resolution_tuple = (width, height) - - num_keyframes_to_generate = len(storyboard) - 1 - logger.info(f"IMAGE SPECIALIST: Received order to generate {num_keyframes_to_generate} keyframes (LTX versions).") - - for i in range(num_keyframes_to_generate): - scene_index = i + 1 - current_scene = storyboard[i] - future_scene = storyboard[i+1] - progress_callback_flux = progress_callback_factory(scene_index, num_keyframes_to_generate) if progress_callback_factory else None - - logger.info(f"--> Generating Keyframe {scene_index}/{num_keyframes_to_generate}...") - - # --- STEP A: Generate with FLUX (Primary Method) --- - logger.info(f" - Step A: Generating with keyframe...") - - img_prompt = deformes2d_thinker_singleton.get_anticipatory_keyframe_prompt( - global_prompt=global_prompt, scene_history=previous_prompt, - current_scene_desc=current_scene, future_scene_desc=future_scene, - last_image_path=current_base_image_path, fixed_ref_paths=general_ref_paths - ) - - #flux_ref_paths = list(set([current_base_image_path] + general_ref_paths)) - #flux_ref_images = [Image.open(p) for p in flux_ref_paths] - - #flux_keyframe_path = self._generate_single_keyframe( - # prompt=img_prompt, reference_images=flux_ref_images, - # output_filename=f"keyframe_{scene_index}_flux.png", width=width, height=height, - # callback=progress_callback_flux - #) - #final_keyframes_gallery.append(flux_keyframe_path) - - # --- STEP B: LTX Enrichment Experiment --- - #logger.info(f" - Step B: Generating enrichment with LTX...") - - ltx_context_paths = [] - context_paths = [] - context_paths = [current_base_image_path] + [p for p in general_ref_paths if p != current_base_image_path][:3] - - ltx_context_paths = list(reversed(context_paths)) - logger.info(f" - LTX Context Order (Reversed): {[os.path.basename(p) for p in ltx_context_paths]}") - - ltx_conditioning_items = [] - - weight = 0.6 - for idx, path in enumerate(ltx_context_paths): - img_pil = Image.open(path).convert("RGB") - img_processed = self._preprocess_image_for_latent_conversion(img_pil, target_resolution_tuple) - pixel_tensor = self._pil_to_pixel_tensor(img_processed) - latent_tensor = vae_manager_singleton.encode(pixel_tensor) - - ltx_conditioning_items.append(LatentConditioningItem(latent_tensor, 0, weight)) - - if idx >= 0: - weight -= 0.1 - - ltx_base_params = {"guidance_scale": 1.0, "stg_scale": 0.001, "num_inference_steps": 25} - generated_latents, _ = ltx_manager_singleton.generate_latent_fragment( - height=height, width=width, - conditioning_items_data=ltx_conditioning_items, - motion_prompt=img_prompt, - video_total_frames=48, - video_fps=24, - **ltx_base_params - ) - - final_latent = generated_latents[:, :, -1:, :, :] - upscaled_latent = latent_enhancer_specialist_singleton.upscale(final_latent) - enriched_pixel_tensor = vae_manager_singleton.decode(upscaled_latent) - - ltx_keyframe_path = os.path.join(self.workspace_dir, f"keyframe_{scene_index}_ltx.png") - self.save_image_from_tensor(enriched_pixel_tensor, ltx_keyframe_path) - final_keyframes_gallery.append(ltx_keyframe_path) - - # Use the FLUX keyframe as the base for the next iteration to maintain the primary narrative path - current_base_image_path = ltx_keyframe_path #flux_keyframe_path - previous_prompt = img_prompt - - logger.info(f"IMAGE SPECIALIST: Generation of all keyframe 
-        logger.info(f"IMAGE SPECIALIST: Generation of all keyframe versions (LTX) complete.")
-        return final_keyframes_gallery
-
-    # --- HELPER FUNCTIONS ---
-
-    def _preprocess_image_for_latent_conversion(self, image: Image.Image, target_resolution: tuple) -> Image.Image:
-        """Resizes and fits an image to the target resolution for VAE encoding."""
-        if image.size != target_resolution:
-            return ImageOps.fit(image, target_resolution, Image.Resampling.LANCZOS)
-        return image
-
-    def _pil_to_pixel_tensor(self, pil_image: Image.Image) -> torch.Tensor:
-        """Helper to convert PIL to the 5D pixel tensor the VAE expects."""
-        image_np = np.array(pil_image).astype(np.float32) / 255.0
-        tensor = torch.from_numpy(image_np).permute(2, 0, 1).unsqueeze(0).unsqueeze(2)
-        return (tensor * 2.0) - 1.0
-
-    def save_image_from_tensor(self, pixel_tensor: torch.Tensor, path: str):
-        """Helper to save a 1-frame pixel tensor as an image."""
-        tensor_chw = pixel_tensor.squeeze(0).squeeze(1)
-        tensor_hwc = tensor_chw.permute(1, 2, 0)
-        tensor_hwc = (tensor_hwc.clamp(-1, 1) + 1) / 2.0
-        image_np = (tensor_hwc.cpu().float().numpy() * 255).astype(np.uint8)
-        Image.fromarray(image_np).save(path)
-
-# --- Singleton Instantiation ---
-try:
-    with open("config.yaml", 'r') as f:
-        config = yaml.safe_load(f)
-    WORKSPACE_DIR = config['application']['workspace_dir']
-    deformes3d_engine_singleton = Deformes3DEngine(workspace_dir=WORKSPACE_DIR)
-except Exception as e:
-    logger.error(f"Could not initialize Deformes3DEngine: {e}", exc_info=True)
-    deformes3d_engine_singleton = None
\ No newline at end of file
diff --git a/engineers/deformes4D.py b/engineers/deformes4D.py
deleted file mode 100644
index 21e7cd96df16d6abd0ea1ac9fda5112c604d6e7a..0000000000000000000000000000000000000000
--- a/engineers/deformes4D.py
+++ /dev/null
@@ -1,338 +0,0 @@
-# engineers/deformes4D.py
-#
-# AducSdr: Uma implementação aberta e funcional da arquitetura ADUC-SDR
-# Copyright (C) 4 de Agosto de 2025 Carlos Rodrigues dos Santos
-#
-# Contato:
-# Carlos Rodrigues dos Santos
-# carlex22@gmail.com
-# Rua Eduardo Carlos Pereira, 4125, B1 Ap32, Curitiba, PR, Brazil, CEP 8102025
-#
-# Repositórios e Projetos Relacionados:
-# GitHub: https://github.com/carlex22/Aduc-sdr
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Affero General Public License for more details.
-#
-# You should have received a copy of the GNU Affero General Public License
-# along with this program. If not, see