Carlexxx committed
Commit 99c6a62 · 1 Parent(s): 44e066b

feat: Implement self-contained specialist managers

This view is limited to 50 files because the commit contains too many changes.
Files changed (50)
  1. aduc_framework/__init__.py +75 -0
  2. aduc_framework/director.py +116 -0
  3. {engineers → aduc_framework/engineers}/LICENSE +0 -0
  4. {engineers → aduc_framework/engineers}/NOTICE.md +0 -0
  5. {engineers → aduc_framework/engineers}/README.md +0 -0
  6. aduc_framework/engineers/__init__.py +13 -0
  7. {engineers → aduc_framework/engineers}/deformes2D_thinker.py +8 -8
  8. aduc_framework/engineers/deformes3D.py +183 -0
  9. aduc_framework/engineers/deformes4D.py +235 -0
  10. {managers → aduc_framework/managers}/LICENSE +0 -0
  11. {managers → aduc_framework/managers}/LICENSE.txt +0 -0
  12. {managers → aduc_framework/managers}/NOTICE.md +0 -0
  13. {managers → aduc_framework/managers}/README.md +0 -0
  14. aduc_framework/managers/__init__.py +19 -0
  15. {managers → aduc_framework/managers}/config.yaml +0 -0
  16. {managers → aduc_framework/managers}/flux_kontext_manager.py +1 -1
  17. {managers → aduc_framework/managers}/gemini_manager.py +1 -1
  18. {managers → aduc_framework/managers}/latent_enhancer_manager.py +1 -1
  19. {managers → aduc_framework/managers}/ltx_manager.py +101 -110
  20. {managers → aduc_framework/managers}/ltx_pipeline_utils.py +0 -0
  21. aduc_framework/managers/mmaudio_manager.py +226 -0
  22. {managers → aduc_framework/managers}/seedvr_manager.py +157 -141
  23. {managers → aduc_framework/managers}/upscaler_specialist.py +1 -1
  24. {managers → aduc_framework/managers}/vae_manager.py +1 -1
  25. aduc_framework/orchestrator.py +194 -0
  26. {prompts → aduc_framework/prompts}/LICENSE +0 -0
  27. {prompts → aduc_framework/prompts}/NOTICE.md +0 -0
  28. {prompts → aduc_framework/prompts}/README.md +0 -0
  29. {prompts → aduc_framework/prompts}/anticipatory_keyframe_prompt.txt +0 -0
  30. {prompts → aduc_framework/prompts}/audio_director_prompt.txt +0 -0
  31. aduc_framework/prompts/cinematic_director_prompt.txt +27 -0
  32. {prompts → aduc_framework/prompts}/director_composition_prompt.txt +0 -0
  33. {prompts → aduc_framework/prompts}/flux_composition_wrapper_prompt.txt +0 -0
  34. {prompts → aduc_framework/prompts}/initial_motion_prompt.txt +0 -0
  35. {prompts → aduc_framework/prompts}/keyframe_selection_prompt.txt +0 -0
  36. {prompts → aduc_framework/prompts}/sound_director_prompt.txt +0 -0
  37. {prompts → aduc_framework/prompts}/sound_director_prompt.txt.txt +0 -0
  38. {prompts → aduc_framework/prompts}/transition_decision_prompt.txt +0 -0
  39. {prompts → aduc_framework/prompts}/unified_cinematographer_prompt.txt +0 -0
  40. {prompts → aduc_framework/prompts}/unified_storyboard_prompt.txt +0 -0
  41. {tools → aduc_framework/tools}/LICENSE +0 -0
  42. {tools → aduc_framework/tools}/NOTICE.md +0 -0
  43. {tools → aduc_framework/tools}/README.md +0 -0
  44. aduc_framework/tools/__init__.py +15 -0
  45. {tools → aduc_framework/tools}/hardware_manager.py +0 -0
  46. {tools → aduc_framework/tools}/optimization.py +0 -0
  47. {tools → aduc_framework/tools}/tensor_utils.py +0 -0
  48. {tools → aduc_framework/tools}/video_encode_tool.py +0 -0
  49. aduc_framework/types.py +100 -0
  50. aduc_orchestrator.py +0 -199
aduc_framework/__init__.py ADDED
@@ -0,0 +1,75 @@
+ # aduc_framework/__init__.py
+ #
+ # Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos
+ #
+ # Version 3.0.0 (Framework Entry Point)
+ #
+ # This file is the main entry point for the Aduc Framework. It defines the
+ # public interface that clients (UIs, APIs, etc.) use to create and interact
+ # with the orchestration system.
+ #
+ # Its main responsibility is to expose a factory function
+ # ('create_aduc_instance') that encapsulates the initialization logic of the
+ # orchestrator and its components, keeping the framework easy to consume.
+
+ import logging
+
+ # Import the classes and types that form the framework's public interface
+ from .orchestrator import AducOrchestrator
+ from .types import (
+     GenerationState,
+     PreProductionParams,
+     ProductionParams,
+     GenerationParameters,
+     MediaRef,
+     Ato,
+     KeyframeData,
+     VideoData
+ )
+
+ # Configure a logger for the framework so clients can see initialization messages.
+ logger = logging.getLogger(__name__)
+
+ def create_aduc_instance(workspace_dir: str) -> AducOrchestrator:
+     """
+     Factory entry point that creates a fully functional Aduc Framework instance.
+
+     This function abstracts the initialization of the AducOrchestrator and all
+     of its dependent engineers and managers. Framework clients should use it
+     to guarantee correct, consistent initialization.
+
+     Args:
+         workspace_dir (str): Path to the directory where all artifacts
+             (images, videos, latents, logs) will be saved.
+
+     Returns:
+         AducOrchestrator: A ready-to-use instance of the main orchestrator.
+     """
+     logger.info(f"ADUC factory: creating a new instance with workspace at '{workspace_dir}'...")
+
+     # More complex initialization logic, such as dependency checks or hardware
+     # configuration, can be added here in the future.
+     instance = AducOrchestrator(workspace_dir=workspace_dir)
+
+     logger.info("ADUC factory: framework instance created and ready to use.")
+
+     return instance
+
+ # Log message confirming that the framework package was imported successfully.
+ logger.info("Module 'aduc_framework' loaded. Use 'create_aduc_instance()' to get started.")
+
+ # Optional: define __all__ to control what 'from aduc_framework import *' imports
+ __all__ = [
+     "create_aduc_instance",
+     "AducOrchestrator",
+     "GenerationState",
+     "PreProductionParams",
+     "ProductionParams",
+     "GenerationParameters",
+     "MediaRef",
+     "Ato",
+     "KeyframeData",
+     "VideoData"
+ ]
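The factory keeps client code to a few lines. A minimal usage sketch (the workspace path is illustrative; the names come from the `__all__` list above):

```python
# Hypothetical client of the new entry point.
import logging
from aduc_framework import create_aduc_instance

logging.basicConfig(level=logging.INFO)

orchestrator = create_aduc_instance(workspace_dir="./workspace")  # illustrative path
# The returned AducOrchestrator wires up its engineers and managers internally,
# so clients never construct those components directly.
```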
aduc_framework/director.py ADDED
@@ -0,0 +1,116 @@
+ # aduc_framework/director.py
+ #
+ # Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos
+ #
+ # Version 3.0.0 (Framework State Manager)
+ #
+ # This file contains the AducDirector class. Its sole responsibility is to
+ # manage the generation state object (GenerationState). It acts as the
+ # orchestra's "score" or the film's "script", keeping an accurate record of
+ # every parameter and generated artifact.
+
+ import logging
+ import os
+ from typing import List, Dict, Any
+
+ # Import the Pydantic data models that the director manages
+ from .types import GenerationState, PreProductionParams, ProductionParams, Ato, MediaRef, KeyframeData, VideoData
+
+ logger = logging.getLogger(__name__)
+
+ class AducDirector:
+     """
+     Represents the Scene Director, responsible for managing production state.
+     Acts as the single source of truth for all data related to a single
+     video-generation task.
+     """
+     def __init__(self, workspace_dir: str):
+         """
+         Initializes the Director.
+
+         Args:
+             workspace_dir (str): Directory where artifacts are saved.
+                 The Director uses it to reference paths when needed.
+         """
+         self.workspace_dir = workspace_dir
+         self.state: GenerationState = self._initialize_state()
+         os.makedirs(self.workspace_dir, exist_ok=True)
+         logger.info("AducDirector initialized. Generation state created.")
+
+     def _initialize_state(self) -> GenerationState:
+         """
+         Creates an empty, valid instance of the GenerationState model.
+         """
+         return GenerationState()
+
+     def get_full_state(self) -> GenerationState:
+         """
+         Returns the complete Pydantic state object.
+
+         Returns:
+             GenerationState: The current generation state.
+         """
+         return self.state
+
+     def get_full_state_as_dict(self) -> Dict[str, Any]:
+         """
+         Returns the complete state serialized as a Python dictionary.
+         Useful for passing to libraries that do not support Pydantic directly.
+
+         Returns:
+             Dict[str, Any]: The current state as a dictionary.
+         """
+         return self.state.model_dump()
+
+     def update_parameters(self, stage: str, params: Any):
+         """
+         Updates the parameters node in the generation state.
+
+         Args:
+             stage (str): The production stage ('pre_producao', 'producao', etc.).
+             params (BaseModel): Pydantic object holding that stage's parameters.
+         """
+         if hasattr(self.state.parametros_geracao, stage):
+             setattr(self.state.parametros_geracao, stage, params)
+             logger.info(f"Parameters for stage '{stage}' updated in the state.")
+         else:
+             logger.warning(f"Attempted to update parameters for an unknown stage: '{stage}'")
+
+     def update_pre_production_state(self, prompt: str, ref_paths: List[str], storyboard: List[str]):
+         """
+         Populates the initial sections of the state after storyboard generation.
+
+         Args:
+             prompt (str): The global prompt.
+             ref_paths (List[str]): Paths to the reference media.
+             storyboard (List[str]): Summaries of the acts.
+         """
+         self.state.Promt_geral = prompt
+         self.state.midias_referencia = [MediaRef(id=i, caminho=path) for i, path in enumerate(ref_paths)]
+         self.state.Atos = [Ato(id=i, resumo_ato=ato) for i, ato in enumerate(storyboard)]
+         logger.info("Pre-production state (prompt, references, acts) updated.")
+
+     def update_keyframes_state(self, keyframes_data: List[Dict[str, Any]]):
+         """
+         Updates the keyframe list in the state.
+
+         Args:
+             keyframes_data (List[Dict[str, Any]]): A list of dictionaries, each
+                 holding one keyframe's data.
+         """
+         # Convert the dictionaries into KeyframeData Pydantic models
+         self.state.Keyframe_atos = [KeyframeData(**data) for data in keyframes_data]
+         logger.info(f"{len(keyframes_data)} keyframes added to the state.")
+
+     def update_video_state(self, video_data_dict: Dict[str, Any]):
+         """
+         Updates the list of generated videos in the state.
+
+         Args:
+             video_data_dict (Dict[str, Any]): Dictionary with the generated video's data.
+         """
+         # Convert the dictionary into a VideoData Pydantic model
+         video_model = VideoData(**video_data_dict)
+         # For now we replace the list, but we could append to it in the future.
+         self.state.videos_atos = [video_model]
+         logger.info("Video production data updated in the state.")
{engineers → aduc_framework/engineers}/LICENSE RENAMED
File without changes
{engineers → aduc_framework/engineers}/NOTICE.md RENAMED
File without changes
{engineers → aduc_framework/engineers}/README.md RENAMED
File without changes
aduc_framework/engineers/__init__.py ADDED
@@ -0,0 +1,13 @@
+ # aduc_framework/engineers/__init__.py
+
+ # Exposes the main singletons and classes of the engineers sub-package.
+
+ from .deformes2D_thinker import deformes2d_thinker_singleton
+ from .deformes3D import deformes3d_engine_singleton
+ from .deformes4D import Deformes4DEngine
+
+ __all__ = [
+     "deformes2d_thinker_singleton",
+     "deformes3d_engine_singleton",
+     "Deformes4DEngine",
+ ]
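With the sub-package `__init__` files in place, clients import specialists through the framework namespace instead of the old top-level `engineers`/`managers` packages; a quick sketch (note that importing these sub-packages triggers the singletons' setup at import time):

```python
# Imports resolve through the package layout introduced by this commit.
from aduc_framework.engineers import deformes3d_engine_singleton, Deformes4DEngine

deformes3d_engine_singleton.initialize(workspace_dir="./workspace")  # re-initialization is a no-op
engine_4d = Deformes4DEngine()
engine_4d.initialize(workspace_dir="./workspace")
```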
{engineers → aduc_framework/engineers}/deformes2D_thinker.py RENAMED
@@ -36,7 +36,7 @@ import gradio as gr
  from typing import List
 
  # It imports the communication layer, not the API directly
- from managers.gemini_manager import gemini_manager_singleton
+ from ..managers.gemini_manager import gemini_manager_singleton
 
  logger = logging.getLogger(__name__)
 
@@ -117,16 +117,16 @@ class Deformes2DThinker
          prompt_parts = [
              f"# CONTEXT:\n- Global Story Goal: {global_prompt}\n# VISUAL ASSETS:",
              "Current Base Image [IMG-BASE]:",
-             Image.open(last_image_path)
+             "",  # Image.open(last_image_path)
          ]
 
-         ref_counter = 1
-         for path in fixed_ref_paths:
-             if path != last_image_path:
-                 prompt_parts.extend([f"General Reference Image [IMG-REF-{ref_counter}]:", Image.open(path)])
-                 ref_counter += 1
+         # ref_counter = 1
+         # for path in fixed_ref_paths:
+         #     if path != last_image_path:
+         #         prompt_parts.extend([f"General Reference Image [IMG-REF-{ref_counter}]:", Image.open(path)])
+         #         ref_counter += 1
 
-         prompt_parts.append(director_prompt)
+         # prompt_parts.append(director_prompt)
 
          final_flux_prompt = gemini_manager_singleton.get_raw_text(prompt_parts)
aduc_framework/engineers/deformes3D.py ADDED
@@ -0,0 +1,183 @@
+ # aduc_framework/engineers/deformes3D.py
+ #
+ # Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos
+ #
+ # Version 3.1.2 (with 'typing' import fix)
+ #
+ # This engineer is the framework's "Art Director". Its responsibility is to
+ # read the generation state (storyboard, parameters) and orchestrate the
+ # creation of the visual keyframes that anchor video generation.
+
+ import os
+ import time
+ import logging
+ import yaml
+ import torch
+ import numpy as np
+ from PIL import Image, ImageOps
+
+ # >>> START OF FIX <<<
+ from typing import List, Dict, Any, Callable, Optional
+ # >>> END OF FIX <<<
+
+ # --- Corrected relative imports ---
+ from .deformes2D_thinker import deformes2d_thinker_singleton
+ from ..types import LatentConditioningItem
+ from ..managers.ltx_manager import ltx_manager_singleton
+ from ..managers.vae_manager import vae_manager_singleton
+ from ..managers.latent_enhancer_manager import latent_enhancer_specialist_singleton
+
+ logger = logging.getLogger(__name__)
+
+ # Type alias for the progress callback, for clarity
+ ProgressCallback = Optional[Callable[[float, str], None]]
+
+ class Deformes3DEngine:
+     """
+     ADUC specialist for generating static images (keyframes).
+     """
+     def __init__(self):
+         """The constructor is lightweight and takes no arguments."""
+         self.workspace_dir: Optional[str] = None
+         logger.info("Deformes3DEngine instantiated (not initialized).")
+
+     def initialize(self, workspace_dir: str):
+         """Initializes the engineer with the required configuration."""
+         if self.workspace_dir is not None:
+             return  # Avoid re-initialization
+         self.workspace_dir = workspace_dir
+         logger.info(f"3D Engine (Image Specialist) initialized with workspace: {self.workspace_dir}.")
+
+     def generate_keyframes_from_storyboard(
+         self,
+         generation_state: Dict[str, Any],
+         progress_callback: ProgressCallback = None
+     ) -> List[Dict[str, Any]]:
+         """
+         Orchestrates the generation of all keyframes from the full generation state.
+         Returns a list of dictionaries with detailed data for each keyframe.
+         """
+         if not self.workspace_dir:
+             raise RuntimeError("Deformes3DEngine was not initialized. Call initialize() before use.")
+
+         # 1. Extract all required parameters from the state
+         params = generation_state.get("parametros_geracao", {}).get("pre_producao", {})
+         storyboard = [ato["resumo_ato"] for ato in generation_state.get("Atos", [])]
+         global_prompt = generation_state.get("Promt_geral", "")
+         general_ref_paths = [media["caminho"] for media in generation_state.get("midias_referencia", [])]
+
+         keyframe_resolution = params.get('resolution', 480)
+         initial_ref_path = general_ref_paths[0]
+
+         previous_prompt = ""
+         all_keyframes_data: List[Dict[str, Any]] = []
+         width, height = keyframe_resolution, keyframe_resolution
+         target_resolution_tuple = (width, height)
+         # NOTE: this definition is missing in the committed code (NameError);
+         # one keyframe per storyboard act is the assumed intent.
+         num_keyframes_to_generate = len(storyboard)
+
+         logger.info(f"IMAGE SPECIALIST: order to generate {num_keyframes_to_generate} keyframes (LTX versions).")
+         ltx_conditioning_items0 = []
+
+         img_pil0 = Image.open(initial_ref_path).convert("RGB")
+         img_processed0 = self._preprocess_image_for_latent_conversion(img_pil0, target_resolution_tuple)
+         pixel_tensor0 = self._pil_to_pixel_tensor(img_processed0)
+
+         ltx_conditioning_items0.append(LatentConditioningItem(pixel_tensor0, 0, 0.05))
+         ltx_conditioning_items0.append(LatentConditioningItem(pixel_tensor0, 23, 0.05))
+
+         latent_tensorY = pixel_tensor0
+         latent_tensorX = latent_tensorY
+
+         current_base_image_path = initial_ref_path
+         past_base_image_path = initial_ref_path
+
+         for i in range(num_keyframes_to_generate):
+             ltx_conditioning_items = ltx_conditioning_items0
+             scene_index = i + 1
+
+             current_scene = storyboard[i]
+             future_scene = storyboard[i + 1] if (i + 1) < len(storyboard) else "The final scene."
+             logger.info(f"--> Generating keyframe {scene_index}/{num_keyframes_to_generate}...")
+
+             img_prompt = deformes2d_thinker_singleton.get_anticipatory_keyframe_prompt(
+                 global_prompt=global_prompt,
+                 scene_history=previous_prompt,
+                 current_scene_desc=current_scene,
+                 future_scene_desc=future_scene,
+                 last_image_path=past_base_image_path,
+                 fixed_ref_paths=current_base_image_path
+             )
+
+             past_base_image_path = current_base_image_path
+
+             ltx_conditioning_items = ltx_conditioning_items0
+             ltx_conditioning_items.append(LatentConditioningItem(latent_tensorX, 0, 0.4))
+             ltx_conditioning_items.append(LatentConditioningItem(latent_tensorY, 8, 0.6))
+
+             latent_tensorX = latent_tensorY
+
+             ltx_base_params = {"guidance_scale": 1.0, "stg_scale": 0.001, "num_inference_steps": 25}
+             generated_latents, _ = ltx_manager_singleton.generate_latent_fragment(
+                 height=height, width=width,
+                 conditioning_items_data=ltx_conditioning_items,
+                 motion_prompt=img_prompt,
+                 video_total_frames=24, video_fps=24,
+                 **ltx_base_params
+             )
+
+             final_latent = generated_latents[:, :, -1:, :, :]
+             # upscaled_latent = latent_enhancer_specialist_singleton.upscale(final_latent)
+             enriched_pixel_tensor = vae_manager_singleton.decode(final_latent)
+
+             pixel_path = os.path.join(self.workspace_dir, f"keyframe_{scene_index:04d}_pixel.png")
+             latent_path = os.path.join(self.workspace_dir, f"keyframe_{scene_index:04d}_latent.pt")
+             self.save_image_from_tensor(enriched_pixel_tensor, pixel_path)
+             torch.save(final_latent.cpu(), latent_path)
+
+             latent_tensorY = latent_path
+             past_base_image_path = current_base_image_path
+
+             keyframe_data = {
+                 "id": scene_index,
+                 "caminho_pixel": pixel_path,
+                 "caminho_latent": latent_path,
+                 "prompt_keyframe": img_prompt
+             }
+
+             all_keyframes_data.append(keyframe_data)
+
+             current_base_image_path = pixel_path
+             previous_prompt = img_prompt
+
+         logger.info("IMAGE SPECIALIST: generation of all keyframe data complete.")
+         return all_keyframes_data
+
+     # --- HELPER FUNCTIONS ---
+
+     def _preprocess_image_for_latent_conversion(self, image: Image.Image, target_resolution: tuple) -> Image.Image:
+         if image.size != target_resolution:
+             return ImageOps.fit(image, target_resolution, Image.Resampling.LANCZOS)
+         return image
+
+     def _pil_to_pixel_tensor(self, pil_image: Image.Image) -> torch.Tensor:
+         image_np = np.array(pil_image).astype(np.float32) / 255.0
+         tensor = torch.from_numpy(image_np).permute(2, 0, 1).unsqueeze(0).unsqueeze(2)
+         return (tensor * 2.0) - 1.0
+
+     def save_image_from_tensor(self, pixel_tensor: torch.Tensor, path: str):
+         tensor_chw = pixel_tensor.squeeze(0).squeeze(1)
+         tensor_hwc = tensor_chw.permute(1, 2, 0)
+         tensor_hwc = (tensor_hwc.clamp(-1, 1) + 1) / 2.0
+         image_np = (tensor_hwc.cpu().float().numpy() * 255).astype(np.uint8)
+         Image.fromarray(image_np).save(path)
+
+ # --- Singleton instantiation ---
+ # The instance is created but not yet configured; the Orchestrator does that.
+ deformes3d_engine_singleton = Deformes3DEngine()
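The helper pair `_pil_to_pixel_tensor` / `save_image_from_tensor` fixes the engine's pixel-tensor convention: shape `(1, C, 1, H, W)` with values in `[-1, 1]`. A standalone round-trip sketch of that convention:

```python
# Round trip of the keyframe pixel-tensor convention used above.
import numpy as np
import torch
from PIL import Image

img = Image.new("RGB", (480, 480), color=(120, 60, 30))              # stand-in keyframe
arr = np.array(img).astype(np.float32) / 255.0                        # HWC in [0, 1]
t = torch.from_numpy(arr).permute(2, 0, 1).unsqueeze(0).unsqueeze(2)  # (1, 3, 1, 480, 480)
t = t * 2.0 - 1.0                                                     # [-1, 1], as the pipeline expects

back = (t.squeeze(0).squeeze(1).permute(1, 2, 0).clamp(-1, 1) + 1) / 2.0
restored = Image.fromarray((back.numpy() * 255).astype(np.uint8))     # equals img up to rounding
```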
aduc_framework/engineers/deformes4D.py ADDED
@@ -0,0 +1,235 @@
+ # aduc_framework/engineers/deformes4D.py
+ #
+ # Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos
+ #
+ # Version 3.1.1 (with file-cleanup fix)
+ #
+ # This engineer implements the Camera (Ψ) and the Distiller (Δ) of the
+ # ADUC-SDR architecture. Its sole responsibility is the sequential generation
+ # of video fragments from a predefined set of keyframes.
+
+ import os
+ import time
+ import imageio
+ import numpy as np
+ import torch
+ import logging
+ from PIL import Image, ImageOps
+ import gc
+ import shutil
+ from pathlib import Path
+ from typing import List, Tuple, Dict, Any, Callable, Optional
+
+ # --- Corrected relative imports ---
+ from ..types import LatentConditioningItem
+ from ..managers.ltx_manager import ltx_manager_singleton
+ from ..managers.vae_manager import vae_manager_singleton
+ from .deformes2D_thinker import deformes2d_thinker_singleton
+ from ..tools.video_encode_tool import video_encode_tool_singleton
+
+ logger = logging.getLogger(__name__)
+
+ ProgressCallback = Optional[Callable[[float, str], None]]
+
+ class Deformes4DEngine:
+     """
+     Orchestrates the generation and concatenation of video fragments.
+     """
+     def __init__(self):
+         """The constructor is lightweight and takes no arguments."""
+         self.workspace_dir: Optional[str] = None
+         self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
+         logger.info("Deformes4DEngine instantiated (not initialized).")
+
+     def initialize(self, workspace_dir: str):
+         """Initializes the engineer with the required configuration."""
+         if self.workspace_dir is not None:
+             return  # Avoid re-initialization
+         self.workspace_dir = workspace_dir
+         os.makedirs(self.workspace_dir, exist_ok=True)
+         logger.info(f"Deformes4D Specialist (Executor) initialized with workspace: {self.workspace_dir}.")
+
+     def generate_original_movie(
+         self,
+         full_generation_state: Dict[str, Any],
+         progress_callback: ProgressCallback = None
+     ) -> Dict[str, Any]:
+         """
+         Generates the main movie, reading every parameter from the generation state.
+         """
+         if not self.workspace_dir:
+             raise RuntimeError("Deformes4DEngine was not initialized. Call initialize() before use.")
+
+         # 1. Extract all parameters from the generation state
+         pre_prod_params = full_generation_state.get("parametros_geracao", {}).get("pre_producao", {})
+         prod_params = full_generation_state.get("parametros_geracao", {}).get("producao", {})
+
+         keyframes_data = full_generation_state.get("Keyframe_atos", [])
+         global_prompt = full_generation_state.get("Promt_geral", "")
+         storyboard = [ato["resumo_ato"] for ato in full_generation_state.get("Atos", [])]
+         keyframe_paths = [kf["caminho_pixel"] for kf in keyframes_data]
+
+         seconds_per_fragment = pre_prod_params.get('duration_per_fragment', 4.0)
+         video_resolution = pre_prod_params.get('resolution', 480)
+
+         trim_percent = prod_params.get('trim_percent', 50)
+         handler_strength = prod_params.get('handler_strength', 0.5)
+         destination_convergence_strength = prod_params.get('destination_convergence_strength', 0.75)
+         guidance_scale = prod_params.get('guidance_scale', 2.0)
+         stg_scale = prod_params.get('stg_scale', 0.025)
+         num_inference_steps = prod_params.get('inference_steps', 20)
+
+         # 2. Start the generation process
+         FPS = 24
+         FRAMES_PER_LATENT_CHUNK = 8
+         LATENT_PROCESSING_CHUNK_SIZE = 4
+
+         run_timestamp = int(time.time())
+         temp_latent_dir = os.path.join(self.workspace_dir, f"temp_latents_{run_timestamp}")
+         temp_video_clips_dir = os.path.join(self.workspace_dir, f"temp_clips_{run_timestamp}")
+         os.makedirs(temp_latent_dir, exist_ok=True)
+         os.makedirs(temp_video_clips_dir, exist_ok=True)
+
+         total_frames_brutos = self._quantize_to_multiple(int(seconds_per_fragment * FPS), FRAMES_PER_LATENT_CHUNK)
+         frames_a_podar = self._quantize_to_multiple(int(total_frames_brutos * (trim_percent / 100)), FRAMES_PER_LATENT_CHUNK)
+         latents_a_podar = frames_a_podar // FRAMES_PER_LATENT_CHUNK
+         DEJAVU_FRAME_TARGET = frames_a_podar - 1 if frames_a_podar > 0 else 0
+         DESTINATION_FRAME_TARGET = total_frames_brutos - 1
+
+         base_ltx_params = {"guidance_scale": guidance_scale, "stg_scale": stg_scale, "num_inference_steps": num_inference_steps}
+         story_history = ""
+         target_resolution_tuple = (video_resolution, video_resolution)
+         eco_latent_for_next_loop, dejavu_latent_for_next_loop = None, None
+         latent_fragment_paths = []
+         video_fragments_data = []
+
+         if len(keyframe_paths) < 2:
+             raise ValueError(f"Generation requires at least 2 keyframes. Provided: {len(keyframe_paths)}.")
+         num_transitions_to_generate = len(keyframe_paths) - 1
+
+         logger.info("--- STAGE 1: Latent fragment generation ---")
+         for i in range(num_transitions_to_generate):
+             fragment_index = i + 1
+             if progress_callback:
+                 progress_fraction = (i / num_transitions_to_generate) * 0.7
+                 progress_callback(progress_fraction, f"Generating latent {fragment_index}/{num_transitions_to_generate}")
+
+             past_keyframe_path = keyframe_paths[i - 1] if i > 0 else keyframe_paths[i]
+             start_keyframe_path = keyframe_paths[i]
+             destination_keyframe_path = keyframe_paths[i + 1]
+             future_story_prompt = storyboard[i + 1] if (i + 1) < len(storyboard) else "The final scene."
+             decision = deformes2d_thinker_singleton.get_cinematic_decision(
+                 global_prompt, story_history, past_keyframe_path, start_keyframe_path,
+                 destination_keyframe_path, storyboard[i - 1] if i > 0 else "The beginning.",
+                 storyboard[i], future_story_prompt
+             )
+             motion_prompt = decision["motion_prompt"]
+             story_history += f"\n- Act {fragment_index}: {motion_prompt}"
+
+             conditioning_items = []
+             if eco_latent_for_next_loop is None:
+                 img_start = self._preprocess_image_for_latent_conversion(Image.open(start_keyframe_path).convert("RGB"), target_resolution_tuple)
+                 conditioning_items.append(LatentConditioningItem(self._pil_to_latent(img_start), 0, 1.0))
+             else:
+                 conditioning_items.append(LatentConditioningItem(eco_latent_for_next_loop, 0, 1.0))
+                 conditioning_items.append(LatentConditioningItem(dejavu_latent_for_next_loop, DEJAVU_FRAME_TARGET, handler_strength))
+
+             img_dest = self._preprocess_image_for_latent_conversion(Image.open(destination_keyframe_path).convert("RGB"), target_resolution_tuple)
+             conditioning_items.append(LatentConditioningItem(self._pil_to_latent(img_dest), DESTINATION_FRAME_TARGET, destination_convergence_strength))
+
+             latents_brutos, _ = ltx_manager_singleton.generate_latent_fragment(
+                 height=video_resolution, width=video_resolution,
+                 conditioning_items_data=conditioning_items, motion_prompt=motion_prompt,
+                 video_total_frames=total_frames_brutos, video_fps=FPS,
+                 **base_ltx_params
+             )
+
+             last_trim = latents_brutos[:, :, -(latents_a_podar+1):, :, :].clone()
+             eco_latent_for_next_loop = last_trim[:, :, :2, :, :].clone()
+             dejavu_latent_for_next_loop = last_trim[:, :, -1:, :, :].clone()
+             latents_video = latents_brutos[:, :, :-(latents_a_podar-1), :, :].clone()
+             del last_trim, latents_brutos; gc.collect(); torch.cuda.empty_cache()
+
+             cpu_latent = latents_video.cpu()
+             latent_path = os.path.join(temp_latent_dir, f"latent_fragment_{i:04d}.pt")
+             torch.save(cpu_latent, latent_path)
+             latent_fragment_paths.append(latent_path)
+
+             video_fragments_data.append({"id": i, "prompt_video": motion_prompt})
+             del latents_video, cpu_latent; gc.collect()
+
+         del eco_latent_for_next_loop, dejavu_latent_for_next_loop; gc.collect(); torch.cuda.empty_cache()
+
+         logger.info(f"--- STAGE 2: Processing {len(latent_fragment_paths)} latents ---")
+         final_video_clip_paths = []
+         num_chunks = -(-len(latent_fragment_paths) // LATENT_PROCESSING_CHUNK_SIZE) if LATENT_PROCESSING_CHUNK_SIZE > 0 else 0
+         for i in range(num_chunks):
+             chunk_start_index = i * LATENT_PROCESSING_CHUNK_SIZE
+             chunk_end_index = chunk_start_index + LATENT_PROCESSING_CHUNK_SIZE
+             chunk_paths = latent_fragment_paths[chunk_start_index:chunk_end_index]
+
+             if progress_callback:
+                 progress_fraction = 0.7 + (i / num_chunks * 0.28)
+                 progress_callback(progress_fraction, f"Processing & decoding batch {i+1}/{num_chunks}")
+
+             tensors_in_chunk = [torch.load(p, map_location=self.device) for p in chunk_paths]
+             sub_group_latent = torch.cat(tensors_in_chunk, dim=2)
+             del tensors_in_chunk; gc.collect(); torch.cuda.empty_cache()
+
+             pixel_tensor = vae_manager_singleton.decode(sub_group_latent)
+             del sub_group_latent; gc.collect(); torch.cuda.empty_cache()
+
+             base_name = f"clip_{i:04d}_{run_timestamp}"
+             current_clip_path = os.path.join(temp_video_clips_dir, f"{base_name}.mp4")
+             self.save_video_from_tensor(pixel_tensor, current_clip_path, fps=FPS)
+             final_video_clip_paths.append(current_clip_path)
+             del pixel_tensor; gc.collect(); torch.cuda.empty_cache()
+
+         if progress_callback: progress_callback(0.98, "Assembling the final movie...")
+         final_video_path = os.path.join(self.workspace_dir, f"original_movie_{run_timestamp}.mp4")
+         video_encode_tool_singleton.concatenate_videos(final_video_clip_paths, final_video_path, self.workspace_dir)
+
+         try:
+             shutil.rmtree(temp_video_clips_dir)
+             # The line that deleted 'temp_latent_dir' was removed so the latents persist.
+         except OSError as e:
+             logger.warning(f"Could not remove the temporary clips directory: {e}")
+
+         logger.info(f"Process complete! Original video saved at: {final_video_path}")
+
+         final_video_data_for_state = {
+             "id": 0, "caminho_pixel": final_video_path,
+             "caminhos_latentes_fragmentos": latent_fragment_paths,
+             "fragmentos_componentes": video_fragments_data
+         }
+
+         return {
+             "final_path": final_video_path,
+             "latent_paths": latent_fragment_paths,
+             "video_data": final_video_data_for_state
+         }
+
+     # --- HELPER FUNCTIONS ---
+     def save_video_from_tensor(self, video_tensor: torch.Tensor, path: str, fps: int = 24):
+         if video_tensor is None or video_tensor.ndim != 5 or video_tensor.shape[2] == 0: return
+         video_tensor = video_tensor.squeeze(0).permute(1, 2, 3, 0)
+         video_tensor = (video_tensor.clamp(-1, 1) + 1) / 2.0
+         video_np = (video_tensor.detach().cpu().float().numpy() * 255).astype(np.uint8)
+         with imageio.get_writer(path, fps=fps, codec='libx264', quality=8, output_params=['-pix_fmt', 'yuv420p']) as writer:
+             for frame in video_np: writer.append_data(frame)
+
+     def _preprocess_image_for_latent_conversion(self, image: Image.Image, target_resolution: tuple) -> Image.Image:
+         if image.size != target_resolution:
+             return ImageOps.fit(image, target_resolution, Image.Resampling.LANCZOS)
+         return image
+
+     def _pil_to_latent(self, pil_image: Image.Image) -> torch.Tensor:
+         image_np = np.array(pil_image).astype(np.float32) / 255.0
+         tensor = torch.from_numpy(image_np).permute(2, 0, 1).unsqueeze(0).unsqueeze(2)
+         tensor = (tensor * 2.0) - 1.0
+         return vae_manager_singleton.encode(tensor)
+
+     def _quantize_to_multiple(self, n: int, m: int) -> int:
+         if m == 0: return n
+         quantized = int(round(n / m) * m)
+         return m if n > 0 and quantized == 0 else quantized
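With the defaults read above (4.0 s per fragment, 24 fps, `trim_percent` 50, 8 frames per latent chunk), the Stage-1 trim arithmetic works out as in this self-contained sketch, which reuses the `_quantize_to_multiple` logic:

```python
# Worked example of the fragment/trim arithmetic in generate_original_movie.
def quantize_to_multiple(n: int, m: int) -> int:
    if m == 0:
        return n
    quantized = int(round(n / m) * m)
    return m if n > 0 and quantized == 0 else quantized

FPS, CHUNK = 24, 8
total_frames = quantize_to_multiple(int(4.0 * FPS), CHUNK)                  # 96
frames_to_trim = quantize_to_multiple(int(total_frames * 50 / 100), CHUNK)  # 48
latents_to_trim = frames_to_trim // CHUNK                                   # 6
dejavu_frame_target = frames_to_trim - 1 if frames_to_trim > 0 else 0       # 47
destination_frame_target = total_frames - 1                                 # 95
```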
{managers → aduc_framework/managers}/LICENSE RENAMED
File without changes
{managers → aduc_framework/managers}/LICENSE.txt RENAMED
File without changes
{managers → aduc_framework/managers}/NOTICE.md RENAMED
File without changes
{managers → aduc_framework/managers}/README.md RENAMED
File without changes
aduc_framework/managers/__init__.py ADDED
@@ -0,0 +1,19 @@
+ # aduc_framework/managers/__init__.py
+
+ # Exposes the main singletons and classes of the managers sub-package.
+
+ from .gemini_manager import gemini_manager_singleton
+ from .ltx_manager import ltx_manager_singleton
+ from .vae_manager import vae_manager_singleton
+ from .latent_enhancer_manager import latent_enhancer_specialist_singleton
+ from .mmaudio_manager import mmaudio_manager_singleton
+ from .seedvr_manager import seedvr_manager_singleton
+
+ __all__ = [
+     "gemini_manager_singleton",
+     "ltx_manager_singleton",
+     "vae_manager_singleton",
+     "latent_enhancer_specialist_singleton",
+     "mmaudio_manager_singleton",
+     "seedvr_manager_singleton",
+ ]
{managers → aduc_framework/managers}/config.yaml RENAMED
File without changes
{managers → aduc_framework/managers}/flux_kontext_manager.py RENAMED
@@ -25,7 +25,7 @@ import threading
  import yaml
  import logging
 
- from tools.hardware_manager import hardware_manager
+ from ..tools.hardware_manager import hardware_manager
 
  logger = logging.getLogger(__name__)
{managers → aduc_framework/managers}/gemini_manager.py RENAMED
@@ -63,7 +63,7 @@ class GeminiManager
          self.api_key = os.environ.get("GEMINI_API_KEY")
          if self.api_key:
              genai.configure(api_key=self.api_key)
-             self.model = genai.GenerativeModel('gemini-2.5-flash')
+             self.model = genai.GenerativeModel('gemini-2.0-flash')
              logger.info("GeminiManager (Communication Layer) initialized successfully.")
          else:
              self.model = None
{managers → aduc_framework/managers}/latent_enhancer_manager.py RENAMED
@@ -19,7 +19,7 @@ import torch
  import logging
  import time
  from diffusers import LTXLatentUpsamplePipeline
- from managers.ltx_manager import ltx_manager_singleton
+ from ..managers.ltx_manager import ltx_manager_singleton
 
  logger = logging.getLogger(__name__)
{managers → aduc_framework/managers}/ltx_manager.py RENAMED
@@ -1,22 +1,13 @@
1
- # managers/ltx_manager.py
2
- # AducSdr: Uma implementação aberta e funcional da arquitetura ADUC-SDR
3
- # Copyright (C) 4 de Agosto de 2025 Carlos Rodrigues dos Santos
4
  #
5
- # Contato:
6
- # Carlos Rodrigues dos Santos
7
8
- # Rua Eduardo Carlos Pereira, 4125, B1 Ap32, Curitiba, PR, Brazil, CEP 8102025
9
  #
10
- # Repositórios e Projetos Relacionados:
11
- # GitHub: https://github.com/carlex22/Aduc-sdr
12
  #
13
- # PENDING PATENT NOTICE: Please see NOTICE.md.
14
- #
15
- # Version: 2.2.2
16
- #
17
- # This file manages the LTX-Video specialist pool. It has been refactored to be
18
- # self-contained by automatically cloning its own dependencies and using a local
19
- # utility module for pipeline creation, fully decoupling it from external scripts.
20
 
21
  import torch
22
  import gc
@@ -31,18 +22,19 @@ import subprocess
31
  from pathlib import Path
32
  from typing import Optional, List, Tuple, Union
33
 
34
- from tools.optimization import optimize_ltx_worker, can_optimize_fp8
35
- from tools.hardware_manager import hardware_manager
36
- from aduc_types import LatentConditioningItem
 
37
 
38
  logger = logging.getLogger(__name__)
39
 
40
- # --- Dependency Management ---
41
  DEPS_DIR = Path("./deps")
42
  LTX_VIDEO_REPO_DIR = DEPS_DIR / "LTX-Video"
43
  LTX_VIDEO_REPO_URL = "https://github.com/Lightricks/LTX-Video.git"
44
 
45
- # --- Placeholders for lazy-loaded modules ---
46
  create_ltx_video_pipeline = None
47
  calculate_padding = None
48
  LTXVideoPipeline = None
@@ -54,11 +46,10 @@ randn_tensor = None
54
 
55
  class LtxPoolManager:
56
  """
57
- Manages a pool of LtxWorkers for optimized multi-GPU usage.
58
- Handles its own code dependencies by cloning the LTX-Video repository.
59
  """
60
- def __init__(self, device_ids, ltx_config_file_name):
61
- logger.info(f"LTX POOL MANAGER: Creating workers for devices: {device_ids}")
62
  self._ltx_modules_loaded = False
63
  self._setup_dependencies()
64
  self._lazy_load_ltx_modules()
@@ -69,61 +60,65 @@ class LtxPoolManager:
69
  self.current_worker_index = 0
70
  self.lock = threading.Lock()
71
 
 
 
 
 
72
  self._apply_ltx_pipeline_patches()
73
 
74
  if all(w.device.type == 'cuda' for w in self.workers):
75
- logger.info("LTX POOL MANAGER: HOT START MODE ENABLED. Pre-warming all GPUs...")
76
  for worker in self.workers:
77
  worker.to_gpu()
78
- logger.info("LTX POOL MANAGER: All GPUs are hot and ready.")
79
  else:
80
- logger.info("LTX POOL MANAGER: Operating in CPU or mixed mode. GPU pre-warming skipped.")
81
 
82
  def _setup_dependencies(self):
83
- """Clones the LTX-Video repo if not found and adds it to the system path."""
84
  if not LTX_VIDEO_REPO_DIR.exists():
85
- logger.info(f"LTX-Video repository not found at '{LTX_VIDEO_REPO_DIR}'. Cloning from GitHub...")
86
  try:
87
  DEPS_DIR.mkdir(exist_ok=True)
88
  subprocess.run(
89
- ["git", "clone", LTX_VIDEO_REPO_URL, str(LTX_VIDEO_REPO_DIR)],
90
  check=True, capture_output=True, text=True
91
  )
92
- logger.info("LTX-Video repository cloned successfully.")
93
  except subprocess.CalledProcessError as e:
94
- logger.error(f"Failed to clone LTX-Video repository. Git stderr: {e.stderr}")
95
- raise RuntimeError("Could not clone the required LTX-Video dependency from GitHub.")
96
  else:
97
- logger.info("Found local LTX-Video repository.")
98
 
99
  if str(LTX_VIDEO_REPO_DIR.resolve()) not in sys.path:
100
  sys.path.insert(0, str(LTX_VIDEO_REPO_DIR.resolve()))
101
- logger.info(f"Added '{LTX_VIDEO_REPO_DIR.resolve()}' to sys.path.")
102
-
103
  def _lazy_load_ltx_modules(self):
104
- """Dynamically imports LTX-Video modules after ensuring the repo exists."""
105
  if self._ltx_modules_loaded:
106
  return
107
 
108
  global create_ltx_video_pipeline, calculate_padding, LTXVideoPipeline, ConditioningItem, LTXMultiScalePipeline
109
  global vae_encode, latent_to_pixel_coords, randn_tensor
110
 
111
- from managers.ltx_pipeline_utils import create_ltx_video_pipeline, calculate_padding
112
  from ltx_video.pipelines.pipeline_ltx_video import LTXVideoPipeline, ConditioningItem, LTXMultiScalePipeline
113
  from ltx_video.models.autoencoders.vae_encode import vae_encode, latent_to_pixel_coords
114
  from diffusers.utils.torch_utils import randn_tensor
115
 
116
  self._ltx_modules_loaded = True
117
- logger.info("LTX-Video modules have been dynamically loaded.")
118
 
119
  def _apply_ltx_pipeline_patches(self):
120
- """Applies runtime patches to the LTX pipeline for ADUC-SDR compatibility."""
121
- logger.info("LTX POOL MANAGER: Applying ADUC-SDR patches to LTX pipeline...")
122
  for worker in self.workers:
123
  worker.pipeline.prepare_conditioning = _aduc_prepare_conditioning_patch.__get__(worker.pipeline, LTXVideoPipeline)
124
- logger.info("LTX POOL MANAGER: All pipeline instances have been patched successfully.")
125
 
126
- def _get_next_worker(self):
127
  with self.lock:
128
  worker = self.workers[self.current_worker_index]
129
  self.current_worker_index = (self.current_worker_index + 1) % len(self.workers)
@@ -144,63 +139,72 @@ class LtxPoolManager:
144
  pipeline_params["latents"] = kwargs['latents'].to(worker.device, dtype=worker.pipeline.transformer.dtype)
145
  if 'strength' in kwargs:
146
  pipeline_params["strength"] = kwargs['strength']
 
147
  if 'conditioning_items_data' in kwargs:
148
  final_conditioning_items = []
149
  for item in kwargs['conditioning_items_data']:
 
 
150
  item.latent_tensor = item.latent_tensor.to(worker.device)
151
  final_conditioning_items.append(item)
152
  pipeline_params["conditioning_items"] = final_conditioning_items
 
153
  if worker.is_distilled:
154
- logger.info(f"Worker {worker.device} is using a distilled model. Using fixed timesteps.")
155
  fixed_timesteps = worker.config.get("first_pass", {}).get("timesteps")
156
- pipeline_params["timesteps"] = fixed_timesteps
157
  if fixed_timesteps:
 
158
  pipeline_params["num_inference_steps"] = len(fixed_timesteps)
 
 
 
 
 
 
159
  return pipeline_params
160
 
161
- def generate_latent_fragment(self, **kwargs) -> (torch.Tensor, tuple):
162
  worker_to_use = self._get_next_worker()
163
  try:
164
  height, width = kwargs['height'], kwargs['width']
165
  padded_h, padded_w = ((height - 1) // 32 + 1) * 32, ((width - 1) // 32 + 1) * 32
166
  padding_vals = calculate_padding(height, width, padded_h, padded_w)
167
  kwargs['height'], kwargs['width'] = padded_h, padded_w
 
168
  pipeline_params = self._prepare_pipeline_params(worker_to_use, **kwargs)
169
- logger.info(f"Initiating GENERATION on {worker_to_use.device} with shape {padded_w}x{padded_h}")
 
 
170
  if isinstance(worker_to_use.pipeline, LTXMultiScalePipeline):
171
  result = worker_to_use.pipeline.video_pipeline(**pipeline_params).images
172
  else:
173
  result = worker_to_use.generate_video_fragment_internal(**pipeline_params)
174
  return result, padding_vals
175
  except Exception as e:
176
- logger.error(f"LTX POOL MANAGER: Error during generation on {worker_to_use.device}: {e}", exc_info=True)
177
  raise e
178
  finally:
179
  if worker_to_use and worker_to_use.device.type == 'cuda':
180
  with torch.cuda.device(worker_to_use.device):
181
- gc.collect(); torch.cuda.empty_cache()
 
182
 
183
- def refine_latents(self, latents_to_refine: torch.Tensor, **kwargs) -> (torch.Tensor, tuple):
184
- # This function can be expanded later if needed.
185
- pass
186
 
187
  class LtxWorker:
188
- """
189
- Represents a single instance of the LTX-Video pipeline on a specific device.
190
- """
191
  def __init__(self, device_id, ltx_config_file):
192
  self.cpu_device = torch.device('cpu')
193
  self.device = torch.device(device_id if torch.cuda.is_available() else 'cpu')
194
- logger.info(f"LTX Worker ({self.device}): Initializing with config '{ltx_config_file}'...")
195
 
196
  with open(ltx_config_file, "r") as file:
197
  self.config = yaml.safe_load(file)
198
 
199
  self.is_distilled = "distilled" in self.config.get("checkpoint_path", "")
200
-
201
  models_dir = LTX_VIDEO_REPO_DIR / "models_downloaded"
202
 
203
- logger.info(f"LTX Worker ({self.device}): Preparing to load model...")
204
  model_filename = self.config["checkpoint_path"]
205
  model_path = huggingface_hub.hf_hub_download(
206
  repo_id="Lightricks/LTX-Video", filename=model_filename,
@@ -214,22 +218,20 @@ class LtxWorker:
214
  sampler=self.config["sampler"],
215
  device='cpu'
216
  )
217
- logger.info(f"LTX Worker ({self.device}): Model ready on CPU. Is distilled model? {self.is_distilled}")
218
 
219
  def to_gpu(self):
220
  if self.device.type == 'cpu': return
221
- logger.info(f"LTX Worker: Moving pipeline to GPU {self.device}...")
222
  self.pipeline.to(self.device)
223
  if self.device.type == 'cuda' and can_optimize_fp8():
224
- logger.info(f"LTX Worker ({self.device}): FP8 supported GPU detected. Optimizing...")
225
  optimize_ltx_worker(self)
226
- logger.info(f"LTX Worker ({self.device}): Optimization complete.")
227
- elif self.device.type == 'cuda':
228
- logger.info(f"LTX Worker ({self.device}): FP8 optimization not supported or disabled.")
229
-
230
  def to_cpu(self):
231
  if self.device.type == 'cpu': return
232
- logger.info(f"LTX Worker: Unloading pipeline from GPU {self.device}...")
233
  self.pipeline.to('cpu')
234
  gc.collect()
235
  if torch.cuda.is_available(): torch.cuda.empty_cache()
@@ -237,10 +239,9 @@ class LtxWorker:
237
  def generate_video_fragment_internal(self, **kwargs):
238
  return self.pipeline(**kwargs).images
239
 
240
-
241
  def _aduc_prepare_conditioning_patch(
242
- self: LTXVideoPipeline,
243
- conditioning_items: Optional[List[Union[ConditioningItem, "LatentConditioningItem"]]],
244
  init_latents: torch.Tensor,
245
  num_frames: int,
246
  height: int,
@@ -252,62 +253,52 @@ def _aduc_prepare_conditioning_patch(
252
  init_latents, init_latent_coords = self.patchifier.patchify(latents=init_latents)
253
  init_pixel_coords = latent_to_pixel_coords(init_latent_coords, self.vae, causal_fix=self.transformer.config.causal_temporal_positioning)
254
  return init_latents, init_pixel_coords, None, 0
255
- init_conditioning_mask = torch.zeros(init_latents[:, 0, :, :, :].shape, dtype=torch.float32, device=init_latents.device)
 
256
  extra_conditioning_latents, extra_conditioning_pixel_coords, extra_conditioning_mask = [], [], []
257
  extra_conditioning_num_latents = 0
258
- is_latent_mode = hasattr(conditioning_items[0], 'latent_tensor')
259
- if is_latent_mode:
260
- for item in conditioning_items:
261
- media_item_latents = item.latent_tensor.to(dtype=init_latents.dtype, device=init_latents.device)
262
- media_frame_number, strength = item.media_frame_number, item.conditioning_strength
263
- if media_frame_number == 0:
264
- f_l, h_l, w_l = media_item_latents.shape[-3:]
265
- init_latents[:, :, :f_l, :h_l, :w_l] = torch.lerp(init_latents[:, :, :f_l, :h_l, :w_l], media_item_latents, strength)
266
- init_conditioning_mask[:, :f_l, :h_l, :w_l] = strength
267
- else:
268
- noise = randn_tensor(media_item_latents.shape, generator=generator, device=media_item_latents.device, dtype=media_item_latents.dtype)
269
- media_item_latents = torch.lerp(noise, media_item_latents, strength)
270
- patched_latents, latent_coords = self.patchifier.patchify(latents=media_item_latents)
271
- pixel_coords = latent_to_pixel_coords(latent_coords, self.vae, causal_fix=self.transformer.config.causal_temporal_positioning)
272
- pixel_coords[:, 0] += media_frame_number
273
- extra_conditioning_num_latents += patched_latents.shape[1]
274
- new_mask = torch.full(patched_latents.shape[:2], strength, dtype=torch.float32, device=init_latents.device)
275
- extra_conditioning_latents.append(patched_latents)
276
- extra_conditioning_pixel_coords.append(pixel_coords)
277
- extra_conditioning_mask.append(new_mask)
278
- else:
279
- for item in conditioning_items:
280
- if not isinstance(item, ConditioningItem): continue
281
- item = self._resize_conditioning_item(item, height, width)
282
- media_item_latents = vae_encode(item.media_item.to(dtype=self.vae.dtype, device=self.vae.device), self.vae, vae_per_channel_normalize=vae_per_channel_normalize).to(dtype=init_latents.dtype)
283
- if item.media_frame_number == 0:
284
- media_item_latents, l_x, l_y = self._get_latent_spatial_position(media_item_latents, item, height, width, strip_latent_border=True)
285
- f_l, h_l, w_l = media_item_latents.shape[-3:]
286
- init_latents[:, :, :f_l, l_y:l_y+h_l, l_x:l_x+w_l] = torch.lerp(init_latents[:, :, :f_l, l_y:l_y+h_l, l_x:l_x+w_l], media_item_latents, item.conditioning_strength)
287
- init_conditioning_mask[:, :f_l, l_y:l_y+h_l, l_x:l_x+w_l] = item.conditioning_strength
288
- else:
289
- logger.warning("Pixel-based conditioning for non-zero frames is not fully implemented in this patch.")
290
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
291
  init_latents, init_latent_coords = self.patchifier.patchify(latents=init_latents)
292
  init_pixel_coords = latent_to_pixel_coords(init_latent_coords, self.vae, causal_fix=self.transformer.config.causal_temporal_positioning)
293
  init_conditioning_mask, _ = self.patchifier.patchify(latents=init_conditioning_mask.unsqueeze(1))
294
  init_conditioning_mask = init_conditioning_mask.squeeze(-1)
 
295
  if extra_conditioning_latents:
296
  init_latents = torch.cat([*extra_conditioning_latents, init_latents], dim=1)
297
  init_pixel_coords = torch.cat([*extra_conditioning_pixel_coords, init_pixel_coords], dim=2)
298
  init_conditioning_mask = torch.cat([*extra_conditioning_mask, init_conditioning_mask], dim=1)
299
- if self.transformer.use_tpu_flash_attention:
300
- init_latents = init_latents[:, :-extra_conditioning_num_latents]
301
- init_pixel_coords = init_pixel_coords[:, :, :-extra_conditioning_num_latents]
302
- init_conditioning_mask = init_conditioning_mask[:, :-extra_conditioning_num_latents]
303
  return init_latents, init_pixel_coords, init_conditioning_mask, extra_conditioning_num_latents
304
 
305
-
306
- # --- Singleton Instantiation ---
307
  with open("config.yaml", 'r') as f:
308
  config = yaml.safe_load(f)
309
  ltx_gpus_required = config['specialists']['ltx']['gpus_required']
310
  ltx_device_ids = hardware_manager.allocate_gpus('LTX', ltx_gpus_required)
311
  ltx_config_filename = config['specialists']['ltx']['config_file']
312
  ltx_manager_singleton = LtxPoolManager(device_ids=ltx_device_ids, ltx_config_file_name=ltx_config_filename)
313
- logger.info("Video Specialist (LTX) ready.")
 
1
+ # aduc_framework/managers/ltx_manager.py
 
 
2
  #
3
+ # Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos
 
 
 
4
  #
5
+ # Versão 2.3.2 (Com correção de manipulação de dataclass)
 
6
  #
7
+ # Este manager é responsável por controlar a pipeline LTX-Video. Ele gerencia
8
+ # um pool de workers para otimizar o uso de múltiplas GPUs, lida com a inicialização
9
+ # e o setup de dependências complexas, e expõe uma interface de alto nível para a
10
+ # geração de fragmentos de vídeo no espaço latente.
 
 
 
11
 
12
  import torch
13
  import gc
 
22
  from pathlib import Path
23
  from typing import Optional, List, Tuple, Union
24
 
25
+ # --- Imports Relativos Corrigidos ---
26
+ from ..types import LatentConditioningItem
27
+ from ..tools.optimization import optimize_ltx_worker, can_optimize_fp8
28
+ from ..tools.hardware_manager import hardware_manager
29
 
30
  logger = logging.getLogger(__name__)
31
 
32
+ # --- Gerenciamento de Dependências e Placeholders ---
33
  DEPS_DIR = Path("./deps")
34
  LTX_VIDEO_REPO_DIR = DEPS_DIR / "LTX-Video"
35
  LTX_VIDEO_REPO_URL = "https://github.com/Lightricks/LTX-Video.git"
36
 
37
+ # Placeholders para módulos importados tardiamente (lazy-loaded)
38
  create_ltx_video_pipeline = None
39
  calculate_padding = None
40
  LTXVideoPipeline = None
 
46
 
47
  class LtxPoolManager:
48
  """
49
+ Gerencia um pool de LtxWorkers e expõe a pipeline de aprimoramento de prompt.
 
50
  """
51
+ def __init__(self, device_ids: List[str], ltx_config_file_name: str):
52
+ logger.info(f"LTX POOL MANAGER: Criando workers para os dispositivos: {device_ids}")
53
  self._ltx_modules_loaded = False
54
  self._setup_dependencies()
55
  self._lazy_load_ltx_modules()
 
60
  self.current_worker_index = 0
61
  self.lock = threading.Lock()
62
 
63
+ self.prompt_enhancement_pipeline = self.workers[0].pipeline if self.workers else None
64
+ if self.prompt_enhancement_pipeline:
65
+ logger.info("LTX POOL MANAGER: Pipeline de aprimoramento de prompt exposta para outros especialistas.")
66
+
67
  self._apply_ltx_pipeline_patches()
68
 
69
  if all(w.device.type == 'cuda' for w in self.workers):
70
+ logger.info("LTX POOL MANAGER: MODO HOT START ATIVADO. Pré-aquecendo todas as GPUs...")
71
  for worker in self.workers:
72
  worker.to_gpu()
73
+ logger.info("LTX POOL MANAGER: Todas as GPUs estão prontas.")
74
  else:
75
+ logger.info("LTX POOL MANAGER: Operando em modo CPU ou misto. Pré-aquecimento de GPU pulado.")
76
 
77
  def _setup_dependencies(self):
78
+ """Clona o repositório LTX-Video se não encontrado e o adiciona ao sys.path."""
79
  if not LTX_VIDEO_REPO_DIR.exists():
80
+ logger.info(f"Repositório LTX-Video não encontrado em '{LTX_VIDEO_REPO_DIR}'. Clonando do GitHub...")
81
  try:
82
  DEPS_DIR.mkdir(exist_ok=True)
83
  subprocess.run(
84
+ ["git", "clone", "--depth", "1", LTX_VIDEO_REPO_URL, str(LTX_VIDEO_REPO_DIR)],
85
  check=True, capture_output=True, text=True
86
  )
87
+ logger.info("Repositório LTX-Video clonado com sucesso.")
88
  except subprocess.CalledProcessError as e:
89
+ logger.error(f"Falha ao clonar o repositório LTX-Video. Git stderr: {e.stderr}")
90
+ raise RuntimeError("Não foi possível clonar a dependência LTX-Video do GitHub.")
91
  else:
92
+ logger.info("Repositório LTX-Video local encontrado.")
93
 
94
  if str(LTX_VIDEO_REPO_DIR.resolve()) not in sys.path:
95
  sys.path.insert(0, str(LTX_VIDEO_REPO_DIR.resolve()))
96
+ logger.info(f"Adicionado '{LTX_VIDEO_REPO_DIR.resolve()}' ao sys.path.")
97
+
98
  def _lazy_load_ltx_modules(self):
99
+ """Importa dinamicamente os módulos do LTX-Video após garantir que o repositório existe."""
100
  if self._ltx_modules_loaded:
101
  return
102
 
103
  global create_ltx_video_pipeline, calculate_padding, LTXVideoPipeline, ConditioningItem, LTXMultiScalePipeline
104
  global vae_encode, latent_to_pixel_coords, randn_tensor
105
 
106
+ from .ltx_pipeline_utils import create_ltx_video_pipeline, calculate_padding
107
  from ltx_video.pipelines.pipeline_ltx_video import LTXVideoPipeline, ConditioningItem, LTXMultiScalePipeline
108
  from ltx_video.models.autoencoders.vae_encode import vae_encode, latent_to_pixel_coords
109
  from diffusers.utils.torch_utils import randn_tensor
110
 
111
  self._ltx_modules_loaded = True
112
+ logger.info("Módulos do LTX-Video foram carregados dinamicamente.")
113
 
114
  def _apply_ltx_pipeline_patches(self):
115
+ """Aplica patches em tempo de execução na pipeline LTX para compatibilidade com ADUC-SDR."""
116
+ logger.info("LTX POOL MANAGER: Aplicando patches ADUC-SDR na pipeline LTX...")
117
  for worker in self.workers:
118
  worker.pipeline.prepare_conditioning = _aduc_prepare_conditioning_patch.__get__(worker.pipeline, LTXVideoPipeline)
119
+ logger.info("LTX POOL MANAGER: Todas as instâncias da pipeline foram corrigidas com sucesso.")
120
 
121
+ def _get_next_worker(self) -> 'LtxWorker':
122
  with self.lock:
123
  worker = self.workers[self.current_worker_index]
124
  self.current_worker_index = (self.current_worker_index + 1) % len(self.workers)
 
  pipeline_params["latents"] = kwargs['latents'].to(worker.device, dtype=worker.pipeline.transformer.dtype)
  if 'strength' in kwargs:
  pipeline_params["strength"] = kwargs['strength']
+
  if 'conditioning_items_data' in kwargs:
  final_conditioning_items = []
  for item in kwargs['conditioning_items_data']:
+ # FIX: Since LatentConditioningItem is a mutable dataclass,
+ # we modify the attribute in place, moving it to the worker's device.
  item.latent_tensor = item.latent_tensor.to(worker.device)
  final_conditioning_items.append(item)
  pipeline_params["conditioning_items"] = final_conditioning_items
+
  if worker.is_distilled:
  fixed_timesteps = worker.config.get("first_pass", {}).get("timesteps")
  if fixed_timesteps:
+ pipeline_params["timesteps"] = fixed_timesteps
  pipeline_params["num_inference_steps"] = len(fixed_timesteps)
+
+ callback = kwargs.get('callback')
+ if callback:
+ pipeline_params["callback_on_step_end"] = callback
+ pipeline_params["callback_on_step_end_tensor_inputs"] = ["latents"]
+
  return pipeline_params
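For orientation, a sketch of the kwargs this method consumes. `LatentConditioningItem` comes from `aduc_framework.types` (shown later in this commit); the latent tensor shape, the callback signature, and any generation kwargs elided from this hunk are assumptions, not confirmed by the diff:

    import torch
    from aduc_framework.types import LatentConditioningItem

    anchor = LatentConditioningItem(
        latent_tensor=torch.randn(1, 128, 1, 16, 16),  # illustrative latent shape
        media_frame_number=0,          # blend into the first latent frame
        conditioning_strength=0.9,
    )

    kwargs = {
        "height": 480,
        "width": 704,
        "conditioning_items_data": [anchor],
        # Hypothetical diffusers-style per-step callback; must return its dict.
        "callback": lambda pipe, step, timestep, cb_kwargs: cb_kwargs,
    }
    # latents, padding = ltx_manager_singleton.generate_latent_fragment(**kwargs)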
 
+ def generate_latent_fragment(self, **kwargs) -> Tuple[torch.Tensor, tuple]:
  worker_to_use = self._get_next_worker()
  try:
  height, width = kwargs['height'], kwargs['width']
  padded_h, padded_w = ((height - 1) // 32 + 1) * 32, ((width - 1) // 32 + 1) * 32
  padding_vals = calculate_padding(height, width, padded_h, padded_w)
  kwargs['height'], kwargs['width'] = padded_h, padded_w
+
  pipeline_params = self._prepare_pipeline_params(worker_to_use, **kwargs)
+
+ logger.info(f"Starting GENERATION on {worker_to_use.device} with shape {padded_w}x{padded_h}")
+
  if isinstance(worker_to_use.pipeline, LTXMultiScalePipeline):
  result = worker_to_use.pipeline.video_pipeline(**pipeline_params).images
  else:
  result = worker_to_use.generate_video_fragment_internal(**pipeline_params)
  return result, padding_vals
  except Exception as e:
+ logger.error(f"LTX POOL MANAGER: Error during generation on {worker_to_use.device}: {e}", exc_info=True)
  raise e
  finally:
  if worker_to_use and worker_to_use.device.type == 'cuda':
  with torch.cuda.device(worker_to_use.device):
+ gc.collect()
+ torch.cuda.empty_cache()
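The padding expression rounds each spatial dimension up to the next multiple of 32, which the transformer's patching requires; for example 720 becomes 736, while 704 is already aligned and passes through unchanged. A standalone check of the arithmetic:

    def pad_to_multiple_of_32(x: int) -> int:
        # ((x - 1) // 32 + 1) * 32 rounds up; it is the identity on multiples of 32.
        return ((x - 1) // 32 + 1) * 32

    assert pad_to_multiple_of_32(720) == 736
    assert pad_to_multiple_of_32(704) == 704
    assert pad_to_multiple_of_32(1) == 32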
 
+ def refine_latents(self, latents_to_refine: torch.Tensor, **kwargs) -> Tuple[torch.Tensor, tuple]:
+ pass # Placeholder

  class LtxWorker:
+ """Represents a single LTX-Video pipeline instance on a specific device."""

  def __init__(self, device_id, ltx_config_file):
  self.cpu_device = torch.device('cpu')
  self.device = torch.device(device_id if torch.cuda.is_available() else 'cpu')
+ logger.info(f"LTX Worker ({self.device}): Initializing with config '{ltx_config_file}'...")

  with open(ltx_config_file, "r") as file:
  self.config = yaml.safe_load(file)

  self.is_distilled = "distilled" in self.config.get("checkpoint_path", "")
  models_dir = LTX_VIDEO_REPO_DIR / "models_downloaded"

+ logger.info(f"LTX Worker ({self.device}): Preparing to load the model...")
  model_filename = self.config["checkpoint_path"]
  model_path = huggingface_hub.hf_hub_download(
  repo_id="Lightricks/LTX-Video", filename=model_filename,

  sampler=self.config["sampler"],
  device='cpu'
  )
+ logger.info(f"LTX Worker ({self.device}): Model ready on the CPU. Is it a distilled model? {self.is_distilled}")

  def to_gpu(self):
  if self.device.type == 'cpu': return
+ logger.info(f"LTX Worker: Moving pipeline to GPU {self.device}...")
  self.pipeline.to(self.device)
  if self.device.type == 'cuda' and can_optimize_fp8():
+ logger.info(f"LTX Worker ({self.device}): FP8-capable GPU detected. Optimizing...")
  optimize_ltx_worker(self)
+ logger.info(f"LTX Worker ({self.device}): Optimization complete.")
+

  def to_cpu(self):
  if self.device.type == 'cpu': return
+ logger.info(f"LTX Worker: Unloading pipeline from GPU {self.device}...")
  self.pipeline.to('cpu')
  gc.collect()
  if torch.cuda.is_available(): torch.cuda.empty_cache()

  def generate_video_fragment_internal(self, **kwargs):
  return self.pipeline(**kwargs).images
 
 
  def _aduc_prepare_conditioning_patch(
+ self: "LTXVideoPipeline",
+ conditioning_items: Optional[List[Union["ConditioningItem", "LatentConditioningItem"]]],
  init_latents: torch.Tensor,
  num_frames: int,
  height: int,

  init_latents, init_latent_coords = self.patchifier.patchify(latents=init_latents)
  init_pixel_coords = latent_to_pixel_coords(init_latent_coords, self.vae, causal_fix=self.transformer.config.causal_temporal_positioning)
  return init_latents, init_pixel_coords, None, 0
+
+ init_conditioning_mask = torch.zeros_like(init_latents[:, 0, ...], dtype=torch.float32, device=init_latents.device)
  extra_conditioning_latents, extra_conditioning_pixel_coords, extra_conditioning_mask = [], [], []
  extra_conditioning_num_latents = 0

+ for item in conditioning_items:
+ if not isinstance(item, LatentConditioningItem):
+ logger.warning("ADUC patch: conditioning item is not a LatentConditioningItem and will be ignored.")
+ continue
+
+ media_item_latents = item.latent_tensor.to(dtype=init_latents.dtype, device=init_latents.device)
+ media_frame_number, strength = item.media_frame_number, item.conditioning_strength
+
+ if media_frame_number == 0:
+ f_l, h_l, w_l = media_item_latents.shape[-3:]
+ init_latents[..., :f_l, :h_l, :w_l] = torch.lerp(init_latents[..., :f_l, :h_l, :w_l], media_item_latents, strength)
+ init_conditioning_mask[..., :f_l, :h_l, :w_l] = strength
+ else:
+ noise = randn_tensor(media_item_latents.shape, generator=generator, device=media_item_latents.device, dtype=media_item_latents.dtype)
+ media_item_latents = torch.lerp(noise, media_item_latents, strength)
+ patched_latents, latent_coords = self.patchifier.patchify(latents=media_item_latents)
+ pixel_coords = latent_to_pixel_coords(latent_coords, self.vae, causal_fix=self.transformer.config.causal_temporal_positioning)
+ pixel_coords[:, 0] += media_frame_number
+ extra_conditioning_num_latents += patched_latents.shape[1]
+ new_mask = torch.full(patched_latents.shape[:2], strength, dtype=torch.float32, device=init_latents.device)
+ extra_conditioning_latents.append(patched_latents)
+ extra_conditioning_pixel_coords.append(pixel_coords)
+ extra_conditioning_mask.append(new_mask)
+
  init_latents, init_latent_coords = self.patchifier.patchify(latents=init_latents)
  init_pixel_coords = latent_to_pixel_coords(init_latent_coords, self.vae, causal_fix=self.transformer.config.causal_temporal_positioning)
  init_conditioning_mask, _ = self.patchifier.patchify(latents=init_conditioning_mask.unsqueeze(1))
  init_conditioning_mask = init_conditioning_mask.squeeze(-1)
+
  if extra_conditioning_latents:
  init_latents = torch.cat([*extra_conditioning_latents, init_latents], dim=1)
  init_pixel_coords = torch.cat([*extra_conditioning_pixel_coords, init_pixel_coords], dim=2)
  init_conditioning_mask = torch.cat([*extra_conditioning_mask, init_conditioning_mask], dim=1)
+
  return init_latents, init_pixel_coords, init_conditioning_mask, extra_conditioning_num_latents
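The patch relies on `torch.lerp(a, b, w) = a + w * (b - a)`: an anchor at frame 0 is blended into the initial latents in place, while later anchors are blended against fresh noise, so `conditioning_strength=1.0` reproduces the anchor exactly and `0.0` leaves the noise or initial latents untouched. A small numeric check of that reading:

    import torch

    noise = torch.zeros(3)
    anchor = torch.ones(3)
    assert torch.equal(torch.lerp(noise, anchor, 1.0), anchor)  # full strength keeps the anchor
    assert torch.equal(torch.lerp(noise, anchor, 0.0), noise)   # zero strength keeps the noise
    assert torch.allclose(torch.lerp(noise, anchor, 0.25), torch.full((3,), 0.25))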

+ # --- Singleton instantiation ---

  with open("config.yaml", 'r') as f:
  config = yaml.safe_load(f)
  ltx_gpus_required = config['specialists']['ltx']['gpus_required']
  ltx_device_ids = hardware_manager.allocate_gpus('LTX', ltx_gpus_required)
  ltx_config_filename = config['specialists']['ltx']['config_file']
  ltx_manager_singleton = LtxPoolManager(device_ids=ltx_device_ids, ltx_config_file_name=ltx_config_filename)
+ logger.info("Video specialist (LTX) ready.")
{managers → aduc_framework/managers}/ltx_pipeline_utils.py RENAMED
File without changes
aduc_framework/managers/mmaudio_manager.py ADDED
@@ -0,0 +1,226 @@
+ # managers/mmaudio_manager.py
+ #
+ # Copyright (C) 2025 Carlos Rodrigues dos Santos
+ #
+ # Version: 3.0.0 (GPU Pool Manager)
+ #
+ # This version refactors the MMAudioManager into a pool-with-workers model,
+ # allowing multiple dedicated GPUs to be used for audio generation with a
+ # round-robin rotation scheme for efficient VRAM management.
+
+ import torch
+ import logging
+ import subprocess
+ import os
+ import time
+ import yaml
+ import gc
+ import threading
+ from pathlib import Path
+ import gradio as gr
+ import sys
+
+ # Relative imports for the hardware_manager
+ from ..tools.hardware_manager import hardware_manager
+
+ logger = logging.getLogger(__name__)
+
+ # --- Dependency management ---
+ DEPS_DIR = Path("./deps")
+ MMAUDIO_REPO_DIR = DEPS_DIR / "MMAudio"
+ MMAUDIO_REPO_URL = "https://github.com/hkchengrex/MMAudio.git"
+
+ # Lazy-loaded imports
+ ModelConfig, all_model_cfg, mmaudio_generate, load_video, make_video = None, None, None, None, None
+ MMAudio, get_my_mmaudio = None, None
+ FeaturesUtils = None
+ SequenceConfig = None
+ FlowMatching = None
+
+ class MMAudioWorker:
+ """Represents a single MMAudio pipeline instance on a device."""
+ def __init__(self, device_id: str):
+ self.device = torch.device(device_id)
+ self.cpu_device = torch.device("cpu")
+ self.dtype = torch.bfloat16 if 'cuda' in self.device.type else torch.float32
+
+ self.net: 'MMAudio' = None
+ self.feature_utils: 'FeaturesUtils' = None
+ self.seq_cfg: 'SequenceConfig' = None
+ self.model_config: 'ModelConfig' = None
+
+ self._check_and_run_global_setup()
+ self._lazy_load_mmaudio_modules()
+ logger.info(f"MMAudio Worker initialized for device {self.device}.")
+
+ def _lazy_load_mmaudio_modules(self):
+ """Dynamically imports the MMAudio modules."""
+ global ModelConfig, all_model_cfg, mmaudio_generate, load_video, make_video, MMAudio, get_my_mmaudio, FeaturesUtils, SequenceConfig, FlowMatching
+ if MMAudio is not None: return
+
+ from mmaudio.eval_utils import ModelConfig, all_model_cfg, generate as mmaudio_generate, load_video, make_video
+ from mmaudio.model.flow_matching import FlowMatching
+ from mmaudio.model.networks import MMAudio, get_my_mmaudio
+ from mmaudio.model.utils.features_utils import FeaturesUtils
+ from mmaudio.model.sequence_config import SequenceConfig
+ logger.info("MMAudio modules were loaded dynamically.")
+
+ @staticmethod
+ def _check_and_run_global_setup():
+ """Runs the repository-clone and model-download setup exactly once."""
+ setup_flag = DEPS_DIR / "mmaudio.setup.complete"
+ if setup_flag.exists():
+ return True
+
+ logger.info("--- Starting MMAudio global setup (first run) ---")
+ if not MMAUDIO_REPO_DIR.exists():
+ DEPS_DIR.mkdir(exist_ok=True)
+ subprocess.run(["git", "clone", "--depth", "1", MMAUDIO_REPO_URL, str(MMAUDIO_REPO_DIR)], check=True)
+
+ if str(MMAUDIO_REPO_DIR.resolve()) not in sys.path:
+ sys.path.insert(0, str(MMAUDIO_REPO_DIR.resolve()))
+
+ # Import after adding to the path
+ from mmaudio.eval_utils import all_model_cfg as cfg
+
+ # Adjust paths and download models
+ for cfg_key in cfg:
+ config = cfg[cfg_key]
+ config.model_path = MMAUDIO_REPO_DIR / config.model_path
+ config.vae_path = MMAUDIO_REPO_DIR / config.vae_path
+ if config.bigvgan_16k_path:
+ config.bigvgan_16k_path = MMAUDIO_REPO_DIR / config.bigvgan_16k_path
+ config.synchformer_ckpt = MMAUDIO_REPO_DIR / config.synchformer_ckpt
+ config.download_if_needed()
+
+ setup_flag.touch()
+ logger.info("--- MMAudio global setup complete ---")
+ return True
+
+ def initialize_models(self):
+ """Loads the worker's models on the CPU and then onto its assigned GPU."""
+ if self.net is not None: return
+
+ self.model_config = all_model_cfg['large_44k_v2']
+ self.seq_cfg = self.model_config.seq_cfg
+
+ logger.info(f"Worker {self.device}: Loading the MMAudio model to the CPU...")
+ self.net = get_my_mmaudio(self.model_config.model_name).eval()
+ self.net.load_weights(torch.load(self.model_config.model_path, map_location=self.cpu_device, weights_only=True))
+
+ self.feature_utils = FeaturesUtils(
+ tod_vae_ckpt=self.model_config.vae_path,
+ synchformer_ckpt=self.model_config.synchformer_ckpt,
+ enable_conditions=True, mode=self.model_config.mode,
+ bigvgan_vocoder_ckpt=self.model_config.bigvgan_16k_path,
+ need_vae_encoder=False
+ ).eval()
+
+ self.net.to(self.device, self.dtype)
+ self.feature_utils.to(self.device, self.dtype)
+ logger.info(f"Worker {self.device}: MMAudio models ready in VRAM.")
+
+ def unload_models(self):
+ """Unloads the models from VRAM by moving them back to the CPU."""
+ if self.net is None: return
+ logger.info(f"Worker {self.device}: Unloading MMAudio models from VRAM...")
+ self.net.to(self.cpu_device)
+ self.feature_utils.to(self.cpu_device)
+ del self.net, self.feature_utils, self.seq_cfg, self.model_config
+ self.net, self.feature_utils, self.seq_cfg, self.model_config = None, None, None, None
+ gc.collect()
+ if torch.cuda.is_available(): torch.cuda.empty_cache()
+
+ def generate_audio_internal(self, video_path: str, prompt: str, duration_seconds: float, output_path: str) -> str:
+ """Audio-generation logic that runs on the worker's GPU."""
+ negative_prompt = "human voice, speech, talking, singing, narration"
+ rng = torch.Generator(device=self.device).manual_seed(int(time.time()))
+ fm = FlowMatching(min_sigma=0, inference_mode='euler', num_steps=25)
+
+ video_info = load_video(Path(video_path), duration_seconds)
+ self.seq_cfg.duration = video_info.duration_sec
+ self.net.update_seq_lengths(self.seq_cfg.latent_seq_len, self.seq_cfg.clip_seq_len, self.seq_cfg.sync_seq_len)
+
+ with torch.no_grad():
+ audios = mmaudio_generate(
+ clip_video=video_info.clip_frames.unsqueeze(0).to(self.device, self.dtype),
+ sync_video=video_info.sync_frames.unsqueeze(0).to(self.device, self.dtype),
+ text=[prompt], negative_text=[negative_prompt],
+ feature_utils=self.feature_utils, net=self.net, fm=fm, rng=rng, cfg_strength=4.5
+ )
+ audio_waveform = audios.float().cpu()[0]
+
+ make_video(video_info, Path(output_path), audio_waveform, sampling_rate=self.seq_cfg.sampling_rate)
+ return output_path
+
+ class MMAudioPoolManager:
+ def __init__(self, device_ids: list[str], workspace_dir: str):
+ logger.info(f"MMAUDIO POOL MANAGER: Creating workers for devices: {device_ids}")
+ self.workspace_dir = workspace_dir
+ if not device_ids or 'cpu' in device_ids:
+ raise ValueError("MMAudioPoolManager requires dedicated GPUs.")
+ self.workers = [MMAudioWorker(device_id) for device_id in device_ids]
+ self.current_worker_index = 0
+ self.lock = threading.Lock()
+ self.last_cleanup_thread = None
+
+ def _cleanup_worker_thread(self, worker: MMAudioWorker):
+ logger.info(f"MMAUDIO CLEANUP THREAD: Starting cleanup of {worker.device} in the background...")
+ worker.unload_models()
+
+ def generate_audio_for_video(self, video_path: str, prompt: str, duration_seconds: float, output_path_override: str = None) -> str:
+ if duration_seconds < 1:
+ logger.warning(f"Video too short ({duration_seconds:.2f}s). Skipping audio generation.")
+ return video_path
+
+ worker_to_use = None
+ try:
+ with self.lock:
+ if self.last_cleanup_thread and self.last_cleanup_thread.is_alive():
+ self.last_cleanup_thread.join()
+
+ worker_to_use = self.workers[self.current_worker_index]
+ previous_worker_index = (self.current_worker_index - 1 + len(self.workers)) % len(self.workers)
+ worker_to_cleanup = self.workers[previous_worker_index]
+
+ cleanup_thread = threading.Thread(target=self._cleanup_worker_thread, args=(worker_to_cleanup,))
+ cleanup_thread.start()
+ self.last_cleanup_thread = cleanup_thread
+
+ worker_to_use.initialize_models()
+ self.current_worker_index = (self.current_worker_index + 1) % len(self.workers)
+
+ logger.info(f"MMAUDIO POOL MANAGER: Generating audio on {worker_to_use.device}...")
+
+ output_path = output_path_override or os.path.join(self.workspace_dir, f"{Path(video_path).stem}_with_audio.mp4")
+
+ return worker_to_use.generate_audio_internal(
+ video_path=video_path, prompt=prompt, duration_seconds=duration_seconds, output_path=output_path
+ )
+ except Exception as e:
+ logger.error(f"MMAUDIO POOL MANAGER: Error during audio generation: {e}", exc_info=True)
+ raise gr.Error(f"Audio generation failed: {e}")
+
+ # --- Singleton instantiation ---
+ class MMAudioPlaceholder:
+ def generate_audio_for_video(self, video_path, *args, **kwargs):
+ logger.error("MMAudio was not initialized because no GPU was allocated. Skipping the audio step.")
+ return video_path
+
+ try:
+ with open("config.yaml", 'r') as f:
+ config = yaml.safe_load(f)
+ WORKSPACE_DIR = config['application']['workspace_dir']
+
+ mmaudio_gpus_required = config['specialists'].get('mmaudio', {}).get('gpus_required', 0)
+ mmaudio_device_ids = hardware_manager.allocate_gpus('MMAudio', mmaudio_gpus_required)
+
+ if mmaudio_gpus_required > 0 and 'cpu' not in mmaudio_device_ids:
+ mmaudio_manager_singleton = MMAudioPoolManager(device_ids=mmaudio_device_ids, workspace_dir=WORKSPACE_DIR)
+ logger.info("Audio specialist (MMAudio Pool) ready.")
+ else:
+ mmaudio_manager_singleton = MMAudioPlaceholder()
+ logger.warning("MMAudio Pool Manager was not initialized. No GPU was requested in config.yaml.")
+ except Exception as e:
+ logger.critical(f"CRITICAL failure initializing the MMAudioManager: {e}", exc_info=True)
+ mmaudio_manager_singleton = MMAudioPlaceholder()
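The rotation scheme above keeps at most two workers "hot" at once: the worker chosen for the current request is initialized, while the previously used worker is unloaded on a background thread so the unload does not block generation. A stripped-down sketch of just that rotation, with `Worker` as a generic stand-in for `MMAudioWorker`:

    import threading

    class RoundRobinPool:
        def __init__(self, workers):
            self.workers = workers
            self.idx = 0
            self.lock = threading.Lock()

        def acquire(self):
            with self.lock:
                current = self.workers[self.idx]
                previous = self.workers[(self.idx - 1) % len(self.workers)]
                # Unload the previous worker's models off the critical path.
                threading.Thread(target=previous.unload_models).start()
                current.initialize_models()
                self.idx = (self.idx + 1) % len(self.workers)
                return current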
{managers → aduc_framework/managers}/seedvr_manager.py RENAMED
@@ -2,14 +2,16 @@
  #
  # Copyright (C) 2025 Carlos Rodrigues dos Santos
  #
- # Version: 2.3.5
  #
- # This version uses the optimal strategy of cloning the self-contained Hugging Face
- # Space repository and uses the full, correct import paths to resolve all
- # ModuleNotFoundErrors, while retaining necessary runtime patches.

  import torch
- import torch.distributed as dist
  import os
  import gc
  import logging
@@ -18,196 +20,210 @@ import subprocess
  from pathlib import Path
  from urllib.parse import urlparse
  from torch.hub import download_url_to_file
- import gradio as gr
  import mediapy
  from einops import rearrange

- from tools.tensor_utils import wavelet_reconstruction

  logger = logging.getLogger(__name__)

- # --- Dependency Management ---
- DEPS_DIR = Path("./deps")
  SEEDVR_SPACE_DIR = DEPS_DIR / "SeedVR_Space"
  SEEDVR_SPACE_URL = "https://huggingface.co/spaces/ByteDance-Seed/SeedVR2-3B"
- VAE_CONFIG_URL = "https://raw.githubusercontent.com/ByteDance-Seed/SeedVR/main/models/video_vae_v3/s8_c16_t4_inflation_sd3.yaml"
-
- def setup_seedvr_dependencies():
- """
- Ensures the SeedVR Space repository is cloned and available in the sys.path.
- """
- if not SEEDVR_SPACE_DIR.exists():
- logger.info(f"SeedVR Space not found at '{SEEDVR_SPACE_DIR}'. Cloning from Hugging Face...")
- try:
- DEPS_DIR.mkdir(exist_ok=True)
- subprocess.run(
- ["git", "clone", SEEDVR_SPACE_URL, str(SEEDVR_SPACE_DIR)],
- check=True, capture_output=True, text=True
- )
- logger.info("SeedVR Space cloned successfully.")
- except subprocess.CalledProcessError as e:
- logger.error(f"Failed to clone SeedVR Space. Git stderr: {e.stderr}")
- raise RuntimeError("Could not clone the required SeedVR dependency from Hugging Face.")
- else:
- logger.info("Found local SeedVR Space repository.")
-
- if str(SEEDVR_SPACE_DIR.resolve()) not in sys.path:
- sys.path.insert(0, str(SEEDVR_SPACE_DIR.resolve()))
- logger.info(f"Added '{SEEDVR_SPACE_DIR.resolve()}' to sys.path.")
-
- setup_seedvr_dependencies()
-
- # Use full import paths relative to the root of the cloned repository
- from projects.video_diffusion_sr.infer import VideoDiffusionInfer
- from common.config import load_config
- from common.seed import set_seed
- from data.image.transforms.divisible_crop import DivisibleCrop
- from data.image.transforms.na_resize import NaResize
- from data.video.transforms.rearrange import Rearrange
- from torchvision.transforms import Compose, Lambda, Normalize
- from torchvision.io.video import read_video
- from omegaconf import OmegaConf
-
-
- def _load_file_from_url(url, model_dir='./', file_name=None):
- os.makedirs(model_dir, exist_ok=True)
- filename = file_name or os.path.basename(urlparse(url).path)
- cached_file = os.path.abspath(os.path.join(model_dir, filename))
- if not os.path.exists(cached_file):
- logger.info(f'Downloading: "{url}" to {cached_file}')
- download_url_to_file(url, cached_file, hash_prefix=None, progress=True)
- return cached_file

  class SeedVrManager:
- """Manages the SeedVR model for HD Mastering tasks."""
- def __init__(self, workspace_dir="deformes_workspace"):
- self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
  self.runner = None
- self.workspace_dir = workspace_dir
- self.is_initialized = False
- self._original_barrier = None
- logger.info("SeedVrManager initialized. Model will be loaded on demand.")
-
- def _download_models_and_configs(self):
- """Downloads the necessary checkpoints AND the missing VAE config file."""
- logger.info("Verifying and downloading SeedVR2 models and configs...")
- ckpt_dir = SEEDVR_SPACE_DIR / 'ckpts'
- config_dir = SEEDVR_SPACE_DIR / 'configs' / 'vae'
  ckpt_dir.mkdir(exist_ok=True)
- config_dir.mkdir(parents=True, exist_ok=True)
- _load_file_from_url(url=VAE_CONFIG_URL, model_dir=str(config_dir))
- pretrain_model_urls = {
- 'vae_ckpt': 'https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/ema_vae.pth',
  'dit_3b': 'https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/seedvr2_ema_3b.pth',
- 'dit_7b': 'https://huggingface.co/ByteDance-Seed/SeedVR2-7B/resolve/main/seedvr2_ema_7b.pth',
  'pos_emb': 'https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/pos_emb.pt',
  'neg_emb': 'https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/neg_emb.pt'
  }
- for key, url in pretrain_model_urls.items():
  _load_file_from_url(url=url, model_dir=str(ckpt_dir))
- logger.info("SeedVR2 models and configs downloaded successfully.")

- def _initialize_runner(self, model_version: str):
- """Loads and configures the SeedVR model, with patches for single-GPU inference."""
  if self.runner is not None: return
- self._download_models_and_configs()
-
- if dist.is_available() and not dist.is_initialized():
- logger.info("Applying patch to disable torch.distributed.barrier for single-GPU inference.")
- self._original_barrier = dist.barrier
- dist.barrier = lambda *args, **kwargs: None

- logger.info(f"Initializing SeedVR2 {model_version} runner...")
- if model_version == '3B':
- config_path = SEEDVR_SPACE_DIR / 'configs_3b' / 'main.yaml'
- checkpoint_path = SEEDVR_SPACE_DIR / 'ckpts' / 'seedvr2_ema_3b.pth'
- elif model_version == '7B':
- config_path = SEEDVR_SPACE_DIR / 'configs_7b' / 'main.yaml'
- checkpoint_path = SEEDVR_SPACE_DIR / 'ckpts' / 'seedvr2_ema_7b.pth'
- else:
- raise ValueError(f"Unsupported SeedVR model version: {model_version}")
-
- try:
- config = load_config(str(config_path))
- except FileNotFoundError:
- logger.warning("Caught expected FileNotFoundError. Loading config manually.")
- config = OmegaConf.load(str(config_path))
- correct_vae_config_path = SEEDVR_SPACE_DIR / 'configs' / 'vae' / 's8_c16_t4_inflation_sd3.yaml'
- vae_config = OmegaConf.load(str(correct_vae_config_path))
- config.vae = vae_config
- logger.info("Configuration loaded and patched manually.")

  self.runner = VideoDiffusionInfer(config)
  OmegaConf.set_readonly(self.runner.config, False)
- self.runner.configure_dit_model(device=self.device, checkpoint=str(checkpoint_path))
  self.runner.configure_vae_model()
- if hasattr(self.runner.vae, "set_memory_limit"):
- self.runner.vae.set_memory_limit(**self.runner.config.vae.memory_limit)
- self.is_initialized = True
- logger.info(f"Runner for SeedVR2 {model_version} initialized and ready.")
-
  def _unload_runner(self):
- """Unloads the runner from VRAM and restores patches."""
  if self.runner is not None:
  del self.runner; self.runner = None
  gc.collect(); torch.cuda.empty_cache()
- self.is_initialized = False
- logger.info("SeedVR runner unloaded from VRAM.")
- if self._original_barrier is not None:
- logger.info("Restoring original torch.distributed.barrier function.")
- dist.barrier = self._original_barrier
- self._original_barrier = None

  def process_video(self, input_video_path: str, output_video_path: str, prompt: str,
- model_version: str = '3B', steps: int = 50, seed: int = 666,
- progress: gr.Progress = None) -> str:
- """Applies HD enhancement to a video."""
  try:
- self._initialize_runner(model_version)
  set_seed(seed, same_across_ranks=True)
  self.runner.config.diffusion.timesteps.sampling.steps = steps
  self.runner.configure_diffusion()
  video_tensor = read_video(input_video_path, output_format="TCHW")[0] / 255.0
  res_h, res_w = video_tensor.shape[-2:]
  video_transform = Compose([
  NaResize(resolution=(res_h * res_w) ** 0.5, mode="area", downsample_only=False),
  Lambda(lambda x: torch.clamp(x, 0.0, 1.0)),
- DivisibleCrop((16, 16)),
- Normalize(0.5, 0.5),
- Rearrange("t c h w -> c t h w"),
  ])
- cond_latents = [video_transform(video_tensor.to(self.device))]
- input_videos = cond_latents
- self.runner.dit.to("cpu")
- self.runner.vae.to(self.device)
  cond_latents = self.runner.vae_encode(cond_latents)
- self.runner.vae.to("cpu"); gc.collect(); torch.cuda.empty_cache()
- self.runner.dit.to(self.device)
- pos_emb_path = SEEDVR_SPACE_DIR / 'ckpts' / 'pos_emb.pt'
- neg_emb_path = SEEDVR_SPACE_DIR / 'ckpts' / 'neg_emb.pt'
- text_pos_embeds = torch.load(pos_emb_path).to(self.device)
- text_neg_embeds = torch.load(neg_emb_path).to(self.device)
- text_embeds_dict = {"texts_pos": [text_pos_embeds], "texts_neg": [text_neg_embeds]}
  noises = [torch.randn_like(latent) for latent in cond_latents]
  conditions = [self.runner.get_condition(noise, latent_blur=latent, task="sr") for noise, latent in zip(noises, cond_latents)]
  with torch.no_grad(), torch.autocast("cuda", torch.bfloat16, enabled=True):
  video_tensors = self.runner.inference(noises=noises, conditions=conditions, dit_offload=True, **text_embeds_dict)
- self.runner.dit.to("cpu"); gc.collect(); torch.cuda.empty_cache()
- self.runner.vae.to(self.device)
  samples = self.runner.vae_decode(video_tensors)
- final_sample = samples[0]
- input_video_sample = input_videos[0]
  if final_sample.shape[1] < input_video_sample.shape[1]:
  input_video_sample = input_video_sample[:, :final_sample.shape[1]]
  final_sample = wavelet_reconstruction(rearrange(final_sample, "c t h w -> t c h w"), rearrange(input_video_sample, "c t h w -> t c h w"))
  final_sample = rearrange(final_sample, "t c h w -> t h w c")
  final_sample = final_sample.clip(-1, 1).mul_(0.5).add_(0.5).mul_(255).round()
  final_sample_np = final_sample.to(torch.uint8).cpu().numpy()
  mediapy.write_video(output_video_path, final_sample_np, fps=24)
- logger.info(f"HD Mastered video saved to: {output_video_path}")
  return output_video_path
  finally:
  self._unload_runner()

- # --- Singleton Instance ---
- seedvr_manager_singleton = SeedVrManager()

  #
  # Copyright (C) 2025 Carlos Rodrigues dos Santos
  #
+ # Version: 10.0.0 (Definitive Monkey Patch / Single Instance)
  #
+ # This is the final and most robust architecture. The problematic parallelism
+ # is disabled programmatically by "monkey patching" the `master_only` decorator.
+ # This removes the need to manage `torch.distributed`, simplifying the code
+ # and fixing the root cause of all the parallelism errors. GPU isolation via
+ # CUDA_VISIBLE_DEVICES is kept as the best practice for hardware management.

  import torch

  import os
  import gc
  import logging

  from pathlib import Path
  from urllib.parse import urlparse
  from torch.hub import download_url_to_file

  import mediapy
  from einops import rearrange
+ import shutil
+ from omegaconf import OmegaConf
+ import yaml

+ from ..tools.hardware_manager import hardware_manager

  logger = logging.getLogger(__name__)

+ APP_ROOT = Path("/home/user/app")
+ DEPS_DIR = APP_ROOT / "deps"
  SEEDVR_SPACE_DIR = DEPS_DIR / "SeedVR_Space"
  SEEDVR_SPACE_URL = "https://huggingface.co/spaces/ByteDance-Seed/SeedVR2-3B"

  class SeedVrManager:
+ """Manages a single SeedVR pipeline instance on a dedicated, isolated GPU."""
+ def __init__(self, device_id: str):
+ self.global_device_id = device_id
+ self.local_device_name = 'cuda:0' # What the process will see
+ self.gpu_index = self.global_device_id.split(':')[-1]
+
  self.runner = None
+ self._check_and_run_global_setup()
+ logger.info(f"SeedVR Manager (Single Instance) initialized to operate on GPU {self.global_device_id}.")
+
+ @staticmethod
+ def _check_and_run_global_setup():
+ """Runs the file setup and applies the patch that disables parallelism."""
+ setup_flag = DEPS_DIR / "seedvr.setup.complete"
+ if str(APP_ROOT) not in sys.path: sys.path.insert(0, str(APP_ROOT))
+
+ # The patch must be applied every time the code runs.
+ try:
+ from common import decorators
+ import functools
+
+ def _passthrough_decorator(func):
+ @functools.wraps(func)
+ def wrapped(*args, **kwargs):
+ return func(*args, **kwargs)
+ return wrapped
+
+ decorators.master_only = _passthrough_decorator
+ logger.info("Monkey patch successfully applied to 'common.decorators.master_only' to disable parallelism.")
+ except Exception as e:
+ logger.error(f"Failed to apply the monkey patch: {e}", exc_info=True)
+
+ if setup_flag.exists(): return True
+
+ logger.info("--- Starting SeedVR global setup (first run) ---")
+ if not SEEDVR_SPACE_DIR.exists():
+ DEPS_DIR.mkdir(exist_ok=True, parents=True)
+ subprocess.run(["git", "clone", "--depth", "1", SEEDVR_SPACE_URL, str(SEEDVR_SPACE_DIR)], check=True)
+
+ required_dirs = ["projects", "common", "models", "configs_3b", "configs_7b", "data"]
+ for dirname in required_dirs:
+ source, target = SEEDVR_SPACE_DIR / dirname, APP_ROOT / dirname
+ if not target.exists(): shutil.copytree(source, target)
+
+ try:
+ import apex
+ except ImportError:
+ apex_url = 'https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/apex-0.1-cp310-cp310-linux_x86_64.whl'
+ apex_wheel_path = _load_file_from_url(url=apex_url, model_dir=str(DEPS_DIR))
+ subprocess.run(f"pip install {apex_wheel_path}", check=True, shell=True)
+
+ ckpt_dir = APP_ROOT / 'ckpts'
  ckpt_dir.mkdir(exist_ok=True)
+ model_urls = {
+ 'vae': 'https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/ema_vae.pth',
  'dit_3b': 'https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/seedvr2_ema_3b.pth',
+ #'dit_7b': 'https://huggingface.co/ByteDance-Seed/SeedVR2-7B/resolve/main/seedvr2_ema_7b.pth',
  'pos_emb': 'https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/pos_emb.pt',
  'neg_emb': 'https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/neg_emb.pt'
  }
+ for name, url in model_urls.items():
  _load_file_from_url(url=url, model_dir=str(ckpt_dir))
+
+ setup_flag.touch()
+ logger.info("--- SeedVR global setup complete ---")

+ def _initialize_runner(self):
+ """Loads the 3B model inside an isolated GPU environment."""
  if self.runner is not None: return

+ os.environ['CUDA_VISIBLE_DEVICES'] = self.gpu_index
+
+ from projects.video_diffusion_sr.infer import VideoDiffusionInfer
+ from common.config import load_config
+
+ logger.info(f"Manager on GPU {self.global_device_id}: Initializing the SeedVR 3B runner...")
+
+ config_path = APP_ROOT / 'configs_3b' / 'main.yaml'
+ checkpoint_path = APP_ROOT / 'ckpts' / 'seedvr2_ema_3b.pth'

+ config = load_config(str(config_path))
  self.runner = VideoDiffusionInfer(config)
  OmegaConf.set_readonly(self.runner.config, False)
+
+ self.runner.configure_dit_model(device=self.local_device_name, checkpoint=str(checkpoint_path))
  self.runner.configure_vae_model()
+
+ logger.info(f"Manager on GPU {self.global_device_id}: 3B runner ready in VRAM.")
+

  def _unload_runner(self):
+ """Unloads the models from VRAM and cleans up the environment."""
  if self.runner is not None:
  del self.runner; self.runner = None
  gc.collect(); torch.cuda.empty_cache()
+ logger.info(f"Manager on GPU {self.global_device_id}: Runner unloaded from VRAM.")
+
+ if 'CUDA_VISIBLE_DEVICES' in os.environ:
+ del os.environ['CUDA_VISIBLE_DEVICES']

  def process_video(self, input_video_path: str, output_video_path: str, prompt: str,
+ steps: int = 100, seed: int = 666) -> str:
+ """Full load, process, and unload cycle for a single task."""
  try:
+ self._initialize_runner()
+
+ device = torch.device(self.local_device_name)
+
+ from common.seed import set_seed
+ from data.image.transforms.divisible_crop import DivisibleCrop
+ from data.image.transforms.na_resize import NaResize
+ from data.video.transforms.rearrange import Rearrange
+ from projects.video_diffusion_sr.color_fix import wavelet_reconstruction
+ from torchvision.transforms import Compose, Lambda, Normalize
+ from torchvision.io.video import read_video
+
  set_seed(seed, same_across_ranks=True)
  self.runner.config.diffusion.timesteps.sampling.steps = steps
  self.runner.configure_diffusion()
+
  video_tensor = read_video(input_video_path, output_format="TCHW")[0] / 255.0
  res_h, res_w = video_tensor.shape[-2:]
  video_transform = Compose([
  NaResize(resolution=(res_h * res_w) ** 0.5, mode="area", downsample_only=False),
  Lambda(lambda x: torch.clamp(x, 0.0, 1.0)),
+ DivisibleCrop((16, 16)), Normalize(0.5, 0.5), Rearrange("t c h w -> c t h w"),
  ])
+ cond_latents = [video_transform(video_tensor.to(device))]
+ self.runner.dit.to("cpu"); self.runner.vae.to(device)
  cond_latents = self.runner.vae_encode(cond_latents)
+ self.runner.vae.to("cpu"); gc.collect(); torch.cuda.empty_cache(); self.runner.dit.to(device)
+
+ pos_emb = torch.load(APP_ROOT / 'ckpts' / 'pos_emb.pt').to(device)
+ neg_emb = torch.load(APP_ROOT / 'ckpts' / 'neg_emb.pt').to(device)
+ text_embeds_dict = {"texts_pos": [pos_emb], "texts_neg": [neg_emb]}
+
  noises = [torch.randn_like(latent) for latent in cond_latents]
  conditions = [self.runner.get_condition(noise, latent_blur=latent, task="sr") for noise, latent in zip(noises, cond_latents)]
+
  with torch.no_grad(), torch.autocast("cuda", torch.bfloat16, enabled=True):
  video_tensors = self.runner.inference(noises=noises, conditions=conditions, dit_offload=True, **text_embeds_dict)
+
+ self.runner.dit.to("cpu"); gc.collect(); torch.cuda.empty_cache(); self.runner.vae.to(device)
  samples = self.runner.vae_decode(video_tensors)
+ final_sample, input_video_sample = samples[0], cond_latents[0]
  if final_sample.shape[1] < input_video_sample.shape[1]:
  input_video_sample = input_video_sample[:, :final_sample.shape[1]]
+
  final_sample = wavelet_reconstruction(rearrange(final_sample, "c t h w -> t c h w"), rearrange(input_video_sample, "c t h w -> t c h w"))
  final_sample = rearrange(final_sample, "t c h w -> t h w c")
  final_sample = final_sample.clip(-1, 1).mul_(0.5).add_(0.5).mul_(255).round()
  final_sample_np = final_sample.to(torch.uint8).cpu().numpy()
+
  mediapy.write_video(output_video_path, final_sample_np, fps=24)
  return output_video_path
  finally:
  self._unload_runner()

+
+ def _load_file_from_url(url, model_dir='./', file_name=None):
+ os.makedirs(model_dir, exist_ok=True)
+ filename = file_name or os.path.basename(urlparse(url).path)
+ cached_file = os.path.abspath(os.path.join(model_dir, filename))
+ if not os.path.exists(cached_file):
+ download_url_to_file(url, cached_file, hash_prefix=None, progress=True)
+ return cached_file
+
+ # --- Singleton instantiation ---
+ class SeedVrPlaceholder:
+ def process_video(self, input_video_path, *args, **kwargs):
+ logger.warning("SeedVR is disabled (gpus_required: 0). Skipping the HD mastering step.")
+ return input_video_path
+
+ try:
+ with open("config.yaml", 'r') as f: config = yaml.safe_load(f)
+ seedvr_gpus_required = config['specialists'].get('seedvr', {}).get('gpus_required', 2)
+
+ if seedvr_gpus_required > 0:
+ seedvr_device_ids = hardware_manager.allocate_gpus('SeedVR', seedvr_gpus_required)
+ if seedvr_device_ids and 'cpu' not in seedvr_device_ids:
+ device_to_use = seedvr_device_ids[0]
+ seedvr_manager_singleton = SeedVrManager(device_id=device_to_use)
+ logger.info(f"HD Mastering specialist (SeedVR Single Instance) ready to use GPU {device_to_use}.")
+ else:
+ seedvr_manager_singleton = SeedVrPlaceholder()
+ logger.warning("SeedVR was not initialized because no GPU could be allocated.")
+ else:
+ seedvr_manager_singleton = SeedVrPlaceholder()
+ logger.warning("SeedVR Manager was not initialized (gpus_required: 0 in the config).")
+ except Exception as e:
+ logger.critical(f"CRITICAL failure initializing the SeedVrManager: {e}", exc_info=True)
+ seedvr_manager_singleton = SeedVrPlaceholder()
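Two mechanisms carry this file: reassigning `common.decorators.master_only` before any SeedVR module applies it, and `CUDA_VISIBLE_DEVICES` isolation. Both can be illustrated in miniature; the `decorators` namespace below is a stand-in, not the real SeedVR module, and the CUDA renumbering only holds if the variable is set before CUDA initializes in the process:

    import functools, os, types

    # Stand-in for 'common.decorators'. After the reassignment, every later
    # '@decorators.master_only' receives the no-op wrapper instead of the
    # rank-gated original, so no torch.distributed setup is needed.
    decorators = types.SimpleNamespace()

    def _passthrough(func):
        @functools.wraps(func)
        def wrapped(*args, **kwargs):
            return func(*args, **kwargs)
        return wrapped

    decorators.master_only = _passthrough  # the monkey patch

    @decorators.master_only
    def save_report():
        return "saved"

    assert save_report() == "saved"  # runs without any distributed setup

    # GPU isolation: exposing only physical GPU 3 makes it enumerate as cuda:0,
    # which is why the manager stores a global id but addresses 'cuda:0' locally.
    os.environ["CUDA_VISIBLE_DEVICES"] = "cuda:3".split(":")[-1]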
{managers → aduc_framework/managers}/upscaler_specialist.py RENAMED
@@ -5,7 +5,7 @@
  import torch
  import logging
  from diffusers import LTXLatentUpsamplePipeline
- from managers.ltx_manager import ltx_manager_singleton

  logger = logging.getLogger(__name__)


  import torch
  import logging
  from diffusers import LTXLatentUpsamplePipeline
+ from ..managers.ltx_manager import ltx_manager_singleton

  logger = logging.getLogger(__name__)
 
{managers → aduc_framework/managers}/vae_manager.py RENAMED
@@ -28,7 +28,7 @@ import gc
  from typing import Generator

  # Import the source of the VAE model and the low-level functions
- from managers.ltx_manager import ltx_manager_singleton
  from ltx_video.models.autoencoders.vae_encode import vae_encode, vae_decode

  logger = logging.getLogger(__name__)

  from typing import Generator

  # Import the source of the VAE model and the low-level functions
+ from ..managers.ltx_manager import ltx_manager_singleton
  from ltx_video.models.autoencoders.vae_encode import vae_encode, vae_decode

  logger = logging.getLogger(__name__)
aduc_framework/orchestrator.py ADDED
@@ -0,0 +1,194 @@
+ # aduc_framework/orchestrator.py
+ #
+ # Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos
+ #
+ # Version: 4.1.0 (Multi-Pool with Forced SeedVR 3B)
+ #
+ # This version of the orchestrator operates on an architecture of multiple
+ # specialist pools and simplifies the HD mastering task by pinning the
+ # SeedVR 3B model, removing the need to select a model version.
+
+ import logging
+ from typing import List, Dict, Any, Tuple, Callable, Optional, Generator
+ from PIL import Image, ImageOps
+ import os
+ import subprocess
+ import shutil
+ from pathlib import Path
+ import time
+ import gc
+ import torch
+
+ # Internal framework components
+ from .director import AducDirector
+ from .types import GenerationState, PreProductionParams, ProductionParams
+
+ # High-level engineers that define the workflow logic
+ from .engineers import deformes2d_thinker_singleton, deformes3d_engine_singleton, Deformes4DEngine
+
+ # Specialist managers (pools) that execute tasks on dedicated hardware
+ from .managers.latent_enhancer_manager import latent_enhancer_specialist_singleton
+ from .managers.seedvr_manager import seedvr_manager_singleton
+ from .managers.mmaudio_manager import mmaudio_manager_singleton
+ from .managers.vae_manager import vae_manager_singleton
+
+ # Utility tools
+ from .tools.video_encode_tool import video_encode_tool_singleton
+
+ logger = logging.getLogger(__name__)
+
+ ProgressCallback = Optional[Callable[[float, str], None]]
+
+ class AducOrchestrator:
+ """
+ Implements the Maestro (Γ), the central orchestration layer of the Aduc Framework.
+ It receives requests, updates the generation state, delegates tasks to the
+ engineers and their specialist pools, and returns the updated state.
+ """
+ def __init__(self, workspace_dir: str):
+ self.director = AducDirector(workspace_dir)
+ self.editor = Deformes4DEngine()
+ self.editor.initialize(workspace_dir)
+ self.painter = deformes3d_engine_singleton
+ self.painter.initialize(workspace_dir)
+ self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
+ logger.info("ADUC Maestro (Framework Core) ready to conduct the orchestra of specialists.")
+
+ def get_current_state(self) -> GenerationState:
+ """Returns the current generation state."""
+ return self.director.get_full_state()
+
+ def process_image_for_story(self, image_path: str, size: int, filename: str) -> str:
+ """Processes a reference image into the standard square format."""
+ img = Image.open(image_path).convert("RGB")
+ img_square = ImageOps.fit(img, (size, size), Image.Resampling.LANCZOS)
+ processed_path = os.path.join(self.director.workspace_dir, filename)
+ img_square.save(processed_path)
+ logger.info(f"Reference image processed and saved to: {processed_path}")
+ return processed_path
+
+ # --- STAGE 1: PRE-PRODUCTION ---
+ def task_pre_production(self, params: PreProductionParams, progress_callback: ProgressCallback = None) -> Tuple[List[str], List[str], GenerationState]:
+ """Orchestrates the creation of the storyboard and the visual keyframes."""
+ logger.info("Maestro: Starting the Pre-Production task.")
+ self.director.update_parameters("pre_producao", params)
+
+ if progress_callback: progress_callback(0.1, "Generating storyboard...")
+ storyboard_list = deformes2d_thinker_singleton.generate_storyboard(prompt=params.prompt, num_keyframes=params.num_keyframes, ref_image_paths=params.ref_paths)
+ self.director.update_pre_production_state(params.prompt, params.ref_paths, storyboard_list)
+
+ if progress_callback: progress_callback(0.2, "Starting keyframe generation...")
+ keyframes_detailed_data = self.painter.generate_keyframes_from_storyboard(generation_state=self.director.get_full_state_as_dict(), progress_callback=progress_callback)
+ self.director.update_keyframes_state(keyframes_detailed_data)
+
+ final_keyframe_paths = [kf["caminho_pixel"] for kf in keyframes_detailed_data]
+ final_state = self.director.get_full_state()
+ logger.info("Maestro: Pre-Production task complete.")
+ return storyboard_list, final_keyframe_paths, final_state
+
+ # --- STAGE 2: PRODUCTION ---
+ def task_produce_original_movie(self, params: ProductionParams, progress_callback: ProgressCallback = None) -> Tuple[str, List[str], GenerationState]:
+ """Orchestrates the generation of the main video from the keyframes."""
+ logger.info("Maestro: Starting the Original Movie Production task.")
+ self.director.update_parameters("producao", params)
+
+ result_data = self.editor.generate_original_movie(full_generation_state=self.director.get_full_state_as_dict(), progress_callback=progress_callback)
+ self.director.update_video_state(result_data["video_data"])
+
+ final_video_path = result_data["final_path"]
+ latent_paths = result_data["latent_paths"]
+ final_state = self.director.get_full_state()
+ logger.info("Maestro: Original Movie Production task complete.")
+ return final_video_path, latent_paths, final_state
+
+ # --- STAGE 3: POST-PRODUCTION (Effects Chain) ---
+
+ def task_run_latent_upscaler(self, latent_paths: List[str], chunk_size: int, progress_callback: ProgressCallback = None) -> Generator[Dict[str, Any], None, None]:
+ """Applies 2x upscaling to the latents and decodes them into a new video."""
+ if not self.director.workspace_dir: raise RuntimeError("Orchestrator not initialized.")
+ if not latent_paths: raise ValueError("No latent paths were provided for upscaling.")
+
+ logger.info("--- ORCHESTRATOR: Latent Upscaling task ---")
+ run_timestamp = int(time.time())
+ temp_dir = os.path.join(self.director.workspace_dir, f"temp_upscaled_clips_{run_timestamp}")
+ os.makedirs(temp_dir, exist_ok=True)
+
+ final_upscaled_clip_paths = []
+ num_chunks = -(-len(latent_paths) // chunk_size)  # ceiling division
+
+ for i in range(num_chunks):
+ chunk_paths = latent_paths[i * chunk_size:(i + 1) * chunk_size]
+ if progress_callback: progress_callback(i / num_chunks, f"Upscaling & decoding batch {i+1}/{num_chunks}")
+
+ tensors_in_chunk = [torch.load(p, map_location=self.device) for p in chunk_paths]
+ sub_group_latent = torch.cat(tensors_in_chunk, dim=2)
+
+ upscaled_latent_chunk = latent_enhancer_specialist_singleton.upscale(sub_group_latent)
+ pixel_tensor = vae_manager_singleton.decode(upscaled_latent_chunk)
+
+ current_clip_path = os.path.join(temp_dir, f"upscaled_clip_{i:04d}.mp4")
+ self.editor.save_video_from_tensor(pixel_tensor, current_clip_path, fps=24)
+ final_upscaled_clip_paths.append(current_clip_path)
+
+ del tensors_in_chunk, sub_group_latent, upscaled_latent_chunk, pixel_tensor
+ gc.collect(); torch.cuda.empty_cache()
+ yield {"progress": (i + 1) / num_chunks}
+
+ final_video_path = os.path.join(self.director.workspace_dir, f"upscaled_movie_{run_timestamp}.mp4")
+ video_encode_tool_singleton.concatenate_videos(final_upscaled_clip_paths, final_video_path, self.director.workspace_dir)
+
+ shutil.rmtree(temp_dir)
+ logger.info(f"Latent upscaling complete! Final video at: {final_video_path}")
+ yield {"final_path": final_video_path}
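The `-(-n // k)` idiom is ceiling division using Python's floor-division semantics on negatives; with 7 latents and a chunk size of 3, it yields 3 batches. A quick standalone check:

    def ceil_div(n: int, k: int) -> int:
        # Equivalent to math.ceil(n / k) without floating point.
        return -(-n // k)

    assert ceil_div(7, 3) == 3
    assert ceil_div(6, 3) == 2
    assert ceil_div(1, 4) == 1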
+
+ def task_run_hd_mastering(self, source_video_path: str, steps: int, prompt: str, progress_callback: ProgressCallback = None) -> Generator[Dict[str, Any], None, None]:
+ """Applies HD mastering using the SeedVR GPU pool with the 3B model."""
+ if not self.director.workspace_dir: raise RuntimeError("Orchestrator not initialized.")
+ logger.info(f"--- ORCHESTRATOR: HD Mastering task with SeedVR 3B ---")
+
+ run_timestamp = int(time.time())
+ output_path = os.path.join(self.director.workspace_dir, f"hd_mastered_movie_3B_{run_timestamp}.mp4")
+
+ final_path = seedvr_manager_singleton.process_video(
+ input_video_path=source_video_path,
+ output_video_path=output_path,
+ prompt=prompt,
+ steps=steps
+ )
+ logger.info(f"HD mastering complete! Final video at: {final_path}")
+ yield {"final_path": final_path}
+
+ def task_run_audio_generation(self, source_video_path: str, audio_prompt: str, progress_callback: ProgressCallback = None) -> Generator[Dict[str, Any], None, None]:
+ """Generates and adds audio to the video using the MMAudio GPU pool."""
+ if not self.director.workspace_dir: raise RuntimeError("Orchestrator not initialized.")
+ logger.info(f"--- ORCHESTRATOR: Audio Generation task ---")
+
+ if progress_callback: progress_callback(0.1, "Preparing for audio generation...")
+
+ run_timestamp = int(time.time())
+ source_name = Path(source_video_path).stem
+ output_path = os.path.join(self.director.workspace_dir, f"{source_name}_with_audio_{run_timestamp}.mp4")
+
+ try:
+ result = subprocess.run(
+ ["ffprobe", "-v", "error", "-show_entries", "format=duration", "-of", "default=noprint_wrappers=1:nokey=1", source_video_path],
+ capture_output=True, text=True, check=True
+ )
+ duration = float(result.stdout.strip())
+ except Exception as e:
+ logger.error(f"Could not obtain the duration of video '{source_video_path}': {e}", exc_info=True)
+ yield {"error": "Failed to obtain the video duration."}
+ return
+
+ if progress_callback: progress_callback(0.5, "Generating audio track...")
+
+ final_path = mmaudio_manager_singleton.generate_audio_for_video(
+ video_path=source_video_path,
+ prompt=audio_prompt,
+ duration_seconds=duration,
+ output_path_override=output_path
+ )
+
+ logger.info(f"Audio generation complete! Video with audio at: {final_path}")
+ if progress_callback: progress_callback(1.0, "Audio generation complete!")
+ yield {"final_path": final_path}
{prompts → aduc_framework/prompts}/LICENSE RENAMED
File without changes
{prompts → aduc_framework/prompts}/NOTICE.md RENAMED
File without changes
{prompts → aduc_framework/prompts}/README.md RENAMED
File without changes
{prompts → aduc_framework/prompts}/anticipatory_keyframe_prompt.txt RENAMED
File without changes
{prompts → aduc_framework/prompts}/audio_director_prompt.txt RENAMED
File without changes
aduc_framework/prompts/cinematic_director_prompt.txt ADDED
@@ -0,0 +1,27 @@
+ # ROLE: AI Cinematic Scenarist
+
+ # GOAL:
+ # Your single, crucial task is to write a rich, cinematic motion prompt.
+ # This prompt must describe the most logical and compelling action that
+ # connects the PRESENT visual state to the FUTURE visual state, considering
+ # the context of the PAST.
+
+ # CONTEXT FOR YOUR PROMPT:
+ - Global Story Goal: {global_prompt}
+ - Creative History: {story_history}
+ - The Past: "{past_scene_desc}" -> [PAST_IMAGE]
+ - The Present: "{present_scene_desc}" -> [PRESENT_IMAGE]
+ - The Future: "{future_scene_desc}" -> [FUTURE_IMAGE]
+
+ # CRITICAL PROMPT DIRECTIVES:
+ # 1. ALWAYS DESCRIBE MOTION: The scene must not be static.
+ # 2. STYLE: Be descriptive, cinematic, and direct.
+ # 3. STRUCTURE: In a single paragraph (under 150 words), describe the scene's
+ # motion, prioritizing in this EXACT order:
+ # a. Actors/Animals: What are they doing?
+ # b. Objects: How do they interact?
+ # c. Camera: How is it moving?
+ # d. Scenery/Environment: What details add to the mood?
+
+ # RESPONSE FORMAT:
+ # You MUST respond with ONLY the raw, single-line string for the motion prompt.
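This template uses plain `str.format` placeholders, so whichever engineer loads it presumably fills the five text slots and supplies the referenced images separately. A hypothetical sketch of that wiring (the example values are illustrative):

    from pathlib import Path

    template = Path("aduc_framework/prompts/cinematic_director_prompt.txt").read_text()
    motion_prompt_request = template.format(
        global_prompt="a lighthouse keeper weathers a storm",
        story_history="Act 1 established the approaching storm front.",
        past_scene_desc="the keeper climbs the spiral staircase",
        present_scene_desc="waves crash against the lighthouse base",
        future_scene_desc="the lamp flares to life above the spray",
    )
    # [PAST_IMAGE]/[PRESENT_IMAGE]/[FUTURE_IMAGE] are markers for images sent
    # alongside the text in a multimodal request, not format placeholders.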
{prompts → aduc_framework/prompts}/director_composition_prompt.txt RENAMED
File without changes
{prompts → aduc_framework/prompts}/flux_composition_wrapper_prompt.txt RENAMED
File without changes
{prompts → aduc_framework/prompts}/initial_motion_prompt.txt RENAMED
File without changes
{prompts → aduc_framework/prompts}/keyframe_selection_prompt.txt RENAMED
File without changes
{prompts → aduc_framework/prompts}/sound_director_prompt.txt RENAMED
File without changes
{prompts → aduc_framework/prompts}/sound_director_prompt.txt.txt RENAMED
File without changes
{prompts → aduc_framework/prompts}/transition_decision_prompt.txt RENAMED
File without changes
{prompts → aduc_framework/prompts}/unified_cinematographer_prompt.txt RENAMED
File without changes
{prompts → aduc_framework/prompts}/unified_storyboard_prompt.txt RENAMED
File without changes
{tools → aduc_framework/tools}/LICENSE RENAMED
File without changes
{tools → aduc_framework/tools}/NOTICE.md RENAMED
File without changes
{tools → aduc_framework/tools}/README.md RENAMED
File without changes
aduc_framework/tools/__init__.py ADDED
@@ -0,0 +1,15 @@
+ # aduc_framework/tools/__init__.py
+
+ # Exposes the main singletons and classes of the tools sub-package.
+
+ from .hardware_manager import hardware_manager
+ from .video_encode_tool import video_encode_tool_singleton
+ from . import optimization
+ from . import tensor_utils
+
+ __all__ = [
+ "hardware_manager",
+ "video_encode_tool_singleton",
+ "optimization",
+ "tensor_utils",
+ ]
{tools → aduc_framework/tools}/hardware_manager.py RENAMED
File without changes
{tools → aduc_framework/tools}/optimization.py RENAMED
File without changes
{tools → aduc_framework/tools}/tensor_utils.py RENAMED
File without changes
{tools → aduc_framework/tools}/video_encode_tool.py RENAMED
File without changes
aduc_framework/types.py ADDED
@@ -0,0 +1,100 @@
1
+ # aduc_framework/types.py
2
+ #
3
+ # Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos
4
+ #
5
+ # Versão 3.1.0 (Framework Data Models with Core Types)
6
+ #
7
+ # Este arquivo define as estruturas de dados centrais para o Aduc Framework
8
+ # usando Pydantic. Ele também inclui tipos de dados de baixo nível, como dataclasses,
9
+ # que são usados internamente pelos managers e engineers.
10
+
11
+ from pydantic import BaseModel, Field
12
+ from typing import List, Dict, Any, Optional
13
+ from dataclasses import dataclass
14
+ import torch
15
+
16
+ # --- Modelos de Parâmetros de Entrada (Pydantic) ---
17
+ # Representam os dados que o usuário fornece através de uma interface.
18
+
19
+ class PreProductionParams(BaseModel):
20
+ """Parâmetros para a etapa de Roteiro e Keyframes."""
21
+ prompt: str = Field(..., description="A ideia geral do filme ou cena.")
22
+ num_keyframes: int = Field(..., gt=0, description="O número de keyframes a serem gerados.")
23
+ ref_paths: List[str] = Field(..., description="Lista de caminhos para as imagens de referência iniciais.")
24
+ resolution: int = Field(..., description="A resolução base (largura/altura) para a geração.")
25
+ duration_per_fragment: float = Field(..., gt=0, description="A duração alvo em segundos para cada fragmento de vídeo.")
26
+
27
+ class ProductionParams(BaseModel):
28
+ """Parâmetros para a etapa de Geração de Vídeo."""
29
+ trim_percent: int = Field(..., ge=0, le=100, description="Poda causal para o mecanismo Déjà-Vu.")
30
+ handler_strength: float = Field(..., ge=0.0, le=1.0, description="Força do guia de trajetória (Déjà-Vu).")
31
+ destination_convergence_strength: float = Field(..., ge=0.0, le=1.0, description="Força da âncora final (destino).")
32
+ guidance_scale: float = Field(..., ge=0.0, description="Escala de orientação do prompt de movimento.")
33
+ stg_scale: float = Field(..., ge=0.0, description="Escala de continuidade temporal (STG).")
34
+ inference_steps: int = Field(..., gt=0, description="Número de passos de inferência para a geração de vídeo.")
35
+
36
+ class GenerationParameters(BaseModel):
37
+ """Agrega todos os parâmetros de configuração da geração."""
38
+ pre_producao: Optional[PreProductionParams] = None
39
+ producao: Optional[ProductionParams] = None
40
+ pos_producao: Optional[Dict[str, Any]] = None
41
+
42
+
43
+ # --- Modelos de Artefatos Gerados (Pydantic) ---
44
+ # Representam os dados e metadados dos resultados criados pelo framework.
45
+
46
+ class MediaRef(BaseModel):
47
+ """Representa uma mídia de referência fornecida pelo usuário."""
48
+ id: int
49
+ caminho: str
50
+
51
+ class Ato(BaseModel):
52
+ """Representa uma unidade narrativa (sub-tarefa) do storyboard."""
53
+ id: int
54
+ resumo_ato: str
55
+
56
+ class KeyframeData(BaseModel):
57
+ """Estrutura de dados completa para um único keyframe gerado."""
58
+ id: int
59
+ caminho_pixel: str
60
+ caminho_latent: str
61
+ prompt_keyframe: str
62
+
63
+ class VideoFragmentData(BaseModel):
64
+ """Metadados sobre a geração de um único fragmento de vídeo entre dois keyframes."""
65
+ id: int
66
+ prompt_video: str
67
+
68
+ class VideoData(BaseModel):
69
+ """Estrutura de dados completa para o vídeo final (ou um grande clipe)."""
70
+ id: int
71
+ caminho_pixel: str
72
+ caminhos_latentes_fragmentos: List[str]
73
+ fragmentos_componentes: List[VideoFragmentData]
74
+
75
+
76
+ # --- O Modelo de Estado Principal (Pydantic) ---
77
+
78
+ class GenerationState(BaseModel):
79
+ """
80
+ O "DNA Digital" completo de uma geração.
81
+ Este é o objeto de estado central que flui através do framework.
82
+ """
83
+ parametros_geracao: GenerationParameters = Field(default_factory=GenerationParameters)
84
+ Promt_geral: str = ""
85
+ midias_referencia: List[MediaRef] = Field(default_factory=list)
86
+ Atos: List[Ato] = Field(default_factory=list)
87
+ Keyframe_atos: List[KeyframeData] = Field(default_factory=list)
88
+ videos_atos: List[VideoData] = Field(default_factory=list)
89
+
90
+
91
+ # --- Tipos de Dados Internos (Dataclass) ---
92
+ # Usado para passar dados complexos (como tensores) que não são facilmente
93
+ # serializáveis em JSON, entre os componentes internos do framework.
94
+
95
+ @dataclass
96
+ class LatentConditioningItem:
97
+ """Representa uma âncora de condicionamento no espaço latente para o LTX."""
98
+ latent_tensor: torch.Tensor
99
+ media_frame_number: int
100
+ conditioning_strength: float
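A minimal usage sketch (not part of this commit) of the models defined above; field names such as `Promt_geral` and `parametros_geracao` are taken verbatim from types.py, the values are illustrative, and `model_dump_json` assumes Pydantic v2 (under v1 this would be `state.json()`).

from aduc_framework.types import (
    Ato, GenerationParameters, GenerationState, MediaRef, PreProductionParams,
)

params = GenerationParameters(
    pre_producao=PreProductionParams(
        prompt="A lighthouse keeper at dawn",   # illustrative value
        num_keyframes=4,                        # gt=0: passing 0 raises ValidationError
        ref_paths=["refs/keeper.png"],
        resolution=768,
        duration_per_fragment=4.0,              # gt=0 as well
    )
)

state = GenerationState(
    parametros_geracao=params,
    Promt_geral="A lighthouse keeper at dawn",
    midias_referencia=[MediaRef(id=1, caminho="refs/keeper.png")],
    Atos=[Ato(id=1, resumo_ato="The keeper climbs the spiral stairs.")],
)

# The whole state round-trips through JSON, which is what makes it work as a
# persistent "Digital DNA"; tensors live in LatentConditioningItem, a plain
# dataclass, precisely because they are not JSON-serializable.
print(state.model_dump_json(indent=2))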
aduc_orchestrator.py DELETED
@@ -1,199 +0,0 @@
- # aduc_orchestrator.py
- #
- # Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos
- #
- # Version: 2.2.0
- #
- # This file contains the core ADUC (Automated Discovery and Orchestration of Complex tasks)
- # orchestrator, known as the "Maestro" (Γ). Its responsibility is to manage the high-level
- # creative workflow of film production. This version is updated to reflect the final
- # refactored project structure with `engineers` and `managers`.
-
- import os
- import logging
- from typing import List, Dict, Any, Generator, Tuple
-
- import gradio as gr
- from PIL import Image, ImageOps
-
- from engineers.deformes4D import Deformes4DEngine
- from engineers.deformes2D_thinker import deformes2d_thinker_singleton
- from engineers.deformes3D import deformes3d_engine_singleton
-
- # The logger is configured in app.py; here we just get the instance.
- logger = logging.getLogger(__name__)
-
- class AducDirector:
-     """
-     Represents the Scene Director, responsible for managing the production state.
-     Acts as the "score" for the orchestra, keeping track of all generated artifacts
-     (script, keyframes, etc.) during the creative process.
-     """
-     def __init__(self, workspace_dir: str):
-         self.workspace_dir = workspace_dir
-         os.makedirs(self.workspace_dir, exist_ok=True)
-         self.state: Dict[str, Any] = {}
-         logger.info(f"The stage is set. Workspace at '{self.workspace_dir}'.")
-
-     def update_state(self, key: str, value: Any) -> None:
-         logger.info(f"Notating on the score: State '{key}' updated.")
-         self.state[key] = value
-
-     def get_state(self, key: str, default: Any = None) -> Any:
-         return self.state.get(key, default)
-
- class AducOrchestrator:
-     """
-     Implements the Maestro (Γ), the central orchestration layer of the ADUC architecture.
-     It does not execute AI tasks directly but delegates each step of the creative
-     process (scriptwriting, art direction, cinematography) to the appropriate Specialists.
-     """
-     def __init__(self, workspace_dir: str):
-         self.director = AducDirector(workspace_dir)
-         self.editor = Deformes4DEngine(workspace_dir)
-         self.painter = deformes3d_engine_singleton
-         logger.info("ADUC Maestro is on the podium. Musicians (specialists) are ready.")
-
-     def process_image_for_story(self, image_path: str, size: int, filename: str) -> str:
-         """
-         Pre-processes a reference image, standardizing it for use by the Specialists.
-         """
-         img = Image.open(image_path).convert("RGB")
-         img_square = ImageOps.fit(img, (size, size), Image.Resampling.LANCZOS)
-         processed_path = os.path.join(self.director.workspace_dir, filename)
-         img_square.save(processed_path)
-         logger.info(f"Reference image processed and saved to: {processed_path}")
-         return processed_path
-
-     # --- PRE-PRODUCTION TASKS ---
-
-     def task_generate_storyboard(self, prompt: str, num_keyframes: int, ref_image_paths: List[str],
-                                  progress: gr.Progress) -> Tuple[List[str], str, Any]:
-         """
-         Delegates the task of creating the storyboard to the Scriptwriter (deformes2D_thinker).
-         """
-         logger.info(f"Act 1, Scene 1: Script. Instructing Scriptwriter to create {num_keyframes} scenes.")
-         progress(0.2, desc="Consulting AI Scriptwriter...")
-
-         storyboard = deformes2d_thinker_singleton.generate_storyboard(prompt, num_keyframes, ref_image_paths)
-
-         logger.info(f"Scriptwriter returned the score: {storyboard}")
-         self.director.update_state("storyboard", storyboard)
-         self.director.update_state("processed_ref_paths", ref_image_paths)
-         return storyboard, ref_image_paths[0], gr.update(visible=True, open=True)
-
-     def task_select_keyframes(self, storyboard: List[str], base_ref_paths: List[str],
-                               pool_ref_paths: List[str]) -> List[str]:
-         """
-         Delegates to the Photographer (deformes2D_thinker) the task of selecting keyframes.
-         """
-         logger.info(f"Act 1, Scene 2 (Photographer Mode): Instructing Photographer to select {len(storyboard)} keyframes.")
-         selected_paths = deformes2d_thinker_singleton.select_keyframes_from_pool(storyboard, base_ref_paths, pool_ref_paths)
-         logger.info(f"Photographer selected the following scenes: {[os.path.basename(p) for p in selected_paths]}")
-         self.director.update_state("keyframes", selected_paths)
-         return selected_paths
-
-     def task_generate_keyframes(self, storyboard: List[str], initial_ref_path: str, global_prompt: str,
-                                 keyframe_resolution: int, progress_callback_factory=None) -> List[str]:
-         """
-         Delegates to the Art Director (Deformes3DEngine) the task of generating keyframes.
-         """
-         logger.info("Act 1, Scene 2 (Art Director Mode): Delegating to Art Director.")
-         general_ref_paths = self.director.get_state("processed_ref_paths", [])
-
-         final_keyframes = self.painter.generate_keyframes_from_storyboard(
-             storyboard=storyboard,
-             initial_ref_path=initial_ref_path,
-             global_prompt=global_prompt,
-             keyframe_resolution=keyframe_resolution,
-             general_ref_paths=general_ref_paths,
-             progress_callback_factory=progress_callback_factory
-         )
-         self.director.update_state("keyframes", final_keyframes)
-         logger.info("Maestro: Art Director has completed keyframe generation.")
-         return final_keyframes
-
-     # --- PRODUCTION & POST-PRODUCTION TASKS ---
-
-     def task_produce_original_movie(self, keyframes: List[str], global_prompt: str, seconds_per_fragment: float,
-                                     trim_percent: int, handler_strength: float,
-                                     destination_convergence_strength: float,
-                                     guidance_scale: float, stg_scale: float, inference_steps: int,
-                                     video_resolution: int, use_continuity_director: bool,
-                                     progress: gr.Progress) -> Dict[str, Any]:
-         """
-         Delegates the production of the original master video to the Deformes4DEngine.
-         """
-         logger.info("Maestro: Delegating production of the original movie to Deformes4DEngine.")
-         storyboard = self.director.get_state("storyboard", [])
-
-         result = self.editor.generate_original_movie(
-             keyframes=keyframes,
-             global_prompt=global_prompt,
-             storyboard=storyboard,
-             seconds_per_fragment=seconds_per_fragment,
-             trim_percent=trim_percent,
-             handler_strength=handler_strength,
-             destination_convergence_strength=destination_convergence_strength,
-             video_resolution=video_resolution,
-             use_continuity_director=use_continuity_director,
-             guidance_scale=guidance_scale,
-             stg_scale=stg_scale,
-             num_inference_steps=inference_steps,
-             progress=progress
-         )
-
-         self.director.update_state("final_video_path", result["final_path"])
-         self.director.update_state("latent_paths", result["latent_paths"])
-         logger.info("Maestro: Original movie production complete.")
-         return result
-
-     def task_run_latent_upscaler(self, latent_paths: List[str], chunk_size: int, progress: gr.Progress) -> Generator[Dict[str, Any], None, None]:
-         """
-         Orchestrates the latent upscaling task.
-         """
-         logger.info(f"Maestro: Delegating latent upscaling task for {len(latent_paths)} fragments.")
-         for update in self.editor.upscale_latents_and_create_video(
-             latent_paths=latent_paths,
-             chunk_size=chunk_size,
-             progress=progress
-         ):
-             if "final_path" in update and update["final_path"]:
-                 self.director.update_state("final_video_path", update["final_path"])
-                 yield update
-                 break
-         logger.info("Maestro: Latent upscaling complete.")
-
-     def task_run_hd_mastering(self, source_video_path: str, model_version: str, steps: int, prompt: str, progress: gr.Progress) -> Generator[Dict[str, Any], None, None]:
-         """
-         Orchestrates the HD mastering task.
-         """
-         logger.info(f"Maestro: Delegating HD mastering task using SeedVR {model_version}.")
-         for update in self.editor.master_video_hd(
-             source_video_path=source_video_path,
-             model_version=model_version,
-             steps=steps,
-             prompt=prompt,
-             progress=progress
-         ):
-             if "final_path" in update and update["final_path"]:
-                 self.director.update_state("final_video_path", update["final_path"])
-                 yield update
-                 break
-         logger.info("Maestro: HD mastering complete.")
-
-     def task_run_audio_generation(self, source_video_path: str, audio_prompt: str, progress: gr.Progress) -> Generator[Dict[str, Any], None, None]:
-         """
-         Orchestrates the audio generation task.
-         """
-         logger.info(f"Maestro: Delegating audio generation task.")
-         for update in self.editor.generate_audio_for_final_video(
-             source_video_path=source_video_path,
-             audio_prompt=audio_prompt,
-             progress=progress
-         ):
-             if "final_path" in update and update["final_path"]:
-                 self.director.update_state("final_video_path", update["final_path"])
-                 yield update
-                 break
-         logger.info("Maestro: Audio generation complete.")