Carlexxx commited on
Commit
c0e5fc7
·
1 Parent(s): 3470339
Files changed (6) hide show
  1. aduc_orchestrator.py +140 -0
  2. app.py +301 -0
  3. audio_specialist.py +141 -0
  4. config.yaml +24 -0
  5. deformes4D_engine.py +1 -3
  6. packages.txt +1 -0
aduc_orchestrator.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # aduc_orchestrator.py
2
+ # Copyright (C) 4 de Agosto de 2025 Carlos Rodrigues dos Santos
3
+ #
4
+ # Este programa é software livre: você pode redistribuí-lo e/ou modificá-lo
5
+ # sob os termos da Licença Pública Geral Affero GNU...
6
+ # AVISO DE PATENTE PENDENTE: Consulte NOTICE.md.
7
+
8
+ import os
9
+ import time
10
+ import shutil
11
+ import logging
12
+ import gradio as gr
13
+ from PIL import Image, ImageOps
14
+ import subprocess
15
+ from pathlib import Path
16
+ import json
17
+
18
+ from deformes4D_engine import Deformes4DEngine
19
+ from ltx_manager_helpers import ltx_manager_singleton
20
+ from gemini_helpers import gemini_singleton
21
+ from image_specialist import image_specialist_singleton
22
+
23
+ # Configuração de logging centralizada deve ser feita no app.py
24
+ logger = logging.getLogger(__name__)
25
+
26
class AducDirector:
    """Keep the workspace location and a key/value state store for one production.

    The state is a plain dict held in memory; nothing is persisted to disk by
    this class.
    """

    def __init__(self, workspace_dir):
        """Create the workspace directory if it is missing and start with empty state.

        Args:
            workspace_dir: Path of the directory used as the working area.
        """
        self.workspace_dir = workspace_dir
        os.makedirs(self.workspace_dir, exist_ok=True)
        self.state = {}
        logger.info(f"O palco está pronto. Workspace em '{self.workspace_dir}'.")

    def reset(self):
        """Make sure the workspace directory exists and discard all stored state.

        Files already present in the workspace are left untouched.
        """
        os.makedirs(self.workspace_dir, exist_ok=True)
        self.state = {}
        logger.info("Partitura limpa. Estado do Diretor reiniciado.")

    def update_state(self, key, value):
        """Store ``value`` under ``key`` and log that the key was updated.

        Only the key name is logged; the value itself is never written to the
        log, so large or complex objects are safe to store.
        """
        logger.info(f"Anotando na partitura: Estado '{key}' atualizado.")
        self.state[key] = value

    def get_state(self, key, default=None):
        """Return the value stored under ``key``, or ``default`` when it is absent."""
        return self.state.get(key, default)
45
+
46
class AducOrchestrator:
    """Entry point used by the UI: delegates each production step to a specialist.

    Results of each step (storyboard, reference paths, keyframes, final video
    path) are recorded on the ``AducDirector`` held in ``self.director``.
    """

    def __init__(self, workspace_dir: str):
        """Wire the director, the video engine and the image specialist.

        Args:
            workspace_dir: Directory shared by the director and the video engine.
        """
        self.director = AducDirector(workspace_dir)
        self.editor = Deformes4DEngine(ltx_manager_singleton, workspace_dir)
        self.painter = image_specialist_singleton
        logger.info("Maestro ADUC está no pódio. Músicos (especialistas) prontos.")

    def process_image_for_story(self, image_path: str, size: int, filename: str = None) -> str:
        """Convert a reference image to RGB, fit it to a square and save it.

        Args:
            image_path: Path of the image to read.
            size: Side length, in pixels, of the square output.
            filename: Optional output file name inside the workspace. When
                omitted, a millisecond-timestamped ``ref_processed_*.png``
                name is used.

        Returns:
            Path of the saved, processed image.
        """
        # The context manager releases the underlying file handle as soon as
        # the pixel data has been copied by ``convert``.
        with Image.open(image_path) as source_image:
            img = source_image.convert("RGB")
        img_square = ImageOps.fit(img, (size, size), Image.Resampling.LANCZOS)

        if filename:
            processed_path = os.path.join(self.director.workspace_dir, filename)
        else:
            processed_path = os.path.join(
                self.director.workspace_dir,
                f"ref_processed_{int(time.time()*1000)}.png",
            )

        img_square.save(processed_path)
        logger.info(f"Imagem de referência processada e salva em: {processed_path}")
        return processed_path

    def task_generate_storyboard(self, prompt, num_keyframes, processed_ref_image_paths, progress):
        """Ask the Gemini helper for a storyboard and record it on the director.

        Args:
            prompt: Global description of the story.
            num_keyframes: Number of scenes requested.
            processed_ref_image_paths: Non-empty sequence of reference image paths.
            progress: Callable invoked as ``progress(fraction, desc=...)``.

        Returns:
            A tuple ``(storyboard, first_reference_path, gradio_update)``.

        Raises:
            ValueError: If no reference image path is supplied.
        """
        # Fail before calling the model: the first reference path is part of
        # the return value, so an empty list can never succeed.
        if not processed_ref_image_paths:
            raise ValueError(
                "Nenhuma imagem de referência processada foi fornecida para gerar o roteiro."
            )

        logger.info(f"Ato 1, Cena 1: Roteiro. Instruindo o Roteirista (Gemini) a criar {num_keyframes} cenas a partir de: '{prompt}'")
        progress(0.2, desc="Consultando Roteirista IA (Gemini)...")
        storyboard = gemini_singleton.generate_storyboard(prompt, num_keyframes, processed_ref_image_paths)
        logger.info(f"Roteirista retornou a partitura: {storyboard}")
        self.director.update_state("storyboard", storyboard)
        self.director.update_state("processed_ref_paths", processed_ref_image_paths)
        return storyboard, processed_ref_image_paths[0], gr.update(visible=True, open=True)

    def task_select_keyframes(self, storyboard, base_ref_paths, pool_ref_paths):
        """Let the Gemini helper pick keyframes from a pool of existing images.

        The selected paths are stored on the director under ``"keyframes"``.

        Returns:
            The selected image paths, as returned by the Gemini helper.
        """
        logger.info(f"Ato 1, Cena 2 (Alternativa): Fotografia. Instruindo o Editor (Gemini) a selecionar {len(storyboard)} keyframes de um banco de {len(pool_ref_paths)} imagens.")
        selected_paths = gemini_singleton.select_keyframes_from_pool(storyboard, base_ref_paths, pool_ref_paths)
        logger.info(f"Editor selecionou as seguintes cenas: {[os.path.basename(p) for p in selected_paths]}")
        self.director.update_state("keyframes", selected_paths)
        return selected_paths

    def task_generate_keyframes(self, storyboard, initial_ref_path, global_prompt, keyframe_resolution, progress_callback_factory=None):
        """Delegate keyframe generation to the image specialist.

        The general reference paths are read from the director state
        (``"processed_ref_paths"``, empty list when unset) and the result is
        stored under ``"keyframes"``.

        Args:
            storyboard: Scene descriptions to render.
            initial_ref_path: Reference image for the first keyframe.
            global_prompt: Global description of the story.
            keyframe_resolution: Resolution value; converted with ``int``.
            progress_callback_factory: Optional factory forwarded unchanged to
                the image specialist.

        Returns:
            The keyframes returned by the image specialist.
        """
        logger.info("Ato 1, Cena 2: Direção de Arte. Delegando ao Especialista de Imagem.")

        general_ref_paths = self.director.get_state("processed_ref_paths", [])

        final_keyframes = self.painter.generate_keyframes_from_storyboard(
            storyboard=storyboard,
            initial_ref_path=initial_ref_path,
            global_prompt=global_prompt,
            keyframe_resolution=int(keyframe_resolution),
            general_ref_paths=general_ref_paths,
            progress_callback_factory=progress_callback_factory
        )

        self.director.update_state("keyframes", final_keyframes)
        logger.info("Maestro: Especialista de Imagem concluiu a geração dos keyframes.")
        return final_keyframes

    def task_produce_final_movie_with_feedback(self, keyframes, global_prompt, seconds_per_fragment,
                                               overlap_percent, echo_frames,
                                               handler_strength,
                                               destination_convergence_strength,
                                               base_ltx_params,
                                               video_resolution, use_continuity_director,
                                               use_cinematographer, progress):
        """Stream the video engine's progress while the full movie is produced.

        This is a generator. It yields ``{"fragment_path": ...}`` for every
        fragment update reported by the engine, then ``{"final_path": ...}``
        once, after which it stops consuming the engine. The final path is
        also stored on the director under ``"final_video_path"``.

        The storyboard is read from the director state (empty list when unset).
        ``use_cinematographer`` is accepted for interface compatibility but is
        not forwarded to the engine.
        """
        logger.info("AducOrchestrator: Delegando a produção do filme completo ao Deformes4DEngine.")
        storyboard = self.director.get_state("storyboard", [])

        for update in self.editor.generate_full_movie(
            keyframes=keyframes,
            global_prompt=global_prompt,
            storyboard=storyboard,
            seconds_per_fragment=seconds_per_fragment,
            overlap_percent=overlap_percent,
            echo_frames=echo_frames,
            handler_strength=handler_strength,
            destination_convergence_strength=destination_convergence_strength,
            base_ltx_params=base_ltx_params,
            video_resolution=video_resolution,
            use_continuity_director=use_continuity_director,
            progress=progress
        ):
            if "fragment_path" in update and update["fragment_path"]:
                yield {"fragment_path": update["fragment_path"]}
            elif "final_path" in update and update["final_path"]:
                final_movie_path = update["final_path"]
                self.director.update_state("final_video_path", final_movie_path)
                yield {"final_path": final_movie_path}
                break

        logger.info("AducOrchestrator: Produção do filme concluída e estado do diretor atualizado.")
app.py ADDED
@@ -0,0 +1,301 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ # Copyright (C) 4 de Agosto de 2025 Carlos Rodrigues dos Santos
3
+ #
4
+ # Este programa é software livre: você pode redistribuí-lo e/ou modificá-lo
5
+ # sob os termos da Licença Pública Geral Affero GNU como publicada pela
6
+ # Free Software Foundation, seja a versão 3 da Licença, ou
7
+ # (a seu critério) qualquer versão posterior.
8
+ #
9
+ # AVISO DE PATENTE PENDENTE: O método e sistema ADUC implementado neste
10
+ # software está em processo de patenteamento. Consulte NOTICE.md.
11
+
12
+ import gradio as gr
13
+ import yaml
14
+ import logging
15
+ import os
16
+ import sys
17
+ import shutil
18
+ import time
19
+ import json
20
+
21
+ from aduc_orchestrator import AducOrchestrator
22
+
23
+ # --- 1. CONFIGURAÇÃO E INICIALIZAÇÃO ---
24
+
25
LOG_FILE_PATH = "aduc_log.txt"
# Start every run with a fresh session log.
if os.path.exists(LOG_FILE_PATH):
    os.remove(LOG_FILE_PATH)

# Central logging setup: the root logger writes the same records to stdout and
# to the session log file; any handlers installed earlier are dropped.
log_format = '%(asctime)s - %(levelname)s - [%(name)s:%(funcName)s] - %(message)s'
root_logger = logging.getLogger()
root_logger.setLevel(logging.INFO)
root_logger.handlers.clear()

stream_handler = logging.StreamHandler(sys.stdout)
stream_handler.setLevel(logging.INFO)
stream_handler.setFormatter(logging.Formatter(log_format))
root_logger.addHandler(stream_handler)

file_handler = logging.FileHandler(LOG_FILE_PATH, mode='w', encoding='utf-8')
file_handler.setLevel(logging.INFO)
file_handler.setFormatter(logging.Formatter(log_format))
root_logger.addHandler(file_handler)

logger = logging.getLogger(__name__)

# UI translations. A missing or malformed file is not fatal: the interface
# then falls back to whatever entries are available (possibly none).
i18n = {}
try:
    with open("i18n.json", "r", encoding="utf-8") as f:
        i18n = json.load(f)
except FileNotFoundError:
    logger.warning("Arquivo i18n.json não encontrado! A interface usará textos em inglês como fallback.")
except json.JSONDecodeError:
    logger.error("Erro ao decodificar i18n.json. Verifique a formatação do arquivo.")

# Guarantee that every language offered by the selector has an entry.
if 'pt' not in i18n:
    i18n['pt'] = i18n.get('en', {})
if 'en' not in i18n:
    i18n['en'] = {}
if 'zh' not in i18n:
    i18n['zh'] = i18n.get('en', {})

try:
    # config.yaml contains non-ASCII text, so the encoding is pinned instead
    # of relying on the platform default.
    with open("config.yaml", 'r', encoding="utf-8") as f:
        config = yaml.safe_load(f)
    WORKSPACE_DIR = config['application']['workspace_dir']
    aduc = AducOrchestrator(workspace_dir=WORKSPACE_DIR)
    logger.info("Orquestrador ADUC e Especialistas inicializados com sucesso.")
except Exception as e:
    logger.error(f"ERRO CRÍTICO ao inicializar: {e}", exc_info=True)
    # Exit with a non-zero status so the failure is visible to the caller;
    # the interactive ``exit()`` helper would report success (status 0).
    sys.exit(1)
67
+
68
+ # --- 2. WRAPPERS DA UI ---
69
+
70
def preprocess_base_images_wrapper(uploaded_files):
    """Normalise every uploaded reference image to a 480px square.

    Each file is written to the workspace as ``ref_processed_<index>.png``.

    Args:
        uploaded_files: Uploaded file objects exposing a ``name`` path
            attribute, or an empty/None value.

    Returns:
        ``None`` when nothing was uploaded, otherwise a Gradio update whose
        value is the list of processed image paths.
    """
    if not uploaded_files:
        return None

    processed_paths = []
    for index, upload in enumerate(uploaded_files):
        target_name = f"ref_processed_{index}.png"
        processed_paths.append(aduc.process_image_for_story(upload.name, 480, target_name))
    return gr.update(value=processed_paths)
74
+
75
def run_mode_a_wrapper(prompt, num_keyframes, ref_files, resolution_str, duration_per_fragment, progress=gr.Progress()):
    """Generate a storyboard and then render one keyframe per scene.

    Args:
        prompt: Global description of the story.
        num_keyframes: Number of scenes to request.
        ref_files: Uploaded reference files exposing a ``name`` path attribute.
        resolution_str: Resolution as ``"<a>x<b>"``; only the first number is used.
        duration_per_fragment: Received from the UI but not used by this step.
        progress: Gradio progress tracker.

    Returns:
        Gradio updates for the storyboard view, the keyframe gallery and the
        production accordion (made visible and opened).

    Raises:
        gr.Error: If no reference image was provided.
    """
    if not ref_files:
        raise gr.Error("Por favor, forneça pelo menos uma imagem de referência.")

    ref_paths = [f.name for f in ref_files]

    progress(0.1, desc="Gerando roteiro...")
    storyboard, initial_ref_path, _ = aduc.task_generate_storyboard(prompt, num_keyframes, ref_paths, progress)

    resolution = int(resolution_str.split('x')[0])

    def cb_factory(scene_index, total_scenes):
        """Build a per-step callback that reports progress for one keyframe."""
        start_time = time.time()
        # NOTE(review): assumes the image pipeline runs 30 steps per keyframe;
        # confirm against the image specialist's configuration.
        total_steps = 30

        def callback(pipe_self, step, timestep, callback_kwargs):
            elapsed = time.time() - start_time
            current_step = step + 1
            if current_step > 0:
                # A coarse clock can report zero elapsed time on the first
                # step; treat the rate as unknown instead of dividing by zero.
                it_per_sec = current_step / elapsed if elapsed > 0 else 0.0
                eta = (total_steps - current_step) / it_per_sec if it_per_sec > 0 else 0
                desc = f"Keyframe {scene_index}/{total_scenes}: {int((current_step/total_steps)*100)}% | {current_step}/{total_steps} [{elapsed:.0f}s<{eta:.0f}s, {it_per_sec:.2f}it/s]"
                progress(0.2 + (current_step / total_steps) * 0.8, desc=desc)
            return {}

        return callback

    final_keyframes = aduc.task_generate_keyframes(storyboard, initial_ref_path, prompt, resolution, cb_factory)

    return gr.update(value=storyboard), gr.update(value=final_keyframes), gr.update(visible=True, open=True)
103
+
104
def run_mode_b_wrapper(prompt, num_keyframes, ref_files, progress=gr.Progress()):
    """Generate a storyboard and pick its keyframes from the uploaded photos.

    The first uploaded file is processed into the base reference image; all
    remaining files form the pool the keyframes are selected from.

    Returns:
        Gradio updates for the storyboard view, the keyframe gallery and the
        production accordion (made visible and opened).

    Raises:
        gr.Error: If fewer than two images were provided.
    """
    has_enough_images = bool(ref_files) and len(ref_files) >= 2
    if not has_enough_images:
        raise gr.Error("Modo Fotógrafo requer pelo menos 2 imagens: uma base e uma para o banco de cenas.")

    base_file = ref_files[0]
    pool_files = ref_files[1:]

    processed_base = aduc.process_image_for_story(base_file.name, 480, "base_ref_processed_0.png")
    base_ref_paths = [processed_base]
    pool_ref_paths = [candidate.name for candidate in pool_files]

    progress(0.1, desc="Gerando roteiro...")
    storyboard_result = aduc.task_generate_storyboard(prompt, num_keyframes, base_ref_paths, progress)
    storyboard = storyboard_result[0]

    progress(0.5, desc="IA (Fotógrafo) está selecionando as melhores cenas...")
    selected_keyframes = aduc.task_select_keyframes(storyboard, base_ref_paths, pool_ref_paths)

    storyboard_update = gr.update(value=storyboard)
    gallery_update = gr.update(value=selected_keyframes)
    accordion_update = gr.update(visible=True, open=True)
    return storyboard_update, gallery_update, accordion_update
118
+
119
def run_video_production_wrapper(keyframes, prompt, duration, overlap_percent, echo_frames,
                                 handler_strength, destination_convergence_strength,
                                 guidance, stg, rescaling, num_inference_steps,
                                 video_resolution, use_cont, use_cine,
                                 progress=gr.Progress()):
    """Drive movie production and stream UI updates as fragments arrive.

    This is a generator. It first clears and reveals both outputs, then emits
    one gallery update per produced fragment, and finally emits the update
    carrying the final movie path (``None`` if the orchestrator never
    reported one).

    ``video_resolution`` is expected as ``"<a>x<b>"``; only the first number
    is forwarded.
    """
    # Reveal the outputs in their "working" state before any heavy work starts.
    yield {
        video_fragments_gallery: gr.update(value=None, visible=True),
        final_video_output: gr.update(value=None, visible=True, label="🎬 Produzindo seu filme... Por favor, aguarde."),
    }

    adv_params = dict(
        guidance_scale=guidance,
        stg_scale=stg,
        rescaling_scale=rescaling,
        num_inference_steps=num_inference_steps,
    )
    first_dimension = video_resolution.split('x')[0]
    resolution = int(first_dimension)

    fragments = []
    finished_movie = None

    production = aduc.task_produce_final_movie_with_feedback(
        keyframes, prompt, duration, overlap_percent, echo_frames,
        handler_strength, destination_convergence_strength,
        adv_params, resolution, use_cont, use_cine, progress
    )
    for update in production:
        fragment = update.get("fragment_path")
        if fragment:
            fragments.append(fragment)
            yield {
                video_fragments_gallery: gr.update(value=fragments),
                final_video_output: gr.update(),
            }
            continue

        candidate = update.get("final_path")
        if candidate:
            finished_movie = candidate
            break

    yield {
        video_fragments_gallery: gr.update(),
        final_video_output: gr.update(value=finished_movie, label="🎉 FILME COMPLETO 🎉"),
    }
154
+
155
def get_log_content():
    """Return the text of the session log file.

    When the log file does not exist yet, a placeholder message is returned
    instead of raising.
    """
    try:
        log_file = open(LOG_FILE_PATH, "r", encoding="utf-8")
    except FileNotFoundError:
        return "Arquivo de log ainda não criado. Inicie uma geração."
    with log_file:
        return log_file.read()
162
+
163
def update_ui_language(lang_code):
    """Build the Gradio updates that relabel every translatable component.

    Args:
        lang_code: Key into the ``i18n`` table; unknown codes fall back to the
            ``'en'`` entry (or an empty mapping when that is missing too).

    Returns:
        A dict mapping each UI component to its ``gr.update``. Missing
        translation keys resolve to ``None``.
    """
    english_fallback = i18n.get('en', {})
    lang_map = i18n.get(lang_code, english_fallback)
    tr = lang_map.get  # short alias: look up one translation key

    updates = {}

    # Header and language selector.
    updates[title_md] = gr.update(value=f"# {tr('app_title')}")
    updates[subtitle_md] = gr.update(value=tr('app_subtitle'))
    updates[lang_selector] = gr.update(label=tr('lang_selector_label'))

    # Step 1: story inputs, storyboard and keyframes.
    updates[step1_accordion] = gr.update(label=tr('step1_accordion'))
    updates[prompt_input] = gr.update(label=tr('prompt_label'))
    updates[ref_image_input] = gr.update(label=tr('ref_images_label'))
    updates[num_keyframes_slider] = gr.update(label=tr('keyframes_label'))
    updates[duration_per_fragment_slider] = gr.update(label=tr('duration_label'))
    updates[storyboard_and_keyframes_button] = gr.update(value=tr('storyboard_and_keyframes_button'))
    updates[storyboard_from_photos_button] = gr.update(value=tr('storyboard_from_photos_button'))
    updates[storyboard_output] = gr.update(label=tr('storyboard_output_label'))
    updates[keyframe_gallery] = gr.update(label=tr('keyframes_gallery_label'))

    # Step 3: production controls.
    updates[step3_accordion] = gr.update(label=tr('step3_accordion'))
    updates[step3_description_md] = gr.update(value=tr('step3_description'))
    updates[continuity_director_checkbox] = gr.update(label=tr('continuity_director_label'))
    updates[cinematographer_checkbox] = gr.update(label=tr('cinematographer_label'))
    updates[echo_frames_selector] = gr.update(label=tr('echo_frames_label'), info=tr('echo_frames_info'))
    updates[overlap_percent_slider] = gr.update(label=tr('overlap_percent_label'), info=tr('overlap_percent_info'))
    updates[handler_strength_slider] = gr.update(label=tr('handler_strength_label'), info=tr('handler_strength_info'))
    updates[destination_convergence_slider] = gr.update(label=tr('destination_convergence_label'), info=tr('destination_convergence_info'))
    updates[produce_button] = gr.update(value=tr('produce_button'))

    # Advanced generation parameters.
    updates[advanced_accordion] = gr.update(label=tr('advanced_accordion_label'))
    updates[guidance_scale_slider] = gr.update(label=tr('guidance_label'))
    updates[stg_scale_slider] = gr.update(label=tr('stg_label'))
    updates[rescaling_scale_slider] = gr.update(label=tr('rescaling_label'))
    updates[num_inference_steps_slider] = gr.update(label=tr('steps_label'), info=tr('steps_info'))

    # Outputs and log viewer.
    updates[video_fragments_gallery] = gr.update(label=tr('video_fragments_gallery_label'))
    updates[final_video_output] = gr.update(label=tr('final_movie_with_audio_label'))
    updates[log_accordion] = gr.update(label=tr('log_accordion_label'))
    updates[log_display] = gr.update(label=tr('log_display_label'))
    updates[update_log_button] = gr.update(value=tr('update_log_button'))

    return updates
199
+
200
+ # --- 4. DEFINIÇÃO DA UI ---
201
# Gradio interface: component layout followed by the event wiring.
# NOTE(review): the nesting of the `with` blocks below was reconstructed from a
# rendering that had lost its indentation — confirm against the real file.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Initial labels come from the Portuguese translation table.
    default_lang = i18n.get('pt', {})

    title_md = gr.Markdown(f"# {default_lang.get('app_title')}")
    subtitle_md = gr.Markdown(default_lang.get('app_subtitle'))

    with gr.Row():
        lang_selector = gr.Radio(["pt", "en", "zh"], value="pt", label=default_lang.get('lang_selector_label'))
        resolution_selector = gr.Radio(["480x480", "512x736", "736x1280"], value="480x480", label="Resolução do Vídeo")

    # Step 1: prompt, reference images, storyboard and keyframes.
    with gr.Accordion(default_lang.get('step1_accordion'), open=True) as step1_accordion:
        prompt_input = gr.Textbox(label=default_lang.get('prompt_label'), value="A majestic lion walks across the savanna, sits down, and then roars at the setting sun.")
        with gr.Row():
            num_keyframes_slider = gr.Slider(minimum=3, maximum=10, value=3, step=1, label=default_lang.get('keyframes_label'), info="Mínimo de 3 para a lógica do cineasta.")
            duration_per_fragment_slider = gr.Slider(label=default_lang.get('duration_label'), minimum=2.0, maximum=10.0, value=4.0, step=0.1)
        ref_image_input = gr.File(label=default_lang.get('ref_images_label'), file_count="multiple", file_types=["image"])
        with gr.Row():
            storyboard_and_keyframes_button = gr.Button(default_lang.get('storyboard_and_keyframes_button'), variant="primary")
            storyboard_from_photos_button = gr.Button(default_lang.get('storyboard_from_photos_button'))
        gr.Markdown(f"*{default_lang.get('step1_mode_b_info')}*")
        storyboard_output = gr.JSON(label=default_lang.get('storyboard_output_label'))
        keyframe_gallery = gr.Gallery(label=default_lang.get('keyframes_gallery_label'), visible=True, object_fit="contain", height="auto", type="filepath")

    # Step 3: production controls; hidden until one of the step-1 handlers
    # returns an update that makes this accordion visible.
    with gr.Accordion(default_lang.get('step3_accordion'), open=False, visible=False) as step3_accordion:
        step3_description_md = gr.Markdown(default_lang.get('step3_description'))
        with gr.Row():
            continuity_director_checkbox = gr.Checkbox(label=default_lang.get('continuity_director_label'), value=True)
            # Created hidden; its value is still sent to the production handler.
            cinematographer_checkbox = gr.Checkbox(label=default_lang.get('cinematographer_label'), value=True, visible=False)

        gr.Markdown("--- \n**Controles de Continuidade e Edição:**")
        with gr.Row():
            echo_frames_selector = gr.Radio(choices=[8, 16, 24], value=8, label=default_lang.get('echo_frames_label'), info=default_lang.get('echo_frames_info'))
            overlap_percent_slider = gr.Slider(label=default_lang.get('overlap_percent_label'), minimum=0, maximum=50, value=15, step=1, info=default_lang.get('overlap_percent_info'))

        gr.Markdown("**Controle de Influência (Convergência):**")
        with gr.Row():
            handler_strength_slider = gr.Slider(label=default_lang.get('handler_strength_label'), minimum=0.0, maximum=1.0, value=0.5, step=0.05, info=default_lang.get('handler_strength_info'))
            destination_convergence_slider = gr.Slider(label=default_lang.get('destination_convergence_label'), minimum=0.0, maximum=1.0, value=0.75, step=0.05, info=default_lang.get('destination_convergence_info'))

        with gr.Accordion(default_lang.get('advanced_accordion_label'), open=False) as advanced_accordion:
            with gr.Row():
                guidance_scale_slider = gr.Slider(label=default_lang.get('guidance_label'), minimum=1.0, maximum=15.0, value=1.0, step=0.5)
                stg_scale_slider = gr.Slider(label=default_lang.get('stg_label'), minimum=0.0, maximum=10.0, value=0.0, step=0.5)
                rescaling_scale_slider = gr.Slider(label=default_lang.get('rescaling_label'), minimum=0.0, maximum=1.0, value=0.15, step=0.05)
            with gr.Row():
                num_inference_steps_slider = gr.Slider(label=default_lang.get('steps_label'), minimum=4, maximum=50, value=7, step=1, info=default_lang.get('steps_info'))
        produce_button = gr.Button(default_lang.get('produce_button'), variant="primary")

    # Outputs start hidden; the production handler's first update reveals them.
    video_fragments_gallery = gr.Gallery(label=default_lang.get('video_fragments_gallery_label'), visible=False, object_fit="contain", height="auto", type="filepath")
    final_video_output = gr.Video(label=default_lang.get('final_movie_with_audio_label'), visible=False)

    with gr.Accordion("📝 Log de Geração (Detalhado)", open=False) as log_accordion:
        log_display = gr.Textbox(label="Log da Sessão", lines=20, interactive=False, autoscroll=True)
        update_log_button = gr.Button("Atualizar Log")

    # --- 5. UI CONNECTIONS ---
    # The translation function is called once here only to obtain the list of
    # components it updates, which becomes the output list of the language switch.
    all_ui_components = list(update_ui_language('pt').keys())
    lang_selector.change(fn=update_ui_language, inputs=lang_selector, outputs=all_ui_components)

    # Uploaded reference images are preprocessed and written back into the
    # same file component.
    ref_image_input.upload(fn=preprocess_base_images_wrapper, inputs=ref_image_input, outputs=ref_image_input)

    # Mode A: storyboard plus generated keyframes.
    storyboard_and_keyframes_button.click(
        fn=run_mode_a_wrapper,
        inputs=[prompt_input, num_keyframes_slider, ref_image_input, resolution_selector, duration_per_fragment_slider],
        outputs=[storyboard_output, keyframe_gallery, step3_accordion]
    )

    # Mode B: storyboard plus keyframes selected from the uploaded photos.
    storyboard_from_photos_button.click(
        fn=run_mode_b_wrapper,
        inputs=[prompt_input, num_keyframes_slider, ref_image_input],
        outputs=[storyboard_output, keyframe_gallery, step3_accordion]
    )

    # The input order must match the positional parameters of
    # run_video_production_wrapper.
    produce_button.click(
        fn=run_video_production_wrapper,
        inputs=[
            keyframe_gallery, prompt_input, duration_per_fragment_slider,
            overlap_percent_slider,
            echo_frames_selector,
            handler_strength_slider,
            destination_convergence_slider,
            guidance_scale_slider, stg_scale_slider, rescaling_scale_slider,
            num_inference_steps_slider,
            resolution_selector, continuity_director_checkbox, cinematographer_checkbox
        ],
        outputs=[video_fragments_gallery, final_video_output]
    )

    update_log_button.click(
        fn=get_log_content,
        inputs=[],
        outputs=[log_display]
    )
294
+
295
if __name__ == "__main__":
    # Every launch starts from an empty workspace: drop the previous one (if
    # any) and recreate the directory.
    workspace_already_exists = os.path.exists(WORKSPACE_DIR)
    if workspace_already_exists:
        logger.info(f"Limpando o workspace anterior em: {WORKSPACE_DIR}")
        shutil.rmtree(WORKSPACE_DIR)
    os.makedirs(WORKSPACE_DIR)

    logger.info("Aplicação iniciada. Lançando interface Gradio...")
    queued_demo = demo.queue()
    queued_demo.launch()
audio_specialist.py ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # audio_specialist.py (Versão final para áudio dinâmico por fragmento)
2
+ # Especialista ADUC para geração de áudio, com gerenciamento de memória GPU.
3
+
4
+ import torch
5
+ import logging
6
+ import subprocess
7
+ import os
8
+ import time
9
+ import yaml
10
+ import gc
11
+ from pathlib import Path
12
+ import gradio as gr
13
+
14
+ # Importa as classes e funções necessárias do MMAudio
15
+ try:
16
+ from mmaudio.eval_utils import ModelConfig, all_model_cfg, generate as mmaudio_generate, load_video, make_video
17
+ from mmaudio.model.flow_matching import FlowMatching
18
+ from mmaudio.model.networks import MMAudio, get_my_mmaudio
19
+ from mmaudio.model.utils.features_utils import FeaturesUtils
20
+ from mmaudio.model.sequence_config import SequenceConfig
21
+ except ImportError:
22
+ raise ImportError("MMAudio não foi encontrado. Por favor, instale-o a partir do GitHub: git+https://github.com/hkchengrex/MMAudio.git")
23
+
24
+ logger = logging.getLogger(__name__)
25
+
26
class AudioSpecialist:
    """Generate an audio track for a video fragment with MMAudio.

    The models are loaded onto the CPU at construction time, moved to the
    target device only for the duration of a generation call, and moved back
    afterwards.
    """

    def __init__(self, workspace_dir):
        """Select device and dtype, then load the MMAudio models onto the CPU.

        Args:
            workspace_dir: Directory where fragments with audio are written.
        """
        cuda_available = torch.cuda.is_available()
        self.device = "cuda" if cuda_available else "cpu"
        self.cpu_device = torch.device("cpu")
        if self.device == "cuda":
            self.dtype = torch.bfloat16
        else:
            self.dtype = torch.float32
        self.workspace_dir = workspace_dir

        self.model_config: ModelConfig = all_model_cfg['large_44k_v2']
        self.net: MMAudio = None
        self.feature_utils: FeaturesUtils = None
        self.seq_cfg: SequenceConfig = None

        self._load_models_to_cpu()

    def _load_models_to_cpu(self):
        """Download (if needed) and load the network and feature utilities on the CPU.

        Any failure is logged and leaves ``self.net`` set to ``None``, which
        ``generate_audio_for_video`` treats as "model not loaded".
        """
        cfg = self.model_config
        try:
            logger.info("Verificando e baixando modelos MMAudio, se necessário...")
            cfg.download_if_needed()

            self.seq_cfg = cfg.seq_cfg

            logger.info(f"Carregando modelo MMAudio: {cfg.model_name} para a CPU...")
            self.net = get_my_mmaudio(cfg.model_name).eval()
            weights = torch.load(cfg.model_path, map_location=self.cpu_device, weights_only=True)
            self.net.load_weights(weights)

            logger.info("Carregando utilitários de features do MMAudio para a CPU...")
            feature_kwargs = {
                "tod_vae_ckpt": cfg.vae_path,
                "synchformer_ckpt": cfg.synchformer_ckpt,
                "enable_conditions": True,
                "mode": cfg.mode,
                "bigvgan_vocoder_ckpt": cfg.bigvgan_16k_path,
                "need_vae_encoder": False,
            }
            self.feature_utils = FeaturesUtils(**feature_kwargs)
            self.feature_utils = self.feature_utils.eval()
            self.net.to(self.cpu_device)
            self.feature_utils.to(self.cpu_device)
            logger.info("Especialista de áudio pronto na CPU.")
        except Exception as load_error:
            logger.error(f"Falha ao carregar modelos de áudio: {load_error}", exc_info=True)
            self.net = None

    def to_gpu(self):
        """Move both models to the configured device; no-op when running on CPU."""
        if self.device == 'cpu':
            return
        logger.info(f"Movendo especialista de áudio para a GPU ({self.device})...")
        for module in (self.net, self.feature_utils):
            module.to(self.device, self.dtype)

    def to_cpu(self):
        """Move both models back to the CPU and release cached GPU memory.

        No-op when running on CPU.
        """
        if self.device == 'cpu':
            return
        logger.info("Descarregando especialista de áudio da GPU...")
        for module in (self.net, self.feature_utils):
            module.to(self.cpu_device)
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    def generate_audio_for_video(self, video_path: str, prompt: str, negative_prompt: str, duration_seconds: float) -> str:
        """Generate audio for a video fragment and write a new video that includes it.

        Args:
            video_path: Path of the silent video fragment.
            prompt: Text describing the scene.
            negative_prompt: Text passed to the generator as negative conditioning.
            duration_seconds: Length of video to load.

        Returns:
            Path of the ``<stem>_com_audio.mp4`` file written to the workspace,
            or ``video_path`` unchanged when the fragment is shorter than one
            second.

        Raises:
            gr.Error: If the MMAudio model failed to load.
        """
        if self.net is None:
            raise gr.Error("Modelo MMAudio não está carregado. Não é possível gerar áudio.")

        separator = "------------------------------------------------------"
        fragment_file = os.path.basename(video_path)
        logger.info(separator)
        logger.info("--- Gerando Áudio para Fragmento de Vídeo ---")
        logger.info(f"--- Vídeo Fragmento: {fragment_file}")
        logger.info(f"--- Duração: {duration_seconds:.2f}s")
        logger.info(f"--- Prompt (Descrição da Cena): '{prompt}'")

        if duration_seconds < 1:
            logger.warning("Fragmento muito curto (<1s). Retornando vídeo silencioso.")
            logger.info(separator)
            return video_path

        if self.device == 'cpu':
            logger.warning("Gerando áudio na CPU. Isso pode ser muito lento.")

        try:
            self.to_gpu()
            with torch.no_grad():
                # Seeded from the wall clock, so each call uses a different seed.
                seed = int(time.time())
                rng = torch.Generator(device=self.device).manual_seed(seed)
                fm = FlowMatching(min_sigma=0, inference_mode='euler', num_steps=25)

                video_info = load_video(Path(video_path), duration_seconds)
                # The sequence lengths are derived from the loaded duration.
                self.seq_cfg.duration = video_info.duration_sec
                self.net.update_seq_lengths(
                    self.seq_cfg.latent_seq_len,
                    self.seq_cfg.clip_seq_len,
                    self.seq_cfg.sync_seq_len,
                )

                clip_batch = video_info.clip_frames.unsqueeze(0)
                sync_batch = video_info.sync_frames.unsqueeze(0)
                audios = mmaudio_generate(
                    clip_video=clip_batch,
                    sync_video=sync_batch,
                    text=[prompt],
                    negative_text=[negative_prompt],
                    feature_utils=self.feature_utils,
                    net=self.net,
                    fm=fm,
                    rng=rng,
                    cfg_strength=4.5
                )
                audio_waveform = audios.float().cpu()[0]

                fragment_name = Path(video_path).stem
                output_video_path = os.path.join(self.workspace_dir, f"{fragment_name}_com_audio.mp4")

                make_video(video_info, Path(output_video_path), audio_waveform, sampling_rate=self.seq_cfg.sampling_rate)
                logger.info(f"--- Fragmento com áudio salvo em: {os.path.basename(output_video_path)}")
                logger.info(separator)
                return output_video_path
        finally:
            # Always hand the GPU back, even when generation fails.
            self.to_cpu()
132
+
133
+ # Singleton instantiation
134
# Build the module-level specialist from config.yaml. Any failure (missing
# config, missing key, model initialisation error) is logged and leaves the
# singleton set to None so importing this module never raises here.
try:
    with open("config.yaml", 'r') as config_stream:
        config = yaml.safe_load(config_stream)
    application_settings = config['application']
    WORKSPACE_DIR = application_settings['workspace_dir']
    audio_specialist_singleton = AudioSpecialist(workspace_dir=WORKSPACE_DIR)
except Exception as init_error:
    logger.error(f"Não foi possível inicializar o AudioSpecialist: {init_error}")
    audio_specialist_singleton = None
config.yaml ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # config.yaml
2
+ # Configuração central para a aplicação Deformes4D e seus especialistas.
3
+
4
+ application:
5
+ workspace_dir: "deformes_workspace"
6
+
7
+ # Configuração para Hugging Face Spaces
8
+ sdk: gradio
9
+ app_file: app.py
10
+
11
+ specialists:
12
+ flux:
13
+ # Define quantas GPUs o pool do Flux deve tentar alocar.
14
+ # Se não houver GPUs suficientes, o hardware_manager lançará um erro.
15
+ # Se 0, usará a CPU.
16
+ gpus_required: 2
17
+
18
+ ltx:
19
+ # Define quantas GPUs o pool do LTX deve tentar alocar.
20
+ gpus_required: 2
21
+
22
+ # Aponta para o arquivo de configuração específico do modelo LTX.
23
+ # Atualmente aponta para a configuração do modelo 0.9.8 distilled.
24
+ config_file: "configs/ltxv-13b-0.9.8-distilled.yaml"
deformes4D_engine.py CHANGED
@@ -1,6 +1,7 @@
1
  # deformes4D_engine.py
2
  # Copyright (C) 4 de Agosto de 2025 Carlos Rodrigues dos Santos
3
  #
 
4
  # MODIFICATIONS FOR ADUC-SDR:
5
  # Copyright (C) 2025 Carlos Rodrigues dos Santos. All rights reserved.
6
  #
@@ -8,9 +9,6 @@
8
  # video fragment generation, latent manipulation, and dynamic editing,
9
  # governed by the ADUC orchestrator.
10
  # This component is licensed under the GNU Affero General Public License v3.0.
11
- #
12
- # AVISO DE PATENTE PENDENTE: O método e sistema ADUC implementado neste
13
- # software está em processo de patenteamento. Consulte NOTICE.md.
14
 
15
  import os
16
  import time
 
1
  # deformes4D_engine.py
2
  # Copyright (C) 4 de Agosto de 2025 Carlos Rodrigues dos Santos
3
  #
4
+ #
5
  # MODIFICATIONS FOR ADUC-SDR:
6
  # Copyright (C) 2025 Carlos Rodrigues dos Santos. All rights reserved.
7
  #
 
9
  # video fragment generation, latent manipulation, and dynamic editing,
10
  # governed by the ADUC orchestrator.
11
  # This component is licensed under the GNU Affero General Public License v3.0.
 
 
 
12
 
13
  import os
14
  import time
packages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ ffmpeg