Aduc-sdr-2_5s

Paused

App Files Files Community

x2XcarleX2x committed on Sep 24

Commit

1b22aaf

verified ·

1 Parent(s): 2bb289a

Update app_wan.py

Browse files

Files changed (1) hide show

app_wan.py +302 -128

app_wan.py CHANGED Viewed

@@ -1,145 +1,319 @@
-# app_wan.py (Aplicação ADUC-SDR Completa com Interface Gradio)
-import gradio as gr
 import torch
-import logging
 from PIL import Image
-import os
-# --- Importa os Pilares da nossa Arquitetura ADUC ---
-from aduc_framework.managers.vae_wan_manager import vae_wan_manager_singleton as VaeWan
-from aduc_framework.managers.wan_manager import wan_manager_singleton as Wan
-from aduc_framework.types import LatentConditioningItem
-from aduc_framework.tools.video_encode_tool import video_encode_tool_singleton as VideoTool
-# --- Configuração do Logging ---
-logging.basicConfig(level=logging.DEBUG, format='%(message)s')
-logger = logging.getLogger("ADUC_WAN_APP")
-# --- Lógica de Geração #1 (Modo ADUC Avançado) ---
-def generate_aduc_video(
-    image1, strength1, frame_idx1, image2, strength2, frame_idx2, image3, strength3, frame_idx3,
-    motion_prompt, progress=gr.Progress(track_tqdm=True)
-):
     """
-    Função principal para o fluxo ADUC: orquestra a conversão para latentes,
-    a geração controlada e a decodificação final.
     """
-    try:
-        progress(0, desc="[Diretor ADUC] Validando o plano de produção...")
-        production_plan = []
-        if image1: production_plan.append({"image": image1, "frame_index": int(frame_idx1), "strength": strength1})
-        if image2: production_plan.append({"image": image2, "frame_index": int(frame_idx2), "strength": strength2})
-        if image3: production_plan.append({"image": image3, "frame_index": int(frame_idx3), "strength": strength3})
-        if not production_plan: raise gr.Error("Modo ADUC: Forneça pelo menos uma imagem para iniciar.")
-        progress(0.1, desc="[VaeWan] Convertendo imagens para o espaço latente...")
-        pil_images = [item["image"] for item in production_plan]
-        latent_tensors = VaeWan.encode_batch(pil_images, target_resolution=(480, 832))
-        progress(0.2, desc="[Diretor ADUC] Montando ordens de serviço (LatentConditioningItems)...")
-        conditioning_items = [LatentConditioningItem(latent_tensor=latent_tensors[i], media_frame_number=item["frame_index"], conditioning_strength=item["strength"]) for i, item in enumerate(production_plan)]
-        progress(0.3, desc="[WanManager ADUC] Gerando fragmento latente (pode levar um momento)...")
-        job_params = {"conditioning_items_data": conditioning_items, "motion_prompt": motion_prompt, "height": 480, "width": 832, "video_total_frames": 81, "num_inference_steps": 8}
-        video_latents, _ = Wan.generate_latent_fragment(**job_params)
-        progress(0.8, desc="[VaeWan] Decodificando o resultado final para pixels...")
-        video_pixels = VaeWan.decode(video_latents)
-        progress(0.9, desc="[VideoTool] Salvando o arquivo de vídeo final...")
-        output_path = "demo_aduc_wan_output.mp4"
-        VideoTool.save_video_from_tensor(video_pixels, path=output_path, fps=16)
-        progress(1.0, desc="Produção Concluída!")
-        return output_path
-    except Exception as e:
-        logger.error("Ocorreu um erro durante a Geração ADUC.", exc_info=True)
-        raise gr.Error(f"Falha na Geração ADUC: {e}")
-# --- Lógica de Geração #2 (Modo Simples - demowan.py) ---
-def generate_simple_video(
-    start_image, end_image, motion_prompt, progress=gr.Progress(track_tqdm=True)
 ):
     """
-    Função para o fluxo simples: chama o WanManager no modo de compatibilidade
-    e salva o resultado.
     """
-    try:
-        progress(0, desc="[Diretor Simples] Validando imagens...")
-        if start_image is None or end_image is None:
-            raise gr.Error("Modo Simples: Forneça uma imagem de Início e Fim.")
-        progress(0.2, desc="[WanManager Simples] Gerando clipe de pixels (pode levar um momento)...")
-        job_params = {"motion_prompt": motion_prompt, "video_total_frames": 81, "num_inference_steps": 8}
-        video_pixels = Wan.generate_clip_from_images(start_image, end_image, **job_params)
-        progress(0.9, desc="[VideoTool] Salvando o arquivo de vídeo final...")
-        output_path = "demo_simple_wan_output.mp4"
-        # A saída do generate_clip_from_images é (B, F, H, W, C), o VideoTool espera (B, C, F, H, W)
-        video_pixels_c_first = video_pixels.permute(0, 4, 1, 2, 3)
-        VideoTool.save_video_from_tensor(video_pixels_c_first, path=output_path, fps=16)
-        progress(1.0, desc="Produção Concluída!")
-        return output_path
-    except Exception as e:
-        logger.error("Ocorreu um erro durante a Geração Simples.", exc_info=True)
-        raise gr.Error(f"Falha na Geração Simples: {e}")
-# --- Construção da Interface Gráfica (UI) ---
-with gr.Blocks(theme=gr.themes.Soft(), title="ADUC-SDR Wan Demo") as demo:
-    gr.Markdown(
-        """
-        # 🎬 ADUC-SDR: Estúdio de Produção Wan2.2 (Lightning)
-        ### Bem-vindo, Mestre Deformes!
-        Use os modos abaixo para controlar o especialista `WanManager` e criar seu vídeo.
-        """
-    )
-    with gr.Tabs() as tabs:
-        with gr.TabItem("🎛️ Modo ADUC (Avançado)"):
-            gr.Markdown("Controle preciso usando a metodologia ADUC com múltiplos keyframes, índices e forças. O vídeo será gerado no espaço latente.")
-            with gr.Row():
-                with gr.Column(scale=1):
-                    gr.Markdown("### Keyframe 1 (Inicial)")
-                    img1 = gr.Image(type="pil", label="Imagem")
-                    strength1 = gr.Slider(0.0, 1.0, value=1.0, label="Força")
-                    frame_idx1 = gr.Number(value=0, label="Índice no Vídeo", precision=0)
-                with gr.Column(scale=1):
-                    gr.Markdown("### Keyframe 2 (Intermediário)")
-                    img2 = gr.Image(type="pil", label="Imagem")
-                    strength2 = gr.Slider(0.0, 1.0, value=0.6, label="Força")
-                    frame_idx2 = gr.Number(value=40, label="Índice no Vídeo", precision=0)
-                with gr.Column(scale=1):
-                    gr.Markdown("### Keyframe 3 (Final)")
-                    img3 = gr.Image(type="pil", label="Imagem")
-                    strength3 = gr.Slider(0.0, 1.0, value=1.0, label="Força")
-                    frame_idx3 = gr.Number(value=80, label="Índice no Vídeo", precision=0)
-            motion_prompt_aduc = gr.Textbox(label="📝 Prompt de Movimento", lines=2, placeholder="Ex: a slow dolly zoom out, revealing a vast alien desert at sunset")
-            generate_btn_aduc = gr.Button("🎬 Gerar Vídeo (Modo ADUC)", variant="primary")
-        with gr.TabItem("▶️ Modo Simples (Início/Fim)"):
-            gr.Markdown("Gere um vídeo diretamente a partir de uma imagem de início e uma de fim, como no `demowan.py`. A geração ocorre no espaço de pixels.")
-            with gr.Row():
-                img_start_simple = gr.Image(type="pil", label="Imagem de Início")
-                img_end_simple = gr.Image(type="pil", label="Imagem de Fim")
-            motion_prompt_simple = gr.Textbox(label="📝 Prompt de Movimento", lines=2, placeholder="Ex: a character dodges the missiles")
-            generate_btn_simple = gr.Button("🎬 Gerar Vídeo (Modo Simples)", variant="primary")
-    output_video = gr.Video(label="Resultado da Produção", interactive=False)
-    # Conexões dos botões às suas respectivas lógicas de geração
-    generate_btn_aduc.click(
-        fn=generate_aduc_video,
-        inputs=[img1, strength1, frame_idx1, img2, strength2, frame_idx2, img3, strength3, frame_idx3, motion_prompt_aduc],
-        outputs=[output_video]
     )
-    generate_btn_simple.click(
-        fn=generate_simple_video,
-        inputs=[img_start_simple, img_end_simple, motion_prompt_simple],
-        outputs=[output_video]
     )
 if __name__ == "__main__":
     # Cria a pasta e as imagens de exemplo se não existirem
     if not os.path.exists("examples"):

+# app_wan.py
+import os
+# PyTorch 2.8 (temporary hack)
+os.system('pip install --upgrade --pre --extra-index-url https://download.pytorch.org/whl/nightly/cu126 "torch<2.9" spaces')
+# --- 1. Model Download and Setup (Diffusers Backend) ---
+import spaces
 import torch
+from diffusers import FlowMatchEulerDiscreteScheduler
+from pipeline_wan_i2v import WanImageToVideoPipeline
+from diffusers.models.transformers.transformer_wan import WanTransformer3DModel
+from diffusers.utils.export_utils import export_to_video
+import gradio as gr
+import tempfile
+import numpy as np
 from PIL import Image
+import random
+import gc
+from gradio_client import Client, handle_file # Import for API call
+# --- Constants and Model Loading ---
+MODEL_ID = "Wan-AI/Wan2.2-I2V-A14B-Diffusers"
+# --- NEW: Flexible Dimension Constants ---
+MAX_DIMENSION = 832
+MIN_DIMENSION = 480
+DIMENSION_MULTIPLE = 16
+SQUARE_SIZE = 480
+MAX_SEED = np.iinfo(np.int32).max
+FIXED_FPS = 16
+MIN_FRAMES_MODEL = 8
+MAX_FRAMES_MODEL = 81
+MIN_DURATION = round(MIN_FRAMES_MODEL/FIXED_FPS, 1)
+MAX_DURATION = round(MAX_FRAMES_MODEL/FIXED_FPS, 1)
+default_negative_prompt = "色调艳丽，过曝，静态，细节模糊不清，字幕，风格，作品，画作，画面，静止，整体发灰，最差质量，低质量，JPEG压缩残留，丑陋的，残缺的，多余的手指，画得不好的手部，画得不好的脸部，畸形的，毁容的，形态畸形的肢体，手指融合，静止不动的画面，杂乱的背景，三条腿，背景人很多，倒着走,过曝，"
+print("Loading models into memory. This may take a few minutes...")
+pipe    = WanImageToVideoPipeline.from_pretrained(
+    MODEL_ID,
+    transformer=WanTransformer3DModel.from_pretrained('cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers',
+        subfolder='transformer',
+        torch_dtype=torch.bfloat16,
+        device_map='auto',
+    ),
+    transformer_2=WanTransformer3DModel.from_pretrained('cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers',
+        subfolder='transformer_2',
+        torch_dtype=torch.bfloat16,
+        device_map='auto',
+    ),
+    torch_dtype=torch.bfloat16,
+)
+pipe.scheduler = FlowMatchEulerDiscreteScheduler.from_config(pipe.scheduler.config, shift=32.0)
+# ====================================================================================
+# A fusão do LoRA "Lightning" é ESSENCIAL para a geração em 8 passos.
+# Trazemos essa lógica para cá, mantendo a otimização completa desativada.
+# ====================================================================================
+print("Applying 8-step Lightning LoRA...")
+try:
+    # Carrega os pesos do LoRA para os dois transformadores
+    pipe.load_lora_weights(
+        "Kijai/WanVideo_comfy",
+        weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
+        adapter_name="lightx2v"
+    )
+    kwargs_lora = {"load_into_transformer_2": True}
+    pipe.load_lora_weights(
+        "Kijai/WanVideo_comfy",
+        weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
+        adapter_name="lightx2v_2", **kwargs_lora
+    )
+    # Define como os adaptadores LoRA serão combinados
+    pipe.set_adapters(["lightx2v", "lightx2v_2"], adapter_weights=[1.0, 1.0])
+    # Funde os LoRAs diretamente nos pesos do modelo para acelerar a inferência.
+    print("Fusing LoRA weights into the main model...")
+    pipe.fuse_lora(adapter_names=["lightx2v"], lora_scale=3.0, components=["transformer"])
+    pipe.fuse_lora(adapter_names=["lightx2v_2"], lora_scale=1.0, components=["transformer_2"])
+    # Descarrega os pesos LoRA da memória, pois eles já foram incorporados.
+    pipe.unload_lora_weights()
+    print("Lightning LoRA successfully fused. Model is ready for fast 8-step generation.")
+except Exception as e:
+    print(f"AVISO: Falha ao carregar ou fundir o LoRA. A geração pode ser lenta ou de baixa qualidade. Erro: {e}")
+print("All models loaded. Gradio app is ready.")
+# --- 2. Image Processing and Application Logic ---
+def generate_end_frame(start_img, gen_prompt, progress=gr.Progress(track_tqdm=True)):
+    """Calls an external Gradio API to generate an image."""
+    if start_img is None:
+        raise gr.Error("Please provide a Start Frame first.")
+    hf_token = os.getenv("HF_TOKEN")
+    if not hf_token:
+        raise gr.Error("HF_TOKEN not found in environment variables. Please set it in your Space secrets.")
+    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmpfile:
+        start_img.save(tmpfile.name)
+        tmp_path = tmpfile.name
+    progress(0.1, desc="Connecting to image generation API...")
+    client = Client("multimodalart/nano-banana")
+    progress(0.5, desc=f"Generating with prompt: '{gen_prompt}'...")
+    try:
+        result = client.predict(
+            prompt=gen_prompt,
+            images=[
+                {"image": handle_file(tmp_path)}
+            ],
+            manual_token=hf_token,
+            api_name="/unified_image_generator"
+        )
+    finally:
+        os.remove(tmp_path)
+    progress(1.0, desc="Done!")
+    print(result)
+    return result
+def switch_to_upload_tab():
+    """Returns a gr.Tabs update to switch to the first tab."""
+    return gr.Tabs(selected="upload_tab")
+def process_image_for_video(image: Image.Image) -> Image.Image:
     """
+    Resizes an image based on the following rules for video generation.
     """
+    width, height = image.size
+    if width == height:
+        return image.resize((SQUARE_SIZE, SQUARE_SIZE), Image.Resampling.LANCZOS)
+    aspect_ratio = width / height
+    new_width, new_height = width, height
+    if new_width > MAX_DIMENSION or new_height > MAX_DIMENSION:
+        if aspect_ratio > 1: scale = MAX_DIMENSION / new_width
+        else: scale = MAX_DIMENSION / new_height
+        new_width *= scale; new_height *= scale
+    if new_width < MIN_DIMENSION or new_height < MIN_DIMENSION:
+        if aspect_ratio > 1: scale = MIN_DIMENSION / new_height
+        else: scale = MIN_DIMENSION / new_width
+        new_width *= scale; new_height *= scale
+    final_width = int(round(new_width / DIMENSION_MULTIPLE) * DIMENSION_MULTIPLE)
+    final_height = int(round(new_height / DIMENSION_MULTIPLE) * DIMENSION_MULTIPLE)
+    final_width = max(final_width, MIN_DIMENSION if aspect_ratio < 1 else SQUARE_SIZE)
+    final_height = max(final_height, MIN_DIMENSION if aspect_ratio > 1 else SQUARE_SIZE)
+    return image.resize((final_width, final_height), Image.Resampling.LANCZOS)
+def resize_and_crop_to_match(target_image, reference_image):
+    """Resizes and center-crops the target image to match the reference image's dimensions."""
+    ref_width, ref_height = reference_image.size
+    target_width, target_height = target_image.size
+    scale = max(ref_width / target_width, ref_height / target_height)
+    new_width, new_height = int(target_width * scale), int(target_height * scale)
+    resized = target_image.resize((new_width, new_height), Image.Resampling.LANCZOS)
+    left, top = (new_width - ref_width) // 2, (new_height - ref_height) // 2
+    return resized.crop((left, top, left + ref_width, top + ref_height))
+def generate_video(
+    start_image_pil,
+    end_image_pil,
+    prompt,
+    negative_prompt=default_negative_prompt,
+    duration_seconds=2.1,
+    steps=8,
+    guidance_scale=1,
+    guidance_scale_2=1,
+    seed=42,
+    randomize_seed=False,
+    progress=gr.Progress(track_tqdm=True)
 ):
     """
+    Generates a video by interpolating between a start and end image, guided by a text prompt.
     """
+    if start_image_pil is None or end_image_pil is None:
+        raise gr.Error("Please upload both a start and an end image.")
+    progress(0.1, desc="Preprocessing images...")
+    processed_start_image = process_image_for_video(start_image_pil)
+    processed_end_image = resize_and_crop_to_match(end_image_pil, processed_start_image)
+    target_height, target_width = processed_start_image.height, processed_start_image.width
+    current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
+    num_frames = np.clip(int(round(duration_seconds * FIXED_FPS)), MIN_FRAMES_MODEL, MAX_FRAMES_MODEL)
+    progress(0.2, desc=f"Generating {num_frames} frames at {target_width}x{target_height} (seed: {current_seed})...")
+    # CORREÇÃO FINAL: O gerador é criado na CPU (padrão) para evitar o erro de dispositivo 'meta'.
+    # A pipeline cuidará de mover os latentes para a GPU.
+    generator = torch.Generator().manual_seed(current_seed)
+    output_frames_list = pipe(
+        image=processed_start_image,
+        last_image=processed_end_image,
+        prompt=prompt,
+        negative_prompt=negative_prompt,
+        height=target_height,
+        width=target_width,
+        num_frames=num_frames,
+        guidance_scale=float(guidance_scale),
+        guidance_scale_2=float(guidance_scale_2),
+        num_inference_steps=int(steps),
+        generator=generator,
+    ).frames[0]
+    progress(0.9, desc="Encoding and saving video...")
+    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
+        video_path = tmpfile.name
+    export_to_video(output_frames_list, video_path, fps=FIXED_FPS)
+    progress(1.0, desc="Done!")
+    return video_path, current_seed
+# --- 3. Gradio User Interface ---
+css = '''
+.fillable{max-width: 1100px !important}
+.dark .progress-text {color: white}
+#general_items{margin-top: 2em}
+#group_all{overflow:visible}
+#group_all .styler{overflow:visible}
+#group_tabs .tabitem{padding: 0}
+.tab-wrapper{margin-top: -33px;z-index: 999;position: absolute;width: 100%;background-color: var(--block-background-fill);padding: 0;}
+#component-9-button{width: 50%;justify-content: center}
+#component-11-button{width: 50%;justify-content: center}
+#or_item{text-align: center; padding-top: 1em; padding-bottom: 1em; font-size: 1.1em;margin-left: .5em;margin-right: .5em;width: calc(100% - 1em)}
+#fivesec{margin-top: 5em;margin-left: .5em;margin-right: .5em;width: calc(100% - 1em)}
+'''
+with gr.Blocks(theme=gr.themes.Citrus(), css=css) as app:
+    gr.Markdown("# Wan 2.2 Aduca-sdr")
+    with gr.Row(elem_id="general_items"):
+        with gr.Column():
+            with gr.Group(elem_id="group_all"):
+                with gr.Row():
+                    start_image = gr.Image(type="pil", label="Start Frame", sources=["upload", "clipboard"])
+                    with gr.Tabs(elem_id="group_tabs") as tabs:
+                        with gr.TabItem("Upload", id="upload_tab"):
+                            end_image = gr.Image(type="pil", label="End Frame", sources=["upload", "clipboard"])
+                        with gr.TabItem("Generate", id="generate_tab"):
+                            generate_5seconds = gr.Button("Generate scene 5 seconds in the future", elem_id="fivesec")
+                            gr.Markdown("Generate a custom end-frame with an edit model like [Nano Banana](https://huggingface.co/spaces/multimodalart/nano-banana) or [Qwen Image Edit](https://huggingface.co/spaces/multimodalart/Qwen-Image-Edit-Fast)", elem_id="or_item")
+                prompt = gr.Textbox(label="Prompt", info="Describe the transition between the two images")
+                with gr.Accordion("Advanced Settings", open=False):
+                    duration_seconds_input = gr.Slider(minimum=MIN_DURATION, maximum=MAX_DURATION, step=0.1, value=2.1, label="Video Duration (seconds)", info=f"Clamped to model's {MIN_FRAMES_MODEL}-{MAX_FRAMES_MODEL} frames at {FIXED_FPS}fps.")
+                    negative_prompt_input = gr.Textbox(label="Negative Prompt", value=default_negative_prompt, lines=3)
+                    steps_slider = gr.Slider(minimum=1, maximum=30, step=1, value=8, label="Inference Steps")
+                    guidance_scale_input = gr.Slider(minimum=0.0, maximum=10.0, step=0.5, value=1.0, label="Guidance Scale - high noise")
+                    guidance_scale_2_input = gr.Slider(minimum=0.0, maximum=10.0, step=0.5, value=1.0, label="Guidance Scale - low noise")
+                    with gr.Row():
+                        seed_input = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42)
+                        randomize_seed_checkbox = gr.Checkbox(label="Randomize seed", value=True)
+                generate_button = gr.Button("Generate Video", variant="primary")
+        with gr.Column():
+            output_video = gr.Video(label="Generated Video", autoplay=True)
+    ui_inputs = [
+        start_image, end_image, prompt, negative_prompt_input, duration_seconds_input,
+        steps_slider, guidance_scale_input, guidance_scale_2_input, seed_input,
+        randomize_seed_checkbox
+    ]
+    ui_outputs = [output_video, seed_input]
+    generate_button.click(fn=generate_video, inputs=ui_inputs, outputs=ui_outputs)
+    generate_5seconds.click(
+        fn=switch_to_upload_tab,
+        inputs=None,
+        outputs=[tabs]
+    ).then(
+        fn=lambda img: generate_end_frame(img, "this image is a still frame from a movie. generate a new frame with what happens on this scene 5 seconds in the future"),
+        inputs=[start_image],
+        outputs=[end_image]
+    ).success(
+        fn=generate_video,
+        inputs=ui_inputs,
+        outputs=ui_outputs
     )
+    gr.Examples(
+        examples=[
+            ["poli_tower.png", "tower_takes_off.png", "the man turns around"],
+            ["ugly_sonic.jpeg", "squatting_sonic.png", "the character dodges the missiles"],
+            ["capyabara_zoomed.png", "capyabara.webp", "a dramatic dolly zoom"],
+        ],
+        inputs=[start_image, end_image, prompt],
+        outputs=ui_outputs,
+        fn=generate_video,
+        cache_examples="lazy",
     )
 if __name__ == "__main__":
     # Cria a pasta e as imagens de exemplo se não existirem
     if not os.path.exists("examples"):