Aduc-sdr-2_5s

Paused

App Files Files Community

euIaxs22 committed on Oct 2

Commit

d51e1a6

verified ·

1 Parent(s): 1a909d2

Update app_ltx.py

Browse files

Files changed (1) hide show

app_ltx.py +28 -121

app_ltx.py CHANGED Viewed

@@ -1,3 +1,4 @@
 import gradio as gr
 import torch
 import numpy as np
@@ -13,93 +14,17 @@ from huggingface_hub import snapshot_download
 from huggingface_hub import hf_hub_download
 import shutil
 import sys
-from inference import (
-    create_ltx_video_pipeline,
-    create_latent_upsampler,
-    load_image_to_tensor_with_resize_and_crop,
-    seed_everething,
-    get_device,
-    calculate_padding,
-    load_media_file
-)
-from ltx_video.pipelines.pipeline_ltx_video import ConditioningItem, LTXMultiScalePipeline, LTXVideoPipeline
-from ltx_video.utils.skip_layer_strategy import SkipLayerStrategy
-APP_HOME = Path(os.environ.get("APP_HOME", "/app"))
-config_file_path = APP_HOME / "configs/ltxv-13b-0.9.8-distilled-fp8.yaml"
-with open(config_file_path, "r") as file:
-    PIPELINE_CONFIG_YAML = yaml.safe_load(file)
-HF_HOME_CACHE = Path(os.getenv("HF_HOME", "/data/.cache/huggingface"))
-models_dir = Path("/data/ltx_models")
-LTX_REPO = "Lightricks/LTX-Video"
-MAX_IMAGE_SIZE = PIPELINE_CONFIG_YAML.get("max_resolution", 1280)
-MAX_NUM_FRAMES = 257
-FPS = 30.0
-DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
-DTYPE = torch.bfloat16 if DEVICE == "cuda" and torch.cuda.is_bf16_supported() else torch.float16
-# --- Global variables for loaded models ---
-pipeline_instance = None
-latent_upsampler_instance = None
-Path(models_dir).mkdir(parents=True, exist_ok=True)
-print("Downloading models (if not present)...")
-distilled_model_actual_path = hf_hub_download(
-    repo_id=LTX_REPO,
-    filename=PIPELINE_CONFIG_YAML["checkpoint_path"],
-    local_dir=models_dir,
-    #local_dir_use_symlinks=False,
-    cache_dir=HF_HOME_CACHE,
-)
-PIPELINE_CONFIG_YAML["checkpoint_path"] = distilled_model_actual_path
-print(f"Distilled model path: {distilled_model_actual_path}")
-SPATIAL_UPSCALER_FILENAME = PIPELINE_CONFIG_YAML["spatial_upscaler_model_path"]
-spatial_upscaler_actual_path = hf_hub_download(
-    repo_id=LTX_REPO,
-    filename=SPATIAL_UPSCALER_FILENAME,
-    local_dir=models_dir,
-    #local_dir_use_symlinks=False,
-    cache_dir=HF_HOME_CACHE,
-)
-PIPELINE_CONFIG_YAML["spatial_upscaler_model_path"] = spatial_upscaler_actual_path
-print(f"Spatial upscaler model path: {spatial_upscaler_actual_path}")
-print("Creating LTX Video pipeline on CPU...")
-pipeline_instance = create_ltx_video_pipeline(
-    ckpt_path=PIPELINE_CONFIG_YAML["checkpoint_path"],
-    precision=PIPELINE_CONFIG_YAML["precision"],
-    text_encoder_model_name_or_path=PIPELINE_CONFIG_YAML["text_encoder_model_name_or_path"],
-    sampler=PIPELINE_CONFIG_YAML["sampler"],
-    device="cpu",
-    enhance_prompt=False,
-    prompt_enhancer_image_caption_model_name_or_path=PIPELINE_CONFIG_YAML["prompt_enhancer_image_caption_model_name_or_path"],
-    prompt_enhancer_llm_model_name_or_path=PIPELINE_CONFIG_YAML["prompt_enhancer_llm_model_name_or_path"],
 )
-print("LTX Video pipeline created on CPU.")
-if PIPELINE_CONFIG_YAML.get("spatial_upscaler_model_path"):
-    print("Creating latent upsampler on CPU...")
-    latent_upsampler_instance = create_latent_upsampler(
-        PIPELINE_CONFIG_YAML["spatial_upscaler_model_path"],
-        device="cpu"
-    )
-    print("Latent upsampler created on CPU.")
-target_inference_device = "cuda"
-print(f"Target inference device: {target_inference_device}")
-pipeline_instance.to(target_inference_device)
-if latent_upsampler_instance:
-    latent_upsampler_instance.to(target_inference_device)
 # --- FUNÇÃO DE GERAÇÃO PRINCIPAL ---
@@ -109,45 +34,27 @@ def generate(
     progress=gr.Progress(track_tqdm=True)
 ):
     seed_everething(seed)
-    generator = torch.Generator(device=DEVICE).manual_seed(seed)
-    height_padded = ((target_height - 1) // 32 + 1) * 32
-    width_padded = ((target_width - 1) // 32 + 1) * 32
-    padding_values = calculate_padding(target_height, target_width, height_padded, width_padded)
-    conditioning_items = None
-    if image_input:
-        progress(0.1, desc="Preparando imagem de condição...")
-        media_tensor = load_media_file(
-            media_path=image_input, height=target_height, width=target_width,
-            max_frames=1, padding=padding_values, just_crop=True
-        )
-        conditioning_items = [ConditioningItem(media_tensor.to(DEVICE, dtype=DTYPE), 0, 1.0)]
-    multi_scale_pipeline = LTXMultiScalePipeline(pipeline_instance, latent_upsampler_instance)
-    call_kwargs = {
-        "prompt": prompt, "negative_prompt": "worst quality...",
-        "height": target_height, "width": target_width, "num_frames": num_frames, "frame_rate": int(FPS),
-        "generator": generator, "output_type": "pt",
-        "conditioning_items": conditioning_items,
-        **PIPELINE_CONFIG_YAML
-    }
-    progress(0.3, desc="Gerando vídeo...")
-    result_tensor = multi_scale_pipeline(**call_kwargs).images
-    pad_left, pad_right, pad_top, pad_bottom = padding_values
-    slice_h_end = -pad_bottom if pad_bottom > 0 else None
-    slice_w_end = -pad_right if pad_right > 0 else None
-    result_tensor = result_tensor[:, :, :num_frames, pad_top:slice_h_end, pad_left:slice_w_end]
-    progress(0.9, desc="Exportando vídeo...")
-    output_video_path = tempfile.mktemp(suffix=".mp4")
-    video_np = result_tensor[0].permute(1, 2, 3, 0).cpu().float().numpy()
-    video_np = np.clip(video_np * 255, 0, 255).astype("uint8")
     export_to_video(video_np, str(output_video_path), fps=24)
     return output_video_path

+import torch
 import gradio as gr
 import torch
 import numpy as np
 from huggingface_hub import hf_hub_download
 import shutil
 import sys
+from diffusers import LTXImageToVideoPipeline
+from diffusers.utils import export_to_video, load_image
+pipe = LTXImageToVideoPipeline.from_pretrained("Lightricks/LTX-Video", torch_dtype=torch.bfloat16)
+pipe.to("cuda")
+image = load_image(
+    "https://huggingface.co/datasets/a-r-r-o-w/tiny-meme-dataset-captioned/resolve/main/images/8.png"
 )
+prompt = "A young girl stands calmly in the foreground, looking directly at the camera, as a house fire rages in the background. Flames engulf the structure, with smoke billowing into the air. Firefighters in protective gear rush to the scene, a fire truck labeled '38' visible behind them. The girl's neutral expression contrasts sharply with the chaos of the fire, creating a poignant and emotionally charged scene."
+negative_prompt = "worst quality, inconsistent motion, blurry, jittery, distorted"
 # --- FUNÇÃO DE GERAÇÃO PRINCIPAL ---
     progress=gr.Progress(track_tqdm=True)
 ):
     seed_everething(seed)
+    #conditioning_items = None
+    #if image_input:
+    #    progress(0.1, desc="Preparando imagem de condição...")
+    #    media_tensor = load_media_file(
+    #        media_path=image_input, height=target_height, width=target_width,
+    #        max_frames=1, padding=padding_values, just_crop=True
+    #    )
+    #    conditioning_items = [ConditioningItem(media_tensor.to(DEVICE, dtype=DTYPE), 0, 1.0)]
+    video = pipe(
+        image=load_image(image),
+        prompt=prompt,
+        negative_prompt=negative_prompt,
+        width=480,
+        height=480,
+        num_frames=120,
+        num_inference_steps=50,
+    ).frames[0]
     export_to_video(video_np, str(output_video_path), fps=24)
     return output_video_path