euIaxs22 committed on
Commit
d51e1a6
·
verified ·
1 Parent(s): 1a909d2

Update app_ltx.py

Browse files
Files changed (1) hide show
  1. app_ltx.py +28 -121
app_ltx.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import gradio as gr
2
  import torch
3
  import numpy as np
@@ -13,93 +14,17 @@ from huggingface_hub import snapshot_download
13
  from huggingface_hub import hf_hub_download
14
  import shutil
15
  import sys
 
 
16
 
17
- from inference import (
18
- create_ltx_video_pipeline,
19
- create_latent_upsampler,
20
- load_image_to_tensor_with_resize_and_crop,
21
- seed_everething,
22
- get_device,
23
- calculate_padding,
24
- load_media_file
25
- )
26
- from ltx_video.pipelines.pipeline_ltx_video import ConditioningItem, LTXMultiScalePipeline, LTXVideoPipeline
27
- from ltx_video.utils.skip_layer_strategy import SkipLayerStrategy
28
-
29
- APP_HOME = Path(os.environ.get("APP_HOME", "/app"))
30
-
31
- config_file_path = APP_HOME / "configs/ltxv-13b-0.9.8-distilled-fp8.yaml"
32
- with open(config_file_path, "r") as file:
33
- PIPELINE_CONFIG_YAML = yaml.safe_load(file)
34
-
35
- HF_HOME_CACHE = Path(os.getenv("HF_HOME", "/data/.cache/huggingface"))
36
- models_dir = Path("/data/ltx_models")
37
- LTX_REPO = "Lightricks/LTX-Video"
38
- MAX_IMAGE_SIZE = PIPELINE_CONFIG_YAML.get("max_resolution", 1280)
39
- MAX_NUM_FRAMES = 257
40
- FPS = 30.0
41
-
42
- DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
43
- DTYPE = torch.bfloat16 if DEVICE == "cuda" and torch.cuda.is_bf16_supported() else torch.float16
44
-
45
-
46
- # --- Global variables for loaded models ---
47
- pipeline_instance = None
48
- latent_upsampler_instance = None
49
-
50
- Path(models_dir).mkdir(parents=True, exist_ok=True)
51
-
52
- print("Downloading models (if not present)...")
53
- distilled_model_actual_path = hf_hub_download(
54
- repo_id=LTX_REPO,
55
- filename=PIPELINE_CONFIG_YAML["checkpoint_path"],
56
- local_dir=models_dir,
57
- #local_dir_use_symlinks=False,
58
- cache_dir=HF_HOME_CACHE,
59
- )
60
- PIPELINE_CONFIG_YAML["checkpoint_path"] = distilled_model_actual_path
61
- print(f"Distilled model path: {distilled_model_actual_path}")
62
-
63
- SPATIAL_UPSCALER_FILENAME = PIPELINE_CONFIG_YAML["spatial_upscaler_model_path"]
64
- spatial_upscaler_actual_path = hf_hub_download(
65
- repo_id=LTX_REPO,
66
- filename=SPATIAL_UPSCALER_FILENAME,
67
- local_dir=models_dir,
68
- #local_dir_use_symlinks=False,
69
- cache_dir=HF_HOME_CACHE,
70
- )
71
- PIPELINE_CONFIG_YAML["spatial_upscaler_model_path"] = spatial_upscaler_actual_path
72
- print(f"Spatial upscaler model path: {spatial_upscaler_actual_path}")
73
 
74
- print("Creating LTX Video pipeline on CPU...")
75
- pipeline_instance = create_ltx_video_pipeline(
76
- ckpt_path=PIPELINE_CONFIG_YAML["checkpoint_path"],
77
- precision=PIPELINE_CONFIG_YAML["precision"],
78
- text_encoder_model_name_or_path=PIPELINE_CONFIG_YAML["text_encoder_model_name_or_path"],
79
- sampler=PIPELINE_CONFIG_YAML["sampler"],
80
- device="cpu",
81
- enhance_prompt=False,
82
- prompt_enhancer_image_caption_model_name_or_path=PIPELINE_CONFIG_YAML["prompt_enhancer_image_caption_model_name_or_path"],
83
- prompt_enhancer_llm_model_name_or_path=PIPELINE_CONFIG_YAML["prompt_enhancer_llm_model_name_or_path"],
84
  )
85
- print("LTX Video pipeline created on CPU.")
86
-
87
- if PIPELINE_CONFIG_YAML.get("spatial_upscaler_model_path"):
88
- print("Creating latent upsampler on CPU...")
89
- latent_upsampler_instance = create_latent_upsampler(
90
- PIPELINE_CONFIG_YAML["spatial_upscaler_model_path"],
91
- device="cpu"
92
- )
93
- print("Latent upsampler created on CPU.")
94
-
95
- target_inference_device = "cuda"
96
- print(f"Target inference device: {target_inference_device}")
97
- pipeline_instance.to(target_inference_device)
98
- if latent_upsampler_instance:
99
- latent_upsampler_instance.to(target_inference_device)
100
-
101
-
102
-
103
 
104
 
105
  # --- FUNÇÃO DE GERAÇÃO PRINCIPAL ---
@@ -109,45 +34,27 @@ def generate(
109
  progress=gr.Progress(track_tqdm=True)
110
  ):
111
  seed_everething(seed)
112
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
 
114
- generator = torch.Generator(device=DEVICE).manual_seed(seed)
115
-
116
- height_padded = ((target_height - 1) // 32 + 1) * 32
117
- width_padded = ((target_width - 1) // 32 + 1) * 32
118
- padding_values = calculate_padding(target_height, target_width, height_padded, width_padded)
119
-
120
- conditioning_items = None
121
- if image_input:
122
- progress(0.1, desc="Preparando imagem de condição...")
123
- media_tensor = load_media_file(
124
- media_path=image_input, height=target_height, width=target_width,
125
- max_frames=1, padding=padding_values, just_crop=True
126
- )
127
- conditioning_items = [ConditioningItem(media_tensor.to(DEVICE, dtype=DTYPE), 0, 1.0)]
128
-
129
- multi_scale_pipeline = LTXMultiScalePipeline(pipeline_instance, latent_upsampler_instance)
130
-
131
- call_kwargs = {
132
- "prompt": prompt, "negative_prompt": "worst quality...",
133
- "height": target_height, "width": target_width, "num_frames": num_frames, "frame_rate": int(FPS),
134
- "generator": generator, "output_type": "pt",
135
- "conditioning_items": conditioning_items,
136
- **PIPELINE_CONFIG_YAML
137
- }
138
-
139
- progress(0.3, desc="Gerando vídeo...")
140
- result_tensor = multi_scale_pipeline(**call_kwargs).images
141
-
142
- pad_left, pad_right, pad_top, pad_bottom = padding_values
143
- slice_h_end = -pad_bottom if pad_bottom > 0 else None
144
- slice_w_end = -pad_right if pad_right > 0 else None
145
- result_tensor = result_tensor[:, :, :num_frames, pad_top:slice_h_end, pad_left:slice_w_end]
146
 
147
- progress(0.9, desc="Exportando vídeo...")
148
- output_video_path = tempfile.mktemp(suffix=".mp4")
149
- video_np = result_tensor[0].permute(1, 2, 3, 0).cpu().float().numpy()
150
- video_np = np.clip(video_np * 255, 0, 255).astype("uint8")
151
  export_to_video(video_np, str(output_video_path), fps=24)
152
 
153
  return output_video_path
 
1
+ import torch
2
  import gradio as gr
3
  import torch
4
  import numpy as np
 
14
  from huggingface_hub import hf_hub_download
15
  import shutil
16
  import sys
17
+ from diffusers import LTXImageToVideoPipeline
18
+ from diffusers.utils import export_to_video, load_image
19
 
20
+ pipe = LTXImageToVideoPipeline.from_pretrained("Lightricks/LTX-Video", torch_dtype=torch.bfloat16)
21
+ pipe.to("cuda")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
+ image = load_image(
24
+ "https://huggingface.co/datasets/a-r-r-o-w/tiny-meme-dataset-captioned/resolve/main/images/8.png"
 
 
 
 
 
 
 
 
25
  )
26
+ prompt = "A young girl stands calmly in the foreground, looking directly at the camera, as a house fire rages in the background. Flames engulf the structure, with smoke billowing into the air. Firefighters in protective gear rush to the scene, a fire truck labeled '38' visible behind them. The girl's neutral expression contrasts sharply with the chaos of the fire, creating a poignant and emotionally charged scene."
27
+ negative_prompt = "worst quality, inconsistent motion, blurry, jittery, distorted"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
 
30
  # --- FUNÇÃO DE GERAÇÃO PRINCIPAL ---
 
34
  progress=gr.Progress(track_tqdm=True)
35
  ):
36
  seed_everething(seed)
37
+
38
+ #conditioning_items = None
39
+ #if image_input:
40
+ # progress(0.1, desc="Preparando imagem de condição...")
41
+ # media_tensor = load_media_file(
42
+ # media_path=image_input, height=target_height, width=target_width,
43
+ # max_frames=1, padding=padding_values, just_crop=True
44
+ # )
45
+ # conditioning_items = [ConditioningItem(media_tensor.to(DEVICE, dtype=DTYPE), 0, 1.0)]
46
+
47
+ video = pipe(
48
+ image=load_image(image),
49
+ prompt=prompt,
50
+ negative_prompt=negative_prompt,
51
+ width=480,
52
+ height=480,
53
+ num_frames=120,
54
+ num_inference_steps=50,
55
+ ).frames[0]
56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
 
 
 
 
 
58
  export_to_video(video_np, str(output_video_path), fps=24)
59
 
60
  return output_video_path