Spaces: Running on Zero

Update app.py

app.py CHANGED
@@ -74,74 +74,46 @@ aoti.aoti_blocks_load(pipe.transformer_2, 'zerogpu-aoti/Wan2', variant='fp8da')


 default_prompt_i2v = "ultra realistic cinematic footage, perfectly preserved facial identity and body structure across all frames, stable anatomy and consistent body proportions, realistic muscle definition, natural motion flow and breathing dynamics, seamless motion continuity, photorealistic clothing preservation with accurate fabric movement and lighting response, consistent outfit color and texture, high-fidelity skin tone and texture stability, lifelike eye reflections and natural gaze consistency, cinematic lighting with soft volumetric shadows, professional film-grade color grading, HDR tone mapping with true-to-life contrast and depth, realistic ambient and subsurface light behavior, physically accurate reflections and highlights, detailed cinematic background with depth of field and natural bokeh, smooth camera movement with film-level motion fluidity, 35mm film aesthetic, ultra-detailed textures, consistent and coherent composition, perfect balance between depth, light, and motion for a truly photorealistic cinematic atmosphere, temporal coherence, identity consistency, no facial drift, no texture flickering, no color shifting."
-default_negative_prompt = "low quality, low resolution, low contrast, poor lighting, underexposed, overexposed, bad composition, bad framing, bad perspective, flat lighting, washed out colors, jpeg artifacts, noise, static, grain, compression artifacts, flickering, stutter, shaky camera, inconsistent motion, poor transition, broken motion, unnatural interpolation, out of focus, blurry, motion blur, ghosting, double exposure, distorted face, consistent face, changing face, warped face, face drift, identity shift, face inconsistency,
+default_negative_prompt = "low quality, low resolution, low contrast, poor lighting, underexposed, overexposed, bad composition, bad framing, bad perspective, flat lighting, washed out colors, jpeg artifacts, noise, static, grain, compression artifacts, flickering, stutter, shaky camera, inconsistent motion, poor transition, broken motion, unnatural interpolation, out of focus, blurry, motion blur, ghosting, double exposure, distorted face, consistent face, changing face, warped face, face drift, identity shift, face inconsistency, natural facial expression, mutated body, deformed limbs, extra fingers, fused fingers, missing fingers, bad anatomy, unrealistic proportions, twisted pose, asymmetrical body, unappealing, uncanny, artificial face, waxy skin, plastic look, text, watermark, logo, signature, frame border, cropped edges, tiling, duplicate, repeated pattern, cartoon, anime, illustration, 3d render, painting, drawing, oversharpened, low detail, artificial texture, poor skin texture, over-smoothed, fake skin, flat skin, color banding, saturation, chromatic aberration, unrealistic shadows, inconsistent lighting, frozen frame, poor depth, lack of realism, fake reflection, artifacted highlights, bloom artifacts, bad transition, broken frame, visual glitch, bad synchronization, oversaturated colors, contrast issues, unbalanced composition, lack of cinematic tone, flat motion, jitter, warped geometry, background distortion, identity mismatch, morphing, inconsistent hair, inconsistent body shape"
+

 def resize_image(image: Image.Image) -> Image.Image:
-    """
-    Resizes an image to fit within the model's constraints, preserving aspect ratio as much as possible.
-    """
     width, height = image.size
-
-    # Handle square case
     if width == height:
         return image.resize((SQUARE_DIM, SQUARE_DIM), Image.LANCZOS)
-
     aspect_ratio = width / height
-
     MAX_ASPECT_RATIO = MAX_DIM / MIN_DIM
     MIN_ASPECT_RATIO = MIN_DIM / MAX_DIM
-
     image_to_resize = image
-
     if aspect_ratio > MAX_ASPECT_RATIO:
-        # Very wide image -> crop width to fit 832x480 aspect ratio
         target_w, target_h = MAX_DIM, MIN_DIM
         crop_width = int(round(height * MAX_ASPECT_RATIO))
         left = (width - crop_width) // 2
         image_to_resize = image.crop((left, 0, left + crop_width, height))
     elif aspect_ratio < MIN_ASPECT_RATIO:
-        # Very tall image -> crop height to fit 480x832 aspect ratio
         target_w, target_h = MIN_DIM, MAX_DIM
         crop_height = int(round(width / MIN_ASPECT_RATIO))
         top = (height - crop_height) // 2
         image_to_resize = image.crop((0, top, width, top + crop_height))
     else:
-        if width > height:
+        if width > height:
             target_w = MAX_DIM
             target_h = int(round(target_w / aspect_ratio))
-        else:
+        else:
             target_h = MAX_DIM
             target_w = int(round(target_h * aspect_ratio))
-
     final_w = round(target_w / MULTIPLE_OF) * MULTIPLE_OF
     final_h = round(target_h / MULTIPLE_OF) * MULTIPLE_OF
-
     final_w = max(MIN_DIM, min(MAX_DIM, final_w))
     final_h = max(MIN_DIM, min(MAX_DIM, final_h))
-
     return image_to_resize.resize((final_w, final_h), Image.LANCZOS)


 def get_num_frames(duration_seconds: float):
-    return 1 + int(np.clip(
-        int(round(duration_seconds * FIXED_FPS)),
-        MIN_FRAMES_MODEL,
-        MAX_FRAMES_MODEL,
-    ))
-
-
-def get_duration(
-    input_image,
-    prompt,
-    steps,
-    negative_prompt,
-    duration_seconds,
-    guidance_scale,
-    guidance_scale_2,
-    seed,
-    randomize_seed,
-    progress,
-):
+    return 1 + int(np.clip(int(round(duration_seconds * FIXED_FPS)), MIN_FRAMES_MODEL, MAX_FRAMES_MODEL))
+
+
+def get_duration(input_image, prompt, steps, negative_prompt, duration_seconds, guidance_scale, guidance_scale_2, seed, randomize_seed, progress):
     BASE_FRAMES_HEIGHT_WIDTH = 81 * 832 * 624
     BASE_STEP_DURATION = 15
     width, height = resize_image(input_image).size
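A note on the hunk above: resize_image crops extreme aspect ratios to the model's 832x480 / 480x832 bounds and snaps dimensions to a multiple of MULTIPLE_OF, while get_num_frames converts the requested duration into a clamped frame count. Below is a minimal, self-contained sketch of that math; the constant values are assumptions for illustration (the app defines its own earlier in app.py; only FIXED_FPS = 24 is documented, in the removed docstring).

import numpy as np

# Assumed values for illustration -- the real constants live earlier in app.py.
MIN_DIM, MAX_DIM = 480, 832                  # assumption: min/max edge lengths
MULTIPLE_OF = 16                             # assumption: dimension granularity
FIXED_FPS = 24                               # per the removed docstring's note
MIN_FRAMES_MODEL, MAX_FRAMES_MODEL = 8, 81   # assumption: model frame bounds

def get_num_frames(duration_seconds: float) -> int:
    # Round the duration to frames, clamp to the model range, +1 initial frame.
    return 1 + int(np.clip(int(round(duration_seconds * FIXED_FPS)),
                           MIN_FRAMES_MODEL, MAX_FRAMES_MODEL))

def snap(dim: int) -> int:
    # Same snapping rule as resize_image: nearest multiple, then clamp.
    return max(MIN_DIM, min(MAX_DIM, round(dim / MULTIPLE_OF) * MULTIPLE_OF))

print(get_num_frames(3.5))  # 3.5 s * 24 fps = 84 -> clipped to 81 -> 82 frames
print(snap(500))            # 500 -> 496 (nearest multiple of 16 within bounds)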
@@ -150,59 +122,12 @@ def get_duration(
     step_duration = BASE_STEP_DURATION * factor ** 1.5
     return 10 + int(steps) * step_duration

+
 @spaces.GPU(duration=get_duration)
-def generate_video(
-    input_image,
-    prompt,
-    steps=4,
-    negative_prompt=default_negative_prompt,
-    duration_seconds = MAX_DURATION,
-    guidance_scale = 1,
-    guidance_scale_2 = 1,
-    seed = 42,
-    randomize_seed = False,
-    progress=gr.Progress(track_tqdm=True),
-):
-    """
-    Generate a video from an input image using the Wan 2.2 14B I2V model with Lightning LoRA.
-
-    This function takes an input image and generates a video animation based on the provided
-    prompt and parameters. It uses an FP8-quantized Wan 2.2 14B Image-to-Video model with Lightning LoRA
-    for fast generation in 4-8 steps.
-
-    Args:
-        input_image (PIL.Image): The input image to animate. Will be resized to target dimensions.
-        prompt (str): Text prompt describing the desired animation or motion.
-        steps (int, optional): Number of inference steps. More steps = higher quality but slower.
-            Defaults to 4. Range: 1-30.
-        negative_prompt (str, optional): Negative prompt to avoid unwanted elements.
-            Defaults to default_negative_prompt (contains unwanted visual artifacts).
-        duration_seconds (float, optional): Duration of the generated video in seconds.
-            Defaults to 2. Clamped between MIN_FRAMES_MODEL/FIXED_FPS and MAX_FRAMES_MODEL/FIXED_FPS.
-        guidance_scale (float, optional): Controls adherence to the prompt. Higher values = more adherence.
-            Defaults to 1.0. Range: 0.0-20.0.
-        guidance_scale_2 (float, optional): Controls adherence to the prompt. Higher values = more adherence.
-            Defaults to 1.0. Range: 0.0-20.0.
-        seed (int, optional): Random seed for reproducible results. Defaults to 42.
-            Range: 0 to MAX_SEED (2147483647).
-        randomize_seed (bool, optional): Whether to use a random seed instead of the provided seed.
-            Defaults to False.
-        progress (gr.Progress, optional): Gradio progress tracker. Defaults to gr.Progress(track_tqdm=True).
-
-    Returns:
-        tuple: A tuple containing:
-            - video_path (str): Path to the generated video file (.mp4)
-            - current_seed (int): The seed used for generation (useful when randomize_seed=True)
-
-    Raises:
-        gr.Error: If input_image is None (no image uploaded).
-
-    Note:
-        - Frame count is calculated as duration_seconds * FIXED_FPS (24)
-        - Output dimensions are adjusted to be multiples of MOD_VALUE (32)
-        - The function uses GPU acceleration via the @spaces.GPU decorator
-        - Generation time varies based on steps and duration (see get_duration function)
-    """
+def generate_video(input_image, prompt, steps=4, negative_prompt=default_negative_prompt,
+                   duration_seconds=MAX_DURATION, guidance_scale=1, guidance_scale_2=1,
+                   seed=42, randomize_seed=False, progress=gr.Progress(track_tqdm=True)):
+
     if input_image is None:
         raise gr.Error("Please upload an input image.")

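get_duration is handed to @spaces.GPU(duration=get_duration), so ZeroGPU books a time slot sized to the actual request rather than a fixed one; the spaces package calls it with the same arguments as the decorated generate_video. A hedged sketch of the estimate follows. The factor line does not appear in the hunk and is an assumption reconstructed from the name BASE_FRAMES_HEIGHT_WIDTH; the other constants mirror the diff.

# Sketch only: constants mirror the hunk, the factor definition is assumed.
def estimate_gpu_seconds(steps: int, frames: int, width: int, height: int) -> float:
    BASE_FRAMES_HEIGHT_WIDTH = 81 * 832 * 624  # baseline workload (frames*h*w)
    BASE_STEP_DURATION = 15                    # seconds per step at baseline
    factor = (frames * width * height) / BASE_FRAMES_HEIGHT_WIDTH  # assumed
    step_duration = BASE_STEP_DURATION * factor ** 1.5
    return 10 + int(steps) * step_duration     # 10 s of fixed overhead

print(estimate_gpu_seconds(6, 81, 832, 624))   # 100.0 at the baseline workload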
@@ -227,36 +152,43 @@ def generate_video(
         video_path = tmpfile.name

     export_to_video(output_frames_list, video_path, fps=FIXED_FPS)
-
     return video_path, current_seed

+
+# ================================
+# 💎 UI update with the marketing message
+# ================================
+
 with gr.Blocks() as demo:
+    gr.Markdown("🚀 **Over 1,000 runs in the first 48 hours — thank you for your amazing feedback!** \nTry the latest version below 👇")
     gr.Markdown("# Fast 4 steps Wan 2.2 I2V (14B) with Lightning LoRA")
-    gr.Markdown("
+    gr.Markdown("Run Wan 2.2 in just 4-8 steps, with [Lightning LoRA](https://huggingface.co/Kijai/WanVideo_comfy/tree/main/Wan22-Lightning), fp8 quantization & AoT compilation — compatible with 🧨 diffusers and ZeroGPU⚡️")
+
     with gr.Row():
         with gr.Column():
             input_image_component = gr.Image(type="pil", label="Input Image")
             prompt_input = gr.Textbox(label="Prompt", value=default_prompt_i2v)
-            duration_seconds_input = gr.Slider(minimum=MIN_DURATION, maximum=MAX_DURATION, step=0.1, value=3.5,
-
+            duration_seconds_input = gr.Slider(minimum=MIN_DURATION, maximum=MAX_DURATION, step=0.1, value=3.5,
+                                               label="Duration (seconds)",
+                                               info=f"Clamped to model's {MIN_FRAMES_MODEL}-{MAX_FRAMES_MODEL} frames at {FIXED_FPS}fps.")
             with gr.Accordion("Advanced Settings", open=False):
                 negative_prompt_input = gr.Textbox(label="Negative Prompt", value=default_negative_prompt, lines=3)
                 seed_input = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42, interactive=True)
                 randomize_seed_checkbox = gr.Checkbox(label="Randomize seed", value=True, interactive=True)
-                steps_slider = gr.Slider(minimum=1, maximum=30, step=1, value=6, label="Inference Steps")
+                steps_slider = gr.Slider(minimum=1, maximum=30, step=1, value=6, label="Inference Steps")
                 guidance_scale_input = gr.Slider(minimum=0.0, maximum=10.0, step=0.5, value=1, label="Guidance Scale - high noise stage")
                 guidance_scale_2_input = gr.Slider(minimum=0.0, maximum=10.0, step=0.5, value=1, label="Guidance Scale 2 - low noise stage")
-
             generate_button = gr.Button("Generate Video", variant="primary")
         with gr.Column():
             video_output = gr.Video(label="Generated Video", autoplay=True, interactive=False)

-    ui_inputs = [
-
-
-
-
+    ui_inputs = [input_image_component, prompt_input, steps_slider,
+                 negative_prompt_input, duration_seconds_input,
+                 guidance_scale_input, guidance_scale_2_input,
+                 seed_input, randomize_seed_checkbox]
+
     generate_button.click(fn=generate_video, inputs=ui_inputs, outputs=[video_output, seed_input])

+
 if __name__ == "__main__":
-    demo.queue().launch(mcp_server=True)
+    demo.queue().launch(mcp_server=True)
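One thing worth checking in the rewiring above: generate_button.click passes ui_inputs positionally, so the list must stay in the same order as generate_video's parameters (progress is injected by Gradio and is not listed). A self-contained sanity check against a stub with the new signature:

import inspect

# Stub mirroring the new generate_video signature (body and real defaults elided).
def generate_video(input_image, prompt, steps=4, negative_prompt="",
                   duration_seconds=5.0, guidance_scale=1, guidance_scale_2=1,
                   seed=42, randomize_seed=False, progress=None):
    ...

# ui_inputs order from the hunk, expressed as parameter names.
ui_order = ["input_image", "prompt", "steps", "negative_prompt", "duration_seconds",
            "guidance_scale", "guidance_scale_2", "seed", "randomize_seed"]
params = list(inspect.signature(generate_video).parameters)
assert params[:len(ui_order)] == ui_order  # passes: the wiring is consistent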