dream2589632147 committed
Commit 07473ce · verified · 1 Parent(s): cf757bc

Update app.py

Files changed (1):
  app.py +32 -100

app.py CHANGED
@@ -74,74 +74,46 @@ aoti.aoti_blocks_load(pipe.transformer_2, 'zerogpu-aoti/Wan2', variant='fp8da')
 
 
 default_prompt_i2v = "ultra realistic cinematic footage, perfectly preserved facial identity and body structure across all frames, stable anatomy and consistent body proportions, realistic muscle definition, natural motion flow and breathing dynamics, seamless motion continuity, photorealistic clothing preservation with accurate fabric movement and lighting response, consistent outfit color and texture, high-fidelity skin tone and texture stability, lifelike eye reflections and natural gaze consistency, cinematic lighting with soft volumetric shadows, professional film-grade color grading, HDR tone mapping with true-to-life contrast and depth, realistic ambient and subsurface light behavior, physically accurate reflections and highlights, detailed cinematic background with depth of field and natural bokeh, smooth camera movement with film-level motion fluidity, 35mm film aesthetic, ultra-detailed textures, consistent and coherent composition, perfect balance between depth, light, and motion for a truly photorealistic cinematic atmosphere, temporal coherence, identity consistency, no facial drift, no texture flickering, no color shifting."
-default_negative_prompt = "low quality, low resolution, low contrast, poor lighting, underexposed, overexposed, bad composition, bad framing, bad perspective, flat lighting, washed out colors, jpeg artifacts, noise, static, grain, compression artifacts, flickering, stutter, shaky camera, inconsistent motion, poor transition, broken motion, unnatural interpolation, out of focus, blurry, motion blur, ghosting, double exposure, distorted face, consistent face, changing face, warped face, face drift, identity shift, face inconsistency, natural facial expression, mutated body, deformed limbs, extra fingers, fused fingers, issing fingers, bad anatomy, unrealistic proportions, twisted pose, asymmetrical body, nappealing, uncanny, artificial face, waxy skin, plastic look, text, watermark, logo, ignature, frame border, cropped edges, tiling, duplicate, repeated pattern, cartoon, nime, illustration, 3d render, painting, drawing, oversharpened, low detail, artificial texture, poor skin texture, over-smoothed, fake skin, flat skin, color banding, saturation, chromatic aberration, unrealistic shadows, inconsistent lighting, frozen frame, poor depth, lack of realism, fake reflection, artifacted highlights, bloom artifacts, bad transition, broken frame, visual glitch, bad synchronization, oversaturated colors, contrast issues, unbalanced composition, lack of cinematic tone, flat motion, jitter, warped geometry, background distortion, identity mismatch, orphing, inconsistent hair, inconsistent body shape"
+default_negative_prompt = "low quality, low resolution, low contrast, poor lighting, underexposed, overexposed, bad composition, bad framing, bad perspective, flat lighting, washed out colors, jpeg artifacts, noise, static, grain, compression artifacts, flickering, stutter, shaky camera, inconsistent motion, poor transition, broken motion, unnatural interpolation, out of focus, blurry, motion blur, ghosting, double exposure, distorted face, consistent face, changing face, warped face, face drift, identity shift, face inconsistency, natural facial expression, mutated body, deformed limbs, extra fingers, fused fingers, missing fingers, bad anatomy, unrealistic proportions, twisted pose, asymmetrical body, unappealing, uncanny, artificial face, waxy skin, plastic look, text, watermark, logo, signature, frame border, cropped edges, tiling, duplicate, repeated pattern, cartoon, anime, illustration, 3d render, painting, drawing, oversharpened, low detail, artificial texture, poor skin texture, over-smoothed, fake skin, flat skin, color banding, saturation, chromatic aberration, unrealistic shadows, inconsistent lighting, frozen frame, poor depth, lack of realism, fake reflection, artifacted highlights, bloom artifacts, bad transition, broken frame, visual glitch, bad synchronization, oversaturated colors, contrast issues, unbalanced composition, lack of cinematic tone, flat motion, jitter, warped geometry, background distortion, identity mismatch, morphing, inconsistent hair, inconsistent body shape"
+
 
 def resize_image(image: Image.Image) -> Image.Image:
-    """
-    Resizes an image to fit within the model's constraints, preserving aspect ratio as much as possible.
-    """
     width, height = image.size
-
-    # Handle square case
     if width == height:
         return image.resize((SQUARE_DIM, SQUARE_DIM), Image.LANCZOS)
-
     aspect_ratio = width / height
-
     MAX_ASPECT_RATIO = MAX_DIM / MIN_DIM
     MIN_ASPECT_RATIO = MIN_DIM / MAX_DIM
-
     image_to_resize = image
-
     if aspect_ratio > MAX_ASPECT_RATIO:
-        # Very wide image -> crop width to fit 832x480 aspect ratio
         target_w, target_h = MAX_DIM, MIN_DIM
         crop_width = int(round(height * MAX_ASPECT_RATIO))
         left = (width - crop_width) // 2
         image_to_resize = image.crop((left, 0, left + crop_width, height))
     elif aspect_ratio < MIN_ASPECT_RATIO:
-        # Very tall image -> crop height to fit 480x832 aspect ratio
         target_w, target_h = MIN_DIM, MAX_DIM
         crop_height = int(round(width / MIN_ASPECT_RATIO))
         top = (height - crop_height) // 2
         image_to_resize = image.crop((0, top, width, top + crop_height))
     else:
-        if width > height:  # Landscape
+        if width > height:
             target_w = MAX_DIM
             target_h = int(round(target_w / aspect_ratio))
-        else:  # Portrait
+        else:
             target_h = MAX_DIM
             target_w = int(round(target_h * aspect_ratio))
-
     final_w = round(target_w / MULTIPLE_OF) * MULTIPLE_OF
     final_h = round(target_h / MULTIPLE_OF) * MULTIPLE_OF
-
     final_w = max(MIN_DIM, min(MAX_DIM, final_w))
     final_h = max(MIN_DIM, min(MAX_DIM, final_h))
-
     return image_to_resize.resize((final_w, final_h), Image.LANCZOS)
 
 
 def get_num_frames(duration_seconds: float):
-    return 1 + int(np.clip(
-        int(round(duration_seconds * FIXED_FPS)),
-        MIN_FRAMES_MODEL,
-        MAX_FRAMES_MODEL,
-    ))
-
-
-def get_duration(
-    input_image,
-    prompt,
-    steps,
-    negative_prompt,
-    duration_seconds,
-    guidance_scale,
-    guidance_scale_2,
-    seed,
-    randomize_seed,
-    progress,
-):
+    return 1 + int(np.clip(int(round(duration_seconds * FIXED_FPS)), MIN_FRAMES_MODEL, MAX_FRAMES_MODEL))
+
+
+def get_duration(input_image, prompt, steps, negative_prompt, duration_seconds, guidance_scale, guidance_scale_2, seed, randomize_seed, progress):
     BASE_FRAMES_HEIGHT_WIDTH = 81 * 832 * 624
     BASE_STEP_DURATION = 15
     width, height = resize_image(input_image).size
@@ -150,59 +122,12 @@ def get_duration(
     step_duration = BASE_STEP_DURATION * factor ** 1.5
     return 10 + int(steps) * step_duration
 
+
 @spaces.GPU(duration=get_duration)
-def generate_video(
-    input_image,
-    prompt,
-    steps = 4,
-    negative_prompt=default_negative_prompt,
-    duration_seconds = MAX_DURATION,
-    guidance_scale = 1,
-    guidance_scale_2 = 1,
-    seed = 42,
-    randomize_seed = False,
-    progress=gr.Progress(track_tqdm=True),
-):
-    """
-    Generate a video from an input image using the Wan 2.2 14B I2V model with Lightning LoRA.
-
-    This function takes an input image and generates a video animation based on the provided
-    prompt and parameters. It uses an FP8 quantized Wan 2.2 14B Image-to-Video model with Lightning LoRA
-    for fast generation in 4-8 steps.
-
-    Args:
-        input_image (PIL.Image): The input image to animate. Will be resized to target dimensions.
-        prompt (str): Text prompt describing the desired animation or motion.
-        steps (int, optional): Number of inference steps. More steps = higher quality but slower.
-            Defaults to 4. Range: 1-30.
-        negative_prompt (str, optional): Negative prompt to avoid unwanted elements.
-            Defaults to default_negative_prompt (contains unwanted visual artifacts).
-        duration_seconds (float, optional): Duration of the generated video in seconds.
-            Defaults to 2. Clamped between MIN_FRAMES_MODEL/FIXED_FPS and MAX_FRAMES_MODEL/FIXED_FPS.
-        guidance_scale (float, optional): Controls adherence to the prompt. Higher values = more adherence.
-            Defaults to 1.0. Range: 0.0-20.0.
-        guidance_scale_2 (float, optional): Controls adherence to the prompt. Higher values = more adherence.
-            Defaults to 1.0. Range: 0.0-20.0.
-        seed (int, optional): Random seed for reproducible results. Defaults to 42.
-            Range: 0 to MAX_SEED (2147483647).
-        randomize_seed (bool, optional): Whether to use a random seed instead of the provided seed.
-            Defaults to False.
-        progress (gr.Progress, optional): Gradio progress tracker. Defaults to gr.Progress(track_tqdm=True).
-
-    Returns:
-        tuple: A tuple containing:
-            - video_path (str): Path to the generated video file (.mp4)
-            - current_seed (int): The seed used for generation (useful when randomize_seed=True)
-
-    Raises:
-        gr.Error: If input_image is None (no image uploaded).
-
-    Note:
-        - Frame count is calculated as duration_seconds * FIXED_FPS (24)
-        - Output dimensions are adjusted to be multiples of MOD_VALUE (32)
-        - The function uses GPU acceleration via the @spaces.GPU decorator
-        - Generation time varies based on steps and duration (see get_duration function)
-    """
+def generate_video(input_image, prompt, steps=4, negative_prompt=default_negative_prompt,
+                   duration_seconds=MAX_DURATION, guidance_scale=1, guidance_scale_2=1,
+                   seed=42, randomize_seed=False, progress=gr.Progress(track_tqdm=True)):
+
     if input_image is None:
         raise gr.Error("Please upload an input image.")
 
@@ -227,36 +152,43 @@ def generate_video(
     video_path = tmpfile.name
 
     export_to_video(output_frames_list, video_path, fps=FIXED_FPS)
-
     return video_path, current_seed
 
+
+# ================================
+# 💎 UI update with the marketing message
+# ================================
+
 with gr.Blocks() as demo:
+    gr.Markdown("🚀 **Over 1,000 runs in the first 48 hours — thank you for your amazing feedback!** \nTry the latest version below 👇")
     gr.Markdown("# Fast 4 steps Wan 2.2 I2V (14B) with Lightning LoRA")
-    gr.Markdown("run Wan 2.2 in just 4-8 steps, with [Lightning LoRA](https://huggingface.co/Kijai/WanVideo_comfy/tree/main/Wan22-Lightning), fp8 quantization & AoT compilation - compatible with 🧨 diffusers and ZeroGPU⚡️")
+    gr.Markdown("Run Wan 2.2 in just 4-8 steps, with [Lightning LoRA](https://huggingface.co/Kijai/WanVideo_comfy/tree/main/Wan22-Lightning), fp8 quantization & AoT compilation, compatible with 🧨 diffusers and ZeroGPU⚡️")
+
     with gr.Row():
         with gr.Column():
             input_image_component = gr.Image(type="pil", label="Input Image")
             prompt_input = gr.Textbox(label="Prompt", value=default_prompt_i2v)
-            duration_seconds_input = gr.Slider(minimum=MIN_DURATION, maximum=MAX_DURATION, step=0.1, value=3.5, label="Duration (seconds)", info=f"Clamped to model's {MIN_FRAMES_MODEL}-{MAX_FRAMES_MODEL} frames at {FIXED_FPS}fps.")
-
+            duration_seconds_input = gr.Slider(minimum=MIN_DURATION, maximum=MAX_DURATION, step=0.1, value=3.5,
+                                               label="Duration (seconds)",
+                                               info=f"Clamped to model's {MIN_FRAMES_MODEL}-{MAX_FRAMES_MODEL} frames at {FIXED_FPS}fps.")
            with gr.Accordion("Advanced Settings", open=False):
                negative_prompt_input = gr.Textbox(label="Negative Prompt", value=default_negative_prompt, lines=3)
                seed_input = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42, interactive=True)
                randomize_seed_checkbox = gr.Checkbox(label="Randomize seed", value=True, interactive=True)
-                steps_slider = gr.Slider(minimum=1, maximum=30, step=1, value=6, label="Inference Steps")
+                steps_slider = gr.Slider(minimum=1, maximum=30, step=1, value=6, label="Inference Steps")
                guidance_scale_input = gr.Slider(minimum=0.0, maximum=10.0, step=0.5, value=1, label="Guidance Scale - high noise stage")
                guidance_scale_2_input = gr.Slider(minimum=0.0, maximum=10.0, step=0.5, value=1, label="Guidance Scale 2 - low noise stage")
-
            generate_button = gr.Button("Generate Video", variant="primary")
        with gr.Column():
            video_output = gr.Video(label="Generated Video", autoplay=True, interactive=False)
 
-    ui_inputs = [
-        input_image_component, prompt_input, steps_slider,
-        negative_prompt_input, duration_seconds_input,
-        guidance_scale_input, guidance_scale_2_input, seed_input, randomize_seed_checkbox
-    ]
+    ui_inputs = [input_image_component, prompt_input, steps_slider,
+                 negative_prompt_input, duration_seconds_input,
+                 guidance_scale_input, guidance_scale_2_input,
+                 seed_input, randomize_seed_checkbox]
+
    generate_button.click(fn=generate_video, inputs=ui_inputs, outputs=[video_output, seed_input])
 
+
 if __name__ == "__main__":
-    demo.queue().launch(mcp_server=True)
+    demo.queue().launch(mcp_server=True)
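Notes on the changed code follow. First, resize_image: its contract is easiest to verify with concrete numbers. SQUARE_DIM, MAX_DIM, MIN_DIM, and MULTIPLE_OF are defined earlier in app.py and never appear in this diff, so the values below are illustrative assumptions; MAX_DIM and MIN_DIM are taken from the "832x480" comments this commit removes, while SQUARE_DIM and MULTIPLE_OF are guesses. A minimal dimensions-only sketch:

# Sketch of resize_image's output dimensions, with assumed constants.
# MAX_DIM/MIN_DIM follow the "832x480" comments removed by this commit;
# SQUARE_DIM and MULTIPLE_OF are illustrative guesses, not the real values.
SQUARE_DIM = 640          # assumed side length for square inputs
MAX_DIM, MIN_DIM = 832, 480
MULTIPLE_OF = 16          # assumed snapping granularity

def resize_dims(width: int, height: int) -> tuple[int, int]:
    if width == height:
        return SQUARE_DIM, SQUARE_DIM
    aspect_ratio = width / height
    if aspect_ratio > MAX_DIM / MIN_DIM:   # ultra-wide: center-crop, then 832x480
        return MAX_DIM, MIN_DIM
    if aspect_ratio < MIN_DIM / MAX_DIM:   # ultra-tall: center-crop, then 480x832
        return MIN_DIM, MAX_DIM
    if width > height:                     # landscape: pin the long side
        target_w, target_h = MAX_DIM, int(round(MAX_DIM / aspect_ratio))
    else:                                  # portrait
        target_h, target_w = MAX_DIM, int(round(MAX_DIM * aspect_ratio))
    # Snap to the model's granularity, then clamp into [MIN_DIM, MAX_DIM].
    final_w = round(target_w / MULTIPLE_OF) * MULTIPLE_OF
    final_h = round(target_h / MULTIPLE_OF) * MULTIPLE_OF
    return (max(MIN_DIM, min(MAX_DIM, final_w)),
            max(MIN_DIM, min(MAX_DIM, final_h)))

print(resize_dims(1500, 1000))   # -> (832, 560)
print(resize_dims(3000, 1000))   # -> (832, 480), after a center crop

The snap-then-clamp at the end is why odd aspect ratios land on multiples of the model's granularity rather than the exact target size.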
 
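The same hand-checking works for the scheduling math. FIXED_FPS is 24 per the docstring the commit deletes; MIN_FRAMES_MODEL and MAX_FRAMES_MODEL live outside the diff, so the bounds below are placeholders. The lines that compute `factor` in get_duration also fall between the two hunks, so the scaling shown is one plausible reading based on BASE_FRAMES_HEIGHT_WIDTH, an assumption rather than the file's actual code:

import numpy as np

FIXED_FPS = 24                 # per the removed docstring
MIN_FRAMES_MODEL = 8           # placeholder; the real bound is set earlier in app.py
MAX_FRAMES_MODEL = 81          # placeholder

def get_num_frames(duration_seconds: float) -> int:
    # Same expression the commit collapses onto one line.
    return 1 + int(np.clip(int(round(duration_seconds * FIXED_FPS)),
                           MIN_FRAMES_MODEL, MAX_FRAMES_MODEL))

BASE_FRAMES_HEIGHT_WIDTH = 81 * 832 * 624
BASE_STEP_DURATION = 15

def estimate_gpu_seconds(steps: int, num_frames: int, width: int, height: int) -> float:
    # ASSUMED: the diff never shows how `factor` is computed; scaling the
    # requested pixel volume against the base volume is one plausible reading.
    factor = num_frames * width * height / BASE_FRAMES_HEIGHT_WIDTH
    step_duration = BASE_STEP_DURATION * factor ** 1.5
    return 10 + int(steps) * step_duration

frames = get_num_frames(3.5)   # 3.5 s * 24 fps = 84, clipped to 81, plus 1 -> 82
print(frames, estimate_gpu_seconds(6, frames, 832, 480))

With the placeholder bounds, a 3.5 s request rounds to 84 frames, clips to 81, and yields 82 frames total; the estimate used by @spaces.GPU(duration=get_duration) then scales the per-step cost by factor ** 1.5 on top of a 10-second startup allowance.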
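Finally, the reshaped ui_inputs list is positional: Gradio hands the component values to generate_video in list order, and progress is filled by the gr.Progress default rather than by a component. The mapping implied by the diff, plus a self-check that could be pasted at the end of app.py (assuming the @spaces.GPU wrapper preserves the signature, e.g. via functools.wraps):

# ui_inputs order           ->  generate_video parameter
# input_image_component     ->  input_image
# prompt_input              ->  prompt
# steps_slider              ->  steps
# negative_prompt_input     ->  negative_prompt
# duration_seconds_input    ->  duration_seconds
# guidance_scale_input      ->  guidance_scale
# guidance_scale_2_input    ->  guidance_scale_2
# seed_input                ->  seed
# randomize_seed_checkbox   ->  randomize_seed
import inspect

params = list(inspect.signature(generate_video).parameters)
assert params[:-1] == [
    "input_image", "prompt", "steps", "negative_prompt", "duration_seconds",
    "guidance_scale", "guidance_scale_2", "seed", "randomize_seed",
], "ui_inputs must mirror generate_video's parameter order (progress excluded)"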