Spaces:

linoyts
/

LTXV-lora-the-explorer

Running on Zero

App Files Files Community

linoyts HF Staff committed on May 14

Commit

649221a

verified ·

1 Parent(s): f058434

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -22

app.py CHANGED Viewed

@@ -44,21 +44,16 @@ def generate(prompt,
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
-    # Part 1. Generate video at smaller resolution
-    # Text-only conditioning is also supported without the need to pass `conditions`
-    expected_height, expected_width = height, width
-    downscale_factor = 2 / 3
-    downscaled_height, downscaled_width = int(expected_height * downscale_factor), int(expected_width * downscale_factor)
-    downscaled_height, downscaled_width = round_to_nearest_resolution_acceptable_by_vae(downscaled_height, downscaled_width)
     print(mode)
     if mode == "video-to-video" and (video is not None):
         video = load_video(video)[:frames_to_use]
         condition = True
     # elif mode == "image-to-video" and (image is not None):
     elif mode == "image-to-video":
         video = [load_image(image)]
         condition = True
     else:
        condition=False
@@ -67,6 +62,13 @@ def generate(prompt,
         condition1 = LTXVideoCondition(video=video, frame_index=0)
     else:
         condition1 = None
     latents = pipe(
         conditions=condition1,
@@ -82,20 +84,6 @@ def generate(prompt,
         generator=torch.Generator(device="cuda").manual_seed(seed),
         output_type="latent",
     ).frames
-    # latents = pipe(
-    # conditions=[condition1],
-    # prompt=prompt,
-    # negative_prompt=negative_prompt,
-    # width=downscaled_width,
-    # height=downscaled_height,
-    # decode_timestep=0.05,
-    # decode_noise_scale=0.025,
-    # guidance_scale=3.0,
-    # num_frames=num_frames,
-    # num_inference_steps=8,
-    # generator=torch.Generator().manual_seed(0),
-    # output_type="latent",
-    # ).frames
     # Part 2. Upscale generated video using latent upsampler with fewer inference steps
@@ -116,7 +104,7 @@ def generate(prompt,
             height=upscaled_height,
             num_frames=num_frames,
             guidance_scale=guidance_scale,
-            denoise_strength=0.4,  # Effectively, 0.6 * 3 inference steps
             num_inference_steps=10,
             latents=upscaled_latents,
             decode_timestep=0.05,

     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
     print(mode)
     if mode == "video-to-video" and (video is not None):
         video = load_video(video)[:frames_to_use]
         condition = True
+        width, height = video[0].size
     # elif mode == "image-to-video" and (image is not None):
     elif mode == "image-to-video":
         video = [load_image(image)]
+        width, height = video[0].size
         condition = True
     else:
        condition=False
         condition1 = LTXVideoCondition(video=video, frame_index=0)
     else:
         condition1 = None
+    # Part 1. Generate video at smaller resolution
+    # Text-only conditioning is also supported without the need to pass `conditions`
+    expected_height, expected_width = height, width
+    downscale_factor = 2 / 3
+    downscaled_height, downscaled_width = int(expected_height * downscale_factor), int(expected_width * downscale_factor)
+    downscaled_height, downscaled_width = round_to_nearest_resolution_acceptable_by_vae(downscaled_height, downscaled_width)
     latents = pipe(
         conditions=condition1,
         generator=torch.Generator(device="cuda").manual_seed(seed),
         output_type="latent",
     ).frames
     # Part 2. Upscale generated video using latent upsampler with fewer inference steps
             height=upscaled_height,
             num_frames=num_frames,
             guidance_scale=guidance_scale,
+            denoise_strength=0.4,  # Effectively, 0.4 * 10 = 4 inference steps
             num_inference_steps=10,
             latents=upscaled_latents,
             decode_timestep=0.05,