linoyts HF Staff committed on
Commit
649221a
·
verified ·
1 Parent(s): f058434

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -22
app.py CHANGED
@@ -44,21 +44,16 @@ def generate(prompt,
44
 
45
  if randomize_seed:
46
  seed = random.randint(0, MAX_SEED)
47
-
48
- # Part 1. Generate video at smaller resolution
49
- # Text-only conditioning is also supported without the need to pass `conditions`
50
- expected_height, expected_width = height, width
51
- downscale_factor = 2 / 3
52
- downscaled_height, downscaled_width = int(expected_height * downscale_factor), int(expected_width * downscale_factor)
53
- downscaled_height, downscaled_width = round_to_nearest_resolution_acceptable_by_vae(downscaled_height, downscaled_width)
54
 
55
  print(mode)
56
  if mode == "video-to-video" and (video is not None):
57
  video = load_video(video)[:frames_to_use]
58
  condition = True
 
59
  # elif mode == "image-to-video" and (image is not None):
60
  elif mode == "image-to-video":
61
  video = [load_image(image)]
 
62
  condition = True
63
  else:
64
  condition=False
@@ -67,6 +62,13 @@ def generate(prompt,
67
  condition1 = LTXVideoCondition(video=video, frame_index=0)
68
  else:
69
  condition1 = None
 
 
 
 
 
 
 
70
 
71
  latents = pipe(
72
  conditions=condition1,
@@ -82,20 +84,6 @@ def generate(prompt,
82
  generator=torch.Generator(device="cuda").manual_seed(seed),
83
  output_type="latent",
84
  ).frames
85
- # latents = pipe(
86
- # conditions=[condition1],
87
- # prompt=prompt,
88
- # negative_prompt=negative_prompt,
89
- # width=downscaled_width,
90
- # height=downscaled_height,
91
- # decode_timestep=0.05,
92
- # decode_noise_scale=0.025,
93
- # guidance_scale=3.0,
94
- # num_frames=num_frames,
95
- # num_inference_steps=8,
96
- # generator=torch.Generator().manual_seed(0),
97
- # output_type="latent",
98
- # ).frames
99
 
100
 
101
  # Part 2. Upscale generated video using latent upsampler with fewer inference steps
@@ -116,7 +104,7 @@ def generate(prompt,
116
  height=upscaled_height,
117
  num_frames=num_frames,
118
  guidance_scale=guidance_scale,
119
- denoise_strength=0.4, # Effectively, 0.6 * 3 inference steps
120
  num_inference_steps=10,
121
  latents=upscaled_latents,
122
  decode_timestep=0.05,
 
44
 
45
  if randomize_seed:
46
  seed = random.randint(0, MAX_SEED)
 
 
 
 
 
 
 
47
 
48
  print(mode)
49
  if mode == "video-to-video" and (video is not None):
50
  video = load_video(video)[:frames_to_use]
51
  condition = True
52
+ width, height = video[0].size
53
  # elif mode == "image-to-video" and (image is not None):
54
  elif mode == "image-to-video":
55
  video = [load_image(image)]
56
+ width, height = video[0].size
57
  condition = True
58
  else:
59
  condition=False
 
62
  condition1 = LTXVideoCondition(video=video, frame_index=0)
63
  else:
64
  condition1 = None
65
+
66
+ # Part 1. Generate video at smaller resolution
67
+ # Text-only conditioning is also supported without the need to pass `conditions`
68
+ expected_height, expected_width = height, width
69
+ downscale_factor = 2 / 3
70
+ downscaled_height, downscaled_width = int(expected_height * downscale_factor), int(expected_width * downscale_factor)
71
+ downscaled_height, downscaled_width = round_to_nearest_resolution_acceptable_by_vae(downscaled_height, downscaled_width)
72
 
73
  latents = pipe(
74
  conditions=condition1,
 
84
  generator=torch.Generator(device="cuda").manual_seed(seed),
85
  output_type="latent",
86
  ).frames
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
 
88
 
89
  # Part 2. Upscale generated video using latent upsampler with fewer inference steps
 
104
  height=upscaled_height,
105
  num_frames=num_frames,
106
  guidance_scale=guidance_scale,
107
+ denoise_strength=0.4, # Effectively, 0.4 * 10 = 4 inference steps
108
  num_inference_steps=10,
109
  latents=upscaled_latents,
110
  decode_timestep=0.05,