x2XcarleX2x committed
Commit fa00fca · verified · 1 Parent(s): 27791d3

Update aduc_framework/managers/pipeline_wan_i2v (4).py

aduc_framework/managers/pipeline_wan_i2v (4).py CHANGED
@@ -432,7 +432,7 @@ class WanImageToVideoPipeline(DiffusionPipeline, WanLoraLoaderMixin):
         latents = latents.to(device=device, dtype=dtype)
         print(f"latents{latents.shape}")

-        image = image.unsqueeze(2)  # [batch_size, channels, 1, height, width]
+        pipeline_wan_i2v = image.unsqueeze(2)  # [batch_size, channels, 1, height, width]

         if self.config.expand_timesteps:
             video_condition = image
@@ -651,6 +651,10 @@ class WanImageToVideoPipeline(DiffusionPipeline, WanLoraLoaderMixin):
         if isinstance(callback_on_step_end, (PipelineCallback, MultiPipelineCallbacks)):
             callback_on_step_end_tensor_inputs = callback_on_step_end.tensor_inputs

+
+
+        print(f"latents00{latents.shape}")
+
         # 1. Check inputs. Raise error if not correct
         self.check_inputs(
             prompt,
@@ -719,6 +723,9 @@ class WanImageToVideoPipeline(DiffusionPipeline, WanLoraLoaderMixin):
             image_embeds = image_embeds.repeat(batch_size, 1, 1)
             image_embeds = image_embeds.to(transformer_dtype)

+
+        print(f"image_embeds{image_embeds.shape}")
+
         # 4. Prepare timesteps
         self.scheduler.set_timesteps(num_inference_steps, device=device)
         timesteps = self.scheduler.timesteps
@@ -744,12 +751,21 @@ class WanImageToVideoPipeline(DiffusionPipeline, WanLoraLoaderMixin):
             latents,
             last_image,
         )
+
+
+        print(f"latents_outputs{latents_outputs.shape}")
+
         if self.config.expand_timesteps:
             # wan 2.2 5b i2v use firt_frame_mask to mask timesteps
             latents, condition, first_frame_mask = latents_outputs
         else:
             latents, condition = latents_outputs

+
+
+        print(f"latentsxx{latents.shape}")
+
+
         # 6. Denoising loop
         num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order
         self._num_timesteps = len(timesteps)
@@ -812,6 +828,8 @@ class WanImageToVideoPipeline(DiffusionPipeline, WanLoraLoaderMixin):
                 # compute the previous noisy sample x_t -> x_t-1
                 latents = self.scheduler.step(noise_pred, t, latents, return_dict=False)[0]

+                print(f"latentsppp{latents.shape}")
+
                 if callback_on_step_end is not None:
                     callback_kwargs = {}
                     for k in callback_on_step_end_tensor_inputs:
@@ -831,6 +849,10 @@ class WanImageToVideoPipeline(DiffusionPipeline, WanLoraLoaderMixin):

         self._current_timestep = None

+
+        print(f"latentsfim{latents.shape}")
+
+
         if self.config.expand_timesteps:
             latents = (1 - first_frame_mask) * condition + first_frame_mask * latents
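For reference, a minimal sketch of the shape transform noted in the inline comment at line 435: `unsqueeze(2)` inserts a singleton frame dimension, turning a `[batch_size, channels, height, width]` image batch into `[batch_size, channels, 1, height, width]`. The tensor below and its dimensions are illustrative placeholders, not values taken from the pipeline.

```python
import torch

# Illustrative only: a dummy image batch shaped [batch_size, channels, height, width].
image = torch.randn(1, 3, 480, 832)

# unsqueeze(2) inserts a singleton dimension at index 2, yielding the
# [batch_size, channels, 1, height, width] layout described in the diff comment.
video_condition = image.unsqueeze(2)
print(video_condition.shape)  # torch.Size([1, 3, 1, 480, 832])
```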