Finish setup for initial pipeline
Browse files
- pipeline.py +16 -62
pipeline.py
CHANGED
|
@@ -71,7 +71,7 @@ if is_torch_xla_available():
|
|
| 71 |
XLA_AVAILABLE = True
|
| 72 |
else:
|
| 73 |
XLA_AVAILABLE = False
|
| 74 |
-
|
| 75 |
|
| 76 |
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
|
| 77 |
|
|
@@ -79,13 +79,18 @@ EXAMPLE_DOC_STRING = """
|
|
| 79 |
Examples:
|
| 80 |
```py
|
| 81 |
>>> import torch
|
| 82 |
-
>>> from diffusers import
|
| 83 |
-
>>>
|
| 84 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
... )
|
| 86 |
>>> pipe = pipe.to("cuda")
|
| 87 |
>>> prompt = "a photo of an astronaut riding a horse on mars"
|
| 88 |
-
>>>
|
|
|
|
| 89 |
```
|
| 90 |
"""
|
| 91 |
|
|
@@ -315,37 +320,6 @@ class PAGCFGIdentitySelfAttnProcessor:
|
|
| 315 |
|
| 316 |
return hidden_states
|
| 317 |
|
| 318 |
-
if is_invisible_watermark_available():
|
| 319 |
-
from diffusers.pipelines.stable_diffusion_xl.watermark import StableDiffusionXLWatermarker
|
| 320 |
-
|
| 321 |
-
if is_torch_xla_available():
|
| 322 |
-
import torch_xla.core.xla_model as xm
|
| 323 |
-
|
| 324 |
-
XLA_AVAILABLE = True
|
| 325 |
-
else:
|
| 326 |
-
XLA_AVAILABLE = False
|
| 327 |
-
|
| 328 |
-
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
|
| 329 |
-
|
| 330 |
-
EXAMPLE_DOC_STRING = """
|
| 331 |
-
Examples:
|
| 332 |
-
```py
|
| 333 |
-
>>> import torch
|
| 334 |
-
>>> from diffusers import StableDiffusionXLImg2ImgPipeline
|
| 335 |
-
>>> from diffusers.utils import load_image
|
| 336 |
-
|
| 337 |
-
>>> pipe = StableDiffusionXLImg2ImgPipeline.from_pretrained(
|
| 338 |
-
... "stabilityai/stable-diffusion-xl-base-1.0", custom_pipeline="jyoung105/sdxl_perturbed_attention_guidance_i2i", torch_dtype=torch.float16,
|
| 339 |
-
... )
|
| 340 |
-
>>> pipe = pipe.to("cuda")
|
| 341 |
-
>>> url = "https://huggingface.co/datasets/patrickvonplaten/images/resolve/main/aa_xl/000000009.png"
|
| 342 |
-
|
| 343 |
-
>>> init_image = load_image(url).convert("RGB")
|
| 344 |
-
>>> prompt = "a photo of an astronaut riding a horse on mars"
|
| 345 |
-
>>> image = pipe(prompt, image=init_image, pag_scale=3.0, pag_applied_layers=['mid']).images[0]
|
| 346 |
-
```
|
| 347 |
-
"""
|
| 348 |
-
|
| 349 |
|
| 350 |
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.rescale_noise_cfg
|
| 351 |
def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
|
|
@@ -767,7 +741,7 @@ class StableDiffusionXLImg2ImgPipeline(
|
|
| 767 |
unscale_lora_layers(self.text_encoder_2, lora_scale)
|
| 768 |
|
| 769 |
return prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds
|
| 770 |
-
|
| 771 |
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_extra_step_kwargs
|
| 772 |
def prepare_extra_step_kwargs(self, generator, eta):
|
| 773 |
# prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
|
|
@@ -923,26 +897,6 @@ class StableDiffusionXLImg2ImgPipeline(
|
|
| 923 |
return timesteps, num_inference_steps
|
| 924 |
|
| 925 |
return timesteps, num_inference_steps - t_start
|
| 926 |
-
|
| 927 |
-
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
|
| 928 |
-
# def prepare_latents(
|
| 929 |
-
# self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None
|
| 930 |
-
# ):
|
| 931 |
-
# shape = (batch_size, num_channels_latents, height // self.vae_scale_factor, width // self.vae_scale_factor)
|
| 932 |
-
# if isinstance(generator, list) and len(generator) != batch_size:
|
| 933 |
-
# raise ValueError(
|
| 934 |
-
# f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
|
| 935 |
-
# f" size of {batch_size}. Make sure the batch size matches the length of the generators."
|
| 936 |
-
# )
|
| 937 |
-
|
| 938 |
-
# if latents is None:
|
| 939 |
-
# latents = randn_tensor(shape, generator=generator, device=device, dtype=dtype)
|
| 940 |
-
# else:
|
| 941 |
-
# latents = latents.to(device)
|
| 942 |
-
|
| 943 |
-
# # scale the initial noise by the standard deviation required by the scheduler
|
| 944 |
-
# latents = latents * self.scheduler.init_noise_sigma
|
| 945 |
-
# return latents
|
| 946 |
|
| 947 |
def prepare_latents(
|
| 948 |
self, image, timestep, batch_size, num_images_per_prompt, dtype, device, generator=None, add_noise=True
|
|
@@ -1259,14 +1213,14 @@ class StableDiffusionXLImg2ImgPipeline(
|
|
| 1259 |
def cross_attention_kwargs(self):
|
| 1260 |
return self._cross_attention_kwargs
|
| 1261 |
|
| 1262 |
-
@property
|
| 1263 |
-
def denoising_end(self):
|
| 1264 |
-
return self._denoising_end
|
| 1265 |
-
|
| 1266 |
@property
|
| 1267 |
def denoising_start(self):
|
| 1268 |
return self._denoising_start
|
| 1269 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1270 |
@property
|
| 1271 |
def num_timesteps(self):
|
| 1272 |
return self._num_timesteps
|
|
@@ -1547,8 +1501,8 @@ class StableDiffusionXLImg2ImgPipeline(
|
|
| 1547 |
self._guidance_rescale = guidance_rescale
|
| 1548 |
self._clip_skip = clip_skip
|
| 1549 |
self._cross_attention_kwargs = cross_attention_kwargs
|
| 1550 |
-
self._denoising_end = denoising_end
|
| 1551 |
self._denoising_start = denoising_start
|
|
|
|
| 1552 |
self._interrupt = False
|
| 1553 |
|
| 1554 |
self._pag_scale = pag_scale
|
|
|
|
| 71 |
XLA_AVAILABLE = True
|
| 72 |
else:
|
| 73 |
XLA_AVAILABLE = False
|
| 74 |
+
|
| 75 |
|
| 76 |
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
|
| 77 |
|
|
|
|
| 79 |
Examples:
|
| 80 |
```py
|
| 81 |
>>> import torch
|
| 82 |
+
>>> from diffusers import StableDiffusionXLImg2ImgPipeline, AutoencoderKL
|
| 83 |
+
>>> from diffusers.utils import load_image
|
| 84 |
+
>>> vae = AutoencoderKL.from_pretrained(
|
| 85 |
+
... "madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16
|
| 86 |
+
... )
|
| 87 |
+
>>> pipe = StableDiffusionXLImg2ImgPipeline.from_pretrained(
|
| 88 |
+
... "stabilityai/stable-diffusion-xl-base-1.0", custom_pipeline="jyoung105/sdxl_perturbed_attention_guidance_i2i", vae=vae, torch_dtype=torch.float16
|
| 89 |
... )
|
| 90 |
>>> pipe = pipe.to("cuda")
|
| 91 |
>>> prompt = "a photo of an astronaut riding a horse on mars"
|
| 92 |
+
>>> init_image = load_image("")
|
| 93 |
+
>>> image = pipe(prompt, image=init_image, strength=0.9, pag_scale=3.0, pag_applied_layers=['mid']).images[0]
|
| 94 |
```
|
| 95 |
"""
|
| 96 |
|
|
|
|
| 320 |
|
| 321 |
return hidden_states
|
| 322 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 323 |
|
| 324 |
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.rescale_noise_cfg
|
| 325 |
def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
|
|
|
|
| 741 |
unscale_lora_layers(self.text_encoder_2, lora_scale)
|
| 742 |
|
| 743 |
return prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds
|
| 744 |
+
|
| 745 |
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_extra_step_kwargs
|
| 746 |
def prepare_extra_step_kwargs(self, generator, eta):
|
| 747 |
# prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
|
|
|
|
| 897 |
return timesteps, num_inference_steps
|
| 898 |
|
| 899 |
return timesteps, num_inference_steps - t_start
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 900 |
|
| 901 |
def prepare_latents(
|
| 902 |
self, image, timestep, batch_size, num_images_per_prompt, dtype, device, generator=None, add_noise=True
|
|
|
|
| 1213 |
def cross_attention_kwargs(self):
|
| 1214 |
return self._cross_attention_kwargs
|
| 1215 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1216 |
@property
|
| 1217 |
def denoising_start(self):
|
| 1218 |
return self._denoising_start
|
| 1219 |
|
| 1220 |
+
@property
|
| 1221 |
+
def denoising_end(self):
|
| 1222 |
+
return self._denoising_end
|
| 1223 |
+
|
| 1224 |
@property
|
| 1225 |
def num_timesteps(self):
|
| 1226 |
return self._num_timesteps
|
|
|
|
| 1501 |
self._guidance_rescale = guidance_rescale
|
| 1502 |
self._clip_skip = clip_skip
|
| 1503 |
self._cross_attention_kwargs = cross_attention_kwargs
|
|
|
|
| 1504 |
self._denoising_start = denoising_start
|
| 1505 |
+
self._denoising_end = denoising_end
|
| 1506 |
self._interrupt = False
|
| 1507 |
|
| 1508 |
self._pag_scale = pag_scale
|