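# Training configuration for instruction-guided video editing: an instruct-pix2pix
# UNet extended with a temporal motion module, trained with PyTorch Lightning.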
expt_dir: experiments
expt_name: instruct_v2v
trainer_args:
  max_epochs: 10
  accelerator: "gpu"
  devices: [0]
  limit_train_batches: 2048
  limit_val_batches: 1
  # strategy: "ddp"
  strategy: "deepspeed_stage_2"
  accumulate_grad_batches: 256
  check_val_every_n_epoch: 5
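# Diffusion trainer: epsilon prediction with a scaled-linear beta schedule and L2 loss.
# With data.batch_size 1, a single device, and accumulate_grad_batches 256, the
# effective batch size is 256 clips per optimizer step.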
diffusion:
  target: pl_trainer.instruct_p2p_video.InstructP2PVideoTrainerTemporal
  params:
    beta_schedule_args:
      beta_schedule: scaled_linear
      num_train_timesteps: 1000
      beta_start: 0.00085
      beta_end: 0.012
      clip_sample: false
      thresholding: false
      prediction_type: epsilon
    loss_fn: l2
    optim_args:
      lr: 1e-5
    unet_init_weights:
      - pretrained_models/instruct_pix2pix/diffusion_pytorch_model.bin
      - pretrained_models/Motion_Module/mm_sd_v15.ckpt
    vae_init_weights: pretrained_models/instruct_pix2pix/vqvae.ckpt
    text_model_init_weights: pretrained_models/instruct_pix2pix/text.ckpt
    scale_factor: 0.18215
    guidance_scale: 5 # not used
    ddim_sampling_steps: 20
    text_cfg: 7.5
    img_cfg: 1.2
    cond_image_dropout: 0.1
    prompt_type: edit_prompt
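# Spatio-temporal UNet. in_channels is 8 because, as in instruct-pix2pix, the
# 4-channel noisy latent is concatenated with the 4-channel conditioning-image latent.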
unet:
  target: modules.video_unet_temporal.unet.UNet3DConditionModel
  params:
    in_channels: 8
    out_channels: 4
    act_fn: silu
    attention_head_dim: 8
    block_out_channels:
      - 320
      - 640
      - 1280
      - 1280
    cross_attention_dim: 768
    down_block_types:
      - CrossAttnDownBlock3D
      - CrossAttnDownBlock3D
      - CrossAttnDownBlock3D
      - DownBlock3D
    up_block_types:
      - UpBlock3D
      - CrossAttnUpBlock3D
      - CrossAttnUpBlock3D
      - CrossAttnUpBlock3D
    downsample_padding: 1
    layers_per_block: 2
    mid_block_scale_factor: 1
    norm_eps: 1e-05
    norm_num_groups: 32
    sample_size: 64
    use_motion_module: true
    motion_module_resolutions:
      - 1
      - 2
      - 4
      - 8
    motion_module_mid_block: false
    motion_module_decoder_only: false
    motion_module_type: Vanilla
    motion_module_kwargs:
      num_attention_heads: 8
      num_transformer_block: 1
      attention_block_types:
        - Temporal_Self
        - Temporal_Self
      temporal_position_encoding: true
      temporal_position_encoding_max_len: 32
      temporal_attention_dim_div: 1
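# KL autoencoder in the Stable Diffusion layout: 4 latent channels and 8x spatial
# downsampling (three downsampling stages from ch_mult). lossconfig is Identity
# because no VAE reconstruction loss is applied at this stage.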
vae:
  target: modules.kl_autoencoder.autoencoder.AutoencoderKL
  params:
    embed_dim: 4
    ddconfig:
      double_z: true
      z_channels: 4
      resolution: 256
      in_channels: 3
      out_ch: 3
      ch: 128
      ch_mult:
        - 1
        - 2
        - 4
        - 4
      num_res_blocks: 2
      attn_resolutions: []
      dropout: 0.0
    lossconfig:
      target: torch.nn.Identity
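# Frozen CLIP text encoder for the edit prompt; its 768-dim embeddings match
# cross_attention_dim in the UNet above.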
text_model:
  target: modules.openclip.modules.FrozenCLIPEmbedder
  params:
    freeze: true
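# 16-frame clips of prompt-to-prompt video pairs, with random zoom and translation
# applied as motion augmentation.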
data:
  batch_size: 1
  val_batch_size: 1
  train:
    target: dataset.videoP2P.VideoPromptToPromptMotionAug
    params:
      root_dirs:
        - video_ptp/raw_generated
        - video_ptp/raw_generated_webvid
      num_frames: 16
      zoom_ratio: 0.2
      max_zoom: 1.25
      translation_ratio: 0.7
      translation_range: [0, 0.2]
  val:
    target: dataset.videoP2P.VideoPromptToPromptMotionAug
    params:
      root_dirs:
        - video_ptp/raw_generated
      num_frames: 16
      zoom_ratio: 0.2
      max_zoom: 1.25
      translation_ratio: 0.7
      translation_range: [0, 0.2]
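# Checkpointing: monitoring "epoch" with mode "max" keeps the 5 most recent epoch
# checkpoints plus last.ckpt; the logger callback requires an active wandb run.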
callbacks:
  - target: pytorch_lightning.callbacks.ModelCheckpoint
    params:
      dirpath: "${expt_dir}/${expt_name}"
      filename: "{epoch:04d}"
      monitor: epoch
      mode: max
      save_top_k: 5
      save_last: true
  - target: callbacks.instruct_p2p_video.InstructP2PLogger
    params:
      max_num_images: 1
      require_wandb: true