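# Training config for instruct-video-to-video with IC-Light relighting weights and an
# (assumed) AnimateDiff motion module, run through PyTorch Lightning with DeepSpeed stage 2.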
expt_dir: experiments
expt_name: instruct_v2v_ic
trainer_args:
  max_epochs: 5
  accelerator: "gpu"
  devices: [0,1,2,3]
  limit_train_batches: 2048
  limit_val_batches: 1 #! limits how many validation batches run per epoch
  # strategy: "ddp"
  strategy: "deepspeed_stage_2"
  # autotune_only_on_rank_zero: true # make sure only one rank performs the autotune-table step
  accumulate_grad_batches: 32 #! keep an eye on this value
  check_val_every_n_epoch: 1 #! check whether this value interacts with logging
  # precision: 16 # enable half precision (FP16)
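  # Rough bookkeeping, assuming data.batch_size below stays at 1: each optimizer step sees
  # 4 devices x 1 clip x 32 accumulated batches = 128 clips, and limit_train_batches: 2048
  # works out to roughly 2048 / 32 = 64 optimizer steps per device per epoch.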
diffusion:
  target: pl_trainer.instruct_p2p_video.InstructP2PVideoTrainerTemporal
  params:
    beta_schedule_args:
      beta_schedule: scaled_linear
      num_train_timesteps: 1000
      beta_start: 0.00085
      beta_end: 0.012
      clip_sample: false
      thresholding: false
    prediction_type: epsilon
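    # These scheduler settings (scaled_linear betas 0.00085 -> 0.012 over 1000 steps,
    # epsilon prediction) match the standard Stable Diffusion 1.5 noise schedule.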
    loss_fn: l2
    optim_args:
      lr: 1e-5
    unet_init_weights: #! note: training can just as well start from an iv2v checkpoint
      - unet/diffusion_pytorch_model.safetensors # IC-Light UNet, safetensors
      - pretrained_models/Motion_Module/mm_sd_v15.ckpt # motion module, presumably the AnimateDiff one
      - pretrained_models/iclight/iclight_sd15_fbc.safetensors # IC-Light LoRA weights
    base_path: /mnt/petrelfs/fangye/.cache/huggingface/hub/models--stablediffusionapi--realistic-vision-v51/snapshots/19e3643d7d963c156d01537188ec08f0b79a514a
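    # Presumed layout: base_path points at the Realistic Vision v5.1 (SD 1.5) snapshot, and
    # unet_init_weights layers the UNet safetensors, the motion-module checkpoint, and the
    # IC-Light fbc offsets on top of it; verify the actual loading order in the trainer before swapping weights.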
    # vae_init_weights: pretrained_models/instruct_pix2pix/vqvae.ckpt
    # text_model_init_weights: pretrained_models/instruct_pix2pix/text.ckpt #! both can simply be set to None and loaded via from_pretrained
    scale_factor: 0.18215
    guidance_scale: 5 # not used
    ddim_sampling_steps: 20
    text_cfg: 7.5
    img_cfg: 1.2
    cond_image_dropout: 0.1
    prompt_type: edit_prompt
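    # text_cfg / img_cfg look like InstructPix2Pix-style dual classifier-free guidance scales
    # (7.5 for text, 1.2 for the conditioning image); cond_image_dropout: 0.1 presumably drops
    # the conditioning image for ~10% of training samples so image-free guidance stays usable at inference.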
unet:
  target: modules.video_unet_temporal.unet.UNet3DConditionModel
  params:
    in_channels: 4 #! change: 8 -> 12 for IC-Light; keep an eye on this
    out_channels: 4
    act_fn: silu
    attention_head_dim: 8
    block_out_channels:
      - 320
      - 640
      - 1280
      - 1280
    cross_attention_dim: 768
    down_block_types:
      - CrossAttnDownBlock3D
      - CrossAttnDownBlock3D
      - CrossAttnDownBlock3D
      - DownBlock3D
    up_block_types:
      - UpBlock3D
      - CrossAttnUpBlock3D
      - CrossAttnUpBlock3D
      - CrossAttnUpBlock3D
    downsample_padding: 1
    layers_per_block: 2
    mid_block_scale_factor: 1
    norm_eps: 1e-05
    norm_num_groups: 32
    sample_size: 64
    use_motion_module: true #!!! when only testing IC-Light the motion module is not needed, i.e. set this to false
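    # Sketch for an IC-Light-only test run (assumes the trainer tolerates dropping the
    # motion-module checkpoint from unet_init_weights when this flag is off):
    # use_motion_module: false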
    motion_module_resolutions:
      - 1
      - 2
      - 4
      - 8
    motion_module_mid_block: false
    motion_module_decoder_only: false
    motion_module_type: Vanilla
    motion_module_kwargs:
      num_attention_heads: 8
      num_transformer_block: 1
      attention_block_types:
        - Temporal_Self
        - Temporal_Self
      temporal_position_encoding: true
      temporal_position_encoding_max_len: 32
      temporal_attention_dim_div: 1
text_model:
  target: modules.openclip.modules.FrozenCLIPEmbedder
  params:
    freeze: true
data:
  batch_size: 1
  val_batch_size: 1
  train:
    target: dataset.videoP2P.VideoPromptToPromptMotionAug
    params: # adjust the training paths and the related loading code, e.g. how to handle datasets that lack meta.yaml and similar parameters
      root_dirs:
        - /mnt/petrelfs/fangye/test/instruct-video-to-video_1019/data_train_v2
      num_frames: 16
      zoom_ratio: 0.2
      max_zoom: 1.25
      translation_ratio: 0.7
      translation_range: [0, 0.2]
      is_train: True
  val:
    target: dataset.videoP2P.VideoPromptToPromptMotionAug
    params:
      root_dirs:
        - data_train
      num_frames: 16
      zoom_ratio: 0.2
      max_zoom: 1.25
      translation_ratio: 0.7
      translation_range: [0, 0.2]
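  # Both splits sample 16-frame clips, which fits under temporal_position_encoding_max_len: 32
  # above; note that the val root_dirs entry (data_train) is relative while the train one is absolute.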
callbacks:
  - target: pytorch_lightning.callbacks.ModelCheckpoint
    params:
      dirpath: "${expt_dir}/${expt_name}"
      filename: "{epoch:04d}"
      monitor: epoch
      mode: max
      save_top_k: 5
      save_last: true
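      # monitor: epoch with mode: max keeps the checkpoints of the 5 most recent epochs
      # (plus last.ckpt via save_last); switch monitor to a validation metric for best-model selection.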
  - target: callbacks.instruct_p2p_video.InstructP2PLogger
    params:
      max_num_images: 1
      # accumulate_grad_batches: 128
      require_wandb: true
  - target: pytorch_lightning.callbacks.DeviceStatsMonitor