expt_dir: experiments
expt_name: instruct_v2v_ic_pexels_text_hdr_test_lr0.5_aug_lossc_fix_bs1 #! note: this name is written into the logs; do not change it on every run
trainer_args:
  max_epochs: 10
  accelerator: "gpu"
  devices: [0, 1, 2, 3, 4, 5, 6, 7] #! change to use more cards
  limit_train_batches: 2048
  limit_val_batches: 3 #! limits how many validation batches run per epoch
  # strategy: "ddp"
  strategy: "deepspeed_stage_2"
  # autotune_only_on_rank_zero: true # make sure only one process runs the autotuning-table operation
  accumulate_grad_batches: 128 #! watch this value: 256 -> 128
  check_val_every_n_epoch: 1 #! check whether this value is related to logging...
  # precision: 16 # enable half precision (FP16)
diffusion:
  target: pl_trainer.instruct_p2p_video.InstructP2PVideoTrainerTemporalText
  params:
    beta_schedule_args:
      beta_schedule: scaled_linear
      num_train_timesteps: 1000
      beta_start: 0.00085
      beta_end: 0.012
      clip_sample: false
      thresholding: false
    prediction_type: epsilon
    loss_fn: l2
    optim_args:
      lr: 1e-5 #! originally 1e-5
    unet_init_weights: #! note: training can also start directly from an iv2v checkpoint
      - unet/diffusion_pytorch_model.safetensors # iclight, unet, safetensors
      - pretrained_models/Motion_Module/mm_sd_v15.ckpt # motion module, presumably AnimateDiff weights
      - pretrained_models/iclight/iclight_sd15_fc.safetensors # IC-Light LoRA weights
    base_path: /mnt/petrelfs/fangye/.cache/huggingface/hub/models--stablediffusionapi--realistic-vision-v51/snapshots/19e3643d7d963c156d01537188ec08f0b79a514a
    # vae_init_weights: pretrained_models/instruct_pix2pix/vqvae.ckpt
    # text_model_init_weights: pretrained_models/instruct_pix2pix/text.ckpt #! these two can simply be set to None and loaded via from_pretrained
    scale_factor: 0.18215
    guidance_scale: 5 # not used
    ddim_sampling_steps: 20
    text_cfg: 7.5
    img_cfg: 1.2
    hdr_cfg: 7.5
    cond_image_dropout: 0.1
    cond_text_dropout: 0.1
    cond_hdr_dropout: 0.1
    ic_condition: fg
    hdr_train: True
    prompt_type: edit_prompt
    unet:
      target: modules.video_unet_temporal.unet.UNet3DConditionModel
      params:
        in_channels: 4 #! change: 8 -> 12, set to 12 for IC-Light; double-check...
        out_channels: 4
        act_fn: silu
        attention_head_dim: 8
        block_out_channels:
          - 320
          - 640
          - 1280
          - 1280
        cross_attention_dim: 768
        down_block_types:
          - CrossAttnDownBlock3D
          - CrossAttnDownBlock3D
          - CrossAttnDownBlock3D
          - DownBlock3D
        up_block_types:
          - UpBlock3D
          - CrossAttnUpBlock3D
          - CrossAttnUpBlock3D
          - CrossAttnUpBlock3D
        downsample_padding: 1
        layers_per_block: 2
        mid_block_scale_factor: 1
        norm_eps: 1e-05
        norm_num_groups: 32
        sample_size: 64
        use_motion_module: true #!!! when only testing IC-Light, the motion module can be disabled, i.e. set to false
        motion_module_resolutions:
          - 1
          - 2
          - 4
          - 8
        motion_module_mid_block: false
        motion_module_decoder_only: false
        motion_module_type: Vanilla
        motion_module_kwargs:
          num_attention_heads: 8
          num_transformer_block: 1
          attention_block_types:
            - Temporal_Self
            - Temporal_Self
          temporal_position_encoding: true
          temporal_position_encoding_max_len: 32
          temporal_attention_dim_div: 1
    text_model:
      target: modules.openclip.modules.FrozenCLIPEmbedder
      params:
        freeze: true
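# Effective batch size, as a sketch assuming standard PyTorch Lightning
# semantics (samples per optimizer step = batch_size x num_devices x
# accumulate_grad_batches):
#   1 (data.batch_size) x 8 (devices) x 128 (accumulate_grad_batches) = 1024
# With limit_train_batches: 2048, that is 2048 / 128 = 16 optimizer steps
# per device per epoch.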
data:
  batch_size: 1
  val_batch_size: 1
  train:
    target: dataset.videoP2P.VideoPromptToPromptMotionAugPexelsHDR
    params:
      # note: update the training paths and the related loading code, e.g. how to handle parameters when there is no meta.yaml
      root_dirs: #! note: root_dirs has been changed
        # - /mnt/petrelfs/fangye/test/instruct-video-to-video_1019/data_train_pexels/rmbg_data
        - /mnt/hwfile/mllm/sunzeyi/iclight_video/rendered_data_rgb_fixlast
      hdr_dir: /mnt/hwfile/mllm/sunzeyi/iclight_video/haven_hdr_rgb
      num_frames: 16
      zoom_ratio: 0.2
      max_zoom: 1.25
      translation_ratio: 0.7
      translation_range: [0, 0.2]
      is_train: True
      ic_condition: fg
  val:
    target: dataset.videoP2P.VideoPromptToPromptMotionAugPexelsHDR
    params:
      root_dirs:
        # - /mnt/petrelfs/fangye/test/instruct-video-to-video_1019/data_train_pexels/rmbg_data
        - /mnt/hwfile/mllm/sunzeyi/iclight_video/rendered_data_rgb_fixlast
      hdr_dir: /mnt/hwfile/mllm/sunzeyi/iclight_video/haven_hdr_rgb
      num_frames: 16
      zoom_ratio: 0.2
      max_zoom: 1.25
      translation_ratio: 0.7
      translation_range: [0, 0.2]
      ic_condition: fg
callbacks:
  - target: pytorch_lightning.callbacks.ModelCheckpoint
    params:
      dirpath: "${expt_dir}/${expt_name}"
      # filename: "{epoch:04d}"
      filename: "{step:06d}"
      every_n_train_steps: 1
      save_last: false
      # monitor: epoch
      # mode: max
      # save_top_k: 3
      # save_last: false
  - target: callbacks.instruct_p2p_video.InstructP2PLogger
    params:
      max_num_images: 1
      expt_name: instruct_v2v_ic_pexels_text_hdr_test_lr0.5_aug_lossc_fix_bs1
      # accumulate_grad_batches: 128
      require_wandb: true
  - target: pytorch_lightning.callbacks.DeviceStatsMonitor
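# Guidance note (a sketch; the actual combination is implemented in
# pl_trainer.instruct_p2p_video, not in this file): text_cfg / img_cfg / hdr_cfg
# presumably drive an InstructPix2Pix-style multi-condition classifier-free
# guidance at sampling time, schematically for the text and image terms:
#   eps = eps(z, 0, 0)
#       + img_cfg  * (eps(z, c_img, 0)      - eps(z, 0, 0))
#       + text_cfg * (eps(z, c_img, c_text) - eps(z, c_img, 0))
# with an analogous delta scaled by hdr_cfg for the HDR condition. The
# cond_image/text/hdr_dropout values of 0.1 drop each condition ~10% of the
# time during training, which is what enables this guidance.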