# File: RelightVid / configs/instruct_v2v_ori.yaml
# Source: Hugging Face Space aleafy/RelightVid (Space status at capture: Runtime error)
# Last commit: 0a63786 "Start fresh" by aleafy, 9 months before capture; size 3.68 kB

	expt_dir: experiments
	expt_name: instruct_v2v
	trainer_args:
	max_epochs: 10
	accelerator: "gpu"
	devices: [0]
	limit_train_batches: 2048
	limit_val_batches: 1
	# strategy: "ddp"
	strategy: "deepspeed_stage_2"
	accumulate_grad_batches: 256
	check_val_every_n_epoch: 5
	diffusion:
	target: pl_trainer.instruct_p2p_video.InstructP2PVideoTrainerTemporal
	params:
	beta_schedule_args:
	beta_schedule: scaled_linear
	num_train_timesteps: 1000
	beta_start: 0.00085
	beta_end: 0.012
	clip_sample: false
	thresholding: false
	prediction_type: epsilon
	loss_fn: l2
	optim_args:
	lr: 1e-5
	unet_init_weights:
	- pretrained_models/instruct_pix2pix/diffusion_pytorch_model.bin
	- pretrained_models/Motion_Module/mm_sd_v15.ckpt
	vae_init_weights: pretrained_models/instruct_pix2pix/vqvae.ckpt
	text_model_init_weights: pretrained_models/instruct_pix2pix/text.ckpt
	scale_factor: 0.18215
	guidance_scale: 5 # not used
	ddim_sampling_steps: 20
	text_cfg: 7.5
	img_cfg: 1.2
	cond_image_dropout: 0.1
	prompt_type: edit_prompt
	unet:
	target: modules.video_unet_temporal.unet.UNet3DConditionModel
	params:
	in_channels: 8
	out_channels: 4
	act_fn: silu
	attention_head_dim: 8
	block_out_channels:
	- 320
	- 640
	- 1280
	- 1280
	cross_attention_dim: 768
	down_block_types:
	- CrossAttnDownBlock3D
	- CrossAttnDownBlock3D
	- CrossAttnDownBlock3D
	- DownBlock3D
	up_block_types:
	- UpBlock3D
	- CrossAttnUpBlock3D
	- CrossAttnUpBlock3D
	- CrossAttnUpBlock3D
	downsample_padding: 1
	layers_per_block: 2
	mid_block_scale_factor: 1
	norm_eps: 1e-05
	norm_num_groups: 32
	sample_size: 64
	use_motion_module: true
	motion_module_resolutions:
	- 1
	- 2
	- 4
	- 8
	motion_module_mid_block: false
	motion_module_decoder_only: false
	motion_module_type: Vanilla
	motion_module_kwargs:
	num_attention_heads: 8
	num_transformer_block: 1
	attention_block_types:
	- Temporal_Self
	- Temporal_Self
	temporal_position_encoding: true
	temporal_position_encoding_max_len: 32
	temporal_attention_dim_div: 1
	vae:
	target: modules.kl_autoencoder.autoencoder.AutoencoderKL
	params:
	embed_dim: 4
	ddconfig:
	double_z: true
	z_channels: 4
	resolution: 256
	in_channels: 3
	out_ch: 3
	ch: 128
	ch_mult:
	- 1
	- 2
	- 4
	- 4
	num_res_blocks: 2
	attn_resolutions: []
	dropout: 0.0
	lossconfig:
	target: torch.nn.Identity
	text_model:
	target: modules.openclip.modules.FrozenCLIPEmbedder
	params:
	freeze: true
	data:
	batch_size: 1
	val_batch_size: 1
	train:
	target: dataset.videoP2P.VideoPromptToPromptMotionAug
	params:
	root_dirs:
	- video_ptp/raw_generated
	- video_ptp/raw_generated_webvid
	num_frames: 16
	zoom_ratio: 0.2
	max_zoom: 1.25
	translation_ratio: 0.7
	translation_range: [0, 0.2]
	val:
	target: dataset.videoP2P.VideoPromptToPromptMotionAug
	params:
	root_dirs:
	- video_ptp/raw_generated
	num_frames: 16
	zoom_ratio: 0.2
	max_zoom: 1.25
	translation_ratio: 0.7
	translation_range: [0, 0.2]
	callbacks:
	- target: pytorch_lightning.callbacks.ModelCheckpoint
	params:
	dirpath: "${expt_dir}/${expt_name}"
	filename: "{epoch:04d}"
	monitor: epoch
	mode: max
	save_top_k: 5
	save_last: true
	- target: callbacks.instruct_p2p_video.InstructP2PLogger
	params:
	max_num_images: 1
	require_wandb: true