| fabric: | |
| loggers: | |
| _target_: lightning.fabric.loggers.TensorBoardLogger | |
| root_dir: outputs/logs | |
| name: llama_full_bradly_terry_rm | |
| version: null | |
| sub_dir: null | |
| default_hp_metric: false | |
| strategy: | |
| _target_: lightning.fabric.strategies.FSDPStrategy | |
| sharding_strategy: FULL_SHARD | |
| cpu_offload: false | |
| auto_wrap_policy: | |
| _target_: fusion_bench.mixins.lightning_fabric.get_policy | |
| _args_: | |
| - transformers.models.llama.modeling_llama.LlamaDecoderLayer | |
| activation_checkpointing_policy: ${.auto_wrap_policy} | |
| _target_: lightning.Fabric | |
| _recursive_: true | |
| devices: auto | |
| accelerator: auto | |
| precision: bf16-true | |
| method: | |
| _target_: fusion_bench.method.BradlyTerryRewardModeling | |
| _recursive_: false | |
| optimizer: | |
| _target_: torch.optim.AdamW | |
| lr: 0 | |
| weight_decay: 0.001 | |
| fused: null | |
| lr_scheduler: | |
| _target_: fusion_bench.optim.lr_scheduler.CosineDecayWithWarmup | |
| T_max: _T_max_ | |
| init_lr: 0 | |
| warmup_steps: 100 | |
| max_lr: 5.0e-06 | |
| min_lr: 1.0e-07 | |
| dataloader_kwargs: | |
| batch_size: 8 | |
| num_workers: 0 | |
| pin_memory: true | |
| max_epochs: 2 | |
| max_steps: -1 | |
| max_steps_per_epoch: -1 | |
| accumulate_grad_batches: 16 | |
| lr_scheduler_interval: step | |
| lr_scheduler_frequency: 1 | |
| checkpoint_save_interval: epoch | |
| checkpoint_save_frequency: 1 | |
| gradient_clip_val: 1 | |
| gradient_clip_algorithm: norm | |
| save_optimizer_state: false | |
| save_full_model: true | |
| save_ckpt_type: lightning | |
| ckpt_path: null | |
| max_length: 4096 | |
| fix_token_embedding: true | |
| modelpool: | |
| _target_: fusion_bench.modelpool.CausalLMPool | |
| pretrained_model_name_or_path: meta-llama/Llama-3.2-1B-Instruct | |
| models: | |
| _pretrained_: | |
| _target_: fusion_bench.modelpool.seq_classification_lm.create_reward_model_from_pretrained | |
| pretrained_model_name_or_path: ${...pretrained_model_name_or_path} | |
| torch_dtype: bfloat16 | |
| use_flash_attention_2: true | |
| tokenizer: | |
| _target_: transformers.AutoTokenizer.from_pretrained | |
| pretrained_model_name_or_path: ${..pretrained_model_name_or_path} | |
| pad_token: <|end_of_text|> | |
| train_datasets: | |
| preference_700k: | |
| _target_: fusion_bench.dataset.llama.preference_700k.load_tokenized_preference_700k_for_bradley_terry_rm | |
| tokenizer: ${...tokenizer} | |
| path: hendrydong/preference_700K | |
| split: train | |
| cache_path: null | |
| taskpool: | |
| _target_: fusion_bench.taskpool.DummyTaskPool | |
| model_save_path: null | |
| _target_: fusion_bench.programs.FabricModelFusionProgram | |
| _recursive_: false | |
| fast_dev_run: false | |
| dry_run: false | |
| print_config: true | |
| report_save_path: null | |
| print_function_call: true | |