# Lightning Fabric runtime: target class first, then device/precision selection,
# then the nested logger and distributed-strategy nodes.
fabric:
  _target_: lightning.Fabric
  # Nested `_target_` nodes under this key are instantiated along with it.
  _recursive_: true
  devices: auto
  accelerator: auto
  precision: bf16-true
  loggers:
    _target_: lightning.fabric.loggers.TensorBoardLogger
    root_dir: outputs/logs
    name: llama_full_bradly_terry_rm
    version: null
    sub_dir: null
    default_hp_metric: false
  strategy:
    _target_: lightning.fabric.strategies.FSDPStrategy
    sharding_strategy: FULL_SHARD
    cpu_offload: false
    # Policy built by `get_policy` from the listed layer class path.
    auto_wrap_policy:
      _target_: fusion_bench.mixins.lightning_fabric.get_policy
      _args_:
        - transformers.models.llama.modeling_llama.LlamaDecoderLayer
    # Relative interpolation: reuses the sibling `auto_wrap_policy` node above.
    activation_checkpointing_policy: '${.auto_wrap_policy}'
# Training algorithm configuration, built from the `_target_` class below.
# NOTE(review): spelled "Bradly" here but "bradley" in the dataset loader path under
# `modelpool.train_datasets` — confirm this matches the name exported by fusion_bench.method.
method:
  _target_: fusion_bench.method.BradlyTerryRewardModeling
  # Recursive instantiation is off, so the nested `optimizer` / `lr_scheduler`
  # nodes reach the target as config rather than as built objects.
  _recursive_: false
  optimizer:
    _target_: torch.optim.AdamW
    # NOTE(review): lr is 0 here; presumably the scheduler below drives the actual rate — confirm.
    lr: 0
    weight_decay: 0.001
    fused: null
  lr_scheduler:
    _target_: fusion_bench.optim.lr_scheduler.CosineDecayWithWarmup
    # `_T_max_` is a string, not a number; it looks like a placeholder that the
    # training code replaces at runtime — TODO confirm.
    T_max: _T_max_
    init_lr: 0
    warmup_steps: 100
    max_lr: 5.0e-06
    min_lr: 1.0e-07
  # Keyword arguments for the data loader (name suggests they are forwarded as-is — confirm).
  dataloader_kwargs:
    batch_size: 8
    num_workers: 0
    pin_memory: true
  max_epochs: 2
  # NOTE(review): -1 presumably means "no limit" for both step caps — confirm.
  max_steps: -1
  max_steps_per_epoch: -1
  # If accumulation multiplies the loader batch size as usual, 8 x 16 = 128
  # samples per optimizer step per device — confirm.
  accumulate_grad_batches: 16
  # Scheduler stepping cadence: every 1 `step`.
  lr_scheduler_interval: step
  lr_scheduler_frequency: 1
  # Checkpoint cadence: every 1 `epoch`.
  checkpoint_save_interval: epoch
  checkpoint_save_frequency: 1
  # Gradient clipping settings.
  gradient_clip_val: 1
  gradient_clip_algorithm: norm
  # Checkpoint content/format switches.
  save_optimizer_state: false
  save_full_model: true
  save_ckpt_type: lightning
  # No checkpoint path configured.
  ckpt_path: null
  # NOTE(review): presumably the maximum sequence length in tokens — confirm.
  max_length: 4096
  fix_token_embedding: true
# Model pool: base checkpoint id, the model built from it, its tokenizer and
# the training dataset.
modelpool:
  _target_: fusion_bench.modelpool.CausalLMPool
  # Defined once here; the `models` and `tokenizer` nodes refer back to it via
  # relative interpolations.
  pretrained_model_name_or_path: meta-llama/Llama-3.2-1B-Instruct
  models:
    _pretrained_:
      _target_: fusion_bench.modelpool.seq_classification_lm.create_reward_model_from_pretrained
      # Three dots: resolves to `modelpool.pretrained_model_name_or_path`.
      pretrained_model_name_or_path: '${...pretrained_model_name_or_path}'
      torch_dtype: bfloat16
      use_flash_attention_2: true
  tokenizer:
    _target_: transformers.AutoTokenizer.from_pretrained
    # Two dots: resolves to `modelpool.pretrained_model_name_or_path`.
    pretrained_model_name_or_path: '${..pretrained_model_name_or_path}'
    # Quoted so the literal token text is unambiguous to any YAML parser.
    pad_token: '<|end_of_text|>'
  train_datasets:
    preference_700k:
      _target_: fusion_bench.dataset.llama.preference_700k.load_tokenized_preference_700k_for_bradley_terry_rm
      # Three dots: resolves to the `modelpool.tokenizer` node above.
      tokenizer: '${...tokenizer}'
      path: hendrydong/preference_700K
      split: train
      cache_path: null
# Task pool for this run, built from the `DummyTaskPool` target.
# NOTE(review): the name suggests a placeholder that performs no real evaluation — confirm.
taskpool:
  _target_: fusion_bench.taskpool.DummyTaskPool
  # No model save path configured.
  model_save_path: null
# Program entry point for the whole config.
_target_: fusion_bench.programs.FabricModelFusionProgram
# Recursive instantiation is off at the root; `fabric` re-enables it for its own subtree.
_recursive_: false
# Run-mode switches (both off).
dry_run: false
fast_dev_run: false
# Diagnostic printing switches (both on).
print_config: true
print_function_call: true
# No report file path configured.
report_save_path: null