{ "epoch": 0.9984301412872841, "eval_dpo_lambda": 1.0, "eval_logits/chosen": -0.9540698528289795, "eval_logits/rejected": -1.032906413078308, "eval_logps/chosen": -393.81256103515625, "eval_logps/rejected": -416.878662109375, "eval_loss": 0.5601596236228943, "eval_rewards/accuracies": 0.7341269850730896, "eval_rewards/chosen": -0.6050105690956116, "eval_rewards/margins": 0.49641188979148865, "eval_rewards/rejected": -1.1014224290847778, "eval_runtime": 128.7804, "eval_samples": 2000, "eval_samples_per_second": 15.53, "eval_steps_per_second": 0.489 }