{ "epoch": 1.0, "eval_logits/chosen": -2.0315260887145996, "eval_logits/rejected": -2.0060105323791504, "eval_logps/chosen": -155.9133758544922, "eval_logps/rejected": -168.3367919921875, "eval_loss": 0.22132937610149384, "eval_rewards/accuracies": 0.7689999938011169, "eval_rewards/chosen": -3.684920072555542, "eval_rewards/margins": 2.584770917892456, "eval_rewards/rejected": -6.2696919441223145, "eval_runtime": 65.5407, "eval_samples_per_second": 15.258, "eval_steps_per_second": 1.907 }